g | x | w | all
Bytes Lang Time Link
005C++250122T073349Z138 Aspe
023Rust230918T174934ZAnders K
006Python230916T225842ZValue In
011Rust230918T162624Zcorvus_1
010Python230917T081332ZJonathan
081Charcoal230916T163835ZNeil

C++, ~(12,5)

C++ port of @Anders Kaseorg's Rust answer.


Try it online!

#include <iostream>
#include <vector>
#include <unordered_map>
#include <string>
#include <algorithm>
#include <cstdint>
#include <functional>

// A helper struct to hash a vector of uint8_t.
struct VectorHash {
    std::size_t operator()(const std::vector<uint8_t>& v) const noexcept {
        // A simple FNV-like hash for demonstration.
        std::size_t hash = 2166136261u;
        for (auto &byte : v) {
            hash ^= static_cast<std::size_t>(byte);
            hash *= 16777619u;
        }
        return hash;
    }
};

// A helper struct to compare two vectors of uint8_t for equality keys in the map.
struct VectorEqual {
    bool operator()(const std::vector<uint8_t>& a, const std::vector<uint8_t>& b) const noexcept {
        if (a.size() != b.size()) return false;
        for (size_t i = 0; i < a.size(); i++){
            if (a[i] != b[i]) return false;
        }
        return true;
    }
};

using Count = uint64_t;
using Cache = std::unordered_map<std::vector<uint8_t>, Count, VectorHash, VectorEqual>;

/*
 * countState:
 *  - target:   the array of bools (pattern) we compare against
 *  - distance: the maximum distance allowed (k in the Rust code)
 *  - state:    a modifiable "state" vector representing the current Levenshtein state
 *  - cache:    memoization cache mapping from state -> count
 *
 * This function checks if 'state' is in cache. If yes, returns the cached value.
 * Otherwise, it calculates how many valid states exist by trying symbol=false/true,
 * updates 'state' accordingly, and accumulates the number of valid states.
 */
Count countState(const std::vector<bool>& target,
                 uint8_t distance,
                 std::vector<uint8_t>& state,
                 Cache& cache)
{
    auto it = cache.find(state);
    if (it != cache.end()) {
        return it->second;
    }

    // 'count' starts at 1 if state[target.size()] == distance, else 0
    // Because the Rust code checks: state[target.len()] == distance
    // This is effectively a boolean -> Count
    Count count = (state[target.size()] == distance) ? 1 : 0;

    // We try symbol in [false, true]
    for (bool symbol : {false, true}) {
        // Save oldState so we can revert changes if needed
        std::vector<uint8_t> oldState = state;

        // Initialize z = min(state[0], distance) + 1
        uint8_t z = (std::min(state[0], distance)) + 1;
        state[0] = z;

        // This loop replicates the Rust nested iteration:
        //   for (((o, &c), &x), &y) in new_state[1..].iter_mut().zip(target).zip(&state[..])
        //   ...
        // The idea: we move through the arrays, update each new_state[i] using
        // comparisons with target[i-1], symbol, and previous states.
        for (size_t i = 1; i < state.size(); i++) {
            bool c = (i - 1 < target.size()) ? target[i - 1] : false;
            uint8_t x = (i - 1 < oldState.size()) ? oldState[i - 1] : 0;
            uint8_t y = (i < oldState.size())     ? oldState[i]     : 0;

            uint8_t cost = static_cast<uint8_t>(symbol != c);
            // z = (x + cost).min( y.min(z).min(distance) + 1 )
            z = std::min({
                static_cast<uint8_t>(x + cost),
                static_cast<uint8_t>(y + 1),
                static_cast<uint8_t>(z + 1),
                static_cast<uint8_t>(distance + 1)
            });
            state[i] = z;
        }

        count += countState(target, distance, state, cache);

        // Revert the state to oldState for the next loop iteration
        state = oldState;
    }

    // Cache the result
    cache[state] = count;
    return count;
}

/*
 * countFunction:
 *  - target: array of bools (pattern)
 *  - distance: maximum allowed distance
 *
 * Builds the initial state, prepares the cache, then calls countState.
 */
Count countFunction(const std::vector<bool>& target, uint8_t distance) {
    // The initial 'state' is (0..=target.size()) mapped to i.min(distance + 1)
    // We add 1 because Rust used (0..=target.len()), so we have target.size() + 1 elements.
    std::vector<uint8_t> state;
    state.reserve(target.size() + 1);
    for (size_t i = 0; i <= target.size(); i++) {
        uint8_t val = static_cast<uint8_t>(std::min<uint8_t>(i, distance + 1));
        state.push_back(val);
    }

    // Prepare a cache. Insert one special entry as in the Rust code:
    Cache cache;
    {
        // In Rust, we had &arena.alloc_extend((0..=target.len()).map(|_| distance + 1))[..]
        // as a key with value 0. We'll produce the same key here:
        std::vector<uint8_t> initKey(target.size() + 1, distance + 1);
        cache[initKey] = 0;
    }

    // Recursively compute
    return countState(target, distance, state, cache);
}

int main(int argc, char* argv[]) {
    if (argc != 3) {
        std::cerr << "Usage: levenshtein-neighborhood <s> <k>\n";
        return 1;
    }

    // <s> is a string of '0' or '1' used to fill a bool array
    // <k> is the allowed distance
    std::string targetStr = argv[1];
    uint8_t distance = static_cast<uint8_t>(std::stoi(argv[2]));

    std::vector<bool> targetVec;
    targetVec.reserve(targetStr.size());
    for (char c : targetStr) {
        // true if '1', false otherwise
        targetVec.push_back(c == '1');
    }

    Count result = countFunction(targetVec, distance);
    std::cout << result << std::endl;
    return 0;
}

Rust, score ≈ (28, 23)

The number of strings at distance \$k\$ from \$s\$ with some given prefix \$p\$ is a function of \$s\$, \$k\$, and the last column of the Wagner–Fischer table of \$s\$ and \$p\$. We use a recursive search on prefixes \$p\$, caching results based on this last column.

Build with cargo build --release, run with target/release/levenshtein-neighborhood <s> <k>.

Cargo.toml

[package]
name = "levenshtein-neighborhood"
version = "0.1.0"
edition = "2021"

[dependencies]
hashbrown = "0.14.0"
rustc-hash = "1.1.0"
typed-arena = "2.0.2"

src/main.rs

use hashbrown::hash_map::HashMap;
use rustc_hash::FxHasher;
use std::env;
use std::hash::{BuildHasher, BuildHasherDefault};
use typed_arena::Arena;

type Count = u64; // For n + k > 64, change this to u128
type Cache<'a> = HashMap<&'a [u8], Count, BuildHasherDefault<FxHasher>>;

fn count_state<'a>(
    target: &[bool],
    distance: u8,
    arena: &'a Arena<u8>,
    cache: &mut Cache<'a>,
    state: &mut [u8],
) -> Count {
    let hash = cache.hasher().hash_one(&state);
    if let Some((_, &count)) = cache.raw_entry().from_key_hashed_nocheck(hash, state) {
        count
    } else {
        let new_state = state;
        let state = arena.alloc_extend(new_state.iter().copied());
        let mut count = Count::from(state[target.len()] == distance);
        for symbol in [false, true] {
            let mut z = state[0].min(distance) + 1;
            new_state[0] = z;
            for (((o, &c), &x), &y) in new_state[1..]
                .iter_mut()
                .zip(target)
                .zip(&state[..state.len() - 1])
                .zip(&state[1..])
            {
                z = (x + u8::from(symbol != c)).min(y.min(z).min(distance) + 1);
                *o = z;
            }
            count += count_state(target, distance, arena, cache, new_state);
        }
        cache
            .raw_entry_mut()
            .from_hash(hash, |_| false)
            .insert(state, count);
        count
    }
}

fn count(target: &[bool], distance: u8) -> Count {
    let arena = Arena::new();
    let state = arena.alloc_extend((0..=target.len()).map(|i| (i as u8).min(distance + 1)));
    let mut cache = Cache::default();
    cache.insert(
        &arena.alloc_extend((0..=target.len()).map(|_| distance + 1))[..],
        0,
    );
    count_state(target, distance, &arena, &mut cache, state)
}

fn main() {
    if let [_program, target, distance] = &Vec::from_iter(env::args())[..] {
        let target = Vec::from_iter(target.chars().map(|c| c == '1'));
        let distance = distance.parse().unwrap();
        println!("{}", count(&target, distance));
    } else {
        panic!("usage: levenshtein-neighborhood <s> <k>");
    }
}

Python, ~(11,6)

Direct adaptation of my answer on Pick a random string at a fixed edit distance with a minor optimization of generating the sets inline instead of each one being a function call. ATO seems to flip-flop between (11,6) and (11,7) for me right now.

from collections import deque

def countFixedDistance(s: str, k: int) -> int:
  if k <= 0: return s

  chars: set[str]          = set('01')
  distances: dict[str,int] = {s: 0}
  queue                    = deque((s,))
  valid: set[str]          = set()

  while len(queue) > 0:
    current = queue.popleft()
    nextdist = distances[current] + 1
    for nxt in {current[:i] + c + current[i:] for c in chars for i in range(len(s)+1)} \
               | {current[:i] + c + current[i+1:] for c in chars for i in range(len(s)) if c != s[i]} \
               | {current[:i] + current[i+1:] for i in range(len(s))}:
      if nxt in distances: continue
      distances[nxt] = nextdist
      if nextdist < k:
        queue.append(nxt)
      else:
        valid.add(nxt)

  return len(valid)

Attempt This Online!

Rust, (12, 11)

This is basically a line-by-line port of Jonathan Allan's answer, with is already very good. Thanks! I wrote this to practice some Rust.

It uses the nohash-hasher crate for extremely fast HashMap lookups, with take up the vast majority of the time.

Try it online on rustexplorer.com. Runs an unoptimized debug build, so don't rely on it for measurements.

I have a github repo with build instructions.

#![warn(clippy::all)]

use std::collections::VecDeque;
use std::collections::hash_map::Entry;
use std::time::Instant;

use nohash_hasher::{IntMap, IntSet};

fn count_fixed_distance(s: &str, k: u32) -> usize {
    let n = i32::from_str_radix(&format!("1{s}"), 2).unwrap();
    let mut distances = IntMap::with_capacity_and_hasher(1 << (s.len() + k as usize), Default::default());
    distances.insert(n, 0);
    let mut queue = VecDeque::from([(n, s.len())]);
    // capacity is just experimentation
    let mut valid = IntSet::with_capacity_and_hasher(1 << (k + 1), Default::default());
    while let Some((current, bit_count)) = queue.pop_front() {
        let next_dist = distances.get(&current).unwrap() + 1;
        let mut pow = 0;
        for e in 0..bit_count {
            let left = current >> e;
            let left_1 = left << 1;
            pow = 1 << e;
            let right = current % pow;
            let bp1 = bit_count + 1;
            for (next, next_bit_count) in [
                ((left >> 1 << e) | right, bit_count - 1),
                (current ^ pow, bit_count),
                ((left_1 << e) | right, bp1),
                (((left_1 | 1) << e) | right, bp1)
            ] {
                match distances.entry(next) {
                    Entry::Vacant(ve) => {
                        ve.insert(next_dist);
                    }
                    Entry::Occupied(_) => continue,
                }
                if next_dist < k {
                    queue.push_back((next, next_bit_count));
                } else {
                    valid.insert(next);
                }
            }
        }

        for next in [pow << 2 | current, 3 << bit_count ^ current] {
            match distances.entry(next) {
                Entry::Vacant(ve) => {
                    ve.insert(next_dist);
                }
                Entry::Occupied(_) => continue,
            }
            if next_dist < k {
                queue.push_back((next, bit_count + 1));
            } else {
                valid.insert(next);
            }
        }
    }
    valid.len()
}

fn main() {
    let start = Instant::now();
    let mut s = String::with_capacity(32);
    s.push('0');
    let mut v = 0;
    while s.len() <= 12 { // approx 2 minutes for profiling
        for k in 1..(s.len() + 1) {
            println!("{} {} {}", s.len(), k, count_fixed_distance(&s, k as u32));
        }
        s.push(['1', '1', '0', '0'][v % 4]);
        v += 1;
        println!("{}", s)
    }
    println!("{} seconds", start.elapsed().as_secs_f32());
}

Python, (11, 8); with Python PyPy (12, 10)

New version of Value Ink's answer using bit twiddling in place of string manipulation.

from collections import deque

def countFixedDistance(s: str, k: int) -> int:
    n = int('1' + s, 2)
    distances: dict[int, int] = {n: 0}
    queue = deque(((n, len(s)),))
    valid: set[int] = set()
    while len(queue) > 0:
        current, bit_count = queue.popleft()
        next_dist = distances[current] + 1
        next_bit_count = bit_count - 1
        for next_gen in (
            ((current >> e + 1 << e) + current % (1 << e) for e in range(bit_count)),
            (current ^ (1 << e) for e in range(bit_count)),
            (((current >> e << 1 | b) << e) + current % (1 << e) for b in (0, 1) for e in range(bit_count + 1)),
        ):
          for next in next_gen:
              if next in distances: continue
              distances[next] = next_dist
              if next_dist < k:
                  queue.append((next, next_bit_count))
              else:
                  valid.add(next)
          next_bit_count += 1
    return len(valid)

Attempt This Online!


With a lot of tweaks to do fewer operations, it does get further but still not to completion of (11, 9) on ATO, so it may also be worth benchmarking too:

from collections import deque

def countFixedDistance(s: str, k: int) -> int:
    n = int('1' + s, 2)
    distances: dict[int, int] = {n: 0}
    queue = deque(((n, len(s)),))
    valid: set[int] = set()
    while len(queue) > 0:
        current, bit_count = queue.popleft()
        next_dist = distances[current] + 1
        for e in range(bit_count):
            left = current >> e
            left_1 = left << 1
            pow = 1 << e
            right = current % pow
            bp1 = bit_count + 1
            for next, next_bit_count in (
                ((left >> 1 << e) | right, bit_count - 1),
                (current ^ pow, bit_count),
                ((left_1 << e) | right, bp1),
                (((left_1 | 1) << e) | right, bp1),
            ):
                if next in distances: continue
                distances[next] = next_dist
                if next_dist < k:
                    queue.append((next, next_bit_count))
                else:
                    valid.add(next)
        for next in (
            pow << 2 | current,
            3 << bit_count ^ current,
        ):
            if next in distances: continue
            distances[next] = next_dist
            if next_dist < k:
                queue.append((next, bit_count + 1))
            else:
                valid.add(next)
    return len(valid)

Attempt This Online!

Charcoal, 81 bytes

⊞υη≔⦃η¹⦄ζFN«≔υε≔⟦⟧υFεFΣ⁺Eκ⟦⭆κ¬⁼Iν⁼μξ⭆κ⎇⁼μξων⟧E²E⊕Lκ⁺⁺✂κ⁰ν¹λ✂κν¿¬§ζλ«⊞υλ§≔ζλ¹»»ILυ

Attempt This Online! Link is to verbose version of code. Can handle results of up to about 20000 on ATO, so 5 1011001101 or 4 101001100101. Explanation:

⊞υη

Start with one string of distance 0.

≔⦃η¹⦄ζ

Start with one string found so far.

FN«

Loop k times.

≔υε≔⟦⟧υFε

Start collecting strings of distance i+1.

FΣ⁺Eκ⟦⭆κ¬⁼Iν⁼μξ⭆κ⎇⁼μξων⟧E²E⊕Lκ⁺⁺✂κ⁰ν¹λ✂κν

Loop over all strings of edit distance 1 from the current string of distance i.

¿¬§ζλ«⊞υλ§≔ζλ¹»

If this string is new then mark it as seen and add it to the list of strings of edit distance i+1.

»ILυ

Output the count of strings of edit distance k.