| Bytes | Lang | Time | Link |
|---|---|---|---|
| 005 | C++ | 250122T073349Z | 138 Aspe |
| 023 | Rust | 230918T174934Z | Anders K |
| 006 | Python | 230916T225842Z | Value In |
| 011 | Rust | 230918T162624Z | corvus_1 |
| 010 | Python | 230917T081332Z | Jonathan |
| 081 | Charcoal | 230916T163835Z | Neil |
C++, ~(12,5)
C++ port of @Anders Kaseorg's Rust answer.
#include <iostream>
#include <vector>
#include <unordered_map>
#include <string>
#include <algorithm>
#include <cstdint>
#include <functional>
// A helper struct to hash a vector of uint8_t.
struct VectorHash {
std::size_t operator()(const std::vector<uint8_t>& v) const noexcept {
// A simple FNV-like hash for demonstration.
std::size_t hash = 2166136261u;
for (auto &byte : v) {
hash ^= static_cast<std::size_t>(byte);
hash *= 16777619u;
}
return hash;
}
};
// A helper struct to compare two vectors of uint8_t for equality keys in the map.
struct VectorEqual {
bool operator()(const std::vector<uint8_t>& a, const std::vector<uint8_t>& b) const noexcept {
if (a.size() != b.size()) return false;
for (size_t i = 0; i < a.size(); i++){
if (a[i] != b[i]) return false;
}
return true;
}
};
using Count = uint64_t;
using Cache = std::unordered_map<std::vector<uint8_t>, Count, VectorHash, VectorEqual>;
/*
* countState:
* - target: the array of bools (pattern) we compare against
* - distance: the maximum distance allowed (k in the Rust code)
* - state: a modifiable "state" vector representing the current Levenshtein state
* - cache: memoization cache mapping from state -> count
*
* This function checks if 'state' is in cache. If yes, returns the cached value.
* Otherwise, it calculates how many valid states exist by trying symbol=false/true,
* updates 'state' accordingly, and accumulates the number of valid states.
*/
Count countState(const std::vector<bool>& target,
uint8_t distance,
std::vector<uint8_t>& state,
Cache& cache)
{
auto it = cache.find(state);
if (it != cache.end()) {
return it->second;
}
// 'count' starts at 1 if state[target.size()] == distance, else 0
// Because the Rust code checks: state[target.len()] == distance
// This is effectively a boolean -> Count
Count count = (state[target.size()] == distance) ? 1 : 0;
// We try symbol in [false, true]
for (bool symbol : {false, true}) {
// Save oldState so we can revert changes if needed
std::vector<uint8_t> oldState = state;
// Initialize z = min(state[0], distance) + 1
uint8_t z = (std::min(state[0], distance)) + 1;
state[0] = z;
// This loop replicates the Rust nested iteration:
// for (((o, &c), &x), &y) in new_state[1..].iter_mut().zip(target).zip(&state[..])
// ...
// The idea: we move through the arrays, update each new_state[i] using
// comparisons with target[i-1], symbol, and previous states.
for (size_t i = 1; i < state.size(); i++) {
bool c = (i - 1 < target.size()) ? target[i - 1] : false;
uint8_t x = (i - 1 < oldState.size()) ? oldState[i - 1] : 0;
uint8_t y = (i < oldState.size()) ? oldState[i] : 0;
uint8_t cost = static_cast<uint8_t>(symbol != c);
// z = (x + cost).min( y.min(z).min(distance) + 1 )
z = std::min({
static_cast<uint8_t>(x + cost),
static_cast<uint8_t>(y + 1),
static_cast<uint8_t>(z + 1),
static_cast<uint8_t>(distance + 1)
});
state[i] = z;
}
count += countState(target, distance, state, cache);
// Revert the state to oldState for the next loop iteration
state = oldState;
}
// Cache the result
cache[state] = count;
return count;
}
/*
* countFunction:
* - target: array of bools (pattern)
* - distance: maximum allowed distance
*
* Builds the initial state, prepares the cache, then calls countState.
*/
Count countFunction(const std::vector<bool>& target, uint8_t distance) {
// The initial 'state' is (0..=target.size()) mapped to i.min(distance + 1)
// We add 1 because Rust used (0..=target.len()), so we have target.size() + 1 elements.
std::vector<uint8_t> state;
state.reserve(target.size() + 1);
for (size_t i = 0; i <= target.size(); i++) {
uint8_t val = static_cast<uint8_t>(std::min<uint8_t>(i, distance + 1));
state.push_back(val);
}
// Prepare a cache. Insert one special entry as in the Rust code:
Cache cache;
{
// In Rust, we had &arena.alloc_extend((0..=target.len()).map(|_| distance + 1))[..]
// as a key with value 0. We'll produce the same key here:
std::vector<uint8_t> initKey(target.size() + 1, distance + 1);
cache[initKey] = 0;
}
// Recursively compute
return countState(target, distance, state, cache);
}
int main(int argc, char* argv[]) {
if (argc != 3) {
std::cerr << "Usage: levenshtein-neighborhood <s> <k>\n";
return 1;
}
// <s> is a string of '0' or '1' used to fill a bool array
// <k> is the allowed distance
std::string targetStr = argv[1];
uint8_t distance = static_cast<uint8_t>(std::stoi(argv[2]));
std::vector<bool> targetVec;
targetVec.reserve(targetStr.size());
for (char c : targetStr) {
// true if '1', false otherwise
targetVec.push_back(c == '1');
}
Count result = countFunction(targetVec, distance);
std::cout << result << std::endl;
return 0;
}
Rust, score ≈ (28, 23)
The number of strings at distance \$k\$ from \$s\$ with some given prefix \$p\$ is a function of \$s\$, \$k\$, and the last column of the Wagner–Fischer table of \$s\$ and \$p\$. We use a recursive search on prefixes \$p\$, caching results based on this last column.
Build with cargo build --release, run with target/release/levenshtein-neighborhood <s> <k>.
Cargo.toml
[package]
name = "levenshtein-neighborhood"
version = "0.1.0"
edition = "2021"
[dependencies]
hashbrown = "0.14.0"
rustc-hash = "1.1.0"
typed-arena = "2.0.2"
src/main.rs
use hashbrown::hash_map::HashMap;
use rustc_hash::FxHasher;
use std::env;
use std::hash::{BuildHasher, BuildHasherDefault};
use typed_arena::Arena;
type Count = u64; // For n + k > 64, change this to u128
type Cache<'a> = HashMap<&'a [u8], Count, BuildHasherDefault<FxHasher>>;
fn count_state<'a>(
target: &[bool],
distance: u8,
arena: &'a Arena<u8>,
cache: &mut Cache<'a>,
state: &mut [u8],
) -> Count {
let hash = cache.hasher().hash_one(&state);
if let Some((_, &count)) = cache.raw_entry().from_key_hashed_nocheck(hash, state) {
count
} else {
let new_state = state;
let state = arena.alloc_extend(new_state.iter().copied());
let mut count = Count::from(state[target.len()] == distance);
for symbol in [false, true] {
let mut z = state[0].min(distance) + 1;
new_state[0] = z;
for (((o, &c), &x), &y) in new_state[1..]
.iter_mut()
.zip(target)
.zip(&state[..state.len() - 1])
.zip(&state[1..])
{
z = (x + u8::from(symbol != c)).min(y.min(z).min(distance) + 1);
*o = z;
}
count += count_state(target, distance, arena, cache, new_state);
}
cache
.raw_entry_mut()
.from_hash(hash, |_| false)
.insert(state, count);
count
}
}
fn count(target: &[bool], distance: u8) -> Count {
let arena = Arena::new();
let state = arena.alloc_extend((0..=target.len()).map(|i| (i as u8).min(distance + 1)));
let mut cache = Cache::default();
cache.insert(
&arena.alloc_extend((0..=target.len()).map(|_| distance + 1))[..],
0,
);
count_state(target, distance, &arena, &mut cache, state)
}
fn main() {
if let [_program, target, distance] = &Vec::from_iter(env::args())[..] {
let target = Vec::from_iter(target.chars().map(|c| c == '1'));
let distance = distance.parse().unwrap();
println!("{}", count(&target, distance));
} else {
panic!("usage: levenshtein-neighborhood <s> <k>");
}
}
Python, ~(11,6)
Direct adaptation of my answer on Pick a random string at a fixed edit distance with a minor optimization of generating the sets inline instead of each one being a function call. ATO seems to flip-flop between (11,6) and (11,7) for me right now.
from collections import deque
def countFixedDistance(s: str, k: int) -> int:
if k <= 0: return s
chars: set[str] = set('01')
distances: dict[str,int] = {s: 0}
queue = deque((s,))
valid: set[str] = set()
while len(queue) > 0:
current = queue.popleft()
nextdist = distances[current] + 1
for nxt in {current[:i] + c + current[i:] for c in chars for i in range(len(s)+1)} \
| {current[:i] + c + current[i+1:] for c in chars for i in range(len(s)) if c != s[i]} \
| {current[:i] + current[i+1:] for i in range(len(s))}:
if nxt in distances: continue
distances[nxt] = nextdist
if nextdist < k:
queue.append(nxt)
else:
valid.add(nxt)
return len(valid)
Rust, (12, 11)
This is basically a line-by-line port of Jonathan Allan's answer, with is already very good. Thanks! I wrote this to practice some Rust.
It uses the nohash-hasher crate for extremely fast HashMap lookups, with take up the vast majority of the time.
Try it online on rustexplorer.com. Runs an unoptimized debug build, so don't rely on it for measurements.
I have a github repo with build instructions.
#![warn(clippy::all)]
use std::collections::VecDeque;
use std::collections::hash_map::Entry;
use std::time::Instant;
use nohash_hasher::{IntMap, IntSet};
fn count_fixed_distance(s: &str, k: u32) -> usize {
let n = i32::from_str_radix(&format!("1{s}"), 2).unwrap();
let mut distances = IntMap::with_capacity_and_hasher(1 << (s.len() + k as usize), Default::default());
distances.insert(n, 0);
let mut queue = VecDeque::from([(n, s.len())]);
// capacity is just experimentation
let mut valid = IntSet::with_capacity_and_hasher(1 << (k + 1), Default::default());
while let Some((current, bit_count)) = queue.pop_front() {
let next_dist = distances.get(¤t).unwrap() + 1;
let mut pow = 0;
for e in 0..bit_count {
let left = current >> e;
let left_1 = left << 1;
pow = 1 << e;
let right = current % pow;
let bp1 = bit_count + 1;
for (next, next_bit_count) in [
((left >> 1 << e) | right, bit_count - 1),
(current ^ pow, bit_count),
((left_1 << e) | right, bp1),
(((left_1 | 1) << e) | right, bp1)
] {
match distances.entry(next) {
Entry::Vacant(ve) => {
ve.insert(next_dist);
}
Entry::Occupied(_) => continue,
}
if next_dist < k {
queue.push_back((next, next_bit_count));
} else {
valid.insert(next);
}
}
}
for next in [pow << 2 | current, 3 << bit_count ^ current] {
match distances.entry(next) {
Entry::Vacant(ve) => {
ve.insert(next_dist);
}
Entry::Occupied(_) => continue,
}
if next_dist < k {
queue.push_back((next, bit_count + 1));
} else {
valid.insert(next);
}
}
}
valid.len()
}
fn main() {
let start = Instant::now();
let mut s = String::with_capacity(32);
s.push('0');
let mut v = 0;
while s.len() <= 12 { // approx 2 minutes for profiling
for k in 1..(s.len() + 1) {
println!("{} {} {}", s.len(), k, count_fixed_distance(&s, k as u32));
}
s.push(['1', '1', '0', '0'][v % 4]);
v += 1;
println!("{}", s)
}
println!("{} seconds", start.elapsed().as_secs_f32());
}
Python, (11, 8); with Python PyPy (12, 10)
New version of Value Ink's answer using bit twiddling in place of string manipulation.
from collections import deque
def countFixedDistance(s: str, k: int) -> int:
n = int('1' + s, 2)
distances: dict[int, int] = {n: 0}
queue = deque(((n, len(s)),))
valid: set[int] = set()
while len(queue) > 0:
current, bit_count = queue.popleft()
next_dist = distances[current] + 1
next_bit_count = bit_count - 1
for next_gen in (
((current >> e + 1 << e) + current % (1 << e) for e in range(bit_count)),
(current ^ (1 << e) for e in range(bit_count)),
(((current >> e << 1 | b) << e) + current % (1 << e) for b in (0, 1) for e in range(bit_count + 1)),
):
for next in next_gen:
if next in distances: continue
distances[next] = next_dist
if next_dist < k:
queue.append((next, next_bit_count))
else:
valid.add(next)
next_bit_count += 1
return len(valid)
With a lot of tweaks to do fewer operations, it does get further but still not to completion of (11, 9) on ATO, so it may also be worth benchmarking too:
from collections import deque
def countFixedDistance(s: str, k: int) -> int:
n = int('1' + s, 2)
distances: dict[int, int] = {n: 0}
queue = deque(((n, len(s)),))
valid: set[int] = set()
while len(queue) > 0:
current, bit_count = queue.popleft()
next_dist = distances[current] + 1
for e in range(bit_count):
left = current >> e
left_1 = left << 1
pow = 1 << e
right = current % pow
bp1 = bit_count + 1
for next, next_bit_count in (
((left >> 1 << e) | right, bit_count - 1),
(current ^ pow, bit_count),
((left_1 << e) | right, bp1),
(((left_1 | 1) << e) | right, bp1),
):
if next in distances: continue
distances[next] = next_dist
if next_dist < k:
queue.append((next, next_bit_count))
else:
valid.add(next)
for next in (
pow << 2 | current,
3 << bit_count ^ current,
):
if next in distances: continue
distances[next] = next_dist
if next_dist < k:
queue.append((next, bit_count + 1))
else:
valid.add(next)
return len(valid)
Charcoal, 81 bytes
⊞υη≔⦃η¹⦄ζFN«≔υε≔⟦⟧υFεFΣ⁺Eκ⟦⭆κ¬⁼Iν⁼μξ⭆κ⎇⁼μξων⟧E²E⊕Lκ⁺⁺✂κ⁰ν¹λ✂κν¿¬§ζλ«⊞υλ§≔ζλ¹»»ILυ
Attempt This Online! Link is to verbose version of code. Can handle results of up to about 20000 on ATO, so 5 1011001101 or 4 101001100101. Explanation:
⊞υη
Start with one string of distance 0.
≔⦃η¹⦄ζ
Start with one string found so far.
FN«
Loop k times.
≔υε≔⟦⟧υFε
Start collecting strings of distance i+1.
FΣ⁺Eκ⟦⭆κ¬⁼Iν⁼μξ⭆κ⎇⁼μξων⟧E²E⊕Lκ⁺⁺✂κ⁰ν¹λ✂κν
Loop over all strings of edit distance 1 from the current string of distance i.
¿¬§ζλ«⊞υλ§≔ζλ¹»
If this string is new then mark it as seen and add it to the list of strings of edit distance i+1.
»ILυ
Output the count of strings of edit distance k.