Last active
September 19, 2024 19:20
-
-
Save HeroicKatora/0f67915cebaddf5a6d960027b922b60b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
symbolic_table = ['b', 'ra', 'al', 'ar'] | |
def parse_linear_but_naive(instr, options): | |
possible = [ | |
tuple() for _ in range(max(len(o) for o in options) + 1) | |
] | |
instr = instr[::-1] | |
options = [ch[::-1] for ch in options] | |
for i in range(len(instr)): | |
with_possible = possible[:-1] | |
with_possible[0:0] = [None] | |
for opt in options: | |
if not instr[:i+1].endswith(opt): | |
continue | |
if (postfix := with_possible[len(opt)]) is None: | |
continue | |
with_possible[0] = (opt, postfix) | |
break | |
possible = with_possible | |
el = possible[0] | |
if el is None: | |
raise RuntimeError('Not spellable') | |
while el: | |
pre, tail = el | |
yield pre[::-1] | |
el = tail | |
string = 'ba' + 10**6*'ra' + 'l' | |
spelling = list(parse_linear_but_naive(string, symbolic_table)) | |
assert ''.join(spelling) == string |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[derive(Clone, Copy)] | |
struct StringIndex(usize); | |
fn parse_linear_but_naive(instr: &[u8], options: &[&[u8]]) -> Option<Vec<StringIndex>> { | |
let max_len = options | |
.iter() | |
.map(|x| x.len()) | |
.max() | |
.expect("empty dictionary") | |
+ 1; | |
let instr = { | |
let mut v = instr.to_vec(); | |
v.reverse(); | |
v | |
}; | |
// Precalculate this! | |
let mut buf = vec![]; | |
let options: Vec<_> = options | |
.iter() | |
.map(|&st| { | |
let start = buf.len(); | |
buf.extend_from_slice(&st); | |
buf[start..].reverse(); | |
start..buf.len() | |
}) | |
.collect(); | |
// We don't gargabe collect failed suffixes, that's just fine. | |
let mut link_el: Vec<(StringIndex, usize)> = vec![]; | |
let mut possible_top: Vec<Option<usize>> = vec![Some(usize::MAX); max_len]; | |
let suffixes = (0..instr.len()).map(|idx| &instr[..idx+1]); | |
for rev_suffix in suffixes { | |
possible_top.insert(0, None); | |
let _ = possible_top.pop(); | |
for (idx, opt) in options.iter().enumerate() { | |
let Some(top) = possible_top[opt.len()] else { | |
continue; | |
}; | |
if !rev_suffix.ends_with(&buf[opt.start..opt.end]) { | |
continue; | |
} | |
possible_top[0] = Some(link_el.len()); | |
link_el.push((StringIndex(idx), top)); | |
break; | |
} | |
} | |
let Some(idx) = possible_top[0] else { | |
return None; | |
}; | |
let mut result = vec![]; | |
let mut el = idx; | |
while let Some((word, next)) = link_el.get(el) { | |
result.push(*word); | |
el = *next; | |
} | |
Some(result) | |
} | |
fn main() { | |
let mut st = vec![]; | |
st.extend_from_slice(b"ba"); | |
for _ in 0..1_000_000 { | |
st.extend_from_slice(b"ra"); | |
} | |
st.extend_from_slice(b"l"); | |
let symbolic_table = vec![b"b" as &[u8], b"ra", b"al", b"ar"]; | |
let okay = parse_linear_but_naive(&st, &symbolic_table); | |
let wordlist = okay.expect("no word found"); | |
let mut reconstructed = vec![]; | |
for &StringIndex(idx) in wordlist.iter() { | |
reconstructed.extend_from_slice(symbolic_table[idx]); | |
} | |
assert_eq!( | |
reconstructed, | |
st, | |
"{}", | |
String::from_utf8_lossy(&reconstructed).as_ref() | |
); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment