Created
January 15, 2024 12:39
-
-
Save ssvb/147e7ca4a4fde729fe99e5f1c487aa8d to your computer and use it in GitHub Desktop.
The optimized Dlang solution for https://flownet.com/ron/papers/lisp-java/instructions.html and https://github.com/renatoathaydes/prechelt-phone-number-encoding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/+dub.sdl:+/ | |
// Siarhei Siamashka - Programming Challange from Erann Gat: | |
// http://www.flownet.com/ron/papers/lisp-java/ | |
// This work is marked with CC0 1.0. To view a copy of this license, visit | |
// http://creativecommons.org/publicdomain/zero/1.0 | |
@safe: | |
import std.stdio, std.file, std.conv, std.ascii, std.algorithm, std.string; | |
import core.bitop; | |
immutable letter2digit = (){ | |
char[256] tmp; | |
tmp[] = 0; | |
auto letter2digit_txt = | |
"E | J N Q | R W X | D S Y | F T | A M | C I V | B K U | L O P | G H Z | |
e | j n q | r w x | d s y | f t | a m | c i v | b k u | l o p | g h z | |
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9"; | |
char curdigit = '0'; | |
foreach (ch; letter2digit_txt) { | |
if (ch == '|') | |
curdigit++; | |
else if (ch == '\n') | |
curdigit = '0'; | |
else if (std.ascii.isAlpha(ch)) | |
tmp[ch] = curdigit; | |
} | |
return tmp; | |
}(); | |
uint simplehash(uint seed, uint val) { | |
return (seed ^ val) * 16777619; // 32-bit FNV-1a hash | |
} | |
uint simplehash(string str) { | |
return str.fold!((val, ch) => simplehash(val, ch))(0U); | |
} | |
// A primitive https://en.wikipedia.org/wiki/Open_addressing | |
// hash table with https://en.wikipedia.org/wiki/Linear_probing | |
// for resolving collisions | |
struct HashTbl { | |
struct HashTblEntry { string num, word; } | |
HashTblEntry[] tblv; | |
uint[] tblh; | |
size_t mask; | |
this(size_t max_capacity) { | |
// use load factor between 0.25 and 0.5 | |
tblv.length = 1.to!size_t << (bsr(max_capacity | 1) + 2); | |
tblh.length = tblv.length; | |
mask = tblv.length - 1; | |
} | |
void insert(string num, string word) { | |
uint hashval = simplehash(num); | |
size_t idx = hashval & mask; | |
while (tblh[idx]) | |
idx = (idx + 1) & mask; | |
tblv[idx] = HashTblEntry(num, word); | |
tblh[idx] = hashval | 1; | |
} | |
} | |
void main(string[] argv) { | |
if (argv.length < 4 || (argv[1] != "print" && argv[1] != "count")) { | |
writefln!"Usage: %s [print|count] [dictionary.txt] [input.txt]"(argv[0]); | |
return; | |
} | |
bool print_mode = (argv[1] == "print"); | |
bool count_mode = (argv[1] == "count"); | |
string dictionary_txt = readText(argv[2]).strip; | |
auto h = HashTbl(dictionary_txt.count('\n')); | |
foreach (line; dictionary_txt.splitter('\n')) { | |
auto word = line.strip; | |
auto num = word.map!(ch => letter2digit[ch]).filter!"a != 0".to!string; | |
h.insert(num, word); | |
} | |
string[] result; | |
char[] phonenum; | |
size_t counter = 0; | |
void solve(size_t depth, bool can_use_digit, string phonenum) { | |
if (phonenum.empty) { | |
counter++; | |
if (print_mode) | |
writeln(result[0], ": ", result[1 .. depth].joiner(" ")); | |
return; | |
} | |
uint hashval = 0; | |
foreach (i, ch; phonenum) { | |
// incrementally update hash for each newly appended digit instead | |
// of repeatedly recalculating it for the whole string again and again | |
hashval = simplehash(hashval, ch); | |
size_t idx = hashval & h.mask; | |
while (h.tblh[idx]) { | |
if ((h.tblh[idx] == (hashval | 1)) && | |
(h.tblv[idx].num == phonenum[0 .. i + 1])) { | |
can_use_digit = false; | |
result[depth] = h.tblv[idx].word; | |
solve(depth + 1, true, phonenum[i + 1 .. $]); | |
} | |
idx = (idx + 1) & h.mask; | |
} | |
} | |
if (can_use_digit) { | |
result[depth] = phonenum[0 .. 1]; | |
solve(depth + 1, false, phonenum[1 .. $]); | |
} | |
} | |
void process_line(char[] line) { | |
line = line.strip; | |
if (phonenum.length < line.length) | |
phonenum.length = line.length; | |
size_t phonenum_length = 0; | |
foreach (ch; line) | |
if (ch >= '0' && ch <= '9') | |
phonenum[phonenum_length++] = ch; | |
if (phonenum_length + 1 > result.length) | |
result.length = phonenum_length + 1; | |
result[0] = cast(string)line; | |
solve(1, true, cast(string)phonenum[0 .. phonenum_length]); | |
} | |
() @trusted { // workaround https://issues.dlang.org/show_bug.cgi?id=18110 | |
foreach (line; File(argv[3]).byLine) | |
process_line(line); | |
}(); | |
if (count_mode) | |
writeln(counter); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment