Created
January 15, 2024 19:46
-
-
Save ssvb/38a14d3d7323ecfaf580e5482f657068 to your computer and use it in GitHub Desktop.
Even more optimized Dlang solution for https://flownet.com/ron/papers/lisp-java/instructions.html and https://github.com/renatoathaydes/prechelt-phone-number-encoding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/+dub.sdl:+/ | |
// Siarhei Siamashka - Programming Challange from Erann Gat: | |
// http://www.flownet.com/ron/papers/lisp-java/ | |
// This work is marked with CC0 1.0. To view a copy of this license, visit | |
// http://creativecommons.org/publicdomain/zero/1.0 | |
@safe: | |
import std.stdio, std.file, std.conv, std.ascii, std.algorithm, std.string; | |
import core.bitop, std.bigint; | |
// Can be changed to 'long' for more speed, but will overflow in extreme cases | |
alias CounterType = BigInt; | |
// Can be increased to 1 or 2 for more speed at the expense of higher memory usage | |
const load_factor_boost = 0; | |
// An artificial fancy type for use in the "print" mode, where the counter is | |
// actually not needed and only [-1, 0, 1] values have to be supported. | |
struct PrintCounterType | |
{ | |
byte v; | |
this(int vv) { v = vv.to!byte; } | |
void opOpAssign(string op: "+", T)(T rhs) { v |= (rhs != 0); } | |
void opOpAssign(string op: "=", T)(T rhs) { v = rhs.to!byte; } | |
bool opEquals(int vv) { return v == vv; } | |
} | |
immutable letter2digit = (){ | |
char[256] tmp; | |
tmp[] = 0; | |
auto letter2digit_txt = | |
"E | J N Q | R W X | D S Y | F T | A M | C I V | B K U | L O P | G H Z | |
e | j n q | r w x | d s y | f t | a m | c i v | b k u | l o p | g h z | |
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9"; | |
char curdigit = '0'; | |
foreach (ch; letter2digit_txt) { | |
if (ch == '|') | |
curdigit++; | |
else if (ch == '\n') | |
curdigit = '0'; | |
else if (std.ascii.isAlpha(ch)) | |
tmp[ch] = curdigit; | |
} | |
return tmp; | |
}(); | |
uint simplehash(uint seed, uint val) { | |
return (seed ^ val) * 16777619; // 32-bit FNV-1a hash | |
} | |
uint simplehash(string str) { | |
return str.fold!((val, ch) => simplehash(val, ch))(0U); | |
} | |
// A primitive https://en.wikipedia.org/wiki/Open_addressing | |
// hash table with https://en.wikipedia.org/wiki/Linear_probing | |
// for resolving collisions | |
struct HashTbl { | |
struct HashTblEntry { string num, word; } | |
HashTblEntry[] tblv; | |
uint[] tblh; | |
size_t mask; | |
this(size_t max_capacity) { | |
// use load factor between 0.25 and 0.5 by default (without boost) | |
tblv.length = 1.to!size_t << (bsr(max_capacity | 1) + 2 + load_factor_boost); | |
tblh.length = tblv.length; | |
mask = tblv.length - 1; | |
} | |
void insert(string num, string word) { | |
uint hashval = simplehash(num); | |
size_t idx = hashval & mask; | |
while (tblh[idx]) | |
idx = (idx + 1) & mask; | |
tblv[idx] = HashTblEntry(num, word); | |
tblh[idx] = hashval | 1; | |
} | |
} | |
void main(string[] argv) { | |
if (argv.length < 4 || (argv[1] != "print" && argv[1] != "count")) { | |
writefln!"Usage: %s [print|count] [dictionary.txt] [input.txt]"(argv[0]); | |
return; | |
} | |
bool print_mode = (argv[1] == "print"); | |
bool count_mode = (argv[1] == "count"); | |
string dictionary_txt = readText(argv[2]).strip; | |
auto h = HashTbl(dictionary_txt.count('\n')); | |
foreach (line; dictionary_txt.splitter('\n')) { | |
auto word = line.strip; | |
auto num = word.map!(ch => letter2digit[ch]).filter!"a != 0".to!string; | |
h.insert(num, word); | |
} | |
string[] result; | |
char[] phonenum; | |
T solve(T)(size_t depth, bool can_use_digit, string phonenum, T[] dp) { | |
T *dp_val = &dp[phonenum.length * 2 + can_use_digit]; | |
if (*dp_val != -1 && (*dp_val == 0 || !print_mode)) | |
return *dp_val; | |
if (phonenum.empty) { | |
if (print_mode) | |
writeln(result[0], ": ", result[1 .. depth].joiner(" ")); | |
return 1.to!T; | |
} | |
T counter = 0; | |
uint hashval = 0; | |
bool found_match = false; | |
foreach (i, ch; phonenum) { | |
// incrementally update hash for each newly appended digit instead | |
// of repeatedly recalculating it for the whole string again and again | |
hashval = simplehash(hashval, ch); | |
size_t idx = hashval & h.mask; | |
while (h.tblh[idx]) { | |
if ((h.tblh[idx] == (hashval | 1)) && | |
(h.tblv[idx].num == phonenum[0 .. i + 1])) { | |
found_match = true; | |
result[depth] = h.tblv[idx].word; | |
counter += solve(depth + 1, true, phonenum[i + 1 .. $], dp); | |
} | |
idx = (idx + 1) & h.mask; | |
} | |
} | |
if (!found_match && can_use_digit) { | |
result[depth] = phonenum[0 .. 1]; | |
counter += solve(depth + 1, false, phonenum[1 .. $], dp); | |
} | |
return (*dp_val = counter); | |
} | |
T process_line(T)(char[] line, ref T[] dp) { | |
line = line.strip; | |
if (phonenum.length < line.length) | |
phonenum.length = line.length; | |
size_t phonenum_length = 0; | |
foreach (ch; line) | |
if (ch >= '0' && ch <= '9') | |
phonenum[phonenum_length++] = ch; | |
if (phonenum_length + 1 > result.length) | |
result.length = phonenum_length + 1; | |
if ((phonenum_length + 1) * 2 > dp.length) | |
dp.length = (phonenum_length + 1) * 2; | |
dp[0 .. (phonenum_length + 1) * 2] = T(-1); | |
result[0] = cast(string)line; | |
return solve(1, true, cast(string)phonenum[0 .. phonenum_length], dp); | |
} | |
() @trusted { // workaround https://issues.dlang.org/show_bug.cgi?id=18110 | |
if (print_mode) { | |
PrintCounterType[] dp_print; | |
foreach (line; File(argv[3]).byLine) | |
process_line(line, dp_print); | |
} else { | |
CounterType[] dp_count; | |
CounterType total_counter = 0; | |
foreach (line; File(argv[3]).byLine) | |
total_counter += process_line(line, dp_count); | |
total_counter.to!string.writeln; | |
} | |
}(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment