Skip to content

Instantly share code, notes, and snippets.

@ssvb
Created January 15, 2024 19:46
Show Gist options
  • Save ssvb/38a14d3d7323ecfaf580e5482f657068 to your computer and use it in GitHub Desktop.
Save ssvb/38a14d3d7323ecfaf580e5482f657068 to your computer and use it in GitHub Desktop.
/+dub.sdl:+/
// Siarhei Siamashka - Programming Challange from Erann Gat:
// http://www.flownet.com/ron/papers/lisp-java/
// This work is marked with CC0 1.0. To view a copy of this license, visit
// http://creativecommons.org/publicdomain/zero/1.0
@safe:
import std.stdio, std.file, std.conv, std.ascii, std.algorithm, std.string;
import core.bitop, std.bigint;
// Can be changed to 'long' for more speed, but will overflow in extreme cases
alias CounterType = BigInt;
// Can be increased to 1 or 2 for more speed at the expense of higher memory usage
const load_factor_boost = 0;
// An artificial fancy type for use in the "print" mode, where the counter is
// actually not needed and only [-1, 0, 1] values have to be supported.
struct PrintCounterType
{
byte v;
this(int vv) { v = vv.to!byte; }
void opOpAssign(string op: "+", T)(T rhs) { v |= (rhs != 0); }
void opOpAssign(string op: "=", T)(T rhs) { v = rhs.to!byte; }
bool opEquals(int vv) { return v == vv; }
}
immutable letter2digit = (){
char[256] tmp;
tmp[] = 0;
auto letter2digit_txt =
"E | J N Q | R W X | D S Y | F T | A M | C I V | B K U | L O P | G H Z
e | j n q | r w x | d s y | f t | a m | c i v | b k u | l o p | g h z
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9";
char curdigit = '0';
foreach (ch; letter2digit_txt) {
if (ch == '|')
curdigit++;
else if (ch == '\n')
curdigit = '0';
else if (std.ascii.isAlpha(ch))
tmp[ch] = curdigit;
}
return tmp;
}();
uint simplehash(uint seed, uint val) {
return (seed ^ val) * 16777619; // 32-bit FNV-1a hash
}
uint simplehash(string str) {
return str.fold!((val, ch) => simplehash(val, ch))(0U);
}
// A primitive https://en.wikipedia.org/wiki/Open_addressing
// hash table with https://en.wikipedia.org/wiki/Linear_probing
// for resolving collisions
struct HashTbl {
struct HashTblEntry { string num, word; }
HashTblEntry[] tblv;
uint[] tblh;
size_t mask;
this(size_t max_capacity) {
// use load factor between 0.25 and 0.5 by default (without boost)
tblv.length = 1.to!size_t << (bsr(max_capacity | 1) + 2 + load_factor_boost);
tblh.length = tblv.length;
mask = tblv.length - 1;
}
void insert(string num, string word) {
uint hashval = simplehash(num);
size_t idx = hashval & mask;
while (tblh[idx])
idx = (idx + 1) & mask;
tblv[idx] = HashTblEntry(num, word);
tblh[idx] = hashval | 1;
}
}
void main(string[] argv) {
if (argv.length < 4 || (argv[1] != "print" && argv[1] != "count")) {
writefln!"Usage: %s [print|count] [dictionary.txt] [input.txt]"(argv[0]);
return;
}
bool print_mode = (argv[1] == "print");
bool count_mode = (argv[1] == "count");
string dictionary_txt = readText(argv[2]).strip;
auto h = HashTbl(dictionary_txt.count('\n'));
foreach (line; dictionary_txt.splitter('\n')) {
auto word = line.strip;
auto num = word.map!(ch => letter2digit[ch]).filter!"a != 0".to!string;
h.insert(num, word);
}
string[] result;
char[] phonenum;
T solve(T)(size_t depth, bool can_use_digit, string phonenum, T[] dp) {
T *dp_val = &dp[phonenum.length * 2 + can_use_digit];
if (*dp_val != -1 && (*dp_val == 0 || !print_mode))
return *dp_val;
if (phonenum.empty) {
if (print_mode)
writeln(result[0], ": ", result[1 .. depth].joiner(" "));
return 1.to!T;
}
T counter = 0;
uint hashval = 0;
bool found_match = false;
foreach (i, ch; phonenum) {
// incrementally update hash for each newly appended digit instead
// of repeatedly recalculating it for the whole string again and again
hashval = simplehash(hashval, ch);
size_t idx = hashval & h.mask;
while (h.tblh[idx]) {
if ((h.tblh[idx] == (hashval | 1)) &&
(h.tblv[idx].num == phonenum[0 .. i + 1])) {
found_match = true;
result[depth] = h.tblv[idx].word;
counter += solve(depth + 1, true, phonenum[i + 1 .. $], dp);
}
idx = (idx + 1) & h.mask;
}
}
if (!found_match && can_use_digit) {
result[depth] = phonenum[0 .. 1];
counter += solve(depth + 1, false, phonenum[1 .. $], dp);
}
return (*dp_val = counter);
}
T process_line(T)(char[] line, ref T[] dp) {
line = line.strip;
if (phonenum.length < line.length)
phonenum.length = line.length;
size_t phonenum_length = 0;
foreach (ch; line)
if (ch >= '0' && ch <= '9')
phonenum[phonenum_length++] = ch;
if (phonenum_length + 1 > result.length)
result.length = phonenum_length + 1;
if ((phonenum_length + 1) * 2 > dp.length)
dp.length = (phonenum_length + 1) * 2;
dp[0 .. (phonenum_length + 1) * 2] = T(-1);
result[0] = cast(string)line;
return solve(1, true, cast(string)phonenum[0 .. phonenum_length], dp);
}
() @trusted { // workaround https://issues.dlang.org/show_bug.cgi?id=18110
if (print_mode) {
PrintCounterType[] dp_print;
foreach (line; File(argv[3]).byLine)
process_line(line, dp_print);
} else {
CounterType[] dp_count;
CounterType total_counter = 0;
foreach (line; File(argv[3]).byLine)
total_counter += process_line(line, dp_count);
total_counter.to!string.writeln;
}
}();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment