Created
November 18, 2012 10:47
-
-
Save JarrettBillingsley/4104522 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module anagramgen; | |
import tango.core.Array; | |
import Path = tango.io.Path; | |
import tango.io.Stdout; | |
import tango.io.stream.DataFile; | |
import tango.io.stream.TextFile; | |
const ulong[] primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101]; | |
const Min = 12; | |
const Max = 30; | |
ulong toRepr(char[] w) | |
{ | |
ulong ret = 1; | |
foreach(c; w) | |
ret *= primes[c - 'a']; | |
return ret; | |
} | |
struct Word | |
{ | |
char[6] buf; | |
size_t len; | |
ulong repr; | |
static Word opCall(char[] w) | |
{ | |
Word ret = void; | |
ret.buf[0 .. w.length] = w; | |
ret.len = w.length; | |
ret.repr = toRepr(w); | |
return ret; | |
} | |
char[] w() | |
{ | |
return buf[0 .. len]; | |
} | |
} | |
Word[] readWords(char[] file) | |
{ | |
Word[] ret; | |
scope f = new TextFileInput(file); | |
foreach(line; f) | |
ret ~= Word(line); | |
return ret; | |
} | |
size_t[][ulong] findAnagrams(Word[] words) | |
{ | |
size_t[Max] anagrams; | |
size_t anidx = 0; | |
bool addAn(size_t a) | |
{ | |
if(anidx >= anagrams.length) | |
return false; | |
anagrams[anidx++] = a; | |
return true; | |
} | |
size_t[][ulong] table; | |
mainLoop: foreach(ref w; words) | |
{ | |
if(w.len < 6 || w.repr in table) | |
continue; | |
anidx = 0; | |
foreach(i, ref other; words) | |
if(w.repr % other.repr == 0) | |
if(!addAn(i)) | |
continue mainLoop; | |
if(anidx > Min) | |
table[w.repr] = anagrams[0 .. anidx].dup; | |
} | |
return table; | |
} | |
bool[] haveGarbage(Word[] words, size_t[][ulong] table) | |
{ | |
auto ret = new bool[](words.length); | |
foreach(anagram; table) | |
foreach(w; anagram) | |
ret[w] = true; | |
if(ret.find(false) == ret.length) | |
return null; | |
else | |
return ret; | |
} | |
void writeOptimizedWordlist(Word[] words, bool[] used, char[] filename) | |
{ | |
scope outFile = new TextFileOutput(filename); | |
bool first = true; | |
foreach(i, flag; used) | |
{ | |
if(flag) | |
{ | |
if(first) | |
first = false; | |
else | |
outFile.newline; | |
outFile(words[i].w); | |
} | |
} | |
outFile.flush().close(); | |
} | |
void writeDataFile(Word[] words, size_t[][ulong] table, char[] filename) | |
{ | |
scope outFile = new DataFileOutput(filename); | |
// char[] table | |
outFile.int16(cast(short)words.length); | |
foreach(ref w; words) | |
{ | |
outFile.int8(cast(byte)w.w.length); | |
outFile.write(w.w); | |
} | |
// Anagram table | |
outFile.int16(cast(short)table.length); | |
foreach(anagram; table) | |
{ | |
outFile.int8(cast(byte)anagram.length); | |
foreach(idx; anagram) | |
outFile.int16(cast(short)idx); | |
} | |
outFile.flush().close(); | |
} | |
void main() | |
{ | |
char[] inFile = "anagrams.txt"; | |
Path.PathParser pp; | |
pp.parse(inFile); | |
auto words = readWords(inFile); | |
auto table = findAnagrams(words); | |
if(auto used = haveGarbage(words, table)) | |
{ | |
Stdout.formatln("File {} has unused words. Optimizing...", inFile).flush(); | |
auto outFileName = pp.path ~ pp.name ~ "_opt" ~ pp.suffix; | |
writeOptimizedWordlist(words, used, outFileName); | |
words = readWords(outFileName); | |
table = findAnagrams(words); | |
} | |
else | |
Stdout.formatln("File {} is already optimal.", inFile).flush(); | |
Stdout.formatln("Writing data file...").flush(); | |
writeDataFile(words, table, pp.path ~ pp.name ~ ".dat"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment