JarrettBillingsley · November 18, 2012 10:47
diff --git a/gistfile1.d b/gistfile1.d
 module anagramgen;

 import tango.core.Array;
 import Path = tango.io.Path;
 import tango.io.Stdout;
 import tango.io.stream.DataFile;
 import tango.io.stream.TextFile;

 const ulong[] primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101];
 const Min = 12;
 const Max = 30;

 ulong toRepr(char[] w)
 {
    ulong ret = 1;

    foreach(c; w)
        ret *= primes[c - 'a'];

    return ret;
 }

 struct Word
 {
    char[6] buf;
    size_t len;
    ulong repr;

    static Word opCall(char[] w)
    {
        Word ret = void;
        ret.buf[0 .. w.length] = w;
        ret.len = w.length;
        ret.repr = toRepr(w);
        return ret;
    }

    char[] w()
    {
        return buf[0 .. len];
    }
 }

 Word[] readWords(char[] file)
 {
    Word[] ret;
    scope f = new TextFileInput(file);

    foreach(line; f)
        ret ~= Word(line);

    return ret;
 }

 size_t[][ulong] findAnagrams(Word[] words)
 {
    size_t[Max] anagrams;
    size_t anidx = 0;

    bool addAn(size_t a)
    {
        if(anidx >= anagrams.length)
            return false;

        anagrams[anidx++] = a;
        return true;
    }

    size_t[][ulong] table;

    mainLoop: foreach(ref w; words)
    {
        if(w.len < 6 || w.repr in table)
            continue;

        anidx = 0;

        foreach(i, ref other; words)
            if(w.repr % other.repr == 0)
                if(!addAn(i))
                    continue mainLoop;

        if(anidx > Min)
            table[w.repr] = anagrams[0 .. anidx].dup;
    }

    return table;
 }

 bool[] haveGarbage(Word[] words, size_t[][ulong] table)
 {
    auto ret = new bool[](words.length);

    foreach(anagram; table)
        foreach(w; anagram)
            ret[w] = true;

    if(ret.find(false) == ret.length)
        return null;
    else
        return ret;
 }

 void writeOptimizedWordlist(Word[] words, bool[] used, char[] filename)
 {
    scope outFile = new TextFileOutput(filename);

    bool first = true;

    foreach(i, flag; used)
    {
        if(flag)
        {
            if(first)
                first = false;
            else
                outFile.newline;

            outFile(words[i].w);
        }
    }

    outFile.flush().close();
 }

 void writeDataFile(Word[] words, size_t[][ulong] table, char[] filename)
 {
    scope outFile = new DataFileOutput(filename);

    // char[] table
    outFile.int16(cast(short)words.length);

    foreach(ref w; words)
    {
        outFile.int8(cast(byte)w.w.length);
        outFile.write(w.w);
    }

    // Anagram table
    outFile.int16(cast(short)table.length);

    foreach(anagram; table)
    {
        outFile.int8(cast(byte)anagram.length);

        foreach(idx; anagram)
            outFile.int16(cast(short)idx);
    }

    outFile.flush().close();
 }

 void main()
 {
    char[] inFile = "anagrams.txt";
    Path.PathParser pp;
    pp.parse(inFile);

    auto words = readWords(inFile);
    auto table = findAnagrams(words);

    if(auto used = haveGarbage(words, table))
    {
        Stdout.formatln("File {} has unused words.  Optimizing...", inFile).flush();
        auto outFileName = pp.path ~ pp.name ~ "_opt" ~ pp.suffix;
        writeOptimizedWordlist(words, used, outFileName);
        words = readWords(outFileName);
        table = findAnagrams(words);
    }
    else
        Stdout.formatln("File {} is already optimal.", inFile).flush();

    Stdout.formatln("Writing data file...").flush();
    writeDataFile(words, table, pp.path ~ pp.name ~ ".dat");
 }
	module anagramgen;

	import tango.core.Array;
	import Path = tango.io.Path;
	import tango.io.Stdout;
	import tango.io.stream.DataFile;
	import tango.io.stream.TextFile;

	const ulong[] primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101];
	const Min = 12;
	const Max = 30;

	ulong toRepr(char[] w)
	{
	ulong ret = 1;

	foreach(c; w)
	ret *= primes[c - 'a'];

	return ret;
	}

	struct Word
	{
	char[6] buf;
	size_t len;
	ulong repr;

	static Word opCall(char[] w)
	{
	Word ret = void;
	ret.buf[0 .. w.length] = w;
	ret.len = w.length;
	ret.repr = toRepr(w);
	return ret;
	}

	char[] w()
	{
	return buf[0 .. len];
	}
	}

	Word[] readWords(char[] file)
	{
	Word[] ret;
	scope f = new TextFileInput(file);

	foreach(line; f)
	ret ~= Word(line);

	return ret;
	}

	size_t[][ulong] findAnagrams(Word[] words)
	{
	size_t[Max] anagrams;
	size_t anidx = 0;

	bool addAn(size_t a)
	{
	if(anidx >= anagrams.length)
	return false;

	anagrams[anidx++] = a;
	return true;
	}

	size_t[][ulong] table;

	mainLoop: foreach(ref w; words)
	{
	if(w.len < 6 \|\| w.repr in table)
	continue;

	anidx = 0;

	foreach(i, ref other; words)
	if(w.repr % other.repr == 0)
	if(!addAn(i))
	continue mainLoop;

	if(anidx > Min)
	table[w.repr] = anagrams[0 .. anidx].dup;
	}

	return table;
	}

	bool[] haveGarbage(Word[] words, size_t[][ulong] table)
	{
	auto ret = new bool[](words.length);

	foreach(anagram; table)
	foreach(w; anagram)
	ret[w] = true;

	if(ret.find(false) == ret.length)
	return null;
	else
	return ret;
	}

	void writeOptimizedWordlist(Word[] words, bool[] used, char[] filename)
	{
	scope outFile = new TextFileOutput(filename);

	bool first = true;

	foreach(i, flag; used)
	{
	if(flag)
	{
	if(first)
	first = false;
	else
	outFile.newline;

	outFile(words[i].w);
	}
	}

	outFile.flush().close();
	}

	void writeDataFile(Word[] words, size_t[][ulong] table, char[] filename)
	{
	scope outFile = new DataFileOutput(filename);

	// char[] table
	outFile.int16(cast(short)words.length);

	foreach(ref w; words)
	{
	outFile.int8(cast(byte)w.w.length);
	outFile.write(w.w);
	}

	// Anagram table
	outFile.int16(cast(short)table.length);

	foreach(anagram; table)
	{
	outFile.int8(cast(byte)anagram.length);

	foreach(idx; anagram)
	outFile.int16(cast(short)idx);
	}

	outFile.flush().close();
	}

	void main()
	{
	char[] inFile = "anagrams.txt";
	Path.PathParser pp;
	pp.parse(inFile);

	auto words = readWords(inFile);
	auto table = findAnagrams(words);

	if(auto used = haveGarbage(words, table))
	{
	Stdout.formatln("File {} has unused words. Optimizing...", inFile).flush();
	auto outFileName = pp.path ~ pp.name ~ "_opt" ~ pp.suffix;
	writeOptimizedWordlist(words, used, outFileName);
	words = readWords(outFileName);
	table = findAnagrams(words);
	}
	else
	Stdout.formatln("File {} is already optimal.", inFile).flush();

	Stdout.formatln("Writing data file...").flush();
	writeDataFile(words, table, pp.path ~ pp.name ~ ".dat");
	}