Skip to content

Instantly share code, notes, and snippets.

@cowtowncoder
Created February 4, 2015 18:15
Show Gist options
  • Save cowtowncoder/4e0b2308b1f660b8b855 to your computer and use it in GitHub Desktop.
Save cowtowncoder/4e0b2308b1f660b8b855 to your computer and use it in GitHub Desktop.
Trie (tr13) builder that reads all entries from a LevelDB instance and writes them out as a tr13 file for later reading (note: memory usage is linear in the size of the result)
import com.ning.tr13.KeyValueSource;
import com.ning.tr13.impl.bytes.BytesValueReader;
import com.ning.tr13.impl.bytes.SimpleBytesTrieBuilder;
import org.iq80.leveldb.CompressionType;
import org.iq80.leveldb.DB;
import org.iq80.leveldb.DBIterator;
import org.iq80.leveldb.Options;
import org.iq80.leveldb.impl.Iq80DBFactory;
import java.io.*;
import java.util.Map;
/**
 * Command-line tool that iterates over every entry of an existing LevelDB
 * database and writes the entries out as a tr13 trie file.
 * <p>
 * Usage: {@code java ... [leveldb-directory] [trie-output-file]}
 */
public class PrintNamesAsTrie
{
    /**
     * Opens the LevelDB database named by {@code args[0]}, feeds all of its
     * entries to a {@link SimpleBytesTrieBuilder} and writes the resulting trie
     * to the file named by {@code args[1]}. Prints a short summary (number of
     * entries read, output size in kB) on success.
     *
     * @param args exactly two arguments: the LevelDB directory and the output file;
     *     with any other count a usage message is printed and the JVM exits with status 1
     * @throws Exception if the database cannot be opened or the trie cannot be
     *     built or written
     */
    public static void main(String[] args) throws Exception
    {
        if (args.length != 2) {
            System.err.println("Usage: java ... [leveldb-directory] [trie-output-file]");
            System.exit(1);
        }
        File dbDir = new File(args[0]).getCanonicalFile();
        File outputFile = new File(args[1]);

        Options options = new Options();
        // The database is the input here, so a missing one must not be silently created.
        options.createIfMissing(false);

        DB db = new Iq80DBFactory().open(dbDir, options);
        try {
            DBIterator iterator = db.iterator();
            MyInputHandler src = new MyInputHandler(iterator);
            // NOTE(review): the meaning of the boolean flags passed to the builder and to
            // buildAndWrite() is defined by tr13 and not visible here -- confirm against its docs.
            SimpleBytesTrieBuilder b = new SimpleBytesTrieBuilder(src, true);
            OutputStream out = new FileOutputStream(outputFile);
            try {
                b.buildAndWrite(out, true);
            } finally {
                // Close the output file even when building/writing fails.
                out.close();
            }
            System.out.println("Build complete: "+src.getLineNumber()+" lines read, result file length is "+(outputFile.length()>>10)+" kB");
        } finally {
            // Always release the database handle, including on failure.
            db.close();
        }
    }

    /**
     * {@link KeyValueSource} that serves the key/value pairs produced by a
     * LevelDB {@link DBIterator}, counting entries as they are consumed.
     */
    static class MyInputHandler extends KeyValueSource<byte[]>
    {
        /** Iterator supplying the entries; closed by {@link #readAll}. */
        protected final DBIterator iterator;

        /** Number of entries fetched from the iterator so far. */
        protected int entries;

        /**
         * @param it iterator to read entries from; this source takes over closing it
         */
        public MyInputHandler(DBIterator it) {
            iterator = it;
        }

        /**
         * Passes every remaining entry of the iterator to {@code handler}, then
         * closes the iterator. The iterator is closed even if the handler or the
         * iterator throws, so it cannot be reused after this call.
         *
         * @param handler callback receiving each key and value
         * @throws IOException declared by the signature; presumably propagated from
         *     the handler or from closing the iterator -- verify against tr13/leveldb
         */
        public void readAll(ValueCallback<byte[]> handler) throws IOException {
            try {
                while (iterator.hasNext()) {
                    ++entries;
                    Map.Entry<byte[], byte[]> entry = iterator.next();
                    // could strip out trailing zero, but leave in for now
                    handler.handleEntry(entry.getKey(), entry.getValue());
                }
            } finally {
                iterator.close();
            }
        }

        /**
         * @return number of entries read so far (reported as "lines" by the caller)
         */
        public int getLineNumber() {
            return entries;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment