Last active
May 4, 2017 18:44
-
-
Save aexaey/58aa86ac5c7f868554a2aeac7a33cedb to your computer and use it in GitHub Desktop.
Source-level binary tree search code generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# makesbtree.py: Source-level binary tree search code generator | |
# | |
# makesbtree.py is free software; you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation; version 2 of the License only. | |
# | |
# This would generate a source code snippet that efficiently looks up a string | |
# against pre-defined list of matches. Complete list of matches is stored | |
# directly inside generated code. This is optimized for single-shot lookups, | |
# i.e. lookup in a (relatively) large list that happens only once in program's | |
# lifetime. There, traditional approach would spend most of its time loading | |
# and parsing data from e.g. external file into memory. | |
# | |
# Usage: | |
# echo -e "one\ntwo\nthree\nfour\nfive" | ./makesbtree.py c >lookup.c | |
# gcc -o lookup lookup.c | |
# ./lookup three; echo $? | |
# 1 | |
# ./lookup ten; echo $? | |
# 0 | |
# | |
# Benchmark (looking up in 500 000-entry list): generated C code is 3x faster | |
# than grep: | |
# | |
# for i in {1..500000}; do echo $i; done >data.txt | |
# cat data.txt | ./makesbtree.py c >lookup.c && gcc -o lookup lookup.c | |
# | |
# time (for i in {1..1000}; do ./lookup foo; done) | |
# real 0m0.834s | |
# user 0m0.124s | |
# sys 0m0.805s | |
# | |
# time (for i in {1..1000}; do grep -q foo data.txt; done) | |
# real 0m2.663s | |
# user 0m0.876s | |
# sys 0m1.893s | |
from __future__ import print_function | |
import fileinput | |
from sys import argv | |
from math import log, floor | |
lang = { | |
'c': { | |
'indent': lambda n: | |
" " * n, | |
'prologue': lambda n, depth: | |
("#include <string.h>\n" | |
"\n" | |
"// auto-generated binary lookup for %s elements (%s steps deep)\n" | |
"int lookup(char *x) {\n" | |
) % (n, depth), | |
'if-select': lambda indents, value: | |
"%sif (strcmp(x, \"%s\") <= 0) {\n" % (indents, value), | |
'if-match': lambda indents, value: | |
("%sif (strcmp(x, \"%s\") == 0) return 1;\n" | |
) % (indents, value), | |
'else': lambda idents: | |
"%s} else {\n" % idents, | |
'endif': lambda idents: | |
"%s}\n" % idents, | |
'epilogue': | |
(" return 0;\n" | |
"}\n" | |
"\n" | |
"int main(int argc, char *argv[]) {\n" | |
" return(lookup(argv[1]));\n" | |
"}\n" | |
) | |
}, | |
'python': { | |
'indent': lambda n: | |
" " * n, | |
'prologue': lambda n, depth: | |
("import sys\n" | |
"\n" | |
"# auto-generated binary lookup for %s elements (%s steps deep)\n" | |
"def lookup(x):\n" | |
) % (n, depth), | |
'if-select': lambda indents, value: | |
"%sif x <= \"%s\":\n" % (indents, value), | |
'if-match': lambda indents, value: | |
("%sif x == \"%s\":\n" | |
"%s return 1\n" | |
) % (indents, value, indents), | |
'else': lambda idents: | |
"%selse:\n" % idents, | |
'endif': lambda idents: | |
"", | |
'epilogue': | |
(" return 0\n" | |
"\n" | |
"print(lookup(sys.argv[1].strip()))\n" | |
) | |
}, | |
'java': { | |
'indent': lambda n: | |
" " * n, | |
'prologue': lambda n, depth: | |
("public class changeme {\n" | |
" // auto-generated binary lookup for %s elements (%s steps deep)\n" | |
" public static int lookup(String x) {\n" | |
) % (n, depth), | |
'if-select': lambda indents, value: | |
"%s if (x.compareTo(\"%s\") <= 0) {\n" % (indents, value), | |
'if-match': lambda indents, value: | |
("%s if (x.equals(\"%s\")) { return 1; }\n" | |
) % (indents, value), | |
'else': lambda idents: | |
"%s } else {\n" % idents, | |
'endif': lambda idents: | |
"%s }\n" % idents, | |
'epilogue': | |
(" return 0;\n" | |
" }\n" | |
" public static void main(String[] args) {\n" | |
" System.out.println(lookup(args[1]));\n" | |
" }\n" | |
"}\n" | |
) | |
}, | |
'ts': { | |
'indent': lambda n: | |
" " * n, | |
'prologue': lambda n, depth: | |
("# auto-generated binary lookup for %s elements (%s steps deep)\n" | |
"sub lookup($x){\n" | |
) % (n, depth), | |
'if-select': lambda indents, value: | |
"%sif ($x <= \"%s\") {\n" % (indents, value), | |
'if-match': lambda indents, value: | |
("%sif ($x == \"%s\") { return 1; }\n" | |
) % (indents, value), | |
'else': lambda idents: | |
"%s} else {\n" % idents, | |
'endif': lambda idents: | |
"%s}\n" % idents, | |
'epilogue': | |
(" return 0;\n" | |
"}\n" | |
"\n" | |
) | |
} | |
} | |
def makesbtree(lines, l): | |
ind = l['indent'] | |
indent = 1 | |
depth = int(floor(log(len(lines) - 1, 2) + 1)) | |
yield(l['prologue'](len(lines), depth)) | |
for i in range(len(lines)): | |
if (i % 2) == 0: | |
ii = i >> 1 | |
while ii != 0 and ii % 2 == 0: | |
indent -= 1 | |
yield(l['endif'](ind(indent))) | |
ii >>= 1 | |
if i > 0: | |
yield(l['else'](ind(indent - 1))) | |
while indent < depth: | |
ii = (i | 0xffffffff >> (32 - depth + indent)) | |
if ii >= len(lines): | |
ii = len(lines) - 1 | |
yield(l['if-select'](ind(indent), lines[ii])) | |
indent += 1 | |
yield(l['if-match'](ind(indent), lines[i])) | |
for i in range(indent - 1, 0, -1): | |
yield(l['endif'](ind(i))) | |
yield(l['epilogue']) | |
if len(argv) < 2 or argv[1] not in lang: | |
print("Usage: " + __file__ + | |
" {lang} {input-file} {input-file} ... >{output-file}") | |
print(" " + __file__ + | |
" {lang} <{input-file} >{output-file}") | |
print("Where: lang := 'c', 'python', 'java', 'ts'") | |
exit(1) | |
lines = sorted(set([x.strip().replace('"', '\\"') for x | |
in fileinput.input(argv[2:])])) | |
for x in makesbtree(lines, lang[argv[1]]): | |
print(x, end="") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment