Skip to content

Instantly share code, notes, and snippets.

@aexaey
Last active May 4, 2017 18:44
Show Gist options
  • Save aexaey/58aa86ac5c7f868554a2aeac7a33cedb to your computer and use it in GitHub Desktop.
Save aexaey/58aa86ac5c7f868554a2aeac7a33cedb to your computer and use it in GitHub Desktop.
Source-level binary tree search code generator
#!/usr/bin/python
# makesbtree.py: Source-level binary tree search code generator
#
# makesbtree.py is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License only.
#
# This would generate a source code snippet that efficiently looks up a string
# against pre-defined list of matches. Complete list of matches is stored
# directly inside generated code. This is optimized for single-shot lookups,
# i.e. lookup in a (relatively) large list that happens only once in program's
# lifetime. There, traditional approach would spend most of its time loading
# and parsing data from e.g. external file into memory.
#
# Usage:
# echo -e "one\ntwo\nthree\nfour\nfive" | ./makesbtree.py c >lookup.c
# gcc -o lookup lookup.c
# ./lookup three; echo $?
# 1
# ./lookup ten; echo $?
# 0
#
# Benchmark (looking up in 500 000-entry list): generated C code is 3x faster
# than grep:
#
# for i in {1..500000}; do echo $i; done >data.txt
# cat data.txt | ./makesbtree.py c >lookup.c && gcc -o lookup lookup.c
#
# time (for i in {1..1000}; do ./lookup foo; done)
# real 0m0.834s
# user 0m0.124s
# sys 0m0.805s
#
# time (for i in {1..1000}; do grep -q foo data.txt; done)
# real 0m2.663s
# user 0m0.876s
# sys 0m1.893s
from __future__ import print_function
import fileinput
from sys import argv
from math import log, floor
lang = {
'c': {
'indent': lambda n:
" " * n,
'prologue': lambda n, depth:
("#include <string.h>\n"
"\n"
"// auto-generated binary lookup for %s elements (%s steps deep)\n"
"int lookup(char *x) {\n"
) % (n, depth),
'if-select': lambda indents, value:
"%sif (strcmp(x, \"%s\") <= 0) {\n" % (indents, value),
'if-match': lambda indents, value:
("%sif (strcmp(x, \"%s\") == 0) return 1;\n"
) % (indents, value),
'else': lambda idents:
"%s} else {\n" % idents,
'endif': lambda idents:
"%s}\n" % idents,
'epilogue':
(" return 0;\n"
"}\n"
"\n"
"int main(int argc, char *argv[]) {\n"
" return(lookup(argv[1]));\n"
"}\n"
)
},
'python': {
'indent': lambda n:
" " * n,
'prologue': lambda n, depth:
("import sys\n"
"\n"
"# auto-generated binary lookup for %s elements (%s steps deep)\n"
"def lookup(x):\n"
) % (n, depth),
'if-select': lambda indents, value:
"%sif x <= \"%s\":\n" % (indents, value),
'if-match': lambda indents, value:
("%sif x == \"%s\":\n"
"%s return 1\n"
) % (indents, value, indents),
'else': lambda idents:
"%selse:\n" % idents,
'endif': lambda idents:
"",
'epilogue':
(" return 0\n"
"\n"
"print(lookup(sys.argv[1].strip()))\n"
)
},
'java': {
'indent': lambda n:
" " * n,
'prologue': lambda n, depth:
("public class changeme {\n"
" // auto-generated binary lookup for %s elements (%s steps deep)\n"
" public static int lookup(String x) {\n"
) % (n, depth),
'if-select': lambda indents, value:
"%s if (x.compareTo(\"%s\") <= 0) {\n" % (indents, value),
'if-match': lambda indents, value:
("%s if (x.equals(\"%s\")) { return 1; }\n"
) % (indents, value),
'else': lambda idents:
"%s } else {\n" % idents,
'endif': lambda idents:
"%s }\n" % idents,
'epilogue':
(" return 0;\n"
" }\n"
" public static void main(String[] args) {\n"
" System.out.println(lookup(args[1]));\n"
" }\n"
"}\n"
)
},
'ts': {
'indent': lambda n:
" " * n,
'prologue': lambda n, depth:
("# auto-generated binary lookup for %s elements (%s steps deep)\n"
"sub lookup($x){\n"
) % (n, depth),
'if-select': lambda indents, value:
"%sif ($x <= \"%s\") {\n" % (indents, value),
'if-match': lambda indents, value:
("%sif ($x == \"%s\") { return 1; }\n"
) % (indents, value),
'else': lambda idents:
"%s} else {\n" % idents,
'endif': lambda idents:
"%s}\n" % idents,
'epilogue':
(" return 0;\n"
"}\n"
"\n"
)
}
}
def makesbtree(lines, l):
ind = l['indent']
indent = 1
depth = int(floor(log(len(lines) - 1, 2) + 1))
yield(l['prologue'](len(lines), depth))
for i in range(len(lines)):
if (i % 2) == 0:
ii = i >> 1
while ii != 0 and ii % 2 == 0:
indent -= 1
yield(l['endif'](ind(indent)))
ii >>= 1
if i > 0:
yield(l['else'](ind(indent - 1)))
while indent < depth:
ii = (i | 0xffffffff >> (32 - depth + indent))
if ii >= len(lines):
ii = len(lines) - 1
yield(l['if-select'](ind(indent), lines[ii]))
indent += 1
yield(l['if-match'](ind(indent), lines[i]))
for i in range(indent - 1, 0, -1):
yield(l['endif'](ind(i)))
yield(l['epilogue'])
if len(argv) < 2 or argv[1] not in lang:
print("Usage: " + __file__ +
" {lang} {input-file} {input-file} ... >{output-file}")
print(" " + __file__ +
" {lang} <{input-file} >{output-file}")
print("Where: lang := 'c', 'python', 'java', 'ts'")
exit(1)
lines = sorted(set([x.strip().replace('"', '\\"') for x
in fileinput.input(argv[2:])]))
for x in makesbtree(lines, lang[argv[1]]):
print(x, end="")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment