Skip to content

Instantly share code, notes, and snippets.

@bremac
Last active May 17, 2017 21:35
Show Gist options
  • Save bremac/34b6aaaec36144ecee64b7a4e19d36c3 to your computer and use it in GitHub Desktop.
Save bremac/34b6aaaec36144ecee64b7a4e19d36c3 to your computer and use it in GitHub Desktop.
Approximate search for a prefixed line in a sorted file (ex. a huge log file)
import os.path
import sys
def bsearch(prefix, filename, steps, lines):
lo = 0
hi = os.path.getsize(filename)
with open(filename, 'r') as fp:
while steps > 0:
mid = (lo + hi) / 2
fp.seek(mid)
fp.readline()
line = fp.readline()
if line < prefix:
lo = mid
elif line > prefix:
hi = mid
steps -= 1
sys.stdout.write(line)
for lineno, line in enumerate(fp):
sys.stdout.write(line)
if lineno + 1 >= lines:
break
if __name__ == '__main__':
prefix = sys.argv[1]
filename = sys.argv[2]
lines = int(sys.argv[3])
bsearch(prefix, filename, 20, lines)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment