Skip to content

Instantly share code, notes, and snippets.

@dallasmarlow
Created January 7, 2017 19:06
Show Gist options
  • Save dallasmarlow/9bf8dbbd884fd79c5b8719c7a038afc1 to your computer and use it in GitHub Desktop.
Save dallasmarlow/9bf8dbbd884fd79c5b8719c7a038afc1 to your computer and use it in GitHub Desktop.
from datetime import datetime
import os, os.path
import json
def checkpoint_path(input_file):
    """Return the checkpoint file name that corresponds to `input_file`.

    Only the final path component of `input_file` is kept, so the result
    is a bare file name (no directory part) ending in '.checkpoint'.
    """
    _, file_name = os.path.split(input_file)
    return '{}.checkpoint'.format(file_name)
def load_checkpoint(checkpoint_file):
    """Load a previously saved checkpoint, if there is one.

    Args:
        checkpoint_file: Path of the checkpoint file, or None when the
            caller has no checkpoint path.

    Returns:
        The decoded JSON content of the file, or None when
        `checkpoint_file` is None or does not exist.

    Raises:
        ValueError: If the file exists but does not contain valid JSON.
    """
    # os.path.exists(None) raises TypeError, so "no path" has to be
    # handled explicitly before touching the filesystem.
    if checkpoint_file is None or not os.path.exists(checkpoint_file):
        return None
    with open(checkpoint_file) as f:
        return json.load(f)
def save_checkpoint(checkpoint_file, pos):
    """Persist the current scan position as a JSON checkpoint.

    The stored object has three keys: 'pos' (the value passed in),
    'time' (local time formatted as 'YYYY-MM-DD HH:MM:SS') and 'pid'
    (the id of the writing process).

    Args:
        checkpoint_file: Path of the checkpoint file to (over)write.
        pos: File position to record.
    """
    payload = json.dumps({
        'pos': pos,
        'time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'pid': os.getpid()})

    # Write to a temporary sibling and swap it into place so that an
    # interruption mid-write can never leave a truncated checkpoint behind.
    # os.replace (Python 3.3+) overwrites atomically on every platform and
    # POSIX os.rename does the same; where neither is available the
    # original in-place write is kept.
    atomic_replace = getattr(
        os, 'replace', os.rename if os.name == 'posix' else None)
    if atomic_replace is None:
        with open(checkpoint_file, 'w') as f:
            f.write(payload)
        return

    # Same directory as the target so the swap stays on one filesystem.
    tmp_file = '{}.{}.tmp'.format(checkpoint_file, os.getpid())
    with open(tmp_file, 'w') as f:
        f.write(payload)
    atomic_replace(tmp_file, checkpoint_file)
def _scan(input_file, fn, checkpoint_file = None, limit = 1):
    """Feed lines of `input_file` to `fn`, resuming from a saved checkpoint.

    Scanning starts at the position stored in the checkpoint (when it holds
    a positive 'pos') and otherwise at the beginning of the file. The
    position reached is written back to the checkpoint when the function
    exits, whether it returns normally or `fn` raises.

    Args:
        input_file: Path of the file to scan.
        fn: Callable invoked once per line with the raw line (including
            its line terminator).
        checkpoint_file: Path of the checkpoint file. When None, the path
            is derived from `input_file` via `checkpoint_path`.
        limit: Stop after this many lines have been processed in this
            call. A value the counter never reaches (e.g. 0 or None)
            scans to the end of the file.

    Returns:
        The line that reached `limit`, or None when the end of the file
        was reached first.
    """
    # A None path would crash both the load and the save below.
    if checkpoint_file is None:
        checkpoint_file = checkpoint_path(input_file)

    checkpoint = load_checkpoint(checkpoint_file)
    with open(input_file) as f:
        if checkpoint and checkpoint.get('pos', 0) > 0:
            f.seek(checkpoint['pos'])

        # Start from the resumed offset rather than 0: if no line is read
        # (already at EOF) the checkpoint must not be reset to the start.
        pos = f.tell()
        processed = 0
        try:
            # readline() instead of `for l in f` so that tell() stays usable
            # and accurate between lines.
            for l in iter(f.readline, ''):
                # NOTE: pos advances before fn runs, so a line on which fn
                # raises is not retried on the next run.
                pos = f.tell()
                # Single-argument call form prints the same text on
                # Python 2 and Python 3.
                print('{} - {} - {}'.format(processed, pos, l))
                fn(l)
                processed += 1
                if processed == limit:
                    return l
        finally:
            save_checkpoint(checkpoint_file, pos)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment