Created
January 7, 2017 19:06
-
-
Save dallasmarlow/9bf8dbbd884fd79c5b8719c7a038afc1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime | |
import os, os.path | |
import json | |
def checkpoint_path(input_file):
    """Return the checkpoint file name that belongs to ``input_file``.

    Only the last path component of ``input_file`` is kept, so the result is
    a bare ``<name>.checkpoint`` with no directory part.
    """
    base_name = os.path.basename(input_file)
    return '{}.checkpoint'.format(base_name)
def load_checkpoint(checkpoint_file):
    """Load a previously saved checkpoint.

    Args:
        checkpoint_file: path of the JSON checkpoint file, or a falsy value
            (``None`` / ``''``) when no checkpoint is in use.

    Returns:
        The decoded JSON content of the file, or ``None`` when no path was
        given or the file does not exist.

    Raises:
        ValueError: if the file exists but does not contain valid JSON.
    """
    # Guard against a missing path: os.path.exists(None) raises TypeError
    # instead of simply reporting "no checkpoint".
    if not checkpoint_file or not os.path.exists(checkpoint_file):
        return None
    with open(checkpoint_file) as f:
        return json.load(f)
def save_checkpoint(checkpoint_file, pos):
    """Overwrite ``checkpoint_file`` with a JSON record of the scan state.

    The record holds three keys: ``pos`` (the value passed in), ``time``
    (the current local time formatted as ``YYYY-MM-DD HH:MM:SS``) and
    ``pid`` (the id of the writing process).
    """
    with open(checkpoint_file, 'w') as out:
        state = {
            'pos': pos,
            'time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'pid': os.getpid(),
        }
        out.write(json.dumps(state))
def _scan(input_file, fn, checkpoint_file=None, limit=1):
    """Feed lines of ``input_file`` to ``fn``, resuming from a checkpoint.

    Args:
        input_file: path of the text file to read.
        fn: callable invoked once per line (the line keeps its newline).
        checkpoint_file: path of the JSON checkpoint to resume from and to
            update on exit. When falsy, no checkpoint is read or written.
        limit: stop after this many lines have been processed. A value that
            the counter never equals (such as 0) reads to the end of file.

    Returns:
        The last processed line once ``limit`` lines have been handled,
        otherwise ``None`` when the end of the file is reached first.

    A progress line ``<count> - <offset> - <line>`` is printed for every line
    before ``fn`` is called.
    """
    # Without a checkpoint path there is nothing to load or save; passing
    # None through to the helpers would raise TypeError.
    checkpoint = load_checkpoint(checkpoint_file) if checkpoint_file else None
    with open(input_file) as f:
        if checkpoint and checkpoint.get('pos', 0) > 0:
            f.seek(checkpoint['pos'])
        # Start from the resume offset rather than 0, so a run that reads no
        # lines (already at EOF) does not reset the saved checkpoint.
        pos = f.tell()
        processed = 0
        try:
            # iter(readline, '') instead of "for line in f" keeps f.tell()
            # usable while iterating.
            for line in iter(f.readline, ''):
                # NOTE: the offset advances before fn runs, so if fn raises,
                # the saved checkpoint already points past the failing line.
                pos = f.tell()
                print('{} - {} - {}'.format(processed, pos, line))
                fn(line)
                processed += 1
                if processed == limit:
                    return line
        finally:
            if checkpoint_file:
                save_checkpoint(checkpoint_file, pos)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment