Created
October 10, 2014 10:56
-
-
Save rizsotto/89a2865e1c03d017f975 to your computer and use it in GitHub Desktop.
custom json diff tool
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import json | |
import argparse | |
import multiprocessing | |
import datetime | |
import gzip | |
import codecs | |
def load(fn):
    """Read a gzip-compressed JSON export and index its entries by ``ppid``.

    The file is decoded as UTF-8.  Its top-level object must have a
    ``payload`` member holding the lists ``edit``, ``delete`` and
    ``create``; each list element must provide ``ppid`` plus a time field
    (``lastAction`` for edits and creates, ``timestamp`` for deletes).

    Args:
        fn: Path of the gzip-compressed JSON file.

    Returns:
        A dict with the keys ``edits``, ``deletes`` and ``creates``.  Each
        value maps ``ppid`` to the time field of that entry; if a ``ppid``
        appears more than once in a list, the last occurrence wins.

    Raises:
        KeyError: If ``payload``, one of the three lists, or a required
            field of an entry is missing.
    """
    def strip(entries, time_key):
        # Keep only the two fields the comparison needs.
        return {entry['ppid']: entry[time_key] for entry in entries}

    reader = codecs.getreader("utf-8")
    # gzip.open() yields bytes; the codecs reader decodes them to text
    # before the JSON parser sees them.
    with reader(gzip.open(fn)) as handle:
        content = json.load(handle)['payload']

    return {'edits': strip(content['edit'], 'lastAction'),
            'deletes': strip(content['delete'], 'timestamp'),
            'creates': strip(content['create'], 'lastAction')}
def diff(name, lhs, rhs):
    """Yield the entries whose key exists in only one of two mappings.

    Keys present on both sides are never reported, even when their values
    differ.

    Args:
        name: Label copied into every yielded tuple.
        lhs: Left-hand mapping.
        rhs: Right-hand mapping.

    Yields:
        ``(name, marker, key, value)`` tuples: first every key found only
        in ``lhs`` (marker ``'<'``), then every key found only in ``rhs``
        (marker ``'>'``), each in the iteration order of its mapping.
    """
    sides = (('<', lhs, rhs), ('>', rhs, lhs))
    for marker, own, other in sides:
        for ident, stamp in own.items():
            if ident in other:
                continue
            yield (name, marker, ident, stamp)
def check_date(candidate, less):
    """Build a predicate that compares timestamps with a reference date.

    Args:
        candidate: Reference date formatted as ``YYYY-MM-DD``.  ``None`` or
            an empty string disables the check.
        less: When true, the predicate accepts timestamps dated strictly
            before the reference; otherwise strictly after it.

    Returns:
        A one-argument function taking a timestamp such as
        ``2014-10-10T10:56:00.000Z`` and returning a bool.  Only the
        calendar date of the timestamp is compared; the time of day is
        ignored.  Without a reference date the function returns ``True``
        for any argument and does not parse it.

    Raises:
        ValueError: If ``candidate`` is not a valid ``YYYY-MM-DD`` date.
            The returned predicate raises ``ValueError`` for a timestamp
            that does not match the expected layout.
    """
    if candidate:
        reference = datetime.datetime.strptime(candidate, "%Y-%m-%d").date()
    else:
        reference = None

    def wrap(time):
        if reference is None:
            return True
        # %f accepts one to six fractional digits, so timestamps with
        # non-zero milliseconds parse as well as the usual ".000" ones.
        this = datetime.datetime.strptime(
            time, "%Y-%m-%dT%H:%M:%S.%fZ").date()
        return this < reference if less else this > reference

    return wrap
def main():
    """Command-line entry point: print entries present in only one file.

    Positional arguments ``lhs`` and ``rhs`` name the two files to compare;
    both are loaded with ``load`` in worker processes.  The optional
    ``--before`` and ``--after`` arguments (``year-month-day``) restrict the
    output to entries whose time passes the corresponding ``check_date``
    predicate.

    One line is printed per reported entry in the form
    ``<direction> <ppid> [<category> @ <time>]``, where the direction is
    the marker produced by ``diff``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--before', metavar='<year-month-day>')
    parser.add_argument('--after', metavar='<year-month-day>')
    parser.add_argument(dest='lhs')
    parser.add_argument(dest='rhs')
    args = parser.parse_args()

    before = check_date(args.before, True)
    after = check_date(args.after, False)

    pool = multiprocessing.Pool()
    try:
        lhs, rhs = pool.map(load, [args.lhs, args.rhs])
    finally:
        # Shut the workers down even when loading one of the files fails.
        pool.close()
        pool.join()

    for key in lhs:
        for name, direction, ppid, time in diff(key, lhs[key], rhs[key]):
            # Evaluate "after" first, then "before", as short-circuit AND.
            if after(time) and before(time):
                print('{} {} [{} @ {}]'.format(direction, ppid, name, time))
if __name__ == "__main__":
    # freeze_support() must run before any pool is created so the script
    # keeps working when packaged as a frozen Windows executable.
    multiprocessing.freeze_support()
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment