Skip to content

Instantly share code, notes, and snippets.

@meeuw
Last active August 29, 2015 14:03
Show Gist options
  • Save meeuw/096a3d098698e473d624 to your computer and use it in GitHub Desktop.
Save meeuw/096a3d098698e473d624 to your computer and use it in GitHub Desktop.
Compare two (sorted) tab-separated files and print the differences as SQL statements
import sys
def mycmp(a, b):
    """Compare two field values, numerically when both are all digits.

    Args:
        a: First field value (a string).
        b: Second field value (a string).

    Returns:
        -1, 0 or 1 when ``a`` orders before, equal to, or after ``b``.
    """
    # Integer comparison is used only when BOTH values consist solely of
    # digits (so "9" orders before "10"); every other pair, including a
    # digit/non-digit mix, is compared as plain strings.
    if a.isdigit() and b.isdigit():
        a = int(a)
        b = int(b)
    # Equivalent to the Python 2 builtin cmp(a, b), which Python 3 removed.
    return (a > b) - (a < b)
def cmpk(a, b, h):
    """Order two rows by an ordered sequence of key columns.

    Args:
        a: First row, indexable by each entry of ``h``.
        b: Second row, indexable by each entry of ``h``.
        h: Key column names, most significant first.

    Returns:
        The first non-zero ``mycmp`` result over the key columns, or 0 when
        every key column compares equal (or ``h`` is empty).
    """
    # The generator is lazy, so columns after the first difference are never
    # looked up or compared.
    verdicts = (mycmp(a[column], b[column]) for column in h)
    return next((verdict for verdict in verdicts if verdict != 0), 0)
class Reader:
    """Forward-only cursor over a tab-separated file with a header line.

    Attributes set by this class:
        filename: The path passed to the constructor.
        f: The underlying file object (closed once end of file is reached).
        headers: Column names taken from the first line of the file.
        line: The current data line without its line terminator; an empty
            string once the file is exhausted.
        s: Dict mapping header name to the field value of the current line.
    """

    def __init__(self, filename):
        """Open ``filename``, read its header line and load the first row."""
        self.filename = filename
        self.f = open(filename)
        self.first = True
        self.next()

    def next(self):
        """Advance to the next data line and return it.

        The very first call also consumes the header line. Returns an empty
        string at end of file. NOTE: a blank line inside the file also yields
        an empty string, which is indistinguishable from end of file here.
        """
        if self.first:
            self.first = False
            self.line = self._readline()
            self.headers = self.line.split('\t')
        self.line = self._readline()
        # zip() truncates to the shorter side, so a row with fewer fields than
        # headers simply lacks the trailing keys.
        self.s = dict(zip(self.headers, self.line.split('\t')))
        return self.line

    def _readline(self):
        """Return the next raw line without its terminator ('' at EOF)."""
        if self.f.closed:
            # Already exhausted: keep reporting end of file instead of raising.
            return ''
        raw = self.f.readline()
        if not raw:
            # End of file reached; release the handle.
            self.f.close()
        # Strip only line terminators. Slicing off the last character
        # unconditionally would eat a data character when the final line has
        # no trailing newline.
        return raw.rstrip('\r\n')
class SQL:
    """Prints SQL statements describing row differences to standard output.

    The ``frm`` / ``to`` arguments are row cursors exposing ``filename`` (used
    as the table name) and ``s`` (dict of column name -> value for the
    current row).

    NOTE(review): apart from the single-quote escaping in ``update``, values
    are interpolated into the statements verbatim (no quoting or escaping),
    so the output is only valid SQL when the field values are already valid
    SQL literals -- confirm against the input files in use.
    """

    def where(self, keys, frm):
        """Return ``key=value`` pairs for ``keys`` joined with `` and ``."""
        return " and ".join(k + '=' + frm.s[k] for k in keys)

    def delete(self, keys, frm):
        """Print a DELETE for the current row of ``frm``, matched on ``keys``."""
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print('delete from `%s` where %s;' % (frm.filename, self.where(keys, frm)))

    def update(self, keys, col, frm, to):
        """Print an UPDATE setting ``col`` to the value found in ``to``.

        The previous value (from ``frm``) is appended as a trailing ``#``
        comment on the statement.
        """
        # Only single quotes are escaped in the new value.
        new_value = to.s[col].replace("'", "\\'")
        print('update `%s` set `%s` = \'%s\' where %s; # %s' % (
            frm.filename, col, new_value, self.where(keys, frm), frm.s[col]))

    def insert(self, frm, to):
        """Print an INSERT into ``frm``'s table with the current row of ``to``."""
        # Keys and values come from the same dict, so their orders match.
        print('insert into `%s` (%s) VALUES (%s);' % (
            frm.filename, ", ".join(to.s), ", ".join(to.s.values())))
def main(argv=None):
    """Merge-compare two key-sorted tab-separated files and print SQL.

    Command line: FROM_FILE TO_FILE KEY[,KEY...] [IGNORE[,IGNORE...]]

    Both files are walked in lockstep, ordered by the key columns:
      * a row only in FROM_FILE prints a DELETE,
      * a row only in TO_FILE prints an INSERT,
      * a row in both prints one UPDATE per differing column that is neither
        a key column nor an ignored column.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success, 2 when required arguments are missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.stderr.write(
            'usage: %s FROM_FILE TO_FILE KEY[,KEY...] [IGNORE[,IGNORE...]]\n'
            % (argv[0] if argv else 'compare'))
        return 2

    output = SQL()
    frm = Reader(argv[1])
    to = Reader(argv[2])
    key = argv[3].split(',')  # ordered: most significant key column first
    # The ignore list is optional; leaving it out ignores nothing.
    ignore = frozenset(argv[4].split(',')) if len(argv) > 4 else frozenset()

    # An empty .line means that reader has run out of rows.
    while frm.line or to.line:
        if not frm.line:
            # FROM is exhausted: everything left in TO is new.
            # NOTE(review): this path emits every column value via
            # SQL.insert, whereas the insert further down writes NULL for
            # ignored columns and leaves the table name unquoted -- confirm
            # which form is intended.
            while to.line:
                output.insert(frm, to)
                to.next()
            break
        if not to.line:
            # TO is exhausted: everything left in FROM was removed.
            while frm.line:
                output.delete(key, frm)
                frm.next()
            break

        c = cmpk(frm.s, to.s, key)
        if c < 0:
            # from < to: the row exists only in FROM.
            output.delete(key, frm)
            frm.next()
        elif c > 0:
            # from > to: the row exists only in TO; ignored columns get NULL.
            values = ['NULL' if h in ignore else to.s[h] for h in to.headers]
            print('insert into %s (%s) values (%s);' % (
                argv[1], ", ".join(to.headers), ", ".join(values)))
            to.next()
        else:
            # from == to on the key: report each changed, non-ignored column.
            for i in frm.s:
                if i in key or i in ignore:
                    continue
                if frm.s[i] != to.s[i]:
                    output.update(key, i, frm, to)
            to.next()
            frm.next()
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment