Skip to content

Instantly share code, notes, and snippets.

@hughdbrown
Created July 13, 2017 19:37
Show Gist options
  • Save hughdbrown/8b9ca8877de6a214fafd514880cb3779 to your computer and use it in GitHub Desktop.
Save hughdbrown/8b9ca8877de6a214fafd514880cb3779 to your computer and use it in GitHub Desktop.
A Python script to diff, column by column, the CSV files that share the same name in two different directories
#!/usr/bin/env python
from __future__ import print_function
import os
import os.path
import sys
from csv import DictReader
class DictDiffer(object):
    """
    Compare two dictionaries key by key, reporting four sets of keys:
    (1) added: keys only in the current dictionary
    (2) removed: keys only in the past dictionary
    (3) changed: keys in both whose values differ
    (4) unchanged: keys in both whose values are equal
    """

    def __init__(self, current_dict, past_dict):
        """Store both dictionaries and precompute their key sets."""
        self.current_dict = current_dict
        self.past_dict = past_dict
        self.current_keys = set(current_dict.keys())
        self.past_keys = set(past_dict.keys())
        # Keys present on both sides; every query below is derived from this.
        self.intersect = self.current_keys & self.past_keys

    def added(self):
        """Return the keys found in the current dict but not the past one."""
        return self.current_keys.difference(self.intersect)

    def removed(self):
        """Return the keys found in the past dict but not the current one."""
        return self.past_keys.difference(self.intersect)

    def changed(self):
        """Return the shared keys whose values differ between the dicts."""
        return {
            key
            for key in self.intersect
            if self.past_dict[key] != self.current_dict[key]
        }

    def unchanged(self):
        """Return the shared keys whose values are equal in both dicts."""
        return {
            key
            for key in self.intersect
            if self.past_dict[key] == self.current_dict[key]
        }
def rowdiff(row1, row2):
    """Return the set of keys present in both rows whose values differ."""
    return DictDiffer(row1, row2).changed()
def csvdiff(filename, dir1, dir2):
    """
    Print the column-level differences between two same-named CSV files.

    filename -- name of the CSV file; it is opened under both directories
    dir1, dir2 -- the directories holding the two versions of the file

    A separator line and the filename are printed first. Rows are then
    paired positionally and, for every pair that differs, the row number
    and each differing column with its two values are printed. Row numbers
    count the rows yielded by DictReader, starting at 1 (the header line is
    consumed as the field names).

    zip() stops at the end of the shorter file, so surplus rows in the
    longer file are not reported. Only columns present in both rows are
    compared. Returns None.
    """
    filename1 = os.path.join(dir1, filename)
    filename2 = os.path.join(dir2, filename)
    # Open both files in a with-block so the handles are closed even if
    # reading or printing raises part-way through.
    with open(filename1) as file1, open(filename2) as file2:
        csv1 = DictReader(file1)
        csv2 = DictReader(file2)
        print("-" * 30)
        print(filename)
        for i, (row1, row2) in enumerate(zip(csv1, csv2), start=1):
            diffs = rowdiff(row1, row2)
            if diffs:
                print("line {0}".format(i))
                # Sort so the report is identical from run to run; key=str
                # keeps the sort safe if DictReader produced a None key for
                # surplus fields.
                for column in sorted(diffs, key=str):
                    print("{0}:\n\t{1}\n\t{2}".format(
                        column, row1[column], row2[column]))
def diff(dir1, dir2):
    """
    Run csvdiff on every file whose name appears in both directories.

    dir1, dir2 -- the directories to compare

    Names are processed in sorted order so the output is reproducible.
    Names that are not regular files in both directories (for example a
    subdirectory that exists on both sides) are skipped, because csvdiff
    would fail trying to open them. Returns None.
    """
    common = set(os.listdir(dir1)).intersection(os.listdir(dir2))
    for filename in sorted(common):
        path1 = os.path.join(dir1, filename)
        path2 = os.path.join(dir2, filename)
        if not (os.path.isfile(path1) and os.path.isfile(path2)):
            continue
        csvdiff(filename, dir1, dir2)
if __name__ == '__main__':
    # Two directory arguments are required; exit with a usage message
    # instead of an IndexError traceback when they are missing. Any extra
    # arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit("usage: {0} DIR1 DIR2".format(sys.argv[0]))
    dir1 = sys.argv[1]
    dir2 = sys.argv[2]
    diff(dir1, dir2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment