Last active
January 28, 2016 18:31
-
-
Save edwindotcom/52fce21e31ddca5a34cd to your computer and use it in GitHub Desktop.
Takes two files, strips bad characters and spaces, and diffs the resulting lists
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Takes two files, strips bad chars and spaces, and diffs the resulting lists.

Usage: python diff_lists.py new.txt old.txt

Example:

:: new.txt ::
len: 14
['1044530', '1179274', '1184663', '1191162', '1195569', '1210586', '1221294', '1224056', '1226094', '1236372', '1237824', '1239042', '1241141', '1241851']
:: old.txt ::
len: 15
['1044530', '1179274', '1184663', '1191162', '1195569', '1210586', '1221294', '1224056', '1226094', '1236372', '1237824', '1239042', '1241141', '1241851', '1241860']
Unique items between two lists
['1241860']
'''
import re | |
import sys | |
# Characters removed from the raw text before it is split into items.
bad_chars = "(){}<>'"


def data_to_list(data):
    """Split raw text into a list of cleaned-up items.

    Every character listed in ``bad_chars`` is removed, the remaining text
    is split on newlines and commas, and surrounding whitespace is trimmed
    from each piece.

    Args:
        data: Text holding comma- and/or newline-separated items.

    Returns:
        A real ``list`` (never a lazy ``map`` object, so callers may sort
        it or take its length on Python 2 and Python 3 alike) of the
        non-empty items, in input order.
    """
    for ch in bad_chars:
        data = data.replace(ch, "")
    pieces = (piece.strip() for piece in re.split(r'[\n,]', data))
    # A trailing newline or a doubled separator yields empty pieces; they
    # are not real items and would show up as bogus differences.
    return [piece for piece in pieces if piece]
def compare_lists(listA, listB):
    """Return the items that appear in exactly one of the two inputs.

    Args:
        listA: Iterable of hashable items.
        listB: Iterable of hashable items.

    Returns:
        A list with the items found only in ``listA`` followed by the items
        found only in ``listB``. Each item appears once, in the order it
        was first seen, so the result is deterministic.
    """
    # Materialise both inputs up front: each one is traversed more than
    # once below, which would silently exhaust a one-shot iterator.
    items_a = list(listA)
    items_b = list(listB)

    def only_in(items, excluded):
        # Keep first-seen order while dropping duplicates and shared items.
        seen = set()
        unique = []
        for item in items:
            if item not in excluded and item not in seen:
                seen.add(item)
                unique.append(item)
        return unique

    return only_in(items_a, set(items_b)) + only_in(items_b, set(items_a))
def read_file(f):
    """Return the entire contents of the file at path ``f``.

    Args:
        f: Path of the file to read.

    Returns:
        Whatever ``read()`` yields for the file opened in default text mode.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    # Named ``handle`` rather than ``file`` so the Python 2 builtin of that
    # name is not shadowed.
    with open(f) as handle:
        return handle.read()
def test_1():
    """Smoke test: print the items unique to either of two sample lists."""
    first = data_to_list('a,b,c,d,e,f')
    second = data_to_list('z,y,x,d,e,f,m,n,o')
    # A single-argument print(...) produces the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    print(compare_lists(first, second))
def _print_listing(path, items):
    """Print the ``:: path ::`` header, the item count and the items."""
    print(":: %s ::" % (path,))
    # The two spaces after the colon reproduce the output of the original
    # ``print 'len: ', n`` statement byte-for-byte.
    print("len:  %d" % len(items))
    print(items)


def diff_lists(f1, f2):
    """Print the items of two list files and the items unique to either.

    Each file is read, converted to a list of items and sorted. Both
    listings are printed, followed by the items that occur in only one of
    the files, first as a list and then one item per line.

    Args:
        f1: Path of the first list file.
        f2: Path of the second list file.
    """
    # Read and parse both files before printing anything, so an unreadable
    # file fails the run without leaving partial output behind.
    # sorted() always returns a new list, whatever data_to_list hands back.
    items_1 = sorted(data_to_list(read_file(f1)))
    items_2 = sorted(data_to_list(read_file(f2)))

    _print_listing(f1, items_1)
    _print_listing(f2, items_2)

    print("Unique items between two lists")
    out = compare_lists(items_1, items_2)
    print("len:  %d" % len(out))
    print(out)

    print("In vertical list format:")
    for item in out:
        print(item)
if __name__ == '__main__':
    # Both input paths are required; anything less is a usage error.
    if len(sys.argv) < 3:
        # End the message with a newline so the shell prompt does not end
        # up on the same line as the usage text.
        sys.stderr.write('Usage: python diff_lists.py file1 file2\n')
        sys.exit(1)
    diff_lists(sys.argv[1], sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment