Skip to content

Instantly share code, notes, and snippets.

@rmasters
Created September 25, 2011 22:32
Show Gist options
  • Save rmasters/1241259 to your computer and use it in GitHub Desktop.
Save rmasters/1241259 to your computer and use it in GitHub Desktop.
CSV merge-subtract-doo-dah
#!/usr/bin/python
"""
Imports two exported CSV files containing tabular data, merges them and
outputs the difference between one list and the merged copy.
"""
import csv
# Import
def loadEmails(fileName, email_index):
    """Return the e-mail column of a CSV export, skipping the header row.

    Args:
        fileName: Path of the comma-separated file to read.
        email_index: Zero-based index of the column to extract from each row.

    Returns:
        A list with the value found at ``email_index`` in every data row,
        in file order. The first row is treated as a header and dropped.
        Blank lines are ignored.

    Raises:
        IOError/OSError: If the file cannot be opened.
        IndexError: If a non-empty row has fewer than ``email_index + 1``
            columns.
    """
    # The context manager guarantees the handle is closed even if a row
    # raises while being parsed.
    with open(fileName, "r") as csvFile:
        reader = csv.reader(csvFile, delimiter=',', quotechar='"')
        # Discard the header row; the default keeps an empty file from
        # raising StopIteration.
        next(reader, None)
        # csv.reader yields [] for blank lines; skip those rather than crash.
        return [row[email_index] for row in reader if row]
# Column 14 holds the e-mail address in the old export, column 4 in the new.
oldList = loadEmails("old.csv", 14)
newList = loadEmails("new.csv", 4)

# A = old
# B = new
# Result = UNIQUE(A + B) - B

# A + B without duplicates, keeping first-seen order. A set tracks what has
# already been emitted so each membership test is O(1) instead of a list scan.
seen = set()
merged = []
for email in oldList + newList:
    if email not in seen:
        seen.add(email)
        merged.append(email)

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print("Old: %d" % len(oldList))
print("New: %d" % len(newList))
print("Merged: %d" % len(merged))

# U - B: drop every address that appears in the new list. Filtering against a
# set removes all occurrences, whereas list.remove() only drops the first one.
newSet = set(newList)
merged = [email for email in merged if email not in newSet]
print("Non-cont: %d" % len(merged))

# Report the old addresses that were subtracted (i.e. also present in new).
remaining = set(merged)
for o in oldList:
    if o not in remaining:
        print(o)

# Text mode so the str lines can be written on Python 3 as well as Python 2.
with open("output.txt", "w") as out:
    out.writelines(m + "\n" for m in merged)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment