Skip to content

Instantly share code, notes, and snippets.

@tbuckl
Last active September 4, 2015 18:42
Show Gist options
  • Save tbuckl/887f6aaff1e0523895bb to your computer and use it in GitHub Desktop.
Save tbuckl/887f6aaff1e0523895bb to your computer and use it in GitHub Desktop.
Compare the headers of two CSV files with Python
"""Compare the header rows of two CSV files.

Prints the column names found in both files, followed by the names that
appear in only one of them. Names are lower-cased before comparison, so
the comparison is case-insensitive.
"""
import csv

file1 = "fromAbag.csv"
file2 = "tazData2010.csv"


def sort_lowercase_headers(csvfile):
    """Return the lower-cased names from the first row of *csvfile*, sorted.

    Args:
        csvfile: An iterable of text lines, e.g. a file object opened in
            text mode with ``newline=""``.

    Returns:
        A sorted list of the lower-cased fields of the first row, or an
        empty list when *csvfile* yields no rows.
    """
    # The "|" quote character is kept from the original script.
    csvreader = csv.reader(csvfile, delimiter=',', quotechar='|')
    # next() with a default works on Python 2 and 3 and tolerates empty input.
    header = next(csvreader, [])
    # Lower-case first, then sort: sorting the raw names and lower-casing
    # afterwards leaves mixed-case headers out of order.
    return sorted(name.lower() for name in header)


def compare_headers(first_headers, second_headers):
    """Split two header lists into shared and exclusive names.

    Args:
        first_headers: Iterable of header names from the first file.
        second_headers: Iterable of header names from the second file.

    Returns:
        A ``(common, only_first, only_second)`` tuple of sorted lists.
    """
    first, second = set(first_headers), set(second_headers)
    # Sorted so the report is stable from run to run (set order is arbitrary).
    return sorted(first & second), sorted(first - second), sorted(second - first)


def main():
    """Read the headers of ``file1`` and ``file2`` and print the comparison."""
    # The csv module expects text-mode files opened with newline="" on Python 3.
    with open(file1, newline='') as csvfile:
        file1_headers = sort_lowercase_headers(csvfile)
    with open(file2, newline='') as csvfile:
        file2_headers = sort_lowercase_headers(csvfile)

    common, only_file1, only_file2 = compare_headers(file1_headers, file2_headers)

    print("headers in both files:")
    print("---------------------")
    print(common)
    print("")
    print("headers in {} but not in {}".format(file1, file2))
    print("---------------------")
    print('[%s]' % '\n '.join(map(str, only_file1)))
    print("")
    print("headers in {} but not in {}".format(file2, file1))
    print("---------------------")
    print('[%s]' % '\n '.join(map(str, only_file2)))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment