Skip to content

Instantly share code, notes, and snippets.

@ankitml
Last active January 5, 2016 12:25
Show Gist options
  • Save ankitml/bf090b76a684b770ced7 to your computer and use it in GitHub Desktop.
Save ankitml/bf090b76a684b770ced7 to your computer and use it in GitHub Desktop.
A Python function that reads and merges data from multiple CSV files, each of which may contain extra rows, extra columns, or both compared with the others. Every file must contain the primary key column whose header name is passed as key_header.
from real_type import get_type
# real_type is available from https://github.com/ankitml/real_type or https://pypi.python.org/pypi/real_type/0.1
def read_multiple_csv(files, key_header):
    """
    Merge the records of several CSV files into one dict keyed by a column.

    The first row of every file is treated as its header row.  The files may
    have different columns and different records: all rows that share the
    same value in the ``key_header`` column are merged into a single entry,
    and a later file (or later row) overrides an earlier value of the same
    column.

    Args:
        files: iterable of CSV file names, read in the given order.
        key_header: name of the column that identifies a record.  Every
            file is expected to contain this column.

    Returns:
        dict mapping each key value to a ``{column: value}`` dict holding
        the remaining columns of that record.  A value (including the key
        itself) is converted with ``int`` when ``get_type`` reports ``int``
        for it; every other value is kept as the string read from the file.

    Raises:
        KeyError: if a data row has no value for the ``key_header`` column.

    Example:
        files = ['grades.csv', 'previous_grades.csv']
        key_header = 'email'
        merged_data = read_multiple_csv(files, key_header)
    """
    import csv

    data = {}
    for filename in files:
        # newline='' lets the csv module do its own line-ending handling;
        # the context manager guarantees the file is closed.
        with open(filename, newline='') as csv_file:
            reader = csv.reader(csv_file)
            try:
                headers = [h.strip() for h in next(reader)]
            except StopIteration:
                # Completely empty file: nothing to merge.
                continue
            for row in reader:
                if not row:
                    # Blank line between records.
                    continue
                inner_dict = {}
                # zip() stops at the shorter sequence, so a row with fewer
                # cells than the header simply leaves those columns unset.
                for header, value in zip(headers, row):
                    # Columns with an empty header name are ignored.
                    if header != '':
                        inner_dict[header] = (
                            int(value) if get_type(value) is int else value
                        )
                data_key = inner_dict.pop(key_header)
                data.setdefault(data_key, {}).update(inner_dict)
    return data
def write_combined_csv(data, key='id', file_name='combined.csv'):
    """
    Write a dict of merged records to a single CSV file.

    Args:
        data: dict mapping each key value to a ``{column: value}`` dict,
            i.e. the structure returned by ``read_multiple_csv``.
        key: header name of the key column; it is written as the first
            column of the output.
        file_name: path of the CSV file to create (overwritten if present).

    The remaining columns are the union of the columns of all records, in
    first-seen order, so records with differing columns can be written
    together; a column missing from a record is written as an empty cell.
    ``data`` is not modified.  Empty ``data`` yields a header-only file.
    """
    import csv

    # Collect every column that occurs in any record, not only the first,
    # because merged records may carry different sets of columns.
    headers = [key]
    seen = {key}
    for inner_dict in data.values():
        for column in inner_dict:
            if column not in seen:
                seen.add(column)
                headers.append(column)

    list_data = []
    for key_value, inner_dict in data.items():
        # Copy so the caller's record does not gain the key column.
        row = dict(inner_dict)
        row[key] = key_value
        list_data.append(row)

    # Text mode with newline='' is what the csv module requires for writing.
    with open(file_name, 'w', newline='') as combined_file:
        dict_writer = csv.DictWriter(combined_file, headers)
        dict_writer.writeheader()
        dict_writer.writerows(list_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment