Last active
January 5, 2016 12:25
-
-
Save ankitml/bf090b76a684b770ced7 to your computer and use it in GitHub Desktop.
A Python function that reads data from multiple CSV files and merges them; the files may differ in their rows, their columns, or both. Every file must contain a primary column whose header is passed in as key_header.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from real_type import get_type | |
# real_type is available from https://github.com/ankitml/real_type or https://pypi.python.org/pypi/real_type/0.1
def read_multiple_csv(files, key_header):
    """
    Merge the rows of several CSV files into one dict keyed on a shared column.

    The files may contain different columns and different records. Rows that
    share the same value in the ``key_header`` column are merged into a single
    record; when the same column appears in more than one file, the value from
    the file listed later wins.

    Assumption - every file has a column named ``key_header``.

    Note: lines are split on plain commas, so quoted fields that contain
    commas are not supported.

    Args:
        files: iterable of file names to read, e.g.
            ``['grades.csv', 'previous_grades.csv']``.
        key_header: name of the column used to join records, e.g. ``'email'``.

    Returns:
        dict mapping each key-column value to a dict of
        ``{column header: cell value}`` (the key column itself is removed
        from the inner dict). Cells that ``get_type`` reports as ``int`` are
        converted to ``int``; everything else is kept as a string.

    Raises:
        KeyError: if a file has no column named ``key_header``.
        IndexError: if a row has fewer cells than the header line.

    Example:
        merged_data = read_multiple_csv(['grades.csv', 'previous_grades.csv'],
                                        'email')
    """
    data = {}
    for filename in files:
        # The context manager closes the file even if a row fails to parse.
        with open(filename) as csv_file:
            # The next() builtin works on both Python 2.6+ and Python 3,
            # unlike the Python-2-only ``.next()`` method.
            header_line = next(csv_file, None)
            if header_line is None:
                # Completely empty file: nothing to merge.
                continue
            headers = [h.strip() for h in header_line.split(',')]
            for line in csv_file:
                # Drop the line terminator so it does not end up glued to the
                # last cell of every row.
                line = line.rstrip('\r\n')
                if not line:
                    # Skip blank lines (commonly a trailing one at end of file).
                    continue
                splits = line.split(',')
                inner_dict = {}
                for k, header in enumerate(headers):
                    # Columns with an empty header are ignored.
                    if header != '':
                        cell = splits[k]
                        inner_dict[header] = int(cell) if get_type(cell) is int else cell
                data_key = inner_dict.pop(key_header)
                # Start from a copy of what earlier rows/files contributed for
                # this key, then overlay the columns from the current row.
                merged = dict(data.get(data_key, {}))
                merged.update(inner_dict)
                data[data_key] = merged
    return data
def write_combined_csv(data, key='id', file_name='combined.csv'):
    """
    Write merged records to a CSV file, one row per key.

    Args:
        data: dict mapping each key value to a dict of
            ``{column header: cell value}``. Records may have different
            columns; cells missing from a record are written as empty.
        key: header to use for the key column, which is written first.
        file_name: path of the CSV file to create (overwritten if present).

    Returns:
        None. The result is the file written to ``file_name``.
    """
    import csv
    import sys

    # Collect the columns of *every* record (in first-seen order), not just
    # the first one: records merged from different files can have different
    # columns, and DictWriter rejects rows with fields missing from the header.
    headers = [key]
    seen = set(headers)
    for inner_dict in data.values():
        for column in inner_dict:
            if column not in seen:
                seen.add(column)
                headers.append(column)

    list_data = []
    for key_value, inner_dict in data.items():
        # Work on a copy so the caller's records are not modified.
        row = dict(inner_dict)
        row[key] = key_value
        list_data.append(row)

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        combined_file = open(file_name, 'wb')
    else:
        combined_file = open(file_name, 'w', newline='')
    with combined_file:
        dict_writer = csv.DictWriter(combined_file, headers)
        dict_writer.writeheader()
        dict_writer.writerows(list_data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment