Last active
December 29, 2016 02:18
-
-
Save rosiecakes/0747b75cc9729ada766092b23bafdd50 to your computer and use it in GitHub Desktop.
dataquest birth years
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example of the intended conversion: raw comma-separated row strings such as
['2000,1,1,6,9083',
 '2000,1,2,7,8006',
 '2000,1,3,1,11363',
 '2000,1,4,2,13032',
 '2000,1,5,3,12558',
 '2000,1,6,4,12466',
 '2000,1,7,5,12516',
 '2000,1,8,6,8934',
 '2000,1,9,7,7949',
 '2000,1,10,1,11668']
# become rows of integers such as
[[2000, 1, 1, 6, 9083],
 [2000, 1, 2, 7, 8006],
 [2000, 1, 3, 1, 11363],
 [2000, 1, 4, 2, 13032],
 [2000, 1, 5, 3, 12558],
 [2000, 1, 6, 4, 12466],
 [2000, 1, 7, 5, 12516],
 [2000, 1, 8, 6, 8934],
 [2000, 1, 9, 7, 7949],
 [2000, 1, 10, 1, 11668],
 [2000, 1, 11, 2, 12611]]
def read_csv(filename):
    """Read a headered, comma-separated file of integers into integer rows.

    The first line is treated as a header and discarded. Every remaining
    non-blank line is split on commas and each field is converted with
    ``int``.

    Args:
        filename: Path of the file to read.

    Returns:
        A list containing one list of ints per data line, in file order.

    Raises:
        ValueError: If a field on a non-blank data line is not an integer.
    """
    # The context manager guarantees the handle is closed after reading.
    with open(filename) as handle:
        lines = handle.read().split('\n')
    # Drop the header, and skip blank lines (such as the empty string left
    # behind by a trailing newline) that would make int('') raise.
    return [
        [int(field) for field in line.split(',')]
        for line in lines[1:]
        if line.strip()
    ]
def month_births(list_of_lists):
    """Total the births recorded for each month.

    Args:
        list_of_lists: Iterable of rows in which index 1 holds the month
            and index 4 holds the births count.

    Returns:
        A dict mapping each month value seen to the sum of the births
        counts of all rows carrying that month. Empty input gives ``{}``.
    """
    births_per_month = {}
    for row in list_of_lists:
        month = row[1]
        # Start unseen months at zero and always add, so earlier rows are
        # accumulated instead of being replaced by the latest row.
        births_per_month[month] = births_per_month.get(month, 0) + row[4]
    return births_per_month
# Aggregate births by month. cdc_list is not defined in this snippet;
# presumably it is the row list returned by read_csv — confirm.
cdc_month_births = month_births(cdc_list)
# Recorded output:
# {1: 11843,
# 2: 11671,
# 3: 11511,
# 4: 11591,
# 5: 8462,
# 6: 12243,
# 7: 12673,
# 8: 7884,
# 9: 12959,
# 10: 10837,
# 11: 7228,
# 12: 11990}
# NOTE(review): these values are the size of a single day's count rather
# than a monthly total — re-run and refresh this sample to confirm.
def dow_births(list_of_lists):
    """Total the births recorded for each day of the week.

    Args:
        list_of_lists: Iterable of rows in which index 3 holds the day of
            the week and index 4 holds the births count.

    Returns:
        A dict mapping each day-of-week value seen to the sum of the births
        counts of all rows carrying that day. Empty input gives ``{}``.
    """
    births_per_dow = {}
    for row in list_of_lists:
        dow = row[3]
        # Start unseen days at zero and always add, so earlier rows are
        # accumulated instead of being replaced by the latest row.
        births_per_dow[dow] = births_per_dow.get(dow, 0) + row[4]
    return births_per_dow
# Aggregate births by day of week. cdc_list is not defined in this snippet;
# presumably it is the row list returned by read_csv — confirm.
cdc_day_births = dow_births(cdc_list)
# Recorded output:
# {1: 12811, 2: 13634, 3: 11990, 4: 6749, 5: 10386, 6: 8656, 7: 7724}
# NOTE(review): these values are the size of a single day's count rather
# than a per-weekday total — re-run and refresh this sample to confirm.
def calc_counts(data, column):
    """Total the births count (index 4) of rows grouped by one column.

    Args:
        data: Iterable of rows; index 4 of each row holds the value that
            is summed.
        column: Index of the field whose value is used as the grouping key.

    Returns:
        A dict mapping each distinct value found at ``column`` to the sum
        of index 4 over all rows with that value. Empty input gives ``{}``.
    """
    results = {}
    for row in data:
        unit = row[column]
        # Start unseen keys at zero and always add, so earlier rows are
        # accumulated instead of being replaced by the latest row.
        results[unit] = results.get(unit, 0) + row[4]
    return results
# Aggregate births by year (column 0). cdc_list is not defined in this
# snippet; presumably it is the row list returned by read_csv — confirm.
cdc_year_births = calc_counts(cdc_list, 0)
# Recorded output:
# {2000: 7892,
# 2001: 10272,
# 2002: 12582,
# 2003: 12540,
# 2004: 10130,
# 2005: 8635,
# 2006: 7569,
# 2007: 11102,
# 2008: 12906,
# 2009: 11667,
# 2010: 9751,
# 2011: 8035,
# 2012: 10634,
# 2013: 12525,
# 2014: 11990}
# NOTE(review): these values are the size of a single day's count rather
# than a yearly total — re-run and refresh this sample to confirm.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment