Created
July 6, 2020 09:36
-
-
Save amitt001/7372ec44118bfd49e2771c449860c7a0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Returns result in the format:
{
    "B": {
        "2401": "1/6 = 16.67",
        "3101": "1/6 = 16.67"
    },
    "C": {
        "02": "3/6 = 50.0"
    },
    "K": {
        ...
    }
}

Each entry reads "<count>/<total> = <percentage rounded to 2 decimals>".
"""
import csv | |
from collections import defaultdict | |
# Module-level mapping of column header -> list of the values collected for
# that column; a missing header starts out as an empty list.
columns = defaultdict(list)
def read_csv_by_column(file_path):
    """Read a CSV file and group its cell values by column header.

    The first row of the file is taken as the header row (``csv.DictReader``
    default). Every following row contributes one value to each column.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A ``defaultdict(list)`` mapping each column header to the list of
        that column's values, in row order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Build a fresh mapping on every call: appending into shared module-level
    # state would make a second call return the first call's rows as well.
    by_column = defaultdict(list)
    # newline="" lets the csv module do its own newline handling, so quoted
    # fields containing line breaks are parsed correctly.
    with open(file_path, newline="") as f:
        for row in csv.DictReader(f):  # row is {column1: value1, ...}
            for name, value in row.items():
                by_column[name].append(value)
    return by_column
def grouped_result(columns):
    """Summarise how often each distinct value occurs within every column.

    Args:
        columns: Mapping of column header to the list of that column's
            values (the shape returned by ``read_csv_by_column``).

    Returns:
        A dict keyed by header. Each entry maps the string form of a distinct
        value to ``"<count>/<total> = <percent>"``, where ``total`` is the
        number of values counted for that column and ``percent`` is rounded
        to two decimals (no ``%`` sign is appended). A column with nothing
        to count maps to an empty dict.
    """
    result = {}
    for header, values in columns.items():
        # Special case: a cell in column "E"/"K" may hold several
        # "-"-separated entries. Each entry is counted on its own; blank
        # cells and blank entries are dropped.
        if header.lower() in ('e', 'k'):
            entries = []
            for cell in values:
                if not cell.strip():
                    continue
                for part in str(cell).split("-"):
                    part = part.strip()
                    if part:
                        entries.append(part)
            values = entries

        counts = {}
        for value in values:
            counts[value] = counts.get(value, 0) + 1

        # Take the total from the list length rather than from a loop index:
        # the index is undefined (or left over from the previous column)
        # when there is nothing to count.
        total = len(values)

        # One entry per distinct value, in first-seen order.
        result[header] = {
            "{}".format(value): "{}/{} = {}".format(
                count, total, round((count * 100.0) / total, 2)
            )
            for value, count in counts.items()
        }
    return result
if __name__ == '__main__':
    # Script entry point: summarise data.csv and print the summary as JSON.
    import json

    file_path = "data.csv"
    columns = read_csv_by_column(file_path)
    summary = grouped_result(columns)
    print(json.dumps(summary))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment