amitt001 · July 6, 2020 09:36
diff --git a/article_occurrence_coding.py b/article_occurrence_coding.py
 """
 Returns result in the format:

 {
    "B": {
        "2401": "1/6 = 16.67",
        "3101": "1/6 = 16.67"
    },
    "C": {
        "02": "3/6 = 50.0"
    },
    "K": {
        "<value>": "<count>/<total> = <percentage>"
    }
 }

 """

 import csv
 from collections import defaultdict

 # Legacy module-level accumulator. It is kept so that existing code which
 # imports `columns` keeps working, but read_csv_by_column() no longer appends
 # to it: sharing one accumulator made every call after the first also return
 # the rows gathered by earlier calls.
 columns = defaultdict(list)


 def read_csv_by_column(file_path):
    """Read a CSV file and return its cells grouped by column header.

    Args:
        file_path: Path of the CSV file to read. The first row supplies the
            column headers.

    Returns:
        A ``defaultdict(list)`` mapping each header to the list of that
        column's cell values, in row order.
    """
    # Build a fresh mapping per call so repeated calls do not accumulate.
    by_column = defaultdict(list)
    # newline='' is what the csv module asks for, so that newlines embedded
    # in quoted fields reach the reader untouched.
    with open(file_path, newline='') as f:
        for row in csv.DictReader(f):  # each row is {header: cell, ...}
            for header, cell in row.items():
                by_column[header].append(cell)
    return by_column


 def grouped_result(columns):
    """Summarise how often each value occurs within every column.

    Args:
        columns: Mapping of column header to the list of that column's
            values, as returned by read_csv_by_column().

    Returns:
        A dict mapping each header to a dict of
        ``{value: "<count>/<total> = <percentage>"}``, where the percentage
        is rounded to two decimals. Columns ``e`` and ``k`` (matched
        case-insensitively) are treated as "-"-separated multi-values: each
        cell is split on "-", the parts are stripped, and blank parts are
        dropped before counting. A column left with no values maps to ``{}``.
    """
    # Imported here so this function does not rely on the file-level imports.
    from collections import Counter

    result = {}
    for header, values in columns.items():
        # Special case: multi-value columns.
        if header.lower() in ('e', 'k'):
            values = [
                part.strip()
                for cell in values
                for part in cell.split("-")
                if part.strip()
            ]
        # Count by the stringified value, since that is the key reported.
        counts = Counter("{}".format(value) for value in values)
        # Use the real length rather than a leaked loop index: the old
        # enumerate-based total raised when the first column had no values.
        total = len(values)
        result[header] = {
            value: "{}/{} = {}".format(
                count, total, round((count * 100.0) / total, 2))
            for value, count in counts.items()
        }
    return result


 if __name__ == '__main__':
    # Script entry point: read data.csv column by column and print the
    # per-value occurrence summary as one JSON document.
    import json
    file_path = "data.csv"
    columns = read_csv_by_column(file_path)
    summary = grouped_result(columns)
    print(json.dumps(summary))
	"""
	Returns result in the format:

	{
	    "B": {
	        "2401": "1/6 = 16.67",
	        "3101": "1/6 = 16.67"
	    },
	    "C": {
	        "02": "3/6 = 50.0"
	    },
	    "K": {
	        "<value>": "<count>/<total> = <percentage>"
	    }
	}

	"""

	import csv
	from collections import defaultdict

	# Legacy module-level accumulator. It is kept so that existing code which
	# imports `columns` keeps working, but read_csv_by_column() no longer
	# appends to it: sharing one accumulator made every call after the first
	# also return the rows gathered by earlier calls.
	columns = defaultdict(list)


	def read_csv_by_column(file_path):
	    """Read a CSV file and return its cells grouped by column header.

	    Args:
	        file_path: Path of the CSV file to read. The first row supplies
	            the column headers.

	    Returns:
	        A ``defaultdict(list)`` mapping each header to the list of that
	        column's cell values, in row order.
	    """
	    # Build a fresh mapping per call so repeated calls do not accumulate.
	    by_column = defaultdict(list)
	    # newline='' is what the csv module asks for, so that newlines
	    # embedded in quoted fields reach the reader untouched.
	    with open(file_path, newline='') as f:
	        for row in csv.DictReader(f):  # each row is {header: cell, ...}
	            for header, cell in row.items():
	                by_column[header].append(cell)
	    return by_column


	def grouped_result(columns):
	    """Summarise how often each value occurs within every column.

	    Args:
	        columns: Mapping of column header to the list of that column's
	            values, as returned by read_csv_by_column().

	    Returns:
	        A dict mapping each header to a dict of
	        ``{value: "<count>/<total> = <percentage>"}``, where the
	        percentage is rounded to two decimals. Columns ``e`` and ``k``
	        (matched case-insensitively) are treated as "-"-separated
	        multi-values: each cell is split on "-", the parts are stripped,
	        and blank parts are dropped before counting. A column left with
	        no values maps to ``{}``.
	    """
	    # Imported here so this function does not rely on file-level imports.
	    from collections import Counter

	    result = {}
	    for header, values in columns.items():
	        # Special case: multi-value columns.
	        if header.lower() in ('e', 'k'):
	            values = [
	                part.strip()
	                for cell in values
	                for part in cell.split("-")
	                if part.strip()
	            ]
	        # Count by the stringified value, since that is the key reported.
	        counts = Counter("{}".format(value) for value in values)
	        # Use the real length rather than a leaked loop index: an
	        # enumerate-based total raises when the first column is empty.
	        total = len(values)
	        result[header] = {
	            value: "{}/{} = {}".format(
	                count, total, round((count * 100.0) / total, 2))
	            for value, count in counts.items()
	        }
	    return result


	if __name__ == '__main__':
	    # Script entry point: read data.csv column by column and print the
	    # per-value occurrence summary as one JSON document.
	    import json
	    file_path = "data.csv"
	    columns = read_csv_by_column(file_path)
	    print(json.dumps(grouped_result(columns)))