Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save hari4274/c011ac843be60cb00a18066772f52fa1 to your computer and use it in GitHub Desktop.
Save hari4274/c011ac843be60cb00a18066772f52fa1 to your computer and use it in GitHub Desktop.
Groupby and sum columns in python(default functionalities, Pandas and ittertools)
# -*- coding: utf-8 -*-
# Default Method
# Get Unique product details => group by reapit_product_id and sum by quantity
grpby_key = 'product_id'
sum_key = 'qty'
data = [
{'product_id': 1, 'desc': 'Desc1', 'qty': 2, 'unit_price': 2, 'total': 4},
{'product_id': 1, 'desc': 'Desc1', 'qty': 3, 'unit_price': 2, 'total': 6},
{'product_id': 1, 'desc': 'Desc1', 'qty': 3, 'unit_price': 2, 'total': 6},
{'product_id': 2, 'desc': 'Desc2', 'qty': 2, 'unit_price': 5, 'total': 10},
{'product_id': 3, 'desc': 'Desc3', 'qty': 3, 'unit_price': 7, 'total': 21},
{'product_id': 1, 'desc': 'Desc1', 'qty': 3, 'unit_price': 2, 'total': 6},
]
def grpby_sum_list_dict(list_data, groupby_key, sum_key):
new_data = []
for p in {x.get(groupby_key) for x in list_data}:
val = {}
qty = sum([x.get(sum_key) for x in list_data if x[groupby_key] == p])
for i in list_data:
if i.get(groupby_key) == p:
val = i
val[sum_key] = qty
new_data.append(val)
return new_data
print(grpby_sum_list_dict(data, grpby_key, sum_key))
fdata = []
print "Original Data : ", data
# Group by and sum Using Pandas
import pandas as pd
df = pd.DataFrame(data) # Dict to Data frame
product_id = df.groupby(gby_field) # Grouped dataframe
p_sum = product_id.sum() # sum all the float fields based on groupby field
fdict = p_sum.to_dict() # Convert dataframe to Dict
gby_field_data = list(set([x.get(gby_field) for x in data])) # To get uniuqe product_data
gby_field_desc_data = {x.get(gby_field): x.get('desc') for x in data} # To get Unique product desc(String Fields)
for gby in gby_field_data:
fdata.append({'product_id': gby, 'qty': fdict['qty'][gby], 'desc': gby_field_desc_data[gby], 'unit_price': fdict['unit_price'][gby], 'total': fdict['total'][gby]})
print("Results From Pandas: ", fdata)
# Group by and sum Using ittertools
import itertools
from operator import itemgetter
fdata = []
sorted_data = sorted(data, key=itemgetter(gby_field)) # Sort the data
group_dict = itertools.groupby(sorted_data, key=lambda x: x[gby_field]) # group the content data
for key, group in group_dict:
key_data = list(group)
field_data = {gby_field: key}
for x in key_data:
for k, v in x.items():
if k != gby_field:
if k in field_data:
field_data[k] += v
else:
field_data.update({k: v})
fdata.append(field_data)
print("Results From ItterTools: ", fdata)
# Output:
# ('Results From Pandas: ', [{'total': 22, 'desc': 'Desc1', 'product_id': 1, 'unit_price': 8, 'qty': 11}, {'total': 10, 'desc': 'Desc2', 'product_id': 2, 'unit_price': 5, 'qty': 2}, {'total': 21, 'desc': 'Desc3', 'product_id': 3, 'unit_price': 7, 'qty': 3}])
# ('Results From ItterTools: ', [{'desc': 'Desc1Desc1Desc1Desc1', 'total': 22, 'product_id': 1, 'unit_price': 8, 'qty': 11}, {'desc': 'Desc2', 'total': 10, 'product_id': 2, 'unit_price': 5, 'qty': 2}, {'desc': 'Desc3', 'total': 21, 'product_id': 3, 'unit_price': 7, 'qty': 3}])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment