Last active
May 25, 2020 13:36
-
-
Save hari4274/c011ac843be60cb00a18066772f52fa1 to your computer and use it in GitHub Desktop.
Groupby and sum columns in python(default functionalities, Pandas and ittertools)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# Default Method | |
# Get Unique product details => group by reapit_product_id and sum by quantity | |
grpby_key = 'product_id' | |
sum_key = 'qty' | |
data = [ | |
{'product_id': 1, 'desc': 'Desc1', 'qty': 2, 'unit_price': 2, 'total': 4}, | |
{'product_id': 1, 'desc': 'Desc1', 'qty': 3, 'unit_price': 2, 'total': 6}, | |
{'product_id': 1, 'desc': 'Desc1', 'qty': 3, 'unit_price': 2, 'total': 6}, | |
{'product_id': 2, 'desc': 'Desc2', 'qty': 2, 'unit_price': 5, 'total': 10}, | |
{'product_id': 3, 'desc': 'Desc3', 'qty': 3, 'unit_price': 7, 'total': 21}, | |
{'product_id': 1, 'desc': 'Desc1', 'qty': 3, 'unit_price': 2, 'total': 6}, | |
] | |
def grpby_sum_list_dict(list_data, groupby_key, sum_key): | |
new_data = [] | |
for p in {x.get(groupby_key) for x in list_data}: | |
val = {} | |
qty = sum([x.get(sum_key) for x in list_data if x[groupby_key] == p]) | |
for i in list_data: | |
if i.get(groupby_key) == p: | |
val = i | |
val[sum_key] = qty | |
new_data.append(val) | |
return new_data | |
print(grpby_sum_list_dict(data, grpby_key, sum_key)) | |
fdata = [] | |
print "Original Data : ", data | |
# Group by and sum Using Pandas | |
import pandas as pd | |
df = pd.DataFrame(data) # Dict to Data frame | |
product_id = df.groupby(gby_field) # Grouped dataframe | |
p_sum = product_id.sum() # sum all the float fields based on groupby field | |
fdict = p_sum.to_dict() # Convert dataframe to Dict | |
gby_field_data = list(set([x.get(gby_field) for x in data])) # To get uniuqe product_data | |
gby_field_desc_data = {x.get(gby_field): x.get('desc') for x in data} # To get Unique product desc(String Fields) | |
for gby in gby_field_data: | |
fdata.append({'product_id': gby, 'qty': fdict['qty'][gby], 'desc': gby_field_desc_data[gby], 'unit_price': fdict['unit_price'][gby], 'total': fdict['total'][gby]}) | |
print("Results From Pandas: ", fdata) | |
# Group by and sum Using ittertools | |
import itertools | |
from operator import itemgetter | |
fdata = [] | |
sorted_data = sorted(data, key=itemgetter(gby_field)) # Sort the data | |
group_dict = itertools.groupby(sorted_data, key=lambda x: x[gby_field]) # group the content data | |
for key, group in group_dict: | |
key_data = list(group) | |
field_data = {gby_field: key} | |
for x in key_data: | |
for k, v in x.items(): | |
if k != gby_field: | |
if k in field_data: | |
field_data[k] += v | |
else: | |
field_data.update({k: v}) | |
fdata.append(field_data) | |
print("Results From ItterTools: ", fdata) | |
# Output: | |
# ('Results From Pandas: ', [{'total': 22, 'desc': 'Desc1', 'product_id': 1, 'unit_price': 8, 'qty': 11}, {'total': 10, 'desc': 'Desc2', 'product_id': 2, 'unit_price': 5, 'qty': 2}, {'total': 21, 'desc': 'Desc3', 'product_id': 3, 'unit_price': 7, 'qty': 3}]) | |
# ('Results From ItterTools: ', [{'desc': 'Desc1Desc1Desc1Desc1', 'total': 22, 'product_id': 1, 'unit_price': 8, 'qty': 11}, {'desc': 'Desc2', 'total': 10, 'product_id': 2, 'unit_price': 5, 'qty': 2}, {'desc': 'Desc3', 'total': 21, 'product_id': 3, 'unit_price': 7, 'qty': 3}]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment