Skip to content

Instantly share code, notes, and snippets.

@Ayehavgunne
Forked from gregpinero/crosstab.py
Created June 18, 2019 20:21
Show Gist options
  • Save Ayehavgunne/a60c3a8d4aac9b67a524eff1c0c78af1 to your computer and use it in GitHub Desktop.
Save Ayehavgunne/a60c3a8d4aac9b67a524eff1c0c78af1 to your computer and use it in GitHub Desktop.
Create a cross tab / pivot table in Python
import itertools
def crosstab(rows, columns, col_idx_for_columns, lst_col_idx_for_rows, value_col_idx, fill_val, format_func=None):
"""Take col_idx_to_cross_tab and make its unique values be new columns at the end filled with
value_col_idx values. col idx arguments are 0 based.
This is basically a simplified pivot table creator with the limitations that you can only have one field in the columns
and there is no aggregation.
Usage:
>>> columns = ['month','owner','dogs owned']
>>> data = [
[1, 'greg', 3],
[1, 'greg', 5],
[2, 'greg', 3],
[3, 'greg', 15],
[4, 'ken', 25],
[4, 'greg', 53],
[5, 'greg', 5],
]
>>> print crosstab(data, columns, col_idx_for_columns=0, lst_col_idx_for_rows=[1], value_col_idx=2, fill_val=0)
(['owner', 1, 2, 3, 4, 5], [['greg', 5, 3, 15, 53, 5], ['ken', 0, 0, 0, 25, 0]])
"""
#find all unique values
cross_tab_cols = sorted(list(set([row[col_idx_for_columns] for row in rows])))
columns.pop(max(col_idx_for_columns,value_col_idx))
columns.pop(min(col_idx_for_columns,value_col_idx))
columns += cross_tab_cols
def get_key(x):
key = []
for idx in lst_col_idx_for_rows:
key.append(x[idx])
return key
rows = sorted(rows, key=get_key)
grouped_rows = itertools.groupby(rows, get_key)
results = []
for key, rows in grouped_rows:
rows = list(rows)
new_row = list(rows[0]) + [fill_val]*len(cross_tab_cols)
#remove columns:
new_row.pop(max(col_idx_for_columns,value_col_idx))
new_row.pop(min(col_idx_for_columns,value_col_idx))
assert len(new_row) == len(columns)
for row in rows:
target_col = row[col_idx_for_columns]
value = row[value_col_idx]
new_row[columns.index(target_col)] = (format_func and format_func(value)) or value
results.append(new_row)
return columns, results
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment