-
-
Save Ayehavgunne/a60c3a8d4aac9b67a524eff1c0c78af1 to your computer and use it in GitHub Desktop.
Create a cross tab / pivot table in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
def crosstab(rows, columns, col_idx_for_columns, lst_col_idx_for_rows, value_col_idx, fill_val, format_func=None): | |
"""Take col_idx_to_cross_tab and make its unique values be new columns at the end filled with | |
value_col_idx values. col idx arguments are 0 based. | |
This is basically a simplified pivot table creator with the limitations that you can only have one field in the columns | |
and there is no aggregation. | |
Usage: | |
>>> columns = ['month','owner','dogs owned'] | |
>>> data = [ | |
[1, 'greg', 3], | |
[1, 'greg', 5], | |
[2, 'greg', 3], | |
[3, 'greg', 15], | |
[4, 'ken', 25], | |
[4, 'greg', 53], | |
[5, 'greg', 5], | |
] | |
>>> print crosstab(data, columns, col_idx_for_columns=0, lst_col_idx_for_rows=[1], value_col_idx=2, fill_val=0) | |
(['owner', 1, 2, 3, 4, 5], [['greg', 5, 3, 15, 53, 5], ['ken', 0, 0, 0, 25, 0]]) | |
""" | |
#find all unique values | |
cross_tab_cols = sorted(list(set([row[col_idx_for_columns] for row in rows]))) | |
columns.pop(max(col_idx_for_columns,value_col_idx)) | |
columns.pop(min(col_idx_for_columns,value_col_idx)) | |
columns += cross_tab_cols | |
def get_key(x): | |
key = [] | |
for idx in lst_col_idx_for_rows: | |
key.append(x[idx]) | |
return key | |
rows = sorted(rows, key=get_key) | |
grouped_rows = itertools.groupby(rows, get_key) | |
results = [] | |
for key, rows in grouped_rows: | |
rows = list(rows) | |
new_row = list(rows[0]) + [fill_val]*len(cross_tab_cols) | |
#remove columns: | |
new_row.pop(max(col_idx_for_columns,value_col_idx)) | |
new_row.pop(min(col_idx_for_columns,value_col_idx)) | |
assert len(new_row) == len(columns) | |
for row in rows: | |
target_col = row[col_idx_for_columns] | |
value = row[value_col_idx] | |
new_row[columns.index(target_col)] = (format_func and format_func(value)) or value | |
results.append(new_row) | |
return columns, results |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment