Skip to content

Instantly share code, notes, and snippets.

@danieltomasz
Created March 11, 2021 20:39
Show Gist options
  • Save danieltomasz/c8e68fb9d32c4a71198d5756c05cacf5 to your computer and use it in GitHub Desktop.
Save danieltomasz/c8e68fb9d32c4a71198d5756c05cacf5 to your computer and use it in GitHub Desktop.
File to play with long and wide format
# ---
# jupyter:
# jupytext:
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.10.3
# kernelspec:
# display_name: Python 3.8.7 64-bit
# metadata:
# interpreter:
# hash: 719f0e7d5e00575541adf0654af7819dee753b35cdfdf7b361adc0a54809f9ea
# name: python3
# ---
# %%
import pandas as pd
import string
import numpy as np
import random
import matplotlib.pyplot as plt
def generate_example_dataframe(
    num: int = 20, subjects_num: int = 10, seed: int = 1
) -> pd.DataFrame:
    """Generate a simulated dataset in long format (one observation per row).

    Every combination of subject, region, condition and group contributes
    ``num`` rows. Each value is the mean assigned to its (condition, group)
    pair, plus uniform noise in [0, 1), plus one uniform offset in [0, 0.2)
    shared by all rows of that combination.

    Args:
        num: Number of regions, and also the number of samples drawn for
            each (subject, region, condition, group) combination. Regions
            are labelled with single ASCII letters, so ``num`` must lie in
            ``1..len(string.ascii_letters)``.
        subjects_num: Number of subjects; they are labelled ``subject_1``
            through ``subject_<subjects_num>`` inclusive.
        seed: Seed applied to both random sources so that repeated calls
            return identical data.

    Returns:
        A DataFrame with the columns ``region``, ``group``, ``condition``,
        ``subject`` and ``values``. The index restarts at 0 for every
        combination because the pieces are concatenated as-is.

    Raises:
        ValueError: If ``num`` or ``subjects_num`` is out of range.
    """
    if not 1 <= num <= len(string.ascii_letters):
        raise ValueError(
            f"num must be between 1 and {len(string.ascii_letters)}, got {num}"
        )
    if subjects_num < 1:
        raise ValueError(f"subjects_num must be at least 1, got {subjects_num}")

    # The global stdlib generator is seeded on purpose (as before): code that
    # draws from `random` after this call stays reproducible as well.
    random.seed(seed)
    # The NumPy noise used to be unseeded, which made the output differ between
    # runs. A local Generator fixes that without touching NumPy's global state.
    rng = np.random.default_rng(seed)

    conditions = ["open", "closed"]
    groups = ["old", "young"]
    means = [1, 1.5, 1.25, 1.75]
    # Pair each mean with its (condition, group) in nested-loop order:
    # open/old, open/young, closed/old, closed/young.
    cell_keys = [(condition, group) for condition in conditions for group in groups]
    cells = list(zip(cell_keys, means))

    regions = [f"region_{letter}" for letter in string.ascii_letters[:num]]
    # `+ 1` so that exactly `subjects_num` subjects are produced.
    subjects = [f"subject_{i}" for i in range(1, subjects_num + 1)]

    list_of_dataframes = []
    for subject in subjects:
        for region in regions:
            for (condition, group), mean in cells:
                values = mean + rng.random(num) + 0.2 * random.random()
                # Scalars are broadcast against the length of `values`.
                list_of_dataframes.append(
                    pd.DataFrame(
                        {
                            'region': region,
                            'group': group,
                            'condition': condition,
                            'subject': subject,
                            'values': values,
                        }
                    )
                )
    return pd.concat(list_of_dataframes)
# %% [markdown]
# The generated sample dataframe is in the long format - one observation per row.
# %%
# Build the simulated long-format dataset and preview its first five rows.
df = generate_example_dataframe()
df.head(n=5)
# %%
# Assign every region at random to one of four lobes ("big regions"), then
# join that lookup table onto the long dataframe so each row carries its lobe.
lobesList = ["parietal", "frontal", "temporal", "occipital"]
# One row per distinct region; the unique values are computed only once.
lobe_info = pd.DataFrame({'region': pd.unique(df.region)})
# random.choices samples with replacement from the global `random` state.
# (The original referenced the undefined name `LobesList` here.)
lobes = random.choices(lobesList, k=len(lobe_info))
lobe_info['lobe'] = lobes
df = pd.merge(df, lobe_info, on='region')
print(df.columns.values.tolist())
# %%
# Column roles used by the aggregation below.
index_list = ['lobe']
conditions = ["group", "condition", "subject"]
values = ["values"]
# %%
# Average the value column within every lobe/group/condition/subject
# combination, then turn the grouping keys back into ordinary columns.
groupdf = df.groupby([*index_list, *conditions])[values].mean().reset_index()
groupdf.head(n=5)
# %%
# Show the column names currently present in the long dataframe.
print(df.columns.tolist())
# %%
# Reshape to wide format: one row per subject and one column for every
# (value, group, condition, lobe) combination, using pivot_table's default
# aggregation.
pivoted = df.pivot_table(
    values=values,
    index=["subject"],
    columns=['group', 'condition', 'lobe'],
)
# Flatten the multi-level header by concatenating the level labels with no
# separator, e.g. ('values', 'old', 'closed', 'frontal') -> 'valuesoldclosedfrontal'.
pivoted.columns = ["".join(levels) for levels in pivoted.columns]
pivoted = pivoted.reset_index()
pivoted.head(n=5)
# %%
# Give every subject a random integer age in [20, 70) and attach it to the
# wide table, then plot age against one of the flattened value columns.
age_info = pd.DataFrame({'subject': pd.unique(df["subject"])})
ages = np.random.randint(low=20, high=70, size=len(age_info))
age_info['age'] = ages
pivoted = pivoted.merge(age_info, on='subject')
plt.plot(pivoted["age"], pivoted["valuesoldclosedfrontal"], 'ro')
# %%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment