Created
March 11, 2021 20:39
-
-
Save danieltomasz/c8e68fb9d32c4a71198d5756c05cacf5 to your computer and use it in GitHub Desktop.
File to play with long and wide format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- | |
# jupyter: | |
# jupytext: | |
# text_representation: | |
# extension: .py | |
# format_name: percent | |
# format_version: '1.3' | |
# jupytext_version: 1.10.3 | |
# kernelspec: | |
# display_name: Python 3.8.7 64-bit | |
# metadata: | |
# interpreter: | |
# hash: 719f0e7d5e00575541adf0654af7819dee753b35cdfdf7b361adc0a54809f9ea | |
# name: python3 | |
# --- | |
# %% | |
import pandas as pd | |
import string | |
import numpy as np | |
import random | |
import matplotlib.pyplot as plt | |
def generate_example_dataframe(
    *,
    num_regions: int = 20,
    subjects_num: int = 10,
    samples_per_cell: int = 20,
    seed: int = 1,
) -> pd.DataFrame:
    """
    Generate a simple, reproducible example dataframe in long format.

    One row is one simulated observation. For every combination of subject,
    region, condition ("open"/"closed") and group ("old"/"young"),
    ``samples_per_cell`` values are drawn as

        cell mean + uniform[0, 1) noise per sample + one shared uniform[0, 0.2) offset

    Parameters
    ----------
    num_regions:
        Number of regions used in the simulation. Regions are named
        ``region_a``, ``region_b``, ... after ``string.ascii_letters``, so at
        most ``len(string.ascii_letters)`` regions are available.
    subjects_num:
        Number of subjects, named ``subject_1`` ... ``subject_<subjects_num>``.
    samples_per_cell:
        Number of rows generated for each subject/region/condition/group cell.
    seed:
        Seed applied to both the stdlib ``random`` module and NumPy's global
        generator so that repeated calls return identical data.

    Returns
    -------
    pd.DataFrame
        Columns ``region``, ``group``, ``condition``, ``subject``, ``values``.
        The per-cell frames are concatenated as-is, so the index repeats
        ``0 .. samples_per_cell - 1`` for every cell.

    Raises
    ------
    ValueError
        If any size is smaller than 1 or ``num_regions`` exceeds the number
        of available region letters.
    """
    max_regions = len(string.ascii_letters)
    if not 1 <= num_regions <= max_regions:
        raise ValueError(
            f"num_regions must be between 1 and {max_regions}, got {num_regions}"
        )
    if subjects_num < 1 or samples_per_cell < 1:
        raise ValueError("subjects_num and samples_per_cell must be at least 1")

    # Both generators are used below; seeding only one of them would leave
    # the output different on every call.
    random.seed(seed)
    np.random.seed(seed)

    # Mean of each (condition, group) cell. Insertion order matters: it fixes
    # the order in which random numbers are consumed.
    cell_means = {
        ("open", "old"): 1,
        ("open", "young"): 1.5,
        ("closed", "old"): 1.25,
        ("closed", "young"): 1.75,
    }
    regions = [f"region_{letter}" for letter in string.ascii_letters[:num_regions]]
    # Inclusive upper bound so that exactly `subjects_num` subjects are created.
    subjects = [f"subject_{number}" for number in range(1, subjects_num + 1)]

    list_of_dataframes = []
    for subject in subjects:
        for region in regions:
            for (condition, group), mean in cell_means.items():
                values = (
                    mean
                    + np.random.rand(samples_per_cell)
                    + 0.2 * random.random()
                )
                # Scalars are broadcast against the `values` array.
                list_of_dataframes.append(
                    pd.DataFrame(
                        {
                            'region': region,
                            'group': group,
                            'condition': condition,
                            'subject': subject,
                            'values': values,
                        }
                    )
                )
    return pd.concat(list_of_dataframes)
# %% [markdown] | |
# The generated sample dataframe is in long format - one observation per row
# %%
# Build the long-format example data and preview the first rows.
df = generate_example_dataframe()
df.head()
# %%
# Assign each region to a randomly chosen lobe ("big region") and join that
# mapping onto the long-format dataframe, adding a `lobe` column.
lobesList = ["parietal", "frontal", "temporal", "occipital"]
unique_regions = pd.unique(df.region)
# One lobe is drawn per distinct region; the name must match `lobesList`
# exactly (Python names are case-sensitive).
lobes = random.choices(lobesList, k=len(unique_regions))
lobe_info = pd.DataFrame({'region': unique_regions, 'lobe': lobes})
df = pd.merge(df, lobe_info, on='region')
print(df.columns.values.tolist())
# %%
# Column groups reused by the aggregation and pivot cells below.
index_list = ["lobe"]
conditions = ["group", "condition", "subject"]
values = ["values"]
# %%
# Long format, aggregated: mean of `values` for every
# lobe / group / condition / subject combination, with the keys as columns.
grouping_keys = index_list + conditions
groupdf = df.groupby(grouping_keys)[values].mean().reset_index()
groupdf.head()
# %%
print(df.columns.tolist())
# %%
# Long -> wide: one row per subject, one column per
# (values, group, condition, lobe) combination holding the mean value.
pivoted = pd.pivot_table(
    df,
    index=["subject"],
    columns=["group", "condition", "lobe"],
    values=values,
)
# Collapse the multi-level column labels into single strings by gluing the
# level names together without a separator, e.g. "valuesoldclosedfrontal".
pivoted.columns = ["".join(levels) for levels in pivoted.columns]
pivoted = pivoted.reset_index()
pivoted.head()
# %%
# Attach a random integer age (20 <= age < 70) to every subject and plot it
# against one of the wide-format columns as red dots.
subject_ids = pd.unique(df.subject)
ages = np.random.randint(20, 70, len(subject_ids))
age_info = pd.DataFrame({"subject": subject_ids, "age": ages})
pivoted = pivoted.merge(age_info, on="subject")
plt.plot(pivoted["age"], pivoted["valuesoldclosedfrontal"], "ro")
# %% |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment