Last active
March 1, 2019 18:06
Revisions
-
joefutrelle revised this gist
Mar 1, 2019. 1 changed file with 3 additions and 0 deletions. There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -31,6 +31,9 @@ def wide_to_long(df, wide_cols_list, value_cols, long_col, long_labels): | something | 2 | 20 | b | +-----------+---+----+-----------+ """ assert len(wide_cols_list) == len(long_labels) for w in wide_cols_list: assert len(w) == len(value_cols) exclude_cols = [] for w in wide_cols_list: exclude_cols = exclude_cols + w -
joefutrelle revised this gist
Mar 1, 2019. 1 changed file with 8 additions and 10 deletions. There are no files selected for viewing
def wide_to_long(df, wide_cols_list, value_cols, long_col, long_labels):
    """Convert selected columns from wide to long format.

    params:
    - df: the input dataframe
    - wide_cols_list: one list of wide column names per long label; each
      list names its columns in the same order as value_cols
    - value_cols: the names of the long columns that will hold the values
    - long_col: the name of the column to indicate which set of wide columns
      the value comes from
    - long_labels: for each set of wide columns, what to call it in the
      long_col values.

    Returns a new dataframe holding every column of df that is not listed in
    wide_cols_list, followed by value_cols and long_col. Each input row
    appears once per label, and the result is sorted by the original index
    with the labels of one row kept in long_labels order.

    Raises ValueError if wide_cols_list is empty, if wide_cols_list and
    long_labels differ in length, or if any list in wide_cols_list does not
    have exactly one column per entry of value_cols.

    For example if I have the following DataFrame:

    +-----------+-----+-----+-----+-----+
    | other_col | x_a | x_b | y_a | y_b |
    +-----------+-----+-----+-----+-----+
    | something | 1   | 2   | 10  | 20  |
    +-----------+-----+-----+-----+-----+

    And I pass these arguments:

    wide_cols_list = [['x_a','y_a'],['x_b','y_b']]
    value_cols = ['x','y']
    long_col = 'replicate'
    long_labels = ['a','b']

    It'll generate this dataframe:

    +-----------+---+----+-----------+
    | other_col | x | y  | replicate |
    +-----------+---+----+-----------+
    | something | 1 | 10 | a         |
    | something | 2 | 20 | b         |
    +-----------+---+----+-----------+
    """
    # Validate up front: zip() below would otherwise silently drop any
    # groups or labels that have no partner.
    if not wide_cols_list:
        raise ValueError("wide_cols_list must not be empty")
    if len(wide_cols_list) != len(long_labels):
        raise ValueError(
            f"wide_cols_list has {len(wide_cols_list)} entries but "
            f"long_labels has {len(long_labels)}"
        )
    for group in wide_cols_list:
        if len(group) != len(value_cols):
            raise ValueError(
                f"wide column group {list(group)!r} must name exactly "
                f"{len(value_cols)} columns, one per entry of value_cols"
            )

    # Columns not named in any wide group are carried through unchanged.
    exclude_cols = {col for group in wide_cols_list for col in group}
    common_cols = [c for c in df.columns if c not in exclude_cols]
    long_columns = common_cols + list(value_cols) + [long_col]

    frames = []
    for group, long_label in zip(wide_cols_list, long_labels):
        sdf = df[common_cols + list(group)].copy()
        sdf[long_col] = long_label
        sdf.columns = long_columns
        frames.append(sdf)

    # Every original index value now occurs once per label. A stable sort
    # keeps those duplicates in long_labels order; the default quicksort
    # gives no such guarantee.
    return pd.concat(frames).sort_index(kind="mergesort")
joefutrelle created this gist
Mar 1, 2019. There are no files selected for viewing
def wide_to_long(df, wide_cols_list, value_cols, long_col, long_labels):
    """converts selected columns from wide to long format.

    params:
    - df: the input dataframe
    - wide_cols_list: for each long label, a list of the wide column names
      belonging to that label, in the same order as value_cols
    - value_cols: the names of the long columns to hold the values
    - long_col: the name of the column to indicate which set of wide columns
      the value comes from
    - long_labels: for each set of wide columns, what to call it in the
      long_col values.

    returns: a new dataframe with the columns of df that are not listed in
    wide_cols_list, then value_cols, then long_col. There is one output row
    per input row and label, sorted by the original index; rows that share
    an index value stay in long_labels order.

    raises: ValueError when wide_cols_list is empty, when wide_cols_list and
    long_labels have different lengths, or when a set of wide columns does
    not contain exactly len(value_cols) names.

    For example if I have the following DataFrame:

    +-----------+-----+-----+-----+-----+
    | other_col | x_a | x_b | y_a | y_b |
    +-----------+-----+-----+-----+-----+
    | something | 1   | 2   | 10  | 20  |
    +-----------+-----+-----+-----+-----+

    And I pass these arguments:

    wide_cols_list = [['x_a','y_a'],['x_b','y_b']]
    value_cols = ['x','y']
    long_col = 'replicate'
    long_labels = ['a','b']

    it'll generate this dataframe:

    +-----------+---+----+-----------+
    | other_col | x | y  | replicate |
    +-----------+---+----+-----------+
    | something | 1 | 10 | a         |
    | something | 2 | 20 | b         |
    +-----------+---+----+-----------+
    """
    # Explicit raises instead of assert: asserts vanish under `python -O`.
    if not wide_cols_list:
        raise ValueError("wide_cols_list must not be empty")
    # There is one set of wide columns per label ...
    if len(wide_cols_list) != len(long_labels):
        raise ValueError(
            f"expected one set of wide columns per long label, got "
            f"{len(wide_cols_list)} sets and {len(long_labels)} labels"
        )
    # ... and each set supplies one column per value column, because the
    # set is renamed position-by-position to value_cols below.
    for wcs in wide_cols_list:
        if len(wcs) != len(value_cols):
            raise ValueError(
                f"wide columns {list(wcs)!r} do not match value_cols "
                f"{list(value_cols)!r} in length"
            )

    # Anything not claimed by a wide set is carried over untouched.
    exclude_cols = set()
    for wcs in wide_cols_list:
        exclude_cols.update(wcs)
    common_cols = [c for c in df.columns if c not in exclude_cols]

    dfs = []
    for wide_cols, long_label in zip(wide_cols_list, long_labels):
        sdf = df[common_cols + list(wide_cols)].copy()
        sdf.columns = common_cols + list(value_cols)
        sdf[long_col] = long_label
        dfs.append(sdf)

    # All pieces share the same column layout, so concat preserves it.
    # mergesort is stable: duplicates of an index value keep label order.
    return pd.concat(dfs).sort_index(kind="mergesort")