Skip to content

Instantly share code, notes, and snippets.

@joefutrelle
Last active March 1, 2019 18:06

Revisions

  1. joefutrelle revised this gist Mar 1, 2019. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions wide_to_long.py
    Original file line number Diff line number Diff line change
    @@ -31,6 +31,9 @@ def wide_to_long(df, wide_cols_list, value_cols, long_col, long_labels):
    | something | 2 | 20 | b |
    +-----------+---+----+-----------+
    """
    assert len(wide_cols_list) == len(long_labels)
    for w in wide_cols_list:
    assert len(w) == len(value_cols)
    exclude_cols = []
    for w in wide_cols_list:
    exclude_cols = exclude_cols + w
  2. joefutrelle revised this gist Mar 1, 2019. 1 changed file with 8 additions and 10 deletions.
    18 changes: 8 additions & 10 deletions wide_to_long.py
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,7 @@
    def wide_to_long(df, wide_cols_list, value_cols, long_col, long_labels):
    """converts selected columns from wide to long format. params:
    - df: the input dataframe
    - wide_cols_list: for each set of wide columns, a list of their names
    - value_cols: for each set of wide columns, the name of the long column to hold the values
    - long_col: the name of the column to indicate which set of wide columns the value comes from
    @@ -16,31 +17,28 @@ def wide_to_long(df, wide_cols_list, value_cols, long_col, long_labels):
    And I pass these arguments:
    wide_cols_list = [['x_a','x_b'],['y_a','y_b']]
    wide_cols_list = [['x_a','y_a'],['x_b','y_b']]
    value_cols = ['x','y']
    long_col = 'replicate'
    long_labels = ['a','b']
    it'll generate this dataframe:
    It'll generate this dataframe:
    +-----------+---+----+-----------+
    | other_col | x | y | replicate |
    +-----------+---+----+-----------+
    | something | 1 | 10 | a |
    | something | 2 | 20 | b |
    +-----------+---+----+-----------+
    """
    assert len(wide_cols_list) == len(value_cols)
    assert len(wide_cols_list) == len(long_labels)
    for wcs in wide_cols_list:
    assert len(wcs) == len(long_labels)
    exclude_cols = []
    for w in wide_cols_list:
    exclude_cols = exclude_cols + w
    common_cols = [c for c in df.columns if c not in exclude_cols]
    dfs = []
    for wide_cols, value_col, long_label in zip(wide_cols_list, value_cols, long_labels):
    for wide_cols, long_label in zip(wide_cols_list, long_labels):
    sdf = df[common_cols + wide_cols].copy()
    sdf.columns = common_cols + value_cols
    sdf[long_col] = long_label
    sdf[long_col] = long_label
    sdf.columns = common_cols + value_cols + [long_col]
    dfs.append(sdf)
    return pd.concat(dfs).sort_index()[dfs[0].columns]
    return pd.concat(dfs).sort_index()
  3. joefutrelle created this gist Mar 1, 2019.
    46 changes: 46 additions & 0 deletions wide_to_long.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,46 @@
    def wide_to_long(df, wide_cols_list, value_cols, long_col, long_labels):
        """Convert selected columns of a DataFrame from wide to long format.

        Every input row is emitted once per label. For each label, that label's
        wide columns are renamed (by position) to ``value_cols`` and a new
        ``long_col`` column records which label the values came from.

        Params:
        - df: the input dataframe
        - wide_cols_list: for each label, the list of wide column names holding
          that label's values, in the same order as value_cols
        - value_cols: the names of the long columns that will hold the values
        - long_col: the name of the column to indicate which set of wide columns
          the values come from
        - long_labels: for each set of wide columns, what to call it in the
          long_col values

        For example if I have the following DataFrame:

        +-----------+-----+-----+-----+-----+
        | other_col | x_a | x_b | y_a | y_b |
        +-----------+-----+-----+-----+-----+
        | something | 1   | 2   | 10  | 20  |
        +-----------+-----+-----+-----+-----+

        And I pass these arguments:

        wide_cols_list = [['x_a','y_a'],['x_b','y_b']]
        value_cols = ['x','y']
        long_col = 'replicate'
        long_labels = ['a','b']

        It'll generate this dataframe:

        +-----------+---+----+-----------+
        | other_col | x | y  | replicate |
        +-----------+---+----+-----------+
        | something | 1 | 10 | a         |
        | something | 2 | 20 | b         |
        +-----------+---+----+-----------+

        Returns a new DataFrame (df is not modified) sorted by the original
        index; rows that share an index value keep the order of long_labels.

        Raises AssertionError if the number of wide column sets differs from the
        number of labels, or if any set does not have one column per value
        column. An empty wide_cols_list is not supported (pandas.concat rejects
        an empty list of frames).
        """
        # Local import: this snippet has no module-level imports of its own.
        import pandas as pd

        assert len(wide_cols_list) == len(long_labels), \
            "need exactly one label per set of wide columns"
        for wide_cols in wide_cols_list:
            assert len(wide_cols) == len(value_cols), \
                "each set of wide columns needs one column per value column"

        value_cols = list(value_cols)
        # Columns that are not being reshaped are carried along unchanged.
        wide_names = {name for wide_cols in wide_cols_list for name in wide_cols}
        common_cols = [c for c in df.columns if c not in wide_names]

        dfs = []
        for wide_cols, long_label in zip(wide_cols_list, long_labels):
            sdf = df[common_cols + list(wide_cols)].copy()
            # Positional rename: the i-th wide column becomes the i-th value column.
            sdf.columns = common_cols + value_cols
            sdf[long_col] = long_label
            dfs.append(sdf)

        # mergesort is stable, so rows originating from the same input row stay
        # in long_labels order after sorting by index.
        return pd.concat(dfs).sort_index(kind="mergesort")[dfs[0].columns]