Skip to content

Instantly share code, notes, and snippets.

@jimmytuc
Created June 18, 2019 07:10
Show Gist options
  • Save jimmytuc/2e926862b2fc30dc8379a5220a054d49 to your computer and use it in GitHub Desktop.
Save jimmytuc/2e926862b2fc30dc8379a5220a054d49 to your computer and use it in GitHub Desktop.
flatten-df-columns
def flatten_dataframe(df, columns, fillna_value='', preserve_index=False):
    """Expand list-valued columns so every list element gets its own row.

    Each row of ``df`` is repeated once per element of the list stored in
    ``columns[0]``; the remaining columns are copied onto every repeated
    row. Rows whose list is empty (or whose cell is missing) are kept as a
    single row in which the flattened columns hold ``fillna_value``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing one or more columns whose cells are lists.
    columns : label or list-like of labels
        Column(s) to flatten. A single label is treated as a one-element
        list. When several columns are given, the lists in each row must
        all have the same length as the list in the first column.
    fillna_value : scalar, default ''
        Value written into the flattened columns of rows that had an empty
        list. Note that it is applied with ``DataFrame.fillna`` to the
        whole result, so when at least one such row exists, missing values
        in the other columns are replaced as well.
    preserve_index : bool, default False
        If True, every output row keeps the index label of the row it came
        from (labels repeat). If False, the result gets a fresh 0..n-1
        index.

    Returns
    -------
    pandas.DataFrame
        The flattened frame, ordered by the original index labels. The
        non-flattened columns come first (in sorted label order), followed
        by the flattened columns.
    """
    # Accept a single column label as shorthand for a one-element list.
    if (columns is not None
            and len(columns) > 0
            and not isinstance(columns, (list, tuple, np.ndarray, pd.Series))):
        columns = [columns]

    other_columns = df.columns.difference(columns)

    # Number of elements in each row's list; the first listed column is
    # the reference for how many times a row is repeated.
    first_column = df[columns[0]]
    lengths = first_column.str.len()
    # Missing cells (None/NaN) have no length; count them as empty lists so
    # np.repeat receives integer repeat counts instead of NaN.
    lengths = lengths.mask(first_column.isna().to_numpy(), 0).astype(int)
    has_items = lengths > 0
    is_empty = ~has_items

    repeat_counts = lengths.to_numpy()
    repeated_index = np.repeat(df.index.values, repeat_counts)
    repeated = {
        col: np.repeat(df[col].values, repeat_counts) for col in other_columns
    }
    flattened = pd.DataFrame(repeated, index=repeated_index)

    if has_items.any():
        exploded = {
            col: np.concatenate(df.loc[has_items, col].values)
            for col in columns
        }
    else:
        # np.concatenate rejects an empty sequence, so when every list is
        # empty there is simply nothing to explode.
        exploded = {col: np.array([], dtype=object) for col in columns}
    flattened = flattened.assign(**exploded)

    # Re-attach the rows whose list was empty; they were repeated zero
    # times above and would otherwise vanish from the result.
    if is_empty.any():
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent.
        flattened = pd.concat(
            [flattened, df.loc[is_empty, other_columns]], sort=False
        ).fillna(fillna_value)

    # A stable sort keeps the elements of one source row (which share an
    # index label) in their original list order.
    flattened = flattened.sort_index(kind='mergesort')

    if not preserve_index:
        flattened = flattened.reset_index(drop=True)
    return flattened
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment