Created
June 18, 2019 07:10
-
-
Save jimmytuc/2e926862b2fc30dc8379a5220a054d49 to your computer and use it in GitHub Desktop.
flatten-df-columns
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def flatten_dataframe(df, columns, fillna_value='', preserve_index=False):
    """Explode list-valued columns so that each list element gets its own row.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified.
    columns : label or list-like of labels
        Column(s) whose cells hold list-likes. A single label is wrapped in a
        list. When several columns are given, the repeat counts are taken from
        the first one, so the other columns must hold lists of the same
        per-row lengths (otherwise assigning the flattened values raises).
    fillna_value : scalar, default ''
        Value used to fill missing cells when rows with empty lists are
        re-attached. As before, the fill is applied to the whole result frame,
        and only when at least one such row exists.
    preserve_index : bool, default False
        If True, keep the (repeated) original index labels; otherwise the
        result gets a fresh ``RangeIndex``.

    Returns
    -------
    pandas.DataFrame
        The non-flattened columns (in the order produced by
        ``df.columns.difference(columns)``) followed by the flattened columns,
        with rows sorted by original index label.
    """
    # Accept a single column label as well as a list-like of labels.
    if (columns is not None
            and len(columns) > 0
            and not isinstance(columns, (list, tuple, np.ndarray, pd.Series))):
        columns = [columns]

    diff_columns = df.columns.difference(columns)

    # Per-row list length, taken from the first flattened column. Missing
    # cells (None/NaN) yield NaN here; treat them as empty lists so that
    # np.repeat receives integer counts.
    series_length = df[columns[0]].str.len().fillna(0).astype(int)
    length_gt_zero = series_length > 0
    length_zero = ~length_gt_zero

    # Repeat the index labels and the untouched columns once per list element.
    indexes = np.repeat(df.index.values, series_length)
    data_need_to_fill = {
        col: np.repeat(df[col].values, series_length) for col in diff_columns
    }
    df_flatten = pd.DataFrame(data_need_to_fill, index=indexes)

    # Concatenate the lists of every non-empty row into one flat array.
    # np.concatenate rejects an empty sequence, so guard the all-empty case.
    has_values = bool(length_gt_zero.any())
    for col in columns:
        if has_values:
            flat_values = np.concatenate(df.loc[length_gt_zero, col].values)
        else:
            flat_values = np.array([], dtype=object)
        # Item assignment (rather than assign(**kwargs)) also works for
        # non-string column labels.
        df_flatten[col] = flat_values

    # Re-attach rows whose lists were empty; they contributed no elements.
    # DataFrame.append was removed in pandas 2.0, pd.concat is the equivalent.
    if length_zero.any():
        df_flatten = pd.concat(
            [df_flatten, df.loc[length_zero, diff_columns]], sort=False
        ).fillna(fillna_value)

    # Restore index order. A stable sort is required because all elements of
    # one source row share the same index label and must keep their order.
    df_flatten = df_flatten.sort_index(kind='mergesort')

    if not preserve_index:
        df_flatten = df_flatten.reset_index(drop=True)
    return df_flatten
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment