Last active
April 27, 2020 21:50
-
-
Save Ze1598/11183d96d487decbb415c5b129a40f05 to your computer and use it in GitHub Desktop.
Unpivot data with delimiters (v2)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| from typing import List | |
| # Base clode for splitting and unpivoting a Series | |
| # https://stackoverflow.com/questions/19482970/get-list-from-pandas-dataframe-column-headers | |
| def unpivot_delimited_series( | |
| series: pd.Series, | |
| delimiter: str | |
| ) -> pd.DataFrame: | |
| unpivoted_data = series\ | |
| .apply( lambda series_row: pd.Series(series_row.split(delimiter)) )\ | |
| .stack() | |
| unpivoted_data = unpivoted_data.reset_index() | |
| return unpivoted_data | |
| def get_other_colum_names( | |
| source_df: pd.DataFrame, | |
| unpivoted_column: str | |
| ) -> List[str]: | |
| other_columns = list(source_df.columns.values) | |
| other_columns.remove(unpivoted_column) | |
| return other_columns | |
| def unpivot_delimited_data( | |
| source_df: pd.DataFrame, | |
| target_column: str, | |
| delimiter: str | |
| ) -> pd.DataFrame: | |
| unpivoted_series = unpivot_delimited_series( | |
| source_df[target_column], | |
| ";" | |
| ) | |
| source_df = source_df.reset_index() | |
| source_df_other_columns = get_other_colum_names( | |
| source_df, | |
| target_column | |
| ) | |
| source_data_to_merge = source_df[source_df_other_columns] | |
| merged_data = pd.merge( | |
| unpivoted_series, | |
| source_data_to_merge, | |
| how="inner", | |
| left_on="level_0", | |
| right_on="index" | |
| ) | |
| merged_data.rename( | |
| columns={0: target_column}, | |
| inplace=True | |
| ) | |
| # List with the names of the columns to keep (the only index kept is the\ | |
| # post-reset index of the source DataFrame) | |
| columns_to_keep = source_df_other_columns + [target_column] | |
| columns_to_keep.remove("index") | |
| merged_data = merged_data[columns_to_keep] | |
| return merged_data | |
| if __name__ == "__main__": | |
| # Load the original dataset | |
| data = pd.read_csv("sample_data.csv") | |
| # Split and unpivot the column with delimited data, keeping the\ | |
| # complete DataFrame | |
| final_data = unpivot_delimited_data( | |
| data, | |
| "Used Social Networks", | |
| ";" | |
| ) | |
| print(data) | |
| print(final_data) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment