Last active
April 27, 2020 21:49
-
-
Save Ze1598/e8fc8c7a7afaad91e47d1fe7f83901f2 to your computer and use it in GitHub Desktop.
Unpivot data with delimiters (v1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| from typing import List | |
| # Base clode for splitting and unpivoting a Series | |
| # https://stackoverflow.com/questions/19482970/get-list-from-pandas-dataframe-column-headers | |
| def unpivot_delimited_series( | |
| target_series: pd.Series, | |
| delimiter: str | |
| ) -> pd.DataFrame: | |
| unpivoted_data = target_series\ | |
| .apply( lambda series_row: pd.Series(series_row.split(delimiter)) )\ | |
| .stack() | |
| unpivoted_data = unpivoted_data.reset_index() | |
| return unpivoted_data | |
| def get_other_colum_names( | |
| source_df: pd.DataFrame, | |
| unpivoted_column: str | |
| ) -> List[str]: | |
| other_columns = list(source_df.columns.values) | |
| other_columns.remove(unpivoted_column) | |
| return other_columns | |
| def unpivot_delimited_data( | |
| source_df: pd.DataFrame, | |
| target_column: str, | |
| index_column: str, | |
| delimiter: str | |
| ) -> pd.DataFrame: | |
| unpivoted_data = unpivot_delimited_series( | |
| source_df[target_column], | |
| ";" | |
| ) | |
| source_df = source_df.reset_index() | |
| source_df_other_columns = get_other_colum_names( | |
| source_df, | |
| target_column | |
| ) | |
| source_data_to_merge = source_df[source_df_other_columns] | |
| merged_data = pd.merge( | |
| unpivoted_data, | |
| source_data_to_merge, | |
| how="inner", | |
| on=index_column | |
| ) | |
| merged_data.rename( | |
| columns={0: target_column}, | |
| inplace=True | |
| ) | |
| merged_data = merged_data[ source_df_other_columns + [target_column] ] | |
| return merged_data | |
| if __name__ == "__main__": | |
| # Load the data and set the first column as the index column | |
| data = pd.read_csv( | |
| "sample_data.csv", | |
| index_col=0 | |
| ) | |
| # Split and unpivot the column with delimited data, keeping the\ | |
| # complete DataFrame | |
| final_data = unpivot_delimited_data( | |
| data, | |
| "Used Social Networks", | |
| "Respondent ID", | |
| ";" | |
| ) | |
| print(data) | |
| print(final_data) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment