Skip to content

Instantly share code, notes, and snippets.

@Ze1598
Last active April 27, 2020 21:50
Show Gist options
  • Save Ze1598/11183d96d487decbb415c5b129a40f05 to your computer and use it in GitHub Desktop.
Save Ze1598/11183d96d487decbb415c5b129a40f05 to your computer and use it in GitHub Desktop.
Unpivot data with delimiters (v2)
import pandas as pd
from typing import List
# Base clode for splitting and unpivoting a Series
# https://stackoverflow.com/questions/19482970/get-list-from-pandas-dataframe-column-headers
def unpivot_delimited_series(
series: pd.Series,
delimiter: str
) -> pd.DataFrame:
unpivoted_data = series\
.apply( lambda series_row: pd.Series(series_row.split(delimiter)) )\
.stack()
unpivoted_data = unpivoted_data.reset_index()
return unpivoted_data
def get_other_colum_names(
source_df: pd.DataFrame,
unpivoted_column: str
) -> List[str]:
other_columns = list(source_df.columns.values)
other_columns.remove(unpivoted_column)
return other_columns
def unpivot_delimited_data(
source_df: pd.DataFrame,
target_column: str,
delimiter: str
) -> pd.DataFrame:
unpivoted_series = unpivot_delimited_series(
source_df[target_column],
";"
)
source_df = source_df.reset_index()
source_df_other_columns = get_other_colum_names(
source_df,
target_column
)
source_data_to_merge = source_df[source_df_other_columns]
merged_data = pd.merge(
unpivoted_series,
source_data_to_merge,
how="inner",
left_on="level_0",
right_on="index"
)
merged_data.rename(
columns={0: target_column},
inplace=True
)
# List with the names of the columns to keep (the only index kept is the\
# post-reset index of the source DataFrame)
columns_to_keep = source_df_other_columns + [target_column]
columns_to_keep.remove("index")
merged_data = merged_data[columns_to_keep]
return merged_data
if __name__ == "__main__":
# Load the original dataset
data = pd.read_csv("sample_data.csv")
# Split and unpivot the column with delimited data, keeping the\
# complete DataFrame
final_data = unpivot_delimited_data(
data,
"Used Social Networks",
";"
)
print(data)
print(final_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment