Last active
November 30, 2018 17:59
-
-
Save mpschr/5db20df78c034654f030 to your computer and use it in GitHub Desktop.
Join a list (dict) of pandas dataframes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas | |
| import logging | |
def multi_df_join(df_dict):
    """
    Outer-join several DataFrames on their index into one wide DataFrame.

    Column names that occur more than once across all input frames are
    disambiguated by appending the dictionary key of the frame they come
    from. The input DataFrames themselves are left untouched.

    :param df_dict: A dictionary where keys are strings (collision suffixes)
        and values are DataFrames sharing the same index (the join key)
    :return pandas.DataFrame: The merged data frame, or ``None`` when
        ``df_dict`` is empty
    """
    # Single pass to find every column name that appears more than once
    # (avoids the quadratic list.count() scan).
    seen_columns = set()
    colliding_columns = set()
    for frame in df_dict.values():
        for col in frame.columns:
            if col in seen_columns:
                colliding_columns.add(col)
            else:
                seen_columns.add(col)

    df = None
    for suffix, input_df in df_dict.items():
        # Suffix only the colliding columns of this frame.
        renamer_dict = {
            col: col + suffix
            for col in input_df.columns
            if col in colliding_columns
        }
        # rename() without inplace returns a new frame, so the caller's
        # DataFrame keeps its original column names.
        renamed_df = input_df.rename(columns=renamer_dict)

        # Join on the index; 'outer' keeps rows present in any frame.
        if df is None:
            df = renamed_df
        else:
            df = df.join(renamed_df, how='outer')
        logging.debug("Shape is currently: %s", df.shape)
    return df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment