Skip to content

Instantly share code, notes, and snippets.

@ionox0
Created January 15, 2019 02:11
Show Gist options
  • Save ionox0/c4f33635e17e8993eee184963266e932 to your computer and use it in GitHub Desktop.
Save ionox0/c4f33635e17e8993eee184963266e932 to your computer and use it in GitHub Desktop.
Find the intersecting and unique rows of two Pandas DataFrames
def compare_dfs(df_a, df_b, merge_cols):
"""
Returns intersection, its complement, only in df_a, only in df_b
:param merge_cols: Columns to use for merging the two DataFrames
"""
df_a = df_a.copy()
df_b = df_b.copy()
DUM_A = 'DUMMY_COL_A'
DUM_B = 'DUMMY_COL_B'
df_a[DUM_A] = DUM_A
df_b[DUM_B] = DUM_B
merged_outer = df_a.merge(df_b, how='outer', on=merge_cols)
intersection = merged_outer[~merged_outer[DUM_A].isnull() & ~merged_outer[DUM_B].isnull()]
interse_comp = merged_outer[merged_outer[DUM_A].isnull() | merged_outer[DUM_B].isnull()]
in_a_not_b = merged_outer[~merged_outer[DUM_A].isnull() & merged_outer[DUM_B].isnull()]
in_b_not_a = merged_outer[merged_outer[DUM_A].isnull() & ~merged_outer[DUM_B].isnull()]
intersection.drop([DUM_A, DUM_B], inplace=True, axis=1)
interse_comp.drop([DUM_A, DUM_B], inplace=True, axis=1)
in_a_not_b.drop([DUM_A, DUM_B], inplace=True, axis=1)
in_b_not_a.drop([DUM_A, DUM_B], inplace=True, axis=1)
return intersection, interse_comp, in_a_not_b, in_b_not_a
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment