Last active
October 13, 2019 19:52
-
-
Save cmaureir/decae88e5dadb13a1ecb3aa6f5e9fd08 to your computer and use it in GitHub Desktop.
Compare a column shared by two DataFrames to separate the rows whose values differ from the rows whose values match.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np

# Compare the PMID recorded for each product_id in two frames and split the
# rows of df1 into "new" (PMID differs from df2, or the product is absent
# from df2) and "old" (PMID is identical in both frames).

# The one generated just now.
df1 = pd.DataFrame({"product_id": [1, 2, 3], "PMID": [111, 222, 333]})

# The new one.
df2 = pd.DataFrame({"product_id": [2, 3], "PMID": [888, 333]})

# Expected result for the sample data above:
#   New
#     1, 111
#     2, 222
#   Old
#     3, 333

# A left merge keeps every row of df1. Both frames have a PMID column, so
# pandas suffixes them: PMID_x comes from df1 and PMID_y from df2. Products
# missing from df2 get NaN in PMID_y.
df_merge = df1.merge(df2, how="left", on=["product_id"])

# NaN never compares equal to anything, so products that are missing from
# df2 evaluate to False here and therefore end up in `new`.
same_pmid = df_merge["PMID_x"] == df_merge["PMID_y"]
old = df_merge[same_pmid]
new = df_merge[~same_pmid]

# Report the PMID taken from df1 under its original column name.
new = new.rename(columns={"PMID_x": "PMID"})
old = old.rename(columns={"PMID_x": "PMID"})

print(new[["product_id", "PMID"]])
print(old[["product_id", "PMID"]])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment