Last active
September 9, 2020 18:04
-
-
Save Ze1598/60d7329d1f118fd519ad6b8a41fe209c to your computer and use it in GitHub Desktop.
Update a DataFrame based on common values of two DataFrames
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import numpy as np | |
| # Current employee dataset | |
| original = pd.DataFrame({ | |
| "id": [1, 2, 3, 4], | |
| "name": ["Michael", "Jim", "Pam", "Dwight"], | |
| "age": [46, 35, 35, 38], | |
| "email": ["[email protected]", "[email protected]", "[email protected]", "[email protected]"], | |
| "in_company": [True, True, True, True] | |
| }) | |
| # The latest employee listing. It has two new employees (ids 5 and 6)\ | |
| # and is missing one person from the previous data (id 2). Some employees\ | |
| # also updated their data | |
| new_data = pd.DataFrame({ | |
| "id": [1, 3, 4, 5, 6], | |
| "name": ["Michael", "Pam", "Dwight", "Kevin", "Stanley"], | |
| "age": [47, 36, 38, 41, 55], | |
| "email": ["[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]"], | |
| "in_company": [True, True, True, True, True] | |
| }) | |
| # Update the employment status of employees missing in the new listing | |
| original.loc[~original["id"].isin(new_data["id"]), "in_company"] = False | |
| # Append the new listing to the original to create a single DF | |
| updated = original.append(new_data, ignore_index=True) | |
| # For the employees in both DFs, keep only their latest information\ | |
| # (rows are considered duplicate based on the "id") | |
| updated = updated.drop_duplicates(["id"], keep="last") | |
| # Sort the employees by their id | |
| updated.sort_values("id", inplace=True) | |
| # Reset the DF index after all the transformations (don't keep the\ | |
| # previous index a.k.a. `drop=True`) | |
| updated.reset_index(drop=True, inplace=True) | |
| print("---Updated employee data:\n", updated) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment