Last active
March 8, 2025 14:18
-
-
Save rtkaleta/ec4e25263dc6ec2b4c5cc1edc921f4be to your computer and use it in GitHub Desktop.
fuzzymatch.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from rapidfuzz import process, fuzz | |
def main(): | |
# Load CSV files | |
salesforce_df = pd.read_csv("<...>") | |
brands_df = pd.read_csv("<...>") | |
# Convert brand names into a dictionary for efficient lookup | |
brand_dict = dict(zip(brands_df["brand_name"], brands_df["brand_id"])) | |
brand_names = list(brand_dict.keys()) # List of brand names for matching | |
# Function to find best match | |
def find_best_match(account_name): | |
best_match, score, *_ = process.extractOne(str(account_name), brand_names, scorer=fuzz.token_sort_ratio) | |
return best_match, round(score / 10, 1) | |
# Apply fuzzy matching across all rows at once (vectorized) | |
salesforce_df[["Best Match Brand Name", "Match Score"]] = salesforce_df["Account Name"].apply( | |
lambda name: pd.Series(find_best_match(name)) | |
) | |
# Merge results with brand IDs | |
salesforce_df["Brand ID"] = salesforce_df["Best Match Brand Name"].map(brand_dict) | |
# Filter for pretty good matches only | |
matched_df = salesforce_df[salesforce_df["Match Score"] >= 9] | |
# Select relevant columns | |
matched_df = matched_df[[ | |
"Account ID", "Account Name", "Brand ID", "Best Match Brand Name" | |
]] | |
# Rename columns | |
matched_df.columns = [ | |
"Salesforce Account ID", "Salesforce Account Name", "Brand ID", "Brand Name" | |
] | |
# Save the results to a CSV file | |
matched_df.to_csv("matched_salesforce_brands.csv", index=False) | |
print("✅ Matching complete! Check 'matched_salesforce_brands.csv'.") | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment