Skip to content

Instantly share code, notes, and snippets.

@rtkaleta
Last active March 8, 2025 14:18
Show Gist options
  • Save rtkaleta/ec4e25263dc6ec2b4c5cc1edc921f4be to your computer and use it in GitHub Desktop.
Save rtkaleta/ec4e25263dc6ec2b4c5cc1edc921f4be to your computer and use it in GitHub Desktop.
fuzzymatch.py
import pandas as pd
from rapidfuzz import process, fuzz
def main():
# Load CSV files
salesforce_df = pd.read_csv("<...>")
brands_df = pd.read_csv("<...>")
# Convert brand names into a dictionary for efficient lookup
brand_dict = dict(zip(brands_df["brand_name"], brands_df["brand_id"]))
brand_names = list(brand_dict.keys()) # List of brand names for matching
# Function to find best match
def find_best_match(account_name):
best_match, score, *_ = process.extractOne(str(account_name), brand_names, scorer=fuzz.token_sort_ratio)
return best_match, round(score / 10, 1)
# Apply fuzzy matching across all rows at once (vectorized)
salesforce_df[["Best Match Brand Name", "Match Score"]] = salesforce_df["Account Name"].apply(
lambda name: pd.Series(find_best_match(name))
)
# Merge results with brand IDs
salesforce_df["Brand ID"] = salesforce_df["Best Match Brand Name"].map(brand_dict)
# Filter for pretty good matches only
matched_df = salesforce_df[salesforce_df["Match Score"] >= 9]
# Select relevant columns
matched_df = matched_df[[
"Account ID", "Account Name", "Brand ID", "Best Match Brand Name"
]]
# Rename columns
matched_df.columns = [
"Salesforce Account ID", "Salesforce Account Name", "Brand ID", "Brand Name"
]
# Save the results to a CSV file
matched_df.to_csv("matched_salesforce_brands.csv", index=False)
print("✅ Matching complete! Check 'matched_salesforce_brands.csv'.")
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment