rtkaleta · March 8, 2025 14:18
diff --git a/fuzzymatch.py b/fuzzymatch.py
 import pandas as pd
 from rapidfuzz import process, fuzz

 def main():
  # Load CSV files
  salesforce_df = pd.read_csv("<...>")
  brands_df = pd.read_csv("<...>")

  # Convert brand names into a dictionary for efficient lookup
  brand_dict = dict(zip(brands_df["brand_name"], brands_df["brand_id"]))
  brand_names = list(brand_dict.keys())  # List of brand names for matching

  # Function to find best match
  def find_best_match(account_name):
      best_match, score, *_ = process.extractOne(str(account_name), brand_names, scorer=fuzz.token_sort_ratio)
      return best_match, round(score / 10, 1)

  # Apply fuzzy matching across all rows at once (vectorized)
  salesforce_df[["Best Match Brand Name", "Match Score"]] = salesforce_df["Account Name"].apply(
      lambda name: pd.Series(find_best_match(name))
  )

  # Merge results with brand IDs
  salesforce_df["Brand ID"] = salesforce_df["Best Match Brand Name"].map(brand_dict)

  # Filter for pretty good matches only
  matched_df = salesforce_df[salesforce_df["Match Score"] >= 9]

  # Select relevant columns
  matched_df = matched_df[[
      "Account ID", "Account Name", "Brand ID", "Best Match Brand Name"
  ]]

  # Rename columns
  matched_df.columns = [
      "Salesforce Account ID", "Salesforce Account Name", "Brand ID", "Brand Name"
  ]

  # Save the results to a CSV file
  matched_df.to_csv("matched_salesforce_brands.csv", index=False)

  print("✅ Matching complete! Check 'matched_salesforce_brands.csv'.")

 if __name__ == "__main__":
    main()
	import pandas as pd
	from rapidfuzz import process, fuzz

	def main():
	# Load CSV files
	salesforce_df = pd.read_csv("<...>")
	brands_df = pd.read_csv("<...>")

	# Convert brand names into a dictionary for efficient lookup
	brand_dict = dict(zip(brands_df["brand_name"], brands_df["brand_id"]))
	brand_names = list(brand_dict.keys()) # List of brand names for matching

	# Function to find best match
	def find_best_match(account_name):
	best_match, score, *_ = process.extractOne(str(account_name), brand_names, scorer=fuzz.token_sort_ratio)
	return best_match, round(score / 10, 1)

	# Apply fuzzy matching across all rows at once (vectorized)
	salesforce_df[["Best Match Brand Name", "Match Score"]] = salesforce_df["Account Name"].apply(
	lambda name: pd.Series(find_best_match(name))
	)

	# Merge results with brand IDs
	salesforce_df["Brand ID"] = salesforce_df["Best Match Brand Name"].map(brand_dict)

	# Filter for pretty good matches only
	matched_df = salesforce_df[salesforce_df["Match Score"] >= 9]

	# Select relevant columns
	matched_df = matched_df[[
	"Account ID", "Account Name", "Brand ID", "Best Match Brand Name"
	]]

	# Rename columns
	matched_df.columns = [
	"Salesforce Account ID", "Salesforce Account Name", "Brand ID", "Brand Name"
	]

	# Save the results to a CSV file
	matched_df.to_csv("matched_salesforce_brands.csv", index=False)

	print("✅ Matching complete! Check 'matched_salesforce_brands.csv'.")

	if __name__ == "__main__":
	main()