Skip to content

Instantly share code, notes, and snippets.

@thehappycheese
Created January 24, 2023 08:13
Show Gist options
  • Save thehappycheese/6967087c2a039bc28fcccb1b3c64cff8 to your computer and use it in GitHub Desktop.
Save thehappycheese/6967087c2a039bc28fcccb1b3c64cff8 to your computer and use it in GitHub Desktop.
Helper function that generates a dictionary mapping for renaming the columns of one pandas DataFrame to match those of another DataFrame
def fuzzy_column_name_match(list1, list2, threshold=60):
    """Map each name in ``list1`` to its best fuzzy match in ``list2``.

    The result is intended as a starting point for a column rename map
    (e.g. for ``DataFrame.rename(columns=...)``); review it before use.
    Several names from ``list1`` may map to the same name in ``list2``,
    so applying the result unreviewed can produce duplicate column names.

    Args:
        list1: Iterable of names to look up (e.g. ``source.columns``).
        list2: Candidate names to match against (e.g. ``target.columns``).
        threshold: A match is kept only when its fuzzywuzzy score is
            strictly greater than this value. Defaults to 60.

    Returns:
        dict mapping each name from ``list1`` that has a sufficiently
        good match to the best-matching name from ``list2``. Names with
        no match above ``threshold`` are omitted.
    """
    # Imported locally so the dependency is only needed when the helper is used.
    from fuzzywuzzy import process

    response = {}
    for name_to_find in list1:
        resp_match = process.extractOne(name_to_find, list2)
        # extractOne yields None when there is nothing to score (e.g. an
        # empty candidate list); indexing it would raise TypeError.
        if resp_match is None:
            continue
        best_name, score = resp_match[0], resp_match[1]
        if score > threshold:
            response[name_to_find] = best_name
    return response
# Example usage
fuzzy_column_name_match(source.columns, target.columns)
# {'LINK_NO': 'Link_No',
# 'Route_No': 'ROAD_NO',
# 'State Link name': 'ST',
# 'Road Name ': 'ROAD_NAME',
# 'Road No': 'ROAD_NO',
# 'LINK_SUB_CATEGORY': 'Link Category',
# 'SLK Start': 'ST',
# 'SLK End': 'end_SLK',
# 'Region': 'section',
# 'Func_ Class': 'MR_Class',
# 'Func_ Class.1': 'MR_Class',
# 'AADT Max': 'AADT',
# 'AADT Min': 'AADT',
# 'Select AADT to Use': 'AADT',
# 'Site No of selected AADT': 'AADT',
# 'SLK of Selected AADT': 'AADT',
# 'Date of Selected AADT': 'AADT',
# 'Unnamed: 24': 'Unnamed: 11'}
# As the example output shows, many of the suggested matches are wrong, but it
# is still easier to keep the valid rows than to type the mapping from scratch.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment