Created
January 24, 2023 08:13
-
-
Save thehappycheese/6967087c2a039bc28fcccb1b3c64cff8 to your computer and use it in GitHub Desktop.
Helper function that generates a dictionary mapping for renaming the columns of one pandas DataFrame to match those of another DataFrame.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fuzzy_column_name_match(list1, list2, threshold=60):
    """Map each name in *list1* to its closest fuzzy match in *list2*.

    The result is a dictionary suitable for passing to a rename operation:
    keys are names taken from ``list1`` and values are the best-scoring
    candidates from ``list2``.

    Args:
        list1: Iterable of names to look up (e.g. ``source.columns``).
        list2: Iterable of candidate names to match against
            (e.g. ``target.columns``).
        threshold: A match is kept only when its fuzzywuzzy similarity
            score (0-100 scale) is strictly greater than this value.
            Defaults to 60.

    Returns:
        dict mapping each name from ``list1`` whose best match scored above
        ``threshold`` to that match. Names without a good enough match are
        omitted. An empty dict is returned when either input is empty.
    """
    names = list(list1)
    # Materialise one-shot iterators so every lookup sees the full candidate
    # set. Mapping-like inputs are passed through untouched because
    # fuzzywuzzy gives them special treatment (it matches against values).
    candidates = list2 if hasattr(list2, "items") else list(list2)

    # len() rather than truthiness: pandas containers raise on bool().
    if not names or len(candidates) == 0:
        return {}

    # Imported here, as in the original, so the snippet stays self-contained.
    from fuzzywuzzy import process

    matches = {}
    for name_to_find in names:
        best = process.extractOne(name_to_find, candidates)
        # extractOne yields None when nothing could be scored; skip instead
        # of failing on the subscript below.
        if best is None:
            continue
        if best[1] > threshold:
            matches[name_to_find] = best[0]
    return matches
# Example usage
# `source` and `target` are assumed to be existing pandas DataFrames; the call
# returns a {source column name: target column name} dictionary.
fuzzy_column_name_match(source.columns, target.columns)
# {'LINK_NO': 'Link_No',
# 'Route_No': 'ROAD_NO',
# 'State Link name': 'ST',
# 'Road Name ': 'ROAD_NAME',
# 'Road No': 'ROAD_NO',
# 'LINK_SUB_CATEGORY': 'Link Category',
# 'SLK Start': 'ST',
# 'SLK End': 'end_SLK',
# 'Region': 'section',
# 'Func_ Class': 'MR_Class',
# 'Func_ Class.1': 'MR_Class',
# 'AADT Max': 'AADT',
# 'AADT Min': 'AADT',
# 'Select AADT to Use': 'AADT',
# 'Site No of selected AADT': 'AADT',
# 'SLK of Selected AADT': 'AADT',
# 'Date of Selected AADT': 'AADT',
# 'Unnamed: 24': 'Unnamed: 11'}
# As the example output shows, the matching is far from perfect, but it is
# still easier to keep the valid rows than to type the mapping from scratch.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment