jhconning · June 2, 2016 02:27 · filipposebastio · Jul 3, 2018
diff --git a/fuzzy matching with pandas b/fuzzy matching with pandas


 #df is the original dataframe, holding the list of names you want to keep
 #dfF is the dataframe whose names can only be matched fuzzily
 #For each name in df (column Name_x) the code finds the most likely match in dfF.Name and saves that name
 #We then merge on that new key 'Name_r'
 #The `or [None]` fallback yields None when there is no close match (e.g. because df has names that are not in dfF)
 # From http://stackoverflow.com/questions/13636848/is-it-possible-to-do-fuzzy-match-merge-with-python-pandas 
 # http://stackoverflow.com/questions/36557722/python-pandas-difflib-throws-list-index-out-of-range-error

 import difflib
 # Build the candidate list once instead of re-iterating the Series for every row;
 # missing values are dropped because difflib raises TypeError on NaN/None.
 _fuzzy_candidates = dfF.Name.dropna().tolist()
 # n=1 asks difflib for the single best match only; `or [None]` yields None when
 # nothing is close enough. na_action='ignore' leaves missing names in df as NaN
 # rather than passing them to difflib.
 df['Name_r'] = df.Name_x.map(
     lambda x: (difflib.get_close_matches(x, _fuzzy_candidates, n=1) or [None])[0],
     na_action='ignore',
 )


	#df is the original dataframe, holding the list of names you want to keep
	#dfF is the dataframe whose names can only be matched fuzzily
	#For each name in df (column Name_x) the code finds the most likely match in dfF.Name and saves that name
	#We then merge on that new key 'Name_r'
	#The `or [None]` fallback yields None when there is no close match (e.g. because df has names that are not in dfF)
	# From http://stackoverflow.com/questions/13636848/is-it-possible-to-do-fuzzy-match-merge-with-python-pandas
	# http://stackoverflow.com/questions/36557722/python-pandas-difflib-throws-list-index-out-of-range-error

	import difflib
	# Build the candidate list once instead of re-iterating the Series for every row;
	# missing values are dropped because difflib raises TypeError on NaN/None.
	_fuzzy_candidates = dfF.Name.dropna().tolist()
	# n=1 asks difflib for the single best match only; `or [None]` yields None when
	# nothing is close enough. na_action='ignore' leaves missing names in df as NaN
	# rather than passing them to difflib.
	df['Name_r'] = df.Name_x.map(
	    lambda x: (difflib.get_close_matches(x, _fuzzy_candidates, n=1) or [None])[0],
	    na_action='ignore',
	)