revolutionisme · March 7, 2019 09:40
diff --git a/merge_after_spelling_fix.py b/merge_after_spelling_fix.py
 import pandas as pd
 import difflib

 # Load the two Stata datasets that will be merged on 'common_column'.
 # The path strings below are placeholders; replace them with real file paths.
 df1 = pd.read_stata('path to first dataset')
 df2 = pd.read_stata('path to second dataset')

 def fix_spelling(x):
     """Return the closest spelling of ``x`` found in ``df2['common_column']``.

     The module-level ``df2`` frame supplies the candidate spellings. When
     ``difflib`` finds no candidate that is close enough, or when the values
     cannot be compared as strings (e.g. ``x`` is a missing value), ``x`` is
     returned unchanged. Other errors, such as a missing column, propagate
     instead of being silently swallowed.
     """
     try:
         # n=1: only the best match is used, so skip ranking extra candidates.
         matches = difflib.get_close_matches(x, df2['common_column'], n=1)
     except TypeError:
         # Non-string input or non-string candidates cannot be compared.
         return x
     # No close match: keep the original (or return "NA", depending on your use case).
     return matches[0] if matches else x

 # Correct the spelling in whichever frame has fewer values in the shared
 # column (it is faster); if speed is not a concern, correct the frame with
 # the worse spelling so it is matched against the better-spelled one.
 df1['common_column'] = df1['common_column'].apply(fix_spelling)

 # Outer join keeps rows from both frames, including those left unmatched.
 merged_df = df1.merge(df2, how='outer', on='common_column')
	import pandas as pd
	import difflib

	# Load the two Stata datasets that will be merged on 'common_column'.
	# The path strings below are placeholders; replace them with real file paths.
	df1 = pd.read_stata('path to first dataset')
	df2 = pd.read_stata('path to second dataset')

	def fix_spelling(x):
	    """Return the closest spelling of ``x`` found in ``df2['common_column']``.

	    The module-level ``df2`` frame supplies the candidate spellings. When
	    ``difflib`` finds no candidate that is close enough, or when the values
	    cannot be compared as strings (e.g. ``x`` is a missing value), ``x`` is
	    returned unchanged. Other errors, such as a missing column, propagate
	    instead of being silently swallowed.
	    """
	    try:
	        # n=1: only the best match is used, so skip ranking extra candidates.
	        matches = difflib.get_close_matches(x, df2['common_column'], n=1)
	    except TypeError:
	        # Non-string input or non-string candidates cannot be compared.
	        return x
	    # No close match: keep the original (or return "NA", depending on your use case).
	    return matches[0] if matches else x

	# Correct the spelling in whichever frame has fewer values in the shared
	# column (it is faster); if speed is not a concern, correct the frame with
	# the worse spelling so it is matched against the better-spelled one.
	df1['common_column'] = df1['common_column'].apply(fix_spelling)

	# Outer join keeps rows from both frames, including those left unmatched.
	merged_df = df1.merge(df2, how='outer', on='common_column')
No results found