ryancollingwood · September 11, 2018 02:11 · ryancollingwood · Sep 11, 2018
diff --git a/compare_dfs.py b/compare_dfs.py
 import pandas as pd
 import numpy as np

 def compare_two_dfs(input_df_1: pd.DataFrame, input_df_2: pd.DataFrame) -> pd.DataFrame:
     """Return the cells whose values differ between two DataFrames.

     The two frames are compared element-wise with ``!=``, so they are
     expected to share the same index and columns.

     Missing values are replaced with ``""`` before comparing, because
     ``np.nan == np.nan`` is always False and would otherwise make every
     NaN cell look like a change. As a consequence a missing value and a
     genuine empty string are treated as equal, and missing values are
     shown as ``""`` in the result.

     Args:
         input_df_1: The "before" frame. It is not modified.
         input_df_2: The "after" frame. It is not modified.

     Returns:
         A DataFrame with one row per differing cell, indexed by a
         two-level index named ``("id", "col")`` (row label, column label),
         with a ``"from"`` column holding the value in ``input_df_1`` and a
         ``"to"`` column holding the value in ``input_df_2``.
     """
     # fillna() already returns new objects, so no explicit copy is needed
     # to keep the caller's frames untouched.
     df_1 = input_df_1.fillna("")
     df_2 = input_df_2.fillna("")

     # Compute the mask once and derive both the labels and the positions
     # from it, so the two can never disagree.
     is_different = df_1 != df_2

     stacked_mask = is_different.stack()
     changed = stacked_mask[stacked_mask]
     changed.index.names = ["id", "col"]

     difference_locations = np.where(is_different.values)

     return pd.DataFrame(
         {
             "from": df_1.values[difference_locations],
             "to": df_2.values[difference_locations],
         },
         index=changed.index,
     )
	import pandas as pd
	import numpy as np

	def compare_two_dfs(input_df_1: pd.DataFrame, input_df_2: pd.DataFrame) -> pd.DataFrame:
		"""Return the cells whose values differ between two DataFrames.

		The two frames are compared element-wise with ``!=``, so they are
		expected to share the same index and columns.

		Missing values are replaced with ``""`` before comparing, because
		``np.nan == np.nan`` is always False and would otherwise make every
		NaN cell look like a change. As a consequence a missing value and a
		genuine empty string are treated as equal, and missing values are
		shown as ``""`` in the result.

		Args:
			input_df_1: The "before" frame. It is not modified.
			input_df_2: The "after" frame. It is not modified.

		Returns:
			A DataFrame with one row per differing cell, indexed by a
			two-level index named ``("id", "col")`` (row label, column label),
			with a ``"from"`` column holding the value in ``input_df_1`` and a
			``"to"`` column holding the value in ``input_df_2``.
		"""
		# fillna() already returns new objects, so no explicit copy is needed
		# to keep the caller's frames untouched.
		df_1 = input_df_1.fillna("")
		df_2 = input_df_2.fillna("")

		# Compute the mask once and derive both the labels and the positions
		# from it, so the two can never disagree.
		is_different = df_1 != df_2

		stacked_mask = is_different.stack()
		changed = stacked_mask[stacked_mask]
		changed.index.names = ["id", "col"]

		difference_locations = np.where(is_different.values)

		return pd.DataFrame(
			{
				"from": df_1.values[difference_locations],
				"to": df_2.values[difference_locations],
			},
			index=changed.index,
		)