Skip to content

Instantly share code, notes, and snippets.

@vsouza
Created September 7, 2015 23:25
Show Gist options
  • Save vsouza/0f7c889e6f19f62eb403 to your computer and use it in GitHub Desktop.
Save vsouza/0f7c889e6f19f62eb403 to your computer and use it in GitHub Desktop.
import os

import pandas as pd
# Build two small sample tables that differ in exactly one cell (column 'a',
# third row: 4 in the old data, 5 in the new data) and write each one to CSV
# so the comparison further down has files to load.

# to_csv does not create missing parent directories, so make sure the target
# directory exists before writing into it.
if not os.path.isdir("data"):
    os.makedirs("data")

data = {'a': [1, 3, 4, 4], 'b': [1, 3, 2, 3]}
df = pd.DataFrame(data=data)
df.to_csv("data/old_data.csv")

data2 = {'a': [1, 3, 5, 4], 'b': [1, 3, 2, 3]}
df2 = pd.DataFrame(data=data2)
df2.to_csv("data/new_data.csv")
def report_diff(x):
    """Describe how a single value changed between two versions.

    Args:
        x: Indexable pair; ``x[0]`` and ``x[1]`` are the two values to
            compare.

    Returns:
        ``x[0]`` itself when the two values are equal (or are both NaN),
        otherwise the string ``"<x[0]> ---> <x[1]>"``.
    """
    first, second = x[0], x[1]
    if first == second:
        return first
    # NaN never compares equal to itself, so a cell that is missing on both
    # sides would otherwise be reported as a change.
    if first != first and second != second:
        return first
    return "{} ---> {}".format(first, second)
# Load both snapshots. index_col=0 makes the first CSV column the row index.
# read_csv replaces DataFrame.from_csv, which is no longer part of pandas.
old = pd.read_csv("data/old_data.csv", index_col=0)
new = pd.read_csv("data/new_data.csv", index_col=0)

# pd.Panel is no longer part of pandas, so compare the two frames cell by cell.
# Align first so both sides share the same row and column labels; a label
# present on only one side shows up as NaN on the other.
old_aligned, new_aligned = old.align(new, join="outer")

# Each cell is passed to report_diff as an (old, new) pair so that a change
# reads "old ---> new".
diff_output = pd.DataFrame(
    {
        column: [
            report_diff(pair)
            for pair in zip(old_aligned[column], new_aligned[column])
        ]
        for column in old_aligned.columns
    },
    index=old_aligned.index,
    columns=old_aligned.columns,
)
def has_change(row):
    """Flag whether one row of the diff report contains a changed cell.

    Args:
        row: pandas Series holding one report row. It must provide
            ``to_string()`` and the labels ``'a'`` and ``'b'``.

    Returns:
        ``"Y"`` when the row's text rendering contains the ``"--->"``
        marker, otherwise ``"N"``.

    Side effects:
        Prints the row's ``'a'`` and ``'b'`` values when a change is found.
    """
    if "--->" not in row.to_string():
        return "N"
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(row['a'])
    print(row['b'])
    return "Y"
# Tag every report row with "Y" or "N" depending on whether any of its cells
# changed.
diff_output['has_change'] = diff_output.apply(has_change, axis=1)

# Rows flagged as changed, kept under a name so the selection is not thrown
# away. The report printed below still lists every row.
# NOTE(review): if only the changed rows should be printed, print
# changed_rows instead - confirm the intended output.
changed_rows = diff_output[diff_output.has_change == 'Y']

final_df = pd.DataFrame(data=diff_output)
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(final_df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment