Last active
October 27, 2020 17:02
-
-
Save jiffyclub/ac2e7506428d5e1d587b to your computer and use it in GitHub Desktop.
Example of a function that compares two DataFrames independently of row/column ordering and handles null values.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:9c26f46f87352d6bedc804325404deca8e9cf8b7e2e0c151b7a2635f27e6d447" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy as np\n", | |
"import numpy.testing as npt\n", | |
"import pandas as pd" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def assert_frames_equal(actual, expected, use_close=False):\n", | |
" \"\"\"\n", | |
" Compare DataFrame items by index and column and\n", | |
" raise AssertionError if any item is not equal.\n", | |
"\n", | |
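" Ordering is unimportant; items are compared only by label.\n", | |
" NaN and infinite values are supported.\n", | |
" Only the labels present in `expected` are checked, so extra rows\n", | |
" or columns in `actual` do not cause a failure.\n", | |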
" \n", | |
" Parameters\n", | |
" ----------\n", | |
" actual : pandas.DataFrame\n", | |
" expected : pandas.DataFrame\n", | |
" use_close : bool, optional\n", | |
" If True, use numpy.testing.assert_allclose instead of\n", | |
" numpy.testing.assert_equal.\n", | |
"\n", | |
" \"\"\"\n", | |
" if use_close:\n", | |
" comp = npt.assert_allclose\n", | |
" else:\n", | |
" comp = npt.assert_equal\n", | |
"\n", | |
" assert (isinstance(actual, pd.DataFrame) and\n", | |
" isinstance(expected, pd.DataFrame)), \\\n", | |
" 'Inputs must both be pandas DataFrames.'\n", | |
"\n", | |
" for i, exp_row in expected.iterrows():\n", | |
" assert i in actual.index, 'Expected row {!r} not found.'.format(i)\n", | |
"\n", | |
" act_row = actual.loc[i]\n", | |
"\n", | |
" for j, exp_item in exp_row.iteritems():\n", | |
" assert j in act_row.index, \\\n", | |
" 'Expected column {!r} not found.'.format(j)\n", | |
"\n", | |
" act_item = act_row[j]\n", | |
"\n", | |
" try:\n", | |
" comp(act_item, exp_item)\n", | |
" except AssertionError as e:\n", | |
" raise AssertionError(\n", | |
" e.message + '\\n\\nColumn: {!r}\\nRow: {!r}'.format(j, i))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 53 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"expected = pd.DataFrame({'a': [1, np.nan, 3],\n", | |
" 'b': [np.nan, 5, 6]},\n", | |
" index=['x', 'y', 'z'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 54 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"actual = pd.DataFrame([[4, 1],\n", | |
" [6, 3],\n", | |
" [5, np.nan]],\n", | |
" index=['x', 'z', 'y'],\n", | |
" columns=['b', 'a'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 55 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"assert_frames_equal(actual, actual)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 56 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"assert_frames_equal(actual, expected)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "AssertionError", | |
"evalue": "\nItems are not equal:\n ACTUAL: 4.0\n DESIRED: nan\n\nColumn: 'b'\nRow: 'x'", | |
"output_type": "pyerr", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-57-2fa991ae8dd6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0massert_frames_equal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mactual\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexpected\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m<ipython-input-53-fedbc359fc19>\u001b[0m in \u001b[0;36massert_frames_equal\u001b[0;34m(actual, expected)\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mAssertionError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m raise AssertionError(e.message + \n\u001b[0;32m---> 26\u001b[0;31m '\\n\\nColumn: {!r}\\nRow: {!r}'.format(j, i))\n\u001b[0m", | |
"\u001b[0;31mAssertionError\u001b[0m: \nItems are not equal:\n ACTUAL: 4.0\n DESIRED: nan\n\nColumn: 'b'\nRow: 'x'" | |
] | |
} | |
], | |
"prompt_number": 57 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi, awesome code!
When I run the code and there are multiple mismatches, only the first one is reported.
Is this intended?