-
-
Save springcoil/6d2ae4a877e5ff5dfea9 to your computer and use it in GitHub Desktop.
Example of a function to compare two DataFrames independent of row/column ordering and with handling of null values.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:9c26f46f87352d6bedc804325404deca8e9cf8b7e2e0c151b7a2635f27e6d447" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy as np\n", | |
"import numpy.testing as npt\n", | |
"import pandas as pd" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def assert_frames_equal(actual, expected, use_close=False):\n", | |
"    \"\"\"\n", | |
"    Compare DataFrame items by index and column and\n", | |
"    raise AssertionError if any item is not equal.\n", | |
"\n", | |
"    Ordering is unimportant, items are compared only by label.\n", | |
"    NaN and infinite values are supported.\n", | |
"\n", | |
"    Every (row, column) label pair of `expected` must exist in `actual`\n", | |
"    and hold an equal value. Labels that exist only in `actual` are\n", | |
"    not examined.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    actual : pandas.DataFrame\n", | |
"    expected : pandas.DataFrame\n", | |
"    use_close : bool, optional\n", | |
"        If True, use numpy.testing.assert_allclose instead of\n", | |
"        numpy.testing.assert_equal.\n", | |
"\n", | |
"    Raises\n", | |
"    ------\n", | |
"    AssertionError\n", | |
"        If either input is not a DataFrame, if row or column labels of\n", | |
"        either frame are duplicated, if a row or column of `expected`\n", | |
"        is missing from `actual`, or if a pair of items differs. For a\n", | |
"        differing pair the message names the offending column and row.\n", | |
"\n", | |
"    \"\"\"\n", | |
"    comp = npt.assert_allclose if use_close else npt.assert_equal\n", | |
"\n", | |
"    # Explicit raises (not `assert`) so validation survives `python -O`.\n", | |
"    if not (isinstance(actual, pd.DataFrame) and\n", | |
"            isinstance(expected, pd.DataFrame)):\n", | |
"        raise AssertionError('Inputs must both be pandas DataFrames.')\n", | |
"\n", | |
"    # Label-based lookup is ambiguous when labels repeat: a scalar lookup\n", | |
"    # would return a Series/DataFrame instead of a single item.\n", | |
"    for name, frame in (('actual', actual), ('expected', expected)):\n", | |
"        if not (frame.index.is_unique and frame.columns.is_unique):\n", | |
"            raise AssertionError(\n", | |
"                'Row and column labels of {} must be unique.'.format(name))\n", | |
"\n", | |
"    for i in expected.index:\n", | |
"        if i not in actual.index:\n", | |
"            raise AssertionError('Expected row {!r} not found.'.format(i))\n", | |
"\n", | |
"        for j in expected.columns:\n", | |
"            if j not in actual.columns:\n", | |
"                raise AssertionError(\n", | |
"                    'Expected column {!r} not found.'.format(j))\n", | |
"\n", | |
"            try:\n", | |
"                # Scalar access keeps each column's own dtype; iterating\n", | |
"                # rows would upcast mixed int/float rows to float and can\n", | |
"                # hide differences between large integers.\n", | |
"                comp(actual.at[i, j], expected.at[i, j])\n", | |
"            except AssertionError as e:\n", | |
"                # str(e) works on Python 2 and 3; `e.message` is gone in 3.\n", | |
"                raise AssertionError(\n", | |
"                    str(e) + '\\n\\nColumn: {!r}\\nRow: {!r}'.format(j, i))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 53 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"expected = pd.DataFrame({'a': [1, np.nan, 3],\n", | |
" 'b': [np.nan, 5, 6]},\n", | |
" index=['x', 'y', 'z'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 54 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"actual = pd.DataFrame([[4, 1],\n", | |
" [6, 3],\n", | |
" [5, np.nan]],\n", | |
" index=['x', 'z', 'y'],\n", | |
" columns=['b', 'a'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 55 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"assert_frames_equal(actual, actual)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 56 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"assert_frames_equal(actual, expected)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "AssertionError", | |
"evalue": "\nItems are not equal:\n ACTUAL: 4.0\n DESIRED: nan\n\nColumn: 'b'\nRow: 'x'", | |
"output_type": "pyerr", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-57-2fa991ae8dd6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0massert_frames_equal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mactual\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexpected\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m<ipython-input-53-fedbc359fc19>\u001b[0m in \u001b[0;36massert_frames_equal\u001b[0;34m(actual, expected)\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mAssertionError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m raise AssertionError(e.message + \n\u001b[0;32m---> 26\u001b[0;31m '\\n\\nColumn: {!r}\\nRow: {!r}'.format(j, i))\n\u001b[0m", | |
"\u001b[0;31mAssertionError\u001b[0m: \nItems are not equal:\n ACTUAL: 4.0\n DESIRED: nan\n\nColumn: 'b'\nRow: 'x'" | |
] | |
} | |
], | |
"prompt_number": 57 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment