Created
July 20, 2017 12:36
-
-
Save cchwala/7189654275c642f2f257dd1f1523e31d to your computer and use it in GitHub Desktop.
Show where pandas `to_timedelta()` overflows without raising an error
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
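"For float input just beyond the int64 nanosecond range, `pandas.to_timedelta()` (pandas 0.20.1, the version used here) does not always raise an `OverflowError`: with `unit='s'` the result silently wraps around to negative values, whereas with `unit='us'` the error is raised. This is demonstrated below." | |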
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"pandas version: 0.20.1\n", | |
" numpy version: 1.12.1\n" | |
] | |
} | |
], | |
"source": [ | |
"print('pandas version: %s' % pd.__version__)\n", | |
"print(' numpy version: %s' % np.__version__)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Function to create an array of floats spaced by the smallest representable increment" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def float_array_with_smallest_increments(initial_float, N_points_in_one_direction):\n", | |
"    \"\"\"Return the floats adjacent to `initial_float`, in ascending order.\n", | |
"\n", | |
"    Starting at `initial_float`, step `N_points_in_one_direction` times\n", | |
"    downward and upward in the smallest representable increments\n", | |
"    (`np.nextafter`). The returned 1-D array holds\n", | |
"    `2 * N_points_in_one_direction + 1` values with `initial_float` in the middle.\n", | |
"    \"\"\"\n", | |
"    # Step towards +/-inf rather than the notebook-level `int_max`/`int_min`:\n", | |
"    # those are only defined in a later cell, and as `nextafter` targets they\n", | |
"    # would stop the walk once it reaches the int64 limits.\n", | |
"    floats_upward = [initial_float]\n", | |
"    floats_downward = [initial_float]\n", | |
"    for _ in range(N_points_in_one_direction):\n", | |
"        floats_upward.append(np.nextafter(floats_upward[-1], np.inf))\n", | |
"        floats_downward.append(np.nextafter(floats_downward[-1], -np.inf))\n", | |
"    # Reverse the downward walk and drop the duplicated starting value.\n", | |
"    return np.array(floats_downward[::-1] + floats_upward[1:])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"int_min = np.iinfo(np.int64).min\n", | |
"int_max = np.iinfo(np.int64).max" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Test overflow of `to_timedelta()` using seconds " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"9223372036.85476684570312500000\n", | |
"9223372036.85476875305175781250\n", | |
"9223372036.85477066040039062500\n", | |
"9223372036.85477256774902343750\n", | |
"9223372036.85477447509765625000\n", | |
"9223372036.85477638244628906250\n", | |
"9223372036.85477828979492187500\n", | |
"9223372036.85478019714355468750\n", | |
"9223372036.85478210449218750000\n", | |
"9223372036.85478401184082031250\n", | |
"9223372036.85478591918945312500\n" | |
] | |
} | |
], | |
"source": [ | |
"seconds_as_floats = float_array_with_smallest_increments(int_max/1e9, 5)\n", | |
"\n", | |
"for v in np.nditer(seconds_as_floats):\n", | |
" print('%.20f' % v)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"TimedeltaIndex([ '106751 days 23:47:16.854767',\n", | |
" '106751 days 23:47:16.854769',\n", | |
" '106751 days 23:47:16.854771',\n", | |
" '106751 days 23:47:16.854773',\n", | |
" '106751 days 23:47:16.854774',\n", | |
" '-106752 days +00:12:43.145224',\n", | |
" '-106752 days +00:12:43.145226',\n", | |
" '-106752 days +00:12:43.145228',\n", | |
" '-106752 days +00:12:43.145230',\n", | |
" '-106752 days +00:12:43.145232',\n", | |
" '-106752 days +00:12:43.145234'],\n", | |
" dtype='timedelta64[ns]', freq=None)" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.to_timedelta(seconds_as_floats, unit='s')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**The last six values overflow and wrap around to negative timedeltas without an `OverflowError` being raised!**" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Test overflow of `to_timedelta()` using microseconds " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"9223372036854766.00000000000000000000\n", | |
"9223372036854768.00000000000000000000\n", | |
"9223372036854770.00000000000000000000\n", | |
"9223372036854772.00000000000000000000\n", | |
"9223372036854774.00000000000000000000\n", | |
"9223372036854776.00000000000000000000\n", | |
"9223372036854778.00000000000000000000\n", | |
"9223372036854780.00000000000000000000\n", | |
"9223372036854782.00000000000000000000\n", | |
"9223372036854784.00000000000000000000\n", | |
"9223372036854786.00000000000000000000\n" | |
] | |
} | |
], | |
"source": [ | |
"microseconds_as_floats = float_array_with_smallest_increments(int_max/1e3, 5)\n", | |
"\n", | |
"for v in np.nditer(microseconds_as_floats):\n", | |
" print('%.20f' % v)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "OverflowError", | |
"evalue": "Python int too large to convert to C long", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mOverflowError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-8-190654f2ef57>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_timedelta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmicroseconds_as_floats\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'us'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36mto_timedelta\u001b[0;34m(arg, unit, box, errors)\u001b[0m\n\u001b[1;32m 80\u001b[0m errors=errors, name=arg.name)\n\u001b[1;32m 81\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mis_list_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ndim'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 82\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_convert_listlike\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0munit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbox\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbox\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 83\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ndim'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 84\u001b[0m raise TypeError('arg must be a string, timedelta, list, tuple, '\n", | |
"\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36m_convert_listlike\u001b[0;34m(arg, unit, box, errors, name)\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 163\u001b[0m value = tslib.array_to_timedelta64(_ensure_object(arg),\n\u001b[0;32m--> 164\u001b[0;31m unit=unit, errors=errors)\n\u001b[0m\u001b[1;32m 165\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'timedelta64[ns]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_timedelta64 (pandas/_libs/tslib.c:58701)\u001b[0;34m()\u001b[0m\n", | |
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_timedelta64 (pandas/_libs/tslib.c:58408)\u001b[0;34m()\u001b[0m\n", | |
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.convert_to_timedelta64 (pandas/_libs/tslib.c:61660)\u001b[0;34m()\u001b[0m\n", | |
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.cast_from_unit (pandas/_libs/tslib.c:68471)\u001b[0;34m()\u001b[0m\n", | |
"\u001b[0;31mOverflowError\u001b[0m: Python int too large to convert to C long" | |
] | |
} | |
], | |
"source": [ | |
"pd.to_timedelta(microseconds_as_floats, unit='us')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
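"**With `unit='us'` an `OverflowError` is correctly raised.** The next two cells show that the first four values still convert, and that `microseconds_as_floats[5]`, the first value beyond the int64 nanosecond range, raises on its own." | |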
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"TimedeltaIndex(['106751 days 23:47:16.854766', '106751 days 23:47:16.854768',\n", | |
" '106751 days 23:47:16.854770', '106751 days 23:47:16.854772'],\n", | |
" dtype='timedelta64[ns]', freq=None)" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.to_timedelta(microseconds_as_floats[0:4], unit='us')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "OverflowError", | |
"evalue": "Python int too large to convert to C long", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mOverflowError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-10-9bedf031e2a6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_timedelta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmicroseconds_as_floats\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'us'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36mto_timedelta\u001b[0;34m(arg, unit, box, errors)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;31m# ...so it must be a scalar value. Return scalar.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m return _coerce_scalar_to_timedelta_type(arg, unit=unit,\n\u001b[0;32m---> 89\u001b[0;31m box=box, errors=errors)\n\u001b[0m\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36m_coerce_scalar_to_timedelta_type\u001b[0;34m(r, unit, box, errors)\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 134\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtslib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconvert_to_timedelta64\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 135\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'raise'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.convert_to_timedelta64 (pandas/_libs/tslib.c:62190)\u001b[0;34m()\u001b[0m\n", | |
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.convert_to_timedelta64 (pandas/_libs/tslib.c:61660)\u001b[0;34m()\u001b[0m\n", | |
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.cast_from_unit (pandas/_libs/tslib.c:68471)\u001b[0;34m()\u001b[0m\n", | |
"\u001b[0;31mOverflowError\u001b[0m: Python int too large to convert to C long" | |
] | |
} | |
], | |
"source": [ | |
"pd.to_timedelta(microseconds_as_floats[5], unit='us')" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment