Created
February 8, 2017 14:46
-
-
Save spencerkclark/da6319fa9c5462515676f1a62bf03112 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"collapsed": true, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import pandas as pd", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": true, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import xarray as xr", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Here I experiment with removing the ```is_monotonic``` logic from ```_partial_date_slice``` in pd.DatetimeIndex.\n\nBy ```is_monotonic``` logic I primarily mean this if statement: https://github.com/pandas-dev/pandas/blob/master/pandas/tseries/index.py#L1312" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "class ModifiedDatetimeIndex(pd.DatetimeIndex):\n def _partial_date_slice(self, reso, parsed,\n use_lhs=True, use_rhs=True):\n t1, t2 = self._parsed_string_to_bounds(reso, parsed)\n stamps = self.asi8\n lhs_mask = (stamps >= t1.value) if use_lhs else True\n rhs_mask = (stamps <= t2.value) if use_rhs else True\n return (lhs_mask & rhs_mask).nonzero()[0]", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": true, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "rng = pd.date_range('2000-01-01', '2001-01-01', freq='M')", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": true | |
}, | |
"cell_type": "code", | |
"source": "series = pd.Series(rng, index=ModifiedDatetimeIndex(rng))", | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"source": "# I would not expect this to raise a KeyError; I think this stems from the way __contains__\n# is written.\nseries.loc['2000-01']", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "error", | |
"ename": "KeyError", | |
"evalue": "'the label [2000-01] is not in the [index]'", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-6-1ad2eaee8288>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# I would not expect this to raise a KeyError; I think this stems from the way __contains__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m# is written.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mseries\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'2000-01'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m//anaconda/envs/xarray-dev/lib/python2.7/site-packages/pandas/core/indexing.pyc\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1310\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_tuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1311\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1312\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1313\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1314\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m//anaconda/envs/xarray-dev/lib/python2.7/site-packages/pandas/core/indexing.pyc\u001b[0m in \u001b[0;36m_getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1480\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1481\u001b[0m \u001b[0;31m# fall thru to straight lookup\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1482\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_has_valid_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1483\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_label\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1484\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m//anaconda/envs/xarray-dev/lib/python2.7/site-packages/pandas/core/indexing.pyc\u001b[0m in \u001b[0;36m_has_valid_type\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1417\u001b[0m \u001b[0;32mraise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1418\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1419\u001b[0;31m \u001b[0merror\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1420\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1421\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m//anaconda/envs/xarray-dev/lib/python2.7/site-packages/pandas/core/indexing.pyc\u001b[0m in \u001b[0;36merror\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1404\u001b[0m \"key\")\n\u001b[1;32m 1405\u001b[0m raise KeyError(\"the label [%s] is not in the [%s]\" %\n\u001b[0;32m-> 1406\u001b[0;31m (key, self.obj._get_axis_name(axis)))\n\u001b[0m\u001b[1;32m 1407\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1408\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mKeyError\u001b[0m: 'the label [2000-01] is not in the [index]'" | |
] | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"source": "# Any date other than the first one works OK\nseries['2000-02']", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "2000-02-29 2000-02-29\ndtype: datetime64[ns]" | |
}, | |
"metadata": {}, | |
"execution_count": 7 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": true | |
}, | |
"cell_type": "code", | |
"source": "# Using a conventional DatetimeIndex things work OK (presumably because of the is_monotonic logic)\nseries = pd.Series(rng, index=rng)", | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"source": "series.loc['2000-01']", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "2000-01-31 2000-01-31\nFreq: M, dtype: datetime64[ns]" | |
}, | |
"metadata": {}, | |
"execution_count": 9 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# In addition the scalar vs. non-scalar behavior changes in a DataArray\n# Here we use the ModifiedDatetimeIndex\nda = xr.DataArray(rng, coords=[ModifiedDatetimeIndex(rng)], dims=['time'])", | |
"execution_count": 10, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"source": "da.sel(time='2000-01')", | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "<xarray.DataArray (time: 1)>\narray(['2000-01-31T00:00:00.000000000'], dtype='datetime64[ns]')\nCoordinates:\n * time (time) datetime64[ns] 2000-01-31" | |
}, | |
"metadata": {}, | |
"execution_count": 11 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"source": "# Returns a non-scalar time now.\nda.sel(time='2000-01-31')", | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "<xarray.DataArray (time: 1)>\narray(['2000-01-31T00:00:00.000000000'], dtype='datetime64[ns]')\nCoordinates:\n * time (time) datetime64[ns] 2000-01-31" | |
}, | |
"metadata": {}, | |
"execution_count": 12 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": true | |
}, | |
"cell_type": "code", | |
"source": "# Here we use the standard DatetimeIndex\nda = xr.DataArray(rng, coords=[rng], dims=['time'])", | |
"execution_count": 13, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"source": "da.sel(time='2000-01')", | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "<xarray.DataArray (time: 1)>\narray(['2000-01-31T00:00:00.000000000'], dtype='datetime64[ns]')\nCoordinates:\n * time (time) datetime64[ns] 2000-01-31" | |
}, | |
"metadata": {}, | |
"execution_count": 14 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"source": "da.sel(time='2000-01-31')", | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "<xarray.DataArray ()>\narray(949276800000000000L, dtype='datetime64[ns]')\nCoordinates:\n time datetime64[ns] 2000-01-31" | |
}, | |
"metadata": {}, | |
"execution_count": 15 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python2", | |
"display_name": "Python 2", | |
"language": "python" | |
}, | |
"language_info": { | |
"mimetype": "text/x-python", | |
"nbconvert_exporter": "python", | |
"name": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.12", | |
"file_extension": ".py", | |
"codemirror_mode": { | |
"version": 2, | |
"name": "ipython" | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment