Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save spencerkclark/da6319fa9c5462515676f1a62bf03112 to your computer and use it in GitHub Desktop.
Save spencerkclark/da6319fa9c5462515676f1a62bf03112 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "import pandas as pd",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "import xarray as xr",
"execution_count": 2,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Here I experiment with removing the ```is_monotonic``` logic from ```_partial_date_slice``` in pd.DatetimeIndex.\n\nBy ```is_monotonic``` logic I primarily mean this if statement: https://github.com/pandas-dev/pandas/blob/master/pandas/tseries/index.py#L1312"
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "class ModifiedDatetimeIndex(pd.DatetimeIndex):\n def _partial_date_slice(self, reso, parsed,\n use_lhs=True, use_rhs=True):\n t1, t2 = self._parsed_string_to_bounds(reso, parsed)\n stamps = self.asi8\n lhs_mask = (stamps >= t1.value) if use_lhs else True\n rhs_mask = (stamps <= t2.value) if use_rhs else True\n return (lhs_mask & rhs_mask).nonzero()[0]",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "rng = pd.date_range('2000-01-01', '2001-01-01', freq='M')",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "series = pd.Series(rng, index=ModifiedDatetimeIndex(rng))",
"execution_count": 5,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "# I would not expect this to raise a KeyError; I think this stems from the way __contains__\n# is written.\nseries.loc['2000-01']",
"execution_count": 6,
"outputs": [
{
"output_type": "error",
"ename": "KeyError",
"evalue": "'the label [2000-01] is not in the [index]'",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-6-1ad2eaee8288>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# I would not expect this to raise a KeyError; I think this stems from the way __contains__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m# is written.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mseries\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'2000-01'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m//anaconda/envs/xarray-dev/lib/python2.7/site-packages/pandas/core/indexing.pyc\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1310\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_tuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1311\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1312\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1313\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1314\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m//anaconda/envs/xarray-dev/lib/python2.7/site-packages/pandas/core/indexing.pyc\u001b[0m in \u001b[0;36m_getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1480\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1481\u001b[0m \u001b[0;31m# fall thru to straight lookup\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1482\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_has_valid_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1483\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_label\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1484\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m//anaconda/envs/xarray-dev/lib/python2.7/site-packages/pandas/core/indexing.pyc\u001b[0m in \u001b[0;36m_has_valid_type\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1417\u001b[0m \u001b[0;32mraise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1418\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1419\u001b[0;31m \u001b[0merror\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1420\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1421\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m//anaconda/envs/xarray-dev/lib/python2.7/site-packages/pandas/core/indexing.pyc\u001b[0m in \u001b[0;36merror\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1404\u001b[0m \"key\")\n\u001b[1;32m 1405\u001b[0m raise KeyError(\"the label [%s] is not in the [%s]\" %\n\u001b[0;32m-> 1406\u001b[0;31m (key, self.obj._get_axis_name(axis)))\n\u001b[0m\u001b[1;32m 1407\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1408\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'the label [2000-01] is not in the [index]'"
]
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "# Any date other than the first one works OK\nseries['2000-02']",
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "2000-02-29 2000-02-29\ndtype: datetime64[ns]"
},
"metadata": {},
"execution_count": 7
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "# Using a conventional DatetimeIndex things work OK (presumably because of the is_monotonic logic)\nseries = pd.Series(rng, index=rng)",
"execution_count": 8,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "series.loc['2000-01']",
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "2000-01-31 2000-01-31\nFreq: M, dtype: datetime64[ns]"
},
"metadata": {},
"execution_count": 9
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "# In addition the scalar vs. non-scalar behavior changes in a DataArray\n# Here we use the ModifiedDatetimeIndex\nda = xr.DataArray(rng, coords=[ModifiedDatetimeIndex(rng)], dims=['time'])",
"execution_count": 10,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "da.sel(time='2000-01')",
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "<xarray.DataArray (time: 1)>\narray(['2000-01-31T00:00:00.000000000'], dtype='datetime64[ns]')\nCoordinates:\n * time (time) datetime64[ns] 2000-01-31"
},
"metadata": {},
"execution_count": 11
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "# Returns a non-scalar time now.\nda.sel(time='2000-01-31')",
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "<xarray.DataArray (time: 1)>\narray(['2000-01-31T00:00:00.000000000'], dtype='datetime64[ns]')\nCoordinates:\n * time (time) datetime64[ns] 2000-01-31"
},
"metadata": {},
"execution_count": 12
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "# Here we use the standard DatetimeIndex\nda = xr.DataArray(rng, coords=[rng], dims=['time'])",
"execution_count": 13,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "da.sel(time='2000-01')",
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "<xarray.DataArray (time: 1)>\narray(['2000-01-31T00:00:00.000000000'], dtype='datetime64[ns]')\nCoordinates:\n * time (time) datetime64[ns] 2000-01-31"
},
"metadata": {},
"execution_count": 14
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "da.sel(time='2000-01-31')",
"execution_count": 15,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "<xarray.DataArray ()>\narray(949276800000000000L, dtype='datetime64[ns]')\nCoordinates:\n time datetime64[ns] 2000-01-31"
},
"metadata": {},
"execution_count": 15
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python2",
"display_name": "Python 2",
"language": "python"
},
"language_info": {
"mimetype": "text/x-python",
"nbconvert_exporter": "python",
"name": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12",
"file_extension": ".py",
"codemirror_mode": {
"version": 2,
"name": "ipython"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment