Skip to content

Instantly share code, notes, and snippets.

@flamingbear
Created November 17, 2015 18:08
Show Gist options
  • Save flamingbear/2a234e7ac6c7baab64fa to your computer and use it in GitHub Desktop.
Save flamingbear/2a234e7ac6c7baab64fa to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"ein.tags": [
"worksheet-0"
]
},
"source": [
"# Look at pandas for filename indexing.\n",
"\n",
"I think this could be a decent way of grabbing file lists by date ranges.\n"
]
},
{
"cell_type": "code",
"execution_count": 219,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"pd.options.display.mpl_style = 'default'"
]
},
{
"cell_type": "code",
"execution_count": 220,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"import os\n",
"import re\n",
"import fnmatch\n",
"import datetime as dt"
]
},
{
"cell_type": "code",
"execution_count": 221,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"DEFAULT_SEA_ICE_PATHS = ['/projects/DATASETS/nsidc0051_gsfc_nasateam_seaice/final-gsfc',\n",
" '/projects/DATASETS/nsidc0081_nrt_nasateam_seaice']\n",
"\n",
"dir_ = '/projects/DATASETS/nsidc0051_gsfc_nasateam_seaice/final-gsfc'"
]
},
{
"cell_type": "code",
"execution_count": 222,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"def find_all_ice_files(directory, filter_='*.bin'):\n",
" filelist = []\n",
" for root, dirs, files in os.walk(directory):\n",
" filelist.extend([os.path.join(root, f) for f in fnmatch.filter(files, filter_)])\n",
" return sorted(filelist)\n"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"search_paths = DEFAULT_SEA_ICE_PATHS\n",
"ice_files = []\n",
"for sp in search_paths:\n",
" ice_files.extend(find_all_ice_files(sp))\n",
"all_files = ice_files"
]
},
{
"cell_type": "code",
"execution_count": 223,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
" date_matcher = re.compile(\n",
" 'nt_(?P<date>(?P<year>\\d{4})(?P<month>\\d{2})(?P<day>\\d{2})?)_(?P<platform>[nf]\\d{2})_(?P<version>nrt|v01|v1\\.1)_(?P<hemisphere>n|s).bin' # noqa\n",
" )\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 224,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [
{
"data": {
"text/plain": [
"24950"
]
},
"execution_count": 224,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(all_files)"
]
},
{
"cell_type": "code",
"execution_count": 226,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [
{
"data": {
"text/plain": [
"('19781101',\n '1978',\n '11',\n '01',\n 'n07',\n 'v1.1',\n 'n',\n '/projects/DATASETS/nsidc0051_gsfc_nasateam_seaice/final-gsfc/north/daily/1978/nt_19781101_n07_v1.1_n.bin')"
]
},
"execution_count": 226,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f = all_files[3]\n",
"date_matcher.search(f).groups() + (f,)"
]
},
{
"cell_type": "code",
"execution_count": 227,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"z = [date_matcher.search(f).groups() + (f,) for f in all_files if not re.search('monthly', f)]\n"
]
},
{
"cell_type": "code",
"execution_count": 228,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [
{
"data": {
"text/plain": [
"[('19781101',\n '1978',\n '11',\n '01',\n 'n07',\n 'v1.1',\n 'n',\n '/projects/DATASETS/nsidc0051_gsfc_nasateam_seaice/final-gsfc/north/daily/1978/nt_19781101_n07_v1.1_n.bin'),\n ('19781103',\n '1978',\n '11',\n '03',\n 'n07',\n 'v1.1',\n 'n',\n '/projects/DATASETS/nsidc0051_gsfc_nasateam_seaice/final-gsfc/north/daily/1978/nt_19781103_n07_v1.1_n.bin')]"
]
},
"execution_count": 228,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"z[3:5]"
]
},
{
"cell_type": "code",
"execution_count": 229,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"df = pd.DataFrame().from_records(z, columns=['datestr', 'year','month', 'day', 'platform', 'version', 'hemis', 'filename'])"
]
},
{
"cell_type": "code",
"execution_count": 234,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [
{
"data": {
"text/plain": [
"'1978'"
]
},
"execution_count": 234,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.year[5]"
]
},
{
"cell_type": "code",
"execution_count": 235,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"df[['year','month', 'day']] = df[['year','month', 'day']].astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 310,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"a = df.copy()"
]
},
{
"cell_type": "code",
"execution_count": 311,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"a = a.set_index([df['year'], df['month'], df['day']])"
]
},
{
"cell_type": "code",
"execution_count": 312,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [
{
"data": {
"text/plain": [
" datestr year month day platform version hemis \\\nyear month day \n1978 10 26 19781026 1978 10 26 n07 v1.1 n \n 28 19781028 1978 10 28 n07 v1.1 n \n 30 19781030 1978 10 30 n07 v1.1 n \n 11 1 19781101 1978 11 1 n07 v1.1 n \n 3 19781103 1978 11 3 n07 v1.1 n \n\n filename \nyear month day \n1978 10 26 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n 28 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n 30 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n 11 1 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n 3 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... "
]
},
"execution_count": 312,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.head()"
]
},
{
"cell_type": "code",
"execution_count": 313,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [
{
"data": {
"text/plain": [
"datetime.date(1978, 11, 1)"
]
},
"execution_count": 313,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt.date(*a.index[3])"
]
},
{
"cell_type": "code",
"execution_count": 314,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"a.index = a.index.map(lambda x: dt.date(*x))"
]
},
{
"cell_type": "code",
"execution_count": 315,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"north = a[a.hemis == 'n']"
]
},
{
"cell_type": "code",
"execution_count": 316,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"days = a[(a.month == 10) & (a.day == 26)]"
]
},
{
"cell_type": "code",
"execution_count": 317,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [
{
"data": {
"text/plain": [
"1980-05-02 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-04 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-06 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-08 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-10 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-12 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-14 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-16 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-18 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-20 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-22 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-24 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-26 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-28 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-30 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-02 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-04 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-06 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-08 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-10 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-12 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-14 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-16 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-18 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-20 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-22 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-24 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-26 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-28 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\n1980-05-30 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea...\nName: filename, dtype: object"
]
},
"execution_count": 317,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a[(a.index >= dt.date(1980, 5, 1)) & (a.index < dt.date(1980, 6,1))].filename"
]
},
{
"cell_type": "code",
"execution_count": 318,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [
{
"data": {
"text/plain": [
"Index([1978-10-28, 1978-10-30, 1978-11-01, 1978-11-03, 1978-11-05, 1978-11-07,\n 1978-11-09, 1978-11-11, 1978-11-13, 1978-11-15, 1978-11-17, 1978-11-19,\n 1978-11-21, 1978-11-23, 1978-11-25, 1978-11-27, 1978-11-29, 1978-12-01,\n 1978-12-03, 1978-12-05, 1978-12-07, 1978-12-09, 1978-12-11, 1978-12-13,\n 1978-12-15, 1978-12-17, 1978-12-19, 1978-12-21, 1978-12-23, 1978-12-25,\n 1978-12-27, 1978-12-29, 1978-12-31, 1979-01-02, 1979-01-04, 1979-01-06,\n 1979-01-08, 1979-01-10, 1979-01-12, 1979-01-14, 1979-01-16, 1979-01-18,\n 1979-01-20, 1979-01-22, 1979-01-24, 1979-01-26, 1979-01-28, 1979-01-30,\n 1979-02-01],\n dtype='object')"
]
},
"execution_count": 318,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.index[1:50]"
]
},
{
"cell_type": "code",
"execution_count": 319,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"a.index = a.index.to_datetime()"
]
},
{
"cell_type": "code",
"execution_count": 320,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"dr = pd.date_range('1978-10-25', dt.date.today().strftime('%Y-%m-%d'))"
]
},
{
"cell_type": "code",
"execution_count": 333,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['1978-10-26', '1978-10-27', '1978-10-28', '1978-10-29'], dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 333,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dr[1:5]"
]
},
{
"cell_type": "code",
"execution_count": 331,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [
{
"data": {
"text/plain": [
" datestr year month day platform version hemis \\\n1978-11-01 19781101 1978 11 1 n07 v1.1 n \n1978-11-01 19781101 1978 11 1 n07 v1.1 s \n\n filename \n1978-11-01 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1978-11-01 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... "
]
},
"execution_count": 331,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a[a.index == pd.Timestamp(dt.date(1978, 11, 1))]\n"
]
},
{
"cell_type": "code",
"execution_count": 337,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"north = north[north.version == 'v1.1']"
]
},
{
"cell_type": "code",
"execution_count": 338,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": [
"north = north.reindex(index=pd.date_range('1978-10-25', dt.date.today().strftime('%Y-%m-%d')))"
]
},
{
"cell_type": "code",
"execution_count": 344,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [
{
"data": {
"text/plain": [
" datestr year month day platform version hemis \\\n1999-12-01 19991201 1999 12 1 f13 v1.1 n \n1999-12-02 19991202 1999 12 2 f13 v1.1 n \n1999-12-03 19991203 1999 12 3 f13 v1.1 n \n1999-12-04 19991204 1999 12 4 f13 v1.1 n \n1999-12-05 19991205 1999 12 5 f13 v1.1 n \n1999-12-06 19991206 1999 12 6 f13 v1.1 n \n1999-12-07 19991207 1999 12 7 f13 v1.1 n \n1999-12-08 19991208 1999 12 8 f13 v1.1 n \n1999-12-09 19991209 1999 12 9 f13 v1.1 n \n1999-12-10 19991210 1999 12 10 f13 v1.1 n \n1999-12-11 19991211 1999 12 11 f13 v1.1 n \n1999-12-12 19991212 1999 12 12 f13 v1.1 n \n1999-12-13 19991213 1999 12 13 f13 v1.1 n \n1999-12-14 19991214 1999 12 14 f13 v1.1 n \n1999-12-15 19991215 1999 12 15 f13 v1.1 n \n1999-12-16 19991216 1999 12 16 f13 v1.1 n \n1999-12-17 19991217 1999 12 17 f13 v1.1 n \n1999-12-18 19991218 1999 12 18 f13 v1.1 n \n1999-12-19 19991219 1999 12 19 f13 v1.1 n \n1999-12-20 19991220 1999 12 20 f13 v1.1 n \n1999-12-21 19991221 1999 12 21 f13 v1.1 n \n1999-12-22 19991222 1999 12 22 f13 v1.1 n \n1999-12-23 19991223 1999 12 23 f13 v1.1 n \n1999-12-24 19991224 1999 12 24 f13 v1.1 n \n1999-12-25 19991225 1999 12 25 f13 v1.1 n \n1999-12-26 19991226 1999 12 26 f13 v1.1 n \n1999-12-27 19991227 1999 12 27 f13 v1.1 n \n1999-12-28 19991228 1999 12 28 f13 v1.1 n \n1999-12-29 19991229 1999 12 29 f13 v1.1 n \n1999-12-30 19991230 1999 12 30 f13 v1.1 n \n1999-12-31 19991231 1999 12 31 f13 v1.1 n \n\n filename \n1999-12-01 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-02 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-03 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-04 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-05 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-06 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-07 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-08 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-09 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-10 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-11 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-12 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-13 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-14 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-15 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-16 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-17 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-18 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-19 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-20 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-21 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-22 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-23 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-24 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-25 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-26 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-27 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-28 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-29 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-30 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... \n1999-12-31 /projects/DATASETS/nsidc0051_gsfc_nasateam_sea... "
]
},
"execution_count": 344,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"north[(north.index.year==1999) & (north.index.month == 12) ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"autoscroll": "json-false",
"collapsed": false,
"ein.tags": [
"worksheet-0"
]
},
"outputs": [],
"source": []
}
],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment