Skip to content

Instantly share code, notes, and snippets.

@andersy005
Created July 13, 2020 20:34
Show Gist options
  • Select an option

  • Save andersy005/6359badb9d89d387b510c31ef12d134e to your computer and use it in GitHub Desktop.

Select an option

Save andersy005/6359badb9d89d387b510c31ef12d134e to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from IPython.core.interactiveshell import InteractiveShell\n",
"InteractiveShell.ast_node_interactivity = \"all\"\n",
"from IPython.display import HTML\n",
"\n",
"import pprint\n",
"import sys\n",
"sys.path.append(\"../ecgtools/\")\n",
"import parsers"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Validating yaml file with yamale.\n",
"Columns in dataframe:\n",
"Index(['path', 'variable', 'time_range', 'experiment_name', 'member_id',\n",
" 'ctrl_branch_year', 'model_name', 'time_freq', 'long_name', 'units',\n",
" 'cell_methods'],\n",
" dtype='object')\n",
"CPU times: user 473 ms, sys: 24.6 ms, total: 498 ms\n",
"Wall time: 494 ms\n"
]
}
],
"source": [
"%%time\n",
"yaml_path = '/Users/mickelso/Desktop/xdev_work/test_output/testing_new.yaml'\n",
"csv_path = '/Users/mickelso/Desktop/xdev_work/test_output/testing_new.csv'\n",
"io_lib = 'netcdf'\n",
"\n",
"Parser = parsers.YAML_Parser(yaml_path, csv_path, io_lib)\n",
"df = Parser.parser()\n",
"#HTML(Parser.parser().to_html(index=False))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Validating yaml file with yamale.\n",
"Columns in dataframe:\n",
"Index(['path', 'variable', 'time_range', 'experiment_name', 'member_id',\n",
" 'ctrl_branch_year', 'model_name', 'time_freq', 'long_name', 'units',\n",
" 'cell_methods'],\n",
" dtype='object')\n",
"CPU times: user 465 ms, sys: 17.3 ms, total: 483 ms\n",
"Wall time: 483 ms\n"
]
}
],
"source": [
"%%time\n",
"yaml_path = '/Users/mickelso/Desktop/xdev_work/test_output/testing_new.yaml'\n",
"csv_path = '/Users/mickelso/Desktop/xdev_work/test_output/testing_new.csv'\n",
"io_lib = 'netcdf'\n",
"\n",
"Parser = parsers.YAML_Parser(yaml_path, csv_path, io_lib)\n",
"df = Parser.parser()\n",
"#HTML(Parser.parser().to_html(index=False))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['path', 'variable', 'time_range', 'experiment_name', 'member_id',\n",
" 'ctrl_branch_year', 'model_name', 'time_freq', 'long_name', 'units',\n",
" 'cell_methods'],\n",
" dtype='object')"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 111 entries, 8 to 14\n",
"Data columns (total 11 columns):\n",
"path 111 non-null object\n",
"variable 111 non-null object\n",
"time_range 48 non-null object\n",
"experiment_name 48 non-null object\n",
"member_id 48 non-null object\n",
"ctrl_branch_year 48 non-null object\n",
"model_name 111 non-null object\n",
"time_freq 111 non-null object\n",
"long_name 48 non-null object\n",
"units 48 non-null object\n",
"cell_methods 48 non-null object\n",
"dtypes: object(11)\n",
"memory usage: 10.4+ KB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import netCDF4 as nc"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
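"NetCDF timings: each step of reading one file with netCDF4 is timed in its own cell below, then the whole sequence is timed together in the last cell."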
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 8.02 ms, sys: 1.73 ms, total: 9.75 ms\n",
"Wall time: 8.8 ms\n"
]
}
],
"source": [
"%%time\n",
"# netCDF4 timings: opening the file\n",
"f = '/Users/mickelso/Desktop/xdev_work/data/CESM_DATA/b.e21.BWmaHIST.f19_g17.PMIP4-past1000.001/ocn/hist/b.e21.BWmaHIST.f19_g17.PMIP4-past1000.001.pop.h.0850-12.nc'\n",
"# open file\n",
"d = nc.Dataset(f, 'r')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 730 µs, sys: 302 µs, total: 1.03 ms\n",
"Wall time: 785 µs\n"
]
}
],
"source": [
"%%time\n",
"# read the first and last values of the 'time' variable, if present\n",
"# NOTE: this only checks for a variable named 'time'; it does not look up the unlimited dimension\n",
"if 'time' in d.variables.keys():\n",
" times = d['time']\n",
" start = str(times[0])\n",
" end = str(times[-1])\n",
" date = start + \"-\" + end"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.12 ms, sys: 13 µs, total: 1.13 ms\n",
"Wall time: 1.15 ms\n"
]
}
],
"source": [
"%%time\n",
"#go through the variables\n",
"var_list = []\n",
"# loop through all variables\n",
"for v in d.variables.keys():\n",
" # keep only variables whose name is not also a dimension name (skips coordinate variables)\n",
" if v not in list(dict(d.dimensions).keys()):\n",
" var_list.append(v)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 7.05 ms, sys: 1.45 ms, total: 8.5 ms\n",
"Wall time: 7.45 ms\n"
]
}
],
"source": [
"%%time\n",
"# collect the 'units' attribute of every variable that defines one\n",
"attr_list = {}\n",
"for v in var_list:\n",
" if hasattr(d.variables[v], 'units'):\n",
" attr_list[v] = getattr(d.variables[v], 'units')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 743 µs, sys: 17 µs, total: 760 µs\n",
"Wall time: 763 µs\n"
]
}
],
"source": [
"%%time\n",
"# close the netCDF file\n",
"d.close()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 14.8 ms, sys: 2.03 ms, total: 16.8 ms\n",
"Wall time: 15.6 ms\n"
]
}
],
"source": [
"%%time\n",
"# netCDF4 timings: full open / read / close sequence in a single cell\n",
"f = '/Users/mickelso/Desktop/xdev_work/data/CESM_DATA/b.e21.BWmaHIST.f19_g17.PMIP4-past1000.001/ocn/hist/b.e21.BWmaHIST.f19_g17.PMIP4-past1000.001.pop.h.0850-12.nc'\n",
"# open file\n",
"d = nc.Dataset(f, 'r')\n",
"\n",
"# read the first and last values of the 'time' variable, if present\n",
"# NOTE: this only checks for a variable named 'time'; it does not look up the unlimited dimension\n",
"if 'time' in d.variables.keys():\n",
" times = d['time']\n",
" start = str(times[0])\n",
" end = str(times[-1])\n",
" date = start + \"-\" + end\n",
"\n",
"#go through the variables\n",
"var_list = []\n",
"# loop through all variables\n",
"for v in d.variables.keys():\n",
" # keep only variables whose name is not also a dimension name (skips coordinate variables)\n",
" if v not in list(dict(d.dimensions).keys()):\n",
" var_list.append(v)\n",
" \n",
"# collect the 'units' attribute of every variable that defines one\n",
"attr_list = {}\n",
"for v in var_list:\n",
" if hasattr(d.variables[v], 'units'):\n",
" attr_list[v] = getattr(d.variables[v], 'units')\n",
" \n",
"# close the netCDF file\n",
"d.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment