Created
July 13, 2020 20:34
-
-
Save andersy005/6359badb9d89d387b510c31ef12d134e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from IPython.core.interactiveshell import InteractiveShell\n", | |
| "InteractiveShell.ast_node_interactivity = \"all\"\n", | |
| "from IPython.display import HTML\n", | |
| "\n", | |
| "import pprint\n", | |
| "import sys\n", | |
| "sys.path.append(\"../ecgtools/\")\n", | |
| "import parsers" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Validating yaml file with yamale.\n", | |
| "Columns in dataframe:\n", | |
| "Index(['path', 'variable', 'time_range', 'experiment_name', 'member_id',\n", | |
| " 'ctrl_branch_year', 'model_name', 'time_freq', 'long_name', 'units',\n", | |
| " 'cell_methods'],\n", | |
| " dtype='object')\n", | |
| "CPU times: user 473 ms, sys: 24.6 ms, total: 498 ms\n", | |
| "Wall time: 494 ms\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%time\n", | |
| "yaml_path = '/Users/mickelso/Desktop/xdev_work/test_output/testing_new.yaml'\n", | |
| "csv_path = '/Users/mickelso/Desktop/xdev_work/test_output/testing_new.csv'\n", | |
| "io_lib = 'netcdf'\n", | |
| "\n", | |
| "Parser = parsers.YAML_Parser(yaml_path, csv_path, io_lib)\n", | |
| "df = Parser.parser()\n", | |
| "#HTML(Parser.parser().to_html(index=False))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Validating yaml file with yamale.\n", | |
| "Columns in dataframe:\n", | |
| "Index(['path', 'variable', 'time_range', 'experiment_name', 'member_id',\n", | |
| " 'ctrl_branch_year', 'model_name', 'time_freq', 'long_name', 'units',\n", | |
| " 'cell_methods'],\n", | |
| " dtype='object')\n", | |
| "CPU times: user 465 ms, sys: 17.3 ms, total: 483 ms\n", | |
| "Wall time: 483 ms\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%time\n", | |
| "yaml_path = '/Users/mickelso/Desktop/xdev_work/test_output/testing_new.yaml'\n", | |
| "csv_path = '/Users/mickelso/Desktop/xdev_work/test_output/testing_new.csv'\n", | |
| "io_lib = 'netcdf'\n", | |
| "\n", | |
| "Parser = parsers.YAML_Parser(yaml_path, csv_path, io_lib)\n", | |
| "df = Parser.parser()\n", | |
| "#HTML(Parser.parser().to_html(index=False))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Index(['path', 'variable', 'time_range', 'experiment_name', 'member_id',\n", | |
| " 'ctrl_branch_year', 'model_name', 'time_freq', 'long_name', 'units',\n", | |
| " 'cell_methods'],\n", | |
| " dtype='object')" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.columns" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "<class 'pandas.core.frame.DataFrame'>\n", | |
| "Int64Index: 111 entries, 8 to 14\n", | |
| "Data columns (total 11 columns):\n", | |
| "path 111 non-null object\n", | |
| "variable 111 non-null object\n", | |
| "time_range 48 non-null object\n", | |
| "experiment_name 48 non-null object\n", | |
| "member_id 48 non-null object\n", | |
| "ctrl_branch_year 48 non-null object\n", | |
| "model_name 111 non-null object\n", | |
| "time_freq 111 non-null object\n", | |
| "long_name 48 non-null object\n", | |
| "units 48 non-null object\n", | |
| "cell_methods 48 non-null object\n", | |
| "dtypes: object(11)\n", | |
| "memory usage: 10.4+ KB\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "df.info()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import netCDF4 as nc" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
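| "## netCDF4 timings\n", | |
| "\n", | |
| "Each step of reading a file's metadata with `netCDF4` is timed in its own cell below; the final cell times all of the steps together." | |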
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CPU times: user 8.02 ms, sys: 1.73 ms, total: 9.75 ms\n", | |
| "Wall time: 8.8 ms\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%time\n", | |
| "# netCDF4-python timings\n", | |
| "f = '/Users/mickelso/Desktop/xdev_work/data/CESM_DATA/b.e21.BWmaHIST.f19_g17.PMIP4-past1000.001/ocn/hist/b.e21.BWmaHIST.f19_g17.PMIP4-past1000.001.pop.h.0850-12.nc'\n", | |
| "# open file\n", | |
| "d = nc.Dataset(f, 'r')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CPU times: user 730 µs, sys: 302 µs, total: 1.03 ms\n", | |
| "Wall time: 785 µs\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%time\n", | |
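| "# Read the first and last values of the time variable and join them into a range string.\n", | |
| "# Note: this assumes the time coordinate is named 'time'; the unlimited dimension is not actually looked up.\n", | |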
| "if 'time' in d.variables.keys():\n", | |
| " times = d['time']\n", | |
| " start = str(times[0])\n", | |
| " end = str(times[-1])\n", | |
| " date = start + \"-\" + end" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CPU times: user 1.12 ms, sys: 13 µs, total: 1.13 ms\n", | |
| "Wall time: 1.15 ms\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%time\n", | |
| "#go through the variables\n", | |
| "var_list = []\n", | |
| "# loop through all variables\n", | |
| "for v in d.variables.keys():\n", | |
| " # collect variables whose name is not also a dimension name (i.e. non-coordinate variables) for the catalog\n", | |
| " if v not in list(dict(d.dimensions).keys()):\n", | |
| " var_list.append(v)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CPU times: user 7.05 ms, sys: 1.45 ms, total: 8.5 ms\n", | |
| "Wall time: 7.45 ms\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%time\n", | |
| "# Collect the 'units' attribute of every variable that defines one.\n", | |
| "attr_list = {}\n", | |
| "for v in var_list:\n", | |
| " if hasattr(d.variables[v], 'units'):\n", | |
| " attr_list[v] = getattr(d.variables[v], 'units')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CPU times: user 743 µs, sys: 17 µs, total: 760 µs\n", | |
| "Wall time: 763 µs\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%time\n", | |
| "# Close the netCDF file.\n", | |
| "d.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CPU times: user 14.8 ms, sys: 2.03 ms, total: 16.8 ms\n", | |
| "Wall time: 15.6 ms\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%time\n", | |
| "# netCDF4-python timings: open, read time range, list variables, read units, and close, all in a single cell\n", | |
| "f = '/Users/mickelso/Desktop/xdev_work/data/CESM_DATA/b.e21.BWmaHIST.f19_g17.PMIP4-past1000.001/ocn/hist/b.e21.BWmaHIST.f19_g17.PMIP4-past1000.001.pop.h.0850-12.nc'\n", | |
| "# open file\n", | |
| "d = nc.Dataset(f, 'r')\n", | |
| "\n", | |
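| "# Read the first and last values of the time variable and join them into a range string.\n", | |
| "# Note: this assumes the time coordinate is named 'time'; the unlimited dimension is not actually looked up.\n", | |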
| "if 'time' in d.variables.keys():\n", | |
| " times = d['time']\n", | |
| " start = str(times[0])\n", | |
| " end = str(times[-1])\n", | |
| " date = start + \"-\" + end\n", | |
| "\n", | |
| "#go through the variables\n", | |
| "var_list = []\n", | |
| "# loop through all variables\n", | |
| "for v in d.variables.keys():\n", | |
| " # collect variables whose name is not also a dimension name (i.e. non-coordinate variables) for the catalog\n", | |
| " if v not in list(dict(d.dimensions).keys()):\n", | |
| " var_list.append(v)\n", | |
| " \n", | |
| "# Collect the 'units' attribute of every variable that defines one.\n", | |
| "attr_list = {}\n", | |
| "for v in var_list:\n", | |
| " if hasattr(d.variables[v], 'units'):\n", | |
| " attr_list[v] = getattr(d.variables[v], 'units')\n", | |
| " \n", | |
| "# Close the netCDF file.\n", | |
| "d.close()" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.4" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment