Created
October 22, 2018 13:24
-
-
Save cchwala/e35c852ea3a870daa2591a7107384c2d to your computer and use it in GitHub Desktop.
test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import h5py\n", | |
"import dask.dataframe as dd\n", | |
"import dask.array as da" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 115, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div><strong>Dask DataFrame Structure:</strong></div>\n", | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>rx</th>\n", | |
" <th>tx</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>npartitions=32</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2017-08-07 13:07:18.261913088</th>\n", | |
" <td>float64</td>\n", | |
" <td>float64</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2017-08-08 07:10:18.283332096</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2017-08-31 05:05:18.282693888</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2017-08-31 22:35:18.225156096</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>\n", | |
"<div>Dask Name: set_index, 419 tasks</div>" | |
], | |
"text/plain": [ | |
"Dask DataFrame Structure:\n", | |
" rx tx\n", | |
"npartitions=32 \n", | |
"2017-08-07 13:07:18.261913088 float64 float64\n", | |
"2017-08-08 07:10:18.283332096 ... ...\n", | |
"... ... ...\n", | |
"2017-08-31 05:05:18.282693888 ... ...\n", | |
"2017-08-31 22:35:18.225156096 ... ...\n", | |
"Dask Name: set_index, 419 tasks" | |
] | |
}, | |
"execution_count": 115, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"h5_reader = h5py.File('/pd/data/regclim_data/raw/cml/ericsson_tn_monthly_2017_2018/cmls_2017_08.h5', \n", | |
" mode='r')\n", | |
"cml_ids = h5_reader['/'].keys()\n", | |
"channels = h5_reader['/'][cml_ids[0]].keys()\n", | |
"\n", | |
"# Link to data in HDF5 file\n", | |
"rx = h5_reader['/'][cml_ids[0]][channels[0]]['rx']\n", | |
"tx = h5_reader['/'][cml_ids[0]][channels[0]]['tx']\n", | |
"time = h5_reader['/'][cml_ids[0]][channels[0]]['time']\n", | |
"\n", | |
"# Concatenate into DaskDataframe\n", | |
"ddf = dd.from_dask_array(\n", | |
" da.stack([\n", | |
" da.from_array(rx, chunks=rx.chunks),\n", | |
" da.from_array(tx, chunks=tx.chunks),\n", | |
" da.from_array(time, chunks=time.chunks)], \n", | |
" axis=1,\n", | |
" ),\n", | |
" columns=['rx', 'tx', 'time']\n", | |
")\n", | |
"\n", | |
"# Cast to correct time representation and set time as index\n", | |
"ddf.time = (ddf.time * 1e9).astype('M8[ns]')\n", | |
"ddf = ddf.set_index('time', sorted=True)\n", | |
"ddf" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 111, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ddf['txrx'] = ddf.tx - ddf.rx" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 112, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div><strong>Dask DataFrame Structure:</strong></div>\n", | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>rx</th>\n", | |
" <th>tx</th>\n", | |
" <th>txrx</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>npartitions=32</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2017-08-07 13:07:18.261913088</th>\n", | |
" <td>float64</td>\n", | |
" <td>float64</td>\n", | |
" <td>float64</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2017-08-08 07:10:18.283332096</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2017-08-31 05:05:18.282693888</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2017-08-31 22:35:18.225156096</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>\n", | |
"<div>Dask Name: assign, 547 tasks</div>" | |
], | |
"text/plain": [ | |
"Dask DataFrame Structure:\n", | |
" rx tx txrx\n", | |
"npartitions=32 \n", | |
"2017-08-07 13:07:18.261913088 float64 float64 float64\n", | |
"2017-08-08 07:10:18.283332096 ... ... ...\n", | |
"... ... ... ...\n", | |
"2017-08-31 05:05:18.282693888 ... ... ...\n", | |
"2017-08-31 22:35:18.225156096 ... ... ...\n", | |
"Dask Name: assign, 547 tasks" | |
] | |
}, | |
"execution_count": 112, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ddf" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 113, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>rx</th>\n", | |
" <th>tx</th>\n", | |
" <th>txrx</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>time</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2017-08-07 13:07:18.261913088</th>\n", | |
" <td>-39.8</td>\n", | |
" <td>16.0</td>\n", | |
" <td>55.8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2017-08-07 13:08:18.294920960</th>\n", | |
" <td>-40.1</td>\n", | |
" <td>16.0</td>\n", | |
" <td>56.1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2017-08-07 13:09:18.280340992</th>\n", | |
" <td>-40.1</td>\n", | |
" <td>16.0</td>\n", | |
" <td>56.1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2017-08-07 13:10:18.248403968</th>\n", | |
" <td>-39.8</td>\n", | |
" <td>16.0</td>\n", | |
" <td>55.8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2017-08-07 13:11:18.248102912</th>\n", | |
" <td>-39.8</td>\n", | |
" <td>16.0</td>\n", | |
" <td>55.8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" rx tx txrx\n", | |
"time \n", | |
"2017-08-07 13:07:18.261913088 -39.8 16.0 55.8\n", | |
"2017-08-07 13:08:18.294920960 -40.1 16.0 56.1\n", | |
"2017-08-07 13:09:18.280340992 -40.1 16.0 56.1\n", | |
"2017-08-07 13:10:18.248403968 -39.8 16.0 55.8\n", | |
"2017-08-07 13:11:18.248102912 -39.8 16.0 55.8" | |
] | |
}, | |
"execution_count": 113, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ddf.compute().head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"|" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.15" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment