Created
September 27, 2016 16:12
-
-
Save mrocklin/85bd3f35892d1ef0f63d5b5774ad8712 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"<img src=\"http://dask.readthedocs.io/en/latest/_images/dask_horizontal.svg\" \n", | |
" width=\"30%\" \n", | |
" align=right\n", | |
" alt=\"Dask logo\">\n", | |
"\n", | |
"Distributed DataFrames\n", | |
"----------------------\n", | |
"\n", | |
"We build a Dask DataFrame of demo timeseries data, persist it in the memory of a cluster, and run familiar Pandas-style computations on it: filtering, groupby-aggregations, and datetime resampling.\n", | |
"\n", | |
"Later on we visualize the task graphs that Dask builds for reductions, groupby-aggregations, datetime resampling, and rolling aggregations." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from dask.distributed import Client, progress\n", | |
"e = Client('localhost:8786')\n", | |
"e" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import dask.dataframe as dd\n", | |
"\n", | |
"df = dd.demo.make_timeseries('2010', '2016',\n", | |
" {'value': float, 'name': str, 'id': int},\n", | |
" freq='10s', partition_freq='7d', seed=1)\n", | |
"\n", | |
"df = df[df.value > 0][['id', 'value', 'name']]\n", | |
"\n", | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = e.persist(df)\n", | |
"progress(df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%time len(df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%time df.groupby(df.id).value.mean().nlargest(10).compute()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Timeseries operations" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df.value.resample('1d').std().head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Visualize algorithms: Typical Groupby" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = dd.demo.make_timeseries('2010-01-01', '2010-12-31',\n", | |
" {'value': float, 'name': str, 'id': int},\n", | |
" freq='10s', partition_freq='1M', seed=1)\n", | |
"\n", | |
"df.value.sum().visualize()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df.groupby(df.id).value.mean().visualize()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Visualize algorithms: Datetime-resampling" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = dd.demo.make_timeseries('2010-01-01', '2010-12-31',\n", | |
" {'value': float, 'name': str, 'id': int},\n", | |
" freq='10s', partition_freq='1M', seed=1)\n", | |
"\n", | |
"df.value.resample('1w').mean().visualize()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Visualize algorithms: Rolling-aggregation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = dd.demo.make_timeseries('2010-01-01', '2010-08-30',\n", | |
" {'value': float, 'name': str, 'id': int},\n", | |
" freq='10s', partition_freq='1M', seed=1)\n", | |
"\n", | |
"\n", | |
"df.value.rolling(100).mean().visualize(rankdir='LR')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python [Root]", | |
"language": "python", | |
"name": "Python [Root]" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
}, | |
"widgets": { | |
"state": { | |
"95f143e963cc41c293bdba6e573f2c9a": { | |
"views": [ | |
{ | |
"cell_index": 4 | |
} | |
] | |
} | |
}, | |
"version": "1.2.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment