Created
September 20, 2018 09:05
-
-
Save lesteve/525110f81610357711016361b82d8069 to your computer and use it in GitHub Desktop.
Dask demo on the RIOC cluster
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Creating the dask cluster" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from dask_jobqueue import OARCluster\n", | |
"cluster = OARCluster(cores=1, memory='4GB')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "ccfe852aca3d4bfeaefb578b3a40aef3", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"VBox(children=(HTML(value='<h2>OARCluster</h2>'), HBox(children=(HTML(value='\\n<div>\\n <style scoped>\\n .d…" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"cluster.scale(4)\n", | |
"cluster" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Job id Name User Submission Date S Queue\n", | |
"---------- -------------- -------------- ------------------- - ----------\n", | |
"92452 dask-worker lesteve 2018-09-20 10:59:10 R short \n", | |
"92453 dask-worker lesteve 2018-09-20 10:59:11 R short \n", | |
"92454 dask-worker lesteve 2018-09-20 10:59:11 R short \n", | |
"92455 dask-worker lesteve 2018-09-20 10:59:11 R short \n" | |
] | |
} | |
], | |
"source": [ | |
"# After some time you should see 4 jobs in the \"running\" state (R in the S column)\n", | |
"!oarstat -u" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Creating the dask client" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from dask.distributed import Client\n", | |
"client = Client(cluster)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Using dask with your existing Python code" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from dask import delayed\n", | |
"import time\n", | |
"\n", | |
"def increment(x):\n", | |
" time.sleep(5)\n", | |
" return x + 1\n", | |
"\n", | |
"def decrement(x):\n", | |
" time.sleep(3)\n", | |
" return x - 1\n", | |
"\n", | |
"def add(x, y):\n", | |
" time.sleep(7)\n", | |
" return x + y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<b>Future: add</b> <font color=\"gray\">status: </font><font color=\"black\">pending</font>, <font color=\"gray\">key: </font>add-937671fc5d5145cafe6fdc2061a8b390" | |
], | |
"text/plain": [ | |
"<Future: status: pending, key: add-937671fc5d5145cafe6fdc2061a8b390>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = client.submit(increment, 1) # x = increment(1)\n", | |
"y = client.submit(decrement, 2) # y = decrement(2)\n", | |
"z = client.submit(add, x, y) # z = add(x, y)\n", | |
"z" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"you can keep doing stuff\n" | |
] | |
} | |
], | |
"source": [ | |
"print('you can keep doing stuff')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"3" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# this blocks until the result is computed\n", | |
"z.result()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Dask DataFrame example" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import dask.dataframe as dd\n", | |
"import os\n", | |
"df = dd.read_csv(\n", | |
" os.path.join('/home/rioc/lesteve/work/',\n", | |
" 'dask-tutorial-pycon-2018/data',\n", | |
" 'nycflights', '*.csv'),\n", | |
" parse_dates={'Date': [0, 1, 2]},\n", | |
" dtype={'TailNum': object,\n", | |
" 'CRSElapsedTime': float,\n", | |
" 'Cancelled': bool})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"2.611892" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Number of flights (displayed in millions)\n", | |
"nb_flights = len(df)\n", | |
"nb_flights / 1e6" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Origin\n", | |
"EWR 10.295469\n", | |
"JFK 10.351299\n", | |
"LGA 7.431142\n", | |
"Name: DepDelay, dtype: float64" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Average departure delay per origin airport, excluding cancelled flights\n", | |
"average_delay = (df[~df.Cancelled].\n", | |
" groupby('Origin').DepDelay.mean().compute())\n", | |
"average_delay" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment