Created
December 17, 2012 02:56
-
-
Save hannes-brt/4315484 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "Cluster_Training_Demo" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Re-import edited modules automatically before each cell runs.\n", | |
"%load_ext autoreload\n", | |
"%autoreload 2\n", | |
"\n", | |
"from IPython.parallel import Client\n", | |
"\n", | |
"# Connect to the engines registered under an IPython parallel profile.\n", | |
"#c = Client(profile='GPU_pig1_ssh')\n", | |
"c = Client(profile='GPU_clusters')\n", | |
"num_c_nodes = len(c.ids)\n", | |
"\n", | |
"# Engine ids currently available through the client.\n", | |
"print c.ids\n", | |
"\n", | |
"# lview: load-balanced view, set to block on calls by default.\n", | |
"# dview: view over all engines (c[:]); cnodes: plain alias of the client.\n", | |
"lview = c.load_balanced_view()\n", | |
"lview.block = True\n", | |
"dview = c[:]\n", | |
"cnodes = c\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"[0, 1, 2, 3, 4, 5, 6, 7]\n" | |
] | |
} | |
], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"@dview.remote(block=False)\n", | |
"def getpid():\n", | |
"    '''Return the id of the process that executes this call.'''\n", | |
"    # Imported inside the body because the function is run through dview.\n", | |
"    from os import getpid as current_pid\n", | |
"    return current_pid()\n", | |
"\n", | |
"# Non-blocking call (block=False): c_pid is the handle of the pending result.\n", | |
"c_pid = getpid()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Enable autoreload on the engines too (%px runs the rest of the line remotely).\n", | |
"%px %load_ext autoreload\n", | |
"%px %autoreload 2\n", | |
"\n", | |
"def assignGPUID(c, n_gpus=4):\n", | |
"    '''Assign GPU ids to the engines round-robin and import the project modules there.\n", | |
"\n", | |
"    c      -- client-like object exposing ``ids`` and per-engine views via ``c[i]``.\n", | |
"    n_gpus -- number of GPU ids to cycle through; engine i is given id i % n_gpus.\n", | |
"\n", | |
"    Each command is sent with ``execute`` and the GPU command is echoed locally.\n", | |
"    '''\n", | |
"    for i in c.ids:\n", | |
"        # Build the command once so the echoed text is exactly what was sent.\n", | |
"        gpu_cmd = 'import gnumpy as gnp; gnp._useGPUid=' + str(i % n_gpus)\n", | |
"        c[i].execute(gpu_cmd)\n", | |
"        c[i].execute('import learningUtil_test; import AE_class;')\n", | |
"        print(gpu_cmd)\n", | |
"\n", | |
"assignGPUID(c)\n", | |
"# Reload the project modules on the engines (%px) so their latest code is used.\n", | |
"%px reload(learningUtil_test)\n", | |
"%px reload(AE_class)\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"[stdout:0] \n", | |
"The autoreload extension is already loaded. To reload it, use:\n", | |
" %reload_ext autoreload\n", | |
"[stdout:1] \n", | |
"The autoreload extension is already loaded. To reload it, use:\n", | |
" %reload_ext autoreload\n", | |
"[stdout:2] \n", | |
"The autoreload extension is already loaded. To reload it, use:\n", | |
" %reload_ext autoreload\n", | |
"[stdout:3] \n", | |
"The autoreload extension is already loaded. To reload it, use:\n", | |
" %reload_ext autoreload\n", | |
"import gnumpy as gnp; gnp._useGPUid=0" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"import gnumpy as gnp; gnp._useGPUid=1\n", | |
"import gnumpy as gnp; gnp._useGPUid=2\n", | |
"import gnumpy as gnp; gnp._useGPUid=3\n", | |
"import gnumpy as gnp; gnp._useGPUid=0\n", | |
"import gnumpy as gnp; gnp._useGPUid=1\n", | |
"import gnumpy as gnp; gnp._useGPUid=2\n", | |
"import gnumpy as gnp; gnp._useGPUid=3\n" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[0:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[1:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[2:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[3:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[4:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[5:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[6:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[7:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[0:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[1:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[2:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[3:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[4:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[5:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[6:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"text": [ | |
"\u001b[0;31mOut[7:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Load dataset\n", | |
"===============" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"## load mnist from skdata\n", | |
"import time\n", | |
"\n", | |
"import numpy as np\n", | |
"from skdata import mnist\n", | |
"from util import show_filters\n", | |
"\n", | |
"# -- load and prepare the data set (even download if necessary)\n", | |
"dtype = 'float32'\n", | |
"n_examples = 60000\n", | |
"#n_examples = 10000\n", | |
"n_test = 10000\n", | |
"#n_test = 1000\n", | |
"n_classes = 10 # -- denoted L in the math expressions\n", | |
"\n", | |
"img_shape = (28, 28)\n", | |
"\n", | |
"data_view = mnist.views.OfficialVectorClassification(x_dtype=dtype)\n", | |
"X = data_view.train.x[:n_examples]\n", | |
"y = data_view.train.y[:n_examples]\n", | |
"\n", | |
"# Indicator matrix of the labels: Y[i, k] is 1.0 where y[i] == k, else 0.0.\n", | |
"# numpy is referenced explicitly so the cell does not depend on names that\n", | |
"# only exist when the kernel was started in pylab mode (newaxis, arange).\n", | |
"Y = 1.0 * (y[:, np.newaxis] == np.arange(0, n_classes))\n", | |
"\n", | |
"X_test = data_view.test.x[:n_test]\n", | |
"y_test = data_view.test.y[:n_test]\n", | |
"\n", | |
"#set up cross-validation set\n", | |
"#n_CV = 10000\n", | |
"\n", | |
"#X_CV = data_view.train.x[-n_CV:]\n", | |
"#y_CV = data_view.train.y[-n_CV:]\n", | |
"#Y_CV = 1.0 * (y_CV[:, np.newaxis] == np.arange(0, n_classes))\n", | |
"\n", | |
"\n", | |
"# m: number of training rows, D: number of columns per row.\n", | |
"m, D = X.shape\n", | |
"\n", | |
"# show dataset\n", | |
"show_filters(X, img_shape, (10, 10));\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Utility functions\n", | |
"==================" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import gnumpy as gnp\n", | |
"# NOTE(review): presumably releases gnumpy's cached buffers in this local process -- confirm against gnumpy.\n", | |
"gnp.free_reuse_cache()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def g_grid_param(dict_list):\n", | |
"    '''Yield one dict per combination of the given parameter candidates.\n", | |
"\n", | |
"    dict_list -- iterable of ``(key, values)`` pairs, e.g. ``some_dict.items()``.\n", | |
"\n", | |
"    Yields a new dict for every combination, with the first pair's values\n", | |
"    varying fastest. An empty ``dict_list`` yields a single empty dict.\n", | |
"    '''\n", | |
"    # Materialise first so non-sliceable inputs (items views, generators) work.\n", | |
"    dict_list = list(dict_list)\n", | |
"    if len(dict_list) < 1:\n", | |
"        yield {}\n", | |
"        return\n", | |
"    key, values = dict_list[0]\n", | |
"    for rest in g_grid_param(dict_list[1:]):\n", | |
"        for value in values:\n", | |
"            # Copy before setting the key: yielding one shared dict would make\n", | |
"            # every combination a caller has stored change on the next step.\n", | |
"            combo = dict(rest)\n", | |
"            combo[key] = value\n", | |
"            yield combo" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import learningUtil_test\n", | |
"import AE_class\n", | |
"reload(AE_class)\n", | |
"reload(learningUtil_test)\n", | |
"\n", | |
"\n", | |
"def genClusterJobs_AE(genDataFunc, batch_size, step_size, decay,\n", | |
"                      momentum_schedule, momentum_values,\n", | |
"                      momentum_bilinear, nesterov_momentum,\n", | |
"                      epoch, print_i, **kwargs):\n", | |
"    '''Build one experiment description per point of a hyper-parameter grid.\n", | |
"\n", | |
"    Each keyword in ``kwargs`` names an ``AE_class.AE1`` constructor argument\n", | |
"    and maps it to an iterable of candidate values. Every combination produced\n", | |
"    by ``g_grid_param`` is layered over the defaults below and used to\n", | |
"    construct a model.\n", | |
"\n", | |
"    Returns a list of dicts holding the constructed model under 'Model', the\n", | |
"    data generator under 'genDataFunc', ``epoch`` under 'loops', and the other\n", | |
"    training settings under their own names.\n", | |
"    '''\n", | |
"    # Defaults for the AE1 constructor; D is read from the notebook namespace.\n", | |
"    def_AE_param = {'Arch': (D, 1000),\n", | |
"                    'actFunc': learningUtil_test.relu_gpu,\n", | |
"                    'actGradA': learningUtil_test.reluGradient_gpu,\n", | |
"                    'tiedWeights': True,\n", | |
"                    'linearOutput': True,\n", | |
"                    'useGPU': True,\n", | |
"                    'bias_offset': 0.}\n", | |
"\n", | |
"    experiment_list = []\n", | |
"    for params in g_grid_param(list(kwargs.items())):\n", | |
"        # Start from a copy of the defaults so overrides from one grid point\n", | |
"        # cannot carry over into the next one.\n", | |
"        model_param = dict(def_AE_param)\n", | |
"        model_param.update(params)\n", | |
"\n", | |
"        model = AE_class.AE1(**model_param)\n", | |
"        experiment_list.append({\n", | |
"            'genDataFunc': genDataFunc,\n", | |
"            'batch_size': batch_size,\n", | |
"            'step_size': step_size,\n", | |
"            'loops': epoch,\n", | |
"            'print_i': print_i,\n", | |
"            'Model': model,\n", | |
"            'decay': decay,\n", | |
"            'momentum_schedule': momentum_schedule,\n", | |
"            'momentum_values': momentum_values,\n", | |
"            'momentum_bilinear': momentum_bilinear,\n", | |
"            'nesterov_momentum': nesterov_momentum,\n", | |
"        })\n", | |
"\n", | |
"    return experiment_list" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def encodeData(X, model, batch_size=1000):\n", | |
"    '''Encode the rows of X with ``model.encode``, one mini-batch at a time.\n", | |
"\n", | |
"    X          -- 2-D array with one example per row.\n", | |
"    model      -- object providing ``encode(batch)`` and ``Arch``; ``Arch[1]``\n", | |
"                  is used as the width of the returned array.\n", | |
"    batch_size -- number of rows passed to ``model.encode`` per call.\n", | |
"\n", | |
"    Returns an ``(X.shape[0], model.Arch[1])`` numpy array. The last batch may\n", | |
"    be shorter than ``batch_size``, so rows past the final full batch are\n", | |
"    encoded as well instead of being left as zeros.\n", | |
"    '''\n", | |
"    # Local import so the function does not rely on a pylab-provided ``np``.\n", | |
"    import numpy as np\n", | |
"\n", | |
"    m, _ = X.shape\n", | |
"    R = np.zeros((m, model.Arch[1]))\n", | |
"    for start in range(0, m, batch_size):\n", | |
"        stop = min(start + batch_size, m)\n", | |
"        R[start:stop] = model.encode(X[start:stop])\n", | |
"\n", | |
"    return R" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Create cluster jobs\n", | |
"==========================" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# -- optimisation settings shared by every experiment\n", | |
"n_epoch = 100\n", | |
"batch_size = 100\n", | |
"step_size = 0.003\n", | |
"decay = 0.99\n", | |
"print_i = 1\n", | |
"\n", | |
"# -- momentum settings\n", | |
"momentum_schedule = (20, 70, 85, 100)\n", | |
"momentum_values = (0.5, 0.99, 0.99, 0.5, 0.5)\n", | |
"momentum_bilinear = False\n", | |
"nesterov_momentum = True\n", | |
"\n", | |
"# -- every keyword from Arch onwards lists the candidate values for one model\n", | |
"#    argument; one experiment is generated per combination of them\n", | |
"clusterJob_AE = genClusterJobs_AE(\n", | |
"    genDataFunc=learningUtil_test.genDataMNIST,\n", | |
"    batch_size=batch_size,\n", | |
"    step_size=step_size,\n", | |
"    decay=decay,\n", | |
"    momentum_schedule=momentum_schedule,\n", | |
"    momentum_values=momentum_values,\n", | |
"    momentum_bilinear=momentum_bilinear,\n", | |
"    nesterov_momentum=nesterov_momentum,\n", | |
"    epoch=n_epoch,\n", | |
"    print_i=print_i,\n", | |
"    Arch=((D, 1000), (D, 3000)),\n", | |
"    actFunc=(learningUtil_test.relu_gpu,),\n", | |
"    actGradA=(learningUtil_test.reluGradient_gpu,),\n", | |
"    probFunc=(learningUtil_test.sigmoid_gpu,),\n", | |
"    linearOutput=(True,),\n", | |
"    tiedWeights=(True,),\n", | |
"    Input_dropout=(0., 0.1, 0.2, 0.5),\n", | |
")" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Train AE\n", | |
"========================" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Hand one task per experiment to the load-balanced view; map_async returns a\n", | |
"# result handle immediately instead of waiting for the jobs to finish.\n", | |
"clusterJob_AE_results = lview.map_async(learningUtil_test.runExperiment, clusterJob_AE)\n", | |
"\n", | |
"print 'Experiments submitted'" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Overall progress of the asynchronous autoencoder jobs.\n", | |
"if clusterJob_AE_results.ready():\n", | |
"    print 'Experiment run time: ', clusterJob_AE_results.elapsed\n", | |
"    #print clusterJob_AE_results.metadata[0]['stdout']\n", | |
"else:\n", | |
"    print 'Experiment still running ', clusterJob_AE_results.progress, '/', len(clusterJob_AE_results), ' run time: ', clusterJob_AE_results.elapsed\n", | |
"\n", | |
"# Per-job status: echo the second-to-last line each job has printed so far.\n", | |
"for i in range(len(clusterJob_AE_results)):\n", | |
"    print '###################'\n", | |
"    job_meta = clusterJob_AE_results.metadata[i]\n", | |
"    log_lines = (job_meta['stdout'] or '').splitlines()\n", | |
"    if job_meta['completed'] is None and len(log_lines) <= 2:\n", | |
"        # Unfinished job that has produced at most two lines of output.\n", | |
"        print 'initilizing'\n", | |
"    elif len(log_lines) >= 2:\n", | |
"        print log_lines[-2]\n", | |
"    else:\n", | |
"        # A finished job with fewer than two output lines has no [-2] entry;\n", | |
"        # show whatever it printed instead of raising IndexError.\n", | |
"        print job_meta['stdout']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Plot log(1 + cost) against the cost index for each experiment; the plotting\n", | |
"# branch only runs once the whole job set reports ready.\n", | |
"# NOTE(review): figure, plt and np are not imported anywhere in this notebook --\n", | |
"# presumably supplied by pylab mode; confirm.\n", | |
"# NOTE(review): this bare figure() call opens its own figure before plt.figure()\n", | |
"# below creates the one that is drawn on -- confirm it is intended.\n", | |
"figure(figsize=(2,2));\n", | |
"\n", | |
"if clusterJob_AE_results.ready() is True:\n", | |
" plt.figure();\n", | |
" for i in range(len(clusterJob_AE_results)):\n", | |
" result = clusterJob_AE_results[i]\n", | |
" cost = result['cost']\n", | |
" # mean over the last 1000 cost entries\n", | |
" print 'avg cost: ', cost[-1000:].mean()\n", | |
" plt.plot(np.arange(cost.shape[0]), np.log(1.+cost))\n", | |
" plt.title('struct denoising cost, avg. %0.2f'%cost[-1000:].mean(), fontsize=20)\n", | |
" #legend()\n", | |
" " | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Analysis\n", | |
"===================" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"SVM\n", | |
"========================" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Derive the SVM jobs from the autoencoder results, then submit them without waiting.\n", | |
"# NOTE(review): clusterJob_AE_results is the handle returned by map_async --\n", | |
"# presumably genClusterJobs_SVM waits for the results when it reads them; confirm.\n", | |
"clusterJob_SVM = learningUtil_test.genClusterJobs_SVM(clusterJob_AE_results, learningUtil_test.genDataMNIST)\n", | |
"clusterJob_SVM_results = lview.map_async(learningUtil_test.runLinearSVM, clusterJob_SVM)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Overall state of the asynchronous SVM jobs.\n", | |
"if not clusterJob_SVM_results.ready():\n", | |
"    print 'cluster jobs still running'\n", | |
"else:\n", | |
"    print 'totel time:', clusterJob_SVM_results.elapsed\n", | |
"\n", | |
"# Per-job report: captured stdout for jobs whose 'completed' entry is set.\n", | |
"for i in range(len(clusterJob_SVM_results)):\n", | |
"    print '###################'\n", | |
"    job_meta = clusterJob_SVM_results.metadata[i]\n", | |
"    if job_meta['completed'] is None:\n", | |
"        print 'job still running'\n", | |
"    else:\n", | |
"        print job_meta['stdout']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment