Created
September 24, 2015 19:16
-
-
Save DmitryUlyanov/32969cfcfb88354fcf5f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "# Trylib example" | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "## RF" | |
| }, | |
| { | |
| "metadata": { | |
| "collapsed": true, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "param = {'n_estimators': 100,\n 'criterion' : 'gini',\n 'max_depth':None,\n 'max_features' : 'auto',\n 'min_samples_leaf' : 1,\n \n 'n_jobs':-1,\n}\n\nres = trylib(X,Y,'rf',param,one = False,skf_seed = 12, skf = 10, X_test= X_test).res", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "## ET" | |
| }, | |
| { | |
| "metadata": { | |
| "collapsed": true, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "param = {'n_estimators': 100,\n 'criterion' : 'gini',\n 'max_depth':None,\n 'max_features' : 'auto',\n 'min_samples_leaf' : 1,\n \n 'n_jobs':-1,\n}\n\nres = trylib(X,Y,'et',param,one = False,skf_seed = 12, skf = 10, X_test= X_test).res", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "## XGBoost" | |
| }, | |
| { | |
| "metadata": { | |
| "collapsed": true, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "%%capture --no-stdout --no-display\nparam = {'num_round': 200,\n \n 'seed' : 2441,\n 'max_depth':4,\n 'gamma': 0,\n 'eta':0.02,\n 'min_child_weight':1,\n 'silent':1, \n\n 'objective':'binary:logistic',\n 'scale_pos_weight':3,\n\n 'subsample' : 0.95,\n 'colsample_bytree' : 0.2,\n #'base_score':Y.mean()\n }\n\n# Objectives:\n\n# -- 'objective':'binary:logistic',\n# -- 'objective':'multi:softprob',\n# -- 'objective':'rank:pairwise',\n# -- 'objective':'reg:linear',\n\n# eval_metrics:\n\n# -- 'eval_metric':'mlogloss',\n# -- 'eval_metric':'auc',\n\n# num_class:\n\n# -- 'num_class':9\n\nres = trylib(X,Y,'xg',param,one = False,skf_seed = 12, skf = 10, X_test= X_test).res", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "## Linear SVC" | |
| }, | |
| { | |
| "metadata": { | |
| "collapsed": true, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "param = {'C':1.0,\n 'penalty':'l2',\n 'loss':'squared_hinge',\n 'dual':True,\n 'multi_class':'ovr',\n 'class_weight': None,\n 'random_state':329,\n }\n\nres = trylib(X,Y,'lsvc',param,one = False,skf_seed = 12, skf = 10, X_test= X_test).res", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "## KNN" | |
| }, | |
| { | |
| "metadata": { | |
| "collapsed": true, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "param = {'n_neighbors':5,\n 'weights':'uniform',\n 'metric':'minkowski',\n }\n\n# weights:\n\n# -- 'weights':'uniform'\n# -- 'weights':'distance'\n# [callable]\n\n# metrics:\n\n# -- 'metric':'euclidean'\n# -- 'metric':'cityblock'\n# -- 'metric':'canberra'\n# -- 'metric':'braycurtis'\n# cosine ? \n\nres = trylib(X,Y,'knn',param,one = False,skf_seed = 12, skf = 10, X_test= X_test).res", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "# Holdout validation" | |
| }, | |
| { | |
| "metadata": { | |
| "collapsed": true, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "%%capture --no-stdout --no-display\nfrom sklearn.cross_validation import train_test_split\n\nfor i in range(4):\n x_train, x_ho, y_train, y_ho = train_test_split(X, Y, \n test_size=0.25, \n random_state=i)\n# x_train = x_train.values\n# x_hohout = x_hohout.values\n# y_train = y_train.values\n# y_ho = y_ho.values\n \n param = { }\n \n \n res = trylib_r(x_train,y_train,'rf',param,one = True,skf = 5,X_test=x_ho,skf_seed = 12).res\n\n print 'cv ',res['loss']\n print 'ho ',RMSE(y_ho,res['pr_test']) \n \n #print 'cv classes ', res['accuracy']\n #print 'ho classes ', accuracy_score(y_ho,np.argmax(res['pr_test'],axis=1)) ", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "# 2 level" | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "### 1. First level: base models" | |
| }, | |
| { | |
| "metadata": { | |
| "collapsed": true, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "%%capture --no-stdout --no-display\n\n# X = \n# Y = \n# X_test =\n\n# ============ \ny_metas = []\ny_tests = []\n\nxg_param = {'num_round': 50,\n 'seed' : 21,\n 'max_depth':3,\n 'gamma': 0.01,\n 'eta':0.3,\n 'min_child_weight':1,\n 'silent':1, \n 'objective':'multi:softprob',\n 'num_round':100,\n 'num_class' : #9,\n 'subsample' : 0.8,\n 'colsample_bytree' : 0.2}\n\nclfs = [\n ('xg', xg_param),\n ('lsvc', {'C' : 100}),\n ('knn', {'n_neighbors' : 10 , 'metric': 'canberra'}),\n ('knn', {'n_neighbors' : 1 , 'metric': 'canberra'}),\n ('nusvc',{'nu' : 0.007}),\n ('rf',{'max_features' : 0.01,'n_estimators' : 4000}),\n ('rf',{'max_features' : 0.3,'n_estimators' : 4000}),\n ('et',{'max_features' : 0.1,'n_estimators' : 4000}),\n ]\n\nfor c in clfs:\n print ' ==== ', c[0], ' ====='\n \n res = trylib(X,Y,c[0],c[1],skf =10, X_test = X_test).res\n print \" - %s: log_loss: %f, acc: %f\" %(c[0], res['loss'],res['accuracy'])\n y_metas.append(res['pr'])\n y_tests.append(res['pr_test'])\n\ny_meta = res['y']", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "### 2. Second level: XGBoost on the stacked first-level predictions" | |
| }, | |
| { | |
| "metadata": { | |
| "collapsed": true, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "%%capture --no-stdout --no-display\n\nxg_param = {\n 'seed' : 21,\n 'max_depth':3,\n 'gamma': 0.01,\n 'eta':0.1,\n 'silent':1, \n 'objective':'multi:softprob',\n 'num_round':200,\n 'num_class' : 9,\n 'subsample' : 0.8,\n 'colsample_bytree' : 0.1}\nr = None\n\nmetas = np.hstack(y_metas)\ntest_metas = np.hstack(y_tests)\n\ny_out = []\nfor i in range(10):\n xg_param['seed'] = (i+34)*15\n \n res = trylib(metas,y_meta,'xg',xg_param,skf = 10,one = False, X_test= test_metas).res\n y_out.append(res['pr_test'])\n \n if r is None:\n r = res['pr']\n else:\n r += res['pr']\n print \"%d %f\" % (i, log_loss(y_val,r/(i+1)))\n\npr_test_final = np.mean(y_out,axis = 0)\npr_test_final.shape\n \n#res2 = trylib(np.concatenate([d.values[res['val_index'],:],np.hstack(prs)],axis = 1),y_val,'et',{'max_features' : 0.1,'n_estimators' : 20000, 'compute_importances': True},skf = 10,one = False).res\n\n#prs.append(res['pr_val'])\n#y_val = res['y_val']", | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "# CV template" | |
| }, | |
| { | |
| "metadata": { | |
| "collapsed": true, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "skf = KFold(Y.shape[0], n_folds=10, shuffle=True, random_state=11)\n\nfor it, (train_index, test_index) in enumerate(skf):\n X_train, X_val = X[train_index], X[test_index]\n y_train, y_val = Y[train_index], Y[test_index]", | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "name": "python2", | |
| "display_name": "Python 2", | |
| "language": "python" | |
| }, | |
| "language_info": { | |
| "mimetype": "text/x-python", | |
| "nbconvert_exporter": "python", | |
| "name": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.10", | |
| "file_extension": ".py", | |
| "codemirror_mode": { | |
| "version": 2, | |
| "name": "ipython" | |
| } | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment