Vikrant79 · September 4, 2015 20:29
diff --git a/Coupon Purchase Prediction - BTB Script.ipynb b/Coupon Purchase Prediction - BTB Script.ipynb
diff --git a/Coupon Purchase Prediction - First Script.ipynb b/Coupon Purchase Prediction - First Script.ipynb
diff --git a/Flavor of Physics - Classification.ipynb b/Flavor of Physics - Classification.ipynb
diff --git a/Flavor of Physics - Mix of models.ipynb b/Flavor of Physics - Mix of models.ipynb
diff --git a/Springleaf - Experiments with Classification.ipynb b/Springleaf - Experiments with Classification.ipynb
diff --git a/Springleaf - Experiments with Random Forest.ipynb b/Springleaf - Experiments with Random Forest.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn import ensemble, preprocessing, cross_validation\n",
    "from sklearn.metrics import roc_auc_score as auc\n",
    "from time import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# PREPARE DATA\n",
    "# Read the train and test CSVs, using each file's ID column as the row index.\n",
    "data = pd.read_csv('train_Spring.csv', index_col=\"ID\")\n",
    "test = pd.read_csv('test_Spring.csv', index_col=\"ID\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# remove constants\n",
    "# Count distinct values per training column (as reported by Series.nunique) and drop\n",
    "# every column with fewer than two of them from both the train and the test frame.\n",
    "distinct_counts = [data[col].nunique() for col in data.columns]\n",
    "nunique = pd.Series(distinct_counts, index=data.columns)\n",
    "is_constant = nunique < 2\n",
    "constants = nunique[is_constant].index.tolist()\n",
    "data = data.drop(constants, axis=1)\n",
    "test = test.drop(constants, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Vikrant\\Anaconda\\lib\\site-packages\\numpy\\lib\\arraysetops.py:198: FutureWarning: numpy not_equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
      "  flag = np.concatenate(([True], aux[1:] != aux[:-1]))\n",
      "C:\\Users\\Vikrant\\Anaconda\\lib\\site-packages\\numpy\\lib\\arraysetops.py:251: FutureWarning: numpy equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
      "  return aux[:-1][aux[1:] == aux[:-1]]\n",
      "C:\\Users\\Vikrant\\Anaconda\\lib\\site-packages\\numpy\\lib\\arraysetops.py:384: FutureWarning: numpy equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
      "  bool_ar = (sar[1:] == sar[:-1])\n"
     ]
    }
   ],
   "source": [
    "# encode string\n",
    "# Label-encode every object-dtype column of the training frame. The encoder fitted on\n",
    "# the training values is kept in `encoders` (keyed by column name) and re-used to\n",
    "# transform the same column of the test frame.\n",
    "strings = data.dtypes == 'object'\n",
    "strings = strings[strings].index.tolist()\n",
    "encoders = {}\n",
    "for col in strings:\n",
    "    encoders[col] = preprocessing.LabelEncoder()\n",
    "    data[col] = encoders[col].fit_transform(data[col])\n",
    "    try:\n",
    "        test[col] = encoders[col].transform(test[col])\n",
    "    except Exception:\n",
    "        # lazy way to incorporate the feature only if can be encoded in the test set\n",
    "        # `Exception` instead of a bare `except:` so that KeyboardInterrupt / SystemExit\n",
    "        # (e.g. interrupting the kernel) are not swallowed and mistaken for an\n",
    "        # encoding failure.\n",
    "        del test[col]\n",
    "        del data[col]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# DATA ready\n",
    "# Features: every training column except the label, with missing values replaced by 0.\n",
    "# `axis` is passed by keyword, matching the constant-removal cell; the positional form\n",
    "# is deprecated in later pandas releases.\n",
    "X = data.drop('target', axis=1).fillna(0)\n",
    "# Label: the 'target' column of the training frame.\n",
    "y = data.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# RF FTW :)\n",
    "# Random forest used for both the cross-validation and the final submission fit.\n",
    "rf = ensemble.RandomForestClassifier(\n",
    "    n_estimators=20,\n",
    "    n_jobs=4,\n",
    "    random_state=11,\n",
    ")\n",
    "# Alternative configuration kept for reference:\n",
    "#rf = ensemble.RandomForestClassifier(n_jobs=500, n_estimators = 1000, random_state = 15)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# CROSS VALIDATE AND PRINT TRAIN AND TEST SCORE\n",
    "# 5-fold stratified CV. For each fold: fit on the training indices, then record the AUC\n",
    "# on the training rows, the AUC on the held-out rows, and the elapsed time for the fold\n",
    "# (fit plus both scoring passes).\n",
    "kf = cross_validation.StratifiedKFold(y, n_folds=5, shuffle=True, random_state=11)\n",
    "trscores, cvscores, times = [], [], []\n",
    "for itr, icv in kf:\n",
    "    t = time()\n",
    "    rf.fit(X.iloc[itr], y.iloc[itr])\n",
    "    trscore = auc(y.iloc[itr], rf.predict_proba(X.iloc[itr])[:,1])\n",
    "    cvscore = auc(y.iloc[icv], rf.predict_proba(X.iloc[icv])[:,1])\n",
    "    trscores.append(trscore); cvscores.append(cvscore); times.append(time()-t)\n",
    "# Single parenthesised argument: prints the same text under Python 2 and Python 3.\n",
    "print(\"TRAIN %.4f | TEST %.4f | TIME %.2fm (1-fold)\" % (np.mean(trscores), np.mean(cvscores), np.mean(times)/60))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# MAKING SUBMISSION\n",
    "# Refit the forest on all training rows before scoring the test rows.\n",
    "rf.fit(X, y)\n",
    "# Select the test columns by X's column labels so the features reach the model in the\n",
    "# same order it was trained on; a column missing from `test` raises KeyError here\n",
    "# instead of silently misaligning features.\n",
    "test_features = test[X.columns].fillna(0)\n",
    "submission = pd.DataFrame(rf.predict_proba(test_features)[:,1], index=test.index, columns=['target'])\n",
    "submission.index.name = 'ID'\n",
    "submission.to_csv('Springleaf5.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
 }
diff --git a/SpringLeaf -Kaggle 18AUG15 (1).ipynb b/SpringLeaf -Kaggle 18AUG15 (1).ipynb
diff --git a/SpringLeaf -Kaggle 18AUG15.ipynb b/SpringLeaf -Kaggle 18AUG15.ipynb
diff --git a/Springleaf with xgb.ipynb b/Springleaf with xgb.ipynb
diff --git a/Springleaf with xgb1.ipynb b/Springleaf with xgb1.ipynb
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import numpy as np\n",
	"from sklearn import ensemble, preprocessing, cross_validation\n",
	"from sklearn.metrics import roc_auc_score as auc\n",
	"from time import time"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# PREPARE DATA\n",
	"# Read the train and test CSVs, using each file's ID column as the row index.\n",
	"data = pd.read_csv('train_Spring.csv', index_col=\"ID\")\n",
	"test = pd.read_csv('test_Spring.csv', index_col=\"ID\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# remove constants\n",
	"# Count distinct values per training column (as reported by Series.nunique) and drop\n",
	"# every column with fewer than two of them from both the train and the test frame.\n",
	"distinct_counts = [data[col].nunique() for col in data.columns]\n",
	"nunique = pd.Series(distinct_counts, index=data.columns)\n",
	"is_constant = nunique < 2\n",
	"constants = nunique[is_constant].index.tolist()\n",
	"data = data.drop(constants, axis=1)\n",
	"test = test.drop(constants, axis=1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"C:\\Users\\Vikrant\\Anaconda\\lib\\site-packages\\numpy\\lib\\arraysetops.py:198: FutureWarning: numpy not_equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
	" flag = np.concatenate(([True], aux[1:] != aux[:-1]))\n",
	"C:\\Users\\Vikrant\\Anaconda\\lib\\site-packages\\numpy\\lib\\arraysetops.py:251: FutureWarning: numpy equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
	" return aux[:-1][aux[1:] == aux[:-1]]\n",
	"C:\\Users\\Vikrant\\Anaconda\\lib\\site-packages\\numpy\\lib\\arraysetops.py:384: FutureWarning: numpy equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
	" bool_ar = (sar[1:] == sar[:-1])\n"
	]
	}
	],
	"source": [
	"# encode string\n",
	"# Label-encode every object-dtype column of the training frame. The encoder fitted on\n",
	"# the training values is kept in `encoders` (keyed by column name) and re-used to\n",
	"# transform the same column of the test frame.\n",
	"strings = data.dtypes == 'object'\n",
	"strings = strings[strings].index.tolist()\n",
	"encoders = {}\n",
	"for col in strings:\n",
	"    encoders[col] = preprocessing.LabelEncoder()\n",
	"    data[col] = encoders[col].fit_transform(data[col])\n",
	"    try:\n",
	"        test[col] = encoders[col].transform(test[col])\n",
	"    except Exception:\n",
	"        # lazy way to incorporate the feature only if can be encoded in the test set\n",
	"        # `Exception` instead of a bare `except:` so that KeyboardInterrupt / SystemExit\n",
	"        # (e.g. interrupting the kernel) are not swallowed and mistaken for an\n",
	"        # encoding failure.\n",
	"        del test[col]\n",
	"        del data[col]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# DATA ready\n",
	"# Features: every training column except the label, with missing values replaced by 0.\n",
	"# `axis` is passed by keyword, matching the constant-removal cell; the positional form\n",
	"# is deprecated in later pandas releases.\n",
	"X = data.drop('target', axis=1).fillna(0)\n",
	"# Label: the 'target' column of the training frame.\n",
	"y = data.target"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# RF FTW :)\n",
	"# Random forest used for both the cross-validation and the final submission fit.\n",
	"rf = ensemble.RandomForestClassifier(\n",
	"    n_estimators=20,\n",
	"    n_jobs=4,\n",
	"    random_state=11,\n",
	")\n",
	"# Alternative configuration kept for reference:\n",
	"#rf = ensemble.RandomForestClassifier(n_jobs=500, n_estimators = 1000, random_state = 15)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# CROSS VALIDATE AND PRINT TRAIN AND TEST SCORE\n",
	"# 5-fold stratified CV. For each fold: fit on the training indices, then record the AUC\n",
	"# on the training rows, the AUC on the held-out rows, and the elapsed time for the fold\n",
	"# (fit plus both scoring passes).\n",
	"kf = cross_validation.StratifiedKFold(y, n_folds=5, shuffle=True, random_state=11)\n",
	"trscores, cvscores, times = [], [], []\n",
	"for itr, icv in kf:\n",
	"    t = time()\n",
	"    rf.fit(X.iloc[itr], y.iloc[itr])\n",
	"    trscore = auc(y.iloc[itr], rf.predict_proba(X.iloc[itr])[:,1])\n",
	"    cvscore = auc(y.iloc[icv], rf.predict_proba(X.iloc[icv])[:,1])\n",
	"    trscores.append(trscore); cvscores.append(cvscore); times.append(time()-t)\n",
	"# Single parenthesised argument: prints the same text under Python 2 and Python 3.\n",
	"print(\"TRAIN %.4f | TEST %.4f | TIME %.2fm (1-fold)\" % (np.mean(trscores), np.mean(cvscores), np.mean(times)/60))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# MAKING SUBMISSION\n",
	"# Refit the forest on all training rows before scoring the test rows.\n",
	"rf.fit(X, y)\n",
	"# Select the test columns by X's column labels so the features reach the model in the\n",
	"# same order it was trained on; a column missing from `test` raises KeyError here\n",
	"# instead of silently misaligning features.\n",
	"test_features = test[X.columns].fillna(0)\n",
	"submission = pd.DataFrame(rf.predict_proba(test_features)[:,1], index=test.index, columns=['target'])\n",
	"submission.index.name = 'ID'\n",
	"submission.to_csv('Springleaf5.csv')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.10"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}
No results found