tvorogme · December 5, 2016 02:11
diff --git a/first_stepic.ipynb b/first_stepic.ipynb
diff --git a/second_stepic.ipynb b/second_stepic.ipynb
diff --git a/third_kaggle.ipynb b/third_kaggle.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "\n",
    "import numpy as np\n",
    "from tqdm import tqdm_notebook\n",
    "from collections import Counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": [
    "# Directory holding the competition CSV files.\n",
    "location = 'kaggle/'\n",
    "\n",
    "# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0;\n",
    "# `pd.read_csv` with its defaults (no index column) reads the same frame\n",
    "# as `from_csv(..., index_col=None)`.\n",
    "events = pd.read_csv(location + 'user_activity.csv')\n",
    "structure = pd.read_csv(location + 'structure.csv')\n",
    "targets = pd.read_csv(location + 'targets.csv')\n",
    "events_test = pd.read_csv(location + 'user_activity_test.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 497,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Ids of the users whose target row has passed == 1.\n",
    "passed_user_ids = set(targets[targets.passed == 1].user_id.values)\n",
    "\n",
    "# Keep one event row per such user (the first one in the current row\n",
    "# order) and count how often each step_id occurs among those rows.\n",
    "first_rows_of_passed = events[events.user_id.isin(passed_user_ids)].drop_duplicates('user_id')\n",
    "counter = Counter(first_rows_of_passed.step_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 498,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# `defaultdict` is only imported in a later cell, so on a fresh kernel this\n",
    "# cell raised NameError; import it here so the notebook runs top to bottom.\n",
    "from collections import defaultdict\n",
    "\n",
    "# Divisor that turns the per-step counts into shares.\n",
    "# NOTE(review): 659 is hard-coded -- presumably the number of users with\n",
    "# passed == 1; confirm and derive it from the data instead.\n",
    "COOL_FEATURE_DENOMINATOR = 659\n",
    "\n",
    "# step_id -> count / 659; step ids that were never counted default to 0.\n",
    "cool_feature = defaultdict(lambda: 0)\n",
    "for step_id, occurrences in counter.items():\n",
    "    cool_feature[step_id] = occurrences / COOL_FEATURE_DENOMINATOR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 499,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Order the training events chronologically (in place).\n",
    "events.sort_values('time', inplace=True)\n",
    "\n",
    "# Order the course structure by module, lesson and step position (in place),\n",
    "# then keep the step ids in that order as a plain list.\n",
    "structure_order = ['module_position', 'lesson_position', 'step_position']\n",
    "structure.sort_values(by=structure_order, inplace=True)\n",
    "vec = structure['step_id'].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 500,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Replace the categorical `action` and `step_type` columns of the training\n",
    "# events with integer codes.\n",
    "#\n",
    "# Codes are assigned in sorted label order.  Enumerating a bare `set` of\n",
    "# strings (as before) gives an order that changes between interpreter runs\n",
    "# because of string hash randomisation, so the codes -- and every later\n",
    "# `action == <number>` filter -- were not reproducible.\n",
    "# NOTE(review): later cells filter on action codes 1 and 2; confirm which\n",
    "# labels those numbers are meant to select under this ordering.\n",
    "desc = {label: code for code, label in enumerate(sorted(set(events.action), key=str))}\n",
    "events.action = [desc[label] for label in events.action]\n",
    "\n",
    "desc = {label: code for code, label in enumerate(sorted(set(events.step_type), key=str))}\n",
    "events.step_type = [desc[label] for label in events.step_type]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 501,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Replace the categorical `action` and `step_type` columns of the test\n",
    "# events with integer codes.\n",
    "#\n",
    "# Codes are assigned in sorted label order instead of bare `set` order,\n",
    "# which changes between interpreter runs (string hash randomisation).\n",
    "# NOTE(review): the codes agree with an encoding built the same way from\n",
    "# the training events only if both files contain the same set of labels --\n",
    "# confirm.\n",
    "desc = {label: code for code, label in enumerate(sorted(set(events_test.action), key=str))}\n",
    "events_test.action = [desc[label] for label in events_test.action]\n",
    "\n",
    "desc = {label: code for code, label in enumerate(sorted(set(events_test.step_type), key=str))}\n",
    "events_test.step_type = [desc[label] for label in events_test.step_type]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 502,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Total number of cells (rows x columns) of the structure frame.\n",
    "# NOTE(review): if a row count was intended, `len(structure)` is the call to\n",
    "# use -- confirm; the value is not read again in the visible cells.\n",
    "ALL_CURSES = structure.size\n",
    "\n",
    "# Unique test user ids followed by unique train user ids (an id present in\n",
    "# both files appears twice).\n",
    "test_user_ids = list(set(events_test.user_id))\n",
    "train_user_ids = list(set(events.user_id))\n",
    "ALL_USERS = test_user_ids + train_user_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 503,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# user_id -> frame holding that user's events.\n",
    "#\n",
    "# One groupby pass per file replaces a full boolean scan of the events\n",
    "# frame for every single user (users x rows work).  Each group keeps its\n",
    "# rows in the frame's current order, as the boolean filter did.\n",
    "user_event_pd_frame = {}\n",
    "\n",
    "# Test events first, then train events: an id present in both files ends up\n",
    "# with its train events, as before.\n",
    "for source_events in (events_test, events):\n",
    "    for user, user_events in tqdm_notebook(source_events.groupby('user_id')):\n",
    "        user_event_pd_frame[user] = user_events"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# user_id -> Counter of the action codes in that user's events.\n",
    "user_actions_count = {\n",
    "    user: Counter(user_event_pd_frame[user].action) for user in ALL_USERS\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# step_id -> step_cost, taken row by row from the structure frame.\n",
    "action_costs = dict(zip(structure.step_id, structure.step_cost))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# user_id -> rows of `structure` for the steps that user has events on.\n",
    "fast_struct = {}\n",
    "for user in tqdm_notebook(ALL_USERS):\n",
    "    touched_steps = set(user_event_pd_frame[user].step_id)\n",
    "    fast_struct[user] = structure[structure.step_id.isin(touched_steps)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "from collections import defaultdict\n",
    "\n",
    "# user_id -> step_ids of that user's events whose action code is 2.\n",
    "# NOTE(review): code 2 is presumably the 'passed' action -- confirm against\n",
    "# the action encoding.\n",
    "user_passed_actions = defaultdict(list)\n",
    "\n",
    "for user in tqdm_notebook(ALL_USERS):\n",
    "    user_events = user_event_pd_frame[user]\n",
    "    user_passed_actions[user] = user_events.loc[user_events.action == 2, 'step_id']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Largest per-user total step cost over the steps in `user_passed_actions`.\n",
    "MAX_STEP_COST_SUM = max(\n",
    "    sum(action_costs[step] for step in user_passed_actions[user])\n",
    "    for user in ALL_USERS\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# user_id -> (number of steps in user_passed_actions) divided by (number of\n",
    "# those steps whose cost is > 0), or 0 when no such step has a positive cost.\n",
    "costed_passed_user_actions = {}\n",
    "\n",
    "for user in ALL_USERS:\n",
    "    step_costs = [action_costs[step] for step in user_passed_actions[user]]\n",
    "    costed = sum(1 for cost in step_costs if cost > 0)\n",
    "    costed_passed_user_actions[user] = len(step_costs) / costed if costed else 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# user_id -> value derived from the length of the user's activity span and\n",
    "# the number of distinct event times in it (times are taken relative to\n",
    "# the user's first event); 0 for a user without events.\n",
    "FuckUpTimePart = {}\n",
    "\n",
    "for user in tqdm_notebook(ALL_USERS):\n",
    "    times = sorted(user_event_pd_frame[user].time)\n",
    "    if times:\n",
    "        offsets = [t - times[0] for t in times]\n",
    "        # Number of integer time units in [0, last offset].  Computed\n",
    "        # directly instead of building list(range(...)), which allocated\n",
    "        # one element per unit of the whole span just to take its length.\n",
    "        span_length = offsets[-1] + 1\n",
    "\n",
    "        # NOTE(review): by operator precedence this evaluates as\n",
    "        #   span_length - (distinct / span_length / 100)\n",
    "        # and not as (span_length - distinct) / span_length / 100 --\n",
    "        # confirm which one is intended.  The expression is kept as it was.\n",
    "        FuckUpTimePart[user] = span_length - len(set(offsets)) / span_length / 100\n",
    "    else:\n",
    "        FuckUpTimePart[user] = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "def _cost_sum_by_action(action_code):\n",
    "    \"\"\"Return user_id -> summed step_cost over events with ``action_code``.\"\"\"\n",
    "    totals = {}\n",
    "    for user in tqdm_notebook(ALL_USERS):\n",
    "        user_events = user_event_pd_frame[user]\n",
    "        totals[user] = sum(user_events[user_events.action == action_code].step_cost)\n",
    "    return totals\n",
    "\n",
    "\n",
    "# Per-user step_cost totals for action code 1 and for action code 2.\n",
    "# NOTE(review): presumably 1 = viewed and 2 = passed -- confirm against the\n",
    "# action encoding.\n",
    "user_viewed_sum = _cost_sum_by_action(1)\n",
    "user_passed_sum = _cost_sum_by_action(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Re-apply the module / lesson / step ordering to the structure frame (in place).\n",
    "structure.sort_values(by=['module_position', 'lesson_position', 'step_position'], inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Step ids in the structure frame's current row order, as a plain list.\n",
    "step_id_position = structure['step_id'].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 729,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Fixed, sorted list of every action code counted for any user.  Per-user\n",
    "# action counts are emitted in this order so that a given feature column\n",
    "# always refers to the same action.\n",
    "ACTION_CODES = sorted(set().union(*user_actions_count.values()))\n",
    "\n",
    "\n",
    "def gen_features(us_id, test=False):\n",
    "    \"\"\"Build the feature vector for one user.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    us_id : user id; must be a key of the per-user lookup tables built in\n",
    "        the earlier cells.\n",
    "    test : unused; kept so that existing calls keep working.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple ``(us_id, features)`` where ``features`` is a flat list.\n",
    "    \"\"\"\n",
    "    now_ev = user_event_pd_frame[us_id]\n",
    "\n",
    "    # describe() is computed once; its flattened values are still appended\n",
    "    # in two places below, as before.\n",
    "    summary_stats = [value for row in now_ev.describe().values for value in row]\n",
    "\n",
    "    now_x = list(summary_stats)\n",
    "\n",
    "    # Highest cool_feature share among the steps this user has events on.\n",
    "    now_x.append(max(cool_feature[step] for step in now_ev.step_id))\n",
    "    now_x.append(FuckUpTimePart[us_id])\n",
    "    now_x.append(user_passed_sum[us_id])\n",
    "    now_x.append(user_viewed_sum[us_id])\n",
    "    now_x.append(costed_passed_user_actions[us_id])\n",
    "    now_x.append(now_ev.step_cost.sum())\n",
    "\n",
    "    # One count per known action code, in a fixed order.  Iterating the\n",
    "    # user's Counter directly (as before) produced a different number of\n",
    "    # values in a different order for different users, which shifted all\n",
    "    # following features into the wrong columns.  A Counter returns 0 for\n",
    "    # codes the user never produced.\n",
    "    action_counts = user_actions_count[us_id]\n",
    "    now_x.extend(action_counts[code] for code in ACTION_CODES)\n",
    "\n",
    "    now_x.extend(summary_stats)\n",
    "\n",
    "    now_x.append(now_ev.step_type.max())\n",
    "    now_x.append(now_ev.step_type.min())\n",
    "    now_x.append(now_ev.step_cost.sum())\n",
    "    now_x.append(now_ev.step_cost.mean())\n",
    "\n",
    "    return (us_id, now_x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 708,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Feature rows (X) and labels (Y) for every user in the training events.\n",
    "X, Y = [], []\n",
    "for us_id in tqdm_notebook(set(events.user_id.tolist())):\n",
    "    user_id, features = gen_features(us_id)\n",
    "    X.append(features)\n",
    "    # First `passed` value found for this user in targets.\n",
    "    Y.append(targets.loc[targets.user_id == user_id, 'passed'].values[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 709,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Feature rows for the test users; `ind` keeps their ids in the same order.\n",
    "X_test, ind = [], []\n",
    "for us_id in tqdm_notebook(set(events_test.user_id.tolist())):\n",
    "    user_id, features = gen_features(us_id, True)\n",
    "    ind.append(user_id)\n",
    "    X_test.append(features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 710,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#\n",
    "# Rows of X can contain NaN features that need to be filled;\n",
    "# convert X to a pandas DataFrame so that fillna can be used.\n",
    "#\n",
    "X = pd.DataFrame(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 711,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Backward fill: each NaN takes the next valid value further down the same\n",
    "# column.\n",
    "# NOTE(review): rows are users, so the value comes from another user's\n",
    "# row -- confirm this is intended.\n",
    "X = X.bfill()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 712,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Put 0 into every value that is still missing.\n",
    "X = X.fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 713,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Backward-fill NaN values down each column of the test features, then put\n",
    "# 0 into whatever is still missing.\n",
    "X_test = pd.DataFrame(X_test).bfill().fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 714,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Turn the feature frame into a float32 ndarray.\n",
    "X = X.values.astype(np.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 715,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# `DataFrame.as_matrix()` was deprecated in pandas 0.23 and removed in 1.0;\n",
    "# `.values` returns the same ndarray and is what the conversion of X uses.\n",
    "X_test = X_test.values.astype(np.float32)\n",
    "\n",
    "# Labels as a flat int32 vector.\n",
    "Y = np.ravel(Y).astype(np.int32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 716,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def feature_normalize(feature_index, features_array):\n",
    "    f3_max = max([a[feature_index] for a in features_array])\n",
    "    f3_min = min([a[feature_index] for a in features_array])\n",
    "\n",
    "    for row in features_array:\n",
    "        if (f3_max - f3_min) != 0:\n",
    "            row[feature_index] = (row[feature_index] - f3_min) / (f3_max - f3_min)\n",
    "        else:\n",
    "            pass\n",
    "    \n",
    "    return features_array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 717,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Min-max scale every feature column of X (feature_normalize works in place).\n",
    "n_train_columns = len(X[0])\n",
    "for column in range(n_train_columns):\n",
    "    X = feature_normalize(column, X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 718,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Min-max scale every feature column of X_test (in place).\n",
    "# NOTE(review): the min and max come from X_test itself, not from the\n",
    "# training features, so train and test are scaled differently -- confirm\n",
    "# this is intended.\n",
    "n_test_columns = len(X_test[0])\n",
    "for column in range(n_test_columns):\n",
    "    X_test = feature_normalize(column, X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 719,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": [
    "# `sklearn.cross_validation` was removed in scikit-learn 0.20; the splitter\n",
    "# has lived in `sklearn.model_selection` since 0.18.  Fall back to the old\n",
    "# module only on installations that predate it.\n",
    "try:\n",
    "    from sklearn.model_selection import train_test_split\n",
    "except ImportError:\n",
    "    from sklearn.cross_validation import train_test_split\n",
    "from sklearn.metrics import f1_score\n",
    "\n",
    "# Hold out 10% of the rows for validation; the fixed seed keeps the split\n",
    "# reproducible.\n",
    "Xtr, Xval, Ytr, Yval = train_test_split(X, Y, test_size=0.1, random_state=128)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 720,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import theano\n",
    "import theano.tensor as T\n",
    "import lasagne"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 721,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Symbolic placeholders: a vector named 'X' and an int32 scalar.\n",
    "# NOTE(review): neither name is referenced again in the visible cells.\n",
    "input_X = T.vector(\"X\")\n",
    "target_y = T.scalar(dtype='int32')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 722,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from lasagne.updates import sgd\n",
    "from lasagne.nonlinearities import leaky_rectify, softmax, tanh, elu\n",
    "from lasagne.layers import InputLayer, DenseLayer\n",
    "\n",
    "# Fully connected classifier: input -> 100 -> 60 -> 20 -> 2-way softmax,\n",
    "# with every hidden dense layer wrapped by batch_norm.\n",
    "l_in = InputLayer(shape=(None, len(X[0])))\n",
    "\n",
    "hl = DenseLayer(incoming=l_in, num_units=100)\n",
    "bb = lasagne.layers.batch_norm(hl)\n",
    "\n",
    "# Feed the batch-normalised output forward.  This layer used to take `hl`,\n",
    "# which left the first batch_norm wrapper out of the network, unlike the\n",
    "# two layers below that already consume `bb`.\n",
    "hl = DenseLayer(incoming=bb, num_units=60)\n",
    "bb = lasagne.layers.batch_norm(hl)\n",
    "\n",
    "hl = DenseLayer(incoming=bb, num_units=20)\n",
    "bb = lasagne.layers.batch_norm(hl)\n",
    "\n",
    "l_out = DenseLayer(incoming=bb, num_units=2, nonlinearity=softmax, name='outputlayer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 723,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Symbolic inputs: a matrix named 'X' and an int32 vector named 'target'.\n",
    "sym_x = T.matrix('X')\n",
    "sym_t = T.ivector('target')\n",
    "\n",
    "# Output of l_out for sym_x, once with deterministic=False (used for the\n",
    "# training cost) and once with deterministic=True (used for evaluation).\n",
    "train_out = lasagne.layers.get_output(l_out, {l_in: sym_x}, deterministic=False)\n",
    "eval_out = lasagne.layers.get_output(l_out, {l_in: sym_x}, deterministic=True)\n",
    "\n",
    "# Trainable parameters of all layers feeding into l_out.\n",
    "all_params = lasagne.layers.get_all_params(l_out, trainable=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 724,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Mean categorical cross-entropy against the integer labels, for the\n",
    "# training-mode output and for the evaluation-mode output.\n",
    "cost_train = T.mean(T.nnet.categorical_crossentropy(train_out, sym_t))\n",
    "cost_eval = T.mean(T.nnet.categorical_crossentropy(eval_out, sym_t))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 725,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Gradients of the training cost with respect to the trainable parameters,\n",
    "# turned into Adagrad update rules.\n",
    "all_grads = T.grad(cost=cost_train, wrt=all_params)\n",
    "updates = lasagne.updates.adagrad(\n",
    "    loss_or_grads=all_grads, params=all_params, learning_rate=0.1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 726,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Evaluation: returns [cost_eval, eval_out] for a batch and its labels.\n",
    "f_eval = theano.function(inputs=[sym_x, sym_t],\n",
    "                         outputs=[cost_eval, eval_out])\n",
    "\n",
    "# Training step: returns [cost_train, eval_out] and applies `updates`.\n",
    "f_train = theano.function(inputs=[sym_x, sym_t],\n",
    "                          outputs=[cost_train, eval_out],\n",
    "                          updates=updates)\n",
    "\n",
    "# Prediction: returns eval_out for a batch of feature rows.\n",
    "f_pred = theano.function(inputs=[sym_x],\n",
    "                         outputs=eval_out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 727,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def iterate_minibatches(inputs, targets, batchsize, inputs_new=None):\n",
    "    assert len(inputs) == len(targets)\n",
    "    if inputs_new != None:\n",
    "        assert len(inputs_new) == len(inputs_new)\n",
    "    indices = np.arange(len(inputs))\n",
    "    np.random.shuffle(indices)\n",
    "    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):\n",
    "        excerpt = indices[start_idx:start_idx + batchsize]\n",
    "        if inputs_new != None:\n",
    "            yield inputs[excerpt], inputs_new[excerpt], targets[excerpt]\n",
    "        else:\n",
    "            yield inputs[excerpt], targets[excerpt]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 739,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-739-4cf751260849>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my\u001b[0m \u001b[0;32min\u001b[0m \u001b[0miterate_minibatches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mint32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m         \u001b[0mf_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      7\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mold_score\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mnow_score\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m             \u001b[0mold_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnow_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/a.tvorozhkov/anaconda3/lib/python3.5/site-packages/theano/compile/function_module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    871\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    872\u001b[0m             \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 873\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0moutput_subset\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    874\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_subset\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moutput_subset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    875\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# Train the network and keep the test-set predictions of the epoch with\n",
    "# the best validation F1.\n",
    "#\n",
    "# `now_score` used to be read without ever being assigned in this notebook,\n",
    "# so the loop raised NameError on a fresh kernel.  It is now the F1 score\n",
    "# on the held-out split created by train_test_split.\n",
    "# NOTE(review): training on Xtr only (instead of all of X) and scoring once\n",
    "# per epoch are choices made here so that the score is not computed on\n",
    "# rows the network was trained on -- confirm they match the intent.\n",
    "N_EPOCHS = 1000\n",
    "BATCH_SIZE = 10\n",
    "\n",
    "old_score = 0\n",
    "answer = []\n",
    "predicted = []\n",
    "for epoch in range(N_EPOCHS):\n",
    "    for x, y in iterate_minibatches(Xtr, Ytr, BATCH_SIZE):\n",
    "        f_train(x, y)\n",
    "\n",
    "    now_score = f1_score(Yval, f_pred(Xval).argmax(axis=1))\n",
    "    if old_score < now_score:\n",
    "        old_score = now_score\n",
    "        # Class probabilities and the arg-max class for every test row.\n",
    "        predicted = f_pred(X_test)\n",
    "        answer = predicted.argmax(axis=1).tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 732,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Empty frame that the following cells fill with the submission columns.\n",
    "a = pd.DataFrame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 733,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Predicted class per test row, as stored in `answer` by the training loop.\n",
    "a['passed'] = answer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 734,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# User ids collected in `ind` while the test feature rows were built.\n",
    "a['user_id'] = ind"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 735,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Use user_id as the index of the frame.\n",
    "a = a.set_index('user_id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 736,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>passed</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>user_id</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8193</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16387</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8196</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         passed\n",
       "user_id        \n",
       "8193          0\n",
       "16387         0\n",
       "8196          0\n",
       "5             0\n",
       "9             0"
      ]
     },
     "execution_count": 736,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the frame.\n",
    "a.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 738,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Write the frame (user_id index plus the passed column) to red.csv.\n",
    "a.to_csv('red.csv')"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  },
  "widgets": {
   "state": {
    "00cc5f7a46ca4be0bba00e6ceb579bb5": {
     "views": [
      {
       "cell_index": 8
      }
     ]
    },
    "0c40a99c801d49128662aad82934a1aa": {
     "views": [
      {
       "cell_index": 8
      }
     ]
    },
    "1c8753f3411d463580879b08e2ddedb2": {
     "views": [
      {
       "cell_index": 6
      }
     ]
    },
    "2924a75eb0994a4ea19e4a69da2c9369": {
     "views": [
      {
       "cell_index": 9
      }
     ]
    },
    "29f9da92b18a453f933638b9c2405c2d": {
     "views": [
      {
       "cell_index": 20
      }
     ]
    },
    "400480f182d74426902a7a22d237df49": {
     "views": [
      {
       "cell_index": 8
      }
     ]
    },
    "5042d02270ee4d70a11abc437292a005": {
     "views": [
      {
       "cell_index": 10
      }
     ]
    },
    "9b2eaa75f9cb4ed69f45edcc18a1bcfa": {
     "views": [
      {
       "cell_index": 8
      }
     ]
    },
    "ad42efd44c2c4869b5165cb9b28fed70": {
     "views": [
      {
       "cell_index": 8
      }
     ]
    },
    "c5ab72890efb4c47b71cfe4efffc746f": {
     "views": [
      {
       "cell_index": 7
      }
     ]
    },
    "cb56ba5222384625a58b9fcd5a4edfd6": {
     "views": [
      {
       "cell_index": 21
      }
     ]
    },
    "dff634abe09c40bb8e328e7bf43c47df": {
     "views": [
      {
       "cell_index": 6
      }
     ]
    },
    "e53fe8aabd744a11b7d3ce29f3d89cd8": {
     "views": [
      {
       "cell_index": 14
      }
     ]
    },
    "f199d361089443e0beec5c02c35f5394": {
     "views": [
      {
       "cell_index": 8
      }
     ]
    },
    "f59ec1ad186d410aadf1e71bf09e6804": {
     "views": [
      {
       "cell_index": 13
      }
     ]
    },
    "f715a71e5cad4bf89dae0a3b1f0fe3d5": {
     "views": [
      {
       "cell_index": 8
      }
     ]
    },
    "f73d00d1f55f48c8bd04e41b2f4100ac": {
     "views": [
      {
       "cell_index": 14
      }
     ]
    },
    "fde5d46ad89d410585f882b641c279b9": {
     "views": [
      {
       "cell_index": 13
      }
     ]
    }
   },
   "version": "1.2.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
 }