Last active
December 5, 2016 02:11
-
-
Save tvorogme/6dbfb8046b43612959dd2a45b682e15a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "button": false, | |
| "collapsed": false, | |
| "deletable": true, | |
| "new_sheet": false, | |
| "run_control": { | |
| "read_only": false | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "\n", | |
| "\n", | |
| "import numpy as np\n", | |
| "from tqdm import tqdm_notebook\n", | |
| "from collections import Counter" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "button": false, | |
| "collapsed": false, | |
| "deletable": true, | |
| "new_sheet": false, | |
| "run_control": { | |
| "read_only": false | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "location = 'kaggle/'\n", | |
| "events = pd.DataFrame.from_csv(location+\"user_activity.csv\",index_col=None)\n", | |
| "structure = pd.DataFrame.from_csv(location+\"structure.csv\",index_col=None)\n", | |
| "targets = pd.DataFrame.from_csv(location+\"targets.csv\",index_col=None)\n", | |
| "events_test = pd.DataFrame.from_csv(location+\"user_activity_test.csv\",index_col=None)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 497, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "counter = Counter(events[events.user_id.isin(set(targets[targets.passed == 1].user_id.values))].drop_duplicates('user_id').step_id)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 498, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "cool_feature = defaultdict(lambda: 0)\n", | |
| "\n", | |
| "for i in counter:\n", | |
| " cool_feature[i] = counter[i]/659" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 499, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#\n", | |
| "# SORT BY TIME\n", | |
| "#\n", | |
| "\n", | |
| "events.sort_values(by='time', inplace=True)\n", | |
| "\n", | |
| "#\n", | |
| "# SORT, THEN SAVE THE VECTOR\n", | |
| "#\n", | |
| "structure.sort_values(['module_position','lesson_position', 'step_position'], inplace=True)\n", | |
| "vec = structure.step_id.values.tolist()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 500, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "desc = {y:x for x,y in enumerate(set(events.action))}\n", | |
| "events.action = list(map(lambda x: desc[x], events.action))\n", | |
| "\n", | |
| "\n", | |
| "desc = {y:x for x,y in enumerate(set(events.step_type))}\n", | |
| "events.step_type = list(map(lambda x: desc[x], events.step_type))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 501, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "desc = {y:x for x,y in enumerate(set(events_test.action))}\n", | |
| "events_test.action = list(map(lambda x: desc[x], events_test.action))\n", | |
| "\n", | |
| "\n", | |
| "desc = {y:x for x,y in enumerate(set(events_test.step_type))}\n", | |
| "events_test.step_type = list(map(lambda x: desc[x], events_test.step_type))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 502, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "ALL_CURSES = structure.size\n", | |
| "ALL_USERS = list(set(events_test.user_id)) + list(set(events.user_id))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 503, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "user_event_pd_frame = {}\n", | |
| "\n", | |
| "for user in tqdm_notebook(set(events_test.user_id)):\n", | |
| " user_event_pd_frame[user] = events_test[events_test.user_id == user]\n", | |
| "\n", | |
| "for user in tqdm_notebook(set(events.user_id)):\n", | |
| " user_event_pd_frame[user] = events[events.user_id == user]\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "user_actions_count = {}\n", | |
| "\n", | |
| "for user in ALL_USERS:\n", | |
| " user_actions_count[user] = Counter(user_event_pd_frame[user].action)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "action_costs = {x:y for x,y in zip(structure.step_id, structure.step_cost)}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "fast_struct = {}\n", | |
| "for user in tqdm_notebook(ALL_USERS):\n", | |
| " fast_struct[user] = structure[structure.step_id.isin(set(user_event_pd_frame[user].step_id))]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "from collections import defaultdict\n", | |
| "user_passed_actions = defaultdict(lambda: [])\n", | |
| "\n", | |
| "for user in tqdm_notebook(ALL_USERS):\n", | |
| " frame = user_event_pd_frame[user]\n", | |
| " user_passed_actions[user] = frame[frame.action == 2].step_id" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "MAX_STEP_COST_SUM = max([sum([action_costs[action] for action in user_passed_actions[user]]) for user in ALL_USERS])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "costed_passed_user_actions = {}\n", | |
| "\n", | |
| "for user in ALL_USERS:\n", | |
| " user_whole_passed_actions_count = 0\n", | |
| " user_costed_passed_actions = 0\n", | |
| " for action in user_passed_actions[user]:\n", | |
| " user_whole_passed_actions_count+=1\n", | |
| " \n", | |
| " if action_costs[action] > 0:\n", | |
| " user_costed_passed_actions+=1\n", | |
| " \n", | |
| " if user_costed_passed_actions==0:\n", | |
| " costed_passed_user_actions[user]=0\n", | |
| " else:\n", | |
| " costed_passed_user_actions[user]=user_whole_passed_actions_count/user_costed_passed_actions" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "FuckUpTimePart = {}\n", | |
| "\n", | |
| "for user in tqdm_notebook(ALL_USERS):\n", | |
| " a = sorted(user_event_pd_frame[user].time)\n", | |
| " a = list(map(lambda x: x-a[0], a))\n", | |
| " if a != []:\n", | |
| " _all = list(range(0,a[-1]+1))\n", | |
| " _all_lenght = len(_all)\n", | |
| "\n", | |
| " FuckUpTimePart[user] = len(_all)-len(set(a))/_all_lenght/100\n", | |
| " else:\n", | |
| " FuckUpTimePart[user] = 0" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "user_viewed_sum = {}\n", | |
| "for user in tqdm_notebook(ALL_USERS):\n", | |
| " ev = user_event_pd_frame[user]\n", | |
| " user_viewed_sum[user] = sum(ev[ev.action == 1].step_cost)\n", | |
| "\n", | |
| "user_passed_sum = {}\n", | |
| "for user in tqdm_notebook(ALL_USERS):\n", | |
| " ev = user_event_pd_frame[user]\n", | |
| " user_passed_sum[user] = sum(ev[ev.action == 2].step_cost)\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "structure.sort_values(['module_position','lesson_position', 'step_position'], inplace=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "step_id_position = structure.step_id.values.tolist()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 729, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def gen_features(us_id, test = False):\n", | |
| " now_ev = user_event_pd_frame[us_id]\n", | |
| " \n", | |
| " now_x = []\n", | |
| " \n", | |
| " for i in now_ev.describe().values:\n", | |
| " now_x.extend(i)\n", | |
| " \n", | |
| " tmp = []\n", | |
| " for i in now_ev.step_id:\n", | |
| " tmp.append(cool_feature[i])\n", | |
| " \n", | |
| " now_x.append(max(tmp))\n", | |
| " now_x.append(FuckUpTimePart[us_id])\n", | |
| " now_x.append(user_passed_sum[us_id])\n", | |
| " now_x.append(user_viewed_sum[us_id])\n", | |
| " now_x.append(costed_passed_user_actions[us_id])\n", | |
| " now_x.append(now_ev.step_cost.sum())\n", | |
| " \n", | |
| " tmp = user_actions_count[us_id]\n", | |
| " now_x.extend([tmp[i] for i in tmp])\n", | |
| " \n", | |
| " for i in now_ev.describe().values:\n", | |
| " now_x.extend(i)\n", | |
| " \n", | |
| " now_x.append(now_ev.step_type.max())\n", | |
| " now_x.append(now_ev.step_type.min())\n", | |
| " now_x.append(now_ev.step_cost.sum())\n", | |
| " now_x.append(now_ev.step_cost.mean())\n", | |
| " \n", | |
| " \n", | |
| " return (us_id, now_x)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 708, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "X = []\n", | |
| "Y = []\n", | |
| "for us_id in tqdm_notebook(set(events.user_id.tolist())):\n", | |
| " f = gen_features(us_id)\n", | |
| " X.append(f[1])\n", | |
| " Y.append(targets[targets.user_id == f[0]].passed.values[0])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 709, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "X_test = []\n", | |
| "ind = []\n", | |
| "for us_id in tqdm_notebook(set(events_test.user_id.tolist())):\n", | |
| " f = gen_features(us_id,True)\n", | |
| " ind.append(f[0])\n", | |
| " X_test.append(f[1])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 710, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#\n", | |
| "# X contains NaN features, so we need to fill them;\n", | |
| "# let's do it with pandas\n", | |
| "# \n", | |
| "X = pd.DataFrame(X)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 711, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "X = X.fillna(method='bfill')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 712, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "X = X.fillna(0)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 713, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "X_test = pd.DataFrame(X_test).fillna(method='bfill')\n", | |
| "X_test = X_test.fillna(0)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 714, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "X = X.values\n", | |
| "X = X.astype(np.float32)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 715, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "X_test = X_test.as_matrix()\n", | |
| "\n", | |
| "Y = np.ravel(Y)\n", | |
| "Y = Y.astype(np.int32)\n", | |
| "\n", | |
| "\n", | |
| "X_test = X_test.astype(np.float32)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 716, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def feature_normalize(feature_index, features_array):\n", | |
| " f3_max = max([a[feature_index] for a in features_array])\n", | |
| " f3_min = min([a[feature_index] for a in features_array])\n", | |
| "\n", | |
| " for row in features_array:\n", | |
| " if (f3_max - f3_min) != 0:\n", | |
| " row[feature_index] = (row[feature_index] - f3_min) / (f3_max - f3_min)\n", | |
| " else:\n", | |
| " pass\n", | |
| " \n", | |
| " return features_array" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 717, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "for i in range(len(X[0])):\n", | |
| " X = feature_normalize(i, X)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 718, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "for i in range(len(X_test[0])):\n", | |
| " X_test = feature_normalize(i, X_test)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 719, | |
| "metadata": { | |
| "button": false, | |
| "collapsed": false, | |
| "deletable": true, | |
| "new_sheet": false, | |
| "run_control": { | |
| "read_only": false | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from sklearn.cross_validation import train_test_split\n", | |
| "from sklearn.metrics import f1_score\n", | |
| "\n", | |
| "Xtr,Xval,Ytr,Yval = train_test_split(X,Y,test_size=0.1,random_state=128)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 720, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import theano\n", | |
| "import theano.tensor as T\n", | |
| "import lasagne" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 721, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "input_X = T.vector(\"X\")\n", | |
| "target_y = T.scalar(dtype='int32')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 722, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from lasagne.updates import sgd\n", | |
| "from lasagne.nonlinearities import leaky_rectify, softmax, tanh, elu\n", | |
| "from lasagne.layers import InputLayer, DenseLayer\n", | |
| "\n", | |
| "l_in = InputLayer(shape=(None,len(X[0])))\n", | |
| "hl = DenseLayer(incoming=l_in, num_units=100) \n", | |
| "bb = lasagne.layers.batch_norm(hl)\n", | |
| "hl = DenseLayer(incoming=hl, num_units=60)\n", | |
| "bb = lasagne.layers.batch_norm(hl)\n", | |
| "hl = DenseLayer(incoming=bb, num_units=20)\n", | |
| "bb = lasagne.layers.batch_norm(hl)\n", | |
| "l_out = DenseLayer(incoming=bb, num_units=2, nonlinearity=softmax, name='outputlayer') " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 723, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "sym_x = T.matrix('X')\n", | |
| "sym_t = T.ivector('target')\n", | |
| "\n", | |
| "train_out = lasagne.layers.get_output(l_out, {l_in: sym_x}, deterministic=False)\n", | |
| "eval_out = lasagne.layers.get_output(l_out, {l_in: sym_x}, deterministic=True)\n", | |
| "\n", | |
| "all_params = lasagne.layers.get_all_params(l_out, trainable=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 724, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "cost_train = T.nnet.categorical_crossentropy(train_out, sym_t).mean()\n", | |
| "cost_eval = T.nnet.categorical_crossentropy(eval_out, sym_t).mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 725, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "all_grads = T.grad(cost_train, all_params)\n", | |
| "updates = lasagne.updates.adagrad(all_grads, all_params, learning_rate=0.1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 726, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "f_eval = theano.function(inputs=[sym_x, sym_t],\n", | |
| " outputs=[cost_eval, eval_out])\n", | |
| "\n", | |
| "f_train = theano.function(inputs=[sym_x, sym_t],\n", | |
| " outputs=[cost_train, eval_out],\n", | |
| " updates=updates)\n", | |
| "\n", | |
| "f_pred = theano.function(inputs=[sym_x],\n", | |
| " outputs=eval_out)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 727, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def iterate_minibatches(inputs, targets, batchsize, inputs_new=None):\n", | |
| " assert len(inputs) == len(targets)\n", | |
| " if inputs_new != None:\n", | |
| " assert len(inputs_new) == len(inputs_new)\n", | |
| " indices = np.arange(len(inputs))\n", | |
| " np.random.shuffle(indices)\n", | |
| " for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):\n", | |
| " excerpt = indices[start_idx:start_idx + batchsize]\n", | |
| " if inputs_new != None:\n", | |
| " yield inputs[excerpt], inputs_new[excerpt], targets[excerpt]\n", | |
| " else:\n", | |
| " yield inputs[excerpt], targets[excerpt]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 739, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "ename": "KeyboardInterrupt", | |
| "evalue": "", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-739-4cf751260849>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my\u001b[0m \u001b[0;32min\u001b[0m \u001b[0miterate_minibatches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mint32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mf_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mold_score\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mnow_score\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mold_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnow_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/home/a.tvorozhkov/anaconda3/lib/python3.5/site-packages/theano/compile/function_module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 871\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 872\u001b[0m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 873\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0moutput_subset\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 874\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_subset\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moutput_subset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 875\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "old_score = 0\n", | |
| "answer = []\n", | |
| "predicted = []\n", | |
| "for epoch in range(1000):\n", | |
| " for x,y in iterate_minibatches(np.array(X),np.array(Y,dtype=np.int32), 10):\n", | |
| " f_train(list(x),list(y))\n", | |
| " if old_score < now_score:\n", | |
| " old_score = now_score\n", | |
| " answer = [i.argmax() for i in f_pred(X_test)]\n", | |
| " predicted = f_pred(X_test)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 732, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "a = pd.DataFrame()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 733, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "a['passed'] = answer" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 734, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "a['user_id'] = ind" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 735, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "a = a.set_index('user_id')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 736, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>passed</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>user_id</th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>8193</th>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>16387</th>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8196</th>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " passed\n", | |
| "user_id \n", | |
| "8193 0\n", | |
| "16387 0\n", | |
| "8196 0\n", | |
| "5 0\n", | |
| "9 0" | |
| ] | |
| }, | |
| "execution_count": 736, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "a.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 738, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "a.to_csv('red.csv')" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "anaconda-cloud": {}, | |
| "kernelspec": { | |
| "display_name": "Python [default]", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.2" | |
| }, | |
| "widgets": { | |
| "state": { | |
| "00cc5f7a46ca4be0bba00e6ceb579bb5": { | |
| "views": [ | |
| { | |
| "cell_index": 8 | |
| } | |
| ] | |
| }, | |
| "0c40a99c801d49128662aad82934a1aa": { | |
| "views": [ | |
| { | |
| "cell_index": 8 | |
| } | |
| ] | |
| }, | |
| "1c8753f3411d463580879b08e2ddedb2": { | |
| "views": [ | |
| { | |
| "cell_index": 6 | |
| } | |
| ] | |
| }, | |
| "2924a75eb0994a4ea19e4a69da2c9369": { | |
| "views": [ | |
| { | |
| "cell_index": 9 | |
| } | |
| ] | |
| }, | |
| "29f9da92b18a453f933638b9c2405c2d": { | |
| "views": [ | |
| { | |
| "cell_index": 20 | |
| } | |
| ] | |
| }, | |
| "400480f182d74426902a7a22d237df49": { | |
| "views": [ | |
| { | |
| "cell_index": 8 | |
| } | |
| ] | |
| }, | |
| "5042d02270ee4d70a11abc437292a005": { | |
| "views": [ | |
| { | |
| "cell_index": 10 | |
| } | |
| ] | |
| }, | |
| "9b2eaa75f9cb4ed69f45edcc18a1bcfa": { | |
| "views": [ | |
| { | |
| "cell_index": 8 | |
| } | |
| ] | |
| }, | |
| "ad42efd44c2c4869b5165cb9b28fed70": { | |
| "views": [ | |
| { | |
| "cell_index": 8 | |
| } | |
| ] | |
| }, | |
| "c5ab72890efb4c47b71cfe4efffc746f": { | |
| "views": [ | |
| { | |
| "cell_index": 7 | |
| } | |
| ] | |
| }, | |
| "cb56ba5222384625a58b9fcd5a4edfd6": { | |
| "views": [ | |
| { | |
| "cell_index": 21 | |
| } | |
| ] | |
| }, | |
| "dff634abe09c40bb8e328e7bf43c47df": { | |
| "views": [ | |
| { | |
| "cell_index": 6 | |
| } | |
| ] | |
| }, | |
| "e53fe8aabd744a11b7d3ce29f3d89cd8": { | |
| "views": [ | |
| { | |
| "cell_index": 14 | |
| } | |
| ] | |
| }, | |
| "f199d361089443e0beec5c02c35f5394": { | |
| "views": [ | |
| { | |
| "cell_index": 8 | |
| } | |
| ] | |
| }, | |
| "f59ec1ad186d410aadf1e71bf09e6804": { | |
| "views": [ | |
| { | |
| "cell_index": 13 | |
| } | |
| ] | |
| }, | |
| "f715a71e5cad4bf89dae0a3b1f0fe3d5": { | |
| "views": [ | |
| { | |
| "cell_index": 8 | |
| } | |
| ] | |
| }, | |
| "f73d00d1f55f48c8bd04e41b2f4100ac": { | |
| "views": [ | |
| { | |
| "cell_index": 14 | |
| } | |
| ] | |
| }, | |
| "fde5d46ad89d410585f882b641c279b9": { | |
| "views": [ | |
| { | |
| "cell_index": 13 | |
| } | |
| ] | |
| } | |
| }, | |
| "version": "1.2.0" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 1 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment