Last active
December 5, 2016 02:11
-
-
Save tvorogme/6dbfb8046b43612959dd2a45b682e15a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"button": false, | |
"collapsed": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"\n", | |
"\n", | |
"import numpy as np\n", | |
"from tqdm import tqdm_notebook\n", | |
"from collections import Counter" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"button": false, | |
"collapsed": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"location = 'kaggle/'\n", | |
"events = pd.DataFrame.from_csv(location+\"user_activity.csv\",index_col=None)\n", | |
"structure = pd.DataFrame.from_csv(location+\"structure.csv\",index_col=None)\n", | |
"targets = pd.DataFrame.from_csv(location+\"targets.csv\",index_col=None)\n", | |
"events_test = pd.DataFrame.from_csv(location+\"user_activity_test.csv\",index_col=None)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 497, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"counter = Counter(events[events.user_id.isin(set(targets[targets.passed == 1].user_id.values))].drop_duplicates('user_id').step_id)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 498, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"cool_feature = defaultdict(lambda: 0)\n", | |
"\n", | |
"for i in counter:\n", | |
" cool_feature[i] = counter[i]/659" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 499, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#\n", | |
"# ПОСОРТИМ ПО ВРЕМЕНИ\n", | |
"#\n", | |
"\n", | |
"events.sort_values(by='time', inplace=True)\n", | |
"\n", | |
"#\n", | |
"# ПОСОРТИМ, А ПОТОМ СОХРАНИМ ВЕКТОР\n", | |
"#\n", | |
"structure.sort_values(['module_position','lesson_position', 'step_position'], inplace=True)\n", | |
"vec = structure.step_id.values.tolist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 500, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"desc = {y:x for x,y in enumerate(set(events.action))}\n", | |
"events.action = list(map(lambda x: desc[x], events.action))\n", | |
"\n", | |
"\n", | |
"desc = {y:x for x,y in enumerate(set(events.step_type))}\n", | |
"events.step_type = list(map(lambda x: desc[x], events.step_type))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 501, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"desc = {y:x for x,y in enumerate(set(events_test.action))}\n", | |
"events_test.action = list(map(lambda x: desc[x], events_test.action))\n", | |
"\n", | |
"\n", | |
"desc = {y:x for x,y in enumerate(set(events_test.step_type))}\n", | |
"events_test.step_type = list(map(lambda x: desc[x], events_test.step_type))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 502, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"ALL_CURSES = structure.size\n", | |
"ALL_USERS = list(set(events_test.user_id)) + list(set(events.user_id))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 503, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"user_event_pd_frame = {}\n", | |
"\n", | |
"for user in tqdm_notebook(set(events_test.user_id)):\n", | |
" user_event_pd_frame[user] = events_test[events_test.user_id == user]\n", | |
"\n", | |
"for user in tqdm_notebook(set(events.user_id)):\n", | |
" user_event_pd_frame[user] = events[events.user_id == user]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"user_actions_count = {}\n", | |
"\n", | |
"for user in ALL_USERS:\n", | |
" user_actions_count[user] = Counter(user_event_pd_frame[user].action)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"action_costs = {x:y for x,y in zip(structure.step_id, structure.step_cost)}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"fast_struct = {}\n", | |
"for user in tqdm_notebook(ALL_USERS):\n", | |
" fast_struct[user] = structure[structure.step_id.isin(set(user_event_pd_frame[user].step_id))]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"from collections import defaultdict\n", | |
"user_passed_actions = defaultdict(lambda: [])\n", | |
"\n", | |
"for user in tqdm_notebook(ALL_USERS):\n", | |
" frame = user_event_pd_frame[user]\n", | |
" user_passed_actions[user] = frame[frame.action == 2].step_id" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"MAX_STEP_COST_SUM = max([sum([action_costs[action] for action in user_passed_actions[user]]) for user in ALL_USERS])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"costed_passed_user_actions = {}\n", | |
"\n", | |
"for user in ALL_USERS:\n", | |
" user_whole_passed_actions_count = 0\n", | |
" user_costed_passed_actions = 0\n", | |
" for action in user_passed_actions[user]:\n", | |
" user_whole_passed_actions_count+=1\n", | |
" \n", | |
" if action_costs[action] > 0:\n", | |
" user_costed_passed_actions+=1\n", | |
" \n", | |
" if user_costed_passed_actions==0:\n", | |
" costed_passed_user_actions[user]=0\n", | |
" else:\n", | |
" costed_passed_user_actions[user]=user_whole_passed_actions_count/user_costed_passed_actions" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"FuckUpTimePart = {}\n", | |
"\n", | |
"for user in tqdm_notebook(ALL_USERS):\n", | |
" a = sorted(user_event_pd_frame[user].time)\n", | |
" a = list(map(lambda x: x-a[0], a))\n", | |
" if a != []:\n", | |
" _all = list(range(0,a[-1]+1))\n", | |
" _all_lenght = len(_all)\n", | |
"\n", | |
" FuckUpTimePart[user] = len(_all)-len(set(a))/_all_lenght/100\n", | |
" else:\n", | |
" FuckUpTimePart[user] = 0" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"user_viewed_sum = {}\n", | |
"for user in tqdm_notebook(ALL_USERS):\n", | |
" ev = user_event_pd_frame[user]\n", | |
" user_viewed_sum[user] = sum(ev[ev.action == 1].step_cost)\n", | |
"\n", | |
"user_passed_sum = {}\n", | |
"for user in tqdm_notebook(ALL_USERS):\n", | |
" ev = user_event_pd_frame[user]\n", | |
" user_passed_sum[user] = sum(ev[ev.action == 2].step_cost)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"structure.sort_values(['module_position','lesson_position', 'step_position'], inplace=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"step_id_position = structure.step_id.values.tolist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 729, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def gen_features(us_id, test = False):\n", | |
" now_ev = user_event_pd_frame[us_id]\n", | |
" \n", | |
" now_x = []\n", | |
" \n", | |
" for i in now_ev.describe().values:\n", | |
" now_x.extend(i)\n", | |
" \n", | |
" tmp = []\n", | |
" for i in now_ev.step_id:\n", | |
" tmp.append(cool_feature[i])\n", | |
" \n", | |
" now_x.append(max(tmp))\n", | |
" now_x.append(FuckUpTimePart[us_id])\n", | |
" now_x.append(user_passed_sum[us_id])\n", | |
" now_x.append(user_viewed_sum[us_id])\n", | |
" now_x.append(costed_passed_user_actions[us_id])\n", | |
" now_x.append(now_ev.step_cost.sum())\n", | |
" \n", | |
" tmp = user_actions_count[us_id]\n", | |
" now_x.extend([tmp[i] for i in tmp])\n", | |
" \n", | |
" for i in now_ev.describe().values:\n", | |
" now_x.extend(i)\n", | |
" \n", | |
" now_x.append(now_ev.step_type.max())\n", | |
" now_x.append(now_ev.step_type.min())\n", | |
" now_x.append(now_ev.step_cost.sum())\n", | |
" now_x.append(now_ev.step_cost.mean())\n", | |
" \n", | |
" \n", | |
" return (us_id, now_x)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 708, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"X = []\n", | |
"Y = []\n", | |
"for us_id in tqdm_notebook(set(events.user_id.tolist())):\n", | |
" f = gen_features(us_id)\n", | |
" X.append(f[1])\n", | |
" Y.append(targets[targets.user_id == f[0]].passed.values[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 709, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"X_test = []\n", | |
"ind = []\n", | |
"for us_id in tqdm_notebook(set(events_test.user_id.tolist())):\n", | |
" f = gen_features(us_id,True)\n", | |
" ind.append(f[0])\n", | |
" X_test.append(f[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 710, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#\n", | |
"# in X we have NAN features, we need to feel it, \n", | |
"# let's do it with pandas\n", | |
"# \n", | |
"X = pd.DataFrame(X)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 711, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"X = X.fillna(method='bfill')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 712, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"X = X.fillna(0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 713, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"X_test = pd.DataFrame(X_test).fillna(method='bfill')\n", | |
"X_test = X_test.fillna(0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 714, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"X = X.values\n", | |
"X = X.astype(np.float32)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 715, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"X_test = X_test.as_matrix()\n", | |
"\n", | |
"Y = np.ravel(Y)\n", | |
"Y = Y.astype(np.int32)\n", | |
"\n", | |
"\n", | |
"X_test = X_test.astype(np.float32)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 716, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def feature_normalize(feature_index, features_array):\n", | |
" f3_max = max([a[feature_index] for a in features_array])\n", | |
" f3_min = min([a[feature_index] for a in features_array])\n", | |
"\n", | |
" for row in features_array:\n", | |
" if (f3_max - f3_min) != 0:\n", | |
" row[feature_index] = (row[feature_index] - f3_min) / (f3_max - f3_min)\n", | |
" else:\n", | |
" pass\n", | |
" \n", | |
" return features_array" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 717, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"for i in range(len(X[0])):\n", | |
" X = feature_normalize(i, X)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 718, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"for i in range(len(X_test[0])):\n", | |
" X_test = feature_normalize(i, X_test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 719, | |
"metadata": { | |
"button": false, | |
"collapsed": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.cross_validation import train_test_split\n", | |
"from sklearn.metrics import f1_score\n", | |
"\n", | |
"Xtr,Xval,Ytr,Yval = train_test_split(X,Y,test_size=0.1,random_state=128)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 720, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import theano\n", | |
"import theano.tensor as T\n", | |
"import lasagne" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 721, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"input_X = T.vector(\"X\")\n", | |
"target_y = T.scalar(dtype='int32')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 722, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from lasagne.updates import sgd\n", | |
"from lasagne.nonlinearities import leaky_rectify, softmax, tanh, elu\n", | |
"from lasagne.layers import InputLayer, DenseLayer\n", | |
"\n", | |
"l_in = InputLayer(shape=(None,len(X[0])))\n", | |
"hl = DenseLayer(incoming=l_in, num_units=100) \n", | |
"bb = lasagne.layers.batch_norm(hl)\n", | |
"hl = DenseLayer(incoming=hl, num_units=60)\n", | |
"bb = lasagne.layers.batch_norm(hl)\n", | |
"hl = DenseLayer(incoming=bb, num_units=20)\n", | |
"bb = lasagne.layers.batch_norm(hl)\n", | |
"l_out = DenseLayer(incoming=bb, num_units=2, nonlinearity=softmax, name='outputlayer') " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 723, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"sym_x = T.matrix('X')\n", | |
"sym_t = T.ivector('target')\n", | |
"\n", | |
"train_out = lasagne.layers.get_output(l_out, {l_in: sym_x}, deterministic=False)\n", | |
"eval_out = lasagne.layers.get_output(l_out, {l_in: sym_x}, deterministic=True)\n", | |
"\n", | |
"all_params = lasagne.layers.get_all_params(l_out, trainable=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 724, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"cost_train = T.nnet.categorical_crossentropy(train_out, sym_t).mean()\n", | |
"cost_eval = T.nnet.categorical_crossentropy(eval_out, sym_t).mean()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 725, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"all_grads = T.grad(cost_train, all_params)\n", | |
"updates = lasagne.updates.adagrad(all_grads, all_params, learning_rate=0.1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 726, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"f_eval = theano.function(inputs=[sym_x, sym_t],\n", | |
" outputs=[cost_eval, eval_out])\n", | |
"\n", | |
"f_train = theano.function(inputs=[sym_x, sym_t],\n", | |
" outputs=[cost_train, eval_out],\n", | |
" updates=updates)\n", | |
"\n", | |
"f_pred = theano.function(inputs=[sym_x],\n", | |
" outputs=eval_out)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 727, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def iterate_minibatches(inputs, targets, batchsize, inputs_new=None):\n", | |
" assert len(inputs) == len(targets)\n", | |
" if inputs_new != None:\n", | |
" assert len(inputs_new) == len(inputs_new)\n", | |
" indices = np.arange(len(inputs))\n", | |
" np.random.shuffle(indices)\n", | |
" for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):\n", | |
" excerpt = indices[start_idx:start_idx + batchsize]\n", | |
" if inputs_new != None:\n", | |
" yield inputs[excerpt], inputs_new[excerpt], targets[excerpt]\n", | |
" else:\n", | |
" yield inputs[excerpt], targets[excerpt]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 739, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "KeyboardInterrupt", | |
"evalue": "", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-739-4cf751260849>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my\u001b[0m \u001b[0;32min\u001b[0m \u001b[0miterate_minibatches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mint32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mf_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mold_score\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mnow_score\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mold_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnow_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/home/a.tvorozhkov/anaconda3/lib/python3.5/site-packages/theano/compile/function_module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 871\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 872\u001b[0m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 873\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0moutput_subset\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 874\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_subset\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moutput_subset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 875\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
] | |
} | |
], | |
"source": [ | |
"old_score = 0\n", | |
"answer = []\n", | |
"predicted = []\n", | |
"for epoch in range(1000):\n", | |
" for x,y in iterate_minibatches(np.array(X),np.array(Y,dtype=np.int32), 10):\n", | |
" f_train(list(x),list(y))\n", | |
" if old_score < now_score:\n", | |
" old_score = now_score\n", | |
" answer = [i.argmax() for i in f_pred(X_test)]\n", | |
" predicted = f_pred(X_test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 732, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"a = pd.DataFrame()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 733, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"a['passed'] = answer" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 734, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"a['user_id'] = ind" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 735, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"a = a.set_index('user_id')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 736, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>passed</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>user_id</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>8193</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16387</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8196</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" passed\n", | |
"user_id \n", | |
"8193 0\n", | |
"16387 0\n", | |
"8196 0\n", | |
"5 0\n", | |
"9 0" | |
] | |
}, | |
"execution_count": 736, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"a.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 738, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"a.to_csv('red.csv')" | |
] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python [default]", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
}, | |
"widgets": { | |
"state": { | |
"00cc5f7a46ca4be0bba00e6ceb579bb5": { | |
"views": [ | |
{ | |
"cell_index": 8 | |
} | |
] | |
}, | |
"0c40a99c801d49128662aad82934a1aa": { | |
"views": [ | |
{ | |
"cell_index": 8 | |
} | |
] | |
}, | |
"1c8753f3411d463580879b08e2ddedb2": { | |
"views": [ | |
{ | |
"cell_index": 6 | |
} | |
] | |
}, | |
"2924a75eb0994a4ea19e4a69da2c9369": { | |
"views": [ | |
{ | |
"cell_index": 9 | |
} | |
] | |
}, | |
"29f9da92b18a453f933638b9c2405c2d": { | |
"views": [ | |
{ | |
"cell_index": 20 | |
} | |
] | |
}, | |
"400480f182d74426902a7a22d237df49": { | |
"views": [ | |
{ | |
"cell_index": 8 | |
} | |
] | |
}, | |
"5042d02270ee4d70a11abc437292a005": { | |
"views": [ | |
{ | |
"cell_index": 10 | |
} | |
] | |
}, | |
"9b2eaa75f9cb4ed69f45edcc18a1bcfa": { | |
"views": [ | |
{ | |
"cell_index": 8 | |
} | |
] | |
}, | |
"ad42efd44c2c4869b5165cb9b28fed70": { | |
"views": [ | |
{ | |
"cell_index": 8 | |
} | |
] | |
}, | |
"c5ab72890efb4c47b71cfe4efffc746f": { | |
"views": [ | |
{ | |
"cell_index": 7 | |
} | |
] | |
}, | |
"cb56ba5222384625a58b9fcd5a4edfd6": { | |
"views": [ | |
{ | |
"cell_index": 21 | |
} | |
] | |
}, | |
"dff634abe09c40bb8e328e7bf43c47df": { | |
"views": [ | |
{ | |
"cell_index": 6 | |
} | |
] | |
}, | |
"e53fe8aabd744a11b7d3ce29f3d89cd8": { | |
"views": [ | |
{ | |
"cell_index": 14 | |
} | |
] | |
}, | |
"f199d361089443e0beec5c02c35f5394": { | |
"views": [ | |
{ | |
"cell_index": 8 | |
} | |
] | |
}, | |
"f59ec1ad186d410aadf1e71bf09e6804": { | |
"views": [ | |
{ | |
"cell_index": 13 | |
} | |
] | |
}, | |
"f715a71e5cad4bf89dae0a3b1f0fe3d5": { | |
"views": [ | |
{ | |
"cell_index": 8 | |
} | |
] | |
}, | |
"f73d00d1f55f48c8bd04e41b2f4100ac": { | |
"views": [ | |
{ | |
"cell_index": 14 | |
} | |
] | |
}, | |
"fde5d46ad89d410585f882b641c279b9": { | |
"views": [ | |
{ | |
"cell_index": 13 | |
} | |
] | |
} | |
}, | |
"version": "1.2.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment