Last active
December 3, 2018 17:49
-
-
Save akelleh/0207d775166029450b78b5ede7ed6f4a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import tensorflow as tf\n", | |
| "\n", | |
| "import numpy as np\n", | |
| "\n", | |
| "units = 5000\n", | |
| "time_steps = 52\n", | |
| "\n", | |
| "# Data-generating process translated from Jim Savage's version: every unit\n", | |
| "# follows the same linear time trend, shifted by its own random intercept.\n", | |
| "\n", | |
| "a_trend = np.arange(0, time_steps, 1)\n", | |
| "individual_effects = np.random.normal(size=units)\n", | |
| "A_trend = np.tile(a_trend, (units, 1))  # one copy of the trend per unit\n", | |
| "A_indiv = np.tile(individual_effects[:, np.newaxis], (1, time_steps))  # one intercept per row\n", | |
| "A = A_trend + A_indiv\n", | |
| "\n", | |
| "# Hide an entry (replace it with NaN) whenever its uniform draw is <= 0.1.\n", | |
| "# A single vectorized draw replaces the per-entry Python loop; the draws are\n", | |
| "# consumed in the same row-major order, one per entry.\n", | |
| "keep = np.random.rand(*A.shape) > 0.1\n", | |
| "X_observed = np.where(keep, A, np.nan)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import tensorflow as tf\n", | |
| "import numpy as np\n", | |
| "\n", | |
| "\n", | |
| "class MCNNM(object):\n", | |
| "    \"\"\"Matrix completion by nuclear-norm minimization, fitted with TensorFlow 1.x.\n", | |
| "\n", | |
| "    The completed matrix is a free variable optimized with Adam against the\n", | |
| "    non-NaN entries of the input, plus a nuclear-norm penalty weighted by lambd.\n", | |
| "    \"\"\"\n", | |
| "\n", | |
| "    def __init__(self, num_cores=8, GPU=False, CPU=True, learning_rate=1.):\n", | |
| "        \"\"\"Store the optimizer settings and build the session configuration.\n", | |
| "\n", | |
| "        num_cores: thread count for both intra-op and inter-op parallelism.\n", | |
| "        GPU: expose one GPU device to the session when true.\n", | |
| "        CPU: kept for backward compatibility; one CPU device is always exposed.\n", | |
| "        learning_rate: step size passed to the Adam optimizer.\n", | |
| "        \"\"\"\n", | |
| "        self.train_op = None\n", | |
| "        self.learning_rate = learning_rate\n", | |
| "\n", | |
| "        # Device selection. Previously a second `if CPU:` ran after `if GPU:` and,\n", | |
| "        # with CPU defaulting to True, always reset the GPU count to 0; with both\n", | |
| "        # flags False the counts were never assigned (NameError).\n", | |
| "        num_CPU = 1\n", | |
| "        num_GPU = 1 if GPU else 0\n", | |
| "\n", | |
| "        self.tf_config = tf.ConfigProto(\n", | |
| "            intra_op_parallelism_threads=num_cores,\n", | |
| "            inter_op_parallelism_threads=num_cores,\n", | |
| "            allow_soft_placement=True,\n", | |
| "            device_count={'CPU': num_CPU, 'GPU': num_GPU})\n", | |
| "\n", | |
| "    def build_tf_graph(self, X, lambd):\n", | |
| "        \"\"\"Reset the default graph, build the loss for X and return the training op.\n", | |
| "\n", | |
| "        X: NumPy array in which NaN marks an unobserved entry.\n", | |
| "        lambd: weight of the nuclear-norm penalty.\n", | |
| "        Sets self.observed, self.X_completed_tensor and self.l as a side effect.\n", | |
| "        \"\"\"\n", | |
| "        tf.reset_default_graph()\n", | |
| "        input_shape = X.shape\n", | |
| "        observed_mask = ~np.isnan(X)\n", | |
| "        num_observed = float(observed_mask.sum())\n", | |
| "\n", | |
| "        # 1.0 where an entry is observed, 0.0 where it is missing.\n", | |
| "        self.observed = tf.constant(observed_mask * 1, dtype=tf.float64)\n", | |
| "        X = tf.constant(X, tf.float64)\n", | |
| "        self.X_completed_tensor = tf.get_variable(\"x_completed\",\n", | |
| "                                                  input_shape,\n", | |
| "                                                  dtype=tf.float64)\n", | |
| "        self.l = self.loss(X, self.X_completed_tensor, num_observed, lambd)\n", | |
| "        return tf.train.AdamOptimizer(self.learning_rate).minimize(self.l)\n", | |
| "\n", | |
| "    def loss(self, X_observed, X_completed, num_observed, lambd):\n", | |
| "        \"\"\"Return the fit term on observed entries plus lambd times the nuclear norm.\n", | |
| "\n", | |
| "        Both terms are also stored on self (normalized_frobenius, nuclear_norm)\n", | |
| "        so that fit_predict can report them.\n", | |
| "        \"\"\"\n", | |
| "        delta = self.project_A(X_observed - X_completed)\n", | |
| "        self.normalized_frobenius = tf.norm(delta) / num_observed\n", | |
| "        # The first value returned by tf.svd holds the singular values; their sum\n", | |
| "        # is the nuclear norm.\n", | |
| "        s, _, _ = tf.svd(X_completed)\n", | |
| "        self.nuclear_norm = tf.reduce_sum(s)\n", | |
| "        return self.normalized_frobenius + lambd * self.nuclear_norm\n", | |
| "\n", | |
| "    def project_A(self, A):\n", | |
| "        \"\"\"Replace NaN entries of A with zero and multiply by the observation mask.\"\"\"\n", | |
| "        return tf.multiply(tf.where(tf.is_nan(A), tf.zeros_like(A), A), self.observed)\n", | |
| "\n", | |
| "    def fit_predict(self, X, lambd=0.0001, iterations=1000, verbose=True):\n", | |
| "        \"\"\"Fit the completed matrix to X and return it as a NumPy array.\n", | |
| "\n", | |
| "        Runs `iterations` optimizer steps in a new session. The result is also\n", | |
| "        kept on self.X_completed and the final loss on self.current_loss.\n", | |
| "        With verbose=True the loss is printed initially, every 100 steps and at the end.\n", | |
| "        \"\"\"\n", | |
| "        self.train_op = self.build_tf_graph(X, lambd)\n", | |
| "        with tf.Session(config=self.tf_config) as sess:\n", | |
| "            sess.run(tf.global_variables_initializer())\n", | |
| "            loss, frob, nn = sess.run([self.l, self.normalized_frobenius, self.nuclear_norm])\n", | |
| "            if verbose:\n", | |
| "                print(\"initial loss is {}. Frob norm is {}. NN is {}.\".format(loss, frob, nn))\n", | |
| "            for i in range(iterations):\n", | |
| "                _, loss = sess.run([self.train_op, self.l])\n", | |
| "                if i > 0 and i % 100 == 0 and verbose:\n", | |
| "                    print(\"current loss at step {} is: {}\".format(i, loss))\n", | |
| "            self.X_completed, self.current_loss = sess.run([self.X_completed_tensor, self.l])\n", | |
| "            if verbose:\n", | |
| "                print(\"final loss is {}\".format(self.current_loss))\n", | |
| "        return self.X_completed\n", | |
| "\n", | |
| "    def pick_lambdas(self, start=1, end=-9, count=10, power=True):\n", | |
| "        \"\"\"Return `count` evenly spaced values from start (inclusive) to end (exclusive).\n", | |
| "\n", | |
| "        With power=True the values are used as base-10 exponents, which gives a\n", | |
| "        log-spaced grid of penalties.\n", | |
| "        \"\"\"\n", | |
| "        # linspace returns exactly `count` points; arange with a float step can\n", | |
| "        # return one more or one fewer because of rounding.\n", | |
| "        lambdas = np.linspace(start, end, count, endpoint=False)\n", | |
| "        if power:\n", | |
| "            lambdas = np.power(10, lambdas)\n", | |
| "        return lambdas\n", | |
| "\n", | |
| "    def get_crossvalidation_loss(self, X_test, X_completed):\n", | |
| "        \"\"\"Return the mean squared error of X_completed over the non-NaN entries of X_test.\"\"\"\n", | |
| "        X_test = np.asarray(X_test, dtype=float)\n", | |
| "        held_out = ~np.isnan(X_test)\n", | |
| "        residuals = X_test[held_out] - np.asarray(X_completed)[held_out]\n", | |
| "        return np.sum(residuals ** 2.) / held_out.sum()\n", | |
| "\n", | |
| "    def cross_validate(self, X, lambdas=None, folds=5, kwargs=None, verbose=True):\n", | |
| "        \"\"\"Return the candidate penalty with the lowest median held-out error.\n", | |
| "\n", | |
| "        X: matrix in which NaN marks an unobserved entry.\n", | |
| "        lambdas: candidate penalties; None or an empty sequence means pick_lambdas().\n", | |
| "        folds: number of independent random train/test splits.\n", | |
| "        kwargs: extra keyword arguments forwarded to fit_predict.\n", | |
| "        verbose: print the candidates and their median errors.\n", | |
| "        \"\"\"\n", | |
| "        if lambdas is None or len(lambdas) == 0:\n", | |
| "            lambdas = self.pick_lambdas()\n", | |
| "        fit_options = {} if kwargs is None else kwargs\n", | |
| "        results = np.zeros(shape=(folds, len(lambdas)))\n", | |
| "        for i in range(folds):\n", | |
| "            X_train, X_test = self.split_data(X)\n", | |
| "            for j, lambd in enumerate(lambdas):\n", | |
| "                # Fit on the training split only: fitting on X would show the model\n", | |
| "                # the very entries it is scored on.\n", | |
| "                X_completed = self.fit_predict(X_train, lambd=lambd, verbose=False, **fit_options)\n", | |
| "                results[i][j] = self.get_crossvalidation_loss(X_test, X_completed)\n", | |
| "        results = np.median(results, axis=0)\n", | |
| "        if verbose:\n", | |
| "            print(\"lambdas:\", lambdas)\n", | |
| "            print(\"Median MSEs:\", results)\n", | |
| "        best_lambda = lambdas[np.argmin(results)]\n", | |
| "        return best_lambda\n", | |
| "\n", | |
| "    def split_data(self, X):\n", | |
| "        \"\"\"Randomly split the observed entries of X into a training and a test matrix.\n", | |
| "\n", | |
| "        Both results have the shape of X. A sampled subset of the observed\n", | |
| "        entries is kept in X_train; the remaining observed entries are kept in\n", | |
| "        X_test; every other position is NaN. The training share of the observed\n", | |
| "        entries equals the share of entries of X that are observed.\n", | |
| "\n", | |
| "        Raises ValueError if X has no observed entries.\n", | |
| "        \"\"\"\n", | |
| "        # Use the argument, not the notebook-level X_observed the old code read.\n", | |
| "        X = np.asarray(X, dtype=float)\n", | |
| "        observed_positions = np.flatnonzero(~np.isnan(X))\n", | |
| "        number_observed = observed_positions.size\n", | |
| "        if number_observed == 0:\n", | |
| "            raise ValueError(\"X has no observed entries to split\")\n", | |
| "        fraction_observed = number_observed / float(X.size)\n", | |
| "        number_to_select = int(np.ceil(fraction_observed * number_observed))\n", | |
| "\n", | |
| "        # Sample flat positions of observed entries directly. The old membership\n", | |
| "        # test compared row + column against the sampled indices, which neither\n", | |
| "        # identified an entry nor ran in reasonable time.\n", | |
| "        train_positions = np.random.choice(observed_positions, size=number_to_select, replace=False)\n", | |
| "        in_train = np.zeros(X.size, dtype=bool)\n", | |
| "        in_train[train_positions] = True\n", | |
| "        in_train = in_train.reshape(X.shape)\n", | |
| "\n", | |
| "        X_train = np.where(in_train, X, np.nan)\n", | |
| "        X_test = np.where(in_train, np.nan, X)\n", | |
| "        return X_train, X_test" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "initial loss is 0.061217783099648475. Frob norm is 0.06121048376737203. NN is 72.99332276442651.\n", | |
| "current loss at step 100 is: 0.03385654440099862\n", | |
| "current loss at step 200 is: 0.0026624741769605944\n", | |
| "current loss at step 300 is: 0.0018161797565133951\n", | |
| "current loss at step 400 is: 0.0015610925804993951\n", | |
| "final loss is 0.0015617183019356355\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[-1.53635097, -0.53241162, 0.4666322 , ..., 47.47191515,\n", | |
| " 48.46528351, 49.45837831],\n", | |
| " [-0.25671383, 0.74574557, 1.70734204, ..., 48.73569856,\n", | |
| " 49.73728772, 50.7358575 ],\n", | |
| " [ 0.62897124, 1.64319631, 2.64757711, ..., 49.63582123,\n", | |
| " 50.63525461, 51.65707341],\n", | |
| " ...,\n", | |
| " [-2.46721051, -1.47652256, -0.47907954, ..., 46.5183322 ,\n", | |
| " 47.52154028, 48.52198505],\n", | |
| " [-1.42512265, -0.43172244, 0.54887283, ..., 47.57060248,\n", | |
| " 48.56804265, 49.56963451],\n", | |
| " [ 0.30190312, 1.30677794, 2.29937274, ..., 49.30762759,\n", | |
| " 50.29933079, 51.3009217 ]])" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Fit once with a hand-picked penalty; the completed matrix is the cell's output.\n", | |
| "model = MCNNM()\n", | |
| "model.fit_predict(X_observed, lambd=1e-7, iterations=500)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "lambdas: [1.e+01 1.e+00 1.e-01 1.e-02 1.e-03 1.e-04 1.e-05 1.e-06 1.e-07 1.e-08]\n", | |
| "Median MSEs: [8.74476848e+02 8.74519463e+02 8.74510827e+02 8.74504882e+02\n", | |
| " 8.74482450e+02 8.74466176e+02 8.74319237e+02 3.66621217e-04\n", | |
| " 9.10393502e-05 1.10589859e-04]\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "1e-07" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Choose the penalty by cross-validation (3 splits, 500 optimizer steps per fit) and display it.\n", | |
| "cv_fit_options = dict(iterations=500)\n", | |
| "lambd = model.cross_validate(X_observed, kwargs=cv_fit_options, folds=3)\n", | |
| "lambd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "initial loss is 0.06128357602487604. Frob norm is 0.061210531446904096. NN is 73.04457797194394.\n", | |
| "current loss at step 100 is: 0.0464511584991518\n", | |
| "current loss at step 200 is: 0.023976601480258913\n", | |
| "current loss at step 300 is: 0.015457361665983767\n", | |
| "current loss at step 400 is: 0.015446730855401332\n", | |
| "current loss at step 500 is: 0.01544309509180137\n", | |
| "current loss at step 600 is: 0.01544212085522212\n", | |
| "current loss at step 700 is: 0.015442117787439329\n", | |
| "current loss at step 800 is: 0.015442367614257492\n", | |
| "current loss at step 900 is: 0.015442718219936577\n", | |
| "final loss is 0.015443150405275791\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[-1.52355107, -0.5372075 , 0.4659409 , ..., 47.4454591 ,\n", | |
| " 48.45111517, 49.44839559],\n", | |
| " [-0.27066193, 0.75124867, 1.74130792, ..., 48.73297055,\n", | |
| " 49.72362367, 50.73003127],\n", | |
| " [ 0.62201849, 1.62113617, 2.62492045, ..., 49.62687984,\n", | |
| " 50.62424393, 51.62259981],\n", | |
| " ...,\n", | |
| " [-2.45450328, -1.4649343 , -0.45360128, ..., 46.55554822,\n", | |
| " 47.47705725, 48.50694786],\n", | |
| " [-1.44075926, -0.40896247, 0.5626034 , ..., 47.53997254,\n", | |
| " 48.53994925, 49.55376806],\n", | |
| " [ 0.29568001, 1.26412352, 2.29720692, ..., 49.30814378,\n", | |
| " 50.30428848, 51.28134894]])" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Refit on the full observed matrix with the cross-validated penalty.\n", | |
| "model.fit_predict(X_observed, lambd=lambd, iterations=1000)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "# One row per matrix entry: masked value, fitted value, observation flag and the unmasked value from A.\n", | |
| "observed_values = X_observed.flatten()\n", | |
| "df = pd.DataFrame({\n", | |
| "    'X_observed': observed_values,\n", | |
| "    'X_completed': model.X_completed.flatten(),\n", | |
| "    'observed': ~np.isnan(observed_values),\n", | |
| "    'a': A.flatten(),\n", | |
| "})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<matplotlib.axes._subplots.AxesSubplot at 0x7eff04465358>" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAELCAYAAAA7h+qnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAGgFJREFUeJzt3Xu4VPV97/H3B1QwXoJcgh5wBxQr4bGRhF0laqmoMZp4ME0tVmO0fTjBntPTekmr2IfnJGkwQRtjPGkuknoh0URRo5KQKJdA1NiogKKIphIBgXKJO3CU1mDQ7/ljLeIAm73XDHvNzFrzeT3PfmbWbc93JSOf/V2X31JEYGZmra1XowswM7PGcxiYmZnDwMzMHAZmZobDwMzMcBiYmRkOAzMzw2FgZmY4DMzMDNiv0QVkNXDgwBg2bFijyzAzK5QlS5a8GhGDuluvMGEwbNgwFi9e3OgyzMwKRdKaLOv5MJGZmTkMzMzMYWBmZjgMzMwMh4GZmeEwMDNrWh3btrNs7VY6tm3P/bMKc2mpmVkruflnv+LLc3/JAb178VYE1//Z+5kwekhun+cwMDNrIis3vc5n7lnGsnX/D4DfvfUWAFfd9ywnjxjIgIP75PK5DgMzsybxd99bwuxnN3a6rJfEui1vOAzMzMrqgaVruXLWs7zdxTq/e+tthh52YG41OAzMzBpo2JQ5mdb7hzOPza0rAIeBmVlDfPxrP+OZ9dsyrTvh+MOZ/CdH51qPw8DMrI4Wr+rgvJt/kWldAdecPTL3IACHgZlZ3WQ9JLTTPZeOpX34gJyq2ZXDwMwsZ1N/sIw7nlxX1TYXf6itbkEADgMzs1xV2w0ImHfFOEYMPiSfgvbCYWBmloNqQwDgyMP68OjVZ+RQTfccBmZmPayWILjl4jGcPurwHKrJxmFgZtZDagmBg/aH57/wsRyqqY7DwMxsH3Vs286YafOr3m719MaHwE4OAzOzfVBLNwDNFQRQhzCQtBp4HXgL2BER7ZL6A3cDw4DVwMSI2JJ3LWZmPaXWbmB+A64UyqJencH4iHi1YnoKsCAipkuakk5fXadazMz2SVm6gUqNOkx0LnBq+n4msAiHgZk1uTseX8XU2Suq3m7J1DNyHWSuJ9QjDAKYKymAmyNiBjA4IjakyzcCg+tQh5lZzcrYDVSqRxicEhHrJb0HmCfpxcqFERFpUOxB0mRgMkBbW1v+lZqZ7Wb89QtY9ZvfVr1dUUJgp155f0BErE9fNwP3AycAmyQdAZC+bt7LtjMioj0i2gcNGpR3qWZmuxg2ZU5LBAHk3BlIOgjoFRGvp+/PBP4JmA1cAkxPXx/Msw4zs2qU/ZBQZ/I+TDQYuF/Szs/6XkQ8JOkpYJakScAaYGLOdZiZZdKKQQA5h0FEvAwc38n8DuD0PD/bzKwarRoCO+V+zsDMrNm1ehCAh6MwsxbmEHiHOwMza0kOgl25MzCzluIQ6Jw7AzNrCQ8sXVtTEJw5cmDpgwDcGZhZC3A30D2HgZmV1uTbn2Dui692v+JumnWY6Tw5DMyslNwNVMdhYGal8sHP/4TfvPF21dvde+lY2ocPyKGiYnAYmFlpuBuoncPAzAqv1hAowkNn6sVhYGaF5m6gZzgMzKyQHAI9yzedmVnhOAh6njsDMysMh0B+3BmYWdNbuen1moKgX185CDJyZ2BmTc3dQH24MzCzplTrwHKXjT/KQVADdwZm1nTcDdSfw8DMmsbUHyzjjifXVb3dtAmjuOik4TlU1DocBmbWFNwNNJbDwMwaykNJNAeHgZk1jLuB5uEwMLO6cwg0H19aamZ15SBoTnXpDCT1BhYD6yPiHEnDgbuAAcAS4FMR8WY9ajGzxnAINLd6dQaXAS9UTF8H3BgRI4AtwKQ61WFmDeAgaH65dwaShgIfA64FrpQk4DTgwnSVmcDngG/mXYuZ1ZdDoDjq0Rl8FbgK2PlQ0gHA1ojY
kU6vA4bUoQ4zq5NaB5Zr69fHQdAguXYGks4BNkfEEkmn1rD9ZGAyQFtbWw9XZ2Z5cDdQTHkfJjoZmCDpo0Bf4FDgJqCfpP3S7mAosL6zjSNiBjADoL29PXKu1cz2wc0LX+JLD/971dtd85E/4NLxx+RQkVUj1zCIiGuAawDSzuDvI+KTku4BziO5ougS4ME86zCzfLkbKL5G3XR2NXCXpGnA08AtDarDzPbBhTf/nMdXba16Ow8l0XzqFgYRsQhYlL5/GTihXp9tZj3P3UC5eDgKM6uKQ6CcPByFmWXmICgvdwZm1i0PM11+DgMz65K7gdbgMDCzTjkEWovPGZjZLjq2bXcQtCB3Bmb2ew6B1uXOwMxYvKqjpiAYMfBAB0FJuDMwa3HuBgwcBmYtywPLWSWHgVkLcjdgu3MYmLWQk784j/WvVf+4cd88Vn4OA7MW4W7AuuIwMCs5h4Bl0WUYSPohsNcnjEXEhB6vyMx6jIPAsuquM/hy+voJ4HDgjnT6AmBTXkWZ2b5xCFi1ugyDiPgZgKQbIqK9YtEPJS3OtTIzq1rHtu2MmTa/pm0dBK0t6zmDgyQdlT6hDEnDgYPyK8vMquVuwPZF1jC4Algk6WVAwHuBS3Oryswyq7UbOAD4dweBpTKFQUQ8JOkYYGQ668WI2J5fWWaWhbsB6ymZwkDSu4ArgfdGxKclHSPp2Ij4Ub7lmdne1BIE0yaM4qKThudQjRVd1sNEtwFLgA+l0+uBewCHgVmduRuwPGQNg6Mj4nxJFwBExH9JUo51mdlurv3hcr798zVVb/fVie/n4x88MoeKrEyyhsGbkg4kvQFN0tGAzxmY1Ym7Actb1jD4HPAQcKSkO4GTgb/KqygzS/zBlDlUP6ycB5az6mW9mmiupCXAWJJLSy+LiFe7205SX+ARoE/6WfdGxGfT+xTuAgaQnIv4VETU8p03Ky13A1ZPWa8mWhARpwNzOpnXle3AaRGxTdL+wGOSfkJyZdKNEXGXpG8Bk4Bv1rYLZuVSawi4G7B90eUzkCX1ldQfGCjpMEn9059hwJDufnkktqWT+6c/AZwG3JvOnwl8vMb6zUplX7oBB4Hti+46g0uBy4H/BiytmP8a8C9ZPkBSb5JDQSOArwO/ArZGxI50lXVkCBazMvMhIWu07gaquwm4SdLfRsTXavmAiHgLGC2pH3A/79zF3C1Jk4HJAG1tbbV8vFnTcxBYM8h6NdGtkqYCbRExOR2aoqo7kCNiq6SFJDeu9ZO0X9odDCW5ia2zbWYAMwDa29v3+lwFsyJyCFgz6fKcQYVbgTeBk9Lp9cC07jaSNCjtCEjvU/gw8AKwEDgvXe0S4MEqajYrtI5t22sKgn595SCw3OR9B/IRwMz0vEEvYFZE/EjSCuAuSdOAp4FbainerGjcDVizyvUO5Ih4FvhAJ/NfBk6ook6zQluwYiOTvrOk6u3OHDmQGX95Yg4Vme0qaxh8lj3vQP7LvIoyKxN3A1YEWe9AnidpKVXegWzWyibf/gRzX6z+PxPfPGaN0GUYSPrgbrM2pK9tktoiYunu25iZuwErnu46gxu6WLbzTmIzSx17zRy213ARtEPAGq27m87G16sQs6JzN2BFlnWgur7A/wJOIekIHgW+FRG/zbE2s0JwCFgZZL2a6DvA68DOISkuBL4L/HkeRZkVhYPAyiJrGBwXEaMqphemN46ZtSSHgJVN1uEolkoau3NC0onA4nxKMmtuDgIro6ydwRjgcUmvpNNtwC8lPUfy2IL351KdWRNxCFiZZQ2Ds3KtwqyJLV7VwXk3/6Lq7Yb378vCq7p7GKBZc8h6B/IaSYcBR1Zu45vOrOzcDViryHpp6RdIxiL6FelgdfimMyuxB5au5fJZz1a93WXjj+KKj7wvh4rM8pX1MNFEkmGs38yzGLNm4G7AWlHWMFgO9AM251iLWUONmz6fV7Z2OzL7HuZfMY4Rgw/JoSKz+skaBl8Cnpa0nIrnGETEhFyqMqsz
dwPW6rKGwUzgOuA54O38yjGrL4eAWSJrGPxXRPzfXCsxqzMHgdk7sobBo5K+BMxm18NEvrTUCschYLanrGGw8znGYyvm+dJSKxwHgVnnst505ucaWKE5BMy6lmmgOknvlvQVSYvTnxskvTvv4sz21eJVHTUFwbv2cxBYa8l6mOhWknsNJqbTnwJuAz6RR1FmPcHdgFl2WcPg6Ij4s4rpz0t6Jo+CzPbVghUbmfSdJVVv99+Pew9fu+iPcqjIrPllDYM3JJ0SEY8BSDoZeCO/ssxq427ArDZZw+B/AjMrzhNsIRm4rkuSjiR5ZOZgkquPZkTETZL6A3cDw4DVwMSI2FJV5WYVbnz4BW5a+HLV291y8RhOH3V4DhWZFUvWq4meAY6XdGg6/VrG378D+ExELJV0CLBE0jySIFkQEdMlTQGmAFdXXb0Z7gbMekLWIay/CFwfEVvT6cNI/pGf2tV2EbEB2JC+f13SC8AQ4Fzg1HS1mcAiHAZWpVoHlnMImO0p6zOQz94ZBADpIZ2PVvNBkoaR3Lz2BDA4DQqAjSSHkcwyGzZljoPArAdlPWfQW1KfiNgOIOlAoE/WD5F0MHAfcHlEvCbp98siIiTFXrabDEwGaGtry/pxVmI+JGSWj6ydwZ3AAkmTJE0C5pEc3umWpP1JguDOiPhBOnuTpCPS5Uewl+ckRMSMiGiPiPZBgwZlLNXKykFglp+sJ5Cvk7QMOCOd9YWIeLi77ZS0ALcAL0TEVyoWzQYuAaanrw9WVbW1FIeAWf6yHiYiIh4CHupsmaR/i4gPdbLoZJK7lZ+ruEntH0lCYFbaZazhnTubzXbhIDCrj8xh0I2+nc1Mb1JTZ8uA03vos62EHAJm9ZX1nEF3Oj0BbFatjm3bawqCIYce4CAw2wc91RmY7TN3A2aN02VnIGmv13NK+uPKyR6ryFrOA0vX1hQEZ44c6CAw6yHddQaLJH0LuCEi3gKQNBi4ARgJtKfrfSq/Eq3M3A2YNYfuwmAMyZU/z0i6DPhD4ErgeuDinStFxPLcKrRSmnz7E8x98dWqt1sy9QwGHJz5fkczy6jLMEiHnbg0DYL5wH8AYyNiXT2Ks3JyN2DWfLoMA0n9gOuAE4GzSMYj+omkyyLip3Woz0rEIWDWvLo7TLQU+AbwNxGxA5graTTwDUlrIuKC3Cu0UnAQmDW37sJg3O6HhNJnG5wk6dP5lWVl4RAwK4YuLy3t6txARHy758uxMnEQmBWHbzqzHucQMCuenhqOwgxwEJgVlTsD6xEOAbNic2dg+6yWIBj4rt4OArMm4s7AauZuwKw83BlY1Ras2FhTEJz3gSMcBGZNyp2BVcXdgFk5OQwsk3HT5/PK1u1Vb3fLxWM4fdThOVRkZj3JYWDdcjdgVn4OA9urWkPAw0ybFY/DwDrlbsCstTgMbBcOAbPW5EtL7fccBGaty52BOQTMzJ1Bq3MQmBnk3BlIuhU4B9gcEcel8/oDdwPDgNXAxPRZy1ZHDgEzq5R3Z3A7ybOTK00BFkTEMcCCdNrqpGPb9pqC4KTh/RwEZiWWa2cQEY9IGrbb7HOBU9P3M4FFwNV51mEJdwNmtjeNOIE8OCI2pO83AoMbUENLufaHy/n2z9dUvZ2HkjBrHQ29migiQlLsbbmkycBkgLa2trrVVSbuBswsi0aEwSZJR0TEBklHAJv3tmJEzABmALS3t+81NGxPZ3z5p6x89Y2qt5t/xThGDD4kh4rMrJk1IgxmA5cA09PXBxtQQ6m5GzCzauV9aen3SU4WD5S0DvgsSQjMkjQJWANMzLOGVuIQMLNa5X010QV7WXR6np/bihwEZrYvPBxFwTkEzKwneDiKgqr15jFwEJjZntwZFJBDwMx6mjuDgqklCA7AQWBmXXNnUBDuBswsT+4MmtwDS9fWFATHHX6Qg8DMMnNn0MTcDZhZvTgMmtA5X13E8o3/WfV29146lvbhA3KoyMzKzmHQZNwNmFkjOAyaxOjP/Zitv61+
LL4lU89gwMF9cqjIzFqJw6AJuBsws0ZzGDRQrSHgbsDMeprDoEHcDZhZM3EY1JlDwMyakW86qxMPLGdmzcydQR04BMys2bkzyNHKTa/XFAT9D+zlIDCzunJnkBN3A2ZWJA6DHnbH46uYOntF1dtNmzCKi04ankNFZmbdcxj0IHcDZlZUDoMeUOvAcvOvGMeIwYfkUJGZWXUcBvvI3YCZlYHDoEbHXjOH7dWPK+cQMLOm5DCogbsBMysbh0EVHAJmVlYNu+lM0lmSfilppaQpjaojKweBmZVZQzoDSb2BrwMfBtYBT0maHRHVX6CfM4eAmbWCRnUGJwArI+LliHgTuAs4t0G1dKrWoSSEg8DMiqdR5wyGAGsrptcBJzaolj24GzCzVtPUA9VJmixpsaTFv/71r+vymbUEwXGHH+QgMLNCa1RnsB44smJ6aDpvFxExA5gB0N7eXsNV/dm5GzCzVtaoMHgKOEbScJIQ+AvgwkYU8sDStVw+69mqt/NziM2sTBoSBhGxQ9L/Bh4GegO3RsTz9a7D3YCZWaJhN51FxI+BHzfisxes2Mik7yypejuHgJmVVcvdgfy+qXN4Y0f12zkIzKzMWiYMOrZtZ8y0+VVv5xAws1bQ1JeW9pQ7f7Gm6iDoIweBmbWO0ncGX/zR88x4bHVV2zgEzKzVlDoMzr/5cZ5YtSXz+rdcPIbTRx2eY0VmZs2ptGGweFVH5iAY+Z538dCV43OuyMyseZU2DB556dVM6/mQkJlZiU8gjztmYJfLp00Y5SAwM0uVtjNoHz6APx4xgEdXduwyf/yxA7ntr5pmgFQzs6ZQ2jAA+O7/GMviVR088Mx/8O4D9+NPPzCUEYMPaXRZZmZNp9RhAEmH0D58QKPLMDNraqU9Z2BmZtk5DMzMzGFgZmYOAzMzw2FgZmaAInJ9tHCPkfRrYE2DyxgIZLu1uRi8P82vbPtUtv2B5t+n90bEoO5WKkwYNANJiyOivdF19BTvT/Mr2z6VbX+gPPvkw0RmZuYwMDMzh0G1ZjS6gB7m/Wl+Zdunsu0PlGSffM7AzMzcGZiZmcMgE0lnSfqlpJWSpjS6nlpIulXSZknLK+b1lzRP0kvp62GNrLEako6UtFDSCknPS7osnV/IfZLUV9KTkpal+/P5dP5wSU+k3727JR3Q6FqrIam3pKcl/SidLvr+rJb0nKRnJC1O5xXyO7c7h0E3JPUGvg6cDYwCLpA0qrFV1eR24Kzd5k0BFkTEMcCCdLoodgCfiYhRwFjgb9L/X4q6T9uB0yLieGA0cJakscB1wI0RMQLYAkxqYI21uAx4oWK66PsDMD4iRldcTlrU79wuHAbdOwFYGREvR8SbwF3AuQ2uqWoR8Qjwm91mnwvMTN/PBD5e16L2QURsiIil6fvXSf7BGUJB9ykS29LJ/dOfAE4D7k3nF2Z/ACQNBT4G/Gs6LQq8P10o5Hdudw6D7g0B1lZMr0vnlcHgiNiQvt8IDG5kMbWSNAz4APAEBd6n9JDKM8BmYB7wK2BrROxIVynad++rwFXA2+n0AIq9P5AE9FxJSyRNTucV9jtXqfQPt7FsIiIkFe7SMkkHA/cBl0fEa8kfn4mi7VNEvAWMltQPuB8Y2eCSaibpHGBzRCyRdGqj6+lBp0TEeknvAeZJerFyYdG+c5XcGXRvPXBkxfTQdF4ZbJJ0BED6urnB9VRF0v4kQXBnRPwgnV3ofQKIiK3AQuBDQD9JO/9oK9J372RggqTVJIdWTwNuorj7A0BErE9fN5ME9gmU4DsHDoMsngKOSa+COAD4C2B2g2vqKbOBS9L3lwAPNrCWqqTHn28BXoiIr1QsKuQ+SRqUdgRIOhD4MMl5kIXAeelqhdmfiLgmIoZGxDCS/2Z+GhGfpKD7AyDpIEmH7HwPnAksp6Dfud35prMMJH2U5Phnb+DWiLi2wSVVTdL3gVNJRljcBHwWeACYBbSRjAg7MSJ2P8nclCSdAjwKPMc7x6T/keS8QeH2
SdL7SU4+9ib5I21WRPyTpKNI/rLuDzwNXBQR2xtXafXSw0R/HxHnFHl/0trvTyf3A74XEddKGkABv3O7cxiYmZkPE5mZmcPAzMxwGJiZGQ4DMzPDYWBmZjgMzMwMh4GVTDq09SpJ/dPpw9LpYVX8jlN3DrncTNLhkwc2ug4rJ4eBlUpErAW+CUxPZ00HZkTE6oYVBVQMwWDWlBwGVkY3AmMlXQ6cAny5s5WU+GdJy9MHlpxfsfhQSXPShxp9S1KvdFTR2yvWvyL9PUdLeigdyfJRSSPT+ben2z4BXJ/+Zd+v4vNfkjQ4HYriPklPpT8np8sHSJqbPuzmXwFhlhP/tWKlExG/k/QPwEPAmRHxu72s+gmSB8kcTzJMx1OSHkmXnUDyMKM16e/5BLAKGBIRxwFU/MM+A/jriHhJ0onAN0gGZoNkMLaTIuKt9EFJfwrclq63JiI2SfoeyQNfHpPUBjwMvI9kyJDH0mEpPkYxHwRjBeEwsLI6G9gAHEfybIDOnAJ8Px06epOknwF/BLwGPBkRL8Pvx3U6heQpVkdJ+howh2Rc+4OBk4B7KobP7lPxGfekvx/gbuD/ALeRDN52dzr/DGBUxfaHpr93HEkIERFzJG2p5X8IsywcBlY6kkaTjPo5FnhM0l0VDx/JavdBuyIitkg6HvgI8NfAROBykge2jN7L7/nPivf/BoyQNIjkaVjT0vm9gLER8dvd9qPKks1q53MGVirp0NbfJHnYzSvAP7OXcwYko56en54LGETyl/iT6bIT0mHLewHnk4TKQKBXRNwHTAU+GBGvAask/fnOz08DYw+RjAp5P/AVkqG3O9JFc4G/rdiHncHyCHBhOu9soJAPWrdicBhY2XwaeCUidh4a+gbwPkl/0sm69wPPAsuAnwJXRcTGdNlTwL+QPFNgVbruEGCRkkdT3gFck677SWCSpGXA83T9jOy7gYt45xARwN8B7ZKelbSCpOsA+DwwTtLzJIeLXsmw/2Y18RDWZmbmzsDMzHwC2VqApD8Evrvb7O0RcWIj6jFrRj5MZGZmPkxkZmYOAzMzw2FgZmY4DMzMDIeBmZkB/x95VKvkfgsZNwAAAABJRU5ErkJggg==\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "%matplotlib inline\n", | |
| "\n", | |
| "# Fit quality on the observed entries: completed value against observed value.\n", | |
| "\n", | |
| "observed_rows = df[df.observed]\n", | |
| "observed_rows.plot(kind='scatter', x='X_observed', y='X_completed')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<matplotlib.axes._subplots.AxesSubplot at 0x7efeecf3b7b8>" | |
| ] | |
| }, | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEKCAYAAADw2zkCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAGyBJREFUeJzt3XuYFPWd7/H3d7gqSCaAIotMBgXXRdYgTBDBGNCJEnXR3RASE6Pu4wmck6yrZl0dc/CCEoM+JujJSU6co0Zc4yrBGzrKVQjuSVQuIhIgCYIKLJeAsjCKKPA9f3QRW2Smq4qu7q7uz+t5fKarun70tx4bPvOty6/M3RERkcpWVewCRESk+BQGIiKiMBAREYWBiIigMBARERQGIiKCwkBERFAYiIgICgMREQHaFruAsLp37+61tbXFLkNEJFWWLFmyzd2PzrVdasKgtraWxYsXF7sMEZFUMbO3wmynw0QiIqIwEBERhYGIiKAwEBERFAYiIoLCQESkZG1v3sNr63ewvXlP4p+VmktLRUQqydPLNnL948tpV1XFR/v3c+dXT2H0wF6JfZ46AxGRErNmyy7+dfpyPvhoP7v27OWDj/Zz3ePLE+0Q1BmIiJSI7c17uO/FtTQuXMu+gx5P366qig3v7qZb5w6JfLbCQESkyBav284Pn1vJsvU78Ra2+Wj/fo777BGJ1aAwEBEpksXrtvOdhxbx7u59rW7Xvm0Vd371lMS6AlAYiIgUxci7XmDdtt05t2tbBc9deQZ9exyVaD0KAxGRAnpq6XqunrY89PYTRw9IPAhAYSAiUjC1DU2htzVg0t8P4FunfS65grIoDEREEjbuwZeZvXpb6O17dmnPs/98ZqLnCA6mMBARSVCUbgBg3Bf78IPz+ydUTcsUBiIiCRh++xw27vww9PYGLJ5QX9BuIJvCQEQkz6J2A4N7f4bHv3dGQtWEozAQEcmTQROf553d+yONmXvNmQW5WigXhYGISB5E7QYA3px8fgKVxJN4GJjZm8AuYB+w193rzKwr8BhQC7wJjHX3d5OuRUQk3+KEwJhTe3LX1wclUE18heoMRrp79nVVDcA8d59sZg3B8vUFqkVE5LBNeOI1Hn5lQ6Qx7YA/lVA3kK1Yh4kuBEYEr6cCC1AYiEhKxOkGSuXcQEsKEQYOzDYzB+5190agh7tvCt7fDPQ41EAzGweMA6ipqSlAqSIiLYvTDUBpnRtoSSHC4Ax332hmxwBzzGx19pvu7kFQfEoQHI0AdXV1Lc3sKiKSuDjdwA3nnsj4kf0SqCb/Eg8Dd98Y/NxqZk8CQ4AtZtbT3TeZWU9ga9J1iIjE8bWfv8iit3dGHpeGbiBbomFgZp2AKnffFbw+B7gVmAFcBkwOfj6dZB0iInGU47mBliTdGfQAnjSzA5/1iLvPNLNFwDQzuwJ4CxibcB0iIqENuW0WW9/bG2lMFbA2Zd1AtkTDwN3XAp8/xPrtwNlJfraISByV1A1k0x3IIiLACQ1NtP7wyU9rA7yR4m4gm8JARCpe2qeSyAeFgYhUrDgh0Lk9rLi1vIIAFAYiUoEWr9vOmHtfijyu3LqBbAoDEakocbqB7ke2YfFNoxKopnQoDESkIlx+3+9YsOadyOPKuRvIpjAQkbIXpxv4uwHH8NNLvpBANaVJYSAiZWvKrFXcM39t5HGV0g1kUxiISFmK0w0M61PNI+OHJ1BN6VMYiEhZqb/rBdZs2x15XCV2A9kUBiJSNnTzWHwKAxFJvZNvbOK9j6KPUxB8TGEgIqkWpxtYMqGebp07JFBNeikMRCSVTmxo4sOIY9I+zXSSFAYikirbm/cweNLcyON0SKh1CgMRSY04h4SqOxrLbjkvgWrKi8JAREremi27qJ+yMPI4dQPhKQxEpKTF6Qb6dO3I/Ov0MMUoFAYiUpI0lURhKQxEpOTE6QbOOak7jZeflkA1lUFhICIlY9yDLzN7
9bbI49QNHD6FgYiUhDjdwA3nnsj4kf0SqKbyKAxEpKjqbp3Jtvf3RRrTBnhD3UBeKQxEpGg0sVzpUBiISMFpYrnSU5AwMLM2wGJgo7tfYGZ9gEeBbsAS4NvuHnWaERFJIXUDpamqQJ9zFbAqa/kOYIq79wXeBa4oUB0iUiS1DU0KghKWeBiY2XHA+cB9wbIBZwHTg02mAhclXYeIFE/cEFAQFE4hDhPdDVwHHBUsdwN2uPveYHkD0KsAdYhIgcUJgZrqDixsqE+gGmlNomFgZhcAW919iZmNiDF+HDAOoKamJs/ViUhSNLFc+iTdGQwHRpvZeUBHoAtwD1BtZm2D7uA4YOOhBrt7I9AIUFdX5wnXKiJ5EKcbOOmYI5n5/ZEJVCNhJRoG7n4DcANA0Blc6+7fMrNfA2PIXFF0GfB0knWISPIuv+93LFjzTuRx6gZKQ7HuM7geeNTMJgGvAvcXqQ4RyYM43cCAYzvx7NUj8l+MxFKwMHD3BcCC4PVaYEihPltEknHtY0uZ/uqmyOPUDZQe3YEsIrHE6QYmje7PJcP6JFCNHC6FgYhEMuS2WWx9b2/uDQ+ibqC0KQxEJDTdQVy+FAYiktPAW55jxwfRr+5WEKSHwkBEWqVuoDIoDETkkE5oaCLaI2egPfBHBUEqKQxE5BO2N+9h8KS5kcctmVBPt84dEqhICkFhICJ/cXxDE/sjjunSAZZPVDeQdgoDEeGppeu5etryyON0bqB8KAxEKlycE8R9unZk/nVnJ1CNFIvCQKRCXfnwIp5ZsTXyOHUD5UlhIFKB4nQDX6jpwq+/+8UEqpFSoDAQqSDqBqQlCgORChGnG5g+fih1fbolUI2UGoWBSJkbOmk2m5s/ijxO3UBlaTUMzOwZoMUJSdx9dN4rEpG8UTcgYeXqDO4Kfv4DcCzwcLB8MbAlqaJE5PDEmViuHfAndQMVq9UwcPffAJjZj929LuutZ8xscaKViUgsmlhO4gh7zqCTmR0fPK4SM+sDdEquLBGJKk4IHNkWVk5SEEj4MLgGWGBmawEDPgeMT6wqEQkt7sRy6gYkW6gwcPeZZtYPOClYtdrd9yRXloiEEacbqO5oLLvlvASqkTQLFQZmdiTwfeBz7v4dM+tnZn/t7s8mW56IHMrDv13HhBkrI49TNyAtCXuY6JfAEuD0YHkj8GtAYSBSYJpKQpIQNgxOcPevm9nFAO7+vplZgnWJyEGufWwp01/dFHmcugEJI2wYfGhmRxDcgGZmJwA6ZyBSIHG6gWF9qnlk/PAEqpFyFDYMbgFmAr3N7FfAcOAfcw0ys47AQqBD8FnT3f3m4NLUR4FuZA4/fdvdP4xevkh5G377HDbujP5XQ92ARBX2aqLZZrYEGErm0tKr3H1biKF7gLPcvdnM2gH/YWbPkzkZPcXdHzWzXwBXAP8n3i6IlKc43cCk0f25ZFifBKqRchf2aqJ57n420HSIdS1ydweag8V2wX8OnAV8M1g/lUznoTAQAYbcNout7+2NPE7dgByOXBPVdQSOBLqb2WfJdAUAXYBeYT7AzNqQORTUF/gZ8Aaww90PfNs3hP2zRMpdnG5gyYR6unXukEA1UklydQbjgauBvwKWZq3fCfzvMB/g7vuAgWZWDTzJxzeu5WRm44BxADU1NWGHiaTOgJuaaI54asCAdeoGJE9yTVR3D3CPmV3p7j89nA9y9x1mNp/MvQrVZtY26A6OI3PfwqHGNAKNAHV1ddGmYBRJCU0sJ6WgKuR2D5jZBDNrBAjuQL4g1yAzOzroCAguTf0ysAqYD4wJNrsMeDpy5SIpd0JDU+Qg6NxeQSDJCHtp6QNkjvsPC5bD3oHcE5ganDeoAqa5+7NmthJ41MwmAa8C90euXCSl1mzZRf2UhZHHKQQkSYnegezuy4FTD7F+LTAkUqUiZaBvQxNRrxPSNNNSCLoDWaRAdG5ASlnYMLiZT9+BfHlSRYmUkzgh0LEKVt+uIJDCCXsH8hwzW0r0O5BF
KtaVDy/imRVbI49TNyDFkOums0EHrTowZWKNmdW4+9KDx4hIvG6gproDCxvqE6hGJLdcncGPW3nvwLQSIhIY9ZP5rN76fuRx6gak2HLddDayUIWIpF2cbuDusadw0aDeCVQjEk3Yieo6At8FziDTEbwI/MLdP0iwNpFUqLt1Jtve3xd5nLoBKSVhryZ6CNgFHJiS4pvAvwFfS6IokbTQxHJSLsKGwQB375+1PD+4i1ikIp18YxPvfRR9nLoBKVVhw2CpmQ1195cAzOw0YHFyZYmULt08JuUobBgMBn5rZm8HyzXAH8zsdTLPsDklkepESsjQSbPZ3BytHejSAZZPVBBI6QsbBqMSrUKkhG1v3sPgSXMjj1M3IGkS9g7kt4InnfXOHqObzqTcxTkkdEyntrxy47kJVCOSnLCXlt5GZi6iNwgmq0M3nUkZm7dyM1c8tCTyOHUDklZhDxONJTONdcQH84mkT5xu4As1Xfj1d7+YQDUihRE2DFYA1UD0WbdEUkITy0klCxsGPwJeNbMVZD3HwN1HJ1KVSIHF6QauGnk815z7NwlUI1J4YcNgKnAH8DqwP7lyRApr3IMvM3t19NnY1Q1IuQkbBu+7+/9KtBKRAtPEciIfCxsGL5rZj4AZfPIwkS4tldSJc/MYqBuQ8hY2DA481H5o1jpdWiqpE6cbmHvNmfTtcVQC1YiUjrA3nem5BpJqA25qojnihdEdDP7wI3UDUhnC3nT2GeBm4Mxg1W+AW939v5IqTCRfNLGcSG5hDxM9QOZeg7HB8reBXwL/kERRIvlwfENT5Evf2gBvKAikAoUNgxPc/atZyxPNbFkSBYkcLk0sJxJd2DDYbWZnuPt/AJjZcGB3cmWJxBPnkFDn9rDiVgWBVLawYfA/gKnBuQOAd8lMXNcqM+tN5pGZPchcfdTo7veYWVfgMaAWeBMY6+7vRqpcJMuaLbuon7Iw8jh1AyIZYa8mWgZ83sy6BMs7Q/75e4F/cfelZnYUsMTM5pAJknnuPtnMGoAG4PrI1YsQrxsY1qeaR8YPT6AakXQKezXR7cCd7r4jWP4smX/kJ7Q2zt03AZuC17vMbBXQC7gQGBFsNhVYgMJAIprwxGs8/MqGyOPUDYh8WlXI7b5yIAgAgkM650X5IDOrJXPz2stAjyAoADaTOYx0qDHjzGyxmS3+85//HOXjpMzVNjRFDoK+3Y9QEIi0IOw5gzZm1sHd9wCY2RFAh7AfYmadgceBq919p5n95T13dzPzQ41z90agEaCuru6Q20hl+drPX2TR22GPUn5MISDSurBh8Ctgnpn9Mlj+RzKHd3Iys3ZkguBX7v5EsHqLmfV0901m1hM9J0FCiHNuYNLo/lwyrE8C1YiUl7AnkO8ws9eA+mDVbe4+K9c4y7QA9wOr3P0nWW/NAC4DJgc/n45UtVSUk29s4r3o88qpGxCJIGxngLvPBGYe6j0z+527n36It4aTuVv59ayb1H5AJgSmmdkVwFt8fGezyCfE6QaWTKinW+fQRzFFhAhhkEPHQ60MblKzQ70HnJ2nz5YyFCcE2gN/VDcgEku+wkAndyVv1A2IFF6+wkDksMUJga5HVLH05q8kUI1IZWk1DMysxt3fbuG9L7r7iwcW816ZVIzF67Yz5t6XIo/TCWKR/MnVGSwws18AP3b3fQBm1gP4MXASUBds9+3kSpRyFqcbGNirM09d+aUEqhGpXLnCYDCZK3+WmdlVwN8C3wfuBC49sJG7r0isQilL6gZESkurYRBMOzE+CIK5wH8CQ909+oQwIgFNLCdSenKdM6gG7gBOA0aRmY/oeTO7yt1fKEB9UkbGPfgys1dvizxO3YBI8nIdJloK/Bz4nrvvBWab2UDg52b2lrtfnHiFUhbidANjTu3JXV8flEA1InKwXGFw5sGHhIJnGwwzs+8kV5aUC3UDIumQ65xBi+cG3P3/5r8cKSdxuoG7x57CRYN6J1CNiLRGN51J3o28cx7r3vkg8jh1
AyLFozCQvIrTDSgERIpPYSB5cXxDE/sjjmkDvKEgECkJCgM5bJpYTiT9FAYS2wkNTeyLOKZLB1g+Ud2ASKlRGEhka7bson7KwsjjdG5ApHQpDCSSOIeEOlbB6tsVBCKlTGEgoTy1dD1XT1seeZy6AZF0UBhITnG6gQHHduLZq0fkvxgRSYTCQFp05cOLeGbF1sjj1A2IpI/CQA4pTjcwaXR/LhnWJ4FqRCRpCgP5hKGTZrO5+aPI49QNiKSbwkD+Ik43MPeaM+nb46gEqhGRQlIYCANuaqL5w+jj1A2IlA+FQYXTxHIiAgmHgZk9AFwAbHX3AcG6rsBjQC3wJjA2eNayFNDf/M8mdkecS0ITy4mUr6qE//wHyTw7OVsDMM/d+wHzgmUpkO3Ne6htiB4Eb04+X0EgUsYS7QzcfaGZ1R60+kJgRPB6KrAAuD7JOiQjziGhvt2PYO61ZyVQjYiUkmKcM+jh7puC15uBHkWooaJoKgkRyaWoJ5Dd3c3MW3rfzMYB4wBqamoKVlc5idMNHNOpLa/ceG4C1YhIqSpGGGwxs57uvsnMegItznfg7o1AI0BdXV2LoSGfdu/8P/GjWX+MPE7dgEhlKkYYzAAuAyYHP58uQg1lLU43MKJvVx78b6cnUI2IpEHSl5b+O5mTxd3NbANwM5kQmGZmVwBvAWOTrKGSDJr4PO/sjvokYnUDIpL81UQXt/DW2Ul+biWK0w2MObUnd319UALViEja6A7klBt++xw27ow+l4S6ARHJpjBIsTjdwP2XDubs/scmUI2IpJnCIIU0zbSI5JvCIGXidANLJtTTrXOHBKoRkXKhMEiJExuaiHpmoGMVrL5d3YCI5KYwKHHbm/cweNLcyON0SEhEolAYlLCTb2zivYinBrp0gOUTFQQiEo3CoATNW7mZKx5aEnmcugERiUthUGI0sZyIFIPCoERMeOI1Hn5lQ+Rx6gZEJB8UBiUgTjcwsFdnnrrySwlUIyKVSGFQRKN+Mp/VW9+PPE7dgIjkm8KgSDSxnIiUEoVBgV1w9wJWbH4v8jh1AyKSJIVBAcXpBuZecyZ9exyVQDUiIh9TGBRA/V0vsGbb7sjj1A2ISKEoDBKmieVEJA0UBgkZcFMTzRFnlqsC1qobEJEiUBjk2Zotu6ifsjDyOB0SEpFiUhjkUZxDQtUdjWW3nJdANSIi4SkM8kATy4lI2ikMDlOcbqCmugMLG+oTqEZEJB6FQUxTZq3invlrI49TNyAipUhhEEOcbuDvBhzDTy/5QgLViIgcPoVBBLp5TETKlcIgpDjdwDkndafx8tMSqEZEJL+KFgZmNgq4B2gD3Ofuk4tVS2suv+93LFjzTuRx6gZEJE2KEgZm1gb4GfBlYAOwyMxmuPvKYtTTkjjdwPTxQ6nr0y2BakREklOszmAIsMbd1wKY2aPAhUBJhME37/1//Hbdjsjj1A2ISFoVKwx6AeuzljcAnzq4bmbjgHEANTU1iRe1vXkPgyfNjTxO00yLSNqV9Alkd28EGgHq6uo8yc+K8wjKI9vCyknqBkQk/YoVBhuB3lnLxwXrCi5uN6BDQiJSTqqK9LmLgH5m1sfM2gPfAGYUuogfPrsychAM61OtIBCRslOUzsDd95rZPwGzyFxa+oC7/76QNQy85Xl2fLA/0hiFgIiUq6KdM3D354DnCv25cQ4LaSoJESl3JX0COd8af/MGtz+/OtIYdQMiUgkqJgyum/4a0xZvCL393WNP4aJBvXNvKCJSBso+DNZs2cWs328OHQSaT0hEKlFZh8FNT73OQy+9HWrb02qreey/D0+4IhGR0lS2YbBmy65QQTCgZyfu/sZg3UEsIhWtbMNg2frW5xaqP+lo7hjzebp17lCgikRESlfZhsHA3tWHXP+v55zIuScfq05ARCRLse5ATlzfHkdx6emfnNzu0tNr+N5Z/RQEIiIHKdvOAODWC/+WS4fWsmz9Dgb2rlYIiIi0oKzDADIdgkJARKR1ZXuY
SEREwlMYiIiIwkBERBQGIiKCwkBERABzT/TRwnljZn8G3ipiCd2BbUX8/CRon9Kh3Pap3PYHSnufPufuR+faKDVhUGxmttjd64pdRz5pn9Kh3Pap3PYHymOfdJhIREQUBiIiojCIorHYBSRA+5QO5bZP5bY/UAb7pHMGIiKizkBERBQGOZnZKDP7g5mtMbOGYtcTh5k9YGZbzWxF1rquZjbHzP4U/PxsMWuMysx6m9l8M1tpZr83s6uC9andLzPraGavmNlrwT5NDNb3MbOXg+/gY2bWvti1RmVmbczsVTN7NlhO9T6Z2Ztm9rqZLTOzxcG61H73QGHQKjNrA/wM+ArQH7jYzPoXt6pYHgRGHbSuAZjn7v2AecFymuwF/sXd+wNDge8F/2/SvF97gLPc/fPAQGCUmQ0F7gCmuHtf4F3giiLWGNdVwKqs5XLYp5HuPjDrktI0f/cUBjkMAda4+1p3/xB4FLiwyDVF5u4LgXcOWn0hMDV4PRW4qKBFHSZ33+TuS4PXu8j8Q9OLFO+XZzQHi+2C/xw4C5gerE/VPgGY2XHA+cB9wbKR8n1qQWq/e6AwyKUXsD5reUOwrhz0cPdNwevNQI9iFnM4zKwWOBV4mZTvV3A4ZRmwFZgDvAHscPe9wSZp/A7eDVwH7A+Wu5H+fXJgtpktMbNxwbpUf/fK/uE2kpu7u5ml8rIyM+sMPA5c7e47M790ZqRxv9x9HzDQzKqBJ4GTilzSYTGzC4Ct7r7EzEYUu548OsPdN5rZMcAcM1ud/WYav3vqDFq3EeidtXxcsK4cbDGzngDBz61FricyM2tHJgh+5e5PBKtTv18A7r4DmA+cDlSb2YFf3NL2HRwOjDazN8kcZj0LuId07xPuvjH4uZVMaA8h5d89hUHrFgH9gisf2gPfAGYUuaZ8mQFcFry+DHi6iLVEFhx3vh9Y5e4/yXortftlZkcHHQFmdgTwZTLnQuYDY4LNUrVP7n6Dux/n7rVk/v684O7fIsX7ZGadzOyoA6+Bc4AVpPi7B7rpLCczO4/MMc82wAPu/sMilxSZmf07MILMzIpbgJuBp4BpQA2Z2WDHuvvBJ5lLlpmdAbwIvM7Hx6J/QOa8QSr3y8xOIXPisQ2ZX9SmufutZnY8md+quwKvApe4+57iVRpPcJjoWne/IM37FNT+ZLDYFnjE3X9oZt1I6XcPFAYiIoIOE4mICAoDERFBYSAiIigMREQEhYGIiKAwEBERFAYiIoLCQOSwmNlTwWRlv8+asEwkdXTTmchhMLOu7v5OMH3EIuBL7r692HWJRKVZS0UOzz+b2d8Hr3sD/QCFgaSOwkAkpmCunXrgdHd/38wWAB2LWpRITDpnIBLfZ4B3gyA4iczjN0VSSWEgEt9MoK2ZrQImAy8VuR6R2HQCWURE1BmIiIjCQEREUBiIiAgKAxERQWEgIiIoDEREBIWBiIigMBAREeD/A4Qzf3PO/F9kAAAAAElFTkSuQmCC\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# Completion quality on the unobserved entries: completed value against the unmasked value in A.\n", | |
| "\n", | |
| "unobserved_rows = df[~df.observed]\n", | |
| "unobserved_rows.plot(kind='scatter', x='a', y='X_completed')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.6" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment