Last active
January 11, 2021 21:42
-
-
Save maxkleiner/3dd1e5a7fd9fe1014f6dfd360aa8c288 to your computer and use it in GitHub Desktop.
logisticregression.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "logisticregression.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyOVYWXo8T2P0ORoZdZvs/8K", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/maxkleiner/3dd1e5a7fd9fe1014f6dfd360aa8c288/logisticregression.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "MS7JmmTjexlp", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"import matplotlib.pyplot as plt\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"from sklearn.linear_model import LogisticRegression\n", | |
"from sklearn.metrics import classification_report, confusion_matrix\n", | |
"# https://realpython.com/logistic-regression-python/\n" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "Qm0fdjWKfJhM", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"For this visual classification example, we simply create arrays for the input (𝑥) and output (𝑦) values:\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "8ySq6OeEfNI7", | |
"colab_type": "code", | |
"outputId": "79c7a8e1-631b-4718-e6eb-03ffc463012e", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 203 | |
} | |
}, | |
"source": [ | |
"# Ten single-feature observations (0..9) as a column vector, plus binary labels:\n", | |
"# the first four observations are class 0, the remaining six are class 1.\n", | |
"X = np.arange(10)[:, np.newaxis]\n", | |
"y = np.array([0] * 4 + [1] * 6)\n", | |
"# Alternative labels to experiment with:\n", | |
"# y = np.array([0, 1, 0, 1, 1, 1, 1, 1, 0, 1])\n", | |
"print(X)\n" | |
], | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[[0]\n", | |
" [1]\n", | |
" [2]\n", | |
" [3]\n", | |
" [4]\n", | |
" [5]\n", | |
" [6]\n", | |
" [7]\n", | |
" [8]\n", | |
" [9]]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "lMPVXyepfYoB", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"The input array `X` must be two-dimensional: one row per observation and one column per feature.\n", | |
"\n", | |
"Once the input and output are prepared, you can create and fit the classification model. The example below uses the `'liblinear'` solver; other solver options are `'newton-cg'`, `'lbfgs'`, `'sag'`, and `'saga'`.\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "pGBk1ps8ffk0", | |
"colab_type": "code", | |
"outputId": "f0152124-9ad9-4326-d961-91fabc7cf9f5", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 110 | |
} | |
}, | |
"source": [ | |
"# Create the classifier and fit it in one statement (fit() returns the model instance).\n", | |
"model = LogisticRegression(random_state=0, solver='liblinear').fit(X, y)\n", | |
"print(model)" | |
], | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", | |
" intercept_scaling=1, l1_ratio=None, max_iter=100,\n", | |
" multi_class='auto', n_jobs=None, penalty='l2',\n", | |
" random_state=0, solver='liblinear', tol=0.0001, verbose=0,\n", | |
" warm_start=False)\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "LGjdypm8fyq6", | |
"colab_type": "code", | |
"outputId": "b4261b2e-ba68-49ad-88e7-7c2facea55c1", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 314 | |
} | |
}, | |
"source": [ | |
"model = LogisticRegression(C=1, solver='liblinear', random_state=0)\n", | |
"model.fit(X, y)\n", | |
"\n", | |
"# Fitted parameters: the intercept first, then the coefficient of the single feature.\n", | |
"print(model.intercept_)\n", | |
"print(model.coef_, '\\n')\n", | |
"\n", | |
"# predict_proba returns one row per observation:\n", | |
"#   column 0 -> probability that the output is zero, i.e. 1 - 𝑝(𝑥)\n", | |
"#   column 1 -> probability that the output is one, i.e. 𝑝(𝑥)\n", | |
"print(model.predict_proba(X), '\\n')\n", | |
"\n", | |
"# Predicted class labels and the accuracy on the training data.\n", | |
"print('predict:', model.predict(X))\n", | |
"print('score: ', model.score(X, y))\n" | |
], | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[-1.04608067]\n", | |
"[[0.51491375]] \n", | |
"\n", | |
"[[0.74002157 0.25997843]\n", | |
" [0.62975524 0.37024476]\n", | |
" [0.5040632 0.4959368 ]\n", | |
" [0.37785549 0.62214451]\n", | |
" [0.26628093 0.73371907]\n", | |
" [0.17821501 0.82178499]\n", | |
" [0.11472079 0.88527921]\n", | |
" [0.07186982 0.92813018]\n", | |
" [0.04422513 0.95577487]\n", | |
" [0.02690569 0.97309431]] \n", | |
"\n", | |
"predict: [0 0 0 1 1 1 1 1 1 1]\n", | |
"score: 0.9\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "BZH2sNCTzxNV", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "BqX52mfPfrFH", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"In the `predict_proba` output above, the first column is the probability of the predicted output being zero, that is 1 - 𝑝(𝑥). The second column is the probability that the output is one, or 𝑝(𝑥).\n", | |
"\n", | |
"Because `.fit()` returns the model instance, creating and fitting the model can be chained into a single statement:\n", | |
"\n", | |
"```python\n", | |
"model = LogisticRegression(solver='lbfgs', C=1, random_state=0).fit(X, y)\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "c-uGPz0agB-h", | |
"colab_type": "code", | |
"outputId": "6ed27c6f-db9c-4715-dc1b-e1ea91d151b4", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 54 | |
} | |
}, | |
"source": [ | |
"# Expect exactly one false positive: the fourth observation is a zero that is wrongly predicted as one.\n", | |
"\n", | |
"print(confusion_matrix(y_true=y, y_pred=model.predict(X)))" | |
], | |
"execution_count": 17, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[[3 1]\n", | |
" [0 6]]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "XS4_lgDcgHCP", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"It’s often useful to visualize the confusion matrix.\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "lAs1g0Q1gLZh", | |
"colab_type": "code", | |
"outputId": "4cdbafd9-ff77-4849-8cbf-d1d769dc5988", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 268 | |
} | |
}, | |
"source": [ | |
"cm = confusion_matrix(y, model.predict(X))\n", | |
"\n", | |
"# Show the 2x2 confusion matrix as an image and write each count into its cell.\n", | |
"plt.rcParams['font.size'] = 16\n", | |
"fig, ax = plt.subplots(figsize=(4, 4))\n", | |
"ax.imshow(cm)\n", | |
"ax.grid(False)\n", | |
"ax.set_xticks((0, 1))\n", | |
"ax.set_xticklabels(('Predicted 0s', 'Predicted 1s'))\n", | |
"ax.set_yticks((0, 1))\n", | |
"ax.set_yticklabels(('Actual 0s', 'Actual 1s'))\n", | |
"# The y-limits run from 1.5 down to -0.5, so row 0 ('Actual 0s') is drawn at the top.\n", | |
"ax.set_ylim(1.5, -0.5)\n", | |
"for row, col in np.ndindex(2, 2):\n", | |
"    ax.text(col, row, cm[row, col], ha='center', va='center', color='red')\n", | |
"plt.show()\n" | |
], | |
"execution_count": 18, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASQAAAD7CAYAAADD5umiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAUkUlEQVR4nO3cf3DU9Z3H8efuZoOYgBAMP0RKAz1+HT+OQQQuIppGhbBJoE6gMLYlbU3FDqg9K+C0ajCdHg4yJ8y0oj2PU+s5mhCRIOCQyImVHzI6UqRJKgdFG0hCFsJmSXaz+/3cH2iUJkACkv0QXo9/dL/fzXffu9/k+f3uNxtcxhiDiIgF3LEeQETkSwqSiFhDQRIRayhIImINBUlErBEX6wFs4TgOwWAQr9eLy+WK9TgiXZIxhubmZhISEnC7W58PKUhfCAaDVFZWxnoMkavCsGHD6NGjR6vlCtIXvF4vAGbJf0FdIMbTSHsVcDrWI0gHXNfnOhavWNjy8/aPFKQvtLxNqwtATX1sh5F2O0FDrEeQi3CuyyK6qC0i1lCQRMQaCpKIWENBEhFrKEgiYg0FSUSsoSCJiDUUJBGxhoIkItZQkETEGgqSiFhDQRIRayhIImINBUlErKEgiYg1FCQRsYaCJCLWUJBExBoKkohYQ0ESEWsoSCJiDQVJRKyhIImINRQkEbGGgiQi1lCQRMQaCpKIWENBEhFrKEgiYg0FSUSsoSCJiDUUJBGxhoIkItZQkETEGgqSiFhDQRIRayhIImINBUlErKEgiYg1FCQRsYaCJCLWUJBExBoKkohYQ0ESEWvExXoAuTx6NtXQv+GvdI8E8DjNRNzxNMQnUdVjOE3enrEeT9qQHG1ifsPfGB4+xdBIA9cYh7l9/5Vjcd1jPVqn0RlSFxXnhDnt7cWR68ZS2WcKn/ccRfdIgJHHdxAfOR3r8aQNAyON3NZYTcDtZV98r1iPExM6Q+qi/NfeiJ8bz1oWjO/FmJoyejdVUZ34nRhNJufycXwvZve/FYCZwb9zc8gf44k6n86QriJRVzwABleMJ5G2GJf2S7uCVF9fz9ixYykoKGjXRrdt28a+ffsuaTCApUuX8vLLL7e57tChQ8ydO5e77rqLuXPncvjw4Ut+vC7JGFzGoVukgcH1HxN2d8Pf/cYLf51IDLQrSCUlJYwbN45NmzYRDocveP9vKkjn8/jjjzN//ny2bt3K/Pnzeeyxxy7r412pRh7/XyYc3ciYmlK6N5+isk8qEU+3WI8l0qZ2BamoqIj777+f4cOHU1pa2rK8urqaRYsWkZmZSWZmJmvXrmXHjh2UlZXx3HPPkZ2dzRtvvMH69etZvHhxy9d9/XZFRQXz589n9uzZZGRksG7dugvOU1dXx4EDB/D5fAD4fD4OHDiA3++nsbGRxYsXk5GRQVZWFg888EBHXo8u51CvCfzl+qn8X68JRN1xDKt7Xxe1xVoXvKhdXl7OyZMnmTx5MrW1tRQVFTFjxgwAHn74YaZNm8aaNWsA8Pv9JCUlkZaWxujRo7nnnnuAMwE6l4EDB7Ju3Tri4+MJBoPk5OQwdepUhg4des6vOXr0KP369cPj8QDg8Xjo27cvR48epaqqimAwyFtvvQWcebt5NWvy9gAgGJ9E/TX9GFP9Nv0b/sqRXuNiPJlIaxc8QyosLCQ7OxuXy8Wdd97Jvn37qK6uJhgM8tFHH7FgwYKW+yYlJXV4gKamJh599FEyMzOZN28eNTU1lJeXd3g7XxoxYgQHDx4kPz+fzZs3Ex8ff9Hb6mqibi+huASuiTTEehSRNp03SOFwmJKSEoqKikhLSyMjI4Pm5ubznvG0xePx4DhOy+1QKNTy/6tWrSI5OZni4mLefPNNxo4de9b6tgwYMIDq6mqi0SgA0WiUmpoaBgwYwKBBgygpKSE1NZWdO3eSnZ19we1dLeKiTVwTaSAUlxDrUUTadN4glZaWkp
KSwrvvvktZWRllZWW88MILFBcXk5CQwPjx48+65uP3n/ncRGJiIoFAoGX54MGDqaioIBwOEw6H2bp1a8u6QCBA//79iYuLo7Kykr17915w6D59+jBy5EhKSkqAMxfdR44cSVJSEseOHcPj8ZCens6yZcvw+/2cPHmyQy9KVzDUv5sBgQp6NR6lR6iW64OHGV73JwwujukzSNaa1ljNtMZqhjef+fmZFKpjWmM140InYjxZ5zjvNaSioiIyMzPPWjZ+/Hgcx2HPnj2sXLmS/Px8fD4fbrcbn89HXl4eWVlZLFu2jC1btpCbm8usWbOYMmUKM2fOpG/fvowYMYLa2loAFi5cyCOPPEJhYSEpKSlMnDixXYM/8cQTLF26lN/97nf07NmTFStWAGcukj/99NMAOI5DXl4e/fr16/ALc6ULepPo3fR3+kU+xYWh2X0NgW7XczRxGOG4a2M9npzD8hP7z7r9i/oKAD6K78WD3SbEYqRO5TLGmFgPYYNQKMT+/fsxP10NNVf3hfAryb+h62FXkt59e/PrPyxl9OjRdOvW+uMn+qS2iFhDQRIRayhIImINBUlErKEgiYg1FCQRsYaCJCLWUJBExBoKkohYQ0ESEWsoSCJiDQVJRKyhIImINRQkEbGGgiQi1lCQRMQaCpKIWENBEhFrKEgiYg0FSUSsoSCJiDUUJBGxhoIkItZQkETEGgqSiFhDQRIRayhIImINBUlErKEgiYg1FCQRsYaCJCLWUJBExBoKkohYQ0ESEWsoSCJiDQVJRKyhIImINRQkEbGGgiQi1lCQRMQaCpKIWENBEhFrKEgiYg0FSUSsoSCJiDUUJBGxhoIkItZQkETEGnGxHsA2s+o+5mhNTazHkHZqqtoR6xGkA8KRKOVVgXOu1xmSiFhDQRIRayhIImINBUlErKEgiYg1FCQRsYaCJCLWUJBExBoKkohYQ0ESEWsoSCJiDQVJRKyhIImINRQkEbGGgiQi1lCQRMQaCpKIWENBEhFrKEgiYg0FSUSsoSCJiDUUJBGxhoIkItZQkETEGgqSiFhDQRIRayhIImINBUlErKEgiYg1FCQRsYaCJCLWUJBExBoKkohYQ0ESEWsoSCJiDQVJRKyhIImINRQkEbGGgiQi1lCQRMQaCpKIWENBEhFrKEgiYg0FSUSsoSCJiDUUJBGxhoIkItZQkLqwG02U16In8EeOcSJyjMKon0EmGuux5AJc27bjyZpH3LfHEjdkHJ47ZuHasTPWY3WKuFgPIJdHd2PYFq0jhItcdy8MsNwJUBqt418813PapWORjdz//T+4l+Xj/OQenF/8HBwH1/6/QGNjrEfrFApSF/VTc5ohRBnpSeag68xu3ueKoyJaS545zX+4EmM8obRy5HPcvy7AeXwJzs9yWxabtFtjOFTnatdhsr6+nrFjx1JQUNCujW7bto19+/Zd0mAAS5cu5eWXX25z3YoVK0hLS2P48OFUVlZe8mN1NZmmiV14W2IEcNgVx5+IJ8sJxXAyORf3K4XgduP8aH6sR4mZdgWppKSEcePGsWnTJsLh8AXv/00F6Xy++93v8sc//pGBAwde1se5Uv2zifCJy9tq+QFXHKOIxGAiuRDXnr3wnSG43ighbuLtxA0YTtzNabj/86VYj9Zp2vWWraioiF/+8pesXbuW0tJSZsyYAUB1dTUFBQUcPnwYAJ/Px6hRoygrK+P999/n9ddfJzc3F8dx2L59O6tXrwZg/fr1LbcrKirIz8+nsbGRUCjEnDlzWLBgwQVnuummm9pc3tjYyJIlS/j000+Ji4sjJSWFZ555pj1Ps0tJwuEErlbL/bjpjRODieRCXMdq4Fg1nvx/J/rov8G3B+N6czOeZfkQjeLkLYj1iJfdBYNUXl7OyZMnmTx5MrW1tRQVFbUE6eGHH2batGmsWbMGAL/fT1JSEmlpaYwePZp77rkHOBOgcxk4cCDr1q0jPj6eYDBITk4OU6dOZejQoRf1hN577z2CwS
BvvfUWcObtpsgVwXFwNQSJrH4K47sLADN1Cq7PPsf9zLM49/4IXK0PMl3JBd+yFRYWkp2djcvl4s4772Tfvn1UV1cTDAb56KOPzjqbSUpK6vAATU1NPProo2RmZjJv3jxqamooLy/v8Ha+NGLECA4ePEh+fj6bN28mPj7+ord1JTuBm96YVsvPnDnpN2w2Mr17nfnvbalnL7/tFly1x6G6JhZjdarzfmeGw2FKSkooKioiLS2NjIwMmpubz3vG0xaPx4PjfPU2IRT66qLqqlWrSE5Opri4mDfffJOxY8eetb6jBg0aRElJCampqezcuZPs7OxL2t6V6hNXHKNMc6vlI02EA/rlqp1G/NP5118FH9U47zMsLS0lJSWFd999l7KyMsrKynjhhRcoLi4mISGB8ePHs27dupb7+/1+ABITEwkEAi3LBw8eTEVFBeFwmHA4zNatW1vWBQIB+vfvT1xcHJWVlezdu/eSntCxY8fweDykp6ezbNky/H4/J0+evKRtXok2uq5hMs2kmK8uYA82EVIJs9HdLYaTybk4GXcC4Hpnx1nLXWXvYm7oD/2SYzFWpzrvobKoqIjMzMyzlo0fPx7HcdizZw8rV64kPz8fn8+H2+3G5/ORl5dHVlYWy5YtY8uWLeTm5jJr1iymTJnCzJkz6du3LyNGjKC2thaAhQsX8sgjj1BYWEhKSgoTJ05s1+AFBQW8/fbbHD9+nNzcXHr16sWmTZuoqKjg6aefBsBxHPLy8ujXr9/FvDZXtD+4uvNzghRHT/CYuwcGyHcCfIaH51zXxno8aYNJvw3nlsl4Hv41Tt0JzOBBuDduxr39PSKrV8R6vE7hMsa0vtBwFQqFQuzfv5/s2T/h6NGu8V59kImyyjlFugnhAspc8Tzk7snfXF3nLVtT1Y4L3+lKEgjgLliJe+MWqD8F3xlCdPHPMHdnxXqyb0Q4EqW8KsDo0aPp1q31mXrX+c6UVj5zecjx9I71GNIRPXrgrMjHWZEf60lioutfJRORK4aCJCLWUJBExBoKkohYQ0ESEWsoSCJiDQVJRKyhIImINRQkEbGGgiQi1lCQRMQaCpKIWENBEhFrKEgiYg0FSUSsoSCJiDUUJBGxhoIkItZQkETEGgqSiFhDQRIRayhIImINBUlErKEgiYg1FCQRsYaCJCLWUJBExBoKkohYQ0ESEWsoSCJiDQVJRKyhIImINRQkEbGGgiQi1lCQRMQaCpKIWENBEhFrKEgiYg0FSUSsoSCJiDUUJBGxhoIkItZQkETEGgqSiFhDQRIRayhIImINBUlErBEX6wFsYYwBoG9ynxhPIh0RjkRjPYJ0QDjiAF/9vP0jlznXmqtMIBCgsrIy1mOIXBWGDRtGjx49Wi1XkL7gOA7BYBCv14vL5Yr1OCJdkjGG5uZmEhIScLtbXzFSkETEGrqoLSLWUJBExBoKkohYQ0ESEWsoSCJiDQVJRKyhIImINRSkTpCWlsb06dPJysrC5/OxadOmS97m+vXrWbx4MQClpaWsWLHivPc/deoUzz///EU/Xlpa2jk/yf7aa69xxx13kJ6ezvLly3Ec56IfxwZdeX9VV1fzgx/8gAkTJvC9733vord/2Ri57G6//XZTUVFhjDHmk08+MWPGjDF1dXVn3ae5ublD2ywqKjKLFi1q9/0/++wzc/PNN3foMb7u68/h644cOWKmTp1q6urqTDQaNT/+8Y9NcXHxRT+ODbry/jp16pT54IMPzDvvvGNmz5590du/XPTHtZ1s1KhRJCQk8Pnnn/PUU0/h8Xg4dOgQwWCQDRs2UFxczCuvvEI0GiUxMZEnnniCIUOGEA6HKSgoYNeuXfTu3ZuRI0e2bHP9+vVs376d1atXA1BYWMiLL74IgNfrZe3atSxfvpxAIEB2djbdu3fn1VdfpaamhoKCAqqqqgiFQsycOZP77rsPgL1795Kfnw/AxIkTz/nHkFu3biU9PZ2kpCQAcnJyWL9+PbNmzeLDDz/kyS
efxHEcIpEICxcuxOfzXbbX9nLoavurR48e3HTTTezevbvVOiv2V6yLeDX4+tFq586dZvz48aa+vt4sWbLEzJ492wSDQWOMMR988IG59957TSgUMsYYs337djN37lxjjDEvvviiyc3NNeFw2Jw+fdrMnj275Yj79aPvrl27THp6uqmpqTHGGNPQ0GCampraPOIuWLDA7NmzxxhjTCgUMvPmzTPvvfeeCYVC5pZbbjG7du0yxhizadMmM2zYsDaPuMuXLzfPP/98y+2PP/7Y+Hw+Y4wx9913n9m4caMxxhjHcUx9ff2lvpSdoivvry/t2rWr1RmSDftLZ0idZPHixXTr1o3ExETWrFlDz549AZg+fTrXXnstAGVlZZSXl5OTkwOc+UPEU6dOAbB7925mzZqF1+vF6/WSlZXFhx9+2Opxtm/fTnZ2NsnJyQAkJCS0Oc/p06fZs2cPfr+/ZVkwGOTgwYP06dOH7t27M2nSJAAyMjJ47LHHOvycJ02axO9//3uOHDlCamoq48aN6/A2YkX7Kzb7S0HqJKtXr2bYsGGtln/5zQ1nvqHvvvtuHnjggcs+j+M4uFwuCgsL8Xq9Z60rLy9vdf9z/QsIAwYMoKqqquV2VVUVAwYMAGDBggWkpaXx/vvv8+STT5KamspDDz30DT6Ly6er7q/zsWF/6bdsFklLS2PDhg0cO3YMgGg0yv79+wGYPHkyGzZsIBKJ0NTURElJSZvbuO2229iwYQPHjx8HzhxFQ6EQiYmJNDU1EYlEAEhMTGTChAk899xzLV979OhRamtrGTJkCE1NTezduxeALVu2tBz5/9Fdd93Ftm3b8Pv9OI7D66+/zowZMwA4dOgQ3/rWt/j+97/PD3/4Q/785z9/A6+SPa7E/XU+NuwvnSFZZOLEiTz44IMsXLiQaDRKc3Mz06dPZ/To0cyZM4eKigoyMjLo3bs3Y8aMoa6urtU2Jk2aRF5eHrm5ubhcLuLj43n22We5/vrryczMJDMzk+uuu45XX32VlStX8tvf/pbMzEzgzNuF3/zmNyQnJ7Nq1aqzLpLecMMNbc48aNAg7r//fubMmQNAamoqWVlZALz00kvs3r0br9dLfHw8v/rVry7HyxYzV+L+ikaj3H777YTDYRoaGrj11lvJyclh0aJFVuwv/XtIImINvWUTEWsoSCJiDQVJRKyhIImINRQkEbGGgiQi1lCQRMQaCpKIWOP/Ab2iWadNeOEgAAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<Figure size 288x288 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "NGTQ9gZvy4un", | |
"colab_type": "code", | |
"outputId": "0cb0aac0-0d38-489b-92df-bcfaf8495c7b", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 184 | |
} | |
}, | |
"source": [ | |
"# Text report of the classification metrics for the fitted model on the training data.\n", | |
"print(classification_report(y_true=y, y_pred=model.predict(X)))\n" | |
], | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
" precision recall f1-score support\n", | |
"\n", | |
" 0 1.00 0.75 0.86 4\n", | |
" 1 0.86 1.00 0.92 6\n", | |
"\n", | |
" accuracy 0.90 10\n", | |
" macro avg 0.93 0.88 0.89 10\n", | |
"weighted avg 0.91 0.90 0.90 10\n", | |
"\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "dcGPVrMny0AE", | |
"colab_type": "text" | |
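}, | |
"source": [ | |
"The heatmap above visualizes the confusion matrix, and `classification_report` (previous cell) gives a more comprehensive summary of the classification: precision, recall, F1-score and support for each class.\n", | |
"\n", | |
"The next cell plots the predicted probabilities and the confusion matrix again, this time with seaborn." | |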
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "cEhwFRCwz1cV", | |
"colab_type": "code", | |
"outputId": "b59563bd-7f41-40b5-d55b-fa26dbfeb839", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 551 | |
} | |
}, | |
"source": [ | |
"import seaborn as sns\n", | |
"\n", | |
"\n", | |
"def ap_log_regplot(ap_X, ap_y):\n", | |
"    \"\"\"Draw a logistic regression plot of ap_y against ap_X on a new 10x5 figure.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    ap_X : array-like\n", | |
"        Predictor values; flattened to 1-D before plotting.\n", | |
"    ap_y : array-like\n", | |
"        Response values to plot against ap_X; flattened to 1-D before plotting.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    None\n", | |
"        The plot is drawn on the current matplotlib figure; call plt.show() to display it.\n", | |
"    \"\"\"\n", | |
"    plt.figure(figsize=(10, 5))\n", | |
"    # Plot the arguments that were passed in (not the notebook globals), and pass them by\n", | |
"    # keyword: seaborn >= 0.12 no longer accepts x and y positionally.\n", | |
"    sns.regplot(x=np.ravel(ap_X), y=np.ravel(ap_y), logistic=True, color='green')\n", | |
"    return None\n", | |
"\n", | |
"\n", | |
"# Example usage:\n", | |
"# ap_log_regplot(X, y)\n", | |
"# plt.show()\n", | |
"\n", | |
"sns.set(style='whitegrid')\n", | |
"# Column 1 of predict_proba is the predicted probability of class 1 for each observation.\n", | |
"sns.regplot(x=X.ravel(), y=model.predict_proba(X)[:, 1], logistic=True,\n", | |
"            scatter_kws={'color': 'red'}, line_kws={'color': 'blue'})\n", | |
"plt.xlabel('x')\n", | |
"plt.ylabel('P(y = 1)')\n", | |
"plt.title('Logistic Probability Plot')\n", | |
"plt.show()\n", | |
"\n", | |
"# cm is the confusion matrix computed in an earlier cell.\n", | |
"sns.heatmap(cm, annot=True)\n", | |
"plt.title('heatmap confusion matrix')\n", | |
"plt.show()" | |
], | |
"execution_count": 20, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 2 Axes>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "lOr4XWiU0kSX", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"\n", | |
"Next, we refit the model with the solver switched from `liblinear` to `lbfgs` to see whether the fit improves:\n", | |
"\n", | |
"---\n", | |
"\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "TuVjHYwf0v9e", | |
"colab_type": "code", | |
"outputId": "6923a84e-f219-4943-8a5f-cc5af8ec7ed2", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 551 | |
} | |
}, | |
"source": [ | |
"# Refit with the lbfgs solver; note that this rebinds the notebook-wide `model`.\n", | |
"model = LogisticRegression(solver='lbfgs', C=1, random_state=0).fit(X, y)\n", | |
"\n", | |
"sns.set(style='whitegrid')\n", | |
"# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.\n", | |
"# Column 1 of predict_proba is the predicted probability of class 1 for each observation.\n", | |
"sns.regplot(x=X.ravel(), y=model.predict_proba(X)[:, 1], logistic=True,\n", | |
"            scatter_kws={'color': 'red'}, line_kws={'color': 'blue'})\n", | |
"plt.xlabel('x')\n", | |
"plt.ylabel('P(y = 1)')\n", | |
"plt.title('Logistic Probability Plot')\n", | |
"plt.show()\n", | |
"\n", | |
"# Recompute the confusion matrix for the refitted model before drawing the heatmap.\n", | |
"cm = confusion_matrix(y, model.predict(X))\n", | |
"sns.heatmap(cm, annot=True)\n", | |
"plt.title('heatmap confusion matrix')\n", | |
"plt.show()" | |
], | |
"execution_count": 21, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 2 Axes>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "X7Rwblyo1yyW", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"import statsmodels.api as sm" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "x_3kqEkK1rr8", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"You can also implement and study logistic regression in Python with the StatsModels package.\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "mCj9Kk7w15Re", | |
"colab_type": "code", | |
"outputId": "ce965ee5-f9a8-4134-aa3c-97df6371c5a4", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 982 | |
} | |
}, | |
"source": [ | |
"# Labels for the StatsModels example. They get their own name so that the `y` used by the\n", | |
"# scikit-learn cells above is not silently overwritten when this cell runs.\n", | |
"# NOTE(review): these labels differ from `y` at index 1, presumably because the original labels\n", | |
"# are perfectly separable and an unregularized Logit fit cannot converge on such data -- confirm.\n", | |
"y_sm = np.array([0, 1, 0, 0, 1, 1, 1, 1, 1, 1])\n", | |
"# add_constant prepends a column of ones so that the model estimates an intercept.\n", | |
"X_const = sm.add_constant(X)\n", | |
"print(X_const, y_sm)\n", | |
"\n", | |
"smodel = sm.Logit(y_sm, X_const)\n", | |
"\n", | |
"result = smodel.fit(method='newton')\n", | |
"# Predicted probabilities, computed once and reused for the thresholded class labels.\n", | |
"proba = result.predict(X_const)\n", | |
"print(result.params)\n", | |
"print(proba)\n", | |
"print((proba >= 0.5).astype(int))\n", | |
"print(result.pred_table())\n", | |
"print(result.summary(), '\\n')\n", | |
"\n", | |
"# These are detailed reports with values that you can obtain with appropriate methods and attributes.\n", | |
"\n", | |
"print(result.summary2())\n", | |
"\n", | |
"# For more information, check out the official documentation related to LogitResults.\n", | |
"# https://www.statsmodels.org/stable/generated/statsmodels.discrete.discrete_model.LogitResults.html\n" | |
], | |
"execution_count": 23, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[[1. 0.]\n", | |
" [1. 1.]\n", | |
" [1. 2.]\n", | |
" [1. 3.]\n", | |
" [1. 4.]\n", | |
" [1. 5.]\n", | |
" [1. 6.]\n", | |
" [1. 7.]\n", | |
" [1. 8.]\n", | |
" [1. 9.]] [0 1 0 0 1 1 1 1 1 1]\n", | |
"Optimization terminated successfully.\n", | |
" Current function value: 0.350471\n", | |
" Iterations 7\n", | |
"[-1.972805 0.82240094]\n", | |
"[0.12208792 0.24041529 0.41872657 0.62114189 0.78864861 0.89465521\n", | |
" 0.95080891 0.97777369 0.99011108 0.99563083]\n", | |
"[0 0 0 1 1 1 1 1 1 1]\n", | |
"[[2. 1.]\n", | |
" [1. 6.]]\n", | |
" Logit Regression Results \n", | |
"==============================================================================\n", | |
"Dep. Variable: y No. Observations: 10\n", | |
"Model: Logit Df Residuals: 8\n", | |
"Method: MLE Df Model: 1\n", | |
"Date: Thu, 16 Apr 2020 Pseudo R-squ.: 0.4263\n", | |
"Time: 17:01:33 Log-Likelihood: -3.5047\n", | |
"converged: True LL-Null: -6.1086\n", | |
"Covariance Type: nonrobust LLR p-value: 0.02248\n", | |
"==============================================================================\n", | |
" coef std err z P>|z| [0.025 0.975]\n", | |
"------------------------------------------------------------------------------\n", | |
"const -1.9728 1.737 -1.136 0.256 -5.377 1.431\n", | |
"x1 0.8224 0.528 1.557 0.119 -0.213 1.858\n", | |
"============================================================================== \n", | |
"\n", | |
" Results: Logit\n", | |
"===============================================================\n", | |
"Model: Logit Pseudo R-squared: 0.426 \n", | |
"Dependent Variable: y AIC: 11.0094 \n", | |
"Date: 2020-04-16 17:01 BIC: 11.6146 \n", | |
"No. Observations: 10 Log-Likelihood: -3.5047 \n", | |
"Df Model: 1 LL-Null: -6.1086 \n", | |
"Df Residuals: 8 LLR p-value: 0.022485\n", | |
"Converged: 1.0000 Scale: 1.0000 \n", | |
"No. Iterations: 7.0000 \n", | |
"-----------------------------------------------------------------\n", | |
" Coef. Std.Err. z P>|z| [0.025 0.975]\n", | |
"-----------------------------------------------------------------\n", | |
"const -1.9728 1.7366 -1.1360 0.2560 -5.3765 1.4309\n", | |
"x1 0.8224 0.5281 1.5572 0.1194 -0.2127 1.8575\n", | |
"===============================================================\n", | |
"\n" | |
], | |
"name": "stdout" | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment