Created
March 30, 2016 14:24
-
-
Save savonarola/d3a5d24763c7d3361cb3f6ca64c3315d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 51, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import numpy as np\n", | |
| "\n", | |
| "from math import exp, pow, sqrt\n", | |
| "from sklearn.metrics import roc_auc_score" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# header=None: no row is parsed as a header, so pandas labels the\n", | |
| "# columns with integers (0, 1, 2, ...), which later cells index by.\n", | |
| "data = pd.read_csv(\"data-logistic.csv\", header=None)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Column 0 is used as the target, columns 1 and 2 as the two features.\n", | |
| "# NOTE(review): labels are presumably -1/+1 (y_calc returns those) - confirm.\n", | |
| "y = data[0].values\n", | |
| "xs = data[[1,2]].values" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "l = xs.size" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def logistic_step(w, k, C):\n", | |
| " w1, w2 = w\n", | |
| " \n", | |
| " sum_w1 = 0\n", | |
| " sum_w2 = 0\n", | |
| " for i in range(0, y.size):\n", | |
| " yi = y[i]\n", | |
| " xi1 = xs[i][0]\n", | |
| " xi2 = xs[i][1]\n", | |
| " coef = yi * ( 1 - 1 / ( 1 + exp( - yi*( w1*xi1 + w2*xi2 ))))\n", | |
| " sum_w1 += xi1 * coef\n", | |
| " sum_w2 += xi2 * coef\n", | |
| " \n", | |
| " w1_new = w1 + k*(1/l)*sum_w1 - k * C * w1\n", | |
| " w2_new = w2 + k*(1/l)*sum_w2 - k * C * w2\n", | |
| " \n", | |
| " return (w1_new, w2_new)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def ediff(v, w):\n", | |
| " v1, v2 = v\n", | |
| " w1, w2 = w\n", | |
| " return sqrt(pow(v1 - w1, 2) + pow(v2 - w2, 2))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 32, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Convergence tolerance: iterate() stops once a step moves the weights\n", | |
| "# by less than this Euclidean distance.\n", | |
| "threshold = 1e-5" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def iterate(w0, k, C):\n", | |
| " i = 0\n", | |
| " w_old = w0\n", | |
| " while True:\n", | |
| " i += 1\n", | |
| " w_new = logistic_step(w_old, k, C)\n", | |
| " # print(\"i=%s, w: %s -> %s\" % (i, w_old, w_new)) \n", | |
| " if ediff(w_old, w_new) < threshold:\n", | |
| " break\n", | |
| " w_old = w_new\n", | |
| " return w_old" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 45, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def y_calc(w, x):\n", | |
| " w1, w2 = w\n", | |
| " if w1 * x[0] + w2 * x[1] > 0:\n", | |
| " return 1\n", | |
| " else:\n", | |
| " return -1" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 52, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def score(w, x):\n", | |
| " w1, w2 = w\n", | |
| " return 1 / (1 + exp(-w1 * x[0] - w2 * x[1]))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 43, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Train with the shrinkage term enabled: zero start, step k=0.1, C=10.\n", | |
| "w_reg = iterate((0,0), 0.1, 10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 46, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Hard -1/+1 predictions of the C=10 model for every training row.\n", | |
| "y_calc_reg = np.array([ y_calc(w_reg, x) for x in xs])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 53, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Sigmoid scores of the C=10 model for every training row.\n", | |
| "y_score_reg = np.array([ score(w_reg, x) for x in xs])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 48, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Train with the shrinkage term disabled (C=0), same start and step size.\n", | |
| "w_no_reg = iterate((0,0), 0.1, 0)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 49, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Hard -1/+1 predictions of the C=0 model for every training row.\n", | |
| "y_calc_no_reg = np.array([ y_calc(w_no_reg, x) for x in xs])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 54, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Sigmoid scores of the C=0 model for every training row.\n", | |
| "y_score_no_reg = np.array([ score(w_no_reg, x) for x in xs])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 56, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0.93666666666666654" | |
| ] | |
| }, | |
| "execution_count": 56, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# ROC AUC of the C=10 model, measured on the same rows it was fitted on.\n", | |
| "roc_auc_score(y, y_score_reg)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 57, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0.92685714285714282" | |
| ] | |
| }, | |
| "execution_count": 57, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# ROC AUC of the C=0 model, measured on the same rows it was fitted on.\n", | |
| "roc_auc_score(y, y_score_no_reg)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.1" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment