kcarnold · November 15, 2019 14:30
diff --git a/credit_scoring.ipynb b/credit_scoring.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sklearn.datasets\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.linear_model import LinearRegression, LogisticRegression\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Dataset source: https://archive.ics.uci.edu/ml/datasets/Statlog+(German+Credit+Data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "credit_data = sklearn.datasets.fetch_openml('credit-g')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "**Author**: Dr. Hans Hofmann  \n",
      "**Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)) - 1994    \n",
      "**Please cite**: [UCI](https://archive.ics.uci.edu/ml/citation_policy.html)\n",
      "\n",
      "**German Credit data**  \n",
      "This dataset classifies people described by a set of attributes as good or bad credit risks.\n",
      "\n",
      "This dataset comes with a cost matrix: \n",
      "``` \n",
      "      Good  Bad (predicted)  \n",
      "Good   0    1   (actual)  \n",
      "Bad    5    0  \n",
      "```\n",
      "\n",
      "It is worse to class a customer as good when they are bad (5), than it is to class a customer as bad when they are good (1).  \n",
      "\n",
      "### Attribute description  \n",
      "\n",
      "1. Status of existing checking account, in Deutsche Mark.  \n",
      "2. Duration in months  \n",
      "3. Credit history (credits taken, paid back duly, delays, critical accounts)  \n",
      "4. Purpose of the credit (car, television,...)  \n",
      "5. Credit amount  \n",
      "6. Status of savings account/bonds, in Deutsche Mark.  \n",
      "7. Present employment, in number of years.  \n",
      "8. Installment rate in percentage of disposable income  \n",
      "9. Personal status (married, single,...) and sex  \n",
      "10. Other debtors / guarantors  \n",
      "11. Present residence since X years  \n",
      "12. Property (e.g. real estate)  \n",
      "13. Age in years  \n",
      "14. Other installment plans (banks, stores)  \n",
      "15. Housing (rent, own,...)  \n",
      "16. Number of existing credits at this bank  \n",
      "17. Job  \n",
      "18. Number of people being liable to provide maintenance for  \n",
      "19. Telephone (yes,no)  \n",
      "20. Foreign worker (yes,no)\n",
      "\n",
      "Downloaded from openml.org.\n"
     ]
    }
   ],
   "source": [
    "print(credit_data.DESCR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>checking_status</th>\n",
       "      <th>duration</th>\n",
       "      <th>credit_history</th>\n",
       "      <th>purpose</th>\n",
       "      <th>credit_amount</th>\n",
       "      <th>savings_status</th>\n",
       "      <th>employment</th>\n",
       "      <th>installment_commitment</th>\n",
       "      <th>personal_status</th>\n",
       "      <th>other_parties</th>\n",
       "      <th>residence_since</th>\n",
       "      <th>property_magnitude</th>\n",
       "      <th>age</th>\n",
       "      <th>other_payment_plans</th>\n",
       "      <th>housing</th>\n",
       "      <th>existing_credits</th>\n",
       "      <th>job</th>\n",
       "      <th>num_dependents</th>\n",
       "      <th>own_telephone</th>\n",
       "      <th>foreign_worker</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1169.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5951.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2096.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7882.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4870.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   checking_status  duration  credit_history  purpose  credit_amount  \\\n",
       "0              0.0       6.0             4.0      3.0         1169.0   \n",
       "1              1.0      48.0             2.0      3.0         5951.0   \n",
       "2              3.0      12.0             4.0      6.0         2096.0   \n",
       "3              0.0      42.0             2.0      2.0         7882.0   \n",
       "4              0.0      24.0             3.0      0.0         4870.0   \n",
       "\n",
       "   savings_status  employment  installment_commitment  personal_status  \\\n",
       "0             4.0         4.0                     4.0              2.0   \n",
       "1             0.0         2.0                     2.0              1.0   \n",
       "2             0.0         3.0                     2.0              2.0   \n",
       "3             0.0         3.0                     2.0              2.0   \n",
       "4             0.0         2.0                     3.0              2.0   \n",
       "\n",
       "   other_parties  residence_since  property_magnitude   age  \\\n",
       "0            0.0              4.0                 0.0  67.0   \n",
       "1            0.0              2.0                 0.0  22.0   \n",
       "2            0.0              3.0                 0.0  49.0   \n",
       "3            2.0              4.0                 1.0  45.0   \n",
       "4            0.0              4.0                 3.0  53.0   \n",
       "\n",
       "   other_payment_plans  housing  existing_credits  job  num_dependents  \\\n",
       "0                  2.0      1.0               2.0  2.0             1.0   \n",
       "1                  2.0      1.0               1.0  2.0             1.0   \n",
       "2                  2.0      1.0               1.0  1.0             2.0   \n",
       "3                  2.0      2.0               1.0  2.0             2.0   \n",
       "4                  2.0      2.0               2.0  2.0             2.0   \n",
       "\n",
       "   own_telephone  foreign_worker  \n",
       "0            1.0             0.0  \n",
       "1            0.0             0.0  \n",
       "2            0.0             0.0  \n",
       "3            0.0             0.0  \n",
       "4            0.0             0.0  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "credit_df = pd.DataFrame(credit_data.data, columns=credit_data.feature_names)\n",
    "credit_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "credit_df['GOOD'] = credit_data.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>col_0</th>\n",
       "      <th>bad</th>\n",
       "      <th>good</th>\n",
       "      <th>All</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>checking_status</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0.0</th>\n",
       "      <td>135</td>\n",
       "      <td>139</td>\n",
       "      <td>274</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>105</td>\n",
       "      <td>164</td>\n",
       "      <td>269</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2.0</th>\n",
       "      <td>14</td>\n",
       "      <td>49</td>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3.0</th>\n",
       "      <td>46</td>\n",
       "      <td>348</td>\n",
       "      <td>394</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>All</th>\n",
       "      <td>300</td>\n",
       "      <td>700</td>\n",
       "      <td>1000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "col_0            bad  good   All\n",
       "checking_status                 \n",
       "0.0              135   139   274\n",
       "1.0              105   164   269\n",
       "2.0               14    49    63\n",
       "3.0               46   348   394\n",
       "All              300   700  1000"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.crosstab(credit_df['checking_status'], credit_data.target, margins=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>checking_status_1.0</th>\n",
       "      <th>checking_status_2.0</th>\n",
       "      <th>checking_status_3.0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   checking_status_1.0  checking_status_2.0  checking_status_3.0\n",
       "0                    0                    0                    0\n",
       "1                    1                    0                    0\n",
       "2                    0                    0                    1\n",
       "3                    0                    0                    0\n",
       "4                    0                    0                    0"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = pd.get_dummies(credit_df[['checking_status']], columns=['checking_status'], drop_first=True)\n",
    "X.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    good\n",
       "1     bad\n",
       "2    good\n",
       "3    good\n",
       "4     bad\n",
       "Name: GOOD, dtype: object"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "credit_df['GOOD'].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = 0+(credit_df['GOOD'] == 'good')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"import sklearn.datasets\n",
	"from sklearn.model_selection import train_test_split\n",
	"from sklearn.metrics import confusion_matrix\n",
	"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
	"import pandas as pd\n",
	"import matplotlib.pyplot as plt\n",
	"import seaborn as sns\n",
	"%matplotlib inline"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Dataset source: https://archive.ics.uci.edu/ml/datasets/Statlog+(German+Credit+Data)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [],
	"source": [
	"credit_data = sklearn.datasets.fetch_openml('credit-g')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Author: Dr. Hans Hofmann \n",
	"Source: [UCI](https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)) - 1994 \n",
	"Please cite: [UCI](https://archive.ics.uci.edu/ml/citation_policy.html)\n",
	"\n",
	"German Credit data \n",
	"This dataset classifies people described by a set of attributes as good or bad credit risks.\n",
	"\n",
	"This dataset comes with a cost matrix: \n",
	"``` \n",
	" Good Bad (predicted) \n",
	"Good 0 1 (actual) \n",
	"Bad 5 0 \n",
	"```\n",
	"\n",
	"It is worse to class a customer as good when they are bad (5), than it is to class a customer as bad when they are good (1). \n",
	"\n",
	"### Attribute description \n",
	"\n",
	"1. Status of existing checking account, in Deutsche Mark. \n",
	"2. Duration in months \n",
	"3. Credit history (credits taken, paid back duly, delays, critical accounts) \n",
	"4. Purpose of the credit (car, television,...) \n",
	"5. Credit amount \n",
	"6. Status of savings account/bonds, in Deutsche Mark. \n",
	"7. Present employment, in number of years. \n",
	"8. Installment rate in percentage of disposable income \n",
	"9. Personal status (married, single,...) and sex \n",
	"10. Other debtors / guarantors \n",
	"11. Present residence since X years \n",
	"12. Property (e.g. real estate) \n",
	"13. Age in years \n",
	"14. Other installment plans (banks, stores) \n",
	"15. Housing (rent, own,...) \n",
	"16. Number of existing credits at this bank \n",
	"17. Job \n",
	"18. Number of people being liable to provide maintenance for \n",
	"19. Telephone (yes,no) \n",
	"20. Foreign worker (yes,no)\n",
	"\n",
	"Downloaded from openml.org.\n"
	]
	}
	],
	"source": [
	"print(credit_data.DESCR)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>checking_status</th>\n",
	" <th>duration</th>\n",
	" <th>credit_history</th>\n",
	" <th>purpose</th>\n",
	" <th>credit_amount</th>\n",
	" <th>savings_status</th>\n",
	" <th>employment</th>\n",
	" <th>installment_commitment</th>\n",
	" <th>personal_status</th>\n",
	" <th>other_parties</th>\n",
	" <th>residence_since</th>\n",
	" <th>property_magnitude</th>\n",
	" <th>age</th>\n",
	" <th>other_payment_plans</th>\n",
	" <th>housing</th>\n",
	" <th>existing_credits</th>\n",
	" <th>job</th>\n",
	" <th>num_dependents</th>\n",
	" <th>own_telephone</th>\n",
	" <th>foreign_worker</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>0.0</td>\n",
	" <td>6.0</td>\n",
	" <td>4.0</td>\n",
	" <td>3.0</td>\n",
	" <td>1169.0</td>\n",
	" <td>4.0</td>\n",
	" <td>4.0</td>\n",
	" <td>4.0</td>\n",
	" <td>2.0</td>\n",
	" <td>0.0</td>\n",
	" <td>4.0</td>\n",
	" <td>0.0</td>\n",
	" <td>67.0</td>\n",
	" <td>2.0</td>\n",
	" <td>1.0</td>\n",
	" <td>2.0</td>\n",
	" <td>2.0</td>\n",
	" <td>1.0</td>\n",
	" <td>1.0</td>\n",
	" <td>0.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>1.0</td>\n",
	" <td>48.0</td>\n",
	" <td>2.0</td>\n",
	" <td>3.0</td>\n",
	" <td>5951.0</td>\n",
	" <td>0.0</td>\n",
	" <td>2.0</td>\n",
	" <td>2.0</td>\n",
	" <td>1.0</td>\n",
	" <td>0.0</td>\n",
	" <td>2.0</td>\n",
	" <td>0.0</td>\n",
	" <td>22.0</td>\n",
	" <td>2.0</td>\n",
	" <td>1.0</td>\n",
	" <td>1.0</td>\n",
	" <td>2.0</td>\n",
	" <td>1.0</td>\n",
	" <td>0.0</td>\n",
	" <td>0.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>3.0</td>\n",
	" <td>12.0</td>\n",
	" <td>4.0</td>\n",
	" <td>6.0</td>\n",
	" <td>2096.0</td>\n",
	" <td>0.0</td>\n",
	" <td>3.0</td>\n",
	" <td>2.0</td>\n",
	" <td>2.0</td>\n",
	" <td>0.0</td>\n",
	" <td>3.0</td>\n",
	" <td>0.0</td>\n",
	" <td>49.0</td>\n",
	" <td>2.0</td>\n",
	" <td>1.0</td>\n",
	" <td>1.0</td>\n",
	" <td>1.0</td>\n",
	" <td>2.0</td>\n",
	" <td>0.0</td>\n",
	" <td>0.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>0.0</td>\n",
	" <td>42.0</td>\n",
	" <td>2.0</td>\n",
	" <td>2.0</td>\n",
	" <td>7882.0</td>\n",
	" <td>0.0</td>\n",
	" <td>3.0</td>\n",
	" <td>2.0</td>\n",
	" <td>2.0</td>\n",
	" <td>2.0</td>\n",
	" <td>4.0</td>\n",
	" <td>1.0</td>\n",
	" <td>45.0</td>\n",
	" <td>2.0</td>\n",
	" <td>2.0</td>\n",
	" <td>1.0</td>\n",
	" <td>2.0</td>\n",
	" <td>2.0</td>\n",
	" <td>0.0</td>\n",
	" <td>0.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>0.0</td>\n",
	" <td>24.0</td>\n",
	" <td>3.0</td>\n",
	" <td>0.0</td>\n",
	" <td>4870.0</td>\n",
	" <td>0.0</td>\n",
	" <td>2.0</td>\n",
	" <td>3.0</td>\n",
	" <td>2.0</td>\n",
	" <td>0.0</td>\n",
	" <td>4.0</td>\n",
	" <td>3.0</td>\n",
	" <td>53.0</td>\n",
	" <td>2.0</td>\n",
	" <td>2.0</td>\n",
	" <td>2.0</td>\n",
	" <td>2.0</td>\n",
	" <td>2.0</td>\n",
	" <td>0.0</td>\n",
	" <td>0.0</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" checking_status duration credit_history purpose credit_amount \\\n",
	"0 0.0 6.0 4.0 3.0 1169.0 \n",
	"1 1.0 48.0 2.0 3.0 5951.0 \n",
	"2 3.0 12.0 4.0 6.0 2096.0 \n",
	"3 0.0 42.0 2.0 2.0 7882.0 \n",
	"4 0.0 24.0 3.0 0.0 4870.0 \n",
	"\n",
	" savings_status employment installment_commitment personal_status \\\n",
	"0 4.0 4.0 4.0 2.0 \n",
	"1 0.0 2.0 2.0 1.0 \n",
	"2 0.0 3.0 2.0 2.0 \n",
	"3 0.0 3.0 2.0 2.0 \n",
	"4 0.0 2.0 3.0 2.0 \n",
	"\n",
	" other_parties residence_since property_magnitude age \\\n",
	"0 0.0 4.0 0.0 67.0 \n",
	"1 0.0 2.0 0.0 22.0 \n",
	"2 0.0 3.0 0.0 49.0 \n",
	"3 2.0 4.0 1.0 45.0 \n",
	"4 0.0 4.0 3.0 53.0 \n",
	"\n",
	" other_payment_plans housing existing_credits job num_dependents \\\n",
	"0 2.0 1.0 2.0 2.0 1.0 \n",
	"1 2.0 1.0 1.0 2.0 1.0 \n",
	"2 2.0 1.0 1.0 1.0 2.0 \n",
	"3 2.0 2.0 1.0 2.0 2.0 \n",
	"4 2.0 2.0 2.0 2.0 2.0 \n",
	"\n",
	" own_telephone foreign_worker \n",
	"0 1.0 0.0 \n",
	"1 0.0 0.0 \n",
	"2 0.0 0.0 \n",
	"3 0.0 0.0 \n",
	"4 0.0 0.0 "
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"credit_df = pd.DataFrame(credit_data.data, columns=credit_data.feature_names)\n",
	"credit_df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"credit_df['GOOD'] = credit_data.target"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th>col_0</th>\n",
	" <th>bad</th>\n",
	" <th>good</th>\n",
	" <th>All</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>checking_status</th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0.0</th>\n",
	" <td>135</td>\n",
	" <td>139</td>\n",
	" <td>274</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1.0</th>\n",
	" <td>105</td>\n",
	" <td>164</td>\n",
	" <td>269</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2.0</th>\n",
	" <td>14</td>\n",
	" <td>49</td>\n",
	" <td>63</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3.0</th>\n",
	" <td>46</td>\n",
	" <td>348</td>\n",
	" <td>394</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>All</th>\n",
	" <td>300</td>\n",
	" <td>700</td>\n",
	" <td>1000</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	"col_0 bad good All\n",
	"checking_status \n",
	"0.0 135 139 274\n",
	"1.0 105 164 269\n",
	"2.0 14 49 63\n",
	"3.0 46 348 394\n",
	"All 300 700 1000"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"pd.crosstab(credit_df['checking_status'], credit_data.target, margins=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>checking_status_1.0</th>\n",
	" <th>checking_status_2.0</th>\n",
	" <th>checking_status_3.0</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>1</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" checking_status_1.0 checking_status_2.0 checking_status_3.0\n",
	"0 0 0 0\n",
	"1 1 0 0\n",
	"2 0 0 1\n",
	"3 0 0 0\n",
	"4 0 0 0"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"X = pd.get_dummies(credit_df[['checking_status']], columns=['checking_status'], drop_first=True)\n",
	"X.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0 good\n",
	"1 bad\n",
	"2 good\n",
	"3 good\n",
	"4 bad\n",
	"Name: GOOD, dtype: object"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"credit_df['GOOD'].head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"y = 0+(credit_df['GOOD'] == 'good')"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}