Skip to content

Instantly share code, notes, and snippets.

@allisonmorgan
Created April 23, 2021 23:56
Show Gist options
  • Select an option

  • Save allisonmorgan/06f3be704511808e1b6c00431e230767 to your computer and use it in GitHub Desktop.

Select an option

Save allisonmorgan/06f3be704511808e1b6c00431e230767 to your computer and use it in GitHub Desktop.
Logistic regression example
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "operational-tomorrow",
"metadata": {},
"outputs": [],
"source": [
"import statsmodels.api as sm\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "scheduled-somerset",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"gender\n",
"female 0.443131\n",
"male 0.208086\n",
"Name: attrition, dtype: float64"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = []\n",
"\n",
"N_men = 700; men_max_career_length = 10\n",
"N_women = 300; women_max_career_length = 5\n",
"\n",
"for i in range(N_men):\n",
" leave = np.random.randint(0, men_max_career_length)\n",
" for each in [0]*(leave-1) + [1]:\n",
" data.append(['male', each])\n",
"\n",
"for j in range(N_women):\n",
" leave = np.random.randint(0, women_max_career_length)\n",
" for each in [0]*(leave-1) + [1]:\n",
" data.append(['female', each])\n",
" \n",
"df = pd.DataFrame(data, columns=['gender', 'attrition'])\n",
"df.groupby(['gender'])['attrition'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "forced-track",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"gender attrition\n",
"female 0 377\n",
" 1 300\n",
"male 0 2664\n",
" 1 700\n",
"Name: attrition, dtype: int64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby(['gender'])['attrition'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "rural-python",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.2627627627627628\n",
"0.7957559681697614\n"
]
}
],
"source": [
"women = df[df.gender == 'female']['attrition'].value_counts()\n",
"men = df[df.gender == 'male']['attrition'].value_counts()\n",
"\n",
"print((men[1]/(men[0] + men[1]))/(men[0]/(men[0] + men[1])))\n",
"print((women[1]/(women[0] + women[1]))/(women[0]/(women[0] + women[1])))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "otherwise-repeat",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Optimization terminated successfully.\n",
" Current function value: 0.540770\n",
" Iterations 5\n"
]
},
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>Logit Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>attrition</td> <th> No. Observations: </th> <td> 4041</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>Logit</td> <th> Df Residuals: </th> <td> 4039</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>MLE</td> <th> Df Model: </th> <td> 1</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Fri, 23 Apr 2021</td> <th> Pseudo R-squ.: </th> <td>0.03353</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>17:55:50</td> <th> Log-Likelihood: </th> <td> -2185.3</td> \n",
"</tr>\n",
"<tr>\n",
" <th>converged:</th> <td>True</td> <th> LL-Null: </th> <td> -2261.1</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> LLR p-value: </th> <td>7.646e-35</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>z</th> <th>P>|z|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>const</th> <td> -1.3365</td> <td> 0.042</td> <td> -31.467</td> <td> 0.000</td> <td> -1.420</td> <td> -1.253</td>\n",
"</tr>\n",
"<tr>\n",
" <th>is_female</th> <td> 1.1080</td> <td> 0.088</td> <td> 12.554</td> <td> 0.000</td> <td> 0.935</td> <td> 1.281</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" Logit Regression Results \n",
"==============================================================================\n",
"Dep. Variable: attrition No. Observations: 4041\n",
"Model: Logit Df Residuals: 4039\n",
"Method: MLE Df Model: 1\n",
"Date: Fri, 23 Apr 2021 Pseudo R-squ.: 0.03353\n",
"Time: 17:55:50 Log-Likelihood: -2185.3\n",
"converged: True LL-Null: -2261.1\n",
"Covariance Type: nonrobust LLR p-value: 7.646e-35\n",
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const -1.3365 0.042 -31.467 0.000 -1.420 -1.253\n",
"is_female 1.1080 0.088 12.554 0.000 0.935 1.281\n",
"==============================================================================\n",
"\"\"\""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['is_female'] = df['gender'].apply(lambda x: 1 if x == 'female' else 0)\n",
"X = df['is_female']\n",
"X = sm.add_constant(X)\n",
"y = df['attrition']\n",
" \n",
"log_reg = sm.Logit(y, X).fit()\n",
"log_reg.summary()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "flush-stamp",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.2627627627627629\n",
"0.7957559681697618\n"
]
}
],
"source": [
"print(np.exp(log_reg.params['const']))\n",
"print(np.exp(log_reg.params['const'] + log_reg.params['is_female']))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "handy-cincinnati",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment