Created
August 2, 2017 10:51
-
-
Save Erlemar/aba3278a08ff4a9684f4bef7a9910050 to your computer and use it in GitHub Desktop.
Gini index
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data = pd.DataFrame({'Working': [7979, 260], 'On pension': [1334, 39], 'Unknown': [3806, 30]}).T" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>On pension</th>\n", | |
" <td>1334</td>\n", | |
" <td>39</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Unknown</th>\n", | |
" <td>3806</td>\n", | |
" <td>30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Working</th>\n", | |
" <td>7979</td>\n", | |
" <td>260</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0 1\n", | |
"On pension 1334 39\n", | |
"Unknown 3806 30\n", | |
"Working 7979 260" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Suppose we have a variable describing people's occupation and, for each category, the counts of the two target classes (columns 0 and 1).\n", | |
"data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Calculate the target rate (share of class 1 within each row) and sort the rows by it in descending order.\n", | |
"data['s'] = data[1] / np.sum(data.values, axis=1)\n", | |
"data = data.sort_values('s', axis=0, ascending=False)\n", | |
"#Renaming columns for easier reference.\n", | |
"data.columns = ['n', 'y', 's']\n", | |
"# Calculate the cumulative share of each class (as a fraction of the column total).\n", | |
"data['cum_perc_n'] = data.n.cumsum()/data.n.sum()\n", | |
"data['cum_perc_y'] = data.y.cumsum()/data.y.sum()\n", | |
"# x is the rolling mean of cum_perc_y over the current and the previous row. The first row has no previous row, so its value should be simply half of itself; it is corrected separately on the next line, as this is easier.\n", | |
"data['x'] = data.cum_perc_y.rolling(2, min_periods=1).mean()\n", | |
"data.iloc[0, 5] = data.iloc[0, 5] / 2\n", | |
"# Y is the difference between cum_perc_n in the current row and in the previous row (0 is used as the previous value for the first row).\n", | |
"data['Y'] = data.cum_perc_n - data.cum_perc_n.shift(1).fillna(0)\n", | |
"#Simple element-wise multiplication.\n", | |
"data['g'] = data.x * data.Y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>n</th>\n", | |
" <th>y</th>\n", | |
" <th>s</th>\n", | |
" <th>cum_perc_n</th>\n", | |
" <th>cum_perc_y</th>\n", | |
" <th>x</th>\n", | |
" <th>Y</th>\n", | |
" <th>g</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Working</th>\n", | |
" <td>7979</td>\n", | |
" <td>260</td>\n", | |
" <td>0.031557</td>\n", | |
" <td>0.608202</td>\n", | |
" <td>0.790274</td>\n", | |
" <td>0.395137</td>\n", | |
" <td>0.608202</td>\n", | |
" <td>0.240323</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>On pension</th>\n", | |
" <td>1334</td>\n", | |
" <td>39</td>\n", | |
" <td>0.028405</td>\n", | |
" <td>0.709886</td>\n", | |
" <td>0.908815</td>\n", | |
" <td>0.849544</td>\n", | |
" <td>0.101685</td>\n", | |
" <td>0.086386</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Unknown</th>\n", | |
" <td>3806</td>\n", | |
" <td>30</td>\n", | |
" <td>0.007821</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.954407</td>\n", | |
" <td>0.290114</td>\n", | |
" <td>0.276887</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" n y s cum_perc_n cum_perc_y x Y \\\n", | |
"Working 7979 260 0.031557 0.608202 0.790274 0.395137 0.608202 \n", | |
"On pension 1334 39 0.028405 0.709886 0.908815 0.849544 0.101685 \n", | |
"Unknown 3806 30 0.007821 1.000000 1.000000 0.954407 0.290114 \n", | |
"\n", | |
" g \n", | |
"Working 0.240323 \n", | |
"On pension 0.086386 \n", | |
"Unknown 0.276887 " | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"20.718992454156492\n" | |
] | |
} | |
], | |
"source": [ | |
"# This is the Gini index: sum g, multiply by 2 and subtract 1. The result is multiplied by 100 to print it in percent.\n", | |
"print((np.sum(data['g']) * 2 - 1) * 100)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment