Last active
September 3, 2016 06:07
-
-
Save devashishd12/32c4db65f565e9ebbd06ab1be210c05f to your computer and use it in GitHub Desktop.
ZS Data Science challenge notebook
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# ZS Data Science Challenge" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"from matplotlib import pyplot as plt\n", | |
"%matplotlib inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def converttoint(table_name, column_name):\n", | |
" \"\"\"\n", | |
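" Convert the string values of a column (e.g. 'District 12') to int, in place, by splitting on whitespace\n", | |
" and keeping the second token ([1]), since all the string values have this form. Values that cannot be\n", | |
" converted this way are set to NaN.\n", | |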
" \n", | |
" Args:\n", | |
" ----\n", | |
" table_name : Table name\n", | |
" column_name : Column name\n", | |
" \"\"\"\n", | |
" for u in table_name[column_name].unique():\n", | |
" try:\n", | |
" table_name.loc[table_name[column_name] == u, column_name] = int(u.split()[1])\n", | |
" except:\n", | |
" # If the value cannot be converted (e.g. there's no district available), assign NaN\n", | |
" table_name.loc[table_name[column_name] == u, column_name] = np.NaN" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Hospital Profiling" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"hospital_profiling = pd.read_csv('/home/devashish/datasets/ZS/HospitalProfiling.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Hospital_employees</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>District 12</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>District 13</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>District 15</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>District 16</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>District 19</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID District_ID Hospital_employees\n", | |
"0 Hospital 1 District 12 3\n", | |
"1 Hospital 1 District 13 6\n", | |
"2 Hospital 1 District 15 2\n", | |
"3 Hospital 1 District 16 3\n", | |
"4 Hospital 1 District 19 5" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"hospital_profiling.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"converttoint(hospital_profiling, 'Hospital_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"converttoint(hospital_profiling, 'District_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Hospital_employees</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>13</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>15</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>16</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>19</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID District_ID Hospital_employees\n", | |
"0 1 12 3\n", | |
"1 1 13 6\n", | |
"2 1 15 2\n", | |
"3 1 16 3\n", | |
"4 1 19 5" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"hospital_profiling.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Starting off with hospital revenue" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"hospital_revenue = pd.read_csv('/home/devashish/datasets/ZS/HospitalRevenue.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>Region_ID</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Instrument_ID</th>\n", | |
" <th>Month 1</th>\n", | |
" <th>Month 2</th>\n", | |
" <th>Month 3</th>\n", | |
" <th>Month 4</th>\n", | |
" <th>Month 5</th>\n", | |
" <th>Month 6</th>\n", | |
" <th>Month 7</th>\n", | |
" <th>Month 8</th>\n", | |
" <th>Month 9</th>\n", | |
" <th>Month 10</th>\n", | |
" <th>Month 11</th>\n", | |
" <th>Month 12</th>\n", | |
" <th>Year Total</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>Region 1</td>\n", | |
" <td>District 12</td>\n", | |
" <td>Instrument 2</td>\n", | |
" <td>8534</td>\n", | |
" <td>9917</td>\n", | |
" <td>7825</td>\n", | |
" <td>11702</td>\n", | |
" <td>8776</td>\n", | |
" <td>7755</td>\n", | |
" <td>9289</td>\n", | |
" <td>7796</td>\n", | |
" <td>7595</td>\n", | |
" <td>8292</td>\n", | |
" <td>7787</td>\n", | |
" <td>8282</td>\n", | |
" <td>103550</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>Region 1</td>\n", | |
" <td>District 12</td>\n", | |
" <td>Instrument 3</td>\n", | |
" <td>298</td>\n", | |
" <td>298</td>\n", | |
" <td>214</td>\n", | |
" <td>311</td>\n", | |
" <td>261</td>\n", | |
" <td>223</td>\n", | |
" <td>237</td>\n", | |
" <td>171</td>\n", | |
" <td>173</td>\n", | |
" <td>183</td>\n", | |
" <td>193</td>\n", | |
" <td>0</td>\n", | |
" <td>2562</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>Region 1</td>\n", | |
" <td>District 13</td>\n", | |
" <td>Instrument 1</td>\n", | |
" <td>37</td>\n", | |
" <td>40</td>\n", | |
" <td>38</td>\n", | |
" <td>43</td>\n", | |
" <td>29</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>187</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>Region 1</td>\n", | |
" <td>District 13</td>\n", | |
" <td>Instrument 2</td>\n", | |
" <td>2486</td>\n", | |
" <td>3332</td>\n", | |
" <td>3193</td>\n", | |
" <td>2556</td>\n", | |
" <td>2108</td>\n", | |
" <td>2757</td>\n", | |
" <td>2639</td>\n", | |
" <td>2531</td>\n", | |
" <td>2771</td>\n", | |
" <td>2682</td>\n", | |
" <td>12317</td>\n", | |
" <td>1369</td>\n", | |
" <td>40741</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>Region 1</td>\n", | |
" <td>District 13</td>\n", | |
" <td>Instrument 3</td>\n", | |
" <td>857</td>\n", | |
" <td>892</td>\n", | |
" <td>739</td>\n", | |
" <td>759</td>\n", | |
" <td>736</td>\n", | |
" <td>415</td>\n", | |
" <td>1203</td>\n", | |
" <td>434</td>\n", | |
" <td>448</td>\n", | |
" <td>113</td>\n", | |
" <td>829</td>\n", | |
" <td>1124</td>\n", | |
" <td>8549</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID Region_ID District_ID Instrument_ID Month 1 Month 2 Month 3 \\\n", | |
"0 Hospital 1 Region 1 District 12 Instrument 2 8534 9917 7825 \n", | |
"1 Hospital 1 Region 1 District 12 Instrument 3 298 298 214 \n", | |
"2 Hospital 1 Region 1 District 13 Instrument 1 37 40 38 \n", | |
"3 Hospital 1 Region 1 District 13 Instrument 2 2486 3332 3193 \n", | |
"4 Hospital 1 Region 1 District 13 Instrument 3 857 892 739 \n", | |
"\n", | |
" Month 4 Month 5 Month 6 Month 7 Month 8 Month 9 Month 10 Month 11 \\\n", | |
"0 11702 8776 7755 9289 7796 7595 8292 7787 \n", | |
"1 311 261 223 237 171 173 183 193 \n", | |
"2 43 29 0 0 0 0 0 0 \n", | |
"3 2556 2108 2757 2639 2531 2771 2682 12317 \n", | |
"4 759 736 415 1203 434 448 113 829 \n", | |
"\n", | |
" Month 12 Year Total \n", | |
"0 8282 103550 \n", | |
"1 0 2562 \n", | |
"2 0 187 \n", | |
"3 1369 40741 \n", | |
"4 1124 8549 " | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"hospital_revenue.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"converttoint(hospital_revenue, 'Hospital_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"converttoint(hospital_revenue, 'Region_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"converttoint(hospital_revenue, 'District_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"converttoint(hospital_revenue, 'Instrument_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"hospital_revenue = hospital_revenue.dropna()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Hospital_ID 43846\n", | |
"Region_ID 43846\n", | |
"District_ID 43846\n", | |
"Instrument_ID 43846\n", | |
"Month 1 43846\n", | |
"Month 2 43846\n", | |
"Month 3 43846\n", | |
"Month 4 43846\n", | |
"Month 5 43846\n", | |
"Month 6 43846\n", | |
"Month 7 43846\n", | |
"Month 8 43846\n", | |
"Month 9 43846\n", | |
"Month 10 43846\n", | |
"Month 11 43846\n", | |
"Month 12 43846\n", | |
"Year Total 43846\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"hospital_revenue.count()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"projected_revenue = pd.read_csv('/home/devashish/datasets/ZS/ProjectedRevenue.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Instrument_ID</th>\n", | |
" <th>Annual_Projected_Revenue</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>District 13</td>\n", | |
" <td>Instrument 2</td>\n", | |
" <td>17164</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>District 13</td>\n", | |
" <td>Instrument 3</td>\n", | |
" <td>655645</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>District 13</td>\n", | |
" <td>Instrument 4</td>\n", | |
" <td>281452</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>District 2</td>\n", | |
" <td>Instrument 2</td>\n", | |
" <td>12199</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Hospital 1</td>\n", | |
" <td>District 20</td>\n", | |
" <td>Instrument 2</td>\n", | |
" <td>178128</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID District_ID Instrument_ID Annual_Projected_Revenue\n", | |
"0 Hospital 1 District 13 Instrument 2 17164\n", | |
"1 Hospital 1 District 13 Instrument 3 655645\n", | |
"2 Hospital 1 District 13 Instrument 4 281452\n", | |
"3 Hospital 1 District 2 Instrument 2 12199\n", | |
"4 Hospital 1 District 20 Instrument 2 178128" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"projected_revenue.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"converttoint(projected_revenue, 'Hospital_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"converttoint(projected_revenue, 'District_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"converttoint(projected_revenue, 'Instrument_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Hospital_ID 11410\n", | |
"District_ID 11320\n", | |
"Instrument_ID 11410\n", | |
"Annual_Projected_Revenue 11410\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"projected_revenue.count()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"projected_revenue = projected_revenue.dropna()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Hospital_ID 11320\n", | |
"District_ID 11320\n", | |
"Instrument_ID 11320\n", | |
"Annual_Projected_Revenue 11320\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"projected_revenue.count()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Unique Instrument_IDs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([2, 3, 1, 4, 5, 6, 7, 8, 10, 11, 15, 20, 13], dtype=object)" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"hospital_revenue['Instrument_ID'].unique()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Unique District_IDs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([12, 13, 16, 18, 19, 2, 20, 28, 3, 34, 35, 37, 39, 4, 41, 5, 50, 52,\n", | |
" 32, 45, 44, 29, 10, 14, 21, 27, 33, 38, 42, 46, 47, 48, 49, 51, 6,\n", | |
" 9, 17, 7, 15, 25, 30, 31, 43, 8, 40, 22, 23, 11, 24, 26, 1, 36], dtype=object)" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"hospital_revenue['District_ID'].unique()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"As we can see, no hospitals use all the instruments" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Hospital_ID\n", | |
"1 [2, 3, 1, 4, 5, 6, 7, 8, 10, 11]\n", | |
"2 [1, 2, 3, 4, 6, 7, 5]\n", | |
"3 [1, 2, 3, 6, 7, 4, 5, 8]\n", | |
"4 [2, 3, 1, 4, 5, 8, 6, 7]\n", | |
"5 [1, 2, 3, 4, 6, 7, 5, 8]\n", | |
"Name: Instrument_ID, dtype: object" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"hospital_revenue.groupby(['Hospital_ID'])['Instrument_ID'].unique().head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"And no hospitals are in all the districts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Hospital_ID\n", | |
"1 [12, 13, 16, 18, 19, 2, 20, 28, 3, 34, 35, 37,...\n", | |
"2 [13, 16, 20, 21, 28, 34, 41, 50]\n", | |
"3 [10, 13, 18, 29, 3, 32, 34, 35, 37, 4, 41, 44,...\n", | |
"4 [12, 13, 20, 28, 3, 34, 47, 8]\n", | |
"5 [10, 13, 16, 17, 18, 19, 20, 21, 24, 25, 27, 2...\n", | |
"Name: District_ID, dtype: object" | |
] | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"hospital_revenue.groupby(['Hospital_ID'])['District_ID'].unique().head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Making final train dataframe by feature engineering" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train = pd.DataFrame()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"train['Hospital_ID'] = hospital_revenue['Hospital_ID']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train = train.drop_duplicates()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>52</th>\n", | |
" <td>10</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>60</th>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>73</th>\n", | |
" <td>1000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>100</th>\n", | |
" <td>1001</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID\n", | |
"0 1\n", | |
"52 10\n", | |
"60 100\n", | |
"73 1000\n", | |
"100 1001" | |
] | |
}, | |
"execution_count": 31, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### 1. Adding total hospital employees into the mix" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train = pd.merge(train, hospital_profiling.groupby('Hospital_ID', as_index=False).sum(), on='Hospital_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Rename second column\n", | |
"train = train.rename(columns={'Hospital_employees':'Total_Hospital_employees'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>Total_Hospital_employees</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10</td>\n", | |
" <td>1076</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>100</td>\n", | |
" <td>4925</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1000</td>\n", | |
" <td>5756</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1001</td>\n", | |
" <td>4002</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID Total_Hospital_employees\n", | |
"0 1 13088\n", | |
"1 10 1076\n", | |
"2 100 4925\n", | |
"3 1000 5756\n", | |
"4 1001 4002" | |
] | |
}, | |
"execution_count": 34, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### 2. Add district ID with hospital employees in that district" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"train['key'] = 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame(hospital_profiling['District_ID'].unique())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df['key'] = 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df.columns = ['District_ID', 'key']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train = pd.merge(train, df, on='key')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"del train['key']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Hospital_ID 74382\n", | |
"Total_Hospital_employees 74382\n", | |
"District_ID 74382\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.count()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train = pd.merge(train, hospital_profiling.groupby(['Hospital_ID', 'District_ID'], as_index=False).sum(),\n", | |
" on=['Hospital_ID', 'District_ID'], how='left')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train = train.fillna(0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>Total_Hospital_employees</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Hospital_employees</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>13</td>\n", | |
" <td>6.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>15</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>16</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>19</td>\n", | |
" <td>10.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID Total_Hospital_employees District_ID Hospital_employees\n", | |
"0 1 13088 12 3.0\n", | |
"1 1 13088 13 6.0\n", | |
"2 1 13088 15 2.0\n", | |
"3 1 13088 16 3.0\n", | |
"4 1 13088 19 10.0" | |
] | |
}, | |
"execution_count": 44, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"train = train.rename(columns={'Hospital_employees':'Hospital_employees_in_district'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>Total_Hospital_employees</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Hospital_employees_in_district</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>13</td>\n", | |
" <td>6.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>15</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>16</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>19</td>\n", | |
" <td>10.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID Total_Hospital_employees District_ID \\\n", | |
"0 1 13088 12 \n", | |
"1 1 13088 13 \n", | |
"2 1 13088 15 \n", | |
"3 1 13088 16 \n", | |
"4 1 13088 19 \n", | |
"\n", | |
" Hospital_employees_in_district \n", | |
"0 3.0 \n", | |
"1 6.0 \n", | |
"2 2.0 \n", | |
"3 3.0 \n", | |
"4 10.0 " | |
] | |
}, | |
"execution_count": 46, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Add total number of hospitals in district" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train = pd.merge(train, hospital_profiling.groupby('District_ID', as_index=False).agg({'Hospital_ID' : np.count_nonzero}),\n", | |
" on='District_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train = train.rename(columns={'Hospital_ID_x':'Hospital_ID', 'Hospital_ID_y': 'Hospitals_in_District'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Hospital_ID 74382\n", | |
"Total_Hospital_employees 74382\n", | |
"District_ID 74382\n", | |
"Hospital_employees_in_district 74382\n", | |
"Hospitals_in_District 74382\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 49, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.count()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Add every unique Instrument_ID to each (hospital, district) row" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame(hospital_revenue['Instrument_ID'].unique())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df.columns = ['Instrument_ID']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>Total_Hospital_employees</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Hospital_employees_in_district</th>\n", | |
" <th>Hospitals_in_District</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10</td>\n", | |
" <td>1076</td>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1086</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>100</td>\n", | |
" <td>4925</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1000</td>\n", | |
" <td>5756</td>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1086</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1001</td>\n", | |
" <td>4002</td>\n", | |
" <td>12</td>\n", | |
" <td>97.0</td>\n", | |
" <td>1086</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID Total_Hospital_employees District_ID \\\n", | |
"0 1 13088 12 \n", | |
"1 10 1076 12 \n", | |
"2 100 4925 12 \n", | |
"3 1000 5756 12 \n", | |
"4 1001 4002 12 \n", | |
"\n", | |
" Hospital_employees_in_district Hospitals_in_District \n", | |
"0 3.0 1086 \n", | |
"1 0.0 1086 \n", | |
"2 3.0 1086 \n", | |
"3 0.0 1086 \n", | |
"4 97.0 1086 " | |
] | |
}, | |
"execution_count": 52, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df['key'] = 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train['key'] = 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train = pd.merge(train, df, on='key')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"del train['key']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Hospital_ID 966966\n", | |
"Total_Hospital_employees 966966\n", | |
"District_ID 966966\n", | |
"Hospital_employees_in_district 966966\n", | |
"Hospitals_in_District 966966\n", | |
"Instrument_ID 966966\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 57, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.count()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>Total_Hospital_employees</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Hospital_employees_in_district</th>\n", | |
" <th>Hospitals_in_District</th>\n", | |
" <th>Instrument_ID</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID Total_Hospital_employees District_ID \\\n", | |
"0 1 13088 12 \n", | |
"1 1 13088 12 \n", | |
"2 1 13088 12 \n", | |
"3 1 13088 12 \n", | |
"4 1 13088 12 \n", | |
"\n", | |
" Hospital_employees_in_district Hospitals_in_District Instrument_ID \n", | |
"0 3.0 1086 2 \n", | |
"1 3.0 1086 3 \n", | |
"2 3.0 1086 1 \n", | |
"3 3.0 1086 4 \n", | |
"4 3.0 1086 5 " | |
] | |
}, | |
"execution_count": 58, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Add total instrument demand" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame(hospital_revenue[\"Instrument_ID\"].value_counts().reset_index())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df.columns = ['Instrument_ID', 'Total_Instr_Demand']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train = pd.merge(train, df, on='Instrument_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>Total_Hospital_employees</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Hospital_employees_in_district</th>\n", | |
" <th>Hospitals_in_District</th>\n", | |
" <th>Instrument_ID</th>\n", | |
" <th>Total_Instr_Demand</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10</td>\n", | |
" <td>1076</td>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>100</td>\n", | |
" <td>4925</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1000</td>\n", | |
" <td>5756</td>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1001</td>\n", | |
" <td>4002</td>\n", | |
" <td>12</td>\n", | |
" <td>97.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID Total_Hospital_employees District_ID \\\n", | |
"0 1 13088 12 \n", | |
"1 10 1076 12 \n", | |
"2 100 4925 12 \n", | |
"3 1000 5756 12 \n", | |
"4 1001 4002 12 \n", | |
"\n", | |
" Hospital_employees_in_district Hospitals_in_District Instrument_ID \\\n", | |
"0 3.0 1086 2 \n", | |
"1 0.0 1086 2 \n", | |
"2 3.0 1086 2 \n", | |
"3 0.0 1086 2 \n", | |
"4 97.0 1086 2 \n", | |
"\n", | |
" Total_Instr_Demand \n", | |
"0 13635 \n", | |
"1 13635 \n", | |
"2 13635 \n", | |
"3 13635 \n", | |
"4 13635 " | |
] | |
}, | |
"execution_count": 62, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Add instrument value" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = hospital_revenue[['Instrument_ID', 'Year Total']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = df.groupby('Instrument_ID', as_index=False).sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"mean = df['Year Total'].mean()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df['Instrument_Value'] = df['Year Total'].apply(lambda x: x / mean)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 68, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"del df['Year Total']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Instrument_ID</th>\n", | |
" <th>Instrument_Value</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>0.230520</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>7.172774</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>1.250719</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>0.460606</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>1.275721</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Instrument_ID Instrument_Value\n", | |
"0 1 0.230520\n", | |
"1 2 7.172774\n", | |
"2 3 1.250719\n", | |
"3 4 0.460606\n", | |
"4 5 1.275721" | |
] | |
}, | |
"execution_count": 69, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 70, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train = pd.merge(train, df, on=['Instrument_ID'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 71, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>Total_Hospital_employees</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Hospital_employees_in_district</th>\n", | |
" <th>Hospitals_in_District</th>\n", | |
" <th>Instrument_ID</th>\n", | |
" <th>Total_Instr_Demand</th>\n", | |
" <th>Instrument_Value</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10</td>\n", | |
" <td>1076</td>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>100</td>\n", | |
" <td>4925</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1000</td>\n", | |
" <td>5756</td>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1001</td>\n", | |
" <td>4002</td>\n", | |
" <td>12</td>\n", | |
" <td>97.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID Total_Hospital_employees District_ID \\\n", | |
"0 1 13088 12 \n", | |
"1 10 1076 12 \n", | |
"2 100 4925 12 \n", | |
"3 1000 5756 12 \n", | |
"4 1001 4002 12 \n", | |
"\n", | |
" Hospital_employees_in_district Hospitals_in_District Instrument_ID \\\n", | |
"0 3.0 1086 2 \n", | |
"1 0.0 1086 2 \n", | |
"2 3.0 1086 2 \n", | |
"3 0.0 1086 2 \n", | |
"4 97.0 1086 2 \n", | |
"\n", | |
" Total_Instr_Demand Instrument_Value \n", | |
"0 13635 7.172774 \n", | |
"1 13635 7.172774 \n", | |
"2 13635 7.172774 \n", | |
"3 13635 7.172774 \n", | |
"4 13635 7.172774 " | |
] | |
}, | |
"execution_count": 71, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Add median revenue for each instrument" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 72, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 73, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = hospital_revenue[['Instrument_ID', 'Year Total']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 74, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = df.groupby('Instrument_ID', as_index=False).median()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 75, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df.columns = ['Instrument_ID', 'Instrument_Median']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 76, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"train = pd.merge(train, df, on='Instrument_ID', how='left')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Join with the previous year's revenue" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 77, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"train = pd.merge(train, hospital_revenue.drop('Region_ID', axis=1),\n", | |
" on=['Hospital_ID', 'District_ID', 'Instrument_ID'],\n", | |
" how='left')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 78, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train = train.fillna(0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 79, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>Total_Hospital_employees</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Hospital_employees_in_district</th>\n", | |
" <th>Hospitals_in_District</th>\n", | |
" <th>Instrument_ID</th>\n", | |
" <th>Total_Instr_Demand</th>\n", | |
" <th>Instrument_Value</th>\n", | |
" <th>Instrument_Median</th>\n", | |
" <th>Month 1</th>\n", | |
" <th>...</th>\n", | |
" <th>Month 4</th>\n", | |
" <th>Month 5</th>\n", | |
" <th>Month 6</th>\n", | |
" <th>Month 7</th>\n", | |
" <th>Month 8</th>\n", | |
" <th>Month 9</th>\n", | |
" <th>Month 10</th>\n", | |
" <th>Month 11</th>\n", | |
" <th>Month 12</th>\n", | |
" <th>Year Total</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>8534.0</td>\n", | |
" <td>...</td>\n", | |
" <td>11702.0</td>\n", | |
" <td>8776.0</td>\n", | |
" <td>7755.0</td>\n", | |
" <td>9289.0</td>\n", | |
" <td>7796.0</td>\n", | |
" <td>7595.0</td>\n", | |
" <td>8292.0</td>\n", | |
" <td>7787.0</td>\n", | |
" <td>8282.0</td>\n", | |
" <td>103550.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10</td>\n", | |
" <td>1076</td>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>100</td>\n", | |
" <td>4925</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1000</td>\n", | |
" <td>5756</td>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1001</td>\n", | |
" <td>4002</td>\n", | |
" <td>12</td>\n", | |
" <td>97.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>1047.0</td>\n", | |
" <td>...</td>\n", | |
" <td>338.0</td>\n", | |
" <td>1303.0</td>\n", | |
" <td>1067.0</td>\n", | |
" <td>973.0</td>\n", | |
" <td>761.0</td>\n", | |
" <td>630.0</td>\n", | |
" <td>821.0</td>\n", | |
" <td>666.0</td>\n", | |
" <td>841.0</td>\n", | |
" <td>10453.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 22 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID Total_Hospital_employees District_ID \\\n", | |
"0 1 13088 12 \n", | |
"1 10 1076 12 \n", | |
"2 100 4925 12 \n", | |
"3 1000 5756 12 \n", | |
"4 1001 4002 12 \n", | |
"\n", | |
" Hospital_employees_in_district Hospitals_in_District Instrument_ID \\\n", | |
"0 3.0 1086 2 \n", | |
"1 0.0 1086 2 \n", | |
"2 3.0 1086 2 \n", | |
"3 0.0 1086 2 \n", | |
"4 97.0 1086 2 \n", | |
"\n", | |
" Total_Instr_Demand Instrument_Value Instrument_Median Month 1 \\\n", | |
"0 13635 7.172774 8656.0 8534.0 \n", | |
"1 13635 7.172774 8656.0 0.0 \n", | |
"2 13635 7.172774 8656.0 0.0 \n", | |
"3 13635 7.172774 8656.0 0.0 \n", | |
"4 13635 7.172774 8656.0 1047.0 \n", | |
"\n", | |
" ... Month 4 Month 5 Month 6 Month 7 Month 8 Month 9 Month 10 \\\n", | |
"0 ... 11702.0 8776.0 7755.0 9289.0 7796.0 7595.0 8292.0 \n", | |
"1 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", | |
"2 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", | |
"3 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", | |
"4 ... 338.0 1303.0 1067.0 973.0 761.0 630.0 821.0 \n", | |
"\n", | |
" Month 11 Month 12 Year Total \n", | |
"0 7787.0 8282.0 103550.0 \n", | |
"1 0.0 0.0 0.0 \n", | |
"2 0.0 0.0 0.0 \n", | |
"3 0.0 0.0 0.0 \n", | |
"4 666.0 841.0 10453.0 \n", | |
"\n", | |
"[5 rows x 22 columns]" | |
] | |
}, | |
"execution_count": 79, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 80, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 81, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"train['Buy_or_not'] = train['Year Total']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 82, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>Total_Hospital_employees</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Hospital_employees_in_district</th>\n", | |
" <th>Hospitals_in_District</th>\n", | |
" <th>Instrument_ID</th>\n", | |
" <th>Total_Instr_Demand</th>\n", | |
" <th>Instrument_Value</th>\n", | |
" <th>Instrument_Median</th>\n", | |
" <th>Month 1</th>\n", | |
" <th>...</th>\n", | |
" <th>Month 5</th>\n", | |
" <th>Month 6</th>\n", | |
" <th>Month 7</th>\n", | |
" <th>Month 8</th>\n", | |
" <th>Month 9</th>\n", | |
" <th>Month 10</th>\n", | |
" <th>Month 11</th>\n", | |
" <th>Month 12</th>\n", | |
" <th>Year Total</th>\n", | |
" <th>Buy_or_not</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>8534.0</td>\n", | |
" <td>...</td>\n", | |
" <td>8776.0</td>\n", | |
" <td>7755.0</td>\n", | |
" <td>9289.0</td>\n", | |
" <td>7796.0</td>\n", | |
" <td>7595.0</td>\n", | |
" <td>8292.0</td>\n", | |
" <td>7787.0</td>\n", | |
" <td>8282.0</td>\n", | |
" <td>103550.0</td>\n", | |
" <td>103550.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10</td>\n", | |
" <td>1076</td>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>100</td>\n", | |
" <td>4925</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1000</td>\n", | |
" <td>5756</td>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1001</td>\n", | |
" <td>4002</td>\n", | |
" <td>12</td>\n", | |
" <td>97.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>1047.0</td>\n", | |
" <td>...</td>\n", | |
" <td>1303.0</td>\n", | |
" <td>1067.0</td>\n", | |
" <td>973.0</td>\n", | |
" <td>761.0</td>\n", | |
" <td>630.0</td>\n", | |
" <td>821.0</td>\n", | |
" <td>666.0</td>\n", | |
" <td>841.0</td>\n", | |
" <td>10453.0</td>\n", | |
" <td>10453.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 23 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID Total_Hospital_employees District_ID \\\n", | |
"0 1 13088 12 \n", | |
"1 10 1076 12 \n", | |
"2 100 4925 12 \n", | |
"3 1000 5756 12 \n", | |
"4 1001 4002 12 \n", | |
"\n", | |
" Hospital_employees_in_district Hospitals_in_District Instrument_ID \\\n", | |
"0 3.0 1086 2 \n", | |
"1 0.0 1086 2 \n", | |
"2 3.0 1086 2 \n", | |
"3 0.0 1086 2 \n", | |
"4 97.0 1086 2 \n", | |
"\n", | |
" Total_Instr_Demand Instrument_Value Instrument_Median Month 1 \\\n", | |
"0 13635 7.172774 8656.0 8534.0 \n", | |
"1 13635 7.172774 8656.0 0.0 \n", | |
"2 13635 7.172774 8656.0 0.0 \n", | |
"3 13635 7.172774 8656.0 0.0 \n", | |
"4 13635 7.172774 8656.0 1047.0 \n", | |
"\n", | |
" ... Month 5 Month 6 Month 7 Month 8 Month 9 Month 10 \\\n", | |
"0 ... 8776.0 7755.0 9289.0 7796.0 7595.0 8292.0 \n", | |
"1 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", | |
"2 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", | |
"3 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", | |
"4 ... 1303.0 1067.0 973.0 761.0 630.0 821.0 \n", | |
"\n", | |
" Month 11 Month 12 Year Total Buy_or_not \n", | |
"0 7787.0 8282.0 103550.0 103550.0 \n", | |
"1 0.0 0.0 0.0 0.0 \n", | |
"2 0.0 0.0 0.0 0.0 \n", | |
"3 0.0 0.0 0.0 0.0 \n", | |
"4 666.0 841.0 10453.0 10453.0 \n", | |
"\n", | |
"[5 rows x 23 columns]" | |
] | |
}, | |
"execution_count": 82, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 83, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = train['Buy_or_not']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 84, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/devashish/miniconda2/lib/python2.7/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: \n", | |
"A value is trying to be set on a copy of a slice from a DataFrame\n", | |
"\n", | |
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", | |
" if __name__ == '__main__':\n", | |
"/home/devashish/miniconda2/lib/python2.7/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: \n", | |
"A value is trying to be set on a copy of a slice from a DataFrame\n", | |
"\n", | |
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", | |
" from ipykernel import kernelapp as app\n" | |
] | |
} | |
], | |
"source": [ | |
"df[df != 0] = 1\n", | |
"df[df == 0] = 0" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>Total_Hospital_employees</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Hospital_employees_in_district</th>\n", | |
" <th>Hospitals_in_District</th>\n", | |
" <th>Instrument_ID</th>\n", | |
" <th>Total_Instr_Demand</th>\n", | |
" <th>Instrument_Value</th>\n", | |
" <th>Instrument_Median</th>\n", | |
" <th>Month 1</th>\n", | |
" <th>...</th>\n", | |
" <th>Month 5</th>\n", | |
" <th>Month 6</th>\n", | |
" <th>Month 7</th>\n", | |
" <th>Month 8</th>\n", | |
" <th>Month 9</th>\n", | |
" <th>Month 10</th>\n", | |
" <th>Month 11</th>\n", | |
" <th>Month 12</th>\n", | |
" <th>Year Total</th>\n", | |
" <th>Buy_or_not</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>13088</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>8534.0</td>\n", | |
" <td>...</td>\n", | |
" <td>8776.0</td>\n", | |
" <td>7755.0</td>\n", | |
" <td>9289.0</td>\n", | |
" <td>7796.0</td>\n", | |
" <td>7595.0</td>\n", | |
" <td>8292.0</td>\n", | |
" <td>7787.0</td>\n", | |
" <td>8282.0</td>\n", | |
" <td>103550.0</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10</td>\n", | |
" <td>1076</td>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>100</td>\n", | |
" <td>4925</td>\n", | |
" <td>12</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1000</td>\n", | |
" <td>5756</td>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1001</td>\n", | |
" <td>4002</td>\n", | |
" <td>12</td>\n", | |
" <td>97.0</td>\n", | |
" <td>1086</td>\n", | |
" <td>2</td>\n", | |
" <td>13635</td>\n", | |
" <td>7.172774</td>\n", | |
" <td>8656.0</td>\n", | |
" <td>1047.0</td>\n", | |
" <td>...</td>\n", | |
" <td>1303.0</td>\n", | |
" <td>1067.0</td>\n", | |
" <td>973.0</td>\n", | |
" <td>761.0</td>\n", | |
" <td>630.0</td>\n", | |
" <td>821.0</td>\n", | |
" <td>666.0</td>\n", | |
" <td>841.0</td>\n", | |
" <td>10453.0</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 23 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID Total_Hospital_employees District_ID \\\n", | |
"0 1 13088 12 \n", | |
"1 10 1076 12 \n", | |
"2 100 4925 12 \n", | |
"3 1000 5756 12 \n", | |
"4 1001 4002 12 \n", | |
"\n", | |
" Hospital_employees_in_district Hospitals_in_District Instrument_ID \\\n", | |
"0 3.0 1086 2 \n", | |
"1 0.0 1086 2 \n", | |
"2 3.0 1086 2 \n", | |
"3 0.0 1086 2 \n", | |
"4 97.0 1086 2 \n", | |
"\n", | |
" Total_Instr_Demand Instrument_Value Instrument_Median Month 1 \\\n", | |
"0 13635 7.172774 8656.0 8534.0 \n", | |
"1 13635 7.172774 8656.0 0.0 \n", | |
"2 13635 7.172774 8656.0 0.0 \n", | |
"3 13635 7.172774 8656.0 0.0 \n", | |
"4 13635 7.172774 8656.0 1047.0 \n", | |
"\n", | |
" ... Month 5 Month 6 Month 7 Month 8 Month 9 Month 10 \\\n", | |
"0 ... 8776.0 7755.0 9289.0 7796.0 7595.0 8292.0 \n", | |
"1 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", | |
"2 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", | |
"3 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", | |
"4 ... 1303.0 1067.0 973.0 761.0 630.0 821.0 \n", | |
"\n", | |
" Month 11 Month 12 Year Total Buy_or_not \n", | |
"0 7787.0 8282.0 103550.0 1.0 \n", | |
"1 0.0 0.0 0.0 0.0 \n", | |
"2 0.0 0.0 0.0 0.0 \n", | |
"3 0.0 0.0 0.0 0.0 \n", | |
"4 666.0 841.0 10453.0 1.0 \n", | |
"\n", | |
"[5 rows x 23 columns]" | |
] | |
}, | |
"execution_count": 85, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Train the model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 86, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"predictors = ['Hospital_ID', 'Total_Hospital_employees', 'District_ID', 'Hospital_employees_in_district',\n", | |
" 'Hospitals_in_District', 'Instrument_ID', 'Total_Instr_Demand', 'Instrument_Value', 'Instrument_Median']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Machine Learning" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 87, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/devashish/EXPERIMENTATION/scikit-learn/sklearn/cross_validation.py:43: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", | |
" \"This module will be removed in 0.20.\", DeprecationWarning)\n" | |
] | |
} | |
], | |
"source": [ | |
"from sklearn.feature_selection import SelectKBest, f_classif, f_regression\n", | |
"from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n", | |
"from sklearn.linear_model import LogisticRegression, LinearRegression\n", | |
"from sklearn import cross_validation\n", | |
"from sklearn import metrics" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 88, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"selector = SelectKBest(f_classif, k=5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 89, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"SelectKBest(k=5, score_func=<function f_classif at 0x7f27ab209c08>)" | |
] | |
}, | |
"execution_count": 89, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"selector.fit(train[predictors], train['Buy_or_not'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 90, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"scores = -np.log10(selector.pvalues_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 91, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAGQCAYAAABLSBB3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3WmYZFWV7vH/WyDKLI7VAjIoItoIKoMDrdUqOF5BbWlR\nHHBoJxTx3m5Ar1LS2qjdjqDtVREBRxAV6FYElUQGlbGYRQRRRBlEhkJapeS9H/aJqsgkMiIyqjLO\niVPv73niqYwTEeQiMnPFPvvsvZZsExER7bSg7gAiImL+JMlHRLRYknxERIslyUdEtFiSfEREiyXJ\nR0S02MAkL2kTST+UdJmkSyS9rTp+sKTfSLqguj2n6zUHSbpK0hWSdpvP/4GIiJidBq2Tl7QQWGh7\niaT1gPOB3YF/BJba/uiM528DfAXYEdgE+D6wlbMgPyJi7AaO5G3fYHtJ9fWdwBXAxtXD6vGS3YGv\n2V5m+1rgKmCnVRNuRETMxZzm5CVtDmwP/LQ6tK+kJZI+L2nD6tjGwHVdL7ueFR8KERExRkMn+Wqq\n5hvAftWI/tPAlra3B24APjI/IUZExKjWHOZJktakJPhjbJ8AYPvmrqd8Djip+vp6YNOuxzapjs38\nb2aOPiJiBLZ7TZX3NOxI/gvA5bY/0TlQXZDteDFwafX1icDLJK0laQvgkcA5swTa+NvBBx9cewyJ\nM3FOcpyTEOMkxTlXA0fykp4KvAK4RNKFgIF3AS+XtD1wD3At8MYqcV8u6VjgcuBu4C0eJbKIiFhp\nA5O87bOANXo8dHKf1xwKHLoScUVExCqQHa8DLFq0qO4QhpI4V63EuepMQowwOXHO1cDNUPP2jaXM\n4kREzJEkPA8XXiMiYgIlyUdEtFiSfEREiyXJR0S0WJJ8RESLJclHRLRYknxERIslyUdEtFiSfERE\niyXJR0S0WJJ8RESLJclHRLRYknxERIslyUdEtFiSfEREiyXJR0S0WJJ8RESLJclHRLRYknxERIsl\nyUdEtFiS/AALF26OpNpvCxduXvdbERETSLbr+caS6/recyEJaEKcYhLer4iYX5KwrWGfn5F8RESL\nJclHRLRYknxERIslyUdEtFiSfEREiyXJR0S0WJJ8RESLJclHRLRYknxERIslyUdEtFiSfEREiyXJ\nR0S02MAkL2kTST+UdJmkSyS9vTq+kaRTJF0p6XuSNux6zUGSrpJ0haTd5vN/ICIiZjewCqWkhcBC\n20skrQecD+wO7APcYvvDkg4ANrJ9oKTHAF8GdgQ2Ab4PbDWz5GSqUM5VqlBGxDxUobR9g+0l1dd3\nAldQkvfuwFHV044C9qi+fiHwNdvLbF8LXAXsNPT/QURErDJzmpOXtDmwPfAT4KG2b4TyQQA8pHra\nxsB1XS+7vjoWERFjtuawT6ymar4B7Gf7Tkkz5w7mPJewePHi5V8vWrSIRYsWzfU/ERHRalNTU0xN\nTY38+qE6Q0laE/gv4Lu2P1EduwJYZPvGat7+NNvbSDoQsO0PVc87GTjY9k9n/DczJz8nmZOPiPnr\nDPUF4PJOgq+cCLym+vrVwAldx18maS1JWwCPBM4ZNqCIiFh1hlld81TgR8AllCGtgXdREvexwKbA\nr4A9bd9WveYg4HXA3ZTpnVN6/Hczkp+TjOQjYu4j+TTyHiBJPiKaJI28IyJiuST5iIgWS5KPiGix\nJPmIiBZLko+IaLEk+YiIFkuSj4hosST5iIgWS5KPiGixJPmIiBZLko+IaLEk+YiIFkuSj4hosST5\niIgWS5KPiGixJPmIiBZLko+IaLEk+YiIFkuSj4hosST5iIgWS5KPiGixJPmIiBZLko+IaLEk+YiI\nFkuSj4hosST5iIgWS5KPiGixJPmIiBZLko+IaLEk+YiIFkuSj4hosST5iIgWS5KPiGixJPmIiBZL\nko+IaLGBSV7SEZJulHRx17GDJf1G0gXV7Tld
jx0k6SpJV0jabb4Cj4iIwYYZyR8JPLvH8Y/afkJ1\nOxlA0jbAnsA2wHOBT0vSKos2IiLmZGCSt30mcGuPh3ol792Br9leZvta4Cpgp5WKMCIiRrYyc/L7\nSloi6fOSNqyObQxc1/Wc66tjERFRg1GT/KeBLW1vD9wAfGTVhRQREavKmqO8yPbNXXc/B5xUfX09\nsGnXY5tUx3pavHjx8q8XLVrEokWLRgknIqK1pqammJqaGvn1sj34SdLmwEm2t63uL7R9Q/X1/sCO\ntl8u6THAl4GdKdM0pwJbucc3kdTrcOOU68ZNiFNMwvsVEfNLEraHXtAycCQv6SvAIuCBkn4NHAz8\nvaTtgXuAa4E3Ati+XNKxwOXA3cBbJiKTR0S01FAj+Xn5xhnJz1FG8hEx95F8drxGRLRYknxERIsl\nyUdEtFiSfEREiyXJR0S0WJJ8RESLJclHRLRYknxERIslyUdEtFiSfEREiyXJR0S0WJJ8RESLJclH\nRLRYknxERIslyUdEtFiSfEREiyXJR0S0WJJ8RESLJclHRLRYknxERIslyUdEtFiSfEREiyXJR0S0\nWJJ8RESLJclHRLRYknxERIslyUdEtFiSfEREiyXJR0S0WJJ8RESLJclHRLRYknxERIslyUdEtFiS\nfEREiyXJR0S0WJJ8RESLDUzyko6QdKOki7uObSTpFElXSvqepA27HjtI0lWSrpC023wFHhERgw0z\nkj8SePaMYwcC37e9NfBD4CAASY8B9gS2AZ4LfFqSVl24ERExFwOTvO0zgVtnHN4dOKr6+ihgj+rr\nFwJfs73M9rXAVcBOqybUiIiYq1Hn5B9i+0YA2zcAD6mObwxc1/W866tjERFRg1V14dWr6L8TERGr\n0Jojvu5GSQ+1faOkhcBN1fHrgU27nrdJdaynxYsXL/960aJFLFq0aMRwIiLaaWpqiqmpqZFfL3vw\nIFzS5sBJtret7n8I+IPtD0k6ANjI9oHVhdcvAztTpmlOBbZyj28iqdfhxinXjZsQp5iE9ysi5pck\nbA+9oGXgSF7SV4BFwAMl/Ro4GPggcJyk1wK/oqyowfblko4FLgfuBt4yEZk8IqKlhhrJz8s3zkh+\njjKSj4i5j+Sz4zUiosWS5CMiWixJPiKixZLkIyJaLEk+IqLFkuQjIlosST4iosWS5CMiWixJPiKi\nxZLkIyJaLEk+IqLFkuQjIlosST4iosWS5CMiWixJPiKixZLkIyJaLEk+IqLFkuQjIlosST4iosWS\n5CMiWixJPiKixZLkIyJaLEk+IqLFkuQjIlosST4iosWS5CMiWixJPiKixZLkIyJaLEk+IqLFkuQj\nIlosST4iosWS5CMiWixJPiKixZLkIyJaLEk+IqLFkuQjIlpszZV5saRrgduBe4C7be8kaSPg68Bm\nwLXAnrZvX8k4IyJiBCs7kr8HWGT78bZ3qo4dCHzf9tbAD4GDVvJ7RETEiFY2yavHf2N34Kjq66OA\nPVbye0RExIhWNskbOFXSuZJeXx17qO0bAWzfADxkJb9HRESMaKXm5IGn2v6dpAcDp0i6kpL4u828\nv9zixYuXf71o0SIWLVq0kuFERLTL1NQUU1NTI79e9qw5eG7/Ielg4E7g9ZR5+hslLQROs71Nj+d7\nVX3v+SSJPp9TYyQm4f2KiPklCdsa9vkjT9dIWkfSetXX6wK7AZcAJwKvqZ72auCEUb9HRESsnJFH\n8pK2AL5FGeauCXzZ9gclPQA4FtgU+BVlCeVtPV6fkfycZCQfEXMfya+y6Zq5SpKfqyT5iBjjdE1E\nRDRfknxERIslyUdEtFiSfEREiyXJR0S0WJJ8RESLJclHRLRYknxLLFy4OZJqvy1cuHndb0VEdMlm\nqAEmZTPUpMQZESsnm6EiImK5JPmIiBZLko+IaLEk+YiIFkuSj4hosST5iIgWS5KPiGixJPmIiBZL\nko+IaLEk
+YiIFkuSj4hosST5iIgWS5KPiGixJPmIiBZLko+xakLd+9S8j9VJ6skPMCl12hPnXKTm\nfUyu1JOPiIjlkuQjIlosST4iosWS5CMiWixJPiKixZLkIyJaLEk+IqLFkuQjIlosST4iosWS5CMi\nWmzekryk50j6maSfSzpgvr5PRETMbl6SvKQFwOHAs4HHAntJevR8fK+I+TA1NVV3CEOZhDgnIUaY\nnDjnar5G8jsBV9n+le27ga8Bu8/T94pY5SblD34S4pyEGGFy4pyr+UryGwPXdd3/TXUsIiLGaM06\nv3kpO1uvhz50M2644dq6w4iImBfzUk9e0pOAxbafU90/ELDtD3U9JwW9IyJGMJd68vOV5NcArgSe\nCfwOOAfYy/YVq/ybRUTErOZlusb2XyXtC5xCmfc/Igk+ImL8amv/FxER8y87XiMiWixJvgdJa6ta\n+iPpEZKeJ6nWlUgREaPIdE0Pks4DngZsCPwEuABYavtVtQbWg6QHA9i+ue5YZiPpGNuvHHSsbpK2\nsP3LQcdidpIuBGZNKrafMMZwgjGvk5e0NfBPQKfEwRXA52xfOc44hrDA9l2SXgv8p+0PSlpSd1Ad\n1VnGwcC+lLMxSVoGHGb7kFqD6+2x3Xeq1VdPrCmWfo4HZiahb9CgWCX9PfA2YOvq0BXA4banagtq\nun+o/n0TsAZwTHX/FcBfa4loAEnrAP8beLjtN0jaCtja9n/VHNo0ku4LvATYnK7cPehvfmzTNZKe\nDEwBS4HPAp8D/gicVq2rb5IFknak/GJ2ftBr1BjPTPsDTwV2tP0A2xsBOwNPlbR/vaGtIOkgSUuB\nx0m6o7otBW4CTqg5vOUkPVrSS4ANJb246/Ya4H41h7ecpOcDXwBOAl5O+f38DvAFSc+rM7YO21fb\nvhp4pu132r6wuv0fYNe645vFkcCfgSdX968H3l9fOLM6gVIeZhkld3Zu/dkeyw34LrCox/GnA98d\nVxxDxvoMyh/Pu6v7WwKfrjuurvguBB7U4/iDgQvrjq9HXIfWHcOA+Han/KHfUv3buX0SeErd8XXF\nOQVs1+P444DT645vRkwXAU/qur8zcFHdcc0S63nVvxd2HWtcrMClo7xunNM1j3CPU0rbp0v67Bjj\nGMj2D4EfVqdH2L4GeEu9UU1zH9u/n3nQ9s2S7lNHQAOcI2lD27cDSLo/5QP/2zXHBYDtE4ATJD3Z\n9o/rjqePhbYvmnnQ9sWSHlpHQH28HjhS0v0AAXcBr603pFn9RdLaVNcSJD2CMrJvmrMlbWv7krm8\naJyra5b2eWzwKccYSdpJ0iXAVdX97SQdVnNY3f4y4mN1ObiT4AFs30a5ptA0b6o+gACQtJGkL9QZ\n0Az9/k4a9Tdk+1zbf0sZwe9ke1vb59Yd1ywOBk4GNpX0ZeAHwL/UG1JPuwDnS7pS0sWSLpF08aAX\njXMkv6mkT/Y4LppXofKTwAuAbwPYvqi64NUU20m6o8dx0aA55C69BhNNXJL6uOoDCADbt0p6fJ0B\nzfAISSf2OC7KlGJjSFoL2IPqImGnGKHtf6sxrJ5snyrpAuBJlPdyv15nyg3w3FFeNM4/tH/u89h5\nY4tiOAts/2pGlczGrAyw3aSLwMM4T9JHgU9V998KnF9jPLNZIGkj27cCSHoAzfow6teT4T/GFsVw\nvgX8ifJzbszfTi+SnlZ92ZlteIwkbP+orph6sf0rAEkPYQ6DubH9Ats+alzfaxW4TtJOgKvlfm8D\nfl5zTMtVyWdWtv8wrliG9DbgPcDXq/unUhJ903wE+LGk4ygjun8APlBvSCvYPr3uGOZgs2q6ZhJ0\nD0DvR2l6dD5lAUZjSHoh5Xf0YZQVaptRltA+tu/rqqu2807SSfTfJPHCsQQyhOqT8pPAs6pD3wf2\nbcopnKRfUt7LXuVGbbtRp+6TRNJjWPHH/UPbl9cZT7fqOlG/v6HHjTGcvi
R9Hvhok96/YUnaFPi4\n7ZfUHUs3SRdRfje/b/vx1RTy3rZf1/d1Y0zyT+/3+ISNUiaCpMfavqzG7/9x2++Y7QO+KR/skjaw\nfcdsZ0hNOTOStFm/xzun801QfSA9CvgFZaWKKAOQxu94rTYbXmb7MXXH0k3SebZ3qJL9423fI+ki\n29v1e904p2uGSuKSjq/7E1TSIynzxwttbyfpccDzbR9aZ1wjOIZ77+Ac9/eH5s0Xz/QVyoX285n+\nYaTqfiPOjIZN4pJ+bPvJg585r/ao+fsPrVo51/m5LwC2p5QyaZrbJK0H/Aj4sqSbGGJVVeNq10i6\n0HatKxokTQHvAj5VnRaJshGh79xX0zTkvVwDONr2K+qMY5DqZ7yp7V/XHcvKasLPvaM6O1p+kdD2\nb2sMpydJr+66uwy41vZZdcUzG0nrUi5mi7LbeUPgy7Zv6fe6Jq0c6GjCp866ts/uWvZlSXfXHNMo\nan8vXRrIbCZpLdtNXMMPLP8Z/zewbd2xrAK1/9yrEgwfAzah7CR+GGXfyaP7va4Ok7IoxHb3qH3o\nmJuY5JvgFklbsGIH3B7ADfWGNNGuAc6q1ngv/0W1/dH6QurpAkk7NnjTziT5AKW+0inV2fCuwJ41\nxzRNnwvZnesHjbiQLelM27tUdZ/uNZ1oe4N+r29ikh+6Qe082hc4Ani0pF9R+tS+rN6QRtKUkfPV\n1W0BsH51rPbRZg87A6+ofuZ/pGF/7HPQhL+hZVWZjQWSVG04atq1mRfUHcAwbO9S/bv+oOf20sQk\nf0DdAQDX2H6GpA0p1y1uG/iKGkj6ge1nznbMdlOqe15u+7juA5JeWlcwfTy77gCGIelDtg/oc6wJ\ndfpvry4SngkcXV0k/J+aY5qmSauR+lnZfTHjXEI5EadGAJKuBo4FjrTdmE1QHVXRp3WA04BFrBi5\nbQCcbLtR856SLpi5dK7Xsbppcpqb9Ho/L27Y39D6lKQu4FWUi4THuIHNbVRKnR8GbAOsRSkr/sdB\n0yDjMmNfzMOBW6uv7w/82vYW/V4/zpH8RJwaVZ4I7AV8SdJfKDW8j7V9Z71hLfdG4B2Ui1nnsyLJ\n3wEcXldQM0l6LvA8YOMZdYs2oKxiaJpGNzeR9GZKNdQtZxSmWh9o1GoQ20theUOO4wY8vW6HU6Zj\njwN2oHwoParWiLp0krikzwHfsv2d6v5zGWKpauOWUDaNpEXAlymJ6Vjg/W5IOzhJb7PdpOqY00ja\njrLm+BDgvV0PLQVO69SIqZukgyhLZtemlMSF8sH5F+Cztg+qK7Zu1fThRsChwIFdDy1tyoatDkmv\nB/6VUrfmHlacsT+81sB66NpktPxsqEnLUDskXWJ720HH7vW6cSf5pp8aAUhaADwH2Ifyif7l6vZ3\nwPtsb93n5WMj6a2UdbK3Vfc3Avay/el6I5tO0n1s3119vRFlPfrAEqnjJunQpiT0QaqzjIcyvQ1c\nY9b4S7oKeKrtm+qOZRBJP6KUMPk8ZRXd74DXDNpJOm6SvgecAXypOvQK4Gm2+15LGmc9+Y7DKVMh\nV1FGTq9nRXXCprgK+EdKz9TtbH/Y9vW2v0apNd0Ub/CM0rjAG2qMZzanStqguoB0AfA5SR+rO6ge\n/qvacIKkvSV9dFApgTpI2he4kVLo7b+rW6P6kVKWzfYqh91Er6Tkwn0pq6o2pfRSbZq9KN3fvgV8\ns/p6r0EvqmMk3/hTo04tk7rjGKS6mP04Vz/EanR3cdN25nZ+vtUp/Ka2D27ahUIoFy+B7Sjt9L5I\nGdntabtv3aVxk/QLYOdBOx3rpFKH//PAT+jqsmT7nbUFNYOkfwa+avs3dccyF5LWnbExqq86RvJ3\nqTQUWCLpwyqNp+uIo5/1JB0n6QZJv5P0dUkPqzuoHk4Gvi7pmZKeCXy1OtY0a0r6G8pmmKaNOLst\nqz4wdwcOt/0pVqzrb5LrgNsHPqten6
FcDF4CXNZ1a5KHUUpLnyHpLZIeXHdA/Uh6iqTLKeWFOx3r\nBk7N1jGS34xyqrkWsD9ladWnXDq8N0I19/UN4Ojq0CuBlw6a+xq36trBG4HOWvlTgc/bblSThmpN\n/HuAM22/RdKWwL/XXYhuJkmnUz4k9wGeRqnZfdGgC1vjJukIYGvKNE33KLkxO4glLbG9fd1xDFLV\nLHoaZXXNHpQG5F8FvtlZIdQUkn5K6XFwYmfmQ9KlHlC3v44kv5/tTww6Vqdev6CT8ksbo5O0EHg5\ncK7tMyQ9nNJw/OgBLx0rST3749p+37hjmY2kD1B2OZ/E9A+ixk6DVtOdzwI+CGxte52aQ5pG0k9t\n79w9va0hSg3XkeR7beRo2pz8D4HPsqKT0Z7AG203olOMpGNt7znbBrOmzHVL+hfbH9b0Uq7L2X57\nDWG1hqR1bN81+JnjJ+m6HocbuYQSQNK2lNH8PwK/p8zVN2bgCSDpG8BHKYtXdgb2A3aw3bfkytg2\nQ0naizJK2kLTmxGvDzRqjS/wWuDTlFU/plw8em2tEU23X/Vv0zeYXVH927QevtNoJQtAjZukJ1Nq\nK60HPLzaj/BG22+pN7IVbG9adwyDSNqKkthfRlnP/zVgN9vX1BrY7N4EfALYGLgeOIUh2miOs6zB\nZsAW9NjIQVkR0sQdkI1VnVp+3/bf1x1LjNeoc7PjJGltymBkM9tvVmnEs5Xt79Yc2nIq5Uu+CnzN\n9qV1xzNfxtkZ6lfAr4C6O9bMqlq73a+HZmOWf7nUab9H0oa2G7nSQhPU1xeWn7J36v5c7hpbJw5i\n+zppWrHJRl1sp5QCuYSygRDgt5SyAY1J8rYfMczzVHOnrRklQe5l0LTnOKdrJuGUeNI+ze8ELpF0\nKtPrtDdlrrtTWvbFwEJW7NTbi7LCqhGqcgEnUIo/XUT5ndxW0q+B3Rt4sfA6SU8BLOk+lBHzFQNe\nM25b2d6rWlmF7bs041Npgtxv8FPm1ZsouelYyoflnN7HcY7kV6om8jjYPqL7vkpxJdtuVInULt+s\nbt0aU4zIVV9fSR+xvUPXQydJatI8/b9Srhs8w/Y9sHx56gcpzS/eVmNsvYw0Nztmf1GpltrZqLcF\nzelvMFd1/039DfBSykXhZZQFId/wkCXQa6knL+kJwC6UN+9M2xfWEcdsqviOoGwbRtKNwOubFidw\n/17LUesKpo91JW3ZuaBV/cGvW3NM3Z5F2Tl8T+eA7XskvYsy5dAotn9PqVvSZIdQ9hxsIuko4OnA\n6+oNaTJVO5s/A3xG0iaUC8WXSzrA9jGDXj/2JC/pvZRPpc4I9IuSjrP9/nHH0seRwDtsnwbLK1F+\nkbLlvUleTRnRdXtNj2N12x+YknQN5VRzM+Cf6g1pmr/0uvBve5mkP/d6QZ2qD8m3AZszvUBZY65x\n2D5Z0vnAUyg/83+ehGJls2jENFM1+NwL2JVybeP8oV5Xwzr5K4HtbP+pur82sMQNqewIvdftN2kt\nf9dy1F0oVek6NgD+6hndoppA0n1ZcVHzZ7b/3PXYrrZPrScykPQzyh/PzD9mAV+yvc34o5qdpIso\nZ5qXUMr4Aiumx+pWrfzajRU/7yuAU5u2E7tDAzptSfrbOlffSDoEeD7lffwapTHQ0KsR60jypwEv\n8oryuPenbCFuxEYjWL7KZi3K8ipT5sLupuqQ7prL5LZtOWqvDXJj/v6n9Xu8actUOzsf646jl6pG\n0Q+BW4ALKR+U2wMPoFzzuKHG8HqaZYNmYwroSboH+CUreh10kvZQXfXqSPLfBnak1Fkx5dTjHOA3\n0IyVIZLO6POwbT9tbMH0oVIW93+q+eNHUUZO33VVu31SNOksqZ+6zzi64ng5sBXlgmt3yYALaguq\nIulI4FLbH5lxfH/KdY996ons3tTVaYtSgqFjfeAs23vXEtgMGlDu2gN61daR5F/d73HbR40rlklX\nzX
n+HaVb0FnAuZT55aZflJum7pH8sJoSp6RDKUXzrmbFdI2bcDYs6Weepcdwv8fqoAnqtDWM2dbz\nj/3Ca3cSV0O7BEnaANibe1/YasxmqIqq9cevAz5d1YlZUndQLdaIC3CUhQtb2m7iksR+y40btRS5\n2kR4O7CXpnfaWk/Sem5Qp60h9VzPX8fqminghdX3Ph+4SdJZDUug36F0MJp2YauBVNUxeQUrlqet\nUWM8o7q27gCGVPd66Y5LgftTSiE3zYaSeq3yEWVhQOOodNpaTNmgt/zMiNI8ZpL0/P2sY538hrbv\nUOkSdLSrLkE1xNHPOk24NjCEdwAHUTq4X6ZSp73vRcS6VDs0N2f6mdHR1b8vrimsSXV/4GeSzmX6\nnHwTllCeRTnT6OXscQYyB++glBZubKetlVFHku/uEvTuGr7/ML4iaR9KF6PG1sKulsyd3nX/GqBx\nH06SjgEeQekS1FlGZ1Y0ZZkU19YdQKVnPfkmsP3KYZ4naW/bXxr8zLGYhE5bw+g5nVhHkj8E+B7l\n6vW51ejzqhri6OdO4OOU7e6dUyBTapvUTtLHbb9jtgJgDRnRddsBeIzHfZV/BJNwxtGU9fAr6Z2s\nqGVUt2som/Ua22kLBq/np1yMv/frJuDvbuwk/ZLSKLmJc55IeqLt8yX1bDDdtCQg6Tjg7bZ/V3cs\n/cx2xtG0qbsZRf7WAu4D/LEhRf6G0qRls5qATlsw+nr+Oi68bgIcBjy1OnQGsJ+b1TH9F0Cjpma6\n2T6/+vd0Vc2Hbd9cb1R9PYhSa+McmjeH3G0izji6i/xVlR13B55UX0Qjacx73Enmaminre71/DOu\nX65PuQbS//U1rJM/FfgK0CmsszfwCtu7jjWQPiQdDzyGsnOvOyk1ZgWQpMXAvsACylzcMuAw24fU\nGVcvOeOYf00aGQ+jSfGqq9OW7cZ12lrZ9fx1zMk/2PaRXfe/KOkdNcTRz3eqWyNJeiflTGhH27+s\njm0J/Kek/W1/rNYAZ2haMu9jIs44JHVfG1hAOQP5U03h3Eu15nwP28f3edpPxhXPED4OPBs4EcD2\nRZIasasdVn49fx1J/hZJe1PqwkApDNWopUu2j5C0FvBw27+oO54eXgns6lJyFigra6r39RSgEUle\nk9EoptviugMY0v/q+noZZdXP7vWEcm8uXcveBcya5G2/eYwhDeTmd9oaeT1/HUn+tZQ5+U6rvbOB\nxtSzAJD0fEpX9LUojce3Bw62/aJ6I1vuPt0JvsP2zSqdghrBQzaKkbSR7VvHE9XsJuGMoxrJXdy0\ns7UeTqmi1xwvAAAR+ElEQVTO0L/O9K5lTbzWNQmdtmDE9fxZXdNDVRPmmcBpXtEo+RLb29YbWdGv\nhkpT6qvMRd0xT9oZh6RzbO9Udxz9SLqu665Z8V42YhlyN0kPovRgeBYlzlMoi0EaNcNQVUvd1XOs\nMjvOHq+H0b+pc5OWqd1t+7YZp29N+jTcTlKvEZGovx/lKGqtCTNpZxzAWZIO596j5NqrUHbZ0jOq\noTbpLLObJ6PTFoy4nn+c0zXdPT3fR4N37QFXSNoTWKDSheftNOhCke1JrE/TT5M+QPv5AdCEs6Tt\nq3+7V1IZqL0KZZefcu/3qtex2mkCOm1Vfl3d1qpuQxlnI+/u6pPvcLNLCu8LvJdyceOblB26TS3B\nEOPTlCqUr6tKWCxXra6qnaSHUBpPry1pW1a8ZxsA69QWWH/fpiyhPIkGFyQcdT1/LY28afjIzfYf\ngQOq2710ygqMN6pWa0ryHKQpv7ff4N4j4uOAJ9YQy0zPpyyu2AT4FCt+tkuB99QV1AB/sv3JuoMY\npHs9PzD0ev66kvyka8wa2kkg6RHAb2z/WaUp+uMoFUhvq57SuJ60TSTp0cBjKeV8u9fKb0BDrsVU\ne2COlLSn7WPrjmdIn6hKGzSu09YMI63nH+eF1+6VC+t0XThs5AqG
WKWOB3aQ9Ejgs8AJlF3PzwMY\nZtdeQ9R9xrE18AJKqeHutfJLgTfUEtHsHiJpA5ey4p+hnHkcZPsHdQfWw7aUvSfPYPr68yZd4wBG\nW88/zjn5visXotXusb1M0osopRcOk3Rh3UHN1PQzDtsnACdIerLtH9cZyxD+yfbhknajzNG/AfgC\nzZhSmqnJnba6jbSef8G8h9VOdY/oJs3dkvYCXk2p0Q+lcmLTHA/8teuMY1PKGQfQqDOOF0naQNJ9\nJP1A0s3Vbucm6Zy1P4/yQXkRzc03nU5bTfcm4K3AxsD1lFVWbx30oszJj+bwugOYMPtQfkE/YPuX\n1ZK1Ywa8pg4TccYB7Gb7X6o4rwVeDPyI5tRnB7hI0neARwHvkrQezblwPVOTO20tN+p6/iT5LpK+\nRf8NWy+u/j1ibEG1gO3LJR1A1XSlKqr2oXqj6qn7jKMz593EM45OTM8HjrN9+4x52ibYhzI18wuX\nZvMPYkUf4qZp8p6d5UZdz58kP11G6PNA0v8C/oPptYAOadpIick54zhJ0s+A/wHeXPUUaEwVSlhe\npOzXwCMlNTrPTELNospI6/lTuybmXVUL6BnAVFctoEtt/229kU0uSQ8Abq+S6TrABrZvqDuuDkn/\nRukV8TOmd9l6Xn1R9aYJ6bQl6ae2d57r6xr9CVuXapXFByiNQ5avP7b9qNqCmmx395hSaMzOQkmX\n0H+arm8p15o8Gth8xii5SY3RXwI8ynajzjB68eR02hppPX+SfG9fBN5PmWJ4LuU0Pqc8o7tM0suB\nNSRtRakFdHbNMXV7Qd0BzIVm6UVLs5L8L4GJq7HkMrXx7SqZHjjo+WM20nr+TNf0IOl820/sLi8s\n6TzbO9Qd2ySqphPeDexWHfoe8P5JGOU1kaQraHgvWpVWio8Dvk9DW2h2zNJp6+m2n1xTSD1J+gXl\n5z6n9fwZyff2Z0kLgKslvYmyJjWbuUZUFVN6t6QPzKWw0rhJehKloc02lLnZNWjg3CxlXfdCoMm9\naE+ubpOg0Z22unTW8980lxclyfe2P7AuZVrhA5TaII3qXjVJql16n2eOhZVqcDjwMkqxrx2AV1HW\neTdN43vRTsoy4wnqtAUjrufPdE0Pkl5s+5uDjsVwJP0U+AfgxCavrulMyUm6uHOxVdKFnZibQtLT\nex1vwlLAavNYv4vYTawn3/hOWzD6zz0j+d7+L6WOfLd39zgWQxqlsFIN7lJp4L5E0ocp0yGN24rf\nhGTexz/UHcAIJqHT1sg/9yT5LpKeDTwH2FhSd0utDWjQkr8JNCmNkl9JSer7UqbsNqWUDGiEHj1o\nlz9EQyq52r56mOep6qs73/EMaRI6bY28nj9JfrqbKBc3/gRc1nV8Kc1bTjVJ3kRplNwprHQKQxRW\nqsEetj9B+fl3uvDsR4m9di2r5Lpu3QF0aWynrW6jrufPnHwPku5r+8+DnxnDkPRAN6zzfS+SLpg5\nZ9zEOfk26PVe12WWn/v5tptYFnmaYX4/M5LvIumrtvcCfiLpXp9+TfmlnEA/kbSEUk/85Kat766K\nkr2cUlfnxK6H1geaUl44VrFJ6LTVbZb1/AP3miTJT/fP1b+TePGoyR4FPIvS+/MwSccCX7T983rD\nWu5sykXWBwEf6Tq+FLi4lojarwllMyep0xaMuJ4/0zWzqCr77Ui50HGe7ZtrDqkVJP09pe75usBF\nwIET0OUoVjFJ21WNRGo3CZ22qvX8bx9lPX/jloc1gaR9gAsop/B7A+dJenW9UU0uSQ+UtJ+k84D/\nQ6mJ/SDgf9PVeakuks6s/l0q6Y6u21Kt6EUcQ5B0q6Q/9LjdKmn51FdTEnyl8Z22bP8V2GuU12Yk\n34OkK4FdOqP3alR/pu2t641sMkn6OaUu+5G2fzPjsQNsN7GBSIygGnHOqkpWjSJpie3tVTptvQB4\nJ/Aj29vVHNo0kj5GWTY5p/X8
mZPv7Q/AbV33byMX4FbG1rYtaT1J69m+s/NA0xK8pCcAu1Cm6c60\n3cT2f401M4lXde+7L2L+drwRDWUSOm3BiOv5k+R7uxL4saRvU97EPYBLJb0dwPYn6wxuAj22Ko/7\nAMoS35uBV9u+tOa4ppH0XuClrNjZ/EVJx9l+f41hTSRJzwc+BmwC3ELZI/FzSh38pml8p63KSOv5\nM13Tg6R/7fe47feMK5Y2kHQ28G7bp1X3FwH/ZvsptQY2QzVNt12nBLKktYElmaabu2rJ7K7AKbYf\nL2lXYE/bTVy10vhOWzD6ev6M5HvoTuLVzrJ1bP+xz0uiv3U7CR7A9pSkJu147PgtZWqhM4q7L2WH\nbszdMts3S1ogSbZPlfQfdQfVR2M7ba3sev4k+R4kHU2pX7IMOAd4oKR/t/3R/q+MWVwj6T2saIq9\nN3BNn+fX5XZKF6tTKdN0uwLnSPokgO231xnchLld0nrAmcDRkm6iTIc0jprfaWul1vNnuqaHrqvt\nL6eslT+Asla+ib0+G0/SRpRaMJ2CVGcAi23fWl9U9zZomazto8YVy6STtD5wF2WZ9quADYGjbf++\n1sB60AR02oLR1/NnJN/bfarTtt2B/7T9F0mpQjmiKpk3fhTcncSrD6ZNbWfH62gOsv0uysj4CABJ\n/wa8q9aoepuETltQ1vNfRjkjOpnSXnF/21/q96Ik+d4+D/ya8sM/XdLDgTv7vyRmknQS/RtINKaT\nEYCkKeCFlL+L84GbJJ3VxL6kE+A53DuhP7/HsSZofKetym62/6Vaz38tpQz2jyg7yGeVJN9DtXV4\n+fZhSdfRsNrSE6LJF9p62dD2HZJeT5laOFhSRvJzIOmNlNLSj5LUvUlnfcoHZxMtrjuAIY20nj9J\nvodqPvE9wNOqQ6cD7wfm1CV9ddfdyUal49KjKSP7K+facX5M1pT0N8CelE5gMXfHAj8ADmV6D4al\ntufUgHpcGt5pq9tI6/lz4bUHScdRNm505mhfCWxjO9UpR1BtjPkMcDWl+uAWlEbe3601sBkkvZTy\n4X6W7TdXG03+3fZLag5tIkl6LPB31d0zbF/W7/njpgnotDXTKOv5k+R76KyuGXQshlONPl5g+xfV\n/UcA/227ibsfYxWQ9FZK969vV4d2Bz5l+9P1RTX5VNpobk7XLIztvks9M13T258kPcn2TwAkPYlm\nbnOeFEs7Cb5yDWWNb6NI2gQ4DHhqdegMYL+ZRdViKG8EdurUKapW1pwNJMmPaNT1/Enyvb0FOEbS\nfSmnbndR1vrGaM6T9B3KfK0p9WHO7ezes/3Nfi8eoyMppY9fWt3fuzq2a20RTS4x/RrW3TSjUcgk\n24ER1vMnyfdQle58bDX/he1UoFw59wNuBJ5e3b8ZWJuye8+sKAhWtwfbPrLr/hclvaO2aCaQpDVt\nL6Psbv6ppOOrh17EimtcMZqR1vNnTr5Lp8rkbFJ9st0k/YAycv9qdWgvYB/bz6wvqsnSXURL0k50\n7XK2fW59kU0+SadRyg3PaT1/RvLTPbjr69dR7dSLlSNpC0o3qM2ZfsGoaZtNXkuZk/8Y5QzjbGCf\nWiOaPMunZGyfQ0lIsWosHuVFGcnPQtKFth9fdxxtIOkiygfmJcDy8hATtD45hiTpN8CshfxS5G/8\nMpKfXT79Vp0/NXmqS9Jh9C+/0Pi6Ow2yBrAeuci6yqzsev4k+RiHT0g6GDiF6XOJfXtTjtF5XV+/\nDzi4rkBa4He2Dxn8tBiW7fVX5vWZruki6UJWfGI+GvhZ5yHKJ+YTer4w+pJ0KGXX8NWsmK6x7cbV\nA8o03crJ+9c8GclPl7IF8+OlwJYNrVczU0Y9KycrkRomSb6L7auHeZ6kM23vMviZUbmU0tWmkQWq\nYtXJnpLmSZIfTRP7kzbZ/YGfSTqXBtbrnnFhax1Jd3QeoqGFqiKGlSQ/mpzSz02jL2Su7IWtiC
ZL\nko95Z/t0SZsBW9n+flUidY2644pYHSyoO4AJlTXAcyDpDcA3gP9XHdqYFSVoI2IeJcmP5jV1BzBh\n3kop33sHgO2rgIfUGlHEaiLTNV0k3Ur/nWWdqpQXjTWwyfdn23/p9KOUtCa5rhExFkny0z2o7gBa\n6nRJ7wLWlrQrpV7/STXHFLFayI7XPqp68vfr3Lf92xrDmViSFlCqeu5GOSv6HvD5uTY/iIi5S5Lv\noWo8/TFgE+AWyoXCn6cn6fyQdHyaZUfMj1x47e0DlAuFV9reFHg2pd9nzI8t6w4goq2S5HtbZvtm\nYIEk2T4V2KnuoFosp5MR8yQXXnu7XdJ6wJnA0ZJuAv6n5pgiIuYsc/I9SFofuItypvMqYEPgaNu/\nrzWwlkp52oj5k+ma3g6y/Vfbd9s+ompZ9s66g2qxA+oOIKKtMpLvobvjfNexi2xvV1dMk0jSJfTf\nXPa4MYcUsdrJnHwXSW8E3gQ8SlJ3a7r1gfPriWqivaDuACJWdxnJd5G0EfBA4FDgwK6HltpOw4uI\nmDhJ8rOQ9Fjg76q7Z9i+rM54JpmkJwGHAdsAa1HKDP8xzTgi5l8uvPYg6a3AccDDq9uxkt5Sb1QT\n7XBgL+AqYG3g9cCnao0oYjWRkXwPki4GnmL7zur+esDZuVA4Gknn2d5B0sWd9zDLJiPGIxdeexPw\nl677d5NGISvjLklrAUskfRj4HTmLjBiLJPkukta0vQw4BvippOOrh14EHFVfZBPvlZSkvi+wP7Ap\n8OJaI4pYTWS6pkv3+nhJOwG7VA+dYfvc+iKbbJL2s/2JQcciYtVLku+SeeL5McvmsrzXEWOQ6Zrp\nHixp1vIFVXmDGJKkvYCXA1tIOrHrofWBP9QTVcTqJUl+ujWA9chF1lXlbMpF1gcBH+k6vhS4uJaI\nIlYzma7p0mtaISJikmUkP11G8KuQpDNt7yJpKdMLlXUKlGXHa8Q8y0i+i6QH2M5ccUS0RpJ8jIWk\nJ1CWpBo40/aFNYcUsVrIrsOYd5LeS9lM9kDKRdgvSvq/9UYVsXrISD7mnaQrge1s/6m6vzawxPbW\n9UYW0X4Zycc4/Ba4X9f9+wLX1xRLxGolI/mYd5K+DewInEqZk98VOAf4DYDtt9cXXUS7JcnHvJP0\n6n6P207xt4h5kiQfY1W1WNzUdna8RoxB5uRj3kmakrSBpAcAFwCfk5Q6QBFjkCQf47Ch7TsoNeSP\ntr0z8KyaY4pYLSTJxzisKelvgD2B/6o7mIjVSZJ8jMMhwPeAq22fK2lLSlPviJhnufAaEdFiGcnH\nvJO0iaRvSbqpuh0vaZO644pYHSTJxzgcCZwIPKy6nVQdi4h5lumamHeSltjeftCxiFj1MpKPcbhF\n0t6S1qhuewO31B1UxOogI/mYd5I2Aw4DnkypXXM28Hbbv641sIjVQJJ8RESLpcdrzBtJhzG9t+s0\nqT4ZMf+S5GM+ndf19fuAg+sKJGJ1lemaGAtJF9p+fN1xRKxusromxiWjiYgaJMlHRLRYpmti3kha\nyooR/DrAXZ2HANveoJbAIlYjSfIRES2W6ZqIiBZLko+IaLEk+YiIFkuSj4hosST5iIgW+/9jejSm\nvCIa5wAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x7f279e9759d0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"plt.bar(range(len(predictors)), scores)\n", | |
"plt.xticks(range(len(predictors)), predictors, rotation='vertical')\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 92, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"selector = SelectKBest(f_regression, k=5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 93, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"SelectKBest(k=5, score_func=<function f_regression at 0x7f27ab209d70>)" | |
] | |
}, | |
"execution_count": 93, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"selector.fit(train[predictors], train['Year Total'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 94, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"scores = -np.log10(selector.pvalues_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 95, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAGQCAYAAAC+tpvHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3WmYZFWV7vH/WyDKWOJEKVUyKCDSDCqDA63ZKjheQW1p\nQRRxaBSRwXu7GbxaJa1Na7cjSHtVREAUQVSgWxEQEhlUxoJiFEFGZRAZCmmVknU/7B2VkVmRmZFV\nlWfvPPH+nieejDgRwVlkRq04Z5+911JEYGZmM9+s0gGYmdnK4YRuZtYSTuhmZi3hhG5m1hJO6GZm\nLeGEbmbWEpMmdElzJZ0r6VpJiyR9OG+fL+lOSVfk22u73nOopJskXS9p5+n8HzAzs0STzUOXNAeY\nExELJa0FXA7sAvwDsDgiPjfm9ZsD3wa2A+YC5wCbhCe8m5lNq0mP0CPi7ohYmO8/AlwPrJ+fVo+3\n7AKcFBFLIuJW4CZg+5UTrpmZjWdKY+iSNgS2AX6ZN+0naaGkr0uanbetD9zR9ba7GPkCMDOzadJ3\nQs/DLd8DDshH6kcDG0fENsDdwGenJ0QzM+vHqv28SNKqpGR+QkScBhAR93W95GvAGfn+XcC8rufm\n5m1j/5seUzczWw4R0Wu4u+8j9G8A10XEFzsb8sXSjrcA1+T7pwNvl7SapI2A5wKXjBNUdbf58+cX\nj8ExOaZBjMsx9XebyKRH6JJeBrwDWCTpSiCAw4A9JG0DPA7cCuyTk/R1kk4GrgMeA/aNyaIwM7MV\nNmlCj4iLgFV6PHXmBO85AjhiBeIyM7Mp8krRMYaGhkqHsAzH1B/H1L8a43JMK27ShUXTtmPJIzFm\nZlMkiVjBi6JmZlY5J3Qzs5ZwQjczawkndDOzlnBCNzNrCSd0M7OWcEI3M2sJJ3Qzs5ZwQjczawkn\ndDOzlnBCNzNrCSd0M7OWcEI3M2sJJ3Qzs5ZwQrfWmDNnQyQ1fpszZ8MZFZO1l+uhW2tIInVIbHzP\n4/Z6rDEmm9lcD93MbAA4oZuZtYQTuplZSzihm5m1hBO6mVlLOKGbmbWEE7qZWUs4oZuZtYQTuplZ\nSzihm5m1hBO6mVlLOKGbmbWEE7qZWUs4oZuZtYQTuplZSzihm5m1hBO6mVlLOKGbmbXEpAld0lxJ\n50q6VtIiSfvn7etKOkvSjZJ+Iml213sOlXSTpOsl7Tyd/wNmZpZM2lNU0hxgTkQslLQWcDmwC7A3\ncH9EfEbSwcC6EXGIpOcDJwLbAXOBc4BNxjYQdU9RW9lq7N9ZY0w2s61QT9GIuDsiFub7jwDXkxL1\nLsBx+WXHAbvm+28CToqIJRFxK3ATsP0K/R+YmdmkpjSGLmlDYBvgF8B6EXEPpKQPPCO/bH3gjq63\n3ZW3mZnZNFq13xfm4ZbvAQdExCOSxp7PTfn8bsGCBUvvDw0NMTQ0NNX/hJlZqw0PDzM8PNzXaycd\nQweQtCrwX8CPI+KLedv1wFBE3JPH2c+LiM0lHQJERHw6v+5MYH5E/HLMf9Nj6LZS1TheXWNMNrOt\n0Bh69g3guk4yz04H3p3v7wWc1rX97ZJWk7QR8FzgkilHbWZmU9LPLJeXAT8DFpEONQI4jJSkTwbm\nAbcBu0XEg/k9hwLvBR4jDdGc1eO/6yN0W6lqPBquMSab2SY6Qu9ryGU6OKHbylZj8qwxJpvZVsaQ\ni5mZVc4J3cysJZzQzcxawgndzKwlnNDNzFrCCd3MrCWc0M3MWsIJ3cysJZzQzcxawgndzKwlnNDN\nzFrCCd3MrCWc0M3MWsIJ3cysJZzQzcxawgndzKwlnNDNzFrCCd3MrCWc0M3MWsIJ3cysJZzQzcxa\nwgndzKwlnNDNzFrCCd3MrCWc0M3MWsIJ3cysJZzQzcxawgndzKwlnNDNzFrCCd3MrCWc0M3MWsIJ\n3cysJZzQzcxawgndzKwlnNDNzFpi0oQu6RhJ
90i6umvbfEl3Sroi317b9dyhkm6SdL2knacrcDMz\nG62fI/Rjgdf02P65iHhhvp0JIGlzYDdgc+B1wNGStNKiNTOzcU2a0CPiQuCBHk/1StS7ACdFxJKI\nuBW4Cdh+hSI0M7O+rMgY+n6SFkr6uqTZedv6wB1dr7krbzMzs2m2vAn9aGDjiNgGuBv47MoLyczM\nlseqy/OmiLiv6+HXgDPy/buAeV3Pzc3belqwYMHS+0NDQwwNDS1POGZmrTU8PMzw8HBfr1VETP4i\naUPgjIjYMj+eExF35/sHAdtFxB6Sng+cCOxAGmo5G9gkeuxEUq/NZsstXX8v8ZkS432Wa4zJZjZJ\nRETPySaTHqFL+jYwBDxV0u3AfODvJG0DPA7cCuwDEBHXSToZuA54DNjXWdvMrBl9HaFPy459hG4r\nWY1HwzXGZDPbREfoXilqZtYSTuhmZi3hhG5m1hJO6GZmLeGEbmbWEk7oZmYt4YRuZtYSTuhmZi3h\nhG5m1hJO6GZmLeGEbmbWEk7oZmYt4YRuZtYSTuhmZi3hhG5m1hJO6GZmLeGEbmbWEk7oZmYt4YRu\nZtYSTuhmZi3hhG5m1hJO6GZmLeGEbmbWEk7oZmYt4YRuZtYSTuhmZi3hhG5m1hJO6GZmLeGEbmbW\nEk7oZmYt4YRuZtYSTuhmZi3hhG7LZc6cDZHU+G3OnA1L/6+bVUsRUWbHUpTat604SUCJv58Y73Pj\nmEbtedyYbGaTRESo13M+QjczawkndDOzlpg0oUs6RtI9kq7u2raupLMk3SjpJ5Jmdz13qKSbJF0v\naefpCtzMzEbr5wj9WOA1Y7YdApwTEZsB5wKHAkh6PrAbsDnwOuBopUFEMzObZpMm9Ii4EHhgzOZd\ngOPy/eOAXfP9NwEnRcSSiLgVuAnYfuWEamZmE1neMfRnRMQ9ABFxN/CMvH194I6u192Vt5mZ2TRb\nWRdFPT/KzKywVZfzffdIWi8i7pE0B7g3b78LmNf1url5W08LFixYen9oaIihoaHlDMfMrJ2Gh4cZ\nHh7u67V9LSyStCFwRkRsmR9/GvhDRHxa0sHAuhFxSL4oeiKwA2mo5Wxgk14riLywaGarccGMYxq1\nZy8saqmJFhZNeoQu6dvAEPBUSbcD84F/A06R9B7gNtLMFiLiOkknA9cBjwH7OmubmTXDS/9tudR4\n5OmYRu3ZR+gt5aX/ZmYDwAndzKwlnNDNzFrCCd3MrCWc0M3MWsIJ3cysJZzQzcxawgndzKwlnNDN\nzFrCCd3MrCWc0M3MWsIJ3cysJZzQzcxawgndzKwlnNDNzFrCCd3MrCWc0M3MWsIJ3cysJZzQzcxa\nwgndzKwlnNDNzFrCCd3MrCWc0M3MWsIJ3cysJZzQzcxawgndzKwlnNDNzFrCCd3MrCWc0M3MWsIJ\n3cysJZzQzcxawgndzKwlnNDNzFrCCd3MrCWc0M3MWsIJ3cysJVZdkTdLuhV4CHgceCwitpe0LvBd\nYAPgVmC3iHhoBeM0M7NJrOgR+uPAUES8ICK2z9sOAc6JiM2Ac4FDV3AfZmbWhxVN6Orx39gFOC7f\nPw7YdQX3YWZmfVjRhB7A2ZIulfS+vG29iLgHICLuBp6xgvswM7M+rNAYOvCyiPidpKcDZ0m6kZTk\nu419vNSCBQuW3h8aGmJoaGgFwzEza5fh4WGGh4f7eq0ixs23UyJpPvAI8D7SuPo9kuYA50XE5j1e\nHytr3yvTnDkbcs89tzW6z/XW24C777610X2uKElM8F09nXtmvM+NYxq153FjsplNEhGhXs8t95CL\npDUkrZXvrwnsDCwCTgfenV+2F3Da8u6jhJTMo9Fb018gZtZOKzLksh7wA0mR/zsnRsRZki4DTpb0\nHuA2YLeVEKeZmU1ipQ25THnHlQ65lDlFnnmnxzUOJTimUXuecZ8p68+0DLmYmVldnNDNzFrCCd3M\nrCWc0M3M
WsIJ3cysJZzQzcxawgndzKwlnNDNzFrCCd3MrCWc0M3MWsIJ3cysJZzQzcxawgndzKwl\nnNDNzFrCCd3MrCWc0M3MWsIJ3cysJZzQzcxawgndzKwlnNDNzMYxZ86GSGr8NmfOhssVr5tEj+Em\n0f2psfmxYxq15xn3mapRjX8/N4k2MxsATuhmZi3hhG5m1hJO6GZmLeGEbmbWEk7oZmYt4YRuZtYS\nTuhmZi3hhG5m1hJO6GZmLeGEPgOUqCexvLUkzKwc13IZo8ZaLo5p1J5nVN2UGmOy/tX493MtFzOz\nAeCEbmbWEtOW0CW9VtINkn4l6eDp2o+ZmSXTktAlzQKOAl4DbAHsLul507EvM5u64eHh0iEso8aY\nZprpOkLfHrgpIm6LiMeAk4BdpmlfZjZFNSbPGmOaaaYroa8P3NH1+M68zczMpsmqJXeepgQ1a731\nNuDuu29tfL9mZtNtWuahS3oxsCAiXpsfHwJERHy66zWeJGtmthzGm4c+XQl9FeBG4FXA74BLgN0j\n4vqVvjMzMwOmacglIv4qaT/gLNI4/TFO5mZm06vY0n8zM1u5vFLUzKwlBj6hS1pdebqNpOdIer2k\norN/zMyWx8APuUi6DHg5MBv4BXAFsDgi3lU0MEDS0wEi4r7SsQBIOiEi3jnZtqZJ2igifjPZtkEk\n6UomKBcYES9sMBybZo0eiUraDPhHoFMG4HrgaxFxY5NxjDErIh6V9B7gPyPi3yQtLBVMPluYD+xH\nOoOSpCXAkRFxeKm4si26H+TZTC8qFEu3U4Gxiel7FIpN0t8BHwY2y5uuB46KiOEC4fx9/vkBYBXg\nhPz4HcBfC8QziqQ1gP8NPDsi3i9pE2CziPivgjE9EXgrsCFdObKCf3+TamzIRdJLgGFgMfBV4GvA\nH4Hz8rz1UmZJ2o70Ae98iFYpGM9BwMuA7SLiKRGxLrAD8DJJB5UISNKhkhYDW0l6ON8WA/cCp5WI\nKcf1PElvBWZLekvX7d3AkwrF9AbgG8AZwB6kz9WPgG9Ien3T8UTEzRFxM/CqiPhIRFyZb/8H2Knp\neHo4Fvgz8JL8+C7gk+XCAdJnehdgCSlHdW71i4hGbsCPgaEe218B/LipOHrs/5Wkf3AfzY83Bo4u\nGM+VwNN6bH86cGWpuHIMR5Tcf494diElhPvzz87tS8BLC8U0DGzdY/tWwPkFf1dXAS/uerwDcFUF\nf8PL8s8ru7YVjQu4pvTvZXlvTQ65PCd6nHJGxPmSvtpgHGP3fy5wbj7NIiJuAfYtFQ/whIj4/diN\nEXGfpCeUCKjLJZJmR8RDAJKeTPqS/mGJYCLiNOA0SS+JiJ+XiKGHORFx1diNEXG1pPVKBJS9DzhW\n0pMAAY8C7ykYT8dfJK1OHueX9BzSEXtJF0vaMiIWFY5jypqc5bJ4gueKnc5I2l7SIuCm/HhrSUeW\nigf4y3I+14T5nWQOEBEPksb7S/tA/nIBQNK6kr5RKJaJPsvFPucRcWlE/A3pyHz7iNgyIi4tFU+X\n+cCZwDxJJwI/Bf65bEjsCFwu6UZJV0taJOnqwjH1pckj9HmSvtRjuyhbifFLwBuBHwJExFX5olYp\nW0t6uMd2UWhcuEuvA4Aapnhulb9cAIiIByS9oFAsz5F0eo/tIg3nFSFpNWBX8oW+TmG8iPjXUjHl\n/Z8t6QrgxaTf0QG9zlAb9rrC+19uTf5j/KcJnrussSiWNSsibhtT+bHY1f+IKHlBdjKXSfoc8OX8\n+EPA5QXj6Zglad2IeABA0lMo90UzUd3//2gsimX9APgT6e9VfHZLh6SX57udM/jn5ybIPysVU0Tc\nBiDpGZQ/iJqSxj70EXFcU/uaojskbQ9Enob3YeBXpYLJyWhcEfGHpmLp4cPAx4Dv5sdnk5J6aZ8F\nfi7pFNJR3t8DnyoRSEScX2K/fdggD7nUpvtA70mk5jiXkyYrFCHpTaTP1L
NIM7k2IE093WKi99Wg\nsYVFks5g4gUOb2okkDHyt/CXgFfnTecA+5U67ZP0G9LvqVd5zIiIYqftNZP0fEaSwLkRcV2hOBYx\n8ed8qwbDWUrS14HPlfq99EvSPOALEfHWgjFcRfosnRMRL8hDsHtGxHtLxdSvJhP6KyZ6vuIjmypJ\n2iIirm1oX1+IiAPH+1Iu+GW8TkQ8PN5ZTYmzGUkbTPR853S+afmLZlPg16RZJErh1LVSNC+suzYi\nnl8whssiYtuc2F8QEY9Luioiti4VU7+aHHLpK2FLOrXJb2dJzyWNCc+JiK0lbQW8ISKOaCqG5XQC\ny66OnM59Qdkx4F6+TbqgfTmjv2iUHzd+NtNvwpb084h4yeSvXGl2bXBffcszyjp/u1nANqTyGyU9\nKGkt4GfAiZLuZYYsLKquloukKyOisRkKkoaBw4Av59MrkRYWVD1eVuD3tApwfES8o6l99iP/veZF\nxO2lY5mKpv9+Xft9Cl0X+iLit03H0E3SXl0PlwC3RsRFpeIBkLQm6QKySCt9ZwMnRsT9JePqRw1T\nzsZq+htmzYi4uGsaV0h6rOEYlkejv6dITUs2kLRaRJSeD79U/nv9N7Bl6VimqNG/Xy5J8HlgLmll\n7bNIay+eN9H7pluNkyUiovtovLr4JlJjQm/a/ZI2YmSl2q7A3WVDqtYtwEV5nvXSD31EfK5cSABc\nIWm7ShbK1OpTpBpBZ+Uz0Z2A3UoFM8HF487YfuMXjyVdGBE75jpFywzhRcQ6Tcc0VTUm9J7NT6fR\nfsAxwPMk3Ubqgfr2hmNYHiWOkm/Ot1nA2nlbDWN2OwDvyH+/P1IwKUxB05/zJbl8xCxJygt6Sl4T\neWPBffcUETvmn2tP9tpa1ZjQD254f7dExCslzSZdU3hw0nc0QNJPI+JV422LiBIVKq+LiFPGxPS2\nAnGM9ZrSAYwl6dMRcfAE25quIf9QvtB3IXB8vtD3Pw3HsFSp2T4TqXwNSF+anLZY3SkWgKSbgZOB\nYyOi2IKirnieBKwBnAcMMXIktw5wZkQUG/OUdMXYaW69tjVNFTbeGOd3dXXBz/napAQu4F2kC30n\nROHmKUqls48ENgdWI5Wu/mOJ4Y0xa0CeDTyQ7z8ZuD0iNmo6pqlq8gi9ulOs7EXA7sC3JP2FVMv6\n5Ih4pFA8+wAHki5aXc5IQn8YOKpEQJJeB7weWH9MPZ51SDMTSqum8YakD5KqdW48pqDT2kCx2RsR\nsRiWNpQ4ZZKXN+ko0hDnKcC2pC+bTUsE0knYkr4G/CAifpQfv45Kp32OVd20xZIkDQEnkhLVycAn\no1AbM0kfjoiSVR+XkrQ1aX7w4cDHu55aDJzXqaFSIK5DSVNOVyeVg4X0BfgX4KsRcWiBmGYD6wJH\nAId0PbW45Cm7pPcB/0Kq4/I4I2fGzy4VU46rs4hn6dlLqSmdXTEtiogtJ9tWo8YTek2nWDmeWcBr\ngb1JRwYn5tvfAp+IiM0mePt0xvUh0tzXB/PjdYHdI+LoEvHkGJ4QEY91xTMvIoqXFZV0RInkPZl8\nprAeo9uYFZkvL+km4GURcW+J/Y9H0s9IZTe+Tppd9jvg3SVXZUr6CXAB8K286R3AyyOiums1YzVZ\nD73jKNIQx02kI6v3MVK9r4SbgH8g9ezcOiI+ExF3RcRJpNrMpbw/xpSEBd5fMB6AsyWtky8eXQF8\nTdLnC8cE8F95MQiS9pT0ucmW4E83SfsB95AKmP13vhXrk0mactqrLHNp7yTlof1IM5Tmkfp5lrQ7\nqUPYD4Dv5/u7F42oTyWO0Ks6xerUAymx74nki8hbRf4D5aO9q0uuYO38nfLp+7yImF/yQl9XXFcD\nW5PavH2TdLS3W0RMWD9ommP6NbBDLasLlerDfx34BV0dgSLiI4Xi+SfgOxFxZ4n990PSmmMWGVWv\nxBH6o0rF9hdK+oxS4+MScXSsJekUSX
dL+p2k70p6VsF4Os4EvivpVZJeBXwnbytpVUnPJC1IKXm0\nOdaS/MW3C3BURHyZkXnypdwBPDTpq5rzFdJF2YXAtV23Up5FKnl8gaR9JT29YCyjSHqppOtIJXM7\nXcyKDXVORYkj9A1Ip6KrkTrczybVUbm50UBG4vkJ8D3g+LzpncDbSo+X5bH9fYDOXPSzga9HRLHm\nBHnO+ceACyNiX0kbA//eZDG1ceI6n/RltzfwclIN66tKXsSSdAywGWmopfuIuMiqWkkLI2KbEvse\nT67D83LSLJddSY2svwN8vzMrp1BcvyTV1D+9M3Ig6Zqos578KCUS+gER8cXJtjUYzzIf9Bo//DY+\nSXOAPYBLI+ICSc8mNa8+fpK3TmdMPXutRsQnmo4FQNKnSKt8z2D0F0wVw415SPHVwL8Bm0XEGgVj\n+WVE7NA9FKwZUj63RELvteCi5Bj6ucBXGenCsxuwT0QU6Zgi6eSI2G28hViFalz8c0R8RqNLnXbH\ntH/TMc0UktaIiEcnf+W0x3FHj83Fpy0CSNqSdJT+D8DvSWPrRQ7wcjzfAz5HmsCxA3AAsG1EVF8S\npLGFRZJ2Jx1FbaTRTXTXBkouqX0PcDRppk2QLhq9p2A8B+SfNS3Euj7/LNn7dRmquJiSpJeQagSt\nBTw7z+XfJyL2LRFPRMwrsd/xSNqElMTfTpobfxKwc0TcUjSw5APAF0nN6+8CzqKOVouTanLp/wbA\nRvRYcEGavVHDisMq5NPPcyLi70rHYsuntnFYSauTDhY2iIgPKjV22SQiflwonptJ4+UnRcQ1JWJo\noyY7Ft0G3AY02aVlXHn+9ES9H4tM58r7/qukxyXNjojiMyVUaT/YjnzK3qlxc1001JpvMhFxhzSq\nqGKxC9qkkhaLSAvmAH5LWm5fJKFHxHP6eZ0a7Ow0pqzFMmbC0GKTQy61nR7XflTwCLBI0tmMrj1e\n4kPVKbP6FmAOIyvodifNWCoiL7M/jVRI6SrSZ2lLSbcDuxS+4HeHpJcCIekJpKPj6yd5z3TaJCJ2\nzzOViIhHNebbplJPmvwlK80HSHnhZNIX3kz4/YzS5BF6VbWGI+KY7sdKRYsiIoqVFB3j+/nWrUjh\nncj9YCV9NiK27XrqDEklx9X/hTSu/8qIeByWTvf8N1JDhw8XjK22cdi/KFXy7CxU24gyNfWnqsnP\n/DOBt5Euzi4hTZT4XlRSUrsfReqhS3ohsCPpj3VhRFxZIo6uWI4hLe9F0j3A+0rGlD251/TOUsFk\na0rauHPhKieFNQvG82rSatrHOxsidWg/jDS8UExE/J5UA6QWh5Pm6s+VdBzwCuC9ZUOqS17V+xXg\nK5Lmki7YXifp4Ig4YeJ316HxhC7p46Rvwc7R5zclnRIRn2w6luxY4MCIOC/HN0RaPl56zulepCO8\nbu/usa1JBwHDkm4hnY5uAPxjwXj+0utiekQskfTnXm9oSv6y+zCwIaOLcxW53hARZ0q6HHgp6W/3\nT7UV6hpH48Me+SBvd2An0jWGy5uOYXmVmId+I7B1RPwpP14dWBjlqhouMwe+8Lz4zvTOHUkV3zrW\nAf4aY7oYNU3SExm5AHlDRPy567mdIuLsBmO5gfQPb+w/egHfiojNm4plLElXkc78FpHK1QIjw1cN\nx7IKsDMjf7frgbNLrjru0CSdnST9TVOzYCQdDryB9Ps5idRQZkbNviuR0M8D3hwjZWGfTFrqW2oh\nz+dJZQi+QxoC+gfgMXK372i4POxMnt7Za9HYNO/vvImeLznts7PasNT+u+J4JnAucD9wJenLbhvg\nKaRrD0Uboo+z0LBIwTdJjwO/YaS2fic5zoQetUCZhP5DYDtSbZIgndZcAtwJzc/ikHTBBE9HRLy8\nsWC6KJWD/Z88Jrwp6ejqx5Hrkdeo5JnNRJo+c8j73APYhHQxtHup/RUNx3EscE1EfHbM9oNI1x/2\nbj
Kerv0v7exEKknQsTZwUUTsWSCmCUsuR4V9UMcqkdD3muj5iDiuqVhqlsc7/5bU/eYi4FLSmHFN\nF9pGafoIvV8l4pJ0BKnQ282MDLlE02eikm6IcfrQTvTcdFOlnZ360eTc+Klq/KJod8JWBV1vJK0D\n7MmyF6+KLSzKlOcKvxc4OtdSWVg4ppmqxHzitwEbR0TpqYETTcMtNkU3L5h7CNhdozs7rSVprSjU\n2alPTc6Nn5ISs1yGgTflfV8O3CvpooIJ9Eek7jujLl5VQLkeyDsYmV62SsF4+nFr6QDGUWL+/jWk\nbvGlZ5LMltRrZo1IF9qLUurstIC0QG3pmQypWUmtqm3EXGIe+uyIeFip683xkbveFIijY41Kl/Qe\nCBxK6j5+rVLt8QkvAjYhr37ckNFnM8fnn28pFFaNngzcIOlSRo+hNz1t8SLS2UIvFzcZyDgOJJXL\nraKz00xXIqF3d735aIH9j/VtSXuTOvBUUyc6T287v+vxLUDRLx5JJwDPIXW96Ux5C0aag9Tq1gL7\n7FkPvWkR8c5+Xidpz4j41uSvXOlq6+zUj2pLApRI6IcDPyFdyb40H3neVCCOjkeAL5CWkXdOpYJU\nH6Rxkr4QEQeOVxCrcCGsbYHnR9NX0vtQ25lDifnmK+gjjNToadItpMVqVXR2gsnnxpMudlep8Vku\ntZH0G1Iz39JjnQBIelFEXC6pZ4PjkolC0inA/hHxu1Ix9DLemUPJobQxRehWA54A/LFAEbq+lJpy\nqso6O0Fdc+OnqsRF0bnAkcDL8qYLgAOiXPfvXwNVtOECiIjL88/zlRvnRsR9ZaNa6mmk2haXUHZc\neKzqzhy6i9Dlqoa7AC8uF9GkShV++wTU0dmpe278mOt6a5OuRVSvxDz0s4FvA51iN3sC74iInRoN\nZCSeU4Hnk1bTdSepYtMWJS0A9gNmkcbrlgBHRsThpWLKcVV31gD1njmMVevCKyh6hL60s1NEFO3s\nNJPnxneUGEN/ekQc2/X4m5IOLBBHx4/yrQqSPkI6e9kuIn6Tt20M/KekgyLi86ViK524J1DdmYOk\n7nH7WaSziD8VimUVYNeIOHWCl/2iqXjG+ALwGuB0gIi4SlKR1dkzfG48UCah3y9pT1LtFEjFlYpN\nWYqIYyStBjw7In5dKo4u7wR2ilR+FUgzXPLv7Cyg8YSu+pqTjLWg8P57+V9d95eQZtrsUiKQSB2w\nDgPGTegR8cEGQxq775o6O83UufFAmYT+HtIYeqcF3MVAkXoSAJLeQOrwvRqpgfU2wPyIeHOhkJ7Q\nncw7IuK3XjeLAAAR90lEQVQ+pc43jYs+m5NIWjciHmgmqhG1nTnko7urS55N9XBWPhP+LqM7YJW+\nflRbZyeYwXPjPcsl1Ux5FXBejDTzXRQRWxaKZ9y6I7XWSukoUG2x2jMHSZdExPal9j+WpDu6HgYj\nv6Mi03M7JD2NVOP/1Tmms0iTJIol01zFc6eouLLpeJrsKXokEzcaLjXF7LGIeHDMKV/Jb7mtJfU6\nahIV15DIGl1wUfmZw0WSjmLZI+JGqy122TjGVOosdcbXLerr7AQVzo3vV5NDLt29Jz9BJSvpgOsl\n7QbMUuoysz/lLhAREbXXa5lIrad7PwWaPrPZJv/snpkUQJG6/8AvWfZ30Gtbo1RZZ6fs9nxbLd9m\njCabRHdXWTww6imTux/wcdLFj++TVrHWUJLAVp4SS7Xfm8s1jASRZis1StIzSM2PV5e0JSO/i3WA\nNZqOp4cfkqYtnkElxfFqmhs/VUWaRFPRkVxE/BE4ON+W0VmK32xUM1atNS5KfN6+x7JHv6cAL2o4\njjeQJiLMBb7MyN9oMfCxhmPp5U8R8aXSQXTrnhsPFJ0bP1WlEvpMUmRObI0kPQe4MyL+rNRMeytS\nxcwH80uK9jutgaTnAVuQytZ2z0VfhwLXQPKaj2Ml7RYRJze9/z58
MS//L9rZaYxq5sZPVZMXRbtn\nIqzRdeGv+IwE69upwLaSngt8FTiNtOr39QAVr6Zr8sxhM+CNpPK53XPRFwPvbzCOsZ4haZ1Ipau/\nQjp7ODQiflowJoAtSWsvXsnoOd+lrjWkACqbG9+vJsfQJ5yJYDPC4xGxRNKbSaUIjpR0Zemgajpz\niIjTgNMkvSQift7UfvvwjxFxlKSdSWPq7we+QfNDQGPV0tmpW41z4/syq3QAM0Ct48IlPCZpd2Av\nUv14SFUESzsV+GvXmcM80pkDUOzM4c2S1pH0BEk/lXRfXu1bSufs+PWkL7urqOPff6ezU00+AHwI\nWB+4izRj6UNFI+qTx9And1TpACqyN+nD/qmI+E2ecnbCJO9pQo1nDjtHxD/nmG4F3gL8jDI1xwGu\nkvQjYFPgMElrUcfkhFo6Oy1V6dz4vgxsQpf0AyZe6PSW/POYxoKqXERcJ+lgcvOPXDzs02WjAkaf\nOXTGrUufOXT2/wbglIh4aMyYbNP2Jg2v/DpS8/GnMdKrtqRa1qMsVenc+L4MbELHR95TJul/Af/B\n6Lo3h1fwQa/xzOEMSTcA/wN8MNe2L1JtEZYW6LodeK6kav7d11aHJ6tubny/Br6Wi/Uv1715JTDc\nVffmmoj4m7KR1UnSU4CHcjJdA1gnIu4uFMu/knoP3MDork6vLxFPhyrs7CTplxGxQ6n9r4hqvqlL\nyTMkPkVqcrF0nnBEbFosqHo91mPooNgRjKRFTDxsVrrc6fOADcccEZdqqP1WYNOIKHaW0EvU2dmp\nxrnxfRn4hA58E/gkaSjhdaTTd5+29HatpD2AVSRtQqp7c3HBeN5YcN8T0jh9TimX0H8DVF0nKNJw\nwQ9zMj1kstdPoyrnxvdj4IdcJF0eES/qLpkr6bKI2LZ0bLXJwwYfBXbOm34CfLK2o74aSLqeivqc\nKrXp2wo4h0paLcK4nZ1eEREvKRQSkn5N+tvVNDe+Lz5Chz9LmgXcLOkDpHmnXgTVQy5U9FFJn6qp\naJGkF5OapmxOGoddhcLjsKT51XOAWvqcnplvtamms1OXztz4ewvHMWVO6HAQsCZp+OBTpJobxToo\n1Syvnvs69RUtOgp4O6n41bbAu0jzrUuqqs9pjdNvK+3sBBXOje+Xh1ykt0TE9yfbZunqP/D3wOk1\nzXLpDJFJurpzIVSFuth3xfSKXtubnqaXF1hNdOG4dD30qjo7QT1/u+XhI3T4v6Q66N0+2mObUW3R\nokeVGn0vlPQZ0jBH0WXtFf3j//vSAUyits5ONf3tpmxgE7qk1wCvBdaX1N1aah1m2GKCBtVatOid\npAS+H2kIbR5pqX3jevQ3XfoUBaqKRsTN/bxOuT/rdMfTQ22dnaqcG9+vgU3opAse15BW713btX0x\nZadM1ewDpIa+naJFZ1FH0aJdI+KLpL9lp9vMAaRYGzWDq4quWWi/VXR26lbp3Pi+eAxdemJE/Hny\nV5qkp5bsxj4eSVeMHQsuPYY+0/T6HZbab2cqcdOxTGSmfJ4G9ghd0nciYnfgF5KW+VYrfbGoUr+Q\ntJBUR/vM0nOsc0GuPUh1ZU7vemptoNZmG0Z9nZ26jTM3fkastRjYhA78U/5Z+0WjmmwKvJrUo/JI\nSScD34yIXxWK52LSBdCnAZ/t2r4YuLpIRDNX06Uga+3sBHXOje/LwA+5AORKeNuRLoRcFhH3FQ6p\nepL+jlTbe03gKuCQyjr02BRI2jo3vWh6v1V1dspz4/evcG58X2roWFKUpL2BK0in7nsCl0naq2xU\ndZL0VEkHSLoM+D+kmtFPA/43XR2CGoznwvxzsaSHu26LNdKzdqBJekDSH3rcHpC0dFiqRDLPqurs\nFBF/BXYvtf8VNfBH6JJuBHbsHJXno/ULI2KzspHVR9KvSHXGj42IO8c8d3BE1NDswrrkI85x5QRW\njKSFEbGNUmenNwIfAX4WEVsX
jOnzpKmK1cyN79cgj6F3/AF4sOvxg/iC2ng2i4iQtJaktSLikc4T\npZO5pBcCO5KGzS6MiNIt6KowNmHnGu3dFx1/22xEy6itsxNUODe+X07ocCPwc0k/JP3RdgWukbQ/\nQER8qWRwldkil4V9CmmK7n3AXhFxTcmgJH2c1D2+s7r3m5JOiYhPFgyrKpLeAHwemAvcT1pL8CtS\nzfaSqurslFU3N75fHnKR/mWi5yPiY03FUjtJFwMfjYjz8uMh4F8j4qWF47oR2LpTxlfS6sBCD5uN\nyNNNdwLOiogXSNoJ2C0iSs8oqaqzU45nRsyN72Xgj9C7E3ZeFbZGRPxxgrcMsjU7yRwgIoYllVph\n2O23pGGEzpHdE0krWW3Ekoi4T9IsSYqIsyX9R+mgsio6O9U8N75fA5/QJR1PqgGyBLgEeKqkf4+I\nz038zoF0i6SPMdKAeU/glgle35SHSN2UziYNm+0EXCLpSwARsX/J4CrxkKS1gAuB4yXdSxrmKEp1\ndXaqeW58XzzkMnKVfQ/SXPSDSXPRS/ejrI6kdUm1UjpFnC4AFkTEA+WigsmmmUbEcU3FUitJawOP\nkqYqvwuYDRwfEb8vHFdVnZ2gvrnxUzHwR+jAE/Kp3i7Af0bEXyS52mIPOXFXd7TbnbDzl868iPBK\n0dEOjYjDSEfBxwBI+lfgsKJR1dfZCdLc+GtJZzBnklr3HRQR3yob1uSc0FMHnttJH6zzJT0beGTi\ntwwWSWcwcZOEop1cJA0DbyJ9ni8H7pV0Uel+mZV5Lcsm7zf02Na0qjo7ZTtHxD/nufG3kkox/4y0\nMrpqA5/Q8xLfpct8Jd3BDJhv2rBaLp6NZ3ZEPCzpfaRhhPmSfIQOSNqHVPZ4U0ndC2PWJn35lbag\ndAA91Dg3vi8Dn9Dz2OLHgJfnTecDnwRmXMfv6dLdwUWpM9DzSEfsN1bSGX1VSc8EdiN1m7IRJwM/\nBY5gdJ3/xRFRvAlypd2Bapwb3xdfFJVOIS2w6IzDvhPYPCJchXGMvDjlK8DNpOp8G5GaRP+4cFxv\nI30pXxQRH8yLQP49It5aMq7aSNoC+Nv88IKIuHai109zLFV1dlomiMrmxvfLCT3Pcplsm0E+anlj\nRPw6P34O8N8RUXq1oU1C0odI3aV+mDftAnw5Io4uF1W9lFotbkjXKEZElJhKOSUDP+QC/EnSiyPi\nFwCSXswMOb0qYHEnmWe3kOboFiVpLnAk8LK86QLggLEFxAbcPsD2nfo7eYbLxYAT+hiVzY2fEid0\n2Bc4QdITSad7j5Lm6dqyLpP0I9K4bJDqp1zaWVUXEd+f6M3T6FhS+d635cd75m07FYqnRmL0daHH\naL6pxUyxLZXNje/XwCf0XBJzizxmRkS40uL4ngTcA7wiP74PWJ20qi4YKY7VtKdHxLFdj78p6cBC\nsVRF0qoRsYS0uveXkk7NT72ZketGNlqNc+P7MrBj6J1qiuNxlcWZQ9JPSUfk38mbdgf2johXlYuq\nDt2FpiRtT9cq34i4tFxk9ZJ0HqmEbk1z4/syyEfoT++6/17y6jkbn6SNSF2KNmT0xaLSH/T3kMbQ\nP086U7gY2LtoRPVYOqwSEZeQkpRNbEHpAJbXwB6hd5N0ZUS8oHQctZN0FemLbxGwtDxCpXOJDZB0\nJzBuoTkXoWuXQT5C7+Zvtf78qaahKElHMnFJgurqzhSwCrAWvgA6qdrnxvfDCd2m4ouS5gNnMXps\nsVSvxcu67n8CmF8ojpr9LiIOn/xlFhFrl45hRQ3skIukKxn5Nn4ecEPnKdK38Qt7vnGASTqCtJL2\nZkaGXCIiite+8bBZb/69DJZBPkL30v6pexuwcSX1W8YazCOTyQ38TJ9BMrAJPSJu7ud1ki6MiB0n\nf+VAuIbUzaV4USfrj9dVDJaBTehTUEPPzFo8GbhB0qVUMD93zEWsNSQ93HmKGXIRy2xlckKfnE
/l\nR1R10bENF7HMViYndOtbRJwvaQNgk4g4J5cVXaV0XGaWzCodwAzg+buZpPcD3wP+X960PiPlWM2s\nMCf0yb27dAAV+RCpRO3DABFxE/CMohGZ2VIDO+Qi6QEmXhXWqb54VaOB1e3PEfGXTn9FSaviawxm\n1RjYhE7qNm5Tc76kw4DVJe1EqiV/RuGYzCwb2JWiY+V66E/qPI6I3xYMp0qSZpEqU+5MOpP5CfD1\nmdgIwKyNBj6h58bHnwfmAveTLvT9yn0yp07SqW7MbFaOL4rCp0gX+m6MiHnAa0g9KW3qNi4dgNkg\nc0KHJRFxHzBLkiLibGD70kHNUIN9umdW2CBfFO14SNJawIXA8ZLuBf6ncExmZlPmMXRpbeBR0tnK\nu4DZwPER8fuigc1ALtVqVpaHXODQiPhrRDwWEcfkllwfKR3UDHVw6QDMBpmP0Lu6ondtuyoiti4V\nU20kLWLiRVhbNRySmfUwsGPokvYBPgBsKqm7hdrawOVloqrWG0sHYGaTG9gjdEnrAk8FjgAO6Xpq\ncUS4gYOZzTgDm9C7SdoC+Nv88IKIuLZkPLWS9GLgSGBzYDVS6dw/upGEWR0G/qKopA8BpwDPzreT\nJe1bNqpqHQXsDtwErA68D/hy0YjMbKmBP0KXdDXw0oh4JD9eC7jYF/qWJemyiNhW0tWd34+nKprV\nY2AvinYR0N3F/jHc1GI8j0paDVgo6TPA7/BZnlk1BjahS1o1IpYAJwC/lHRqfurNwHHlIqvaO0kJ\nfD/gIGAe8JaiEZnZUgM75NI9/1zS9sCO+akLIuLScpHVS9IBEfHFybaZWRmDnNA99jtF4yzC8u/R\nrBIDO+QCPF3SuEv8cwkAAyTtDuwBbCTp9K6n1gb+UCYqMxtrkBP6KsBa+AJoPy4mXQB9GvDZru2L\ngauLRGRmyxjkIZdlhg/MzGayQT5C95F5nyRdGBE7SlrM6CJdneJcXilqVoFBPkJ/SkR4/NfMWmNg\nE7otH0kvJE3xDODCiLiycEhmlnmVn/VN0sdJi66eSrpA+k1J/7dsVGbW4SN065ukG4GtI+JP+fHq\nwMKI2KxsZGYGPkK3qfkt8KSux08E7ioUi5mN4SN065ukHwLbAWeTxtB3Ai4B7gSIiP3LRWdmTujW\nN0l7TfR8RLiomVlBTui2XHILv3kR4ZWiZpXwGLr1TdKwpHUkPQW4AviaJNe8MauEE7pNxeyIeJhU\nA/34iNgBeHXhmMwsc0K3qVhV0jOB3YD/Kh2MmY3mhG5TcTjwE+DmiLhU0sakhtFmVgFfFDUzawkf\noVvfJM2V9ANJ9+bbqZLmlo7LzBIndJuKY4HTgWfl2xl5m5lVwEMu1jdJCyNim8m2mVkZPkK3qbhf\n0p6SVsm3PYH7SwdlZomP0K1vkjYAjgReQqrlcjGwf0TcXjQwMwOc0M3MWmOQe4panyQdyeheoqO4\nyqJZHZzQrR+Xdd3/BDC/VCBmNj4PudiUSLoyIl5QOg4zW5ZnudhU+QjArFJO6GZmLeEhF5uUpMWM\nHJmvATzaeQqIiFinSGBmNooTuplZS3jIxcysJZzQzcxawgndzKwlnNDNzFrCCd3MrCX+P7A9oki/\n9/baAAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x7f279e975250>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"plt.bar(range(len(predictors)), scores)\n", | |
"plt.xticks(range(len(predictors)), predictors, rotation='vertical')\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"alg_c = RandomForestClassifier()" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"alg_r = RandomForestRegressor()" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"ss = cross_validation.ShuffleSplit(train.shape[0], 10, 0.1, 0.3)" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": { | |
"collapsed": false | |
}, | |
"source": [ | |
"scores = cross_validation.cross_val_score(alg_c, train[predictors_revised], train['Buy_or_not'], cv=ss)" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"scores.mean()" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"ground_truth = []\n", | |
"predictions = []\n", | |
"for tr, te in ss:\n", | |
" train_predictors = (train[predictors_revised].iloc[tr, :])\n", | |
" train_target = train['Buy_or_not'].iloc[tr]\n", | |
" alg_c.fit(train_predictors, train_target)\n", | |
" test_truth = train['Buy_or_not'].iloc[te]\n", | |
" ground_truth.append(test_truth)\n", | |
" test_predictions = alg_c.predict(train[predictors_revised].iloc[te, :])\n", | |
" predictions.append(test_predictions)" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"ground_truth = np.concatenate(ground_truth, axis=0)" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"predictions = np.concatenate(predictions, axis=0)" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"predictions[predictions > .5] = 1\n", | |
"predictions[predictions <= .5] = 0" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"metrics.f1_score(ground_truth, predictions)" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"scores = cross_validation.cross_val_score(alg_r, train[predictors_revised], train['Year Total'], cv=ss)" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"scores.mean()" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"np.count_nonzero(train['Buy_or_not'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Moving on to the test dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 96, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"solution = pd.read_csv('/home/devashish/datasets/ZS/Solution.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 97, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Hospital_ID 193810\n", | |
"District_ID 193810\n", | |
"Instrument_ID 193810\n", | |
"Buy_or_not 0\n", | |
"Revenue 0\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 97, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"solution.count()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Convert to int" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 98, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"converttoint(solution, 'Hospital_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 99, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"converttoint(solution, 'District_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 100, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"converttoint(solution, 'Instrument_ID')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 101, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Instrument_ID</th>\n", | |
" <th>Buy_or_not</th>\n", | |
" <th>Revenue</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>10</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>11</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>15</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID District_ID Instrument_ID Buy_or_not Revenue\n", | |
"0 1 12 1 NaN NaN\n", | |
"1 1 12 10 NaN NaN\n", | |
"2 1 12 11 NaN NaN\n", | |
"3 1 12 13 NaN NaN\n", | |
"4 1 12 15 NaN NaN" | |
] | |
}, | |
"execution_count": 101, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"solution.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Add Total_Hospital_employees" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 102, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 103, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = train[['Hospital_ID', 'Total_Hospital_employees']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 104, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = df.drop_duplicates()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 105, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Hospital_ID 193810\n", | |
"District_ID 193810\n", | |
"Instrument_ID 193810\n", | |
"Buy_or_not 0\n", | |
"Revenue 0\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 105, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"solution.count()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 106, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1518" | |
] | |
}, | |
"execution_count": 106, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.count_nonzero(df['Hospital_ID'].unique())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 107, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1580" | |
] | |
}, | |
"execution_count": 107, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.count_nonzero(solution[\"Hospital_ID\"].unique())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 108, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"med = df['Total_Hospital_employees'].median()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 109, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"solution = pd.merge(solution, df, on='Hospital_ID', how='left')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 110, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"solution['Total_Hospital_employees'] = solution['Total_Hospital_employees'].fillna(med)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Add hospital employees in district" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 111, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"solution = pd.merge(solution, hospital_profiling.groupby(['Hospital_ID', 'District_ID'], as_index=False).sum(),\n", | |
" on=['Hospital_ID', 'District_ID'], how='left')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 112, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Instrument_ID</th>\n", | |
" <th>Buy_or_not</th>\n", | |
" <th>Revenue</th>\n", | |
" <th>Total_Hospital_employees</th>\n", | |
" <th>Hospital_employees</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>10</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>11</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>15</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID District_ID Instrument_ID Buy_or_not Revenue \\\n", | |
"0 1 12 1 NaN NaN \n", | |
"1 1 12 10 NaN NaN \n", | |
"2 1 12 11 NaN NaN \n", | |
"3 1 12 13 NaN NaN \n", | |
"4 1 12 15 NaN NaN \n", | |
"\n", | |
" Total_Hospital_employees Hospital_employees \n", | |
"0 13088.0 3.0 \n", | |
"1 13088.0 3.0 \n", | |
"2 13088.0 3.0 \n", | |
"3 13088.0 3.0 \n", | |
"4 13088.0 3.0 " | |
] | |
}, | |
"execution_count": 112, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"solution.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 113, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"solution = solution.rename(columns={'Hospital_employees':'Hospital_employees_in_district'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 114, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"solution['Hospital_employees_in_district'] = solution['Hospital_employees_in_district'].fillna(0)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Add Hospitals in district" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 115, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"solution = pd.merge(solution, hospital_profiling.groupby('District_ID', as_index=False).agg({'Hospital_ID' : np.count_nonzero}),\n", | |
" on='District_ID', how='left')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 116, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"solution = solution.rename(columns={'Hospital_ID_x':'Hospital_ID', 'Hospital_ID_y': 'Hospitals_in_District'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 117, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"solution['Hospitals_in_District'] = solution['Hospitals_in_District'].fillna(solution['Hospitals_in_District'].median())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 118, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Hospital_ID 193810\n", | |
"District_ID 193810\n", | |
"Instrument_ID 193810\n", | |
"Buy_or_not 0\n", | |
"Revenue 0\n", | |
"Total_Hospital_employees 193810\n", | |
"Hospital_employees_in_district 193810\n", | |
"Hospitals_in_District 193810\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 118, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"solution.count()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Add total instrument demand" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 119, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame(hospital_revenue[\"Instrument_ID\"].value_counts().reset_index())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 120, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df.columns = ['Instrument_ID', 'Total_Instr_Demand']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 121, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"solution = pd.merge(solution, df, on='Instrument_ID', how='left').fillna(0)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Add Instrument Value" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 122, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 123, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = hospital_revenue[['Instrument_ID', 'Year Total']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 124, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = df.groupby('Instrument_ID', as_index=False).sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 125, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"mean = df['Year Total'].mean()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 126, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df['Instrument_Value'] = df['Year Total'].apply(lambda x: x / mean)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 127, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"del df['Year Total']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 128, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Instrument_ID</th>\n", | |
" <th>Instrument_Value</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>0.230520</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>7.172774</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>1.250719</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>0.460606</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>1.275721</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Instrument_ID Instrument_Value\n", | |
"0 1 0.230520\n", | |
"1 2 7.172774\n", | |
"2 3 1.250719\n", | |
"3 4 0.460606\n", | |
"4 5 1.275721" | |
] | |
}, | |
"execution_count": 128, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 129, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"solution = pd.merge(solution, df, on=['Instrument_ID'], how='left')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 130, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"solution['Instrument_Value'] = solution['Instrument_Value'].fillna(solution['Instrument_Value'].median())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 131, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Hospital_ID 193810\n", | |
"District_ID 193810\n", | |
"Instrument_ID 193810\n", | |
"Buy_or_not 193810\n", | |
"Revenue 193810\n", | |
"Total_Hospital_employees 193810\n", | |
"Hospital_employees_in_district 193810\n", | |
"Hospitals_in_District 193810\n", | |
"Total_Instr_Demand 193810\n", | |
"Instrument_Value 193810\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 131, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"solution.count()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Add instrument median" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 132, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 133, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = hospital_revenue[['Instrument_ID', 'Year Total']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 134, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = df.groupby('Instrument_ID', as_index=False).median()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 135, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df.columns = ['Instrument_ID', 'Instrument_Median']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 136, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"solution = pd.merge(solution, df, on='Instrument_ID', how='left')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 137, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Instrument_ID</th>\n", | |
" <th>Buy_or_not</th>\n", | |
" <th>Revenue</th>\n", | |
" <th>Total_Hospital_employees</th>\n", | |
" <th>Hospital_employees_in_district</th>\n", | |
" <th>Hospitals_in_District</th>\n", | |
" <th>Total_Instr_Demand</th>\n", | |
" <th>Instrument_Value</th>\n", | |
" <th>Instrument_Median</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>1</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086.0</td>\n", | |
" <td>7399.0</td>\n", | |
" <td>0.230520</td>\n", | |
" <td>413.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>10</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086.0</td>\n", | |
" <td>76.0</td>\n", | |
" <td>0.201387</td>\n", | |
" <td>16980.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>11</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086.0</td>\n", | |
" <td>137.0</td>\n", | |
" <td>0.241652</td>\n", | |
" <td>16862.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.005706</td>\n", | |
" <td>583102.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>15</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086.0</td>\n", | |
" <td>169.0</td>\n", | |
" <td>0.814948</td>\n", | |
" <td>49459.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID District_ID Instrument_ID Buy_or_not Revenue \\\n", | |
"0 1 12 1 0.0 0.0 \n", | |
"1 1 12 10 0.0 0.0 \n", | |
"2 1 12 11 0.0 0.0 \n", | |
"3 1 12 13 0.0 0.0 \n", | |
"4 1 12 15 0.0 0.0 \n", | |
"\n", | |
" Total_Hospital_employees Hospital_employees_in_district \\\n", | |
"0 13088.0 3.0 \n", | |
"1 13088.0 3.0 \n", | |
"2 13088.0 3.0 \n", | |
"3 13088.0 3.0 \n", | |
"4 13088.0 3.0 \n", | |
"\n", | |
" Hospitals_in_District Total_Instr_Demand Instrument_Value \\\n", | |
"0 1086.0 7399.0 0.230520 \n", | |
"1 1086.0 76.0 0.201387 \n", | |
"2 1086.0 137.0 0.241652 \n", | |
"3 1086.0 1.0 0.005706 \n", | |
"4 1086.0 169.0 0.814948 \n", | |
"\n", | |
" Instrument_Median \n", | |
"0 413.0 \n", | |
"1 16980.5 \n", | |
"2 16862.0 \n", | |
"3 583102.0 \n", | |
"4 49459.0 " | |
] | |
}, | |
"execution_count": 137, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"solution.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 138, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"solution['Instrument_Median'] = solution['Instrument_Median'].fillna(solution['Instrument_Median'].median())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 139, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Hospital_ID</th>\n", | |
" <th>District_ID</th>\n", | |
" <th>Instrument_ID</th>\n", | |
" <th>Buy_or_not</th>\n", | |
" <th>Revenue</th>\n", | |
" <th>Total_Hospital_employees</th>\n", | |
" <th>Hospital_employees_in_district</th>\n", | |
" <th>Hospitals_in_District</th>\n", | |
" <th>Total_Instr_Demand</th>\n", | |
" <th>Instrument_Value</th>\n", | |
" <th>Instrument_Median</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>1</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086.0</td>\n", | |
" <td>7399.0</td>\n", | |
" <td>0.230520</td>\n", | |
" <td>413.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>10</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086.0</td>\n", | |
" <td>76.0</td>\n", | |
" <td>0.201387</td>\n", | |
" <td>16980.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>11</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086.0</td>\n", | |
" <td>137.0</td>\n", | |
" <td>0.241652</td>\n", | |
" <td>16862.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.005706</td>\n", | |
" <td>583102.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>12</td>\n", | |
" <td>15</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>13088.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1086.0</td>\n", | |
" <td>169.0</td>\n", | |
" <td>0.814948</td>\n", | |
" <td>49459.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Hospital_ID District_ID Instrument_ID Buy_or_not Revenue \\\n", | |
"0 1 12 1 0.0 0.0 \n", | |
"1 1 12 10 0.0 0.0 \n", | |
"2 1 12 11 0.0 0.0 \n", | |
"3 1 12 13 0.0 0.0 \n", | |
"4 1 12 15 0.0 0.0 \n", | |
"\n", | |
" Total_Hospital_employees Hospital_employees_in_district \\\n", | |
"0 13088.0 3.0 \n", | |
"1 13088.0 3.0 \n", | |
"2 13088.0 3.0 \n", | |
"3 13088.0 3.0 \n", | |
"4 13088.0 3.0 \n", | |
"\n", | |
" Hospitals_in_District Total_Instr_Demand Instrument_Value \\\n", | |
"0 1086.0 7399.0 0.230520 \n", | |
"1 1086.0 76.0 0.201387 \n", | |
"2 1086.0 137.0 0.241652 \n", | |
"3 1086.0 1.0 0.005706 \n", | |
"4 1086.0 169.0 0.814948 \n", | |
"\n", | |
" Instrument_Median \n", | |
"0 413.0 \n", | |
"1 16980.5 \n", | |
"2 16862.0 \n", | |
"3 583102.0 \n", | |
"4 49459.0 " | |
] | |
}, | |
"execution_count": 139, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"solution.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Train on full training set" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 149, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.linear_model import SGDClassifier, LogisticRegression\n", | |
"from sklearn.linear_model import SGDRegressor, LinearRegression\n", | |
"from sklearn.ensemble import AdaBoostRegressor, AdaBoostClassifier" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 150, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"alg_c = AdaBoostClassifier()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 151, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"alg_r = AdaBoostRegressor()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 152, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,\n", | |
" learning_rate=1.0, n_estimators=50, random_state=None)" | |
] | |
}, | |
"execution_count": 152, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# The classifier is built with random_state=None, so scikit-learn draws from\n", | |
"# NumPy's global RNG while fitting. Seed it in this cell so that re-running the\n", | |
"# cell (or Restart & Run All) always produces the same fitted model.\n", | |
"np.random.seed(0)  # any fixed integer works; 0 is arbitrary\n", | |
"alg_c.fit(train[predictors], train['Buy_or_not'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 153, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"classifications = alg_c.predict(solution[predictors].astype(float))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 154, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"solution['Buy_or_not'] = classifications" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 155, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"2019" | |
] | |
}, | |
"execution_count": 155, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.count_nonzero(solution['Buy_or_not'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 156, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',\n", | |
" n_estimators=50, random_state=None)" | |
] | |
}, | |
"execution_count": 156, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Fitting AdaBoostRegressor is stochastic and, with random_state=None, it uses\n", | |
"# NumPy's global RNG. Seed it in this cell so the revenue model (and therefore\n", | |
"# the submission file) is identical on every run.\n", | |
"np.random.seed(0)  # any fixed integer works; 0 is arbitrary\n", | |
"# Train the revenue regressor only on rows where a purchase actually happened.\n", | |
"alg_r.fit(train.loc[train['Buy_or_not'] == 1, predictors], train.loc[train['Buy_or_not'] == 1, 'Year Total'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 157, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"regressions = alg_r.predict(solution.loc[solution['Buy_or_not'] == 1, predictors])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 158, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"solution.loc[solution['Buy_or_not'] == 1, 'Revenue'] = regressions" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 159, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_csv('/home/devashish/datasets/ZS/Solution.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 160, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df['Buy_or_not'] = solution['Buy_or_not']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 161, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df['Revenue'] = solution['Revenue']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 162, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df['Buy_or_not'] = df['Buy_or_not'].astype(int)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 163, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# astype(int) alone truncates toward zero, which would shave up to one unit off\n", | |
"# every predicted revenue; round to the nearest integer before casting.\n", | |
"df['Revenue'] = df['Revenue'].round().astype(int)" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"df.loc[df['Buy_or_not'] == 0, 'Revenue'] = 0" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"np.count_nonzero(df['Buy_or_not'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 164, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df.to_csv(path_or_buf='/home/devashish/submission.csv', index=False)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment