Last active
November 17, 2016 03:47
-
-
Save phdkiran/277421dcbc78b2368d25d9759a72f062 to your computer and use it in GitHub Desktop.
MLtext3/submissions/stackexchange-kaggle-Copy3.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "#import all the packages\nfrom datetime import datetime\nimport pandas as pd\nimport numpy as np\nimport re\n%matplotlib inline\nfrom sklearn import naive_bayes, cross_validation, feature_extraction, metrics", | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "/work/conda3/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n \"This module will be removed in 0.20.\", DeprecationWarning)\n", | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "# Data Ingestion and exploration" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Training data " | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# read some sample rows to find what the rows look like\ndf = pd.read_csv('../data/train.csv', nrows=100)\n# df.columns\n# print(df.memory_usage(deep=True).sum(), 'mb')\n\n# Find all the columns that start with Date text\ndate_cols = [x for x in df.columns if 'Date' in x]\n# df = pd.read_csv('../data/train.csv', nrows=10000, index_col=0, parse_dates=date_cols)\n# Read the full set of data and parse the date columns \ndf = pd.read_csv('../data/train.csv', index_col=0, parse_dates=date_cols)\nprint(df.dtypes)", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "PostId int64\nPostCreationDate datetime64[ns]\nOwnerUserId int64\nOwnerCreationDate datetime64[ns]\nReputationAtPostCreation int64\nOwnerUndeletedAnswerCountAtPostTime int64\nTitle object\nBodyMarkdown object\nTag1 object\nTag2 object\nTag3 object\nTag4 object\nTag5 object\nPostClosedDate datetime64[ns]\nOpenStatus int64\ndtype: object\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Test data " | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "test_df = pd.read_csv('../data/test.csv', nrows=100)\ndate_cols = [x for x in test_df.columns if 'Date' in x]\ntest_df = pd.read_csv('../data/test.csv', index_col=0, parse_dates=date_cols)\nprint(test_df.dtypes)\n(test_df.head(2))", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "PostCreationDate datetime64[ns]\nOwnerUserId int64\nOwnerCreationDate datetime64[ns]\nReputationAtPostCreation int64\nOwnerUndeletedAnswerCountAtPostTime int64\nTitle object\nBodyMarkdown object\nTag1 object\nTag2 object\nTag3 object\nTag4 object\nTag5 object\ndtype: object\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>PostCreationDate</th>\n <th>OwnerUserId</th>\n <th>OwnerCreationDate</th>\n <th>ReputationAtPostCreation</th>\n <th>OwnerUndeletedAnswerCountAtPostTime</th>\n <th>Title</th>\n <th>BodyMarkdown</th>\n <th>Tag1</th>\n <th>Tag2</th>\n <th>Tag3</th>\n <th>Tag4</th>\n <th>Tag5</th>\n </tr>\n <tr>\n <th>PostId</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>11768878</th>\n <td>2012-08-01 23:10:12</td>\n <td>756422</td>\n <td>2011-05-16 21:49:59</td>\n <td>155</td>\n <td>11</td>\n <td>Maven & yui-compressor Plugin issues</td>\n <td>I'm using the yui-compressor plugin for maven ...</td>\n <td>maven</td>\n <td>maven-3</td>\n <td>yui-compressor</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>11768880</th>\n <td>2012-08-01 23:10:21</td>\n <td>1569892</td>\n <td>2012-08-01 22:24:37</td>\n <td>1</td>\n <td>0</td>\n <td>Inconsistent behaviour of html select dropdowns</td>\n <td>I have written a javascript-generated web page...</td>\n <td>html</td>\n <td>select</td>\n <td>drop-down-menu</td>\n <td>scrollbar</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"text/plain": " PostCreationDate OwnerUserId OwnerCreationDate \\\nPostId \n11768878 2012-08-01 23:10:12 756422 2011-05-16 21:49:59 \n11768880 2012-08-01 23:10:21 1569892 2012-08-01 22:24:37 \n\n ReputationAtPostCreation OwnerUndeletedAnswerCountAtPostTime \\\nPostId \n11768878 155 11 \n11768880 1 0 \n\n Title \\\nPostId \n11768878 Maven & yui-compressor Plugin issues \n11768880 Inconsistent behaviour of html select dropdowns \n\n BodyMarkdown Tag1 Tag2 \\\nPostId \n11768878 I'm using the yui-compressor plugin for maven ... maven maven-3 \n11768880 I have written a javascript-generated web page... html select \n\n Tag3 Tag4 Tag5 \nPostId \n11768878 yui-compressor NaN NaN \n11768880 drop-down-menu scrollbar NaN " | |
}, | |
"metadata": {}, | |
"execution_count": 3 | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Data exploration " | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "#explore date fields and find patterns based on weekday, night time\n# df.groupby(df.PostCreationDate.dt.weekday_name).OpenStatus.mean()\ndf['day'] = (df.PostCreationDate.dt.weekday)\ndf['hour'] = (df.PostCreationDate.dt.hour)\n# see if the weekend posts are closed more\ndf['weekend'] = (df.day > 5).astype(int) \nprint(df.groupby('OpenStatus').weekend.mean())\nprint(df.groupby('OpenStatus').hour.mean())\n# df.groupby('hour').OpenStatus.mean()\n# df.groupby('hour').OpenStatus.mean().plot()\ndf['posted_in_the_night'] = ((df.hour >0) & (df.hour < 8)).astype(int)\ndf.groupby('posted_in_the_night').OpenStatus.mean()", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "OpenStatus\n0 0.097197\n1 0.082197\nName: weekend, dtype: float64\nOpenStatus\n0 12.272428\n1 12.698999\nName: hour, dtype: float64\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "posted_in_the_night\n0 0.508320\n1 0.468776\nName: OpenStatus, dtype: float64" | |
}, | |
"metadata": {}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "### Mean of every numeric column, grouped by `OpenStatus` (including the columns added so far)" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df.groupby('OpenStatus').mean()", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>PostId</th>\n <th>OwnerUserId</th>\n <th>ReputationAtPostCreation</th>\n <th>OwnerUndeletedAnswerCountAtPostTime</th>\n <th>day</th>\n <th>hour</th>\n <th>weekend</th>\n <th>posted_in_the_night</th>\n </tr>\n <tr>\n <th>OpenStatus</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>7.589407e+06</td>\n <td>756253.370965</td>\n <td>355.813063</td>\n <td>13.529956</td>\n <td>2.641596</td>\n <td>12.272428</td>\n <td>0.097197</td>\n <td>0.223537</td>\n </tr>\n <tr>\n <th>1</th>\n <td>6.429625e+06</td>\n <td>548413.808002</td>\n <td>657.454745</td>\n <td>25.405826</td>\n <td>2.573785</td>\n <td>12.698999</td>\n <td>0.082197</td>\n <td>0.197260</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"text/plain": " PostId OwnerUserId ReputationAtPostCreation \\\nOpenStatus \n0 7.589407e+06 756253.370965 355.813063 \n1 6.429625e+06 548413.808002 657.454745 \n\n OwnerUndeletedAnswerCountAtPostTime day hour \\\nOpenStatus \n0 13.529956 2.641596 12.272428 \n1 25.405826 2.573785 12.698999 \n\n weekend posted_in_the_night \nOpenStatus \n0 0.097197 0.223537 \n1 0.082197 0.197260 " | |
}, | |
"metadata": {}, | |
"execution_count": 5 | |
} | |
] | |
}, | |
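{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "### Explore the date columns\n- `days_created`: age of the author's account, in days, when the post was created\n- `owner_age`: age of the author's account, in days, as of the time the notebook is run\n- Group the data by `owner_age` and draw a scatter plot of the mean and count of `OpenStatus`" | |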
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df['days_created'] = (df['PostCreationDate'] - df['OwnerCreationDate']).dt.days \ndf['owner_age'] = (datetime.now() - df['OwnerCreationDate']).dt.days\nage = df.groupby('owner_age').OpenStatus.agg(['mean', 'count']) \nage.shape\nage.reset_index().plot(kind='scatter', x='owner_age', y='mean')", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "<matplotlib.axes._subplots.AxesSubplot at 0x7fdb13987128>" | |
}, | |
"metadata": {}, | |
"execution_count": 6 | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEPCAYAAABRHfM8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsvWuUnFd5Jvp8XZevbn2TkY0sS2qpJVmyJdlSYmKGSbDD\nxTYXA2H5ODIEiGV8i5CAGFt2xpZB6QlCccJ0MqgtoyCSSFafSeITmJWkAklrZmlNoAnHt5PGBCfI\n3CmRA0PMsSVZfs6Pvd/e+7tUd3Wrqqtaep+1avWlqr5v11dV77Pf2/MGJKFQKBQKxXToavcCFAqF\nQjE/oIShUCgUioaghKFQKBSKhqCEoVAoFIqGoIShUCgUioaghKFQKBSKhtBSwgiCYH8QBD8MguCp\nKR4zHATBN4IgeCIIgstbuR6FQqFQzB6t9jA+A+CaencGQXAdgEGSqwDcBmCkxetRKBQKxSzRUsIg\neRTAj6d4yNsA/LF97JcB9AZBcEEr16RQKBSK2aHdOYzFAL7t/f1d+z+FQqFQdBjaTRgKhUKhmCfI\ntvn83wWwxPv7Ivu/BIIgUNErhUKhmAVIBs04zlx4GIG9peFzAN4DAEEQXAngJyR/WO9AJDvqtnPn\nzravYb6sS9ekazoX1tWJa2omWuphBEFwCMBVAM4LguBbAHYCyAMgyX0k/yoIgjcFQfAsgJ8B+PVW\nrkehUCgUs0dLCYPkTQ08Zmsr16BQKBSK5kCT3meAq666qt1LSEUnrkvX1Bh0TY2jE9fViWtqJoJm\nx7hahSAIOF/WqlAoFJ2CIAjAeZT0VigUCsVZACUMhUKhUDQEJQyFQqFQNAQlDIVCoVA0BCUMhUKh\nUDQEJQyFQqFQNAQlDIVCoVA0BCUMhUKhUDQEJQyFQqFQNAQlDIVCoVA0BCUMhUKhUDQEJQyFQqFQ\nNAQlDIVCoVA0BCUMhUKhUDQEJQyFQqFQNAQlDIVCoVA0BCUMhUKhUDQEJQyFQqFQNAQlDIVCoVA0\nBCUMhUKhUDQEJQyFQtEyHD9+HF/5yldw/Pjxdi9F0QQoYSgUipbg0UdHsWzZGrzhDbdj2bI1ePTR\n0XYvSXGGCEi2ew0NIQgCzpe1KhTnOo4fP45ly9bghRfGAGwA8BSKxavx3HPPYOHChe1e3jmFIAhA\nMmjGsdTDUCjOIcxViOjYsWPI5wdgyAIANiCXW4Zjx4619LyK1kIJQ6E4RzCXIaKBgQGcPHkMwFP2\nP0/h1KnnMDAw0LJzzgT1iFNzLtOA5Ly4maUqFIrZoFarsVhcQOBJAiTwJIvFBazVai0756FDh1ks\nLmBPz0YWiwt46NDhlp1rJpB19fZuiqyr3v/nO6ztbIod1hyGQtGhOH78OI4dO4aBgYEzjvt/5Stf\nwRvecDv+9//+6uT/eno24YtffBhXXHHFmS617lqb+RqagXq5la9+9Sh+7uf+41mZc9EchkJxlqPZ\n4aNWhoimWuvChQtxxRVXNNXonknYqF5uZXx8XHMujaBZrkqrb9CQlOIcQavCR60IEbVirbVajePj\n46zVapHf/dfQaNgo/vx6652YmJjzkN1cAU0MSbWdCBpeqBKG4hzB+Pg4e3s3WcNlbj09Gzk+Pn7G\nx44b0DNF2lorlXU8cODArM7hE0Iu1818vneSHEZG9s3IqE+Xq4gTZ6fmXM4UShgKxVmMdiSoG1lT\nfKc+Pj6esjPfTaDI7u6ZG93o664R6I9cgzDsYXf3xoaIdLprWI84m02onQAlDIXiLEcn7XbjO/Wt\nW7fH/t7GYnEBK5V1BIqzJrqotzJOIOm5hGFfQ8efjZd2NpIFqYShUJwT6AQDltz1700lhYmJCR44\ncKCuB9DIa6nVaszne+t6GH5YajoinamXdraW1JJKGAqFYo7gduqHCSwgcDGBwbqkkGakxch3d69n\nGPZwZGRf6rlqtRpzuYolio0Eyja8dXnEiDdK
pI16aZ0YAmwm5hVhALgWwDMA/hnAPSn39wD4HIAn\nADwN4H11jtPcq6hQKKZFrVZjodDn7fZl5z9mw0ZjEeMaN9IuUb3bEs5lBIqppOHIqWaPXTujBLqs\nfzpyaWWRQSegmYTR0j6MIAi6APwhgGsAXApgcxAEa2IP+w0A/0TycgBXA3goCIJsK9elUCgaw8KF\nC/Fbv/URAOfB9CgsBPAeAG8C8G4A1+Etb3nD5OM3b74Rzz33DL74xYfx3HPPYNOmy5HNLgawG8AY\nzL7wS9i+/e5EH4XrFfk+gCsAfB+nT38Pb3rTm2bcxyG9Gj/60Y+mfWyyR+UITpz4F5w8eVJlQuJo\nFvOk3QBcCeCvvb93IOZl2P/9of19OYB/rnOsZpKuQqFoEPWrlw7b31fWDfnUajWGYY/1LNwOvrv7\n8tQdfDOS/XKMYnEFgSKLxfXTHkueUygsJ1BkLre04ed2OjBfQlIA3glgn/f3uwEMxx5TAfD3AL4H\n4KcArqtzrOZeRYVC0TDEoJbLqwmstMTRWNx/ZGRfw9VTtVqN1WqV1Wq17v1ThZgcuY01vD7BxMSE\nrcKa+XM7Gc0kjE4I/VwD4HGSvxwEwSCALwRBsIHk8/EHPvjgg5O/X3XVVbjqqqvmbJEKxdmKRvSe\nNm++Ea9//S/j8ccfx9vfvhkvvPAFAANIk9KIH+O2294PANi+/bXI5wfw0kvfwv79n0o87tFHR7Fl\ny53I502IaP/+T2Hz5hsbvh9w0h8vvFBueH2C559/HoXCCpw4MfPndhKOHDmCI0eOtObgzWKetBtM\nSOpvvL/TQlL/HcBrvL//DsDPpxyriZyrUCjI2ZWTHjp02CbCSw17DdLkV887SIa9DrJQ6JuUB6lW\nqw1VMtX3MMYYhj2cmJio+7rOxDvpZGAehaQyAJ4FsAxAHibjtTb2mP8KYKf9/QIA3wawIOVYTb6M\nCsW5jTMpJ63Vaty1a2jafEOjhJQs391EoMQbbvhVGwq72IbCOG0lk8tHDMw4HxF/brG4TnMY3q3l\n8uZBEFwL4L/AKOPuJ/nxIAhusy9iXxAEiwAcALDIPuV3SD6achy2eq0KxbmEZkieTxXOamRMqzy/\nUqlg06b/gBdfDAAcsY8/AlON9SUY83Cxd1+6/Lh/vOeffx4nT57E6173Fpw48T8SzwMwpST7yZMn\n8eyzz+JVr3oV1q5d2+hl7Tg0U968pR5GM29QD0NxjmCuOrxn42FMl5T2MV1/Q9z7uOGGG2NexDiB\n1d7fhwmUWC5vYKHQxx077ousI82bqbcG8Y7qeT5nU+c35ktIqpk3JQzFuYB6hqpVJCLnq1TWMZ+v\ncMeO+yLn8M976NBh5nLdtuLpIuZy3VMa0jRCKhT6WK1WU+XEw7AnJg2SlCEpFPp4zz332nWUCKxk\nPt9bV8k27TyFQt+0woQul1ElsHcynzIfoYShUJyFmE5ao1W73ZGRfcxkyhEDfOjQ4Qh5FQp91kiL\ndMcmAv3M5SqJuRVxkpE8hy9XbshhbcJ7yOeXEQgn15LJlJjP90Y6x5Od5weZz1fq6liNjOxjGPZN\nSozs2jWU6Cjv6dnIarXK8fFxm2BfQcCRElDirl1DTb3ucwUlDIXiLES92RKNKrTOBknpD9bZhR8k\nsIjxyiigxHvuubfuDItDhw5zYmKCw8PDLBT8czzmeQ9+M2CNQF9iLaOjo6xWq9yx4z4CF1nC8hPk\nReZyPaneSFzHymhWdXvE18sgCFko9E+SGVBIuSb9DV/3ThCOFChhKBRnIdI8jJnMgJjpucbHx7lt\n23bPALtzFAorWS773dk1u/Nf6f09TmCJR2hxhdkxZjIlFgr9KVVOVQIXWIPvNwMeoBEedOcIw6XW\nQ1hvDXmvvUUNei5XiXgzLmyVDFO50Ndheyz/cQdTr0m5vKGh695p+Q8lDIXiLEV98b7meRiudHSZ\nZ4CjIZ4w
7EmcN5stW69AhATXEwhZKKyjS1KLkT1sPYUSk55DjcA99r4xSx5lu4b1sXNcSueJjNNI\njBy2/0uq5kpC3qz9YMLo9/Rs5IEDB7yQVNrjavb4SXn16a57JyrfKmEoFGcx6s2xrtfvMJPwRzSh\n2+MZ4Ar93EE+3zs5GMk/7549DzGaiB5jMrQ0lmKIRR5EzrGcQN4+3h+8dNjeHycJMeSy9lGaHEPS\nMEdVb6dKhMv6ko/L5SpWar1EYHAyrzMdOlH5VglDoTiLkUYA9UhhpuEPZ9DG7W5eDOWEJZBk7kDy\nB7VajaOjo4w30AEXMAz7bCioYj2R1Z4hvt8ShR/6qRHYSqDAMFzKqD7VQUbDUr4xv8mSyYWWcJIG\nPbrL32fPOxi5PtFu9d2WMM2xcrkeHjp0eEYlxP77pB5GB9yUMBTnAmZCALPto4jKX0joZyWjIR6p\nWjIdz4XCWmYyJXZ1lRiN948RCPnII49MEpoT8RODLYZddvTRZPV73vO+aXb8Zk54uSyeyJh3f1RG\nRF6j6ekQb2aAmUxpMuHtV3OZJHqx7rHO5D3shPG6pBKGQnFWoh4B1NNgioY/TIK4Ulk3bfgjLn9R\nKKxhLldm/dDSbrp8xBhdae1ySi6hUOif3JVLKatTt73Q8yCSFVmSqzHVSUJG2+yxVxIo8qab3sUH\nHniAlcrlTM77rrFcXs1qtep5DkVGcyYHmc2WbQL+skhj33RFBbOpeNIqKSUMhaKlSIt/FwrLmc/3\nsFy+mIVCX2S36ghm+ml2ccQFAU2ieDmjVUt+2Cqej6h4u30zeS+TKUe8o5GRfaxWqwzDXppw127W\nS1ZL57Xpwyh45DROE9Kq52G4mRyFQp+tfjpIM0rW92b85Lkhqny+N7WxL22CYKdUPM0GShgKxVmI\npIcxZsMqyUY5wUxmTcTP5e+Ak93NMhdCEuMSJhJyWkrglfb3tYwmqt06Rkb22eRxPsXYm8clez72\nMlq+mwxPheGARyx+OazvyQhRPWlfUzz3MjjplaSFkDoxHzEbKGEoFGcpfOOVz1eYbJQrcnh4eNJo\n1QupSNdyvPOa5GTnc6m0YdJAivpsodDHcnk1s9myzVeIoZd8hPw+an+XJPTShOcQbTocJXAJXX5k\nAYFVDMM+r/NanlvzXvd9CUMvc75HR0dZKm3wnjPqrW8bgYy3pvuYzL3kOTQ0lHqN5Np2WsXTbKCE\noVCcxRDjlaxIMonouKGP74Jzue66nddbtrw/4QlIKElGmobhpbGeC5ENWeytp2pJws979MSOW7IG\nPd6TQQKPMZcr8+jRo6meVSZTtHIlyY5r2eU770q8nlU0zYByrk/Qhbb66Ho9pKQ3KoUSfw9GR0db\n2mU/V1DCUCjOARgJCzHC8S7qqOEUrSQXx0/vvDaG0iehmmdU/cqjojWs/v98McAJmnDZ6thx+mlK\nYivW2EsD3BiBIZow0fk0yXYza2Lr1u0MgtA+9gICRWazq+zxpU9kAU2/Rsht27azWq1aUssxmazf\nSxdGq9D0a6y2x+lh1GtKEoETWZTXU7RkVOTWrdva/KmYOZQwFIpzAIcOHWZXV5GiDhsP+RSL6yaT\nxd3d65nPV/je977PVhKRyWqicRovwU8o77XH9R9btcZeDLY7Zz6/xO78w5ixlhyBhIaE6IZoNKgW\nEFiRYuD3egb8w9594zSJayGsh6zhX0SgaMt9Q2vI/ZCcdIGLN3PYPs8PpSVfV6m0fjIk5bS1hETl\nWo2ph9GsA7X6poShOJeQNFx+fJ72Z8ET9JNqofNTdty+h1Gg2/mvYHoyumoJamnsnPcz6jU8SUBC\nXBfQNcGFdOQ24R1fyl1X0nkNg/bxl9Al2P3nSUgsbY3LmVamm82WPfVd04Nx/fXvYLG4gLncBQQG\nGE+8h2HfZCjQ6F5Jc+NGOt2sicny3fkEJQyFYg7Qzlr6qOGiNVgSItpIYA
Hz+UVWINCXzPArmTYQ\nyDGb7Z4MV61fv5HRuP+AJY2QfqObyx3cb43yYvu8vTS7et9zEbkQOY6fdxinKWkdt+RQZFS7qkpD\ncqF93ALvecvsucUjqDIqEyKlulIcMMAgKDKbFckQ6fJeyWJxAffseYj5fI/3vAWWqELu2fPQ5Hvu\niPrDdFVqKybJbr6V1yphKBQtxlwPMoojKTtes8ZvjDLUJ5+v2KSs9Ej4YaUaTTioxFLpUmYyJQaB\naDmJYR6L/Yw2uZkcQYnAEvu8ATovwV+X/D5kjf8qRvMORRo58zKNdzFEl0cRT+L+GJGt9gjqUpqw\nki8tIgnvol1X6L2+VUzTh8pkZLaFNAUuJFBkJjM4KX0u730QiCfWw7RS4PkUmlLCUChaiFYOMoo3\nzNUr6TSJV79/oZ9uB1+ixPJzuaXe/+KGP0423db4lml26kIw8VxHVPn1Pe95L6PhqcNMr5ySJLOf\nGL+Hpry1SBcu89cpHggtCfTRhLcKzGQuoAuBVWLnlp3/evu3eDVy7HoKtOLdSL7EV8U1nkatVrNd\n5+JJpV+f+VJeq4ShULQQrRpkJF6LlK8Wi+tTBw7VajVb6bTbGreBmKH1Da6UrPZbo3meNbaLPEMu\nCeT1dCWnsuvvY70d9MTEBEdHR20YR8jGGeBcbhmHhoZilVw3WIPfbY26n4AmxZPI5ZZQZEmieZLH\nGG1W7GW0n0JIKd6wt5gu+Z1WxrvVHmOIJrQnirkijthHE8Ir8pZbbrXriie/1cNoOxE0vFAlDMUc\noZmDjETxdHR0NCb6V79U1vRfLLfG8kmaoUKr6ZKwsuP1cxfd1sAJuchPOU83TXhlqyWSW+39iwgU\nGASvoFGOXcFCoY8333yrVw21itGmvSipGE9IjPj5dKGcfkYFBasEqiyX1/DAgQP8/Oc/zwceeIB3\n3XX3ZFlwsllxN4Eu73+Sy/G9mvuYbDDspiFPISXfAxEvqJsm3Oa/rpvoOsglP+LndxqXOu8UKGEo\nFC1GMwYZRev5L6LTZ/LLV6PlnT09Gzk8PMzodLt4nF9IR2TAq0yGpMTYFpnJnE8ga41qL6M5CClX\nfSWjTXt+0rtIpwPlBAG3bt1mO81Fb0rOv5VuYp2QlRCBee5rX/s6OpnyInO5VQzDHjsB0CeDkl2L\nNN0t9QhJRBGL9u+bmOyxGGMuV7alxpJXOZ/G23iI0RJbv6JryB7bJxST3wnDnhlJnrcbShgKxRxg\npoOM4s+NlsVWreEUo+orwCZ37V1dcZ2kX/EMt2g5iXEU7aVkrL1UWmmNojSv1Qi81xrMX4sRkOQd\nyjRehRCBlNPKazE9CTIvw8T7/Sqm98cIJ96t/VjKeZ0xdiGuKk3fht90Jzt/KbU9SFPBJOG2CqOV\nVOMsldbExsju9Z67xlvDARoCE+n1PgLdzOdF0oSUbntf8XYmn6F2QAlDoWgTGjUA4+PjDMNBa8wW\nWIMmE+b8Xav0T0QH/BjZC3n8Rmvw/Tj+qDWeEkJJF/bLZkuMeh+7reHN0e2uJdQlRnnFpLF0969m\nlIxMg1yhsMqSmwxIkrWIUZeQ1nrvuQfs/6Rayp+PUeINN9zIQqHfKtf612qUJo9y2J5HvAQ550V0\nJCfEehmBwmQfhslF9fDmm98fG6AkjYW+d7KbUYHD+t32aegUpVslDIViDjGbXeLEhEhniAES41iz\nBlO6sQ0BSEOYf66RkX3M53tYKq1mGPZ4kh/SvSxJ2zE62Q0Tt8/nzXNuueVWj2gkD7HXEoaQWdyD\nEIMru/jH6HISYjhlJobf/CdNe6voQm7SkOcb4gN0YaX4PPG9zGSKDMNelkrSt3GTdy2lYqqXrvLr\nIkZnk9/PJCGvZDZbZi5nclHF4gIvTyMNfgVmMr7USVxC3a8IM7d6eaxOUrpVwlAomoTpyGC2u8Rd\nu4as8RaSEANbf6eaVrbrry86VnTMGl
y/ckkS2aa/IJdbw0Khj11d5Rhp+cKBYly3emuV+P2FdFIe\ni+gE+17prUFyNEJeEm6S8FavPbZIocvQpSJd1ZJ4GSLhEa+q8nf52+kqoqTiqYemwskvuY33Yvgj\naOO6WCYcZvo0fGkSP7Q1ymjH+dRSIZ2kdKuEoVA0AdORwUx2ifHRn8aw91rDu8UaRwm9bCQQMpvt\nnsyH7NnzUKJst1DoSyRXzUhR2eVKh7UYRJ8A3HGMHlVcCLCbLkk8SJec9rua45VWn2AmU+AHP/hB\nOuIp0ZXCioHdRldVJL0U8jgp4/XzO3Kt/KoqetfrQu/6pc3WiDf9raLLUcgQpVLsPbiY0RCfvBY5\nxjLvmN3e9RM5d9NYeNNN7079HEw3mGkuoYShUJwhGhmHmjZrQmYxxJvsfOJx8x32WSMkO3O3mwWK\nHB0dZbVa5bZtH7SVSbIrHrdkkJQyj3Z/D9Ht3C+k2237VT9D9tx76XbhIrchkh/iLUgnud9UJ2qx\nEroZZDZbsfIbQoQk8IuMkouEz0iRK8/nu+1rlOS8GO9Fdh3xUmHfO5CQmXgwfvK/yK6uuIZWia4i\nTEJXfvJdvDyfZOS1SAXX+d5zPkYXwnOfGZna54+llc/B1q3bOmK2txKGQtEA0qbK+WSQNg41DPsm\nK2CSsyNMaanEwOvNoygU+mw5bb81QgN0IamNBPqZyZR511130+1c/Ya1xYx7CYVCH4eHhy2BSXLa\nr0Sq0ISPJDQkcf+0PooqTXJYdta+BHhcXkTmSETX40pve+h2+NIAN8i0Mawf+MB2pldH7Y0ZcwlP\nSS5knG4u+DhdAt2Qaxiu9OZ9k8CN9vz77HEX03gUcgxZl39NjtrfDzAallppr4fkpKJl0GE4YHtI\n1ieu0VTz2OcSShgKxTSI7/q3bt2emDedHIdaTPlbdqCXpBqEarWamBZXKq3wBPBkJ3u/Na6DBApc\nuXItXX+BH965P8UwmYqkMFzuraniGUUxwtKrcDujO/Iebw19NCNV40SYJi9CGi8ivp4ac7nFLBbX\n2/P7Mzb88I67lvl8xSrrRj2DQuHSyd24aQA0XozRcop7LFN3pZv38zFGE+BjdCEvOYY8V8qRJWQl\n4TMhKiFMec4tdT4j8ZyHI8lOkA9RwlAopkD6bOyksRfScONQ/d3neMxwHIj9Pc5KZR2r1ap3Lqmm\nuSh2rO10O9083/zmt9Dt/AfppsNJSMlXba3REcF6Oo9kr32eNO+JZ7CdxtNYQhdCuZAudLWeQA8z\nmfMYhn2RxkSZrVEomNxFGF5ir4s/SEleo5CX9EFE+0mAkGHYx0JhuT3WIKOkMj7Z8S278ImJiVhX\nvJDLBgJ5m4sRw15ksbhucu0SEsrlJLTnCxxK7sPv2H6ldy2FTPoJDMc+K5LnMZVUQI6ZTIXl8qXM\nZou2R0OS4p05nU8JQ6GYAqZCKW78V9PfIcfLWI0ch59n2MupPY7LCBR588232p2xX90j1UFxg+9r\nI8ksB+ldkF4GIQs5z/mMJmMX0fUfSOe2eAaS35Dwlqz/84wTJlDk0aNHU4UQZbZ3qTTIXK7MTKbC\n9EZD6VMoeq/RhNxyuQqPHj0aa5jzK8PGGIY93LPnoUThgQsXOjmRSmUd3/rWt9vzGcHFG264cZL0\ny+W1zOXKvPPOrUzvSXmM2WyJH/vYx3jfffd5JcpDdF3pklORMmAhxSfppNKXM5MpWmXcor3O8v6L\nFMmlHSWBroShUNRBMjHsG/toXX5cttwYfknuriSQZzZbYbm8moVCH7dsmSokMUrTNSxGxxgbo9FU\nip37Qkab2+T3EqOktJrGW5CBP0IKksRdZv+WktccXe7g1fb37YzKjJjdfS63iuPj45Ohu+7u9ZMG\nPBo6uoxAnrlcjw2JRfsQKpV1vP32OzzSXMxcrjtm+OXxJvRjGvKKzOcvZpzIpHvc5YA20eR8SonH\nug
FS0rxoZEPWrBFJdaPom8lcRFNmbMJgSW8nLiN/kLlcme9617ti80aENOPVZpKfMhuCTKbIiYmJ\nNn8THJQwFIoU1Go1HjhwwEsMSwmrmbhmVFej1UqFQt9kj0M0bk5KklsqlXbtGvKqpvwwVTwJ7Xa1\nLr4fbwST/oYLLSGIgZSE9wC7ukJr9CSHIJ6T9B9IzP7X6XbVftzeb7xLekc7d340Fvq5jMb7SU6k\ny+d7vFBRMuwiIot+GXA9EUeX35G4v1SGmaqyUunSBDlkMgVGvUQSWMIwFOkTRy5Anp///OcnBQ6N\nlzPGaJjPf5920w87ZbNljozs89Z/kK6D/CBd1/s4DWnHw3FFDg8Pd0Q4ilTCUCgiqNVqvOeee22F\nk+wuHSmYDl+R205KUezYcZ/1SkQKQwghWf0U3X1f6pHEVrpGPTFo45R4e1S1VY57lMD/4Rkr/7h5\n24UsIad4J7NMtvNnb0usPkcTo5c8yTiT41afZC5XsdcrXr7qXwdz6+6+POKR9PRsZKHQx127hqYV\nYPRLS6N9JH7vh19llEwgl8txGXTjYWQy8Ua/JwmUODw8TJJeabRf/UU6TagNk3kQ+Qz5VXCuWTJn\n1y2VV/00MzXiCX8ZWrWhY8JS84owAFwL4BkA/wzgnjqPuQrA4wD+HwBjdR7TzGuoOEtw6NBhW+Lp\nh5z8noEys1kJGYwxbQZ0GBr5DbeLTDdahcJydnX5oZF9jE6ii1fhyM5eKqT8JLUYmGG6mRc+QR1k\ntFkty2ipqLwm2f36jXASg/8wnech3dDu9eTzMklPXqfIqKdLmfuegyTIp+uAj3se1apfQSUd8PHr\nnU7W11//drsu05i3des2q24bLeEFBjk6OkpSNLnSchrRxsipmjQnJibsZ0jeXwklhoyGF9PFJNvt\nacwbwgDQBeBZAMsA5AA8AWBN7DG9AP4JwGL79yvqHKvJl1Ex3zExMeEN95HQhp9s/gDNDnAJ3e4y\nnhA3cXhzHN97iJfRjtF5ChsZLWeVZjkx7ia0EQQhg2AxXXVSkVHdpW77t3RC+wQ1wain9Anv/Ktp\n+gp8j0WkPcbtfWsZHWLkN7b5r8fvDPfPKUneQRYK/RFCaLQDXnbn5fLFLBT6JvtWTL6jn8b78rut\n/WNKz8vlkSFTYdjD2267YzJHYJR9K5G1ZLPdMRJIL+X1X1O1WrX5Cve5kLJYl4u5go7EC3TCiELM\n8Sl/nVFaO58I40oAf+39vSPuZQC4A8DHGjhWEy+hYr7j0KHDVlZ7KZ1nIHX1YoBkDrQkKcX4Jssf\n9+x5KCJ2ZnupAAAgAElEQVREFwQFr4zTeCr5/Frv+X7Yxt+R12jq9SVkVM9I+yGmm+hyGr6HIaWw\n6yilsIbYpBRUXsdhJqul5LUIEXyMQM6K713OMOyxfRSy/j6avIiMU11p15hJJHDTjGu8A94QQzRx\nnctVJvNF5r3zO+AdSYhBHxnZFytbjpLTyMg+hmEfw9CUAWcyy5nNlrhnz0Mk43pO0VLeeKe+0+hK\nz8+YpLu8Z1J0ENLkMGSca5z0TCVYuxPg84kw3glgn/f3uwEMxx7z+wD+EMAYgK8A+LU6x2ruVVTM\nW7idoxhH8Qx8A+RrA0kNvjH+mUyJ+XyvJ3V962SlUD5f4bZt2z0jZfIgph9BDLqfdBbjYHbq5bKf\n15BwkRhW6e2QMl8Zmyp6SOINGGNtksPR8aF79jzEHTvutQl6eb2D7Ooq8XWvewMLhX6G4RJGx5qK\nwVzMRx55pI7W0VhMCn3c/iyxWq1OXncpuU0z9BL7HxnZZ4dAJXMLciyTy5BRqVKOWyFQYKlkqtJG\nRvZ5RQyOnHp6Ntrn+wT7Tvv3CgIht2y5pSFPKPqYdKl5QxiSdJcy3Ivp8kKikeUfw3h0hcK6tucy\nzjbC+AMA/wtAAcB5NtexMuVYTb6MivmK6M5RwkCSAJZ+gBWM5gVM
b0U2W+bExMTk7tQlyZ2RDMMe\nT2bC3IrFFcxml9Dt+P2qI2fMh4eHbdlmPMFNpnsYZZokt5+DGLcGPCnJkct1xwy2hN6MwRYJ71LJ\nTxKnlxLLNejuvpzF4gKbI0jmA6rVasouXI45EFujIY9s9oLUY4l+VhjKTl0S+2mzKIqxIgZzDQqF\nPm9oE+lCadEqsD17Hpp26FWy9Nf16PiPKZfXep8TX15dPm9yHQZTS4DbmcuYT4RxJYC/8f5OC0nd\nA2Cn9/enAbwz5VjcuXPn5G1sbKyJl1Qxn5DcOT7Grq4cXYlrH2WnmVbtEw1zjNMRyyYCC5jJvML7\nwkuIyVdIHafzKIyXMjKyjyMj+2xS3C+v9XMBfYzmMMp0g4xWMBr/rjGfv8gLG5FOVM+Plfuk5JeL\nThC4gVFpcGe8pOEt2n/RRycBLgTV43kj8Rh9jYXCUpZKG2JrGWNyBsaTBELrAYn6bHxGyEYmX5Mj\nDyE2U97sFygcYFQYUHopXC4jXvJb/7OUTPBXq9VYv0do3zvx4qQseIKl0gqbo0mvMpsLjI2NRWzl\nfCKMjJf0ztuk99rYY9YA+IJ9bAnA0wAuSTlW0y+sYv4iHr8OQ5G0dgbDVBYld3pR/ad4clk6vONJ\naT8hvoFAgdu2bZ80QqYax3+chMFMx3A2W+Ytt7zf/k8MrJSYfpRRkonLb/h5jXhPh19d5MuDFGlC\nKHlmMhdHjJcJxUXzOGHY4/Wv9FFmb+/aNeR1zqdXL0XJVyb0XUwXtttAV0E0xqg3lUZ2yQo1P0eS\nTGavZrS02JVM79o1lGhOHBnZN/k58iu+fJkUX322WFxBQ+wlunngMl/Dv45jzGaLLBTWMk5e+XyP\nehgNncCU1X4dwDcA7LD/uw3Ard5j7rKVUk8B+ECd4zT5MirmK8QAuDBBPI7tyi7jYZekwmyVpqIo\nPifBT26vYnT3ey+l1r5Q6OOOHffZEMkqulDU9tja7qchn3jXtuQ6RNFWEu1j9D2Z7u7LWSj0eZIW\n25iUr5BEf/y8UdJ05OAMcrm8xh5bjLCp6nKeR9Jj8nsVZPyp88LiIbb43HG53k63KperTOaWpgvp\n+OcMwx6++tW/yGTjpE9o0XDVyMi+iECl9JT4npd7Ld329VzK6Cxx/3UYuRL3/t1Pv7w7l+tRefO5\nvClhKMh4CMHfiUqlz1JmMsXJShl5TlzmfNeuIVvFU2S0ikp0oKR8dT2j8t7+0CJfbFC0oSS+LTve\nGo0nIcZHjHKPPb7s6IUk3kvT3e1CZGG4ZHJ3nZy6V2VUxynPqEhiXMSvOKnfFA/5mL6MeuQikuOm\niiyTKSd26v6u3AgYuqS8lMYmvYqDDMOeSU9tYmKCBw4c4M6dH2UY9rFUWl83aRx/X01PRrRkulxe\nbTvH4wn+sg0zTectJZV3oxpe8pnwyUrEJjujJ0MJQ3HOIlrSKQYxuYtOq4bxDVp0foHMT/B1oCQU\n1WuNu9+s5esL+eEUWYdUR0lZr59L8Xekfv+D5Bvist4HCRQipZnVatXLG8iOXeLoR73nx5vhnMKu\nhGGiu/lxxpv7XPjKN/T1DaCfLxD1WSED2dGLGq6ozYrXJ2syISDZrV80qU3lv49p7+2ePT6ZOxKI\nJsj9658kl+Rn6xOMJu+l0k2aNiWnIQKG8b4Yd3wd0aqEoWgB6s3ZTlbq1Bid1yBGs8ZyeV1kV54k\niXHvOdK30O8dt5uy846OMJXdajwBfNiuww9n9TCqQ+R2qV1dRVsFJeW+K5nJlJjLrbCPj8bit23b\nzgMHDvDo0aOxklI/9m+MZFeXkE48B2KqrPwwzG233RHTx4ru/jOZku1PEZmR+gawkfnn/ghTIZOR\nkX0xrynZje8n6v3jJ9/bqDclxQjJbu/kdYt6GGSyVJv2GH4PzV7vOkt40e+8Vw9DCUPRMtQzOtHe\niyG6+LCEg/wmMZML6O7eGIv7
+zvuCful3mtJQPofSBNSGIw9bitdEldCSn6u4TFbqy+9Hz7RxMNC\nBeZyPXRigfWMkfxf5kj787H94xk1WV/f6ejRozxw4MBk+Enuc9fCGLEw7IklwHfTkZjfHe5LticN\n4Ezmn/vvszP0QsCSMI823BWLq1NnnqeHJ503JYRmZmVUGBUwjOpJ+QTkz1o3hCkhv34GQZ5Rj0Ka\nECV0KYUNUQHMXbuG5uZLFIMShuKsxFRGZ3x83IYqfJkN2SlK2Cht5yiVRWJIJITVR2ChNcD+aFNJ\nTvu7SumbkAoo6fKV7mwjox0E59NVZvlhMklQLyWQ4zXXXGt39UmZkkJhwHoIQjajsePFS2qTYaY0\nsh0fH0+ZDhiVGy8W19nxskJmfign3cAK0kbeps0/T77PvqeX1owpieqQhcK6yPHTQ0j1Cevo0aMJ\nwhQ9qfhMEN/DdRInrqHQ6ZM9STdbvY+m89uvzjOemqgitwNKGIqzElM1UU1M+OWv8bLLITqPQGL6\ncp8v2yFhIz88MUYn5Cfd1kssaciuspsuxyHzEPwRoEI00jh4CaPzE6Rqps/el2XSk3CG5U//9E/t\ngJ5+GpKRSXwbmBY2KhT6Ujq3p+tqjhOraVgcHR2178E4k6E0I3PuC/b5RjZqjKPd3/XJxX89UkRw\nAZNSKWOMh9fSQ0jFSEWcQDyHYtGEj/z8yUxCaa6EWjwKUwTgSF08z2RorF1QwlCclUiXaVg5KYsd\nhpcyGiYao4SD0mP6kkuQL3KRTgNq3JKB7PJlrrNUComxN/MWzK5bynAH7W29Pc46Oj2h9YzuzKuM\nTtGTsIUkRffRL79MalBN0JGYT3wicriS+Xwvd+0aSuzw05KsriR5NdOSsq6pcYzJUJozfHEj60I+\n/XSd8OnkNTExkRIGM5IgYdjDXbuGODo66iX2za1QGLDd+fVDSNJDUZ8oHTlOTExMeZ88N15h55Lo\nNcpEwFxOVGvTPcB2ChAqYSg6EvWS1TNBuhDcbiZj9wvpei5Cb2TmYnZ1FZnP9zKf92dZD9IMJ/K1\nm0Qm3A/ByJhU94XP51fHqopkNrTsKkOauLuEUiR8NGYNSoVO/sNX1pUqKnmt4kXFE+rvYjShe0nC\nICd32/VzCFLJVO/x8h7k8ybUViisiTS8pRnZXK5s5VTiXdtR8krb6edy3czlelguX1anV8asT2RR\nfPXbRj53aeGypBIt6XIOJl+ydev2hOdh5mskPa9crsJC4VI2Ehqba7SFMAD8BwA3AXiP3Jq1iAbP\n37wrqGg6GnHrp4L/pU8vnX2SUUlxX8Zc4t39lAT2zp0f9caGijGWPIQYeZllUbHGeynTBw2ZHefo\n6CiHh4dtvf95dEnhD3sGXXb+opK7ik7+YxVNk6Ds4KXfQwyWzKOIGx2ZjSHXQyb9RQ1gvGN5qhkV\nfolx/PHOC7ksdUhSupH1taCSFUjF4oJUwcN8vpLoh/CJa6qEfaOGeLpZF+llw2OJz0H0NUSLGEwX\nf7w/JykN3w7MOWEA+BMrEPgpKxb4B3ERwVbflDA6F42qgtbbBaaFN6Kd2KKnJGGkKl04RUI+UWOS\nyZS9oUikS0xW6JLae+lKckXqQQTlRCG2j11dRdvkZ0JAuVy3TXpW7GPG6Tq41zHa6CfrKdF5JX7j\nnk8Ofp7GGZ0w7I0Zy6QxS0vepiHtWsdDLo28l8kmPDI6w8J0bftklLbTjyauHflJCKd+wn5mfQ1p\nHf9RbydeNjzO+EjYuJdULq9hV1fBekcX03mzGwkYD80XMWwX2kEYXwMQNOuks1qoEkZHIM3wT+Xy\nk1N7H/UMlImJSye233fRT9N7IeGpPrrRq/RuS+pM2hsjIKqstZhxl8E4UvFyMV3uw98FSyWVL0go\nXoMvg+HWU6ms45ve9BbPqKSpyQ56PRSrCBR4ww03JnbbxeICbt26bfJvf8BQWnXUTMggbYZEPA
bv\nhiB1J4xqXPNpuvM3GkqbadmujzQtqeTxPswoCSdJ2T+fy9n44UQh0XECY20PRQnaQRj/DcCiZp10\nVgtVwmg7pu+RSH65pksq1iObarXqJZqdQTXNbpLPkC/0diZzHkXmckutUZfdo1Q5+WWz2+iE5Qa9\nXIgcawtN7kPWGB+W9Mv0S2vz+bV2VkUxsZ6klHpUJ0pCPyKPER+8k2aA6+Ui/CY3OXZ0nnaS2KPH\nS2ovCdx7JsUHjRvxNLnx6STIp3rudKj32TSvU7zPdAmVm29+f+r5DGF2e4TpKwQUmcutmVVYtlVo\nB2GMAfgxgCqAz8mtWYtocA1NvISKmWK6HV69L/NUScV6yc1icQFHR0djxm2CwELed999sR4GIYGQ\n6RU6Y96u/UmavoaVdMluX5Npr42pS9JbFGj9Etoyk7OnhQAeYy4nHku9BHW09DOtqmcmqNf/4KqQ\nfOXbpDhfPF/Q27spdQ5H/Sa99KFD032W6sl7THcdZlpYUW9DYj5f00uopMmiu7nkY4x2dMvkw0XM\n5yttLaX10Q7CeG3arVmLaHANzbuC5zhmU800Xdip3nGjHdr1q3LiZGMSy764m+mFiIrYxVVUpeoo\nPmlOwlfddPX9UnkVHfIThpL0HmNyVrevNptuaMysh9Xe/5IJ6kplHYeHh1PnM8zmvYwTrhMMrKWs\nNdl9nDyG3+yY/l7HE9LxxHinYGoPQ5o1/fCgeYwvoRInQkMYcn3i0u+yUTDNpZ1AGlpWq5g1ZlvN\nlKydn1kM2dSupycRySjZmKSqzJOIh3eeZDZbZhj2MZdbxKho4Fomd9Fb7WPEW+im6caVBrxu+t5I\nNCktneXSPyGhLDEKqxn1JtaykYazqYxRI0jrRPaN944d91kjOdWgJdd97DYDosU1wUaUVptRRj0X\nSNuQOCLxu/5d7siMx63vYbkiBOn1EQ2paPFFLte+ORiCdngYV8LM234ewEkApwH8tFmLaHANTbyE\n5yZmmzicqku2UUxMTHg5CXPufL43NSxhJpwto+mIjk/NMw12xeLFzGZLns5P2i56HV2vhcy1kBr6\nvXQd3b3W+JeZ1JD6sEdCUlXVT1d+K4ZGSGOAThrCDCF64xuva6g8tBEDPFUe6Z577rWVQBuZy1W8\nyq70cFgyJt9vCaafXV2FGecLOhlp1zYpSjjGekUL9TyscnmDd63Tii8G214p1Q7C+EcAKwE8bifj\n/TqA32nWIhpcQxMvYWvRqTuvRsJKcUyXtG4UrrKmnyYEUGImU0rsknt7N1lvpEgj+S1hg3jPxQJr\n8LOM9ij4j/0YnZKozKuQ50uTnoSYLqOT8ZDX+k57fEl61zzykcdtZ3Qokl8i+zGGYU+EDKrVamJ4\nkd9DUa/cNf29cGTjjF+0AmnHjnsjhr/eceMkls/3TluiO1+RfC+kXFvekxqn8rDk+XH9qfe8532x\n540RCDk6OtrW19sWwrA/n/L+93izFtHgGpp1/VqKM2lgazXRzMbDmKqKaabJR/PlvNUattUECnzd\n616f0tAlMfSPMrp7X+kZZ7+bWhLS0cojJx/uy32IDMciRuU2aH/m7dzppXTaUH5ZrZTeSphLqrBE\nbmTqktR6hj36+tN1mKZ6L6IzH8xN5kifSSf02Yb4lD1DlMl5KrlcJdXDmq5E3G2KZKOysu0eWjsI\n43/CzOT+YwCfAPAhAE82axENrqGJl7A1aEat+Gzj2o2iEe0dH2mvaTYxeGMoQ++L6aprcrky8/lL\nGI2hS4+DSHuboUZdXUWaRj0/IS2lsCb+fP31b+MjjzxijYHfhe0mxjlDLxVRxsjn8yv4yCOP2C++\nhLAqjArNSeWUzMcQUhljNPF5GYECd+y4L1ZiHC3f3LHjvjpifNHP0FQJ3DS5ijBsTCH1TD638wlp\nr9NVhCU1s2bTx5Lsz2j/9WwHYSwDUADQA2AngN8DsLJZi2
hwDU28hK3BbHdqc/2FlS9C2kCaNPjz\nk/P5SiIXkeau+2t3CfMbrBGOK5SKJ9BPNxLVz10cJfAAC4XlfO973+cZeqmK8nf//TTeQ47GCxin\nSUbHw1X9jO4spSTyIltau5bREFYfDWllrQjiEKPzMWTNeSZJ0ewyowKB0fJN9/77ct/jBGqpTZDp\nCdyo0duz56GEwZuu2/5syVmkoV6nuRM5NGKCxeKK1LxDow2qpdIgO2XaHtkGwjDnRBHAxc068YwX\nOg8IY7aGP1mlUmv5B2ymaxVphUJhRd0vQ5rsxK5dQ1YWY5BO+kKUWqXCRHbrYvBW0YWexFiLEN46\n21cht/hozDHvmP10SrZ+xZAfOtpHl8+QpK/oQN1P37PIZMrcvPnXmO5NrPbOEy9pNdd3qq5mn5Sj\na+lnLleZtmfBf34Y9vDmm2+NvBdpQnppn4mzMWchSPvMR9+TKMHHr1G0RDzazR09drqW1rnkYbwV\nwNcBfNP+fbk27qVjtt2o8SqVuJFoBnyD0IgEhP88t4ONJ4brCctJn4OUrsqu+xa6qqODNPIbFzKa\n3BaZBREJ9BVd5Uvdb0mlzzvWJkYntu2jCSctiq07Pj3uE4nX5MpjVxDI8/rr35YiPGcqpcLwYuZy\nZTvgR4girjg7vUCgdFpPV0023fubJvI3VSPeuYR6neZJheT0a7R163a6HFyRW7duI5nmfZhQaaGw\nMqKs2w60gzC+CqDXT3QDeLpZi2hwDU27gK3GTHdqc1GlEtfTkdGdU0lACJKSzsnu3vShOHtpvIVB\nOg0lmcMgHdriKXQzOtZyGV3fg++R+KTSQ6cQK5pTYx459NOEo8Qj8Ee7vpJSImxGq/oNfL4IoDMe\nUQE8Q7Ll8ppJGY/oTtUnuagBmurz0YwEdPIY45xuHvdcoRO8mLQ1RBWS06/RVL1ISe/FFC6USsnp\nhHONdhDGl+xPnzCeatYiGlxDs65fxyHNSBQKyxmGfU1Jgidj3IYcNm9+d0M7z1qtllKFU2OptJqj\no6Mpu9oqze5bmuPESI/Z++RLVyNwvTXW77fE4Y9a/QCNF5E2dKiPMuksn1/NfL7C669/B4vFBXYO\nRpqW06Wx/xt57TD0R7wepuvZiBqPqWZIkMkGuhtuuHFW3uZU52jE4CaPkawCaoeHMVeFHdNdo7T7\np7vuhw4d9kKr6aQSDSvKJsVIzpxTI1oB7IeZhfEUgFVW3nykWYtocA1NvISdhbReh2Z+waca+mKG\n3qR/AfwvVlo5qFRLiddy003vtl8qvztbylD9kaeGAHK5xd5j+2mE/vzO7UvpvIxfYbTMNb0vZGJi\nglu2bGG8qxxYzi1btiRer5PXFo9HiK1xGZP4e9loorke6p1jJgY3abySVUBzibkq7JjuGk11f73r\nXqvVbMjKL7BIfw21Wo0HDhxgGC6nq8pbSZFhaQfaQRglAEO22/srAH4bQNisRTS4hiZewvahngHx\nP6xh2EOnpEn6s61ne07jIURHXpbL6+q62GkJ7B07pJP4cq+G3W+ikwqhgzRd1qRpbJPqoQKBxczl\nunnFFb9AVzYrzXdV+z8/hr+FxpPooQkv+eJ/5ib9Bq5KZVWC3IAi77rr7sT/o0lPf/aGkNaqiFBi\nvGGrVZhNSWfaMQ4cOOA1Ck6dq2ol5qLXoxHvLJqYPpjY+ad9P8fHx+28i02Ma3Ht2HFfalVgmtBj\nodDfFi+jHYTx8wAes53eT9ubhqRmiOl2P+lJS1e5cSYib1u2+BPBzAfYjMaUmvFB5vO9dRRkXSOZ\nrMHMXJZZ1TLuVPoSxBt5LPbFMV9SIyEuY03j0gxp+QRpvpNy2mi+J5utcHR0NBZ2W2iPtYRAga99\nrciQJ3fayd140nuZq3BKPSQNbmObiE7psZiLdUxHSu5+Mfqb2MjO33kY0c9xNltObKqkgzwMBxkv\neiiXN7Qlb9QOwvi6rZ
RabnsylgFY1qxFNLiG5l3BNmCmX5hk5cbUJX9TwSXrxFgajSVXjVNPjI5M\nKw+V2crOgPfQuN2r6JraJG+xJPbFqdFUIK1gdMqZfIn9kJPIe5xnHyvhLtGP2khJeJvpen6llSSe\nB5nP99gGLfFMJggcYKm0JhF+u/lmIVbjpWzduq1pxu5MEr7RNczss9ApPRatXkcjHkbU8Df+Xh46\ndDiyuXKqyembKlf12P7KtHYQxtFmnXDWC53nhDEbl9xVbtTv/p0OTinWHyB0MYHQNqA5Q14umyT2\n6OioN+d4mC68ZBqbHIlJ97bs/kOa6iPpjfgok675XpqEeFo+4jG6SXby5Tyf0YFHMgRI1uKHGIQU\n4tdrL6NNdulVYfXq7OvpP81kt9gMD2Um5Z9xdEJ10lysYzpS2rVL5Mhn/l5K2bPcpt5UVVK993ag\nHYTxOgCfBrAZwK/IrVmLaHANTbyEc4/ZxqDNc9Jr+uMf8vpx73hoiIwm1v0OadnJ99NJeYikd58l\nB1/sTyqttlrjLpVO4mVI/kJKWWVa3jV0Mhuyow9pQkli3Mfszy1MFxiMz6MQAvKvlzTmlThdtVA9\nUjeT6mZfhNDMcEwj5Z/nOqYipWZ6i+44/ufQvSf1BjDNNdpBGH8Ko1j7WQCfsbc/atYiGlxDEy9h\nexDvxm2kUqXRXWXaDtYZwCGasE50Z5XPL2U+70+d8+de+677LXTdx5JQFm0mkeAQQ/6b9nhd3pon\n7DHE6O6z94n8hwkRGY8kZFLMr8ZoItwY/miZrHxp6xFE/XJZPywVNyaFQh8LhX6eSZXRTJokp0On\n5CTmM5oVGquf++qs96QtOYxmnXDWC50nhDGdyy0SG3El0nrHmZiYYLVaTchU+yV/9foDJiYmLNl0\n0+zk/f6HgwzDXjvZbiVNqEdCSX63tEh6l+iEAMWTkOY0+bKIEu15HpHQGms/LCazA85nNF8hhlk8\nmzE6b0KIwDTcFQprrQTG+1O+tDUaryQ+S2P6wUBxY3LDDTfSEW3U2DcaXqnXBzPb0tZOyUnMFVoR\nxmrWMeU4osvWqKjnXKIdhPEZAJc066SzWug8IIxGqqDi/RZ+D0H8wyfjQguFdakfQjfE5WKmxWWr\n1Spvu+0OSwTLreGXkNMgs1kZ/FL2CCDuYRy0hl8GBj1J47EsppH0CGlCWRIOEjmP5Yz2NMgx/XNI\nhZU04flhJyPBkMtJfqQUW0OUGONf2jDsYRiuYTy2nMmUpzW2PlnXS5I2KtwoSOtjOZNdaKfkJFqN\ndlenCRptmJyJqOdcoR2E8TWYSXtft817WlYbw1ShAvkgRRNlUhW0mvl8D9/61ncwDPtYLstO2Q+n\niEfQMxkPTdaU96QaRtOwJ/mLxxgtST1oDXuZTpZDvIkL6PoqivYYq+25pCw2T9NDIfIey+06LrUk\nULbHkzDQEJ0X41+DC+hyG2OU7th8vsKhoSGvV8TvkzDEEi8tFY8rWWZbX7a6HpJlmK72fqZhITcP\nJErqmnuoj04JvzVCWukl8e1bs492EMaytFuzFtHgGpp3BVuAeglTf2yma3YbY7T8068WGqcrVb3U\nM1YraITuLmWxmCaVLYNbNtKprfolf3lGw0QTNBVQ/oAhIZ+9zGbLPHr06GQ4zHRwF2lCPTmasNMr\nrJG/xN5XoZMdT5P7lsS0v2N/zLtfmvyc1pOpZ/e1p6YuM3Ze12XM5SrM53tnlDPyUa/RK0r8jRn/\nTjF+8wmdMNipkfctPi0y2nTb/o3BnBNGJ9w6nTDSPlhuOEu03M6UucpuXXIMfrVPD00OwfcMoh3Z\n0Q7l+AyF+xjXvCmX1zCbFVLxhxJlmNbQdtddd0+K6snru+aa6ywBSFlsSNehLWEpnyCOMipbfom9\nX67LYpoJd+voxASlnNc31L4kwz4mezWMIZ+YmEiIOOZy3XWrVBrxNKaeP5EeWpzJsRT1
0QkkOx1p\npX0WOi0BroTRoYiLz0Wbxcytu/tyr89B5L3jOk+SBzjfGmIhGDmOCcVIIjyZ8A0TH1qjyLqSwLs8\nguij8RT83ME4g+A81pNwNiGzXhptJ5EAf8z7KYlmSXTLuicIPEATnnrI3reeQJFdXSVLLK+waxxn\ndMbyEJ3GlCTMJYHuOnbNcKVoghoYSO2GbrT4gJx6/kShYEZxFovrGyKBcyX30Cy0m2SnI600QgnD\nJczne9ndfXlHbAzmFWEAuBbAMwD+GcA9UzzuCgCn6vV3zAfCIOMD5n0iMDvhfL5nUqvJJFRFeE92\n4WsI5PmqV/0Co9VCsnOW0MwgC4X+yUS4JNoyGTH2vscgOk699ucA3bQ6OX9aiIj2Z4Gjo6OxeRA9\nlswkkb6SpjR2BaPlub7e1CpGPRDS7cj8XpF4v8SYva/CZFJ+jNJkl89L/kXOdymBkNu2bY+8R81K\nQE8ld61oHtpNslORVj0ZHaPTNvMwaCswbwgDQBeAZ23OIwfgCQBr6jzu7wD89/lOGIJoKaWoVg4y\nl7irThAAACAASURBVOuJlMTu2jVkO0JD71aiSQ5L74TkKJIzFvzhOnv2PESThJYks3RDy65/iKYx\nThrpZDrcNrrmuVzs+UMESiwWTaXW1q3b7M56hWd0H7K/f9j+vIauN8OfmdzDZKmrzGoYp/E49jEt\nRPbGN15LE8KS3dwQDWGJh9HHXO586634z4+WsNZqaVLtTsBwJuiEGLtibjAVaXV6P8Z8IowrAfy1\n9/eONC8DwHYAdwD4o04jjDPZ3dRruotPUDOxd+mO9nslJKa/1xq4RxgNuZjcgez+DfFcHPvQ7qUb\ndzpGt0sftMZZdvVj9ngP0FVpSQNcNCdgwm0LGW3YE6/gV+i8GSnJlc7tK5mcrHcNoz0XQmLyGk3f\nw/DwMKPekD/kyA10MqE3/xxm3WHodLLSpN7l/pl+NtodY1fUh//dnen3eDaPjyoDd84GYj4RxjsB\n7PP+fjeA4dhjLgQwZn//TCcRxmxrwP0P2+joqGfkpaN5SSSu7uSTL/QeKyWuOboEswj67aYrfx1k\nEBQZBKFHDJLUXpTyXFFxlWqlqxmdQbGJLhEuRCX5AGl8e8ySifRpSLPfRkrewE3By9Mp00qIbTdd\nqW5IM12v27s/2VznCFFex2JGCcsPX8W9GOdBJJvozmw+RLtj7Ip0+N9dEQps9Ht8Jt/7TtxAnG2E\n8X8CeBUdYbyzzrGaehGnw2zf/PiHzegQ+V3SJpF8/fVvn3yOM4b+bn9B7GcPXbw/LhkujXe+QZWp\nc2PWGLs8CpC3BONrP/nHq9DNyxZJkE32eK+kS2anDR3ycyASXsrZ22qa8JcQw0GaMJRUisn5t9Io\nf0aThuKxlUormMuVrdpuUmcrDP0y4eR7NxuJlnqfkbmaj6FoHMly6GTjZb336kyNfiduIOYTYVwJ\n4G+8vxMhKQD/am/fBPDvAH4A4PqUY3Hnzp2Tt7GxseZe1RhmE5+up0Xkehj8RHKRR48e5a5dQywW\nFzCfv4Bupy8yGqKlJAqbUgHlx/JHmZwTIbLgUvLqd4EfJlBiPr+Mbid+n/eY++g6qn/FW/cEXT5C\nyCFq4MNwgK6Cyd/5i6qt3ynuPyYuLjhOYAmHh4cTX1Tfe5tKZ2vPnocmhz3V67o/EyPfKR3IiiSi\n3135DjX2PW5GXqrdSfqxsbGIrZxPhJHxkt55m/ReO8XjP9MpIanZ7DTqKYm+4x3vZFz0DlhkZTnE\n2Ils+G46faYxRuU0DlqykFj+/daIy0Q6/9yyu9/rncPXa+qhifVLtZSI9ckgpEGacJI0zUkYaD2j\nHeEmkS/VWkePHrWVQzJ/W9ZTY1eXVGX5Eh/SuOhXN5lE9Y4d9zb0PgnppulsteKLW29j0G5VUoVB\nOz2MtOO12/ucN4Rh1oprraTINwDssP+7DcCtKY/t
qKR3I+6lrx+T3O2ahq7Pf/7znoGsWSPuy3BL\nAltCS2LUu2nyCYM0oRyJ+UtewO+BiEuJ+MQgYSV/yJCQg0iKiBcgiq85Ojny3tjND18lx1yaslU/\nvGbKXovFBTx69CgPHDjAm24SWXSRNU/2jszkizqXX8zkLtQQarl8mXobHQL/uysd/42GiZqtZttu\nL3ReEUbTFtqBVVLygTBVN5LMlZ23kbaQhq43vvE6upLZxXQhJpnMtYky89okj/3chOQYxgjcQZO4\n9iXAhQTE+7iIzjNYQJNULjCbXcpo+Mr3AsZpekCkb0IS1ufR6T8dpuvuJuu57E43aTv9vM0NN9wY\nuX5Hjx7lAw88MBmam+1gm7nGmexgFXOHuaySSnu++YxEN0zt+FwoYXQAokZD9J/8clE/n3DQqqf2\nxYzMWOxxe+ka23x57ld7Rlo8gdWMJsZF9dXIb3R1+ZpN5tj5fIV33XU3XajJb2CTwfV+OEwa/nzJ\njgrjMiXxL0KtNv0ozPjuSxoP54vhdZpVq+cN0SnmDuPj4wxD2chtIrCAhcLAuTHTuxNunUYYyaE4\nfg6hSiePUW8kqhh+X+JClGVL1liL+quQiJDD7YwOMYp3Tz9G08DnexmXEQiZy/XYHop4h/kr7c+L\naDwc2fE/xGgYzC/B3Uij3prMNUzlMdQT9YvPFOj00I4o484nolPMDUwTbbLQZTq9sVZACWOOkeae\nJo3eFs+Ii+SHNMltopPhGCNwryUC6YmQnfg4jbTGxTRztHsJXE+XEBY5DvkpORO/2U2qrJbb+yVH\nEfd6RFuKdMJ/cvNzFUJST9JUZPkJ9IMEiqlaTVMlD5Oy4UYLateuoY5IEs4UnVhKqWgfnJqAv0Ek\nC4VL1cOYq1u7CGOqxNWhQ4e9SqeVDAKZT/0kTXmqGNcJAm+jSSTHvQXJI9SsQfYHGYkIoYSjPhp7\nrt9YFw9PSVjLL9X1BQz9cJTIbIhESa7O8+Iy6v3M5Sp1jXvckEol1VSDiWYTb+4EzMc1K1qDZqoJ\nNANKGHOE6Urskl7Gx7xdu/Q/bGdU/E+kzKU+3E98X0gzTEg6vMV4C0n4u5a0+RAVugS4zMCQ0NMY\no8OTxuz9IkjoV1i9i9Hchv+83QQKLJVWMwx7Jr2Cqa5h2hSy6OhTc+vp2ThZItvuyhKFYrZotprA\nmUIJY44wPj6eGIZSLK6bdCuToZX1McMrVU7dNLkKX8p8jG7WRTejCqwHvJ38XkscFZpwlU9QexkE\nIQuFfiuzLeWpl9ElsSX0FPcOuu3/q3Zt8jrFW4mq3eZyPRFPYSaGvV7fQqEQLbuNzvhIErRCMV/Q\nLDWBZkAJY44wMeEL3JHxxFV6NZBIgKyi6aFYQmAtow14Uraao+uzEI9Dps4tofNAKtbA+4lu1zR3\n551bmclU7H3Sjf0xGo9FvAe/58PXhfKJSsJT0ZkSlco6VqvVSIniTAx7ve7ZG274VcbLblX9VXG2\noFPClEoYcwTjYUiIZyPTSuOi1UBSLTVmd+7v9Hb9Ze93ERSUm+y0+1J+l3CTVEPFJc5fY48nwoO7\nvXP5TYB++MokrDOZIsOwz0p6+PO7o7kF6WIWzaSZjiit52E08j/1MBTzDZ1CFAIljDlCI8030fI5\nGY3q5xeuZjRM9Qm6LmoJH11nH+NPv4vKb2QyJVuWKyW7JPB5RnMjfi9IlUaCwy/9HaJrHlxEoMhC\nYS3DsId79jzE4eFhlkobGC2bNaW4xaKZf1Esrvdmkzdu2OMJ8OhMckc69WQ+FIr5gE7p7vahhDGH\nmKpk0k1ukx28n2SWUte4+N+4NeRZuj4LSWpXGU9M5/OV2LQ7me+925KFhK6kr2MDo6WyfZHjOTmR\n5Izw0dFRjyCrltziYoPm8blchYVCH8vl1SwU+hr6YsQ7b+t5E522Q1MoGoHKm3fQrRP7MKKT26p2\nh19ktIt6jE66
m3Qlr2+wP2/gVBPufEO8datIbZxvfx5gtI/j1+h0pfoYna5nvJRcbimTJbZOC6mr\nS8JSq2k8IZmx7XsDNebzFzKf703VT2rU4Gv/guJsQqdOYFTC6AAka62l/FQmzokX4M+UkBGOkhgX\nYcD4SNECt23bniAoFwYa9zyJ6+g8mxKBV6V4BSZnYWZep93nS4j48iVpzXuHmTYqVnZSM3XJpyMX\n9TYU8wXqYXTQrdMIY2JiwmpD+VVLIrkhM61Jl3OoWY9gHaPhncfoJtzV/6BVq35oKz5l7k5LGiIu\nKH0e0v1tbuXyak+eRPIUSxidheGLEUrJcHwaXnJoUU/PxqbLZHRiPFihmAqd6DUrYbQZ8qEwFVSi\n/SQ7/0sYrXYapZPnqNGFrJyxLZVWJ/o94q6sGfXqk4opzQ3DlZ7XUGZUvjxKQskqJJMjMf0Q9zOq\nSeULJPqJ9PUphGWIYabVU1OhU3drCsV06DSvuJmEkYViRjh+/Di2bLkTL7wwBmADgD0AHra/Hwfw\nHQDLAVwFMwokA6AbwJUIw8U4cYIA/g3AEQBlAN/FSy99D11dOQBP2eMcwYkT/4JKpTJ53r6+PgC9\nAK6GmUf1HIA8Xnrp+wCW2PPdDeDjAO4C8Lv252sBLALwr9i16z9j8eLF2LLlauRyy3Dq1HPYv//T\n+OlPf4rbb98KIAtgpz3HRQB+hkzmzTh9+iK7rkUAvgvg+wA+Zc+5AMXij7F//6ewceNGnDx5zHsd\nT+HUqecwMDAw4+t87Ngx5PMDeOGFDfY/G5DLLcOxY8ewcOHCGR9PoZgrLFy48Oz9jDaLeVp9Q4d4\nGNHElkyL80UFpbei1/MsSKcLtZpuToSZu53Nrpkc8mI6tt0cDX96nMlhjFlP5jHvvH5u4xX2PIus\nFyDT7VYxDE01U3wHND4+zkJhBeMNe8D5dta4n68wnk139+UsFPoS0iDNcsnVw1AomgNoSKp9iPZm\nSOjpplg45+3WYPtqsH1MNufFxcn8mRlJIynGuFS6hLlcmZnMxYxKfkhSfZjJ5HZ9o+s61n2CG/Ne\nz2F7/MGIiGCrE9WdGA9WKOYblDDajJGRfXbnvdTG9PvoRAWl0kga5ESWXMpYpTN83Bp4Tt7CcIVt\nnHP/i+cARkb2MQz7WCxK7kL6PcQr8OdsxMth6+cUjPKu3yhYjuVVaiyXV6dKmbcSnRYPVijmG5pJ\nGJrDaBBf+9rXMD4+juPH/w0PPDCEXG4RTp36NoAfA3glgAKAbwP4AkxO4RiA0wAeBPBXAN4Jk7c4\nH8APARwG8C/w4/0nTnwHJufxFEy+4As4efKbkzmA48eP40Mf2oETJ/4HgMcB3AvgVgAvw+QVrgAQ\nIgxDAMdx4sTP7DrSj+dj8+Yb8frX/zIef/xx/OQnPwEAvO99d3jr+z5efvlH2Lhx4xlfS3ktx44d\nw8DAwJTx3rM6HqxQzDc0i3lafUMbPQzXMLecrmdCZl6LLpSv2SSlrb7O1HY6qfLz7WOW0UlwiJRI\nUlxQQjHR/Imo0Uppb1RGWcI5hcKA5+2sZD7fO2Voxy9llbxKs0NCWi6rUMwdoCGpuUNUsVZmd4uc\nhwj9STd1kUYjSvoqxuikzHtix1nr3TdOI2MuMuT1JTOiiWC/38Mpy0rISUaImrLZ6ZPH9UQCq9Vq\n00JCmsxWKOYWzSSMrnZ6N/MBX/ziFwFcCBOWGYAJO/XChHh+G0CXvT8DIABQARDimmuuQi73NuRy\nZZjy2gJMqGoDgCdgQkX3wISqbgHwIeRyNZiQ1oB9HBAvJ92//1MoFq9GT88mFApfRjZbgwkbLQQQ\n4vTp702GnBYuXIj+/n6E4fLU48Uhpaz+Y/P55ejv729aWCjtHPXWo1AoOgtKGNPgggsugMkPiFHe\nDqAG02/RC9Ov8AOYHoZ/APANADtRrf49Tp06hVOniEwmg0zmZzBkcwTADpjcxm
6Y/MfXsWfPED77\n2X0oFH4DwDP2fEC8l2Hz5hvx3HPP4ItffBif/OQnEAQE8GoAK5HP/xL27/9UxLgPDAx4vRHJ4/mY\nyWNni7k4h0KhaBGa5aq0+oY2haRqtRozmRL9OdZGaTawIaZumkl4vox4cl51LldhV1fBq2CqH0ba\ntWtoWiXY5HjYgywU0mcGz6Q8dS5KWbVcVqGYO0CrpOYOCxcuxJ/8yR/h5ptvB/kTnDr1Al5+OQug\nBOAlmEqo78FUSz0F4ASMJ9KNaGhnBf7iL/bgW9/6Fu688zdx6pSrPnrppe/gm9/8Jn784x9j48aN\nGBwcRBB0ASjan0kcO3YM2ewy7xw3IZ//XTz++OPo7++PVB9JBVQjVUkzeexsMRfnUCgULUCzmKfV\nN7S5D2NkZB/z+R7PQxBhPkl6S+J7gFHJ8WRiV3opursvZy7XzUymPFnFlMt1NzScyM3i8L2Ybq0+\nUigUEUCrpOYWLvxzkK5RL96tbabpZbNF7tz50UgTnF/KeujQYRYKfSyVBpnLlZnNVmLkcpBxccJ4\ns51bj19OW2Au10OtPlIoFD6aSRgakmoADz/8CF54YQGANwD4AEzD3C6Y6qelMGGhUQAfx0svLcHH\nP/77+IM/+D0sX74MALBx40YsXLgQx48fx3vf+36cOpWFSZjX7M/z4UJLbwDwfkwl4OeE+e4G8OsA\njqFY/FV0dXXj1CkV61MoFK2BEsY0OH78OIaG9sCUzIpK6x0AXoFstgby+zh9+giAOwH8OYAyTpz4\nGT70oXfiueeeiRjrxx9/HKdOnQZwFKJKC1wH4GfwO6pzuS5ks76i7FSVT+Y55P+Ll1/+CZqhFKtQ\nKBRpUMKYBseOHUMYrsCLL94NI/t9IYCf4Rd/cQP+8R9/CqAXL7xwLYAFMD0VAwCOgeyps7uXng7A\nyIP3IpN5HqdPvxrAIuTzP8KBA5+eMiks/RhRmfIRAIj971PqXSgUiqYhMCGuzkcQBGzHWo8fP45l\ny9bY+RdfBvARGHL4AYAvwRj/z8J4HfL3UwCuxMTEV7F27drIsS66aBVOnvyfk4/L538JTzzxD/j2\nt78NwIWvGl1bnFQa1WhSKBTnBoIgAMmgGcdSD2MayG7+5ptfixdfPAnTnHcCJs8gnsIlMA18Ln9Q\nLA7i+eefBxA14gcOPIwtW65GV9dFePnl72D//oexdu3aCLHMZG1p3ocShUKhaAW007sBbN58Iz74\nwTuRlAiRbuWfwXR+u+5lwEh0PProKJYtW4M3vOF2LFu2BgDw3HPPYGzs03juuWewefONc/lSFAqF\nYtbQkFQDOH78OBYvHrTVTUdgSOMTAB5Ed/fFeOmlb2HLlndj//4/RSZzIU6d+hZ++7d3YsOGdXj7\n2zd741yfQrF4dSIZrlAoFK2ChqTmGKa66QIYsUGZqf11bNt2K9785jcBMLmHdevWYfv2uwGch498\n5D8hDC/EiRMLUE9IUKFQKOYTlDAaxvcArIURBvwCgFtQLJbw9rdvRj4/gBdffBYvvwycOvWXMNVS\nX8KJE4sAXAwtdVUoFGcDWk4YQRBcC+CTMPmS/SR3x+6/CUbnGwD+HcAdJJ9u9bpmgiVLliCTCXD6\n9C8BuADAD5HNdmF4+GG88MIYXnjhazBJ8FcCKCMqT74XwKtRLq+0SW4tdVUoFPMTLU16B0Y57w8B\nXAPgUgCbgyBYE3vYvwL4JZKXwcR8HmnlmmaKRx8dxWWX/QJOnz4J4CTMONSTeMc73mLnOiyCadr7\nHIB/g0mAH4NLgK9FoZDHX/zFHk1yKxSKeY2WJr2DILgSwE6S19m/d8Domuyu8/g+AE+TXJJy35wn\nvY8fP46lS1fjxRcJw61HIKGlQuG1CIIuvPDCHwB4CMBXYeRB7gQQAvgJisVBAN/D/v2fUqJQKBRt\nwXxKei+GqT8VfAfAq6Z4/C0A/rqlK5oBjh
07hkzmAgCEmaTnkteZzFJ84ANvxu/93h04efIlGI/i\nRgAXIAzfhr/7uy8gn89rA51CoThr0DFJ7yAIroZR0vuP9R7z4IMPTv5+1VVX4aqrrmrpmgYGBnDi\nxHdgZl4U4ZLXR/Dii8/ik58cQRguw+nTz6Kr6xdRLA5aSY59eM1rXtPStSkUCkUajhw5giNHjrTk\n2HMRknqQ5LX279SQVBAEG2CU+64l+S91jtWWkJSR8ngzgD+D4dcemGFJGZiubxei+su/HJ2RtIdC\noVC0Gs0MSbW60/srAFYGQbAsCII8gF+FyQ5PIgiCpTBk8Wv1yKJdMMKDSwH8DYAqgM8A+CmAPOJS\nIPn8cvT39ytZKBSKsxYtJQySpwFsBfC3AP4JwGGSXwuC4LYgCG61D7sfRs3vU0EQPB4EwXgr1zQT\nGBnx5wAsgVGW/V/2ns8B+BF8KRDtr1AoFGc7VBpkGjz88CO4/fbtAO4CsBtmYNI34CqiFiAMf4TP\nfGZEK6EUCkXHoZkhKSWMBvDggx/DRz/6n2Em4/0IRsZ8EYA/RybzYTz99FdnpTarUCgUrcZ8ymHM\nezz66Cg+/vGHYLq4fwTgQQCvgenm/l10deXwxBNP1T+AQqFQnCVQD2MKuOFJfw7grTCyIPcA2A5/\nWJIq0CoUik6FehhzhGPHjln5j6sAbAHwXQC/CWA1/AqpbHYpjh071o4lKhQKxZxBCWMKmCqpYzDV\nUG+CERZ8BaJaUU/hxRf/VSukFArFWQ8ljCkg41mLxatRqfwmjLDgvwF4Ccbr2GR/vty2NSoUCsVc\nQQljGmzefCOee+4Z/P3f/xFGRoaRyZyCSYB/HcDDAL6OYnGVhqQUCsVZj47RkupkLFy4EAsXLsQV\nV1yBf//3f8dHPvKfAHwfwBXQpj2FQnGuQAljBjh+/DgeeGAIprT2ahh5kG/g93//v2iFlEKhOOuh\nIakZwFVN3Q0zqvXTqFQGsWnT5e1dmEKhUMwBlDBmgGjV1EIAIU6f/p6GoxQKxTkBJYwZwK+a6unZ\nhGLxap3RrVAozhlop/cscPz4cRw7dkyn6SkUio6Hig8qFAqFoiGoNIhCoVAo5hxKGAqFQqFoCEoY\nCoVCoWgIShgKhUKhaAhKGAqFQqFoCEoYCoVCoWgIShgKhUKhaAhKGAqFQqFoCEoYCoVCoWgIShgK\nhUKhaAhKGAqFQqFoCEoYCoVCoWgIShgKhUKhaAhKGAqFQqFoCEoYCoVCoWgIShgKhUKhaAhKGAqF\nQqFoCEoYCoVCoWgIShgKhUKhaAhKGAqFQqFoCEoYCoVCoWgILSeMIAiuDYLgmSAI/jkIgnvqPGY4\nCIJvBEHwRBAEl7d6TQqFQqGYOVpKGEEQdAH4QwDXALgUwOYgCNbEHnMdgEGSqwDcBmCklWtqJo4c\nOdLuJaSiE9ela2oMuqbG0Ynr6sQ1NROt9jBeBeAbJJ8jeQrAYQBviz3mbQD+GABIfhlAbxAEF7R4\nXU1Bp344OnFduqbGoGtqHJ24rk5cUzPRasJYDODb3t/fsf+b6jHfTXmMQqFQKNoMTXorFAqFoiEE\nJFt38CC4EsCDJK+1f+8AQJK7vceMABgjOWr/fgbAa0n+MHas1i1UoVAozmKQDJpxnGwzDjIFvgJg\nZRAEywB8H8CvAtgce8znAPwGgFFLMD+JkwXQvBesUCgUitmhpYRB8nQQBFsB/C1M+Gs/ya8FQXCb\nuZv7SP5VEARvCoLgWQA/A/DrrVyTQqFQKGaHloakFAqFQnH2oK1J7yAI9gdB8MMgCJ5Kue83gyB4\nOQiCBd7/7rUNfl8LguCN3v83BUHwlG0O/GQr1hQEwQfseZ8OguDj7V5TEASXBUHwD0EQPB4EwXgQ\nBD8/x2u6KAiCvw+C4J/sNdlm/98fBMHfBkHw9SAIqkEQ9M7VulLW9AH7/0/Ycz4RBMGfB0HQ08Y1\nbYvdP+
ef86nW1ObPeb3PVNs+60EQhEEQfNme++kgCHba/7fzc15vTa3/nJNs2w3AfwRwOYCnYv+/\nCMDfAPgmgAX2f2sBPA4TRhsA8Cych/RlAFfY3/8KwDXNXBOAq2DCaln79ys6YE1VAG+0v18HUzgA\nAJfM0ZpeCeBy+3sFwNcBrAGwG8Dd9v/3APj4XK1rijW9HkCX/f/HAfxOu9fUzs/5FNfpKrT3cx5f\n1zP23O3+rJfszwyAL8H0l7Xtcz7Fmlr+OW+rh0HyKIAfp9z1+wA+Evvf2wAcJvkSyWMAvgHgVUEQ\nvBJAN8mv2Mf9MYC3N3lNd8B8IF6yj/lRB6zpZQCyq+mD6V8BgOvnaE0/IPmE/f15AF+DMYBvA/BZ\n+7DPeudo+brqrGkxyS+SfNk+7Et2nW1dk727LZ/zKdbU7s95fF3PALgQ7f+s/3/21xDG6BJt/JzX\nW9NcfM47rg8jCILrAXyb5NOxu+o1+C2GaQgUpDUHnilWA/ilIAi+FATBWBAEP9cBa/oQgN8NguBb\nAD4B4N52rSkIggEYD+hLAC6grXIj+QMA57djXd6avhy762aYnVRb19Qpn/PYdeqYz3lsXW39rAdB\n0BUEweMAfgDgC9bAtvVzXmdNPlryOe8owgiCoAjgPgA7272WGLIA+kleCeBuAP+tzesBzG5wO8ml\nMF+oP2rHIoIgqAD4M7uW52F2Xz7mvKoiZU3y/98CcIrko+1cE4DT6IDPecp16ojPecq62vpZJ/ky\nyY0wO/ZXBUFwKdr8OY+t6ReCILhE7mvl57yjCAPAIEyM7ckgCL4JczH+7yAIzodhxaXeYy+y//su\ngCUp/28mvg3gLwDAMvnpIAjOa/Oa3kvy/7Jr+jMAV9j/1zt309cUBEEW5ov9JyT/0v77h4HVArMu\nb20u11VnTQiC4H0A3gTgJu/h7VpT2z/nda5T2z/nddbV9s+6PfdPARwBcC3a/DmPrWnMrqn1n/PZ\nJl2adYP54jxd575vwux4AJe4yQP/f3v3ExpXFcVx/PvzH2qUltqiK0XESsU/Na3EGCtUReJOTXdC\nraCtGylKQcSFKCJ0o5RKFoIoLbjyD4giVEVJY4uk6T9bdVWpC7XuRIpWkePinqFj6KQvZSZ3iL8P\nhLy582beyeROTt7cmXO4lv8u3LQWfUQ5DRvtZkzARuDF3F4OHO+DmI5SPhEPcC8wVSGmHcCrM8a2\nAs9G58XAnsbVIabRfLyumDFeLaba87zD49QP8/xMcVWb68BSYFFuXwJMUP4gV5vns8TU83l+zr/Y\nbnwB7wA/AaeAH4HHZlx/jHz3SF5+Ln/Y78h3TeT4KuAbymLOtm7HRDlV35nH2NeavJVjujNjOQDs\nBW6b55hGKC+tHMwY9ueEXQJ8RnnnzS5g8XzF1SGmB/J+j+fl/cB45ZhGZ+wzr/N8lt/dhZXneae4\nqs114OaM4yBwGHg+x2vO804x9Xye+4N7ZmbWSL+tYZiZWZ9ywjAzs0acMMzMrBEnDDMza8QJw8zM\nGnHCMDOzRpwwzLpI0vm1YzDrFScMW9AkPZM9Aw5L2ixpi0oXSCS9Junz3F4raWdu/y7p5ewrsEfS\nshxfKund7EXwtaThHH9B0g5Jk5RPKp8pjmskTUjal1935LgkjUv6NvsqfCzp4bxuUNKXkqYkfdIq\nRWFWixOGLViSBoFHKbWHhoHHgd3A3bnLKmAgzwrWUEosAAwAeyJiZe7/RI5vo5StGALWAW+286Tg\nkgAAAdxJREFUHW4FcE9EPNIhnBPAfRGxmtLbfnuOjwFXR8SNwPqMs1VTaTswFhG3A28Br5zL42DW\nLT3t6W1W2V3ABxHxJ4Ck94EhYFDS5ZRSK9OUhLIGeCpvdyoiWqWhpymNacjvKyQpL18m6dLc/jAi\n/pollouA1yWtpJS/uD7HR8iqsBFxQtIXOX4DcBPwaR7vPEp5GLNqnDDs
/0SUZjw/ABuAryi1eNYC\n10XE97nf3223+YfTzxMBQxHRfj2ZP06e5dhPA79ExC15RvNHg1iPRMTIWfYzmzd+ScoWst3Ag5Iu\nljQAPJRjk8AWyktQk8CTlMJ2LZp5R2kXpZ9F2Um6dQ6xLAJ+zu31lNaaUJLWWK5lXElpkwqlqN2y\ntrWOC9p7HpjV4IRhC1ZEHADeBqYoVU7fiIhDlKRxFbA3In6l/Lc/0X7TDne5GVgt6ZCkI8CmOYQz\nDmzILmnLOX1G8h6l09lRyoL5NPBbnsWsA7ZKalVvHZ7D8cy6ztVqzSqTNBARJyUtobQkHclEZtZX\nvIZhVt9HkhZT+lG85GRh/cpnGGZdJOl+Sje21hNLwLGIGKsXlVl3OGGYmVkjXvQ2M7NGnDDMzKwR\nJwwzM2vECcPMzBpxwjAzs0acMMzMrJF/AfFXnTmOK9v9AAAAAElFTkSuQmCC\n", | |
"text/plain": "<matplotlib.figure.Figure at 0x7fdb3ef9e4a8>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df[(df.owner_age >= 2000) & (df.owner_age <=3200)].OpenStatus.mean()", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "0.5815277374382806" | |
}, | |
"metadata": {}, | |
"execution_count": 7 | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "#### Owner age looks like a good feature but length of the title post is useless.\nLets continue looking at Authors\n - plot mean and count on a scatter plot" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "authors = df.groupby('OwnerUserId').OpenStatus.agg(['mean', 'count']) \nauthors[authors['count'] > 0].plot(kind='scatter', x='count', y='mean')", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "<matplotlib.axes._subplots.AxesSubplot at 0x7fdb18307320>" | |
}, | |
"metadata": {}, | |
"execution_count": 8 | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEPCAYAAABV6CMBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnX+cVeV959/P/GIGEJSINMuPGaOMkGQMGvnRgnZAIZBS\npM02gK+0JB1t0AzaxTSapIxot7tLX9vdDbpDKqFCM2Ecm11Jhg1B0zBtxy0ZYjRoZxBtBYUmOmld\nEnUiKM/+ce6dOffcc+495869c3993q/Xec19znnOc55z0PM53+f7fb6PsdYihBBChKUi3x0QQghR\nXEg4hBBCRELCIYQQIhISDiGEEJGQcAghhIiEhEMIIUQk8i4cxpjdxpjXjDHHAo7fYoz5cWzrNcY0\njXUfhRBCjJB34QAeAT6W4vg/AzdYaz8C/Edg15j0SgghhC9V+e6AtbbXGFOf4vgRV/EIMD33vRJC\nCBFEIVgcUbgVOJjvTgghRDmTd4sjLMaYpcBngCX57osQQpQzRSEcxpirgYeBldbaN1LUU+ItIYSI\niLXWRKlfKENVJrYlHzBmFvC/gN+11v5TuoastQW93XfffXnvg/qpfqqf6md8y4S8WxzGmH1AM/A+\nY8wrwH1ADWCttQ8DW4EpQLsxxgDnrbUL8tVfIYQod/IuHNbaW9Icvw24bYy6I4QQIg2FMlRVNjQ3\nN+e7C6FQP7OL+pld1M/8YjId4ypEjDG2lO5HCCFyjTEGW6TOcSGEEEWChEMIIUQkJBxCCCEiIeEQ\nQggRCQmHEEKISEg4hBBCRELCIYQQIhISDiGEEJGQcAghhIiEhEMIIUQkJBxCCCEiIeEQQggRCQmH\nEEKISEg4hBBCRELCIYQQIhISDiGEEJGQcAghhIiEhEMIIUQkJBxCCCEiIeEQQggRCQmHEEKISORd\nOIwxu40xrxljjqWos8MY86Ix5lljzLyx7J8QQohE8i4cwCPAx4IOGmNWAVdYa2cDnwW+OlYdGwuM\nMcNbNsp++xobGzHG0NjYCEBbWxtXXXUVbW1tAKxfv57Jkyezfv16AAYGBti7dy8DAwOB/Q5TRwhR\nmhhrbb77gDGmHui21l7tc+yrwGFrbVesPAA0W2tf86lrC+F+wmJMFVADzABOA78EakdZrshSmzOB\nV2ltvY0HH/xKQr83b/5DHnro4ZR1hBDFgTEGa61JX3OEQrA40jEdeNVVPhPbV9Q41kANcAQ4Eftb\nCwx5yhM85d+Ilb8TK3vPr/bZ94k053zHU/5j4AXgCA89tCvBqhgYGIiJxpHAOkKI0qYq3x3INtu2\nbRv+3dzcTHNzc976kp4ZQNzIuhpHD19KU/77WHlCwPlDPvv+Ls05EzzlH7jKM+jr62Pu3LkA9PX1\n4VgaVwfWEUIULj09PfT09IyqjWIQjjM4b6o4M2L7fHELR+FzGjiG8/I9RuJtxcuXesq/ESu/FXC+\n9dn3iTTnvOW5xmdc5dMsWLBguFfO71c9bSTWEUIULt4P6vvvvz96I9bavG9AA/BcwLGPA/8n9nsR\ncCRFO7aYAGOhzsKVsb/ZKGerzdkW6mxr651J/W5tvTNtHSFEcRB7b0Z6Z+fdOW6M2Qc0A+8DXgPu\nwxn8t9bah2N1HgJW4nwaf8Za+6OAtmy+7ycq7mgoa+2oy35tNjY28uKLLzJ79mxOnDhBW1sbXV1d\nrFu3jgceeID169dz8OBBVq1axaOPPsrAwAB9fX0sWLAgcPgpTB0hROGTiXM878KRTYpROIQQIp+U\nalSVEBkxODjI0aNHGRwczHdXhCgpJByiaIgiBJ2dXdTXz2H58k3U18+hs7NrDHooRHmgoSpRMAwO\nDnLy5EkaGhqYOnVqwrHOzi5aWu6gpqaBc+dOsnt3Oxs2rAtsp75+DkNDh4lHftXVLeXUqeNJ7QpR\n7mioShQs6ayFVBbC4OAgLS13MDR0mLNnn2Zo
6DAtLXcEtnXy5ElqahpwzzWprq7n5MmTWb0nIcoV\nCYfICe5cVumGjdIJQ1QhaGhwrBJnjgnAMc6fP0VDQ0PW71OIcqQYJgCKIsAdntve/hcJuawqKiwX\nLvyAoSFn2KilZSk33bRseNgoLgzOcXALw9SpUz1C4LSRSgimTp3K7t3ttLQspbq6nvPnT7F7d7uG\nqYTIEhIOkRHBQvEKYHByWTkv+QsXFuHk0AKvKIDXQkgWhkyEYMOGddx007JAn4kQYhREnTFYyBtF\nNnO8WNi+fbudN2+e3b59u7XW2tbWu2Izxxtjf2ss/NiCtfANC1fEfse3Ky08EPv9Y1tXN8W+/vrr\nCdfYt+9RW1c3xU6adI2tq5ti9+17NKkfr7/+uu3r60s6VwiRORTjzPFsoqiq7PBnf/ZndHZ2smHD\nBu6//z/z9tvvEE+7XltbyS9/+R5uiwJ+FTgJTAUGcTLI/IPr+CLGjatm3Lgrhq0Fv4ioVFFVQojc\noJnjEo5RM2HCJQlCAe8BR3GLAEyJHYtzJfAAcEusznygcriN1tbbaGv7Y4mCEAWIhEPCkRF33303\n3/zmN5k1axa9vU8TbE0AzAZeBn5EophYHB+HIxR33LFp1LmsZIEIkXskHBKOyFRW1nHhgsGxDl7B\nefm/6KrhtSYWsWjRtRw58iPcFkU2hOLAgQPs37+ftWvX8otfvBV6wp8QInMkHBKOUMyYMYMzZ84w\nfvx43n7bMmJh9OBksXdbHHFrwlkGZcKEat5882zWs+M2NV3L888fJ2iIbKxmfsvKEeWGZo6LtBhT\nxZkz/wbMjolGDSMT65qByThiMRtYxIQJ1Wzffj/z5k1k+/ZtvPnmWQDmzp3Lxo0bsyIaBw4ciImG\ne0nbSvxCeEdLqhnsym8lREiihmEV8obCcVMyffr0WPjsj4dDY53y4wnlJUuWJITfZoPu7m7b0tJi\nu7u7k461tLTEFoWKFsIblXjI7+TJ1yaF/L7++uu2rm5KwrPJxjWFKHTIIBw37y/7bG4SDn9wxppi\nm98Lunp4BcDKypqsXLO3t9e2tbXZ3t5e++EPX5OwYmBT07yEut3d3b6CVlNzUcp5HVFIJwx9fX12\n8uRrE57NpEnX2L6+vlFdV4hCR8Ih4UgCKhNe2k458QU9fvx4O2vWLLtly5asXHP58lWea9YkXdNr\neTQ1zUtYwrapaV5WJ/ylEwZZHKJcyUQ4lHKkhHGWkK0j2dk9H5gFnAF+yVtvXRj1tb7xjW/w2GOP\n8ZGPfIQnn+whOaT3/bGaVwPT2b9/P6tXrx4+/9ixZxKiquLHsuWgzkVaEyHKFUVVlTCOcMzGcTjH\nmQ28BMD06dM5ffq0z5nRmDnzA5w+/VNGIqLGAW+4aiSH9HZ3P5YgHGNBfE0PtzB4Q3wVVSXKDYXj\nSjiAuGDE8bM4hhjtc3rqqad44oknsNbyJ3/yX32usQ9Yi19Ib1PTVRw79kzoa6xYsYLFixePqr9x\nJAxCJCLhkHBgTBVOiG386/+XQC0wnfjQlLWjG5paseLjseGooEmDs4FTQD1whhUrmtm8+Y6kYajw\n1zjNihXNHDr0nVH1WwiRjISjzIUj2KcxNFwnk+ezfv16Dh48yKpVq9i8eTNLliwn3aTBrVs/jzEm\nkrUQtwZee+01fvM3P5nUZm/vk1mzPIQQDhIOCQdBPo1Mn4sx1TgT8dwWzJWea/wK8HPiVs3MmdN4\n5ZWXU7brHYZyryn+5psDvPfejKT7aGu7hfvvvz+j+xBC+FOUM8eNMSuNMceNMSeMMff4HJ9kjPm2\nMeZZY8xzxphP56GbRcRp3EumOsNTmbF+/Xoc0XDP6K7FGZ5yX+PnbN36edas+SAdHbvSisaKFR9n\nyZLlPPBAJ0uWLKe5+aaEpWPfe2+f732sWLEi43sRQmSPvFocxpgKnDfSjcC/4CQnWm+tPe6q80Vg\nkrX2i8aY
S4EXgGnW2nd92is7i8PtCLfW4jzS0fk0du7cSWdnJ319fbzzziy8X/4VFf/MhQvjhq8R\nxv8Qz21VVVXFpz51G95hqPHjZ/D22yPXqai4lAsX3o50jWwg57koNzKxOPI9j2MB8KK19hSAMeZR\n4GbguKuOBS6K/b4I+Fc/0ShHHEd4HfFhJGMqsPZCTEyckNuoQjplyq/wxhs/j7X5HiNf/vGX/Bl+\n53d+h82bN4eOeNq8+Q9dS8v+E/ABRvJjOfM63nnn1YTrjBtneeyxxzh69GhWo6pS4R4uU0ZeIVIQ\ndcZgNjfgE8DDrvKngB2eOhOB7+NYJD8HVqVoL+xkyaIHCMg7lfkzaG9v92nTJMzohopIbfb393va\n3OPb7/vuuz/t0rG5RDPHRblCic4c/xjwjLV2mTHmCuBJY8zV1to3/Spv27Zt+HdzczPNzc1j0sn8\nMAPvl3vc0siEzs5Onzav4H3ve4Pz519n1ao1PProo6Haig/5HDlyBMfSiLe5EfgcTrTXyDDUtm1t\nfO5zt+dtmOjkyZPU1DQwNDRy7/GMvMU4ZKUhNxFET08PPT09o2skqtJkc8N5e3zXVb4XuMdT5wCw\n2FX+G+C6gPayJcIFD1m0OGbPnm0Be+mll/q22d7eHqk9dxba2tqLfdvs6OgYToIYRDZzVaWjlCyO\nVFmAhfBCsSU5xFl04SWcmWI1wLPAXE+d/wncF/s9DXgVmBLQXjafZ8GTPIxkMmjDmwQxsc0pU6aG\naqejo8OuWbPGtre3J72AKyvHJ1yjtfXOtO1l4+UXVXji18zXcFk2KCUBFGND0QmH02dW4kRKvQjc\nG9v3WeAPYr/fDxzC8ZweAzakaCurD7QYwJUyPSqOpZFsDVx66aX2+uuvD21pzJhxuUd8xllvFtqu\nri67Z88e29/fH9hOPBV7d3f3qF9+mQrPWFo5uUDp4UVUilI4srmVo3CMBgLX5wj/HDs6OgKGzP40\n0kvfm4q9omJyxi+/cv7qLud7F5mRiXDkfQKgiIYxZnjLlOrqatf5yRPtZs+enbaNtrY2rrrqqthM\n7mQnvTH3MWnStdTVLU2bnvypp55ypWJ3JhpeuHAO2DvcL3cK9HTEHd3uPgUtPZtqKdliJJ4evq5u\naejnL0RkoipNIW+UuMXh54/IRhtR/STV1RM8bSQv1NTe3h56yKetrc3X8qmsrM3I3xD2q7uUncjF\nPuQmxg40VFW6wkEWoqiqqqoC2wDs7Nmz07axdevWgDZqh8Vn5syGSPfW29vr22Z3d3fCyy/KyzCd\no1tDOkI4SDhKXjhG54/IRhuNjY2+bUybNs2uWbPGdnR0+J7X3d1tW1pakpaMjbNiRdzH4YjPihWr\nEo5nYh2kEpqwTmR9uYtSR8JR8sKRG4ujqqoq5XmbNm2y06ZNs5s2bQq0OLZu3Rp4/oc/fE3C0FZT\n0zzfevGoKu/cjlxYB2HaLOWhLCHiSDhKWDistRn5I0bbhuO/GHnpG1Nla2rqEtqoqakLPL+7uztw\nGCod8a/9Q4cO5STENNVwloayRLmQiXAoqqqIcLLcDuHMmRwibNZbdySWtReoqjoPvERV1fmUbdx+\n++04czRHop2sreb3f38jW7d+nsbGCrZu/TzvvPN2YBv79+/HL+rK2R9MZ2cX9fVzWL58E2vXbohl\nzh2J/vKLsooaIbVhwzpOnTrO9773F5w6dTwhoWGUyCwhyo6oSlPIGyVucWTCaCKxpk2bFujPCEsm\nFoff135NzWRbW3txoLM702Gl/v5+34mJsjhEuYCGqiQcbsjQL7JkyRJbVVVlL7vsMt/zN23aFKkf\nTU3zEoa2gnwccYIc14cOHfKNsurv78/oJd/aelesX43WLxVKKaQgESIdmQiHlo4tYTJZStZZ46OG\nxKViRxaGMuY8Fy6cj9yXAwcOsH//ftauXcvq1atT1h0cHKS+fg5DQ4eJr8
9RV7eUU6eOD09kc6+d\n8ctfvkRFRT1DQ8eG25g06Vq+972/YP78+Ultnzx5knPnznnWTncWlOrvf5q5c+cm1VeW2eyg51l4\nZLKQU96thGxuyOJIgIgWx5IlS3zrX3bZZcNRVUGkC7eNSjTH9eGkfqeb8OdMYmz0DMPNtnv27MlK\n/0UyilIrTJDFIYvDS5SlZKurq3n33cvxWihVVSc5fz7Yymhqupbnnz9O3EpparqKY8eeGXXfvV+n\n8fIbb7zBJz/5Rc6efXq4bm3t5Vj7/xg37nLOnz/F7t3t3HTTsuHzAY8Vsx+4hXQWh8gOYazI0bQt\nKyZzZHHI4vCFkBl0gyyOJUuWBJ4zmnDbKHi/VqurJyZZGP39/cM+EG/9P/mTP03ym1RVTU0IHAiT\n7l1kRq6y9sqKGT3IOS7hGC1R53m0tLT4Rl61tLRkrU9Ro6yCIqKcRaUS9/X29qZN9y5GT74mcYr0\nZCIcmsdR4GSSDTfqOTNmzMAYw4wZM7D2AkuWfJSqqpMsWfJRgoa1Pv3pT/O+972Pl19+Gb8Mu2vX\nrk2oPzAwwN69exkYGAh9H/FzDh8+nDSnorb2A3zrW12R5mB8+ct/lJQ1dvHixWzcuFHDUzkmF1l7\nNdcmj0RVmkLeKDGLI5M5GFHPyewa1UnnpAq3TRf26of3HGcVwXBflqm+RJV7Kr9k8/nL4sgOaKiq\ndISDDOZgRD1n+vTpvvWnT58eeI2NGzf6nrNs2TLfqKr+/n7f+qmGhoLO8Q5NpXoJBUVlBU34E8WZ\n0FFzbUaPhKPkhCNaJtuo52RyjSlTpvieM2XKlIR68YSFX/rSl2zYsNd05+zYsSPQ+e33wvC+CDOx\nfMqFYnYyF6PgFRISjpITjsKwODZu3GinTJliN27cGGhxbNy4cbh+4jKwtaEsDu/SsX6LQ8XPCTtE\n4bYuwlo+5fgS0pBPeSPhKCHhsNbaTLLhRs9+m7p+sj+j0o74Ra4c3hfHf1GmxAy73i/9oIWcHNEZ\nOScuBF1dXWlDO73WxY03Lk9r+RTzV/doyFWorCgOJBwlJhzW2pgVkX4ORpRzvMcdywNfSyPIunBb\nIW6CloG9/fbbE3wL7rU3wpwTxVkeZF04QjQ6K6YUKed7FxKOkhSObBMliiqsP8NNkPXgXpzJOyw1\nf/7ClOeEdZbH2bNnj691ceONNwVaPuX+1S0nc/ki4ZBwpCSqDySMPyPO1q1bbWNjo926dWvKZWCD\nhOW66xYlnRP3N+zYscNXCNzOcjep/BlKox5MOfp3RJEKB7ASOI6TIOmegDrNwDPA88DhFG1l7WGW\nIplFagX7M+I4CQNHvuRrauoCl4ENGpZyD1v19vYm+BucGd/RQnpbW+8MtC6C0Fe3KEeKTjiACpzl\n7OqBauBZYI6nzmTgH4HpsfKlKdrL5vMsCqL4QMJaHE1NTdYYY5uamqy1NtCfYa0NvQZ5PHvu9u3b\n0w5l+X39Oz6NaEKQyZwNzfMQ5UYxCsci4KCrfK/X6gBuBx4I2V62nmVRkNms73RRVNHabGxs9LUg\nGhsbh+t8+MPXJLQ5YcKkwKEsa4P9DV1dXTl9qZdrVJUobzIRjnznqpoOvOoqn47tc9MITDHGHDbG\nHDXG/O6Y9a6AcfJQ1eBeDxxq0+anSrVu+dVXX+3bprPfn3Xr1uGXq8rZ7yzg5KRcH2nzrbfOs337\nNtrabqG390kOHfrOcN1bb72VEydOcO7cSbxrjC9dujRneaUGBwdpabmDoaHDnD37NENDh2lpuSP0\n+uVClBNV+e5ACKqAa4FlwATgH4wx/2Ctfcmv8rZt24Z/Nzc309zcPAZdzBczcCd4czTX97Ek4Hxk\nJPP8888DVya16exPpK2tja6uLtatW0dNDZw7t4j4mh81NfDAAw8AsH//ft9+njhxgq997WvD7bnX\n9Ni9ex8zZ07jZz9bSnV1/fD6GrlYa8
G9xkdNTQNDQ8kJ87TGgyglenp66OnpGV0jUU2UbG44Q1Xf\ndZX9hqruAe5zlb8GfCKgvSwZb4UPGcwsT0dTU5Nvm3FfRxw/Z7g7qspNmPU6gup0dHQkRPlk2/8Q\nZo0PRRiJUoci9HFUMuIcr8Fxjs/11JkDPBmrOx54DvhgQHvZfaIFTthZ4kRyoKduM6wz3P2Sb2qa\nl9CmN3tumDU9sp1nKuoaH0KUKkUnHE6fWQm8ALwI3Bvb91ngD1x1Po8TWXUM2JyirWw+z6IgnShk\n4kD3RlW5CeMM93vJe9ckdwtLOqskkwy7QcTnKhw6dMjXAX/o0CHNZYiheR3lQVEKRza3chSOVIQd\nzpo4caIF7MSJE9O2mc7iCPOS9xMWP6skkwy7qfDODampmayhqQAUYVY+5FQ4gF8DbgF+L75FvViu\nNwlHIoSY8JeJRVJTU5fwkq+pqRs+FpTuI/6STyUsbqskkwy7qfAbmqqunqgJfz5oFn15kYlwhArH\nNcZ8HfivwBJgfmy7Lsy5It8kh8rGueiii/ALv3X2j7Bo0SKqqqpYtGgRAO+88zZbt36exsYKtm79\nPO+88/Zw3QULFuBEWLuveTq2H/r6+oCZJEZZzaCvr4/Vq1fzta99jUsuuYQnn+xx9esHwHs4sRSN\nwCJaW29LG5Y7ODjI0aNHhyOnvMuM1tXNZv/+Tt/lZ8sZLckq0hJGXYABnE/RvFsVafqZDQEuKVI5\nu8miRdLR0WHXrFljOzo6Uqb7CDOUFTbDbiq8Qy1f/erD+or2EOTDkMVRXpCroSrgr4H3R218rDcJ\nhz8EONAd30bySzzu61i40D9r7cKFCxPamTHj8gShmDmzISl01j0MlS6PVJgMu6kIevHFxUNDU+l9\nGMrbVT7kUjgOA28Ah4Bvx7eoF8v1JuGITiqLpLKy0vfLv7JyJNFhR0eH70u+o6NjuI435UhT0zzf\nORlucfHLsBs2yidVinRFCoW3KPSsyoNcCsev+21RL5brTcKRGUFRVWEsjjVr1viKy5o1a6y14SYA\nWusvLkHZctN9AWuoJTXlvvaISCRnwlEsm4Qj+6SbEJjO4ggzuS+duGQiBBpqCUbCKtzk0uJYBBwF\n3gTO4YS4/DzqxXK9SThyw8KFC21lZWWSbyPOzJkNCeIyc2bD8LEwFkc6ccn0C1lDLcFIWEWcTIQj\nbHbch4ANOLO764Bbgf8Z8lwRAWPM8JbJ8TB1KioqMMZQUeH/z79+/XomT57M+vXrAThy5Ajvvvsu\nR44cGa6zc+dObrjhBnbu3Mkrr7xMR8cu1qz5IB0du3jllZcZGBhg7969XHHFFTQ1XYXz7TEbWBQr\nw6233sqBAwdYu3YtfmHDS5cu5ejRo0ycONE3W+7EiROHw239mDp1KvPnz1eSQh82bFjHqVPHFYos\nMiOMugA/jP095tr3TFSVyvVGkVsc6UJfw4TGjrYNqPIcr0i6xiWXTEuoM2XK1ITj6VKO+PkzvDPH\nZ85sSPBptLbemfCF3Np6l2Y2C5EFyOFQ1d/hzBT7K+DPgP8A/DjqxXK9FbNwkCY9SLrjYeoYY3yP\nG+OIx7p163yPr1u3bvga7e3tvnXa29uttennaaQauoqLS0dHh+8YfH9/v+3r67P9/f0aoxciS2Qi\nHGGHqn4XZ5nXVuAtnKm/nxilsSOS8FtfI8rx1HWc/0aSjzv74eDBg77Hnf0OnZ2dvnWc/alnhkPw\n+hz79+8fnjne2NjoO3P5zTffZP78+bz55pt5m9nsno0+mjpCFDOhhMNaewowOJMA77fWbrEBCymJ\n0RCcHiTc8dR1HJ9H8vG4L2TVqlW+x539Dhs2bPCt4+xPn3IkyJ/h7HdoaGjw9Wk0NDSkPJ7O5zFa\nOju7qK+fw/Llm6ivn0NnZ1dGdYQoesKYJcBv4qQ+fzlWnocmAGad9OuBp19/Y7RtQIXneLKPY8qU\nqQ
l1kn0cqWeGB63P4TdvIyjqx3s87gPJlc8jTAirwlxFMUIOfRxPA5NxOcSB56JeLNdbsQuHtTbm\np0i1vkb6RZnS1XF8HQz7NrysW7fOTpo0KcG34aW9vd1ef/31w76NVOttWJuYy8qvfmI23HAzxePH\nx8LnESYkOKiO1vgQhUwuheNI7K9bOI5FvViut1IQjkIkaFnYOH5RUm78clm5GW1uqrGYCZ2pxRFf\nVVDRX6JQyaVw7MZZi+MYTjD+g8BXo14s15uEI/v4rS/uJt0EvzC5rIKy4ba1taXs21haHNaGmzTn\nraN1zEWhk0vhGA/8Kc7s8aPAfwTGRb1YrjcJR2YE5aoKs754ulnf6XJZWRtscXR3dwcO8XhzV3nn\neeTqyz7MbPR0y9MqJ5QoJHIpHNcBjwPPAM/FNg1VlQCpJgSGWV88U4tj69atw45wa21SNtympmsC\nnd1Bw0bxeR6F8kUvZ7koBnIpHC/EIqsuB+rjW9SL5XqTcEQj3XocYSwOa4OjpOJ4c1nV1IxPEKsV\nK1ZZa0eiqrq7u1O+cIspu+u+fY/a2tqL7YQJjba29mL5OETBkUvh6I3acD42CYc/QVFWzr7UKwCm\nWl/cjTdKavv27XbevHl2+/bt1tqRqKogMXI7wtMJw1h+yY82UWJ8SG3ChI/IOS4KklwKx43A13AS\nHf52fIt6sVxvEo5kUg1FpbM44vhFVXmFws348RcnXHPChEnDx8I4wsMIw1hkd42yBogfpT5UFcXf\nUyr3XIrkUjg6gB8Ce4FHYttfRr1YrjcJRyKEym+VflKhl1Tht9u3b/e9ZtzySBV6637JhBGGXL6U\nsvHSL6YhtaiEEdXRCq8YG3Lq44jacOgOwErgOHACuCdFvfnA+VSWjoQjkTBDUdYGR1X5kc4ZPm/e\nPN9rzps3Ii5+y8L6vWTy+bWajcl8pWpxaBZ9aZFL4XgE+GDUxkO0WwG8FHO2VwPPAnMC6v0NcEDC\nkUiqWeJhLA4/li1bZmtqauyyZcuSjqULvw2yOLZs2ZIwk9ydXqQQXzJ+faquvijyF3QpLpg0mln0\npWBtlRq5FI4BnJX/XohNAsxKOC7O6j4HXeV7/awO4C7gduAvJRwjhFufI9pQVLo2w6zoN2HCpIRr\nVlSMs971OdwU6kvG/dKvrb3Y1tRMzkjcSm2cXxZHaZFL4aj326JezKfdTwAPu8qfAnZ46vw74HDs\n9yMSDoco1kQqq8TNsmXLfNv0Wh7pwm+tHYmq2rJli2+bccvD2sJ+yWgynz+ZzKIvBWurFMlEOKoI\ngXXSqudR9s1+AAAX3klEQVSL/wHc4yoHr5kKbNu2bfh3c3Mzzc3NOelUYeC39kZytnvnv4309Pb2\n4nwTJLbp7B/h2LFnOHDgAPv372ft2rWsXr2ap556iieeeIIVK1awePFivvCFL/CFL3yBvXv3ErQ+\nx9y5cwFnidfdu9tpaVlKdXU958+fYvfu9oJa8nXmzJmuVO5X40317mZwcJCTJ0/S0NBQUPeQTTZs\nWMdNNy1LeZ9h6oixp6enh56entE1ElVpsrnhDFV911VOGqoC/jm2vQz8AvgpsCagvezJcIFDhv4L\na62trKy0gK2srEzYH9bi8OKX2TZO0IqA3d3dCT4Pa/MzpJPqmpmkNVEkkSg2yNVQVa42oJIR53gN\njnN8bor6GqpykUkobfo1x6O1GSazrXd9jpFwXn+fRxDZFpZUL/lM0poU8pCbEEEUnXA4fWYljtP9\nReDe2L7PAn/gU1fOcQ+E9F9YG7c0kl/yfpaHO6rKu5aGm7CZbePrcwQ5192Whx/Z/pJP95LPxGFf\nqE5+IVJRlMKRza0chSMKhJzX4SbTtTTa29sTkhjG2bNnT8zScPdhtt2zZ09gH3LxJZ+LtCayOEQx\nIuGQcKQkrMURJ8xaGtYmT+gbWVo2vM+jv78/adXAOLn4ks9VWhNF
EoliQ8Ih4UhLFB9GmLU04sQn\n9LW3t0f2ebS23mlbW+8K9Hvk6ks+V2lNSm3ehihtJBwSjlAERVV5CWtxuInq84hbGun8Hrn6ktdL\nXpQ7Eo4SFI4ozu9Mz1m4cKGtrKy0CxcuTDrmXUsj7uPYtGmTnTZtmt20aVNC/UxW8wvye+zYsSPh\nnHQv+aChLiFEMBKOEhOOMClFRntOmPreqCqoSTjHmKqE+smr+c1LGREVZHHU1l4cOooq1VCXECIY\nCUcJCUcmE/yinrNw4ULf+n6WR5xNmzb5nuNneYRZzS+O1+9RWTk+tE8jSHh6e3s1DCVEGiQcJScc\n0UJno57j+DqS66fyfUybNs33nGnTpvnWD4qI2rVrV1K4bnyoqaurK1IUlf9Q15W2unqiZnALkQYJ\nR8kJR/FYHDfffLPvvA2/iChjahOsC3e4btA5mVgccDjU+UKUMxKOEhIOa63NLKVI1DTqyfWDHN9x\njKnynFORUgjcEVHjxsVTrgeH63rPCefjSBzqqq6eFdpiEaKcyUQ4jHNeaWCMsaV0PwDGjCQDDntv\nUc9ZtGgRP/zhD7nuuuv4wQ+ewUkhNgM4jTHnuXDhfNI5t99+O48//jiLFi3iW996AjhCPGssLKK3\n90kWL148XD+eMbarq4s///Nv4yz4GGc2bW23sGLFioQMu1GzzA4MDNDX18eVV17J8uVrGBo6PNyn\nurqlnDp1XBlahfBgjMFamzLreBJRlaaQN0rM4sgWhAzPDev4dhM0b+Puu+/2dUwHhevOn78opdUS\nFc3gFiIcaKhKwuElSnhuVMe3tcFCUFMT7Jj2huvOn+/va/EOX0VFs74LDz3fwiMT4ajIrtEjCgln\nyKoGZxjpROxvbcJQlpvf+q3fAk7jDDcR+3uG2bNnc8MNN7Bz586kcxYvXsyKFc04S6vMjv19l3Pn\nnuLs2acZGjpMS8sdDA4ODp9z6NB36O19kra2W+jtfZJVqz6G36JUTzzxxCifQDQ6O7uor5/D8uWb\nqK+fQ2dn15hev9TR8y0hoipNIW/I4kiADEJ6kx3fcee5Y7FMmTLV97z4vI1du3b5htJ2dXUFzuoO\ns6ZHVKKmYVdm29yi51u4oKEqCYcbMgjptXYknciSJUt8z29vbw881+8FUVFRZ9PN6vYOX/n5OMIO\nc2TyktJaGrlFz7dwkXBIOJLIJKQ3zvXXX+9rsVx//fUpzwsTfhtkefjNBXG3GcaCyOQlpS/i3KLn\nW7hIOMpAOAgZIZXqnFmzZlnAzpo1K+V5QSnS29vbU64KaO2IdbBjxw4bJoFhKqIu4xrlJeW2YhSJ\nlVv0fAsTCUeJC0cmSQ9H28bIokwjizSlWxXQTdCs7nHjJoX2P/hZEHV1H07ZRpiXlJ8Vo6if3KLn\nW3hIOEpYODL1V7hxLI3kNsJYHtdff/2wpRF1jQ7vrO6KilpfayBoqMrPggiTUiTVSyqqFSNEqSLh\nKHnhiBYhlYs2glYFXLlyZai1MoISGC5c+GsJ4pI6bcnFtq7u8kg+DC9+Vkxt7eV23LjwqdyFKAUk\nHCUvHLmxOMaPH58yN5WbIIujqmp8qBeu35f+uHGTfdv0szz6+vpsf3//qB2tyf04nNQHOW9FOSDh\nKGHhsNba0URIpWoj1aJMfnhXBXSSHIZ/4Xr9D7/925/wtWK8y82maiMT68Ab/VVX1zQqK0aIYiQT\n4VCSwyIjk6SHXurr63nllVcYP348b79t8SYo3LRpo+8scTff+MY3eOyxx7juuuv48z/fz9mzTw8f\nmzTpWnbtupehoSEWLFjA3Llzk853JzA8ceIES5YsT+pHUKLEeNLDqEkQ/Yi3MXHiRD760SVKjCjK\njqJMcgisBI7j5MS4x+f4LcCPY1sv0JSirexIcJmQSW4qL5lO+POSbgJg1JngmZCtcFFFDoligmIb\nqgIqgJeAeqAaeBaY46mzCJhs
R0TmSIr2svk8iwIymNcRJygbblNTk501a5bdsmVLqHbCTPjr7u4O\nTDkSJ0pUVa78D6N96Y+FwAmRTYpROBYBB13le/2sDtfxi4FXUxzP1rMsCrIxryNdbqrKyppQ7aSe\n8HelhXGRLBA3xZKuQrOjRTGSiXDkOzvudOBVV/l0bF8QtwIHc9qjIiFq5tsgLlw4z6ZNG5k27Rc0\nNV0J1Ca0+d57ldx9991p25k6dSrz58/npptuwvknTcywC98FXgCO8NBDu3jqqac4evRoQtbcIBoa\nGjh37mRCm+fPn6KhoSHSveaakydPUlPTgDvLb3V1PSdPnsxfp4TIAVX57kBYjDFLgc8AS1LV27Zt\n2/Dv5uZmmpubc9qv/JKcitwZ+YvGzp072blzJ/X19b5t/tVf/RVnz55l7dq1rF69OmVbc+fOpbX1\nNh56aBHxVQQdQ7HZ1eYMli79GOPHX8W5cyfZvbudDRvWBbY5depUdu9up6VlKdXV9Zw/f4rdu9sL\nzmmdKHCOg70QBU6UNz09PfT09IyukagmSjY3nKGq77rKvkNVOP8Xvghckaa9rJlvhQ5ZmNfhZcuW\nLT5tjksYumpqmheqrfiEv+7u7oB+Ph55OCcbTudcO66Vj0kUGxShj6OSEed4DY5zfK6nzqyYaCwK\n0V4WH2fhEzSvg1E4zCsra1xt1gY6uqPgTTlSXX1ZKH9Ftl/yY+W4VlSVGCuy8d9a0QmH02dW4gx+\nvwjcG9v3WeAPYr93Af8K/Ah4BuhL0VbGD69Y8YpENhzmW7ZssbNmzbIf+tCHrF+4bktLS+Q24xZI\nb29vKAdytl/yclyLUiNb/48UpXBkcytH4XCT7eGroGEmP4sj1VoaXtIN5+QiAWGxRGYJEYZsfghJ\nOCQcvhbCaJ5LU9O8hOEwPx/H8uXxyXv+CQr9SGViZ5JGPcz1ZHGIUiGbH0ISDglHoMUB2OnTp2fU\nbnd3t21paQm0NILSrKeb8BdEpmnU0yHHtSgVZHFIOLJKuiSGmfg8UtHW1uZj5bzfRk054iUbadT9\nrBo5rkWpkK0PIQmHhMNaaxMc5n7WQKaWhx/JFsdh32tmanlkmkZdqT9EOVC2UVXZ3CQciaTyedTU\n1Nhly5Zl5TqJCQrHxf66rznb7tmzJ+GceJRVWEGJ8nUlf4YQ4ZFwSDgSmD59us/Xf+KEvmwNXcWj\nqoIWenILRGvrXRkNZYX9ulIElRDhkXBIOJJI9Hn4T+ibPn16pGy46fBO+HMLQ39/f9aGsoKQxSFE\neDIRjnwnORQ5xtoLTJ8+BWeC/i/xy0V15szPeOWVcfy3/7aTqqpxo77mgw9+hf7+p9mz58v09z/N\ngw9+ZfhYX18fMNPThxmx/dkhntuqrm4pkyZdS13d0oLMbSVEsaIVAMuIG2+8ke9//x/wrrQH38FJ\nQuiUb755BZdeemmopIZRGRgY4IMf/GhSH/r7n/ZdKXA0ZGOFQCFKnUxWAJRwlBnGVOCkTp+Ok+58\nMvATV43ZwMvAB4DTNDVdRVfXPvr6+gKXgY3K5s138dBDu4hnz21tvS3BKhFCjB0SDglHKG688UZ6\ne3uZOnUqZ878G8kWyE5go6v8LnA58GrWXvIDAwNZFSMhRGZIOCQckamqGsd771UyYoFUAG+6aswG\nmoD/TS6HlURu0bCdCCIT4ZBzvMx599132LLldmbNOsfNN68ALpC8et/vx8rBjuyBgQH27t3LwMDA\nWHRbRKCzs4v6+jksX76J+vo5dHZ25btLosiRxSESuPrqa3juuRcYsUDewclmH+zI3rz5D3nooYdx\noqX8h7P0xZsfBgcHqa+fw9DQYeL/hnV1Szl16rj+HQQgi0NkgWPHnqG7+zFaWn6d7u7HaG1txfFz\nNAKLaG29LUE0BgYGYqJxBPea4m7LQ1+8+UProItcIItDpCWVI3vv3r18+tP/CUc04jSyZ8+X2b
hx\no75484yev0hHJhZHVa46I0qHuXPnBjrDFyxYALyKM4wVH846Hds/8sU7NJT8xasXV+6JT4ZsaVlK\ndXU958+f0mRIMWpkcYhRk2pehr54CwP5mEQQCseVcOSNVMNZnZ1dtLTckfDFu2HDujz1VAjhRsIh\n4ShY9MUrRGEi4ZBwCCFEJBSOK4QQIudIOIQQQkQi78JhjFlpjDlujDlhjLknoM4OY8yLxphnjTHz\nxrqPQgghRsircBgnx/dDwMeADwEbjDFzPHVWAVdYa2cDnwW+OuYdzSHGmOEtG2WA8ePHY4xh/Pjx\nAKxfv57Jkyezfv16AHbu3MkNN9zAzp07AThw4AC33norBw4cABxH9tGjRxkcHPQtlxPlfO9CBBJ1\nycBsbji5LA66yvcC93jqfBVY5yoPANMC2ku7TGIhAZVJ63+Pvhy1zeqE8owZDbauboqdPPlaW1c3\nxba23plQ3rfv0Xw/tjFj375Hy/beRflAsa05DnwCeNhV/hSww1OnG/g1V/l7wLUB7WXvaeYYwHft\nbWe/uzzPU97jKe/wlKt82jycpuxt4/HA+uWydrfWLRflQibCUXIpR7Zt2zb8u7m5mebm5rz1JT3J\n6387a4O7y897yv/sKf/YU/4XnzYnpCl72zjrKs9IqF8u6UKUKkWUKj09PfT09IyqjXwLxxlglqs8\nI7bPW2dmmjrDuIWj8DlNYo4n923Fyx/2lD/gKX/EUz7v0+ZbnjresreNya7y6YT658+foqGhIcP7\nLR4aGho4d+4k7mdZLvcuShvvB/X9998fvZGoJko2N6AS5xO7HqgBngXmeup8HPg/sd+LgCMp2sum\nBZdzRvwNV3r8D6MpRzvHmKqE8syZjo9j0qRrEnwc8XI5jfPHfRzleO+ifCCDoaq8zxw3xqwEvoIT\n4bXbWvtfjDGfjd3Mw7E6DwErcT59P2Ot/VFAWzbf9xMVdzSUtXbUZXCiqoaGhqirq+Ptt99m/fr1\nHDx4kFWrVvHoo4+yc+dOOjs72bBhA7fffjsHDhxg//79rF27ltWrVyelBynndCHlfO+iPFDKkSIU\nDiGEyCdKOSKEECLnSDiEEEJEQsIhhBAiEhIOIYQQkZBwCCGEiISEQwghRCQkHEIIISIh4RBCCBEJ\nCYcQQohISDiEEEJEQsIhhBAiEhIOIYQQkZBwCCGEiISEQwghRCQkHEIIISIh4RBCCBEJCYcQQohI\nSDiEEEJEQsIhhBAiEhIOIYQQkZBwCCGEiISEQwghRCTyJhzGmEuMMU8YY14wxhwyxkz2qTPDGPN9\nY8w/GmOeM8bcmY++CiGEGCGfFse9wPestVcB3we+6FPnXWCLtfZDwK8CnzPGzBnDPmadnp6efHch\nFOpndlE/s4v6mV/yKRw3A3tjv/cCa70VrLU/tdY+G/v9JjAATB+zHuaAYvkPSf3MLupndlE/80s+\nheMya+1r4AgEcFmqysaYBmAe8IOc90wIIUQgVbls3BjzJDDNvQuwwB/7VLcp2pkIfBO4K2Z5CCGE\nyBPG2sD3dW4vbMwA0Gytfc0Y8yvAYWvtXJ96VcAB4KC19itp2szPzQghRBFjrTVR6ufU4kjDt4FP\nA9uBjcC3Aur9JdCfTjQg+s0LIYSITj4tjinAY8BM4BTwSWvt/zPGvB/YZa1dbYxZDPwd8BzOUJYF\nvmSt/W5eOi2EECJ/wiGEEKI4KfqZ48aYf2+Med4Y854x5lrPsS8aY140xgwYY1bkq4+u/qw0xhw3\nxpwwxtyT7/7EMcbsNsa8Zow55tqXdoLmWBM0IbSQ+mqMGWeM+YEx5plYH+8rtD66McZUGGN+ZIz5\ndqxccP00xpw0xvw49kz7Crifk40xfx173/yjMWZhofXTGNMYe44/iv09a4y5M2o/i144cIaxfgv4\nW/dOY8xc4JPAXGAV0G6MyZsPxBhTATwEfAz4ELChgCYzPo
LTLzdhJmiONUETQgumr9bad4Cl1tpr\ncMLHVxljFhRSHz3cBfS7yoXYzws4gTTXWGsXxPYVYj+/AnwnFuTzEeA4BdZPa+2J2HO8Fvgo8Bbw\nOFH7aa0tiQ04DFzrKt8L3OMqHwQW5rF/i3Aiw3z7l+8NqAeOucrHgWmx378CHM93H336vB+4qVD7\nCowHfgjML8Q+AjOAJ4Fm4NuF+u8OvAy8z7OvoPoJTAL+yWd/QfXT07cVwN9n0s9SsDiCmA686iqf\nIb+zzr39OU1hz4KPNEFzrHFNCD2C8x98wfQ1NvzzDPBT4Elr7dFC62OM/w78EYlzqAqxnxZ40hhz\n1Bhza2xfofXzcuBnxphHYsNADxtjxlN4/XSzDtgX+x2pn/kMxw1NiomEX7bWduenV2VHwURReCeE\n+szfyWtfrbUXgGuMMZOAx40xH/LpU177aIz5DeA1a+2zxpjmFFUL4d99sbX2J8aYqcATxpgXKLDn\nifMuvRb4nLX2h8aY/44zqlBo/QTAGFMNrAHivtZI/SwK4bDWLs/gtDM4ob5xZsT25YszwCxXOd/9\nScdrxphpdmSC5uv57hAMTwj9JvB1a2187k9B9tVa+3NjTA+wksLr42JgjTHm40AdcJEx5uvATwus\nn1hrfxL7O2iM2Q8soPCe52ngVWvtD2Pl/4UjHIXWzzirgKettT+LlSP1s9SGqtzO728D640xNcaY\ny4Ergb78dAuAo8CVxph6Y0wNsD7Wx0LBkPz8Ph37nWqC5ljjNyG0YPpqjLk0HpFijKkDluMk5yyY\nPgJYa79krZ1lrf0Azn+L37fW/i7QTQH10xgzPmZhYoyZgDMu/xyF9zxfA141xjTGdt0I/CMF1k8X\nG4BOVzlaP/PtoMmCg2ctju9gCPgJiQ7oLwIv4fyPu6IA+roSeAF4Ebg33/1x9Wsf8C/AO8ArwGeA\nS4Dvxfr7BHBxAfRzMfAe8CzwDPCj2DOdUih9BZpi/XoWOIYznEoh9dGnz7/OiHO8oPqJ4zuI/3s/\nF///ptD6GevTR3A+EJ8F/jcwuUD7OR4YBC5y7YvUT00AFEIIEYlSG6oSQgiRYyQcQgghIiHhEEII\nEQkJhxBCiEhIOIQQQkRCwiGEECISEg4hCghjzF3GmNp890OIVGgehxAFhDHmZeCj1tp/y3dfhAhC\nFocQETHG/J5rYaG9sTQyf2OMedYY86QxZkas3iPGmN92nfeL2N9fN8Ycdi368/XY/s3AvwMOG2P+\nJh/3JkQYiiLJoRCFgjHmg8CXgF+11r5hjLkE2As8Yq3tMMZ8BngQZ3ExL27zfh7wQZzU608ZY37N\nWvugMeY/4Cxa9EZu70SIzJHFIUQ0lgF/HX+xx/7+KiMJ476Ok1MrHX3W2p9YZ6z4WaAhtt+bbFKI\ngkPCIcToCXIUvkvs/7HYssU1rmPvuH6/h6x/UURIOISIxveB3zHGTAGI/f2/OGmqAT4F/H3s90ng\nutjvm4HqEO3/HGcZUiEKFn3lCBEBa22/MeZPgb81xryLk+57M7DHGPN5nHTVn4lV3wV8K7aM7CHg\nraBmXb93Ad81xpyx1t6Yk5sQYpQoHFcIIUQkNFQlhBAiEhIOIYQQkZBwCCGEiISEQwghRCQkHEII\nISIh4RBCCBEJCYcQQohISDiEEEJE4v8DXimsoHSmIv4AAAAASUVORK5CYII=\n", | |
"text/plain": "<matplotlib.figure.Figure at 0x7fdb19dbaa58>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "#### see if the number of questions asked before " | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": true | |
}, | |
"cell_type": "code", | |
"source": "#check if the topic has a question mark \n#hypothesis: real questions will have a question mark \n# df['has_qn'] = df['BodyMarkdown'].apply(lambda x: 1 if re.match(r'(how|why|what|when|\\?)', x, re.M) else 0) \n# find the words that lead to a qn and count the occurances \n\ndf['qn_count'] = df['BodyMarkdown'].str.count(r'(how|why|what|when|\\?)', re.M) \ndf['qn_count_title'] = df['Title'].str.count(r'(how|why|what|when|\\?)', re.M)", | |
"execution_count": 11, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df[df.qn_count > 100].OpenStatus.mean()", | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "0.1875" | |
}, | |
"metadata": {}, | |
"execution_count": 12 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# df.plot(kind='scatter', x='qn_count', y='OpenStatus')\ndf.groupby('OpenStatus').qn_count.describe()", | |
"execution_count": 13, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "OpenStatus \n0 count 70136.000000\n mean 2.174604\n std 5.591464\n min 0.000000\n 25% 1.000000\n 50% 2.000000\n 75% 3.000000\n max 1133.000000\n1 count 70136.000000\n mean 2.485414\n std 2.629112\n min 0.000000\n 25% 1.000000\n 50% 2.000000\n 75% 3.000000\n max 139.000000\nName: qn_count, dtype: float64" | |
}, | |
"metadata": {}, | |
"execution_count": 13 | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "#### Create a list of smart authors: those whose questions have a high chance of staying open (`mean` >= 0.7 and `count` >= 5 in `authors`) " | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "smart_authors = authors[(authors['mean'] >= 0.7) & (authors['count'] >= 5)]\nsmart_authors.shape\nsmart_authors.mean()", | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "mean 0.848510\ncount 7.294529\ndtype: float64" | |
}, | |
"metadata": {}, | |
"execution_count": 14 | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "#### Create a list of frequent authors: those whose questions have a high chance of being closed (`mean` <= 0.3 and `count` >= 5 in `authors`) " | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "freq_authors = authors[(authors['mean'] <= 0.3) & (authors['count'] >= 5)]\nfreq_authors.shape\nfreq_authors.mean()", | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "mean 0.158347\ncount 7.589595\ndtype: float64" | |
}, | |
"metadata": {}, | |
"execution_count": 15 | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "#### Summary statistics of every numeric column, grouped by `OpenStatus`, using `describe()` " | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df.groupby('OpenStatus').describe()", | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th></th>\n <th>OwnerUndeletedAnswerCountAtPostTime</th>\n <th>OwnerUserId</th>\n <th>PostId</th>\n <th>ReputationAtPostCreation</th>\n <th>day</th>\n <th>days_created</th>\n <th>hour</th>\n <th>owner_age</th>\n <th>posted_in_the_night</th>\n <th>qn_count</th>\n <th>qn_count_title</th>\n <th>weekend</th>\n </tr>\n <tr>\n <th>OpenStatus</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th rowspan=\"8\" valign=\"top\">0</th>\n <th>count</th>\n <td>70136.000000</td>\n <td>7.013600e+04</td>\n <td>7.013600e+04</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n </tr>\n <tr>\n <th>mean</th>\n <td>13.529956</td>\n <td>7.562534e+05</td>\n <td>7.589407e+06</td>\n <td>355.813063</td>\n <td>2.641596</td>\n <td>180.737681</td>\n <td>12.272428</td>\n <td>2098.656168</td>\n <td>0.223537</td>\n <td>2.174604</td>\n <td>0.457525</td>\n <td>0.097197</td>\n </tr>\n <tr>\n <th>std</th>\n <td>72.280095</td>\n <td>4.411708e+05</td>\n <td>3.106982e+06</td>\n <td>1917.610162</td>\n <td>1.898102</td>\n <td>252.647218</td>\n <td>6.163602</td>\n <td>385.402181</td>\n <td>0.416618</td>\n <td>5.591464</td>\n <td>0.655759</td>\n <td>0.296228</td>\n </tr>\n <tr>\n <th>min</th>\n <td>0.000000</td>\n <td>4.000000e+00</td>\n <td>2.300000e+01</td>\n <td>-17.000000</td>\n <td>0.000000</td>\n <td>-1177.000000</td>\n <td>0.000000</td>\n <td>1568.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>25%</th>\n <td>0.000000</td>\n <td>3.938680e+05</td>\n <td>5.724392e+06</td>\n <td>1.000000</td>\n 
<td>1.000000</td>\n <td>1.000000</td>\n <td>8.000000</td>\n <td>1799.000000</td>\n <td>0.000000</td>\n <td>1.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>50%</th>\n <td>0.000000</td>\n <td>7.670040e+05</td>\n <td>8.117328e+06</td>\n <td>18.000000</td>\n <td>3.000000</td>\n <td>62.000000</td>\n <td>13.000000</td>\n <td>2013.000000</td>\n <td>0.000000</td>\n <td>2.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>75%</th>\n <td>5.000000</td>\n <td>1.106342e+06</td>\n <td>1.017955e+07</td>\n <td>148.000000</td>\n <td>4.000000</td>\n <td>268.000000</td>\n <td>17.000000</td>\n <td>2331.000000</td>\n <td>0.000000</td>\n <td>3.000000</td>\n <td>1.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>max</th>\n <td>5772.000000</td>\n <td>1.567070e+06</td>\n <td>1.175062e+07</td>\n <td>123242.000000</td>\n <td>6.000000</td>\n <td>1420.000000</td>\n <td>23.000000</td>\n <td>3030.000000</td>\n <td>1.000000</td>\n <td>1133.000000</td>\n <td>32.000000</td>\n <td>1.000000</td>\n </tr>\n <tr>\n <th rowspan=\"8\" valign=\"top\">1</th>\n <th>count</th>\n <td>70136.000000</td>\n <td>7.013600e+04</td>\n <td>7.013600e+04</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n <td>70136.000000</td>\n </tr>\n <tr>\n <th>mean</th>\n <td>25.405826</td>\n <td>5.484138e+05</td>\n <td>6.429625e+06</td>\n <td>657.454745</td>\n <td>2.573785</td>\n <td>250.666662</td>\n <td>12.698999</td>\n <td>2278.689104</td>\n <td>0.197260</td>\n <td>2.485414</td>\n <td>0.379377</td>\n <td>0.082197</td>\n </tr>\n <tr>\n <th>std</th>\n <td>104.635848</td>\n <td>4.119402e+05</td>\n <td>3.251247e+06</td>\n <td>2955.744169</td>\n <td>1.821750</td>\n <td>282.437137</td>\n <td>6.216131</td>\n <td>392.882446</td>\n <td>0.397933</td>\n <td>2.629112</td>\n <td>0.600391</td>\n <td>0.274667</td>\n </tr>\n <tr>\n <th>min</th>\n 
<td>0.000000</td>\n <td>3.000000e+00</td>\n <td>1.300000e+01</td>\n <td>-34.000000</td>\n <td>0.000000</td>\n <td>-1214.000000</td>\n <td>0.000000</td>\n <td>1568.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>25%</th>\n <td>0.000000</td>\n <td>1.960002e+05</td>\n <td>3.714598e+06</td>\n <td>6.000000</td>\n <td>1.000000</td>\n <td>21.000000</td>\n <td>8.000000</td>\n <td>1955.000000</td>\n <td>0.000000</td>\n <td>1.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>50%</th>\n <td>2.000000</td>\n <td>4.604070e+05</td>\n <td>6.607204e+06</td>\n <td>64.000000</td>\n <td>2.000000</td>\n <td>149.000000</td>\n <td>13.000000</td>\n <td>2253.000000</td>\n <td>0.000000</td>\n <td>2.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>75%</th>\n <td>13.000000</td>\n <td>8.510290e+05</td>\n <td>9.263690e+06</td>\n <td>394.000000</td>\n <td>4.000000</td>\n <td>391.000000</td>\n <td>18.000000</td>\n <td>2591.000000</td>\n <td>0.000000</td>\n <td>3.000000</td>\n <td>1.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>max</th>\n <td>5117.000000</td>\n <td>1.567174e+06</td>\n <td>1.175065e+07</td>\n <td>209631.000000</td>\n <td>6.000000</td>\n <td>1437.000000</td>\n <td>23.000000</td>\n <td>3030.000000</td>\n <td>1.000000</td>\n <td>139.000000</td>\n <td>23.000000</td>\n <td>1.000000</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"text/plain": " OwnerUndeletedAnswerCountAtPostTime OwnerUserId \\\nOpenStatus \n0 count 70136.000000 7.013600e+04 \n mean 13.529956 7.562534e+05 \n std 72.280095 4.411708e+05 \n min 0.000000 4.000000e+00 \n 25% 0.000000 3.938680e+05 \n 50% 0.000000 7.670040e+05 \n 75% 5.000000 1.106342e+06 \n max 5772.000000 1.567070e+06 \n1 count 70136.000000 7.013600e+04 \n mean 25.405826 5.484138e+05 \n std 104.635848 4.119402e+05 \n min 0.000000 3.000000e+00 \n 25% 0.000000 1.960002e+05 \n 50% 2.000000 4.604070e+05 \n 75% 13.000000 8.510290e+05 \n max 5117.000000 1.567174e+06 \n\n PostId ReputationAtPostCreation day \\\nOpenStatus \n0 count 7.013600e+04 70136.000000 70136.000000 \n mean 7.589407e+06 355.813063 2.641596 \n std 3.106982e+06 1917.610162 1.898102 \n min 2.300000e+01 -17.000000 0.000000 \n 25% 5.724392e+06 1.000000 1.000000 \n 50% 8.117328e+06 18.000000 3.000000 \n 75% 1.017955e+07 148.000000 4.000000 \n max 1.175062e+07 123242.000000 6.000000 \n1 count 7.013600e+04 70136.000000 70136.000000 \n mean 6.429625e+06 657.454745 2.573785 \n std 3.251247e+06 2955.744169 1.821750 \n min 1.300000e+01 -34.000000 0.000000 \n 25% 3.714598e+06 6.000000 1.000000 \n 50% 6.607204e+06 64.000000 2.000000 \n 75% 9.263690e+06 394.000000 4.000000 \n max 1.175065e+07 209631.000000 6.000000 \n\n days_created hour owner_age \\\nOpenStatus \n0 count 70136.000000 70136.000000 70136.000000 \n mean 180.737681 12.272428 2098.656168 \n std 252.647218 6.163602 385.402181 \n min -1177.000000 0.000000 1568.000000 \n 25% 1.000000 8.000000 1799.000000 \n 50% 62.000000 13.000000 2013.000000 \n 75% 268.000000 17.000000 2331.000000 \n max 1420.000000 23.000000 3030.000000 \n1 count 70136.000000 70136.000000 70136.000000 \n mean 250.666662 12.698999 2278.689104 \n std 282.437137 6.216131 392.882446 \n min -1214.000000 0.000000 1568.000000 \n 25% 21.000000 8.000000 1955.000000 \n 50% 149.000000 13.000000 2253.000000 \n 75% 391.000000 18.000000 2591.000000 \n max 1437.000000 23.000000 3030.000000 \n\n 
posted_in_the_night qn_count qn_count_title \\\nOpenStatus \n0 count 70136.000000 70136.000000 70136.000000 \n mean 0.223537 2.174604 0.457525 \n std 0.416618 5.591464 0.655759 \n min 0.000000 0.000000 0.000000 \n 25% 0.000000 1.000000 0.000000 \n 50% 0.000000 2.000000 0.000000 \n 75% 0.000000 3.000000 1.000000 \n max 1.000000 1133.000000 32.000000 \n1 count 70136.000000 70136.000000 70136.000000 \n mean 0.197260 2.485414 0.379377 \n std 0.397933 2.629112 0.600391 \n min 0.000000 0.000000 0.000000 \n 25% 0.000000 1.000000 0.000000 \n 50% 0.000000 2.000000 0.000000 \n 75% 0.000000 3.000000 1.000000 \n max 1.000000 139.000000 23.000000 \n\n weekend \nOpenStatus \n0 count 70136.000000 \n mean 0.097197 \n std 0.296228 \n min 0.000000 \n 25% 0.000000 \n 50% 0.000000 \n 75% 0.000000 \n max 1.000000 \n1 count 70136.000000 \n mean 0.082197 \n std 0.274667 \n min 0.000000 \n 25% 0.000000 \n 50% 0.000000 \n 75% 0.000000 \n max 1.000000 " | |
}, | |
"metadata": {}, | |
"execution_count": 16 | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "### Make features from all the data analysis above" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Find all the columns that start with 'Tag' so that we can count, combine them in make features\ntag_cols = [x for x in df.columns if 'Tag' in x]\n\n# off topic, not constructive, not a real question, or too localized\ndef make_features(df):\n \n #get the length of the title \n #hypothesis: longer title means its a real question\n df['len_Title'] = df['Title'].apply(len)\n df['wc_Title'] = df['Title'].apply(lambda x: len(x.split()) )\n \n #get the length of the post \n #hypothesis: longer post means its a real question\n df['len_Post'] = df['BodyMarkdown'].apply(len)\n df['wc_Post'] = df['BodyMarkdown'].apply(lambda x: len(x.split()) )\n \n #clean negative reputation since some models dont work well\n df.loc[df['ReputationAtPostCreation'] < 0, 'ReputationAtPostCreation'] = 0\n \n #get the count of real valid tags excluding NaN \n #hypothesis: real questions will have atleast one tag \n df['tag_count'] = df[tag_cols].apply(lambda tag_list: tag_list.notnull().sum(), axis=1)\n # append word 'Tag ' to distinguish it in text vectorization\n df['tag_all'] = df[tag_cols].apply(lambda tag_list: tag_list.str.cat(sep= 'Tag , '), axis=1) \n # append word 'Tag ' to the last element in list\n df['tag_all'] = df['tag_all'].astype(str) + 'Tag '\n \n #check if the topic has a question mark\n #hypothesis: real questions will have a question mark\n# df['has_qn'] = df['BodyMarkdown'].apply(lambda x: 1 if re.match(r'(how|why|what|when|\\?)', x, re.M) else 0)\n # find the words that lead to a qn and count the occurances\n df['qn_count'] = df['BodyMarkdown'].str.count(r'(how|why|what|when|\\?)', re.M)\n df['qn_count_title'] = df['Title'].str.count(r'(how|why|what|when|\\?)', re.M)\n \n #check if the topic and op creation date are within the same day \n #hypothesis: same day postings might be more prone to closing \n df['days_created'] = (df['PostCreationDate'] - df['OwnerCreationDate']).dt.days\n df['owner_age'] = (datetime.now() - df['OwnerCreationDate']).dt.days\n# 
df['days_closed_owner'] = (df['PostClosedDate'] - df['OwnerCreationDate']).dt.days\n \n #get the mean on the reputation of the owner\n #hypothesis: same day postings might be more prone to closing \n df['more_history'] = (df['days_created'] > 7.).astype(int)\n df['more_tags'] = (df['tag_count'] > 2.).astype(int)\n df['more_posts'] = (df['OwnerUndeletedAnswerCountAtPostTime'] > 5.).astype(int)\n df['more_reps'] = (df['ReputationAtPostCreation'] > 18.).astype(int)\n \n df['smart_author'] = df.OwnerUserId.isin(smart_authors.index).astype(int)\n df['freq_author'] = df.OwnerUserId.isin(freq_authors.index).astype(int)\n \n df['day'] = (df.PostCreationDate.dt.weekday)\n df['hour'] = (df.PostCreationDate.dt.hour)\n df['weekend'] = (df.day > 5).astype(int) \n df['posted_in_the_night'] = ((df.hour >0) & (df.hour < 8)).astype(int)\n df['seasoned_authors'] = ((df.owner_age >= 2000) & (df.owner_age <=3200)).astype(int)\n \n return df\n\ndef train_features(df):\n #Also, check if the topic close and create dates delta\n #hypothesis: want to see how long it takes to close the topic might be more prone to closing \n df['days_closed'] = (df['PostClosedDate'] - df['PostCreationDate']).dt.days\n return df\n\ndf = make_features(df)\n# df = train_features(df)", | |
"execution_count": 17, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "#### Helper functions for later use in the pipelines" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def ft_get_text_title(df):\n return df['Title'].str.cat(df['tag_all'], sep=' __ ')\n\ndef ft_get_text_topic(df):\n return df['BodyMarkdown'].str.cat(df['tag_all'], sep=' __ ')\n\ndef ft_get_text_all(df):\n return df['tag_all'].str.cat(df['Title'], sep=' __ ').str.cat(df['BodyMarkdown'], sep=' __ ')\n\nft_cols = ['ReputationAtPostCreation', 'OwnerUndeletedAnswerCountAtPostTime']\nft_cols_extra = ['owner_age', 'len_Title', 'len_Post', 'tag_count', 'more_history', 'more_reps', 'more_tags', \n 'wc_Post', 'wc_Title', 'more_posts', 'smart_author', 'freq_author', \n 'weekend', 'posted_in_the_night', 'seasoned_authors', 'qn_count', 'qn_count_title']\n# 'has_qn', 'has_qn_title', \n\n\ndef ft_get_features(df):\n return df[ft_cols]\n", | |
"execution_count": 18, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
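"source": "#### Null accuracy: the fraction of closed questions (`OpenStatus == 0`), i.e. the accuracy of always predicting closed" | |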
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "total_closed = (df.OpenStatus == 0)\nprint('null accuracy is: ', total_closed.sum()/df['OpenStatus'].count())", | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "null accuracy is: 0.5\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "# Model Evaluation\nTrying three models for the feature evaluation\n - knn\n - LDA\n - LogReg" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "#Model building cell\n# Try out knn LDA and logreg\n\nfrom sklearn.neighbors import KNeighborsClassifier\nknn = KNeighborsClassifier(n_neighbors=100)\n\nfrom sklearn.lda import LDA\nlda = LDA()\n\nfrom sklearn.linear_model import LogisticRegression, LinearRegression\nlinreg = LinearRegression()\nlogreg = LogisticRegression()\n\n# model_list = [knn, lda, linreg, logreg]\nmodel_list = [knn, lda, logreg]", | |
"execution_count": 20, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "/work/conda3/lib/python3.5/site-packages/sklearn/lda.py:4: DeprecationWarning: lda.LDA has been moved to discriminant_analysis.LinearDiscriminantAnalysis in 0.17 and will be removed in 0.19\n \"in 0.17 and will be removed in 0.19\", DeprecationWarning)\n", | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "### Model Split and evaluation in a loop" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "X_train, X_test, y_train, y_test = cross_validation.train_test_split(df[ft_cols], df['OpenStatus'], random_state = 1)\n\ndef model_fit_predict(model):\n model.fit(X_train, y_train)\n pred_probs = model.predict_proba(X_test)\n # return the model, log loss score and roc_auc score (just for exploration)\n print('log loss value is: ', metrics.log_loss(y_test, pred_probs[:, 1]))\n print('roc auc value is: ', metrics.roc_auc_score(y_test, pred_probs[:, 1])) #just for comparison and not used \n return model, metrics.log_loss(y_test, pred_probs[:, 1]), metrics.roc_auc_score(y_test, pred_probs[:, 1])\n\n# Try out each model one by one\n[model_fit_predict(model) for model in model_list]", | |
"execution_count": 21, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "log loss value is: 0.676232990432\nroc auc value is: 0.605230954073\nlog loss value is: 0.690411966135\nroc auc value is: 0.608855364454\nlog loss value is: 0.689967792451\nroc auc value is: 0.607034694214\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "[(KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n metric_params=None, n_jobs=1, n_neighbors=100, p=2,\n weights='uniform'), 0.676232990431617, 0.60523095407296323),\n (LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,\n solver='svd', store_covariance=False, tol=0.0001),\n 0.69041196613491651,\n 0.60885536445383315),\n (LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n verbose=0, warm_start=False),\n 0.68996779245069317,\n 0.60703469421430767)]" | |
}, | |
"metadata": {}, | |
"execution_count": 21 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false | |
}, | |
"cell_type": "raw", | |
"source": "### commented out for now\n# feature engineering\n#knn is the winner here\n# take the knn predicted value and try to add features that would improve this score\nbase_pred = 0.676232990431617 \nbase_ft_cols = ft_cols.copy()\n\ndef add_feature(x):\n ft_cols.append(x)\n X_train, X_test, y_train, y_test = cross_validation.train_test_split(df[ft_cols], df['OpenStatus'], random_state = 1)\n score_ = model_fit_predict(knn)\n print('prediction = ', score_)\n if score_[1] > base_pred:\n print('feature did not improve score, removing ', x)\n ft_cols.remove(x)\n else:\n print('feature improved score, adding ', x, ft_cols)\n# base_pred = score_[1]\n print(ft_cols) \n \n \n[add_feature(x) for x in ft_cols_extra]\nft_cols" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "### kNN seems to be the winner here with the lowest log loss\n### Let's build a CountVectorizer and analyze the text" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Lets mine the title and topic of the post for useful information\n# Use Count vectorizer \nfrom sklearn.pipeline import Pipeline, make_pipeline, make_union \nfrom sklearn.preprocessing import FunctionTransformer\nfrom sklearn.feature_extraction.text import CountVectorizer\nfrom sklearn.feature_extraction.text import TfidfVectorizer \nfrom sklearn.naive_bayes import MultinomialNB\n\ncv = CountVectorizer()\n# CountVectorizer(input='content', encoding='utf-8', decode_error='strict', strip_accents=None, lowercase=True, preprocessor=None, \n# tokenizer=None, stop_words=None, token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b', \n# ngram_range=(1, 1), analyzer='word', max_df=1.0, min_df=1, max_features=None, vocabulary=None, binary=False, dtype=)\n# cv = CountVectorizer(token_pattern=r'([\\w ]+)')\nnb = MultinomialNB()\ntf = TfidfVectorizer()", | |
"execution_count": 120, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": true, | |
"trusted": false | |
}, | |
"cell_type": "markdown", | |
"source": "### Let's now analyze the text data from the body, title and tags" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "text_ = ft_get_text_all(df)\nX_train, X_test, y_train, y_test = cross_validation.train_test_split(text_, df['OpenStatus'], random_state = 0)\nX_test.head()", | |
"execution_count": 121, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "135949 mysqlTag , linuxTag , ubuntuTag __ Adding use...\n79828 e-commerceTag __ What technology to use for m...\n131502 apacheTag , tomcat6Tag , struts2Tag , redirect...\n5510 objective-cTag , osxTag , cocoaTag __ iOS-lik...\n94976 consultingTag , contractingTag __ Where do yo...\nName: tag_all, dtype: object" | |
}, | |
"metadata": {}, | |
"execution_count": 121 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# helper function to dump useful info from CV\ndef analyze_cv(cv):\n cv.fit(X_train, y_train) \n print('features count is: ', len(cv.get_feature_names()))\n print('features are: ', cv.get_feature_names()[:50])\n print('stopwords are: ', len(cv.stop_words_))\n# cv.transform(X_test)\n# return metrics.log_loss(cv.predict_proba(X_test))", | |
"execution_count": 122, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Creating the regex pattern to include relevant data. \n# include words starting with alphabets only - [^\\W\\d]\nreg_pattern = r'([^\\W\\d][\\w.-]{2,})'\ncv = CountVectorizer(token_pattern= reg_pattern, min_df=1, max_features=21000, stop_words='english')\n# cv = CountVectorizer(token_pattern= r'([a-zA-Z.-]{3,})', min_df=20, max_features=11000, stop_words='english')\nanalyze_cv(cv)", | |
"execution_count": 123, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "features count is: 21000\nfeatures are: ['_22', '___', '___.', '___.__', '____', '_____', '______', '__adjust_heap', '__android_log_print', '__asm', '__attribute__', '__call', '__call__', '__cdecl', '__cfrunlooprun', '__construct', '__declspec', '__device__', '__dopostback', '__file__', '__getitem__', '__gnu_cxx', '__init__', '__init__.py', '__line__', '__main__', '__name__', '__new__', '__normal_iterator', '__nsautoreleasenopool', '__syncthreads', '__text', '__thiscall', '__unicode__', '_alloc', '_bitmapcache.get', '_blank', '_chart', '_context', '_cookie', '_data', '_debug', '_default', '_dword', '_elem', '_files', '_gaq', '_gaq.push', '_get', '_helper-']\nstopwords are: 457789\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "tf = TfidfVectorizer(token_pattern= reg_pattern, min_df=20, max_features=21000, stop_words='english')\n# cv = CountVectorizer(token_pattern= r'([a-zA-Z.-]{3,})', min_df=20, max_features=11000, stop_words='english')\nanalyze_cv(tf)", | |
"execution_count": 124, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "features count is: 14584\nfeatures are: ['___', '____', '__cdecl', '__construct', '__declspec', '__file__', '__init__', '__init__.py', '__main__', '__name__', '__unicode__', '_blank', '_cookie', '_default', '_files', '_get', '_id', '_name', '_post', '_request', '_server', '_session', '_tchar', '_tmain', 'a-z', 'a-z0-9', 'a-za-z', 'a-za-z0-9', 'a.id', 'a.length', 'aaa', 'aac', 'abandon', 'abandoned', 'abc', 'abc.com', 'abcd', 'abilities', 'ability', 'able', 'abort', 'aborted', 'aborting', 'about.', 'above.', 'abs', 'absence', 'absent', 'absolute', 'absolutely']\nstopwords are: 464205\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# helper function to calculate score\ndef calculate_score(cv, model):\n # fit the model\n model.fit(cv.transform(X_train), y_train)\n #predict the test set\n model_pred = model.predict_proba(cv.transform(X_test))\n # calculate the log loss\n return metrics.log_loss(y_test, model_pred[:, 1]), model_pred", | |
"execution_count": 132, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Evaluate Logreg and NB for this data and pick the winner\nml = [LogisticRegression(), MultinomialNB()]\n# ml = [MultinomialNB()]\nresults = [calculate_score(cv, x) for x in ml]\nresults", | |
"execution_count": 133, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "[(0.62316285511688785, array([[ 0.35072733, 0.64927267],\n [ 0.93336409, 0.06663591],\n [ 0.29216522, 0.70783478],\n ..., \n [ 0.99118097, 0.00881903],\n [ 0.03791873, 0.96208127],\n [ 0.6758225 , 0.3241775 ]])),\n (2.0321573062968925, array([[ 9.87772058e-01, 1.22279419e-02],\n [ 1.00000000e+00, 1.32165500e-11],\n [ 1.34182381e-01, 8.65817619e-01],\n ..., \n [ 1.00000000e+00, 3.35806988e-10],\n [ 9.54901831e-10, 9.99999999e-01],\n [ 9.83588649e-01, 1.64113510e-02]]))]" | |
}, | |
"metadata": {}, | |
"execution_count": 133 | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Create pipelines\nI have created these different pipelines\n - (Get_text --> CV & Get_features) --> Logreg\n - (Get_text --> TF & Get_features) --> Logreg\n - (Get_text --> TF & Get_features) --> Knn \n - Get_features --> Logreg " | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "#create pipelines\npipe_cv = make_pipeline(FunctionTransformer(ft_get_text_all, validate=False), cv)\npipe_tf = make_pipeline(FunctionTransformer(ft_get_text_all, validate=False), tf)\n# union_cv_ft = make_union(pipe_cv_2, FunctionTransformer(ft_get_features, validate=False))\nunion_cv_ft = make_union(pipe_cv, FunctionTransformer(ft_get_features, validate=False))\nunion_tf_ft = make_union(pipe_tf, FunctionTransformer(ft_get_features, validate=False))\nunion_cv_ft_nb = make_pipeline(union_cv_ft, nb)\nunion_tf_ft_nb = make_pipeline(union_tf_ft, nb)\nunion_cv_ft_knn = make_pipeline(union_cv_ft, knn)\nunion_cv_ft_logreg = make_pipeline(union_cv_ft, logreg)\nunion_tf_ft_logreg = make_pipeline(union_tf_ft, logreg)\nunion_cv_ft_lda = make_pipeline(union_cv_ft, lda)\n\nstandalone_ft_knn = make_pipeline(FunctionTransformer(ft_get_features, validate=False), knn) \nstandalone_ft_logreg = make_pipeline(FunctionTransformer(ft_get_features, validate=False), logreg) \nstandalone_cv_logreg = make_pipeline(pipe_cv, logreg) \nstandalone_tf_logreg = make_pipeline(pipe_tf, logreg) \nstandalone_cv_nb = make_pipeline(pipe_cv, nb) \nstandalone_tf_nb = make_pipeline(pipe_tf, nb) \nstandalone_cv_lda = make_pipeline(pipe_cv, lda) \n##Standalone knn \n#pipe line for knn\n# from sklearn.neighbors import KNeighborsClassifier \n# knn = KNeighborsClassifier(n_neighbors=100)", | |
"execution_count": 134, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ft_cols = ['ReputationAtPostCreation', 'OwnerUndeletedAnswerCountAtPostTime', 'owner_age', 'len_Post', 'tag_count', 'wc_Post', \n 'wc_Title', 'smart_author', 'freq_author', 'weekend', 'posted_in_the_night', 'seasoned_authors', 'qn_count'] ", | |
"execution_count": 135, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "raw", | |
"source": "## Try out feature selection later" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "raw", | |
"source": "from sklearn.feature_selection import SelectKBest, chi2\nX_new = SelectKBest(chi2, k=7).fit_transform(df[ft_cols], df['OpenStatus'])\nX_new.shape\nX_new.dtype.names" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "### Feature list" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ft_cols = [ 'smart_author', 'freq_author', 'weekend', 'posted_in_the_night', 'seasoned_authors' ]\nextra_cols = ['ReputationAtPostCreation', 'OwnerUndeletedAnswerCountAtPostTime', 'owner_age', 'tag_count', 'wc_Post', 'wc_Title',\n 'qn_count', 'more_history', 'more_reps', 'more_tags', 'more_posts'] \nlen(ft_cols)", | |
"execution_count": 136, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "5" | |
}, | |
"metadata": {}, | |
"execution_count": 136 | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "#### Feature engineering \nStart with the basic features, add one extra feature at a time, and check the score to decide whether to keep the feature or remove it" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "base_Score = cross_validation.cross_val_score(standalone_ft_logreg, X=(df), y=df['OpenStatus'], scoring='neg_log_loss', cv=2)[0]\nfor x in extra_cols:\n# print('testing feature: ', x)\n ft_cols.append(x)\n res = cross_validation.cross_val_score(standalone_ft_logreg, X=(df), y=df['OpenStatus'], scoring='neg_log_loss', cv=2)\n print('scores are: ', res)\n if res[0] > base_Score:\n print('feature improved score, adding: ', x)\n else:\n# print('feature did not improve score, removing it')\n ft_cols.remove(x)\nft_cols", | |
"execution_count": 137, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "scores are: [-0.65552302 -0.65418349]\nfeature improved score, adding: ReputationAtPostCreation\nscores are: [-0.65535369 -0.6540151 ]\nfeature improved score, adding: OwnerUndeletedAnswerCountAtPostTime\nscores are: [-0.65136671 -0.64981793]\nfeature improved score, adding: owner_age\nscores are: [-0.64631204 -0.64531099]\nfeature improved score, adding: tag_count\nscores are: [-0.64171825 -0.63983102]\nfeature improved score, adding: wc_Post\nscores are: [-0.64057469 -0.63956445]\nfeature improved score, adding: wc_Title\nscores are: [-0.64068241 -0.63993371]\nfeature improved score, adding: qn_count\nscores are: [-0.63898029 -0.6375153 ]\nfeature improved score, adding: more_history\nscores are: [-0.63849748 -0.63697206]\nfeature improved score, adding: more_reps\nscores are: [-0.63853454 -0.63935321]\nfeature improved score, adding: more_tags\nscores are: [-0.63842891 -0.6365 ]\nfeature improved score, adding: more_posts\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "['smart_author',\n 'freq_author',\n 'weekend',\n 'posted_in_the_night',\n 'seasoned_authors',\n 'ReputationAtPostCreation',\n 'OwnerUndeletedAnswerCountAtPostTime',\n 'owner_age',\n 'tag_count',\n 'wc_Post',\n 'wc_Title',\n 'qn_count',\n 'more_history',\n 'more_reps',\n 'more_tags',\n 'more_posts']" | |
}, | |
"metadata": {}, | |
"execution_count": 137 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": true | |
}, | |
"cell_type": "code", | |
"source": "### Try out CV scores for different pipelines we created and pick the best by looking at the scores", | |
"execution_count": 138, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# get scores for cv + nb\ncross_validation.cross_val_score(union_cv_ft_nb, X=((df)), y=df['OpenStatus'], scoring='neg_log_loss', cv=5)", | |
"execution_count": 139, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "array([-15.44386128, -15.4065413 , -15.45383486, -15.40184179, -15.39544195])" | |
}, | |
"metadata": {}, | |
"execution_count": 139 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# get scores for cv + nb\ncross_validation.cross_val_score(union_cv_ft_nb, X=((df)), y=df['OpenStatus'], scoring='neg_log_loss', cv=2)", | |
"execution_count": 140, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "array([-15.44005523, -15.40238521])" | |
}, | |
"metadata": {}, | |
"execution_count": 140 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Get scores for cv _ logreg\ncross_validation.cross_val_score(union_cv_ft_logreg, X=(df), y=df['OpenStatus'], scoring='neg_log_loss', cv=2)", | |
"execution_count": 141, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "array([-0.52021453, -0.52051107])" | |
}, | |
"metadata": {}, | |
"execution_count": 141 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Get scores for tf _ logreg\ncross_validation.cross_val_score(union_tf_ft_logreg, X=(df), y=df['OpenStatus'], scoring='neg_log_loss', cv=5)\n", | |
"execution_count": 142, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "array([-0.47722675, -0.47911976, -0.47324098, -0.47876792, -0.49501057])" | |
}, | |
"metadata": {}, | |
"execution_count": 142 | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "### Randomized grid to tune the parameters" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from sklearn.grid_search import RandomizedSearchCV\nnew_param_grid = { \n 'featureunion__pipeline__tfidfvectorizer__token_pattern':[reg_pattern],\n 'featureunion__pipeline__tfidfvectorizer__min_df': list(range(0, 51, 10)),\n 'featureunion__pipeline__tfidfvectorizer__max_features': list(range(5000, 105001, 5000)),\n 'featureunion__pipeline__tfidfvectorizer__stop_words': ['english', None],\n 'featureunion__n_jobs': [-1],\n 'logisticregression__n_jobs': [-1],\n# 'multinomialnb__alpha': [0.1, 0.3, 0.6, 0.8, 1] #list(range(0, 1, 0.1))]\n }\nrand_grid = RandomizedSearchCV(union_tf_ft_logreg, new_param_grid, cv=5, n_iter=8, scoring='neg_log_loss')\nrand_grid.fit(df, df['OpenStatus'])\nrand_grid", | |
"execution_count": 143, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "RandomizedSearchCV(cv=5, error_score='raise',\n estimator=Pipeline(steps=[('featureunion', FeatureUnion(n_jobs=1,\n transformer_list=[('pipeline', Pipeline(steps=[('functiontransformer', FunctionTransformer(accept_sparse=False,\n func=<function ft_get_text_all at 0x7fdb198448c8>,\n inv_kw_args=None, inverse_func=None, kw_args=None, pass_y=False...ty='l2', random_state=None, solver='liblinear', tol=0.0001,\n verbose=0, warm_start=False))]),\n fit_params={}, iid=True, n_iter=8, n_jobs=1,\n param_distributions={'logisticregression__n_jobs': [-1], 'featureunion__pipeline__tfidfvectorizer__token_pattern': ['([^\\\\W\\\\d][\\\\w.-]{2,})'], 'featureunion__n_jobs': [-1], 'featureunion__pipeline__tfidfvectorizer__min_df': [0, 10, 20, 30, 40, 50], 'featureunion__pipeline__tfidfvectorizer__stop_word...40000, 45000, 50000, 55000, 60000, 65000, 70000, 75000, 80000, 85000, 90000, 95000, 100000, 105000]},\n pre_dispatch='2*n_jobs', random_state=None, refit=True,\n scoring='neg_log_loss', verbose=0)" | |
}, | |
"metadata": {}, | |
"execution_count": 143 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "#print the best model\nrand_grid.best_score_, rand_grid.best_estimator_", | |
"execution_count": 144, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "(-0.47570733351921424, Pipeline(steps=[('featureunion', FeatureUnion(n_jobs=-1,\n transformer_list=[('pipeline', Pipeline(steps=[('functiontransformer', FunctionTransformer(accept_sparse=False,\n func=<function ft_get_text_all at 0x7fdb198448c8>,\n inv_kw_args=None, inverse_func=None, kw_args=None, pass_y=Fals...ty='l2', random_state=None, solver='liblinear', tol=0.0001,\n verbose=0, warm_start=False))]))" | |
}, | |
"metadata": {}, | |
"execution_count": 144 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"source": "rand_grid.best_params_", | |
"execution_count": 149, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "{'featureunion__n_jobs': -1,\n 'featureunion__pipeline__tfidfvectorizer__max_features': 45000,\n 'featureunion__pipeline__tfidfvectorizer__min_df': 0,\n 'featureunion__pipeline__tfidfvectorizer__stop_words': None,\n 'featureunion__pipeline__tfidfvectorizer__token_pattern': '([^\\\\W\\\\d][\\\\w.-]{2,})',\n 'logisticregression__n_jobs': -1}" | |
}, | |
"metadata": {}, | |
"execution_count": 149 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# rand_grid_.best_score_\n# rand_grid.fit(df, df['OpenStatus'])\npred2 = rand_grid.predict_proba(make_features(test_df))", | |
"execution_count": 145, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "pd.DataFrame({'id': test_df.index, 'OpenStatus': pred2[:, 1]}).set_index('id').to_csv('../data/kaggle_stackoverflow_tf_logreg3.csv')", | |
"execution_count": 146, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "raw", | |
"source": "------ End of Model -------\nThe code below is for manual ensembling." | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Manual Ensembling" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "raw", | |
"source": "# Get scores for knn alone \ncross_validation.cross_val_score(standalone_ft_knn, X=(df), y=df['OpenStatus'], scoring='neg_log_loss', cv=2)\n\n# Get scores for cv _ nb alone - no features added \ncross_validation.cross_val_score(standalone_cv_nb, X=(df), y=df['OpenStatus'], scoring='neg_log_loss', cv=2)\n\n# Get scores for tf _ nb \ncross_validation.cross_val_score(standalone_tf_nb, X=(df), y=df['OpenStatus'], scoring='neg_log_loss', cv=2)" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "union_cv_ft_logreg.steps", | |
"execution_count": 147, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "[('featureunion', FeatureUnion(n_jobs=1,\n transformer_list=[('pipeline', Pipeline(steps=[('functiontransformer', FunctionTransformer(accept_sparse=False,\n func=<function ft_get_text_all at 0x7fdb198448c8>,\n inv_kw_args=None, inverse_func=None, kw_args=None, pass_y=False,\n validate=False)), ('countvectorizer', Coun... inv_kw_args=None, inverse_func=None, kw_args=None, pass_y=False,\n validate=False))],\n transformer_weights=None)),\n ('logisticregression',\n LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n verbose=0, warm_start=False))]" | |
}, | |
"metadata": {}, | |
"execution_count": 147 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "#for manual ensemble model\ndef avg_predictions(m1, m2, test_df):\n df_ = make_features(test_df)\n pred1 = m1.predict_proba(df_)\n pred2 = rand_grid_.predict_proba(df_)\n return (pred1 + pred2)/2\n \n \navg_pred = avg_predictions(rand_grid, rand_grid_, test_df)\nrand_grid.best_score_, rand_grid_.best_score_\n# pred_rand = rand_grid_nb.predict_proba((make_features(test_df)))\n# test_df.columns", | |
"execution_count": 148, | |
"outputs": [ | |
{ | |
"output_type": "error", | |
"ename": "NameError", | |
"evalue": "name 'rand_grid_' is not defined", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-148-83b91ab30592>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[0mavg_pred\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mavg_predictions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrand_grid\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrand_grid_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtest_df\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 10\u001b[0m \u001b[0mrand_grid\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrand_grid_\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;31m# pred_rand = rand_grid_nb.predict_proba((make_features(test_df)))\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;31mNameError\u001b[0m: name 'rand_grid_' is not defined" | |
] | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "avg_pred[:, 1].shape", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "cv", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "raw", | |
"source": "#check if we have the tags captured as features\nlen([x for x in cv.stop_words_ if x.endswith('tag')])\n\nft_list = set(cv.get_feature_names())\n\nft_set = set() \n# tag_set = df[tag_cols].apply(lambda x: ft_set.update(set(x)))\ntag_set = df[tag_cols].apply(lambda x: (set(x)))\n\n[ft_set.update(x) for x in tag_set.values]\n\nlen(ft_list.intersection(tags_set))" | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"nbconvert_exporter": "python", | |
"version": "3.5.1", | |
"name": "python", | |
"file_extension": ".py", | |
"codemirror_mode": { | |
"version": 3, | |
"name": "ipython" | |
}, | |
"mimetype": "text/x-python", | |
"pygments_lexer": "ipython3" | |
}, | |
"gist": { | |
"id": "277421dcbc78b2368d25d9759a72f062", | |
"data": { | |
"description": "MLtext3/submissions/stackexchange-kaggle-Copy3.ipynb", | |
"public": true | |
} | |
}, | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/277421dcbc78b2368d25d9759a72f062" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment