Last active
August 23, 2018 23:05
-
-
Save ClebsonDantasUchoa/f5d668ad876636b12d3d94b5f327bab5 to your computer and use it in GitHub Desktop.
Casa
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "%matplotlib inline\n", | |
| "import pandas as pd\n", | |
| "from sklearn import tree" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Id</th>\n", | |
| " <th>MSSubClass</th>\n", | |
| " <th>MSZoning</th>\n", | |
| " <th>LotFrontage</th>\n", | |
| " <th>LotArea</th>\n", | |
| " <th>Street</th>\n", | |
| " <th>Alley</th>\n", | |
| " <th>LotShape</th>\n", | |
| " <th>LandContour</th>\n", | |
| " <th>Utilities</th>\n", | |
| " <th>...</th>\n", | |
| " <th>PoolArea</th>\n", | |
| " <th>PoolQC</th>\n", | |
| " <th>Fence</th>\n", | |
| " <th>MiscFeature</th>\n", | |
| " <th>MiscVal</th>\n", | |
| " <th>MoSold</th>\n", | |
| " <th>YrSold</th>\n", | |
| " <th>SaleType</th>\n", | |
| " <th>SaleCondition</th>\n", | |
| " <th>SalePrice</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>60</td>\n", | |
| " <td>RL</td>\n", | |
| " <td>65.0</td>\n", | |
| " <td>8450</td>\n", | |
| " <td>Pave</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Reg</td>\n", | |
| " <td>Lvl</td>\n", | |
| " <td>AllPub</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0</td>\n", | |
| " <td>2</td>\n", | |
| " <td>2008</td>\n", | |
| " <td>WD</td>\n", | |
| " <td>Normal</td>\n", | |
| " <td>208500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2</td>\n", | |
| " <td>20</td>\n", | |
| " <td>RL</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>9600</td>\n", | |
| " <td>Pave</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Reg</td>\n", | |
| " <td>Lvl</td>\n", | |
| " <td>AllPub</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0</td>\n", | |
| " <td>5</td>\n", | |
| " <td>2007</td>\n", | |
| " <td>WD</td>\n", | |
| " <td>Normal</td>\n", | |
| " <td>181500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>60</td>\n", | |
| " <td>RL</td>\n", | |
| " <td>68.0</td>\n", | |
| " <td>11250</td>\n", | |
| " <td>Pave</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>IR1</td>\n", | |
| " <td>Lvl</td>\n", | |
| " <td>AllPub</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0</td>\n", | |
| " <td>9</td>\n", | |
| " <td>2008</td>\n", | |
| " <td>WD</td>\n", | |
| " <td>Normal</td>\n", | |
| " <td>223500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4</td>\n", | |
| " <td>70</td>\n", | |
| " <td>RL</td>\n", | |
| " <td>60.0</td>\n", | |
| " <td>9550</td>\n", | |
| " <td>Pave</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>IR1</td>\n", | |
| " <td>Lvl</td>\n", | |
| " <td>AllPub</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0</td>\n", | |
| " <td>2</td>\n", | |
| " <td>2006</td>\n", | |
| " <td>WD</td>\n", | |
| " <td>Abnorml</td>\n", | |
| " <td>140000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>5</td>\n", | |
| " <td>60</td>\n", | |
| " <td>RL</td>\n", | |
| " <td>84.0</td>\n", | |
| " <td>14260</td>\n", | |
| " <td>Pave</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>IR1</td>\n", | |
| " <td>Lvl</td>\n", | |
| " <td>AllPub</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0</td>\n", | |
| " <td>12</td>\n", | |
| " <td>2008</td>\n", | |
| " <td>WD</td>\n", | |
| " <td>Normal</td>\n", | |
| " <td>250000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>6</td>\n", | |
| " <td>50</td>\n", | |
| " <td>RL</td>\n", | |
| " <td>85.0</td>\n", | |
| " <td>14115</td>\n", | |
| " <td>Pave</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>IR1</td>\n", | |
| " <td>Lvl</td>\n", | |
| " <td>AllPub</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>MnPrv</td>\n", | |
| " <td>Shed</td>\n", | |
| " <td>700</td>\n", | |
| " <td>10</td>\n", | |
| " <td>2009</td>\n", | |
| " <td>WD</td>\n", | |
| " <td>Normal</td>\n", | |
| " <td>143000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>7</td>\n", | |
| " <td>20</td>\n", | |
| " <td>RL</td>\n", | |
| " <td>75.0</td>\n", | |
| " <td>10084</td>\n", | |
| " <td>Pave</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Reg</td>\n", | |
| " <td>Lvl</td>\n", | |
| " <td>AllPub</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0</td>\n", | |
| " <td>8</td>\n", | |
| " <td>2007</td>\n", | |
| " <td>WD</td>\n", | |
| " <td>Normal</td>\n", | |
| " <td>307000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>8</td>\n", | |
| " <td>60</td>\n", | |
| " <td>RL</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>10382</td>\n", | |
| " <td>Pave</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>IR1</td>\n", | |
| " <td>Lvl</td>\n", | |
| " <td>AllPub</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Shed</td>\n", | |
| " <td>350</td>\n", | |
| " <td>11</td>\n", | |
| " <td>2009</td>\n", | |
| " <td>WD</td>\n", | |
| " <td>Normal</td>\n", | |
| " <td>200000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>9</td>\n", | |
| " <td>50</td>\n", | |
| " <td>RM</td>\n", | |
| " <td>51.0</td>\n", | |
| " <td>6120</td>\n", | |
| " <td>Pave</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Reg</td>\n", | |
| " <td>Lvl</td>\n", | |
| " <td>AllPub</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0</td>\n", | |
| " <td>4</td>\n", | |
| " <td>2008</td>\n", | |
| " <td>WD</td>\n", | |
| " <td>Abnorml</td>\n", | |
| " <td>129900</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>10</td>\n", | |
| " <td>190</td>\n", | |
| " <td>RL</td>\n", | |
| " <td>50.0</td>\n", | |
| " <td>7420</td>\n", | |
| " <td>Pave</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Reg</td>\n", | |
| " <td>Lvl</td>\n", | |
| " <td>AllPub</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2008</td>\n", | |
| " <td>WD</td>\n", | |
| " <td>Normal</td>\n", | |
| " <td>118000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>10 rows × 81 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n", | |
| "0 1 60 RL 65.0 8450 Pave NaN Reg \n", | |
| "1 2 20 RL 80.0 9600 Pave NaN Reg \n", | |
| "2 3 60 RL 68.0 11250 Pave NaN IR1 \n", | |
| "3 4 70 RL 60.0 9550 Pave NaN IR1 \n", | |
| "4 5 60 RL 84.0 14260 Pave NaN IR1 \n", | |
| "5 6 50 RL 85.0 14115 Pave NaN IR1 \n", | |
| "6 7 20 RL 75.0 10084 Pave NaN Reg \n", | |
| "7 8 60 RL NaN 10382 Pave NaN IR1 \n", | |
| "8 9 50 RM 51.0 6120 Pave NaN Reg \n", | |
| "9 10 190 RL 50.0 7420 Pave NaN Reg \n", | |
| "\n", | |
| " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n", | |
| "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n", | |
| "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n", | |
| "2 Lvl AllPub ... 0 NaN NaN NaN 0 \n", | |
| "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n", | |
| "4 Lvl AllPub ... 0 NaN NaN NaN 0 \n", | |
| "5 Lvl AllPub ... 0 NaN MnPrv Shed 700 \n", | |
| "6 Lvl AllPub ... 0 NaN NaN NaN 0 \n", | |
| "7 Lvl AllPub ... 0 NaN NaN Shed 350 \n", | |
| "8 Lvl AllPub ... 0 NaN NaN NaN 0 \n", | |
| "9 Lvl AllPub ... 0 NaN NaN NaN 0 \n", | |
| "\n", | |
| " MoSold YrSold SaleType SaleCondition SalePrice \n", | |
| "0 2 2008 WD Normal 208500 \n", | |
| "1 5 2007 WD Normal 181500 \n", | |
| "2 9 2008 WD Normal 223500 \n", | |
| "3 2 2006 WD Abnorml 140000 \n", | |
| "4 12 2008 WD Normal 250000 \n", | |
| "5 10 2009 WD Normal 143000 \n", | |
| "6 8 2007 WD Normal 307000 \n", | |
| "7 11 2009 WD Normal 200000 \n", | |
| "8 4 2008 WD Abnorml 129900 \n", | |
| "9 1 2008 WD Normal 118000 \n", | |
| "\n", | |
| "[10 rows x 81 columns]" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Read train.csv from the working directory and preview the first 10 rows.\n", | |
| "df = pd.read_csv('train.csv')\n", | |
| "df.head(10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "SaleCondition\n", | |
| "Abnorml 101\n", | |
| "AdjLand 4\n", | |
| "Alloca 12\n", | |
| "Family 20\n", | |
| "Normal 1198\n", | |
| "Partial 125\n", | |
| "dtype: int64\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Number of rows in each SaleCondition category.\n", | |
| "print(df.groupby('SaleCondition').size())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Id</th>\n", | |
| " <th>MSSubClass</th>\n", | |
| " <th>LotFrontage</th>\n", | |
| " <th>LotArea</th>\n", | |
| " <th>OverallQual</th>\n", | |
| " <th>OverallCond</th>\n", | |
| " <th>YearBuilt</th>\n", | |
| " <th>YearRemodAdd</th>\n", | |
| " <th>MasVnrArea</th>\n", | |
| " <th>BsmtFinSF1</th>\n", | |
| " <th>...</th>\n", | |
| " <th>SaleType_ConLw</th>\n", | |
| " <th>SaleType_New</th>\n", | |
| " <th>SaleType_Oth</th>\n", | |
| " <th>SaleType_WD</th>\n", | |
| " <th>SaleCondition_Abnorml</th>\n", | |
| " <th>SaleCondition_AdjLand</th>\n", | |
| " <th>SaleCondition_Alloca</th>\n", | |
| " <th>SaleCondition_Family</th>\n", | |
| " <th>SaleCondition_Normal</th>\n", | |
| " <th>SaleCondition_Partial</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>60</td>\n", | |
| " <td>65.0</td>\n", | |
| " <td>8450</td>\n", | |
| " <td>7</td>\n", | |
| " <td>5</td>\n", | |
| " <td>2003</td>\n", | |
| " <td>2003</td>\n", | |
| " <td>196.0</td>\n", | |
| " <td>706</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2</td>\n", | |
| " <td>20</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>9600</td>\n", | |
| " <td>6</td>\n", | |
| " <td>8</td>\n", | |
| " <td>1976</td>\n", | |
| " <td>1976</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>978</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>60</td>\n", | |
| " <td>68.0</td>\n", | |
| " <td>11250</td>\n", | |
| " <td>7</td>\n", | |
| " <td>5</td>\n", | |
| " <td>2001</td>\n", | |
| " <td>2002</td>\n", | |
| " <td>162.0</td>\n", | |
| " <td>486</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4</td>\n", | |
| " <td>70</td>\n", | |
| " <td>60.0</td>\n", | |
| " <td>9550</td>\n", | |
| " <td>7</td>\n", | |
| " <td>5</td>\n", | |
| " <td>1915</td>\n", | |
| " <td>1970</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>216</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>5</td>\n", | |
| " <td>60</td>\n", | |
| " <td>84.0</td>\n", | |
| " <td>14260</td>\n", | |
| " <td>8</td>\n", | |
| " <td>5</td>\n", | |
| " <td>2000</td>\n", | |
| " <td>2000</td>\n", | |
| " <td>350.0</td>\n", | |
| " <td>655</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>6</td>\n", | |
| " <td>50</td>\n", | |
| " <td>85.0</td>\n", | |
| " <td>14115</td>\n", | |
| " <td>5</td>\n", | |
| " <td>5</td>\n", | |
| " <td>1993</td>\n", | |
| " <td>1995</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>732</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>7</td>\n", | |
| " <td>20</td>\n", | |
| " <td>75.0</td>\n", | |
| " <td>10084</td>\n", | |
| " <td>8</td>\n", | |
| " <td>5</td>\n", | |
| " <td>2004</td>\n", | |
| " <td>2005</td>\n", | |
| " <td>186.0</td>\n", | |
| " <td>1369</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>8</td>\n", | |
| " <td>60</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>10382</td>\n", | |
| " <td>7</td>\n", | |
| " <td>6</td>\n", | |
| " <td>1973</td>\n", | |
| " <td>1973</td>\n", | |
| " <td>240.0</td>\n", | |
| " <td>859</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>9</td>\n", | |
| " <td>50</td>\n", | |
| " <td>51.0</td>\n", | |
| " <td>6120</td>\n", | |
| " <td>7</td>\n", | |
| " <td>5</td>\n", | |
| " <td>1931</td>\n", | |
| " <td>1950</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>10</td>\n", | |
| " <td>190</td>\n", | |
| " <td>50.0</td>\n", | |
| " <td>7420</td>\n", | |
| " <td>5</td>\n", | |
| " <td>6</td>\n", | |
| " <td>1939</td>\n", | |
| " <td>1950</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>851</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>10 rows × 290 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Id MSSubClass LotFrontage LotArea OverallQual OverallCond YearBuilt \\\n", | |
| "0 1 60 65.0 8450 7 5 2003 \n", | |
| "1 2 20 80.0 9600 6 8 1976 \n", | |
| "2 3 60 68.0 11250 7 5 2001 \n", | |
| "3 4 70 60.0 9550 7 5 1915 \n", | |
| "4 5 60 84.0 14260 8 5 2000 \n", | |
| "5 6 50 85.0 14115 5 5 1993 \n", | |
| "6 7 20 75.0 10084 8 5 2004 \n", | |
| "7 8 60 NaN 10382 7 6 1973 \n", | |
| "8 9 50 51.0 6120 7 5 1931 \n", | |
| "9 10 190 50.0 7420 5 6 1939 \n", | |
| "\n", | |
| " YearRemodAdd MasVnrArea BsmtFinSF1 ... \\\n", | |
| "0 2003 196.0 706 ... \n", | |
| "1 1976 0.0 978 ... \n", | |
| "2 2002 162.0 486 ... \n", | |
| "3 1970 0.0 216 ... \n", | |
| "4 2000 350.0 655 ... \n", | |
| "5 1995 0.0 732 ... \n", | |
| "6 2005 186.0 1369 ... \n", | |
| "7 1973 240.0 859 ... \n", | |
| "8 1950 0.0 0 ... \n", | |
| "9 1950 0.0 851 ... \n", | |
| "\n", | |
| " SaleType_ConLw SaleType_New SaleType_Oth SaleType_WD \\\n", | |
| "0 0 0 0 1 \n", | |
| "1 0 0 0 1 \n", | |
| "2 0 0 0 1 \n", | |
| "3 0 0 0 1 \n", | |
| "4 0 0 0 1 \n", | |
| "5 0 0 0 1 \n", | |
| "6 0 0 0 1 \n", | |
| "7 0 0 0 1 \n", | |
| "8 0 0 0 1 \n", | |
| "9 0 0 0 1 \n", | |
| "\n", | |
| " SaleCondition_Abnorml SaleCondition_AdjLand SaleCondition_Alloca \\\n", | |
| "0 0 0 0 \n", | |
| "1 0 0 0 \n", | |
| "2 0 0 0 \n", | |
| "3 1 0 0 \n", | |
| "4 0 0 0 \n", | |
| "5 0 0 0 \n", | |
| "6 0 0 0 \n", | |
| "7 0 0 0 \n", | |
| "8 1 0 0 \n", | |
| "9 0 0 0 \n", | |
| "\n", | |
| " SaleCondition_Family SaleCondition_Normal SaleCondition_Partial \n", | |
| "0 0 1 0 \n", | |
| "1 0 1 0 \n", | |
| "2 0 1 0 \n", | |
| "3 0 0 0 \n", | |
| "4 0 1 0 \n", | |
| "5 0 1 0 \n", | |
| "6 0 1 0 \n", | |
| "7 0 1 0 \n", | |
| "8 0 0 0 \n", | |
| "9 0 1 0 \n", | |
| "\n", | |
| "[10 rows x 290 columns]" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#gender = {'Abnorml': 1,'AdjLand': 2, 'Alloca': 3, 'Family': 4, 'Normal': 5, 'Partial': 6}\n", | |
| "#df.SaleCondition = [gender[item] for item in df.SaleCondition]\n", | |
| "df = pd.get_dummies( df )\n", | |
| "df.head(10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 44, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[[1 60 'RL' ... 'WD' 5 208500]\n", | |
| " [2 20 'RL' ... 'WD' 5 181500]\n", | |
| " [3 60 'RL' ... 'WD' 5 223500]\n", | |
| " ...\n", | |
| " [1458 70 'RL' ... 'WD' 5 266500]\n", | |
| " [1459 20 'RL' ... 'WD' 5 142125]\n", | |
| " [1460 20 'RL' ... 'WD' 5 147500]]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "df = df.values\n", | |
| "print(df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 45, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[[1 60 'RL' ... 2008 'WD' 5]\n", | |
| " [2 20 'RL' ... 2007 'WD' 5]\n", | |
| " [3 60 'RL' ... 2008 'WD' 5]\n", | |
| " ...\n", | |
| " [1458 70 'RL' ... 2010 'WD' 5]\n", | |
| " [1459 20 'RL' ... 2010 'WD' 5]\n", | |
| " [1460 20 'RL' ... 2008 'WD' 5]]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "dados_teste = df[:, :80]\n", | |
| "print(dados_teste)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 46, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[208500 181500 223500 ... 266500 142125 147500]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "dados_resposta = df[:, 80]\n", | |
| "print(dados_resposta)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 47, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "ValueError", | |
| "evalue": "could not convert string to float: 'WD'", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-47-22e2047aabfb>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mclf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtree\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDecisionTreeClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mclf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdados_teste\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdados_resposta\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/sklearn/tree/tree.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight, check_input, X_idx_sorted)\u001b[0m\n\u001b[1;32m 788\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 789\u001b[0m \u001b[0mcheck_input\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcheck_input\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 790\u001b[0;31m X_idx_sorted=X_idx_sorted)\n\u001b[0m\u001b[1;32m 791\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 792\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/sklearn/tree/tree.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight, check_input, X_idx_sorted)\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[0mrandom_state\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_random_state\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcheck_input\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 116\u001b[0;31m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDTYPE\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"csc\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 117\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0missparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 431\u001b[0m force_all_finite)\n\u001b[1;32m 432\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 433\u001b[0;31m \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 434\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 435\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'WD'" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "clf = tree.DecisionTreeClassifier()\n", | |
| "clf = clf.fit(dados_teste, dados_resposta)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Read test.csv from the working directory and preview the first 10 rows.\n", | |
| "testes = pd.read_csv('test.csv')\n", | |
| "testes.head(10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.5" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment