Skip to content

Instantly share code, notes, and snippets.

@ClebsonDantasUchoa
Last active August 23, 2018 23:05
Show Gist options
  • Select an option

  • Save ClebsonDantasUchoa/f5d668ad876636b12d3d94b5f327bab5 to your computer and use it in GitHub Desktop.

Select an option

Save ClebsonDantasUchoa/f5d668ad876636b12d3d94b5f327bab5 to your computer and use it in GitHub Desktop.
Casa
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import pandas as pd\n",
"from sklearn import tree"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Id</th>\n",
" <th>MSSubClass</th>\n",
" <th>MSZoning</th>\n",
" <th>LotFrontage</th>\n",
" <th>LotArea</th>\n",
" <th>Street</th>\n",
" <th>Alley</th>\n",
" <th>LotShape</th>\n",
" <th>LandContour</th>\n",
" <th>Utilities</th>\n",
" <th>...</th>\n",
" <th>PoolArea</th>\n",
" <th>PoolQC</th>\n",
" <th>Fence</th>\n",
" <th>MiscFeature</th>\n",
" <th>MiscVal</th>\n",
" <th>MoSold</th>\n",
" <th>YrSold</th>\n",
" <th>SaleType</th>\n",
" <th>SaleCondition</th>\n",
" <th>SalePrice</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>60</td>\n",
" <td>RL</td>\n",
" <td>65.0</td>\n",
" <td>8450</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>Reg</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2008</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>208500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>20</td>\n",
" <td>RL</td>\n",
" <td>80.0</td>\n",
" <td>9600</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>Reg</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>2007</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>181500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>60</td>\n",
" <td>RL</td>\n",
" <td>68.0</td>\n",
" <td>11250</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>IR1</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>2008</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>223500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>70</td>\n",
" <td>RL</td>\n",
" <td>60.0</td>\n",
" <td>9550</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>IR1</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2006</td>\n",
" <td>WD</td>\n",
" <td>Abnorml</td>\n",
" <td>140000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>60</td>\n",
" <td>RL</td>\n",
" <td>84.0</td>\n",
" <td>14260</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>IR1</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>12</td>\n",
" <td>2008</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>250000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>50</td>\n",
" <td>RL</td>\n",
" <td>85.0</td>\n",
" <td>14115</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>IR1</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>MnPrv</td>\n",
" <td>Shed</td>\n",
" <td>700</td>\n",
" <td>10</td>\n",
" <td>2009</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>143000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>20</td>\n",
" <td>RL</td>\n",
" <td>75.0</td>\n",
" <td>10084</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>Reg</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>2007</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>307000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>60</td>\n",
" <td>RL</td>\n",
" <td>NaN</td>\n",
" <td>10382</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>IR1</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Shed</td>\n",
" <td>350</td>\n",
" <td>11</td>\n",
" <td>2009</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>50</td>\n",
" <td>RM</td>\n",
" <td>51.0</td>\n",
" <td>6120</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>Reg</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>2008</td>\n",
" <td>WD</td>\n",
" <td>Abnorml</td>\n",
" <td>129900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10</td>\n",
" <td>190</td>\n",
" <td>RL</td>\n",
" <td>50.0</td>\n",
" <td>7420</td>\n",
" <td>Pave</td>\n",
" <td>NaN</td>\n",
" <td>Reg</td>\n",
" <td>Lvl</td>\n",
" <td>AllPub</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2008</td>\n",
" <td>WD</td>\n",
" <td>Normal</td>\n",
" <td>118000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 81 columns</p>\n",
"</div>"
],
"text/plain": [
" Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
"0 1 60 RL 65.0 8450 Pave NaN Reg \n",
"1 2 20 RL 80.0 9600 Pave NaN Reg \n",
"2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
"3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
"4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
"5 6 50 RL 85.0 14115 Pave NaN IR1 \n",
"6 7 20 RL 75.0 10084 Pave NaN Reg \n",
"7 8 60 RL NaN 10382 Pave NaN IR1 \n",
"8 9 50 RM 51.0 6120 Pave NaN Reg \n",
"9 10 190 RL 50.0 7420 Pave NaN Reg \n",
"\n",
" LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
"0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"2 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"4 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"5 Lvl AllPub ... 0 NaN MnPrv Shed 700 \n",
"6 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"7 Lvl AllPub ... 0 NaN NaN Shed 350 \n",
"8 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"9 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
"\n",
" MoSold YrSold SaleType SaleCondition SalePrice \n",
"0 2 2008 WD Normal 208500 \n",
"1 5 2007 WD Normal 181500 \n",
"2 9 2008 WD Normal 223500 \n",
"3 2 2006 WD Abnorml 140000 \n",
"4 12 2008 WD Normal 250000 \n",
"5 10 2009 WD Normal 143000 \n",
"6 8 2007 WD Normal 307000 \n",
"7 11 2009 WD Normal 200000 \n",
"8 4 2008 WD Abnorml 129900 \n",
"9 1 2008 WD Normal 118000 \n",
"\n",
"[10 rows x 81 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_csv('train.csv')\n",
"df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SaleCondition\n",
"Abnorml 101\n",
"AdjLand 4\n",
"Alloca 12\n",
"Family 20\n",
"Normal 1198\n",
"Partial 125\n",
"dtype: int64\n"
]
}
],
"source": [
"print(df.groupby('SaleCondition').size())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Id</th>\n",
" <th>MSSubClass</th>\n",
" <th>LotFrontage</th>\n",
" <th>LotArea</th>\n",
" <th>OverallQual</th>\n",
" <th>OverallCond</th>\n",
" <th>YearBuilt</th>\n",
" <th>YearRemodAdd</th>\n",
" <th>MasVnrArea</th>\n",
" <th>BsmtFinSF1</th>\n",
" <th>...</th>\n",
" <th>SaleType_ConLw</th>\n",
" <th>SaleType_New</th>\n",
" <th>SaleType_Oth</th>\n",
" <th>SaleType_WD</th>\n",
" <th>SaleCondition_Abnorml</th>\n",
" <th>SaleCondition_AdjLand</th>\n",
" <th>SaleCondition_Alloca</th>\n",
" <th>SaleCondition_Family</th>\n",
" <th>SaleCondition_Normal</th>\n",
" <th>SaleCondition_Partial</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>60</td>\n",
" <td>65.0</td>\n",
" <td>8450</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>2003</td>\n",
" <td>2003</td>\n",
" <td>196.0</td>\n",
" <td>706</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>20</td>\n",
" <td>80.0</td>\n",
" <td>9600</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>1976</td>\n",
" <td>1976</td>\n",
" <td>0.0</td>\n",
" <td>978</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>60</td>\n",
" <td>68.0</td>\n",
" <td>11250</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>2001</td>\n",
" <td>2002</td>\n",
" <td>162.0</td>\n",
" <td>486</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>70</td>\n",
" <td>60.0</td>\n",
" <td>9550</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>1915</td>\n",
" <td>1970</td>\n",
" <td>0.0</td>\n",
" <td>216</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>60</td>\n",
" <td>84.0</td>\n",
" <td>14260</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>2000</td>\n",
" <td>2000</td>\n",
" <td>350.0</td>\n",
" <td>655</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>50</td>\n",
" <td>85.0</td>\n",
" <td>14115</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>1993</td>\n",
" <td>1995</td>\n",
" <td>0.0</td>\n",
" <td>732</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>20</td>\n",
" <td>75.0</td>\n",
" <td>10084</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>2004</td>\n",
" <td>2005</td>\n",
" <td>186.0</td>\n",
" <td>1369</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>60</td>\n",
" <td>NaN</td>\n",
" <td>10382</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>1973</td>\n",
" <td>1973</td>\n",
" <td>240.0</td>\n",
" <td>859</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>50</td>\n",
" <td>51.0</td>\n",
" <td>6120</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>1931</td>\n",
" <td>1950</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10</td>\n",
" <td>190</td>\n",
" <td>50.0</td>\n",
" <td>7420</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>1939</td>\n",
" <td>1950</td>\n",
" <td>0.0</td>\n",
" <td>851</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 290 columns</p>\n",
"</div>"
],
"text/plain": [
" Id MSSubClass LotFrontage LotArea OverallQual OverallCond YearBuilt \\\n",
"0 1 60 65.0 8450 7 5 2003 \n",
"1 2 20 80.0 9600 6 8 1976 \n",
"2 3 60 68.0 11250 7 5 2001 \n",
"3 4 70 60.0 9550 7 5 1915 \n",
"4 5 60 84.0 14260 8 5 2000 \n",
"5 6 50 85.0 14115 5 5 1993 \n",
"6 7 20 75.0 10084 8 5 2004 \n",
"7 8 60 NaN 10382 7 6 1973 \n",
"8 9 50 51.0 6120 7 5 1931 \n",
"9 10 190 50.0 7420 5 6 1939 \n",
"\n",
" YearRemodAdd MasVnrArea BsmtFinSF1 ... \\\n",
"0 2003 196.0 706 ... \n",
"1 1976 0.0 978 ... \n",
"2 2002 162.0 486 ... \n",
"3 1970 0.0 216 ... \n",
"4 2000 350.0 655 ... \n",
"5 1995 0.0 732 ... \n",
"6 2005 186.0 1369 ... \n",
"7 1973 240.0 859 ... \n",
"8 1950 0.0 0 ... \n",
"9 1950 0.0 851 ... \n",
"\n",
" SaleType_ConLw SaleType_New SaleType_Oth SaleType_WD \\\n",
"0 0 0 0 1 \n",
"1 0 0 0 1 \n",
"2 0 0 0 1 \n",
"3 0 0 0 1 \n",
"4 0 0 0 1 \n",
"5 0 0 0 1 \n",
"6 0 0 0 1 \n",
"7 0 0 0 1 \n",
"8 0 0 0 1 \n",
"9 0 0 0 1 \n",
"\n",
" SaleCondition_Abnorml SaleCondition_AdjLand SaleCondition_Alloca \\\n",
"0 0 0 0 \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 1 0 0 \n",
"4 0 0 0 \n",
"5 0 0 0 \n",
"6 0 0 0 \n",
"7 0 0 0 \n",
"8 1 0 0 \n",
"9 0 0 0 \n",
"\n",
" SaleCondition_Family SaleCondition_Normal SaleCondition_Partial \n",
"0 0 1 0 \n",
"1 0 1 0 \n",
"2 0 1 0 \n",
"3 0 0 0 \n",
"4 0 1 0 \n",
"5 0 1 0 \n",
"6 0 1 0 \n",
"7 0 1 0 \n",
"8 0 0 0 \n",
"9 0 1 0 \n",
"\n",
"[10 rows x 290 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#gender = {'Abnorml': 1,'AdjLand': 2, 'Alloca': 3, 'Family': 4, 'Normal': 5, 'Partial': 6}\n",
"#df.SaleCondition = [gender[item] for item in df.SaleCondition]\n",
"df = pd.get_dummies( df )\n",
"df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Select the model inputs by column name rather than by position: get_dummies puts\n",
"# the encoded columns after the numeric ones, so SalePrice is no longer column 80.\n",
"TARGET_COLUMN = 'SalePrice'\n",
"# Id only numbers the rows, so it is dropped together with the target.\n",
"features_raw = df.drop(columns=['Id', TARGET_COLUMN])\n",
"# The encoded frame still has missing numeric values (LotFrontage, for one) and\n",
"# decision trees in older scikit-learn releases reject NaN, so fill each gap with\n",
"# its column median.\n",
"# NOTE: despite the name (test data), dados_teste holds the training features.\n",
"dados_teste = features_raw.fillna(features_raw.median())\n",
"assert not dados_teste.isnull().any().any(), 'features still contain NaN'\n",
"dados_teste.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Prediction target: the sale price of each row, aligned row by row with dados_teste.\n",
"dados_resposta = df[TARGET_COLUMN]\n",
"assert len(dados_resposta) == len(dados_teste)\n",
"dados_resposta.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# SalePrice is a continuous amount, so fit a regression tree; a classifier would\n",
"# treat every distinct price as a separate class. The fixed seed makes the fitted\n",
"# tree reproducible across runs.\n",
"clf = tree.DecisionTreeRegressor(random_state=42)\n",
"clf = clf.fit(dados_teste, dados_resposta)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load test.csv from the notebook's working directory and preview its first ten rows.\n",
"test_path = 'test.csv'\n",
"testes = pd.read_csv(test_path)\n",
"testes.head(n=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment