Created
August 9, 2019 11:54
-
-
Save ericvenarusso/b52254a448229034b66ddde171b259bc to your computer and use it in GitHub Desktop.
Treinamento Comum.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Treinamento Comum.ipynb", | |
"version": "0.3.2", | |
"provenance": [], | |
"collapsed_sections": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/ericvenarusso/b52254a448229034b66ddde171b259bc/treinamento-comum.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "wmm6TqrmZKQd", | |
"colab_type": "code", | |
"outputId": "ec92c54e-05ac-40df-c7c5-61ba2c5d358a", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 292 | |
} | |
}, | |
"source": [ | |
"# Instalando o pacote category_encoders\n", | |
"! pip install category_encoders" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Collecting category_encoders\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/6e/a1/f7a22f144f33be78afeb06bfa78478e8284a64263a3c09b1ef54e673841e/category_encoders-2.0.0-py2.py3-none-any.whl (87kB)\n", | |
"\u001b[K |████████████████████████████████| 92kB 5.9MB/s \n", | |
"\u001b[?25hRequirement already satisfied: patsy>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from category_encoders) (0.5.1)\n", | |
"Requirement already satisfied: pandas>=0.21.1 in /usr/local/lib/python3.6/dist-packages (from category_encoders) (0.24.2)\n", | |
"Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.6/dist-packages (from category_encoders) (0.21.3)\n", | |
"Requirement already satisfied: statsmodels>=0.6.1 in /usr/local/lib/python3.6/dist-packages (from category_encoders) (0.10.1)\n", | |
"Requirement already satisfied: numpy>=1.11.3 in /usr/local/lib/python3.6/dist-packages (from category_encoders) (1.16.4)\n", | |
"Requirement already satisfied: scipy>=0.19.0 in /usr/local/lib/python3.6/dist-packages (from category_encoders) (1.3.0)\n", | |
"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from patsy>=0.4.1->category_encoders) (1.12.0)\n", | |
"Requirement already satisfied: python-dateutil>=2.5.0 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.21.1->category_encoders) (2.5.3)\n", | |
"Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.6/dist-packages (from pandas>=0.21.1->category_encoders) (2018.9)\n", | |
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn>=0.20.0->category_encoders) (0.13.2)\n", | |
"Installing collected packages: category-encoders\n", | |
"Successfully installed category-encoders-2.0.0\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ch3C4cGRSYCY", | |
"colab_type": "code", | |
"outputId": "35c17761-1c28-4a49-9871-4baeca216fbb", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 156 | |
} | |
}, | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"\n", | |
"from sklearn.pipeline import Pipeline\n", | |
"from sklearn.impute import SimpleImputer\n", | |
"from sklearn.ensemble import RandomForestRegressor\n", | |
"from category_encoders.target_encoder import TargetEncoder\n", | |
"\n", | |
"from sklearn.externals import joblib\n", | |
"\n", | |
"# Configurações do google drive\n", | |
"from google.colab import drive\n", | |
"drive.mount('/content/gdrive')\n", | |
"\n", | |
"import os\n", | |
"os.chdir('gdrive/My Drive/Colab Notebooks/Sklearn Pipelines')" | |
], | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.6/dist-packages/sklearn/externals/joblib/__init__.py:15: DeprecationWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. Please import this functionality directly from joblib, which can be installed with: pip install joblib. If this warning is raised when loading pickled models, you may need to re-serialize those models with scikit-learn 0.21+.\n", | |
" warnings.warn(msg, category=DeprecationWarning)\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code\n", | |
"\n", | |
"Enter your authorization code:\n", | |
"··········\n", | |
"Mounted at /content/gdrive\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "B6nxOlrDS5QS", | |
"colab_type": "code", | |
"outputId": "98d619b1-40d6-44ef-ec4b-87302425d364", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 270 | |
} | |
}, | |
"source": [ | |
"# Carregamento dos dados\n", | |
"train = pd.read_csv('input/train.csv')\n", | |
"\n", | |
"print(train.shape)\n", | |
"\n", | |
"train.head()" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"(1460, 81)\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Id</th>\n", | |
" <th>MSSubClass</th>\n", | |
" <th>MSZoning</th>\n", | |
" <th>LotFrontage</th>\n", | |
" <th>LotArea</th>\n", | |
" <th>Street</th>\n", | |
" <th>Alley</th>\n", | |
" <th>LotShape</th>\n", | |
" <th>LandContour</th>\n", | |
" <th>Utilities</th>\n", | |
" <th>LotConfig</th>\n", | |
" <th>LandSlope</th>\n", | |
" <th>Neighborhood</th>\n", | |
" <th>Condition1</th>\n", | |
" <th>Condition2</th>\n", | |
" <th>BldgType</th>\n", | |
" <th>HouseStyle</th>\n", | |
" <th>OverallQual</th>\n", | |
" <th>OverallCond</th>\n", | |
" <th>YearBuilt</th>\n", | |
" <th>YearRemodAdd</th>\n", | |
" <th>RoofStyle</th>\n", | |
" <th>RoofMatl</th>\n", | |
" <th>Exterior1st</th>\n", | |
" <th>Exterior2nd</th>\n", | |
" <th>MasVnrType</th>\n", | |
" <th>MasVnrArea</th>\n", | |
" <th>ExterQual</th>\n", | |
" <th>ExterCond</th>\n", | |
" <th>Foundation</th>\n", | |
" <th>BsmtQual</th>\n", | |
" <th>BsmtCond</th>\n", | |
" <th>BsmtExposure</th>\n", | |
" <th>BsmtFinType1</th>\n", | |
" <th>BsmtFinSF1</th>\n", | |
" <th>BsmtFinType2</th>\n", | |
" <th>BsmtFinSF2</th>\n", | |
" <th>BsmtUnfSF</th>\n", | |
" <th>TotalBsmtSF</th>\n", | |
" <th>Heating</th>\n", | |
" <th>...</th>\n", | |
" <th>CentralAir</th>\n", | |
" <th>Electrical</th>\n", | |
" <th>1stFlrSF</th>\n", | |
" <th>2ndFlrSF</th>\n", | |
" <th>LowQualFinSF</th>\n", | |
" <th>GrLivArea</th>\n", | |
" <th>BsmtFullBath</th>\n", | |
" <th>BsmtHalfBath</th>\n", | |
" <th>FullBath</th>\n", | |
" <th>HalfBath</th>\n", | |
" <th>BedroomAbvGr</th>\n", | |
" <th>KitchenAbvGr</th>\n", | |
" <th>KitchenQual</th>\n", | |
" <th>TotRmsAbvGrd</th>\n", | |
" <th>Functional</th>\n", | |
" <th>Fireplaces</th>\n", | |
" <th>FireplaceQu</th>\n", | |
" <th>GarageType</th>\n", | |
" <th>GarageYrBlt</th>\n", | |
" <th>GarageFinish</th>\n", | |
" <th>GarageCars</th>\n", | |
" <th>GarageArea</th>\n", | |
" <th>GarageQual</th>\n", | |
" <th>GarageCond</th>\n", | |
" <th>PavedDrive</th>\n", | |
" <th>WoodDeckSF</th>\n", | |
" <th>OpenPorchSF</th>\n", | |
" <th>EnclosedPorch</th>\n", | |
" <th>3SsnPorch</th>\n", | |
" <th>ScreenPorch</th>\n", | |
" <th>PoolArea</th>\n", | |
" <th>PoolQC</th>\n", | |
" <th>Fence</th>\n", | |
" <th>MiscFeature</th>\n", | |
" <th>MiscVal</th>\n", | |
" <th>MoSold</th>\n", | |
" <th>YrSold</th>\n", | |
" <th>SaleType</th>\n", | |
" <th>SaleCondition</th>\n", | |
" <th>SalePrice</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>60</td>\n", | |
" <td>RL</td>\n", | |
" <td>65.0</td>\n", | |
" <td>8450</td>\n", | |
" <td>Pave</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Reg</td>\n", | |
" <td>Lvl</td>\n", | |
" <td>AllPub</td>\n", | |
" <td>Inside</td>\n", | |
" <td>Gtl</td>\n", | |
" <td>CollgCr</td>\n", | |
" <td>Norm</td>\n", | |
" <td>Norm</td>\n", | |
" <td>1Fam</td>\n", | |
" <td>2Story</td>\n", | |
" <td>7</td>\n", | |
" <td>5</td>\n", | |
" <td>2003</td>\n", | |
" <td>2003</td>\n", | |
" <td>Gable</td>\n", | |
" <td>CompShg</td>\n", | |
" <td>VinylSd</td>\n", | |
" <td>VinylSd</td>\n", | |
" <td>BrkFace</td>\n", | |
" <td>196.0</td>\n", | |
" <td>Gd</td>\n", | |
" <td>TA</td>\n", | |
" <td>PConc</td>\n", | |
" <td>Gd</td>\n", | |
" <td>TA</td>\n", | |
" <td>No</td>\n", | |
" <td>GLQ</td>\n", | |
" <td>706</td>\n", | |
" <td>Unf</td>\n", | |
" <td>0</td>\n", | |
" <td>150</td>\n", | |
" <td>856</td>\n", | |
" <td>GasA</td>\n", | |
" <td>...</td>\n", | |
" <td>Y</td>\n", | |
" <td>SBrkr</td>\n", | |
" <td>856</td>\n", | |
" <td>854</td>\n", | |
" <td>0</td>\n", | |
" <td>1710</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>Gd</td>\n", | |
" <td>8</td>\n", | |
" <td>Typ</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Attchd</td>\n", | |
" <td>2003.0</td>\n", | |
" <td>RFn</td>\n", | |
" <td>2</td>\n", | |
" <td>548</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>Y</td>\n", | |
" <td>0</td>\n", | |
" <td>61</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>2008</td>\n", | |
" <td>WD</td>\n", | |
" <td>Normal</td>\n", | |
" <td>208500</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>20</td>\n", | |
" <td>RL</td>\n", | |
" <td>80.0</td>\n", | |
" <td>9600</td>\n", | |
" <td>Pave</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Reg</td>\n", | |
" <td>Lvl</td>\n", | |
" <td>AllPub</td>\n", | |
" <td>FR2</td>\n", | |
" <td>Gtl</td>\n", | |
" <td>Veenker</td>\n", | |
" <td>Feedr</td>\n", | |
" <td>Norm</td>\n", | |
" <td>1Fam</td>\n", | |
" <td>1Story</td>\n", | |
" <td>6</td>\n", | |
" <td>8</td>\n", | |
" <td>1976</td>\n", | |
" <td>1976</td>\n", | |
" <td>Gable</td>\n", | |
" <td>CompShg</td>\n", | |
" <td>MetalSd</td>\n", | |
" <td>MetalSd</td>\n", | |
" <td>None</td>\n", | |
" <td>0.0</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>CBlock</td>\n", | |
" <td>Gd</td>\n", | |
" <td>TA</td>\n", | |
" <td>Gd</td>\n", | |
" <td>ALQ</td>\n", | |
" <td>978</td>\n", | |
" <td>Unf</td>\n", | |
" <td>0</td>\n", | |
" <td>284</td>\n", | |
" <td>1262</td>\n", | |
" <td>GasA</td>\n", | |
" <td>...</td>\n", | |
" <td>Y</td>\n", | |
" <td>SBrkr</td>\n", | |
" <td>1262</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1262</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>TA</td>\n", | |
" <td>6</td>\n", | |
" <td>Typ</td>\n", | |
" <td>1</td>\n", | |
" <td>TA</td>\n", | |
" <td>Attchd</td>\n", | |
" <td>1976.0</td>\n", | |
" <td>RFn</td>\n", | |
" <td>2</td>\n", | |
" <td>460</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>Y</td>\n", | |
" <td>298</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>5</td>\n", | |
" <td>2007</td>\n", | |
" <td>WD</td>\n", | |
" <td>Normal</td>\n", | |
" <td>181500</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>60</td>\n", | |
" <td>RL</td>\n", | |
" <td>68.0</td>\n", | |
" <td>11250</td>\n", | |
" <td>Pave</td>\n", | |
" <td>NaN</td>\n", | |
" <td>IR1</td>\n", | |
" <td>Lvl</td>\n", | |
" <td>AllPub</td>\n", | |
" <td>Inside</td>\n", | |
" <td>Gtl</td>\n", | |
" <td>CollgCr</td>\n", | |
" <td>Norm</td>\n", | |
" <td>Norm</td>\n", | |
" <td>1Fam</td>\n", | |
" <td>2Story</td>\n", | |
" <td>7</td>\n", | |
" <td>5</td>\n", | |
" <td>2001</td>\n", | |
" <td>2002</td>\n", | |
" <td>Gable</td>\n", | |
" <td>CompShg</td>\n", | |
" <td>VinylSd</td>\n", | |
" <td>VinylSd</td>\n", | |
" <td>BrkFace</td>\n", | |
" <td>162.0</td>\n", | |
" <td>Gd</td>\n", | |
" <td>TA</td>\n", | |
" <td>PConc</td>\n", | |
" <td>Gd</td>\n", | |
" <td>TA</td>\n", | |
" <td>Mn</td>\n", | |
" <td>GLQ</td>\n", | |
" <td>486</td>\n", | |
" <td>Unf</td>\n", | |
" <td>0</td>\n", | |
" <td>434</td>\n", | |
" <td>920</td>\n", | |
" <td>GasA</td>\n", | |
" <td>...</td>\n", | |
" <td>Y</td>\n", | |
" <td>SBrkr</td>\n", | |
" <td>920</td>\n", | |
" <td>866</td>\n", | |
" <td>0</td>\n", | |
" <td>1786</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>Gd</td>\n", | |
" <td>6</td>\n", | |
" <td>Typ</td>\n", | |
" <td>1</td>\n", | |
" <td>TA</td>\n", | |
" <td>Attchd</td>\n", | |
" <td>2001.0</td>\n", | |
" <td>RFn</td>\n", | |
" <td>2</td>\n", | |
" <td>608</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>Y</td>\n", | |
" <td>0</td>\n", | |
" <td>42</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>9</td>\n", | |
" <td>2008</td>\n", | |
" <td>WD</td>\n", | |
" <td>Normal</td>\n", | |
" <td>223500</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>70</td>\n", | |
" <td>RL</td>\n", | |
" <td>60.0</td>\n", | |
" <td>9550</td>\n", | |
" <td>Pave</td>\n", | |
" <td>NaN</td>\n", | |
" <td>IR1</td>\n", | |
" <td>Lvl</td>\n", | |
" <td>AllPub</td>\n", | |
" <td>Corner</td>\n", | |
" <td>Gtl</td>\n", | |
" <td>Crawfor</td>\n", | |
" <td>Norm</td>\n", | |
" <td>Norm</td>\n", | |
" <td>1Fam</td>\n", | |
" <td>2Story</td>\n", | |
" <td>7</td>\n", | |
" <td>5</td>\n", | |
" <td>1915</td>\n", | |
" <td>1970</td>\n", | |
" <td>Gable</td>\n", | |
" <td>CompShg</td>\n", | |
" <td>Wd Sdng</td>\n", | |
" <td>Wd Shng</td>\n", | |
" <td>None</td>\n", | |
" <td>0.0</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>BrkTil</td>\n", | |
" <td>TA</td>\n", | |
" <td>Gd</td>\n", | |
" <td>No</td>\n", | |
" <td>ALQ</td>\n", | |
" <td>216</td>\n", | |
" <td>Unf</td>\n", | |
" <td>0</td>\n", | |
" <td>540</td>\n", | |
" <td>756</td>\n", | |
" <td>GasA</td>\n", | |
" <td>...</td>\n", | |
" <td>Y</td>\n", | |
" <td>SBrkr</td>\n", | |
" <td>961</td>\n", | |
" <td>756</td>\n", | |
" <td>0</td>\n", | |
" <td>1717</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>Gd</td>\n", | |
" <td>7</td>\n", | |
" <td>Typ</td>\n", | |
" <td>1</td>\n", | |
" <td>Gd</td>\n", | |
" <td>Detchd</td>\n", | |
" <td>1998.0</td>\n", | |
" <td>Unf</td>\n", | |
" <td>3</td>\n", | |
" <td>642</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>Y</td>\n", | |
" <td>0</td>\n", | |
" <td>35</td>\n", | |
" <td>272</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>2006</td>\n", | |
" <td>WD</td>\n", | |
" <td>Abnorml</td>\n", | |
" <td>140000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>60</td>\n", | |
" <td>RL</td>\n", | |
" <td>84.0</td>\n", | |
" <td>14260</td>\n", | |
" <td>Pave</td>\n", | |
" <td>NaN</td>\n", | |
" <td>IR1</td>\n", | |
" <td>Lvl</td>\n", | |
" <td>AllPub</td>\n", | |
" <td>FR2</td>\n", | |
" <td>Gtl</td>\n", | |
" <td>NoRidge</td>\n", | |
" <td>Norm</td>\n", | |
" <td>Norm</td>\n", | |
" <td>1Fam</td>\n", | |
" <td>2Story</td>\n", | |
" <td>8</td>\n", | |
" <td>5</td>\n", | |
" <td>2000</td>\n", | |
" <td>2000</td>\n", | |
" <td>Gable</td>\n", | |
" <td>CompShg</td>\n", | |
" <td>VinylSd</td>\n", | |
" <td>VinylSd</td>\n", | |
" <td>BrkFace</td>\n", | |
" <td>350.0</td>\n", | |
" <td>Gd</td>\n", | |
" <td>TA</td>\n", | |
" <td>PConc</td>\n", | |
" <td>Gd</td>\n", | |
" <td>TA</td>\n", | |
" <td>Av</td>\n", | |
" <td>GLQ</td>\n", | |
" <td>655</td>\n", | |
" <td>Unf</td>\n", | |
" <td>0</td>\n", | |
" <td>490</td>\n", | |
" <td>1145</td>\n", | |
" <td>GasA</td>\n", | |
" <td>...</td>\n", | |
" <td>Y</td>\n", | |
" <td>SBrkr</td>\n", | |
" <td>1145</td>\n", | |
" <td>1053</td>\n", | |
" <td>0</td>\n", | |
" <td>2198</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>Gd</td>\n", | |
" <td>9</td>\n", | |
" <td>Typ</td>\n", | |
" <td>1</td>\n", | |
" <td>TA</td>\n", | |
" <td>Attchd</td>\n", | |
" <td>2000.0</td>\n", | |
" <td>RFn</td>\n", | |
" <td>3</td>\n", | |
" <td>836</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>Y</td>\n", | |
" <td>192</td>\n", | |
" <td>84</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>12</td>\n", | |
" <td>2008</td>\n", | |
" <td>WD</td>\n", | |
" <td>Normal</td>\n", | |
" <td>250000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 81 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Id MSSubClass MSZoning ... SaleType SaleCondition SalePrice\n", | |
"0 1 60 RL ... WD Normal 208500\n", | |
"1 2 20 RL ... WD Normal 181500\n", | |
"2 3 60 RL ... WD Normal 223500\n", | |
"3 4 70 RL ... WD Abnorml 140000\n", | |
"4 5 60 RL ... WD Normal 250000\n", | |
"\n", | |
"[5 rows x 81 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 3 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "RF-2A3r-cOLg", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"X_train = train.drop('SalePrice', axis = 1)\n", | |
"y_train = train['SalePrice']" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "qWbV3-gBclI-", | |
"colab_type": "code", | |
"outputId": "3cfba3e3-c03c-4eef-b5fb-68c41274e30d", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
} | |
}, | |
"source": [ | |
"# Criacao do TargetEncoder\n", | |
"te = TargetEncoder()\n", | |
"\n", | |
"# Treinamento do TargetEncoder\n", | |
"X_train = te.fit_transform(X_train, y_train)\n", | |
"\n", | |
"# Salvando o TargetEncoder ja treinado\n", | |
"joblib.dump(te, 'saved_models/treinamento_normal/target_enconder.pkl')" | |
], | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['saved_models/treinamento_normal/target_enconder.pkl']" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 5 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ja_hwBPlTllS", | |
"colab_type": "code", | |
"outputId": "f85a441a-e328-4b4c-92ed-4a89c9ae17fc", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
} | |
}, | |
"source": [ | |
"# Criacao do SimpleImputer\n", | |
"si = SimpleImputer(strategy = 'median')\n", | |
"\n", | |
"# Treinamento do SimpleImputer\n", | |
"X_train = si.fit_transform(X_train)\n", | |
"\n", | |
"# Salvando o SimpleImputer ja treinado\n", | |
"joblib.dump(si, 'saved_models/treinamento_normal/simple_imputer.pkl')" | |
], | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['saved_models/treinamento_normal/simple_imputer.pkl']" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 6 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "NkhcWQTVTwuG", | |
"colab_type": "code", | |
"outputId": "667a78a0-817e-4e9b-8404-83ce123a2b94", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
} | |
}, | |
"source": [ | |
"# Criacao da RandomForestRegressor\n", | |
"rf_model = RandomForestRegressor(n_estimators = 1000, random_state = 0)\n", | |
"\n", | |
"# Treinamento da RandomForestRegressor\n", | |
"rf_model.fit(X_train, y_train)\n", | |
"\n", | |
"# Salvando a RandomForestRegressor ja treinada\n", | |
"joblib.dump(rf_model, 'saved_models/treinamento_normal/model.pkl')" | |
], | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['saved_models/treinamento_normal/model.pkl']" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 7 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "SHUezfc0puhg", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Conjunto de teste" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5bNkL41Ipvv_", | |
"colab_type": "code", | |
"outputId": "7eee8f6a-9a48-4da6-ac79-ea3925caa7ad", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 241 | |
} | |
}, | |
"source": [ | |
"# Carregamento dos dados\n", | |
"test = pd.read_csv('input/test.csv')\n", | |
"\n", | |
"print(test.shape)\n", | |
"\n", | |
"test.head()" | |
], | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"(1459, 80)\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Id</th>\n", | |
" <th>MSSubClass</th>\n", | |
" <th>MSZoning</th>\n", | |
" <th>LotFrontage</th>\n", | |
" <th>LotArea</th>\n", | |
" <th>Street</th>\n", | |
" <th>Alley</th>\n", | |
" <th>LotShape</th>\n", | |
" <th>LandContour</th>\n", | |
" <th>Utilities</th>\n", | |
" <th>LotConfig</th>\n", | |
" <th>LandSlope</th>\n", | |
" <th>Neighborhood</th>\n", | |
" <th>Condition1</th>\n", | |
" <th>Condition2</th>\n", | |
" <th>BldgType</th>\n", | |
" <th>HouseStyle</th>\n", | |
" <th>OverallQual</th>\n", | |
" <th>OverallCond</th>\n", | |
" <th>YearBuilt</th>\n", | |
" <th>YearRemodAdd</th>\n", | |
" <th>RoofStyle</th>\n", | |
" <th>RoofMatl</th>\n", | |
" <th>Exterior1st</th>\n", | |
" <th>Exterior2nd</th>\n", | |
" <th>MasVnrType</th>\n", | |
" <th>MasVnrArea</th>\n", | |
" <th>ExterQual</th>\n", | |
" <th>ExterCond</th>\n", | |
" <th>Foundation</th>\n", | |
" <th>BsmtQual</th>\n", | |
" <th>BsmtCond</th>\n", | |
" <th>BsmtExposure</th>\n", | |
" <th>BsmtFinType1</th>\n", | |
" <th>BsmtFinSF1</th>\n", | |
" <th>BsmtFinType2</th>\n", | |
" <th>BsmtFinSF2</th>\n", | |
" <th>BsmtUnfSF</th>\n", | |
" <th>TotalBsmtSF</th>\n", | |
" <th>Heating</th>\n", | |
" <th>HeatingQC</th>\n", | |
" <th>CentralAir</th>\n", | |
" <th>Electrical</th>\n", | |
" <th>1stFlrSF</th>\n", | |
" <th>2ndFlrSF</th>\n", | |
" <th>LowQualFinSF</th>\n", | |
" <th>GrLivArea</th>\n", | |
" <th>BsmtFullBath</th>\n", | |
" <th>BsmtHalfBath</th>\n", | |
" <th>FullBath</th>\n", | |
" <th>HalfBath</th>\n", | |
" <th>BedroomAbvGr</th>\n", | |
" <th>KitchenAbvGr</th>\n", | |
" <th>KitchenQual</th>\n", | |
" <th>TotRmsAbvGrd</th>\n", | |
" <th>Functional</th>\n", | |
" <th>Fireplaces</th>\n", | |
" <th>FireplaceQu</th>\n", | |
" <th>GarageType</th>\n", | |
" <th>GarageYrBlt</th>\n", | |
" <th>GarageFinish</th>\n", | |
" <th>GarageCars</th>\n", | |
" <th>GarageArea</th>\n", | |
" <th>GarageQual</th>\n", | |
" <th>GarageCond</th>\n", | |
" <th>PavedDrive</th>\n", | |
" <th>WoodDeckSF</th>\n", | |
" <th>OpenPorchSF</th>\n", | |
" <th>EnclosedPorch</th>\n", | |
" <th>3SsnPorch</th>\n", | |
" <th>ScreenPorch</th>\n", | |
" <th>PoolArea</th>\n", | |
" <th>PoolQC</th>\n", | |
" <th>Fence</th>\n", | |
" <th>MiscFeature</th>\n", | |
" <th>MiscVal</th>\n", | |
" <th>MoSold</th>\n", | |
" <th>YrSold</th>\n", | |
" <th>SaleType</th>\n", | |
" <th>SaleCondition</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1461</td>\n", | |
" <td>20</td>\n", | |
" <td>RH</td>\n", | |
" <td>80.0</td>\n", | |
" <td>11622</td>\n", | |
" <td>Pave</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Reg</td>\n", | |
" <td>Lvl</td>\n", | |
" <td>AllPub</td>\n", | |
" <td>Inside</td>\n", | |
" <td>Gtl</td>\n", | |
" <td>NAmes</td>\n", | |
" <td>Feedr</td>\n", | |
" <td>Norm</td>\n", | |
" <td>1Fam</td>\n", | |
" <td>1Story</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>1961</td>\n", | |
" <td>1961</td>\n", | |
" <td>Gable</td>\n", | |
" <td>CompShg</td>\n", | |
" <td>VinylSd</td>\n", | |
" <td>VinylSd</td>\n", | |
" <td>None</td>\n", | |
" <td>0.0</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>CBlock</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>No</td>\n", | |
" <td>Rec</td>\n", | |
" <td>468.0</td>\n", | |
" <td>LwQ</td>\n", | |
" <td>144.0</td>\n", | |
" <td>270.0</td>\n", | |
" <td>882.0</td>\n", | |
" <td>GasA</td>\n", | |
" <td>TA</td>\n", | |
" <td>Y</td>\n", | |
" <td>SBrkr</td>\n", | |
" <td>896</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>896</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>TA</td>\n", | |
" <td>5</td>\n", | |
" <td>Typ</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Attchd</td>\n", | |
" <td>1961.0</td>\n", | |
" <td>Unf</td>\n", | |
" <td>1.0</td>\n", | |
" <td>730.0</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>Y</td>\n", | |
" <td>140</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>120</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>MnPrv</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>6</td>\n", | |
" <td>2010</td>\n", | |
" <td>WD</td>\n", | |
" <td>Normal</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1462</td>\n", | |
" <td>20</td>\n", | |
" <td>RL</td>\n", | |
" <td>81.0</td>\n", | |
" <td>14267</td>\n", | |
" <td>Pave</td>\n", | |
" <td>NaN</td>\n", | |
" <td>IR1</td>\n", | |
" <td>Lvl</td>\n", | |
" <td>AllPub</td>\n", | |
" <td>Corner</td>\n", | |
" <td>Gtl</td>\n", | |
" <td>NAmes</td>\n", | |
" <td>Norm</td>\n", | |
" <td>Norm</td>\n", | |
" <td>1Fam</td>\n", | |
" <td>1Story</td>\n", | |
" <td>6</td>\n", | |
" <td>6</td>\n", | |
" <td>1958</td>\n", | |
" <td>1958</td>\n", | |
" <td>Hip</td>\n", | |
" <td>CompShg</td>\n", | |
" <td>Wd Sdng</td>\n", | |
" <td>Wd Sdng</td>\n", | |
" <td>BrkFace</td>\n", | |
" <td>108.0</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>CBlock</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>No</td>\n", | |
" <td>ALQ</td>\n", | |
" <td>923.0</td>\n", | |
" <td>Unf</td>\n", | |
" <td>0.0</td>\n", | |
" <td>406.0</td>\n", | |
" <td>1329.0</td>\n", | |
" <td>GasA</td>\n", | |
" <td>TA</td>\n", | |
" <td>Y</td>\n", | |
" <td>SBrkr</td>\n", | |
" <td>1329</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1329</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>Gd</td>\n", | |
" <td>6</td>\n", | |
" <td>Typ</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Attchd</td>\n", | |
" <td>1958.0</td>\n", | |
" <td>Unf</td>\n", | |
" <td>1.0</td>\n", | |
" <td>312.0</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>Y</td>\n", | |
" <td>393</td>\n", | |
" <td>36</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Gar2</td>\n", | |
" <td>12500</td>\n", | |
" <td>6</td>\n", | |
" <td>2010</td>\n", | |
" <td>WD</td>\n", | |
" <td>Normal</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1463</td>\n", | |
" <td>60</td>\n", | |
" <td>RL</td>\n", | |
" <td>74.0</td>\n", | |
" <td>13830</td>\n", | |
" <td>Pave</td>\n", | |
" <td>NaN</td>\n", | |
" <td>IR1</td>\n", | |
" <td>Lvl</td>\n", | |
" <td>AllPub</td>\n", | |
" <td>Inside</td>\n", | |
" <td>Gtl</td>\n", | |
" <td>Gilbert</td>\n", | |
" <td>Norm</td>\n", | |
" <td>Norm</td>\n", | |
" <td>1Fam</td>\n", | |
" <td>2Story</td>\n", | |
" <td>5</td>\n", | |
" <td>5</td>\n", | |
" <td>1997</td>\n", | |
" <td>1998</td>\n", | |
" <td>Gable</td>\n", | |
" <td>CompShg</td>\n", | |
" <td>VinylSd</td>\n", | |
" <td>VinylSd</td>\n", | |
" <td>None</td>\n", | |
" <td>0.0</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>PConc</td>\n", | |
" <td>Gd</td>\n", | |
" <td>TA</td>\n", | |
" <td>No</td>\n", | |
" <td>GLQ</td>\n", | |
" <td>791.0</td>\n", | |
" <td>Unf</td>\n", | |
" <td>0.0</td>\n", | |
" <td>137.0</td>\n", | |
" <td>928.0</td>\n", | |
" <td>GasA</td>\n", | |
" <td>Gd</td>\n", | |
" <td>Y</td>\n", | |
" <td>SBrkr</td>\n", | |
" <td>928</td>\n", | |
" <td>701</td>\n", | |
" <td>0</td>\n", | |
" <td>1629</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>TA</td>\n", | |
" <td>6</td>\n", | |
" <td>Typ</td>\n", | |
" <td>1</td>\n", | |
" <td>TA</td>\n", | |
" <td>Attchd</td>\n", | |
" <td>1997.0</td>\n", | |
" <td>Fin</td>\n", | |
" <td>2.0</td>\n", | |
" <td>482.0</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>Y</td>\n", | |
" <td>212</td>\n", | |
" <td>34</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>MnPrv</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>2010</td>\n", | |
" <td>WD</td>\n", | |
" <td>Normal</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1464</td>\n", | |
" <td>60</td>\n", | |
" <td>RL</td>\n", | |
" <td>78.0</td>\n", | |
" <td>9978</td>\n", | |
" <td>Pave</td>\n", | |
" <td>NaN</td>\n", | |
" <td>IR1</td>\n", | |
" <td>Lvl</td>\n", | |
" <td>AllPub</td>\n", | |
" <td>Inside</td>\n", | |
" <td>Gtl</td>\n", | |
" <td>Gilbert</td>\n", | |
" <td>Norm</td>\n", | |
" <td>Norm</td>\n", | |
" <td>1Fam</td>\n", | |
" <td>2Story</td>\n", | |
" <td>6</td>\n", | |
" <td>6</td>\n", | |
" <td>1998</td>\n", | |
" <td>1998</td>\n", | |
" <td>Gable</td>\n", | |
" <td>CompShg</td>\n", | |
" <td>VinylSd</td>\n", | |
" <td>VinylSd</td>\n", | |
" <td>BrkFace</td>\n", | |
" <td>20.0</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>PConc</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>No</td>\n", | |
" <td>GLQ</td>\n", | |
" <td>602.0</td>\n", | |
" <td>Unf</td>\n", | |
" <td>0.0</td>\n", | |
" <td>324.0</td>\n", | |
" <td>926.0</td>\n", | |
" <td>GasA</td>\n", | |
" <td>Ex</td>\n", | |
" <td>Y</td>\n", | |
" <td>SBrkr</td>\n", | |
" <td>926</td>\n", | |
" <td>678</td>\n", | |
" <td>0</td>\n", | |
" <td>1604</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>Gd</td>\n", | |
" <td>7</td>\n", | |
" <td>Typ</td>\n", | |
" <td>1</td>\n", | |
" <td>Gd</td>\n", | |
" <td>Attchd</td>\n", | |
" <td>1998.0</td>\n", | |
" <td>Fin</td>\n", | |
" <td>2.0</td>\n", | |
" <td>470.0</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>Y</td>\n", | |
" <td>360</td>\n", | |
" <td>36</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>6</td>\n", | |
" <td>2010</td>\n", | |
" <td>WD</td>\n", | |
" <td>Normal</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1465</td>\n", | |
" <td>120</td>\n", | |
" <td>RL</td>\n", | |
" <td>43.0</td>\n", | |
" <td>5005</td>\n", | |
" <td>Pave</td>\n", | |
" <td>NaN</td>\n", | |
" <td>IR1</td>\n", | |
" <td>HLS</td>\n", | |
" <td>AllPub</td>\n", | |
" <td>Inside</td>\n", | |
" <td>Gtl</td>\n", | |
" <td>StoneBr</td>\n", | |
" <td>Norm</td>\n", | |
" <td>Norm</td>\n", | |
" <td>TwnhsE</td>\n", | |
" <td>1Story</td>\n", | |
" <td>8</td>\n", | |
" <td>5</td>\n", | |
" <td>1992</td>\n", | |
" <td>1992</td>\n", | |
" <td>Gable</td>\n", | |
" <td>CompShg</td>\n", | |
" <td>HdBoard</td>\n", | |
" <td>HdBoard</td>\n", | |
" <td>None</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Gd</td>\n", | |
" <td>TA</td>\n", | |
" <td>PConc</td>\n", | |
" <td>Gd</td>\n", | |
" <td>TA</td>\n", | |
" <td>No</td>\n", | |
" <td>ALQ</td>\n", | |
" <td>263.0</td>\n", | |
" <td>Unf</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1017.0</td>\n", | |
" <td>1280.0</td>\n", | |
" <td>GasA</td>\n", | |
" <td>Ex</td>\n", | |
" <td>Y</td>\n", | |
" <td>SBrkr</td>\n", | |
" <td>1280</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1280</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>Gd</td>\n", | |
" <td>5</td>\n", | |
" <td>Typ</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Attchd</td>\n", | |
" <td>1992.0</td>\n", | |
" <td>RFn</td>\n", | |
" <td>2.0</td>\n", | |
" <td>506.0</td>\n", | |
" <td>TA</td>\n", | |
" <td>TA</td>\n", | |
" <td>Y</td>\n", | |
" <td>0</td>\n", | |
" <td>82</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>144</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2010</td>\n", | |
" <td>WD</td>\n", | |
" <td>Normal</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Id MSSubClass MSZoning ... YrSold SaleType SaleCondition\n", | |
"0 1461 20 RH ... 2010 WD Normal\n", | |
"1 1462 20 RL ... 2010 WD Normal\n", | |
"2 1463 60 RL ... 2010 WD Normal\n", | |
"3 1464 60 RL ... 2010 WD Normal\n", | |
"4 1465 120 RL ... 2010 WD Normal\n", | |
"\n", | |
"[5 rows x 80 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 8 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "zQOFBjHSrLaI", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Load the previously trained target encoder from disk and apply it to the test set.\n", | |
"saved_te = joblib.load('saved_models/treinamento_normal/target_enconder.pkl')\n", | |
"\n", | |
"test_transformed = saved_te.transform(test)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "8-N4m64TagMt", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Load the previously trained simple imputer from disk and apply it to the encoded test set.\n", | |
"saved_si = joblib.load('saved_models/treinamento_normal/simple_imputer.pkl')\n", | |
"\n", | |
"test_transformed = saved_si.transform(test_transformed)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5MK5nwbsatYS", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Load the previously trained random forest regressor from disk and predict on the transformed test set.\n", | |
"saved_model = joblib.load('saved_models/treinamento_normal/model.pkl')\n", | |
"\n", | |
"y_pred = saved_model.predict(test_transformed)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "h9itwVRIa4eG", | |
"colab_type": "code", | |
"outputId": "dc609d0d-f246-46c4-c4ea-620d4e3dd8ca", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 51 | |
} | |
}, | |
"source": [ | |
"y_pred" | |
], | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array([126004.991, 153460.822, 183824.115, ..., 153414.329, 112589.308,\n", | |
" 197563.65 ])" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 12 | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment