Created
December 9, 2019 11:50
-
-
Save analyticsindiamagazine/db9b7755f2cdf814c3e1d1c681a41958 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Building FastAi Model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#Printing thw working directory\n", | |
"import os\n", | |
"wd = os.getcwd()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'/Users/amalnair/Documents/Amal-WorkSpace/UCC_Project/resources'" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Setting The Home Directory for the project\n", | |
"home_dir = wd[:-9]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'/Users/amalnair/Documents/Amal-WorkSpace/UCC_Project/'" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"home_dir" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"HOME DIRECTORY\n", | |
"['.DS_Store', 'requirements.txt', 'bin', 'include', 'resources', 'model', 'app.py', 'lib', 'templates']\n", | |
"WORKING DIRECTORY\n", | |
"['Data_Train.xlsx', 'modeling.ipynb', 'Data_Test.xlsx', '.ipynb_checkpoints']\n" | |
] | |
} | |
], | |
"source": [ | |
"#Listing the home directory and working directory contents\n", | |
"print('HOME DIRECTORY')\n", | |
"print(os.listdir(home_dir))\n", | |
"print('WORKING DIRECTORY')\n", | |
"print(os.listdir(wd))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Loading The Datasets" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "y0mIWjyrdrpy" | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"training_set = pd.read_excel(wd+'/Data_Train.xlsx')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"colab_type": "code", | |
"id": "vSEa6kilqvMF", | |
"outputId": "a1760c04-fa01-4d05-da21-93f1cc310ed3" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Name</th>\n", | |
" <th>Location</th>\n", | |
" <th>Year</th>\n", | |
" <th>Kilometers_Driven</th>\n", | |
" <th>Fuel_Type</th>\n", | |
" <th>Transmission</th>\n", | |
" <th>Owner_Type</th>\n", | |
" <th>Mileage</th>\n", | |
" <th>Engine</th>\n", | |
" <th>Power</th>\n", | |
" <th>Seats</th>\n", | |
" <th>New_Price</th>\n", | |
" <th>Price</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>Maruti Wagon R LXI CNG</td>\n", | |
" <td>Mumbai</td>\n", | |
" <td>2010</td>\n", | |
" <td>72000</td>\n", | |
" <td>CNG</td>\n", | |
" <td>Manual</td>\n", | |
" <td>First</td>\n", | |
" <td>26.6 km/kg</td>\n", | |
" <td>998 CC</td>\n", | |
" <td>58.16 bhp</td>\n", | |
" <td>5.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1.75</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>Hyundai Creta 1.6 CRDi SX Option</td>\n", | |
" <td>Pune</td>\n", | |
" <td>2015</td>\n", | |
" <td>41000</td>\n", | |
" <td>Diesel</td>\n", | |
" <td>Manual</td>\n", | |
" <td>First</td>\n", | |
" <td>19.67 kmpl</td>\n", | |
" <td>1582 CC</td>\n", | |
" <td>126.2 bhp</td>\n", | |
" <td>5.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>12.50</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>Honda Jazz V</td>\n", | |
" <td>Chennai</td>\n", | |
" <td>2011</td>\n", | |
" <td>46000</td>\n", | |
" <td>Petrol</td>\n", | |
" <td>Manual</td>\n", | |
" <td>First</td>\n", | |
" <td>18.2 kmpl</td>\n", | |
" <td>1199 CC</td>\n", | |
" <td>88.7 bhp</td>\n", | |
" <td>5.0</td>\n", | |
" <td>8.61 Lakh</td>\n", | |
" <td>4.50</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>Maruti Ertiga VDI</td>\n", | |
" <td>Chennai</td>\n", | |
" <td>2012</td>\n", | |
" <td>87000</td>\n", | |
" <td>Diesel</td>\n", | |
" <td>Manual</td>\n", | |
" <td>First</td>\n", | |
" <td>20.77 kmpl</td>\n", | |
" <td>1248 CC</td>\n", | |
" <td>88.76 bhp</td>\n", | |
" <td>7.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>6.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>Audi A4 New 2.0 TDI Multitronic</td>\n", | |
" <td>Coimbatore</td>\n", | |
" <td>2013</td>\n", | |
" <td>40670</td>\n", | |
" <td>Diesel</td>\n", | |
" <td>Automatic</td>\n", | |
" <td>Second</td>\n", | |
" <td>15.2 kmpl</td>\n", | |
" <td>1968 CC</td>\n", | |
" <td>140.8 bhp</td>\n", | |
" <td>5.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>17.74</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Name Location Year Kilometers_Driven \\\n", | |
"0 Maruti Wagon R LXI CNG Mumbai 2010 72000 \n", | |
"1 Hyundai Creta 1.6 CRDi SX Option Pune 2015 41000 \n", | |
"2 Honda Jazz V Chennai 2011 46000 \n", | |
"3 Maruti Ertiga VDI Chennai 2012 87000 \n", | |
"4 Audi A4 New 2.0 TDI Multitronic Coimbatore 2013 40670 \n", | |
"\n", | |
" Fuel_Type Transmission Owner_Type Mileage Engine Power Seats \\\n", | |
"0 CNG Manual First 26.6 km/kg 998 CC 58.16 bhp 5.0 \n", | |
"1 Diesel Manual First 19.67 kmpl 1582 CC 126.2 bhp 5.0 \n", | |
"2 Petrol Manual First 18.2 kmpl 1199 CC 88.7 bhp 5.0 \n", | |
"3 Diesel Manual First 20.77 kmpl 1248 CC 88.76 bhp 7.0 \n", | |
"4 Diesel Automatic Second 15.2 kmpl 1968 CC 140.8 bhp 5.0 \n", | |
"\n", | |
" New_Price Price \n", | |
"0 NaN 1.75 \n", | |
"1 NaN 12.50 \n", | |
"2 8.61 Lakh 4.50 \n", | |
"3 NaN 6.00 \n", | |
"4 NaN 17.74 " | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"training_set.head(5)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Structuring & Formatting The Datasets" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "HGT8XlW8rtC9" | |
}, | |
"outputs": [], | |
"source": [ | |
"def restructure(data):\n", | |
" \n", | |
" names = list(data.Name)\n", | |
" \n", | |
" brand = []\n", | |
" model = []\n", | |
" \n", | |
" for i in range(len(names)):\n", | |
" try:\n", | |
" brand.append(names[i].split(\" \")[0])\n", | |
" try:\n", | |
" model.append(\" \".join(names[i].split(\" \")[1:]).strip())\n", | |
" except:\n", | |
" pass\n", | |
" except:\n", | |
" print(\"ERR ! - \", names[i], \"@\" , i)\n", | |
" \n", | |
" \n", | |
" mileage = list(data.Mileage)\n", | |
" \n", | |
" for i in range(len(mileage)):\n", | |
" try :\n", | |
" mileage[i] = float(mileage[i].split(\" \")[0].strip())\n", | |
" except:\n", | |
" mileage[i] = np.nan\n", | |
" \n", | |
" \n", | |
" engine = list(data.Engine)\n", | |
" for i in range(len(engine)):\n", | |
" try :\n", | |
" engine[i] = int(engine[i].split(\" \")[0].strip())\n", | |
" except:\n", | |
" engine[i] = np.nan\n", | |
" \n", | |
" \n", | |
" power = list(data.Power)\n", | |
" for i in range(len(power)):\n", | |
" try :\n", | |
" power[i] = float(power[i].split(\" \")[0].strip())\n", | |
" except:\n", | |
" power[i] = np.nan\n", | |
" \n", | |
" data['New_Price'].fillna(0, inplace = True)\n", | |
" \n", | |
" newp = list(data['New_Price'])\n", | |
" \n", | |
" for i in range(len(newp)):\n", | |
" if newp[i] == 0:\n", | |
" newp[i] = float(newp[i])\n", | |
" continue\n", | |
" elif 'Cr' in newp[i]:\n", | |
" newp[i] = float(newp[i].split()[0].strip()) * 100 \n", | |
" elif 'Lakh' in newp[i]:\n", | |
" newp[i] = float(newp[i].split()[0].strip())\n", | |
" \n", | |
" \n", | |
"#Re-ordering the columns\n", | |
"\n", | |
" restructured = pd.DataFrame({'Brand': brand,\n", | |
" 'Model':model,\n", | |
" 'Location': data['Location'], \n", | |
" 'Year':data['Year'] , \n", | |
" 'Kilometers_Driven':data['Kilometers_Driven'],\n", | |
" 'Fuel_Type':data['Fuel_Type'],\n", | |
" 'Transmission':data['Transmission'],\n", | |
" 'Owner_Type':data['Owner_Type'],\n", | |
" 'Mileage':mileage,\n", | |
" 'Engine':engine,\n", | |
" 'Power':power,\n", | |
" 'Seats':data['Seats'],\n", | |
" 'New_Price':newp\n", | |
" })\n", | |
"\n", | |
" if 'Price' in data.columns:\n", | |
" restructured['Price'] = data['Price']\n", | |
" return restructured\n", | |
"\n", | |
" else:\n", | |
" return restructured" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "D2kKU52KAnlm" | |
}, | |
"outputs": [], | |
"source": [ | |
"train_d = restructure(training_set)\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Selecting Few Features" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "LA9RejMEOMhF" | |
}, | |
"outputs": [], | |
"source": [ | |
"cols = ['Brand', 'Location', 'Year', 'Kilometers_Driven', 'Fuel_Type',\n", | |
" 'Transmission', 'Owner_Type', 'Mileage', 'Price']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "__7hgfYL1atI" | |
}, | |
"outputs": [], | |
"source": [ | |
"train_d = train_d[cols]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 255 | |
}, | |
"colab_type": "code", | |
"id": "b3JX0LY63t8v", | |
"outputId": "77286e45-b1a4-4acd-f87d-8de515db3471" | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"RangeIndex: 6019 entries, 0 to 6018\n", | |
"Data columns (total 9 columns):\n", | |
"Brand 6019 non-null object\n", | |
"Location 6019 non-null object\n", | |
"Year 6019 non-null int64\n", | |
"Kilometers_Driven 6019 non-null int64\n", | |
"Fuel_Type 6019 non-null object\n", | |
"Transmission 6019 non-null object\n", | |
"Owner_Type 6019 non-null object\n", | |
"Mileage 6017 non-null float64\n", | |
"Price 6019 non-null float64\n", | |
"dtypes: float64(2), int64(2), object(5)\n", | |
"memory usage: 423.3+ KB\n" | |
] | |
} | |
], | |
"source": [ | |
"train_d.info()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "Jz4R89Lil2vF" | |
}, | |
"source": [ | |
"## Modeling With Fast.ai\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "b1axpAjKl7Oi" | |
}, | |
"outputs": [], | |
"source": [ | |
"from fastai.tabular import *\n", | |
"#This path will be used for saving and exporting the model\n", | |
"path = wd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "tQcQyVVKUc21" | |
}, | |
"outputs": [], | |
"source": [ | |
"#The target variable that we are trying to predict\n", | |
"dep_var = 'Price'\n", | |
"\n", | |
"#The categorical variables \n", | |
"cat_names = list(train_d.select_dtypes('object').columns)\n", | |
"\n", | |
"#The continuous variables\n", | |
"cont_names =['Year', 'Kilometers_Driven', 'Mileage'] #No need to keep the Dependend variable\n", | |
"\n", | |
"#Preprocessing steps for the fastai learner\n", | |
"procs = [FillMissing, Categorify, Normalize]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "hetMkktbUcy9" | |
}, | |
"outputs": [], | |
"source": [ | |
"#Creating a validation set\n", | |
"val = TabularList.from_df(train_d.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "UoAVDfT5UcqY" | |
}, | |
"outputs": [], | |
"source": [ | |
"#Creating a trainig set\n", | |
"data = (TabularList.from_df(train_d, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)\n", | |
" .split_by_idx(list(range(len(train_d) - int(len(train_d) * 0.2),len(train_d))))\n", | |
" .label_from_df(cols=dep_var)\n", | |
" .add_test(val)\n", | |
" .databunch())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"colab_type": "code", | |
"id": "vDDUdyFdw0Au", | |
"outputId": "9ed157bd-937f-4a89-d365-8bf6ad78fd3e" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Brand</th>\n", | |
" <th>Location</th>\n", | |
" <th>Fuel_Type</th>\n", | |
" <th>Transmission</th>\n", | |
" <th>Owner_Type</th>\n", | |
" <th>Mileage_na</th>\n", | |
" <th>Year</th>\n", | |
" <th>Kilometers_Driven</th>\n", | |
" <th>Mileage</th>\n", | |
" <th>target</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>Volkswagen</td>\n", | |
" <td>Delhi</td>\n", | |
" <td>Diesel</td>\n", | |
" <td>Manual</td>\n", | |
" <td>Second</td>\n", | |
" <td>False</td>\n", | |
" <td>-0.4138</td>\n", | |
" <td>0.1472</td>\n", | |
" <td>0.8758</td>\n", | |
" <td>2.65</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>Ford</td>\n", | |
" <td>Kochi</td>\n", | |
" <td>Petrol</td>\n", | |
" <td>Manual</td>\n", | |
" <td>First</td>\n", | |
" <td>False</td>\n", | |
" <td>-1.3381</td>\n", | |
" <td>0.0725</td>\n", | |
" <td>-0.9393</td>\n", | |
" <td>1.65</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>Hyundai</td>\n", | |
" <td>Bangalore</td>\n", | |
" <td>Diesel</td>\n", | |
" <td>Manual</td>\n", | |
" <td>First</td>\n", | |
" <td>False</td>\n", | |
" <td>0.2025</td>\n", | |
" <td>-0.3001</td>\n", | |
" <td>0.9789</td>\n", | |
" <td>8.35</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>Renault</td>\n", | |
" <td>Pune</td>\n", | |
" <td>Petrol</td>\n", | |
" <td>Manual</td>\n", | |
" <td>First</td>\n", | |
" <td>False</td>\n", | |
" <td>-0.1056</td>\n", | |
" <td>-0.3895</td>\n", | |
" <td>-1.0622</td>\n", | |
" <td>6.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>Maruti</td>\n", | |
" <td>Mumbai</td>\n", | |
" <td>Petrol</td>\n", | |
" <td>Manual</td>\n", | |
" <td>First</td>\n", | |
" <td>False</td>\n", | |
" <td>-0.7219</td>\n", | |
" <td>-0.0111</td>\n", | |
" <td>-0.1273</td>\n", | |
" <td>2.95</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"data.show_batch(5)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "Jg8DCyNBWQoP" | |
}, | |
"source": [ | |
"### Initializing Neural Network" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "AETtx1dawzzq" | |
}, | |
"outputs": [], | |
"source": [ | |
"learn = tabular_learner(data, layers=[300,100, 100, 50], metrics= rmse)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "0lsZaZX_Wc11" | |
}, | |
"source": [ | |
"### Training The Model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 979 | |
}, | |
"colab_type": "code", | |
"id": "fOxMFAUiwzx0", | |
"outputId": "2cbbe816-c79f-422e-9c9d-f92c49f1220d" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: left;\">\n", | |
" <th>epoch</th>\n", | |
" <th>train_loss</th>\n", | |
" <th>valid_loss</th>\n", | |
" <th>root_mean_squared_error</th>\n", | |
" <th>time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>55.471222</td>\n", | |
" <td>36.102558</td>\n", | |
" <td>5.426857</td>\n", | |
" <td>00:01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>33.489296</td>\n", | |
" <td>35.301064</td>\n", | |
" <td>5.212431</td>\n", | |
" <td>00:00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>31.670364</td>\n", | |
" <td>37.889015</td>\n", | |
" <td>5.400799</td>\n", | |
" <td>00:00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>29.258881</td>\n", | |
" <td>35.724693</td>\n", | |
" <td>5.329782</td>\n", | |
" <td>00:00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>27.429525</td>\n", | |
" <td>33.397991</td>\n", | |
" <td>5.029610</td>\n", | |
" <td>00:00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>5</td>\n", | |
" <td>26.536196</td>\n", | |
" <td>33.372677</td>\n", | |
" <td>5.031393</td>\n", | |
" <td>00:00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>6</td>\n", | |
" <td>26.760942</td>\n", | |
" <td>35.821808</td>\n", | |
" <td>5.235147</td>\n", | |
" <td>00:00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>7</td>\n", | |
" <td>26.035761</td>\n", | |
" <td>29.993044</td>\n", | |
" <td>4.786624</td>\n", | |
" <td>00:00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>8</td>\n", | |
" <td>26.381557</td>\n", | |
" <td>30.354549</td>\n", | |
" <td>4.830415</td>\n", | |
" <td>00:00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>9</td>\n", | |
" <td>24.374403</td>\n", | |
" <td>33.764561</td>\n", | |
" <td>5.071315</td>\n", | |
" <td>00:00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>10</td>\n", | |
" <td>26.226358</td>\n", | |
" <td>31.806461</td>\n", | |
" <td>4.990485</td>\n", | |
" <td>00:00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>11</td>\n", | |
" <td>26.756731</td>\n", | |
" <td>31.552532</td>\n", | |
" <td>4.922013</td>\n", | |
" <td>00:00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>12</td>\n", | |
" <td>24.486885</td>\n", | |
" <td>33.296734</td>\n", | |
" <td>5.090023</td>\n", | |
" <td>00:00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>13</td>\n", | |
" <td>26.346607</td>\n", | |
" <td>35.681992</td>\n", | |
" <td>5.410216</td>\n", | |
" <td>00:01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>14</td>\n", | |
" <td>22.871401</td>\n", | |
" <td>32.267162</td>\n", | |
" <td>4.954698</td>\n", | |
" <td>00:01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>15</td>\n", | |
" <td>24.234312</td>\n", | |
" <td>30.641901</td>\n", | |
" <td>4.893451</td>\n", | |
" <td>00:01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>16</td>\n", | |
" <td>25.584629</td>\n", | |
" <td>31.672037</td>\n", | |
" <td>4.896180</td>\n", | |
" <td>00:01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>17</td>\n", | |
" <td>24.936560</td>\n", | |
" <td>33.300102</td>\n", | |
" <td>5.097296</td>\n", | |
" <td>00:01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>18</td>\n", | |
" <td>26.099588</td>\n", | |
" <td>36.330246</td>\n", | |
" <td>5.385923</td>\n", | |
" <td>00:01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>19</td>\n", | |
" <td>24.944227</td>\n", | |
" <td>34.039410</td>\n", | |
" <td>5.045019</td>\n", | |
" <td>00:01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>20</td>\n", | |
" <td>23.358793</td>\n", | |
" <td>33.369480</td>\n", | |
" <td>4.982137</td>\n", | |
" <td>00:01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>21</td>\n", | |
" <td>24.013760</td>\n", | |
" <td>35.621788</td>\n", | |
" <td>5.172819</td>\n", | |
" <td>00:01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>22</td>\n", | |
" <td>21.373287</td>\n", | |
" <td>33.819248</td>\n", | |
" <td>5.061254</td>\n", | |
" <td>00:01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>23</td>\n", | |
" <td>19.799622</td>\n", | |
" <td>33.503162</td>\n", | |
" <td>5.102333</td>\n", | |
" <td>00:01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>24</td>\n", | |
" <td>19.037552</td>\n", | |
" <td>51.808071</td>\n", | |
" <td>6.158722</td>\n", | |
" <td>00:01</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"learn.fit(25, 1e-2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Brand</th>\n", | |
" <th>Location</th>\n", | |
" <th>Fuel_Type</th>\n", | |
" <th>Transmission</th>\n", | |
" <th>Owner_Type</th>\n", | |
" <th>Mileage_na</th>\n", | |
" <th>Year</th>\n", | |
" <th>Kilometers_Driven</th>\n", | |
" <th>Mileage</th>\n", | |
" <th>target</th>\n", | |
" <th>prediction</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>Hyundai</td>\n", | |
" <td>Hyderabad</td>\n", | |
" <td>Diesel</td>\n", | |
" <td>Manual</td>\n", | |
" <td>First</td>\n", | |
" <td>False</td>\n", | |
" <td>-0.4138</td>\n", | |
" <td>0.1209</td>\n", | |
" <td>0.8384</td>\n", | |
" <td>3.95</td>\n", | |
" <td>[5.786664]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>Maruti</td>\n", | |
" <td>Pune</td>\n", | |
" <td>Diesel</td>\n", | |
" <td>Manual</td>\n", | |
" <td>Fourth & Above</td>\n", | |
" <td>False</td>\n", | |
" <td>-1.3381</td>\n", | |
" <td>0.2068</td>\n", | |
" <td>-0.0614</td>\n", | |
" <td>2.15</td>\n", | |
" <td>[4.262152]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>Audi</td>\n", | |
" <td>Kochi</td>\n", | |
" <td>Diesel</td>\n", | |
" <td>Automatic</td>\n", | |
" <td>First</td>\n", | |
" <td>False</td>\n", | |
" <td>0.5106</td>\n", | |
" <td>-0.4526</td>\n", | |
" <td>-0.2128</td>\n", | |
" <td>21.43</td>\n", | |
" <td>[22.643032]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>Hyundai</td>\n", | |
" <td>Bangalore</td>\n", | |
" <td>Petrol</td>\n", | |
" <td>Manual</td>\n", | |
" <td>First</td>\n", | |
" <td>False</td>\n", | |
" <td>0.2025</td>\n", | |
" <td>-0.1244</td>\n", | |
" <td>-0.2370</td>\n", | |
" <td>7.75</td>\n", | |
" <td>[6.077044]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>BMW</td>\n", | |
" <td>Mumbai</td>\n", | |
" <td>Diesel</td>\n", | |
" <td>Automatic</td>\n", | |
" <td>Second</td>\n", | |
" <td>False</td>\n", | |
" <td>-1.6463</td>\n", | |
" <td>0.2167</td>\n", | |
" <td>0.0878</td>\n", | |
" <td>10.5</td>\n", | |
" <td>[11.230114]</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"learn.show_results(ds_type=DatasetType.Train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Brand</th>\n", | |
" <th>Location</th>\n", | |
" <th>Fuel_Type</th>\n", | |
" <th>Transmission</th>\n", | |
" <th>Owner_Type</th>\n", | |
" <th>Mileage_na</th>\n", | |
" <th>Year</th>\n", | |
" <th>Kilometers_Driven</th>\n", | |
" <th>Mileage</th>\n", | |
" <th>target</th>\n", | |
" <th>prediction</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>BMW</td>\n", | |
" <td>Delhi</td>\n", | |
" <td>Petrol</td>\n", | |
" <td>Automatic</td>\n", | |
" <td>Second</td>\n", | |
" <td>False</td>\n", | |
" <td>-2.2625</td>\n", | |
" <td>-0.1112</td>\n", | |
" <td>-1.4178</td>\n", | |
" <td>6.99</td>\n", | |
" <td>[9.390251]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>Hyundai</td>\n", | |
" <td>Coimbatore</td>\n", | |
" <td>Diesel</td>\n", | |
" <td>Manual</td>\n", | |
" <td>First</td>\n", | |
" <td>False</td>\n", | |
" <td>1.7431</td>\n", | |
" <td>-0.1722</td>\n", | |
" <td>1.0140</td>\n", | |
" <td>15.57</td>\n", | |
" <td>[10.457531]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>Tata</td>\n", | |
" <td>Coimbatore</td>\n", | |
" <td>Diesel</td>\n", | |
" <td>Manual</td>\n", | |
" <td>First</td>\n", | |
" <td>False</td>\n", | |
" <td>0.5106</td>\n", | |
" <td>0.2558</td>\n", | |
" <td>-0.6101</td>\n", | |
" <td>5.29</td>\n", | |
" <td>[8.295876]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>Datsun</td>\n", | |
" <td>Kolkata</td>\n", | |
" <td>Petrol</td>\n", | |
" <td>Manual</td>\n", | |
" <td>First</td>\n", | |
" <td>False</td>\n", | |
" <td>0.8188</td>\n", | |
" <td>-0.4392</td>\n", | |
" <td>1.0140</td>\n", | |
" <td>2.25</td>\n", | |
" <td>[2.160599]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>BMW</td>\n", | |
" <td>Chennai</td>\n", | |
" <td>Diesel</td>\n", | |
" <td>Automatic</td>\n", | |
" <td>First</td>\n", | |
" <td>False</td>\n", | |
" <td>-1.3381</td>\n", | |
" <td>0.5049</td>\n", | |
" <td>-1.4002</td>\n", | |
" <td>20.0</td>\n", | |
" <td>[15.827377]</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"learn.show_results(ds_type=DatasetType.Valid)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Saving & Exporting The Model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"colab_type": "code", | |
"id": "IEtGmv6Wh7T4", | |
"outputId": "dfc37e85-2990-4292-8269-d13841c6a231" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"PosixPath('/Users/amalnair/Documents/Amal-WorkSpace/UCC_Project/resources/models/model.pth')" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"learn.save('model',return_path=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "Pz2rMhp9mbdT" | |
}, | |
"outputs": [], | |
"source": [ | |
"learn.export('model.pkl')" | |
] | |
} | |
], | |
"metadata": { | |
"colab": { | |
"name": "modeling.ipynb", | |
"provenance": [] | |
}, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment