Skip to content

Instantly share code, notes, and snippets.

@analyticsindiamagazine
Created December 9, 2019 11:50
Show Gist options
  • Save analyticsindiamagazine/db9b7755f2cdf814c3e1d1c681a41958 to your computer and use it in GitHub Desktop.
Save analyticsindiamagazine/db9b7755f2cdf814c3e1d1c681a41958 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Building FastAi Model"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#Getting the current working directory\n",
"import os\n",
"wd = os.getcwd()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/Users/amalnair/Documents/Amal-WorkSpace/UCC_Project/resources'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wd"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Setting The Home Directory for the project: the parent of the working directory.\n",
"# It is derived with os.path rather than by slicing a fixed number of characters\n",
"# off wd, so it stays correct if the notebook folder is renamed. Joining with an\n",
"# empty last component keeps a trailing path separator on the result.\n",
"home_dir = os.path.join(os.path.dirname(wd), '')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/Users/amalnair/Documents/Amal-WorkSpace/UCC_Project/'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"home_dir"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HOME DIRECTORY\n",
"['.DS_Store', 'requirements.txt', 'bin', 'include', 'resources', 'model', 'app.py', 'lib', 'templates']\n",
"WORKING DIRECTORY\n",
"['Data_Train.xlsx', 'modeling.ipynb', 'Data_Test.xlsx', '.ipynb_checkpoints']\n"
]
}
],
"source": [
"#Listing the home directory and working directory contents\n",
"print('HOME DIRECTORY')\n",
"print(os.listdir(home_dir))\n",
"print('WORKING DIRECTORY')\n",
"print(os.listdir(wd))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading The Datasets"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "y0mIWjyrdrpy"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"training_set = pd.read_excel(wd+'/Data_Train.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"id": "vSEa6kilqvMF",
"outputId": "a1760c04-fa01-4d05-da21-93f1cc310ed3"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Location</th>\n",
" <th>Year</th>\n",
" <th>Kilometers_Driven</th>\n",
" <th>Fuel_Type</th>\n",
" <th>Transmission</th>\n",
" <th>Owner_Type</th>\n",
" <th>Mileage</th>\n",
" <th>Engine</th>\n",
" <th>Power</th>\n",
" <th>Seats</th>\n",
" <th>New_Price</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>Maruti Wagon R LXI CNG</td>\n",
" <td>Mumbai</td>\n",
" <td>2010</td>\n",
" <td>72000</td>\n",
" <td>CNG</td>\n",
" <td>Manual</td>\n",
" <td>First</td>\n",
" <td>26.6 km/kg</td>\n",
" <td>998 CC</td>\n",
" <td>58.16 bhp</td>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>1.75</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>Hyundai Creta 1.6 CRDi SX Option</td>\n",
" <td>Pune</td>\n",
" <td>2015</td>\n",
" <td>41000</td>\n",
" <td>Diesel</td>\n",
" <td>Manual</td>\n",
" <td>First</td>\n",
" <td>19.67 kmpl</td>\n",
" <td>1582 CC</td>\n",
" <td>126.2 bhp</td>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>12.50</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>Honda Jazz V</td>\n",
" <td>Chennai</td>\n",
" <td>2011</td>\n",
" <td>46000</td>\n",
" <td>Petrol</td>\n",
" <td>Manual</td>\n",
" <td>First</td>\n",
" <td>18.2 kmpl</td>\n",
" <td>1199 CC</td>\n",
" <td>88.7 bhp</td>\n",
" <td>5.0</td>\n",
" <td>8.61 Lakh</td>\n",
" <td>4.50</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>Maruti Ertiga VDI</td>\n",
" <td>Chennai</td>\n",
" <td>2012</td>\n",
" <td>87000</td>\n",
" <td>Diesel</td>\n",
" <td>Manual</td>\n",
" <td>First</td>\n",
" <td>20.77 kmpl</td>\n",
" <td>1248 CC</td>\n",
" <td>88.76 bhp</td>\n",
" <td>7.0</td>\n",
" <td>NaN</td>\n",
" <td>6.00</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>Audi A4 New 2.0 TDI Multitronic</td>\n",
" <td>Coimbatore</td>\n",
" <td>2013</td>\n",
" <td>40670</td>\n",
" <td>Diesel</td>\n",
" <td>Automatic</td>\n",
" <td>Second</td>\n",
" <td>15.2 kmpl</td>\n",
" <td>1968 CC</td>\n",
" <td>140.8 bhp</td>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>17.74</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Name Location Year Kilometers_Driven \\\n",
"0 Maruti Wagon R LXI CNG Mumbai 2010 72000 \n",
"1 Hyundai Creta 1.6 CRDi SX Option Pune 2015 41000 \n",
"2 Honda Jazz V Chennai 2011 46000 \n",
"3 Maruti Ertiga VDI Chennai 2012 87000 \n",
"4 Audi A4 New 2.0 TDI Multitronic Coimbatore 2013 40670 \n",
"\n",
" Fuel_Type Transmission Owner_Type Mileage Engine Power Seats \\\n",
"0 CNG Manual First 26.6 km/kg 998 CC 58.16 bhp 5.0 \n",
"1 Diesel Manual First 19.67 kmpl 1582 CC 126.2 bhp 5.0 \n",
"2 Petrol Manual First 18.2 kmpl 1199 CC 88.7 bhp 5.0 \n",
"3 Diesel Manual First 20.77 kmpl 1248 CC 88.76 bhp 7.0 \n",
"4 Diesel Automatic Second 15.2 kmpl 1968 CC 140.8 bhp 5.0 \n",
"\n",
" New_Price Price \n",
"0 NaN 1.75 \n",
"1 NaN 12.50 \n",
"2 8.61 Lakh 4.50 \n",
"3 NaN 6.00 \n",
"4 NaN 17.74 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"training_set.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Structuring & Formatting The Datasets"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "HGT8XlW8rtC9"
},
"outputs": [],
"source": [
"def _leading_number(value, cast=float):\n",
"    \"\"\"Convert the first space-separated token of `value` using `cast`.\n",
"\n",
"    Returns np.nan when `value` is not a string (for example a missing\n",
"    cell) or when the token is not a valid number for `cast`.\n",
"    \"\"\"\n",
"    try:\n",
"        return cast(value.split(' ')[0].strip())\n",
"    except (AttributeError, ValueError):\n",
"        # AttributeError: non-string cell; ValueError: non-numeric token\n",
"        return np.nan\n",
"\n",
"\n",
"def _new_price_in_lakh(value):\n",
"    \"\"\"Convert one New_Price cell to a float amount expressed in Lakh.\n",
"\n",
"    Missing values become 0.0, 'Cr' amounts are multiplied by 100 and\n",
"    'Lakh' amounts are taken as they are. Strings that mention neither\n",
"    unit are returned unchanged.\n",
"    \"\"\"\n",
"    if isinstance(value, str):\n",
"        if 'Cr' in value:\n",
"            return float(value.split()[0].strip()) * 100\n",
"        if 'Lakh' in value:\n",
"            return float(value.split()[0].strip())\n",
"        return value\n",
"    if pd.isna(value):\n",
"        return 0.0\n",
"    # Already numeric (this also covers a literal 0)\n",
"    return float(value)\n",
"\n",
"\n",
"def restructure(data):\n",
"    \"\"\"Return a cleaned copy of the raw listings frame.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : pandas.DataFrame\n",
"        Must contain the columns Name, Location, Year, Kilometers_Driven,\n",
"        Fuel_Type, Transmission, Owner_Type, Mileage, Engine, Power, Seats\n",
"        and New_Price. A Price column is copied over when present.\n",
"        The frame is not modified.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Name is split into Brand (first word) and Model (the rest);\n",
"        Mileage, Engine and Power keep only their leading number (NaN when\n",
"        it cannot be parsed); New_Price is converted by _new_price_in_lakh.\n",
"    \"\"\"\n",
"    brand = []\n",
"    model = []\n",
"    for i, name in enumerate(data['Name']):\n",
"        if isinstance(name, str):\n",
"            parts = name.split(' ')\n",
"            brand.append(parts[0])\n",
"            model.append(' '.join(parts[1:]).strip())\n",
"        else:\n",
"            # Report the bad row but keep both lists aligned with the frame\n",
"            print('ERR ! - ', name, '@', i)\n",
"            brand.append(np.nan)\n",
"            model.append(np.nan)\n",
"\n",
"    mileage = [_leading_number(value, float) for value in data['Mileage']]\n",
"    engine = [_leading_number(value, int) for value in data['Engine']]\n",
"    power = [_leading_number(value, float) for value in data['Power']]\n",
"\n",
"    # Built from the column values directly: no in-place fillna on the input\n",
"    newp = [_new_price_in_lakh(value) for value in data['New_Price']]\n",
"\n",
"    # Assemble the output with the columns in their final order\n",
"    restructured = pd.DataFrame({'Brand': brand,\n",
"                                 'Model': model,\n",
"                                 'Location': data['Location'],\n",
"                                 'Year': data['Year'],\n",
"                                 'Kilometers_Driven': data['Kilometers_Driven'],\n",
"                                 'Fuel_Type': data['Fuel_Type'],\n",
"                                 'Transmission': data['Transmission'],\n",
"                                 'Owner_Type': data['Owner_Type'],\n",
"                                 'Mileage': mileage,\n",
"                                 'Engine': engine,\n",
"                                 'Power': power,\n",
"                                 'Seats': data['Seats'],\n",
"                                 'New_Price': newp\n",
"                                 })\n",
"\n",
"    # The target column only exists in the training data\n",
"    if 'Price' in data.columns:\n",
"        restructured['Price'] = data['Price']\n",
"\n",
"    return restructured"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "D2kKU52KAnlm"
},
"outputs": [],
"source": [
"train_d = restructure(training_set)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Selecting Few Features"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "LA9RejMEOMhF"
},
"outputs": [],
"source": [
"cols = ['Brand', 'Location', 'Year', 'Kilometers_Driven', 'Fuel_Type',\n",
" 'Transmission', 'Owner_Type', 'Mileage', 'Price']"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "__7hgfYL1atI"
},
"outputs": [],
"source": [
"train_d = train_d[cols]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 255
},
"colab_type": "code",
"id": "b3JX0LY63t8v",
"outputId": "77286e45-b1a4-4acd-f87d-8de515db3471"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 6019 entries, 0 to 6018\n",
"Data columns (total 9 columns):\n",
"Brand 6019 non-null object\n",
"Location 6019 non-null object\n",
"Year 6019 non-null int64\n",
"Kilometers_Driven 6019 non-null int64\n",
"Fuel_Type 6019 non-null object\n",
"Transmission 6019 non-null object\n",
"Owner_Type 6019 non-null object\n",
"Mileage 6017 non-null float64\n",
"Price 6019 non-null float64\n",
"dtypes: float64(2), int64(2), object(5)\n",
"memory usage: 423.3+ KB\n"
]
}
],
"source": [
"train_d.info()"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "Jz4R89Lil2vF"
},
"source": [
"## Modeling With Fast.ai\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "b1axpAjKl7Oi"
},
"outputs": [],
"source": [
"from fastai.tabular import *\n",
"#This path will be used for saving and exporting the model\n",
"path = wd"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "tQcQyVVKUc21"
},
"outputs": [],
"source": [
"#The target variable that we are trying to predict\n",
"dep_var = 'Price'\n",
"\n",
"#The categorical variables \n",
"cat_names = list(train_d.select_dtypes('object').columns)\n",
"\n",
"#The continuous variables\n",
"cont_names =['Year', 'Kilometers_Driven', 'Mileage'] #The dependent variable (Price) is deliberately left out\n",
"\n",
"#Preprocessing steps for the fastai learner\n",
"procs = [FillMissing, Categorify, Normalize]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "hetMkktbUcy9"
},
"outputs": [],
"source": [
"#Rows 800-999 of train_d, attached as the test set via add_test() in the next cell (the validation split is made there with split_by_idx)\n",
"val = TabularList.from_df(train_d.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "UoAVDfT5UcqY"
},
"outputs": [],
"source": [
"#Creating the DataBunch: the last 20% of the rows form the validation split and val is attached as the test set\n",
"data = (TabularList.from_df(train_d, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)\n",
" .split_by_idx(list(range(len(train_d) - int(len(train_d) * 0.2),len(train_d))))\n",
" .label_from_df(cols=dep_var)\n",
" .add_test(val)\n",
" .databunch())"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"id": "vDDUdyFdw0Au",
"outputId": "9ed157bd-937f-4a89-d365-8bf6ad78fd3e"
},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Brand</th>\n",
" <th>Location</th>\n",
" <th>Fuel_Type</th>\n",
" <th>Transmission</th>\n",
" <th>Owner_Type</th>\n",
" <th>Mileage_na</th>\n",
" <th>Year</th>\n",
" <th>Kilometers_Driven</th>\n",
" <th>Mileage</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>Volkswagen</td>\n",
" <td>Delhi</td>\n",
" <td>Diesel</td>\n",
" <td>Manual</td>\n",
" <td>Second</td>\n",
" <td>False</td>\n",
" <td>-0.4138</td>\n",
" <td>0.1472</td>\n",
" <td>0.8758</td>\n",
" <td>2.65</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Ford</td>\n",
" <td>Kochi</td>\n",
" <td>Petrol</td>\n",
" <td>Manual</td>\n",
" <td>First</td>\n",
" <td>False</td>\n",
" <td>-1.3381</td>\n",
" <td>0.0725</td>\n",
" <td>-0.9393</td>\n",
" <td>1.65</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Hyundai</td>\n",
" <td>Bangalore</td>\n",
" <td>Diesel</td>\n",
" <td>Manual</td>\n",
" <td>First</td>\n",
" <td>False</td>\n",
" <td>0.2025</td>\n",
" <td>-0.3001</td>\n",
" <td>0.9789</td>\n",
" <td>8.35</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Renault</td>\n",
" <td>Pune</td>\n",
" <td>Petrol</td>\n",
" <td>Manual</td>\n",
" <td>First</td>\n",
" <td>False</td>\n",
" <td>-0.1056</td>\n",
" <td>-0.3895</td>\n",
" <td>-1.0622</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Maruti</td>\n",
" <td>Mumbai</td>\n",
" <td>Petrol</td>\n",
" <td>Manual</td>\n",
" <td>First</td>\n",
" <td>False</td>\n",
" <td>-0.7219</td>\n",
" <td>-0.0111</td>\n",
" <td>-0.1273</td>\n",
" <td>2.95</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data.show_batch(5)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "Jg8DCyNBWQoP"
},
"source": [
"### Initializing Neural Network"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "AETtx1dawzzq"
},
"outputs": [],
"source": [
"learn = tabular_learner(data, layers=[300,100, 100, 50], metrics= rmse)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "0lsZaZX_Wc11"
},
"source": [
"### Training The Model"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 979
},
"colab_type": "code",
"id": "fOxMFAUiwzx0",
"outputId": "2cbbe816-c79f-422e-9c9d-f92c49f1220d"
},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>valid_loss</th>\n",
" <th>root_mean_squared_error</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>55.471222</td>\n",
" <td>36.102558</td>\n",
" <td>5.426857</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>33.489296</td>\n",
" <td>35.301064</td>\n",
" <td>5.212431</td>\n",
" <td>00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>31.670364</td>\n",
" <td>37.889015</td>\n",
" <td>5.400799</td>\n",
" <td>00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>29.258881</td>\n",
" <td>35.724693</td>\n",
" <td>5.329782</td>\n",
" <td>00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>27.429525</td>\n",
" <td>33.397991</td>\n",
" <td>5.029610</td>\n",
" <td>00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>26.536196</td>\n",
" <td>33.372677</td>\n",
" <td>5.031393</td>\n",
" <td>00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>26.760942</td>\n",
" <td>35.821808</td>\n",
" <td>5.235147</td>\n",
" <td>00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>26.035761</td>\n",
" <td>29.993044</td>\n",
" <td>4.786624</td>\n",
" <td>00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>26.381557</td>\n",
" <td>30.354549</td>\n",
" <td>4.830415</td>\n",
" <td>00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>24.374403</td>\n",
" <td>33.764561</td>\n",
" <td>5.071315</td>\n",
" <td>00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>26.226358</td>\n",
" <td>31.806461</td>\n",
" <td>4.990485</td>\n",
" <td>00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <td>11</td>\n",
" <td>26.756731</td>\n",
" <td>31.552532</td>\n",
" <td>4.922013</td>\n",
" <td>00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <td>12</td>\n",
" <td>24.486885</td>\n",
" <td>33.296734</td>\n",
" <td>5.090023</td>\n",
" <td>00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <td>13</td>\n",
" <td>26.346607</td>\n",
" <td>35.681992</td>\n",
" <td>5.410216</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14</td>\n",
" <td>22.871401</td>\n",
" <td>32.267162</td>\n",
" <td>4.954698</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15</td>\n",
" <td>24.234312</td>\n",
" <td>30.641901</td>\n",
" <td>4.893451</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16</td>\n",
" <td>25.584629</td>\n",
" <td>31.672037</td>\n",
" <td>4.896180</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17</td>\n",
" <td>24.936560</td>\n",
" <td>33.300102</td>\n",
" <td>5.097296</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>18</td>\n",
" <td>26.099588</td>\n",
" <td>36.330246</td>\n",
" <td>5.385923</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>19</td>\n",
" <td>24.944227</td>\n",
" <td>34.039410</td>\n",
" <td>5.045019</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>20</td>\n",
" <td>23.358793</td>\n",
" <td>33.369480</td>\n",
" <td>4.982137</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>21</td>\n",
" <td>24.013760</td>\n",
" <td>35.621788</td>\n",
" <td>5.172819</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>22</td>\n",
" <td>21.373287</td>\n",
" <td>33.819248</td>\n",
" <td>5.061254</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>23</td>\n",
" <td>19.799622</td>\n",
" <td>33.503162</td>\n",
" <td>5.102333</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" <tr>\n",
" <td>24</td>\n",
" <td>19.037552</td>\n",
" <td>51.808071</td>\n",
" <td>6.158722</td>\n",
" <td>00:01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.fit(25, 1e-2)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Brand</th>\n",
" <th>Location</th>\n",
" <th>Fuel_Type</th>\n",
" <th>Transmission</th>\n",
" <th>Owner_Type</th>\n",
" <th>Mileage_na</th>\n",
" <th>Year</th>\n",
" <th>Kilometers_Driven</th>\n",
" <th>Mileage</th>\n",
" <th>target</th>\n",
" <th>prediction</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>Hyundai</td>\n",
" <td>Hyderabad</td>\n",
" <td>Diesel</td>\n",
" <td>Manual</td>\n",
" <td>First</td>\n",
" <td>False</td>\n",
" <td>-0.4138</td>\n",
" <td>0.1209</td>\n",
" <td>0.8384</td>\n",
" <td>3.95</td>\n",
" <td>[5.786664]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Maruti</td>\n",
" <td>Pune</td>\n",
" <td>Diesel</td>\n",
" <td>Manual</td>\n",
" <td>Fourth &amp; Above</td>\n",
" <td>False</td>\n",
" <td>-1.3381</td>\n",
" <td>0.2068</td>\n",
" <td>-0.0614</td>\n",
" <td>2.15</td>\n",
" <td>[4.262152]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Audi</td>\n",
" <td>Kochi</td>\n",
" <td>Diesel</td>\n",
" <td>Automatic</td>\n",
" <td>First</td>\n",
" <td>False</td>\n",
" <td>0.5106</td>\n",
" <td>-0.4526</td>\n",
" <td>-0.2128</td>\n",
" <td>21.43</td>\n",
" <td>[22.643032]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Hyundai</td>\n",
" <td>Bangalore</td>\n",
" <td>Petrol</td>\n",
" <td>Manual</td>\n",
" <td>First</td>\n",
" <td>False</td>\n",
" <td>0.2025</td>\n",
" <td>-0.1244</td>\n",
" <td>-0.2370</td>\n",
" <td>7.75</td>\n",
" <td>[6.077044]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>BMW</td>\n",
" <td>Mumbai</td>\n",
" <td>Diesel</td>\n",
" <td>Automatic</td>\n",
" <td>Second</td>\n",
" <td>False</td>\n",
" <td>-1.6463</td>\n",
" <td>0.2167</td>\n",
" <td>0.0878</td>\n",
" <td>10.5</td>\n",
" <td>[11.230114]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.show_results(ds_type=DatasetType.Train)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Brand</th>\n",
" <th>Location</th>\n",
" <th>Fuel_Type</th>\n",
" <th>Transmission</th>\n",
" <th>Owner_Type</th>\n",
" <th>Mileage_na</th>\n",
" <th>Year</th>\n",
" <th>Kilometers_Driven</th>\n",
" <th>Mileage</th>\n",
" <th>target</th>\n",
" <th>prediction</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>BMW</td>\n",
" <td>Delhi</td>\n",
" <td>Petrol</td>\n",
" <td>Automatic</td>\n",
" <td>Second</td>\n",
" <td>False</td>\n",
" <td>-2.2625</td>\n",
" <td>-0.1112</td>\n",
" <td>-1.4178</td>\n",
" <td>6.99</td>\n",
" <td>[9.390251]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Hyundai</td>\n",
" <td>Coimbatore</td>\n",
" <td>Diesel</td>\n",
" <td>Manual</td>\n",
" <td>First</td>\n",
" <td>False</td>\n",
" <td>1.7431</td>\n",
" <td>-0.1722</td>\n",
" <td>1.0140</td>\n",
" <td>15.57</td>\n",
" <td>[10.457531]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Tata</td>\n",
" <td>Coimbatore</td>\n",
" <td>Diesel</td>\n",
" <td>Manual</td>\n",
" <td>First</td>\n",
" <td>False</td>\n",
" <td>0.5106</td>\n",
" <td>0.2558</td>\n",
" <td>-0.6101</td>\n",
" <td>5.29</td>\n",
" <td>[8.295876]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Datsun</td>\n",
" <td>Kolkata</td>\n",
" <td>Petrol</td>\n",
" <td>Manual</td>\n",
" <td>First</td>\n",
" <td>False</td>\n",
" <td>0.8188</td>\n",
" <td>-0.4392</td>\n",
" <td>1.0140</td>\n",
" <td>2.25</td>\n",
" <td>[2.160599]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>BMW</td>\n",
" <td>Chennai</td>\n",
" <td>Diesel</td>\n",
" <td>Automatic</td>\n",
" <td>First</td>\n",
" <td>False</td>\n",
" <td>-1.3381</td>\n",
" <td>0.5049</td>\n",
" <td>-1.4002</td>\n",
" <td>20.0</td>\n",
" <td>[15.827377]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.show_results(ds_type=DatasetType.Valid)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Saving & Exporting The Model"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "IEtGmv6Wh7T4",
"outputId": "dfc37e85-2990-4292-8269-d13841c6a231"
},
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('/Users/amalnair/Documents/Amal-WorkSpace/UCC_Project/resources/models/model.pth')"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"learn.save('model',return_path=True)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "Pz2rMhp9mbdT"
},
"outputs": [],
"source": [
"learn.export('model.pkl')"
]
}
],
"metadata": {
"colab": {
"name": "modeling.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment