Created
February 6, 2024 04:14
-
-
Save dre013/e0c9761345702a24a9a10c01b744aef3 to your computer and use it in GitHub Desktop.
JupNtbk
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "fb68be2f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.model_selection import train_test_split\n", | |
"from sklearn.linear_model import LinearRegression\n", | |
"from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "49beddf6", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"flats_data = pd.read_excel('flats.xlsx', header=1)\n", | |
"euro_renovation_data = pd.read_excel('euro_renovation.xlsx', header=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "3a5eeaf4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>offer_views</th>\n", | |
" <th>city</th>\n", | |
" <th>lat</th>\n", | |
" <th>lon</th>\n", | |
" <th>price_sq</th>\n", | |
" <th>area</th>\n", | |
" <th>floor</th>\n", | |
" <th>kitchen_area</th>\n", | |
" <th>bathroom_type</th>\n", | |
" <th>...</th>\n", | |
" <th>all_data.object_info.loggias</th>\n", | |
" <th>all_data.house.parking[0].display_name</th>\n", | |
" <th>all_data.house.yard[0].display_name</th>\n", | |
" <th>all_data.house.yard[1].display_name</th>\n", | |
" <th>all_data.house.infrastructure[0].display_name</th>\n", | |
" <th>all_data.house.infrastructure[1].display_name</th>\n", | |
" <th>all_data.house.infrastructure[2].display_name</th>\n", | |
" <th>all_data.house.infrastructure[3].display_name</th>\n", | |
" <th>all_data.house.infrastructure[4].display_name</th>\n", | |
" <th>all_data.object_info.garage_type.display_name</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1900703057</td>\n", | |
" <td>0</td>\n", | |
" <td>Москва</td>\n", | |
" <td>55.812120</td>\n", | |
" <td>37.632269</td>\n", | |
" <td>456666</td>\n", | |
" <td>15.0</td>\n", | |
" <td>1</td>\n", | |
" <td>3.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>...</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1871863655</td>\n", | |
" <td>0</td>\n", | |
" <td>Казань</td>\n", | |
" <td>55.728718</td>\n", | |
" <td>49.171865</td>\n", | |
" <td>199975</td>\n", | |
" <td>41.5</td>\n", | |
" <td>16</td>\n", | |
" <td>8.6</td>\n", | |
" <td>Совмещенный</td>\n", | |
" <td>...</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Во дворе</td>\n", | |
" <td>Детская площадка</td>\n", | |
" <td>Спортивная площадка</td>\n", | |
" <td>Школа</td>\n", | |
" <td>Парк</td>\n", | |
" <td>Детский сад</td>\n", | |
" <td>Фитнес</td>\n", | |
" <td>Торговый центр</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1912069065</td>\n", | |
" <td>0</td>\n", | |
" <td>Новосибирск</td>\n", | |
" <td>54.955856</td>\n", | |
" <td>82.838521</td>\n", | |
" <td>80263</td>\n", | |
" <td>38.0</td>\n", | |
" <td>1</td>\n", | |
" <td>9.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>...</td>\n", | |
" <td>1.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Детская площадка</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1915932774</td>\n", | |
" <td>0</td>\n", | |
" <td>Новосибирск</td>\n", | |
" <td>54.930900</td>\n", | |
" <td>82.913180</td>\n", | |
" <td>101754</td>\n", | |
" <td>28.5</td>\n", | |
" <td>7</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Совмещенный</td>\n", | |
" <td>...</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Во дворе</td>\n", | |
" <td>Детская площадка</td>\n", | |
" <td>Спортивная площадка</td>\n", | |
" <td>Школа</td>\n", | |
" <td>Детский сад</td>\n", | |
" <td>Торговый центр</td>\n", | |
" <td>Парк</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1902155995</td>\n", | |
" <td>0</td>\n", | |
" <td>Новосибирск</td>\n", | |
" <td>54.935571</td>\n", | |
" <td>82.885261</td>\n", | |
" <td>112500</td>\n", | |
" <td>20.0</td>\n", | |
" <td>5</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Совмещенный</td>\n", | |
" <td>...</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Во дворе</td>\n", | |
" <td>Детская площадка</td>\n", | |
" <td>Спортивная площадка</td>\n", | |
" <td>Школа</td>\n", | |
" <td>Детский сад</td>\n", | |
" <td>Торговый центр</td>\n", | |
" <td>Фитнес</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1315</th>\n", | |
" <td>1865111487</td>\n", | |
" <td>0</td>\n", | |
" <td>Санкт-Петербург</td>\n", | |
" <td>59.814695</td>\n", | |
" <td>30.344030</td>\n", | |
" <td>206296</td>\n", | |
" <td>27.0</td>\n", | |
" <td>13</td>\n", | |
" <td>5.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>...</td>\n", | |
" <td>1.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1316</th>\n", | |
" <td>1834602512</td>\n", | |
" <td>46</td>\n", | |
" <td>Санкт-Петербург</td>\n", | |
" <td>60.040240</td>\n", | |
" <td>30.215571</td>\n", | |
" <td>203200</td>\n", | |
" <td>25.0</td>\n", | |
" <td>15</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Совмещенный</td>\n", | |
" <td>...</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Подземная</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1317</th>\n", | |
" <td>1838378337</td>\n", | |
" <td>0</td>\n", | |
" <td>Санкт-Петербург</td>\n", | |
" <td>59.865506</td>\n", | |
" <td>30.513345</td>\n", | |
" <td>173333</td>\n", | |
" <td>30.0</td>\n", | |
" <td>15</td>\n", | |
" <td>5.0</td>\n", | |
" <td>Совмещенный</td>\n", | |
" <td>...</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Подземная</td>\n", | |
" <td>Детская площадка</td>\n", | |
" <td>Спортивная площадка</td>\n", | |
" <td>Школа</td>\n", | |
" <td>Детский сад</td>\n", | |
" <td>Торговый центр</td>\n", | |
" <td>Парк</td>\n", | |
" <td>Фитнес</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1318</th>\n", | |
" <td>1840264239</td>\n", | |
" <td>49</td>\n", | |
" <td>Санкт-Петербург</td>\n", | |
" <td>59.892550</td>\n", | |
" <td>30.450202</td>\n", | |
" <td>216000</td>\n", | |
" <td>25.0</td>\n", | |
" <td>14</td>\n", | |
" <td>0.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>...</td>\n", | |
" <td>1.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Детская площадка</td>\n", | |
" <td>Спортивная площадка</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1319</th>\n", | |
" <td>1912385822</td>\n", | |
" <td>0</td>\n", | |
" <td>Санкт-Петербург</td>\n", | |
" <td>59.833235</td>\n", | |
" <td>30.525750</td>\n", | |
" <td>247619</td>\n", | |
" <td>21.0</td>\n", | |
" <td>15</td>\n", | |
" <td>5.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>...</td>\n", | |
" <td>1.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>1320 rows × 37 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id offer_views city lat lon \\\n", | |
"0 1900703057 0 Москва 55.812120 37.632269 \n", | |
"1 1871863655 0 Казань 55.728718 49.171865 \n", | |
"2 1912069065 0 Новосибирск 54.955856 82.838521 \n", | |
"3 1915932774 0 Новосибирск 54.930900 82.913180 \n", | |
"4 1902155995 0 Новосибирск 54.935571 82.885261 \n", | |
"... ... ... ... ... ... \n", | |
"1315 1865111487 0 Санкт-Петербург 59.814695 30.344030 \n", | |
"1316 1834602512 46 Санкт-Петербург 60.040240 30.215571 \n", | |
"1317 1838378337 0 Санкт-Петербург 59.865506 30.513345 \n", | |
"1318 1840264239 49 Санкт-Петербург 59.892550 30.450202 \n", | |
"1319 1912385822 0 Санкт-Петербург 59.833235 30.525750 \n", | |
"\n", | |
" price_sq area floor kitchen_area bathroom_type ... \\\n", | |
"0 456666 15.0 1 3.0 NaN ... \n", | |
"1 199975 41.5 16 8.6 Совмещенный ... \n", | |
"2 80263 38.0 1 9.0 NaN ... \n", | |
"3 101754 28.5 7 0.0 Совмещенный ... \n", | |
"4 112500 20.0 5 0.0 Совмещенный ... \n", | |
"... ... ... ... ... ... ... \n", | |
"1315 206296 27.0 13 5.0 NaN ... \n", | |
"1316 203200 25.0 15 0.0 Совмещенный ... \n", | |
"1317 173333 30.0 15 5.0 Совмещенный ... \n", | |
"1318 216000 25.0 14 0.0 NaN ... \n", | |
"1319 247619 21.0 15 5.0 NaN ... \n", | |
"\n", | |
" all_data.object_info.loggias all_data.house.parking[0].display_name \\\n", | |
"0 NaN NaN \n", | |
"1 NaN Во дворе \n", | |
"2 1.0 NaN \n", | |
"3 NaN Во дворе \n", | |
"4 NaN Во дворе \n", | |
"... ... ... \n", | |
"1315 1.0 NaN \n", | |
"1316 NaN Подземная \n", | |
"1317 NaN Подземная \n", | |
"1318 1.0 NaN \n", | |
"1319 1.0 NaN \n", | |
"\n", | |
" all_data.house.yard[0].display_name all_data.house.yard[1].display_name \\\n", | |
"0 NaN NaN \n", | |
"1 Детская площадка Спортивная площадка \n", | |
"2 Детская площадка NaN \n", | |
"3 Детская площадка Спортивная площадка \n", | |
"4 Детская площадка Спортивная площадка \n", | |
"... ... ... \n", | |
"1315 NaN NaN \n", | |
"1316 NaN NaN \n", | |
"1317 Детская площадка Спортивная площадка \n", | |
"1318 Детская площадка Спортивная площадка \n", | |
"1319 NaN NaN \n", | |
"\n", | |
" all_data.house.infrastructure[0].display_name \\\n", | |
"0 NaN \n", | |
"1 Школа \n", | |
"2 NaN \n", | |
"3 Школа \n", | |
"4 Школа \n", | |
"... ... \n", | |
"1315 NaN \n", | |
"1316 NaN \n", | |
"1317 Школа \n", | |
"1318 NaN \n", | |
"1319 NaN \n", | |
"\n", | |
" all_data.house.infrastructure[1].display_name \\\n", | |
"0 NaN \n", | |
"1 Парк \n", | |
"2 NaN \n", | |
"3 Детский сад \n", | |
"4 Детский сад \n", | |
"... ... \n", | |
"1315 NaN \n", | |
"1316 NaN \n", | |
"1317 Детский сад \n", | |
"1318 NaN \n", | |
"1319 NaN \n", | |
"\n", | |
" all_data.house.infrastructure[2].display_name \\\n", | |
"0 NaN \n", | |
"1 Детский сад \n", | |
"2 NaN \n", | |
"3 Торговый центр \n", | |
"4 Торговый центр \n", | |
"... ... \n", | |
"1315 NaN \n", | |
"1316 NaN \n", | |
"1317 Торговый центр \n", | |
"1318 NaN \n", | |
"1319 NaN \n", | |
"\n", | |
" all_data.house.infrastructure[3].display_name \\\n", | |
"0 NaN \n", | |
"1 Фитнес \n", | |
"2 NaN \n", | |
"3 Парк \n", | |
"4 Фитнес \n", | |
"... ... \n", | |
"1315 NaN \n", | |
"1316 NaN \n", | |
"1317 Парк \n", | |
"1318 NaN \n", | |
"1319 NaN \n", | |
"\n", | |
" all_data.house.infrastructure[4].display_name \\\n", | |
"0 NaN \n", | |
"1 Торговый центр \n", | |
"2 NaN \n", | |
"3 NaN \n", | |
"4 NaN \n", | |
"... ... \n", | |
"1315 NaN \n", | |
"1316 NaN \n", | |
"1317 Фитнес \n", | |
"1318 NaN \n", | |
"1319 NaN \n", | |
"\n", | |
" all_data.object_info.garage_type.display_name \n", | |
"0 NaN \n", | |
"1 NaN \n", | |
"2 NaN \n", | |
"3 NaN \n", | |
"4 NaN \n", | |
"... ... \n", | |
"1315 NaN \n", | |
"1316 NaN \n", | |
"1317 NaN \n", | |
"1318 NaN \n", | |
"1319 NaN \n", | |
"\n", | |
"[1320 rows x 37 columns]" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"euro_renovation_data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "282b5019", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features = [\n", | |
" 'lat', 'lon', 'area', 'floor', 'kitchen_area', 'balconies', 'rooms', 'house_floors', 'lifts', 'freight_lifts',\n", | |
" 'time_on_foot_to_subway', 'build_year', 'all_data.house.ceiling_height', 'all_data.object_info.living_area'\n", | |
" ]\n", | |
"X = euro_renovation_data[features].fillna(0)\n", | |
"y = euro_renovation_data['price_sq'].fillna(0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "e07f6eeb", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "f6ac0d9f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LinearRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div>" | |
], | |
"text/plain": [ | |
"LinearRegression()" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model = LinearRegression()\n", | |
"model.fit(X_train, y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "f00d3b51", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"y_pred = model.predict(X_test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "404131d3", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([158308.57893418, 113804.9334211 , 195328.41141539, 253142.12661004,\n", | |
" 215328.27334567, 352700.18038176, 358964.06830085, 202472.1496059 ,\n", | |
" 235098.79877387, 348860.33969325, 236244.65959536, 204957.29908458,\n", | |
" 333986.19399744, 325165.18737963, 236628.02375922, 218112.54172629,\n", | |
" 204981.95754782, 168248.69840367, 209252.94614687, 331984.76444805,\n", | |
" 233979.08942076, 113244.6318856 , 238331.93155688, 231602.54453511,\n", | |
" 259679.98374802, 51877.63477389, 61222.29883317, 213159.53470919,\n", | |
" 11815.01350663, 270906.9421026 , 197969.46663388, 315382.81001094,\n", | |
" 158293.84978613, 325183.6036804 , 238962.23459714, 316993.63599154,\n", | |
" 237338.31996822, 187266.14147511, 327788.1318949 , 322582.77057171,\n", | |
" 169676.79087785, 251306.60488549, 147063.22118163, 303644.1552397 ,\n", | |
" 270713.50092253, 304744.56845649, 237689.80908054, 170579.90889038,\n", | |
" 254643.26280378, 316351.09667621, 327589.48474355, 219320.66794371,\n", | |
" 248791.62622332, 356787.07041663, 199919.49704428, 181875.6212454 ,\n", | |
" 347265.15238456, 333588.76984506, 189185.72423132, 331791.64494801,\n", | |
" 215269.26681572, 205680.58956436, 190718.35716875, 213436.07973419,\n", | |
" 213127.39567569, 239651.37122056, 326692.21336863, 228221.02300754,\n", | |
" 226004.49695694, 321636.72610922, 207004.71301829, 177878.99630808,\n", | |
" 242709.70866688, 198476.50440054, 316971.26205136, 230897.81425897,\n", | |
" 239903.13863894, 306555.75073801, 163127.70809986, 217571.51776461,\n", | |
" 240681.62762624, 363803.49369922, 150373.07087228, 236173.28807812,\n", | |
" 205721.12984102, 189707.60956607, 254424.10042233, 305109.60515334,\n", | |
" 238447.41168201, 46993.03324287, 335621.7179906 , 301025.1081848 ,\n", | |
" 174482.08821758, 249557.56031078, 232521.69999368, 329251.24917876,\n", | |
" 323722.10765463, 178380.50223698, 199729.3685908 , 324626.27707656,\n", | |
" 195704.37327913, 226696.92705399, 199346.90511129, 321726.24037777,\n", | |
" 319043.42912166, 98158.0291179 , 333345.10756638, 249870.99280698,\n", | |
" 202313.78670846, 386835.21359448, 340851.12176094, 197620.25441093,\n", | |
" 47413.13975956, 264014.54767244, 236957.05295284, 217129.02383758,\n", | |
" 347060.05577498, 306545.44752373, 318249.57893213, 334466.17868048,\n", | |
" 136970.08612806, 174898.24499811, 151927.0300354 , 221686.06437907,\n", | |
" 327229.09398716, 187771.79077822, 306878.72741629, 220638.42886431,\n", | |
" 202173.71241293, 235200.04626312, 304166.97122397, 366088.64755634,\n", | |
" 212036.49978184, 237404.90191534, 301278.40220089, 327973.12165353,\n", | |
" 215505.29964448, 201360.46593289, 263809.5802149 , 153509.18805461,\n", | |
" 212938.98286304, 331488.45382171, 166091.60936469, 218563.53774827,\n", | |
" 183606.32354133, 299723.48092432, 74208.66367749, 194936.27574547,\n", | |
" 199410.5502203 , 211751.10885168, 211927.26787849, 320220.46891834,\n", | |
" 334702.87025838, 130086.65576048, 210516.78166994, 220354.67081343,\n", | |
" 68490.72850411, 278141.55732109, 270160.46100879, 212797.4490555 ,\n", | |
" 193485.21392229, 232457.95776405, 95564.47376113, 209520.5835529 ,\n", | |
" 69543.55655862, 337506.8780021 , 248293.24391963, 251987.05433778,\n", | |
" 141130.83954873, 118038.57685319, 111829.18963703, 209685.04999566,\n", | |
" 312177.29551679, 351225.68283136, 230488.32597685, 217158.09528493,\n", | |
" 226191.14799541, 224089.0865405 , 333873.39882453, 305061.52413161,\n", | |
" 217905.26540971, 78708.25748924, 239386.87527377, 350009.99119164,\n", | |
" 331226.65490958, 215807.27592224, 333915.09380652, 72173.26540029,\n", | |
" 212834.1514051 , 182935.92354702, 129313.95870794, 65463.81012679,\n", | |
" 316012.96517689, 262665.28549595, 219212.55653626, 157569.25420414,\n", | |
" 330642.22492416, 217008.69204702, 354531.73610757, 178013.05619967,\n", | |
" 223511.03852596, 321622.93845593, 216835.54351213, 181830.13417497,\n", | |
" 341399.7173982 , 196694.51703825, 125294.53811374, 337560.99879497,\n", | |
" 140824.47680708, 344606.77396089, 187460.11912193, 198218.53143498,\n", | |
" 228673.74662871, 211465.18193426, 218566.50648112, 221337.51037589,\n", | |
" 196114.55721264, 322305.6884236 , 217725.33989506, 153335.04743481,\n", | |
" 231823.99054366, 182196.54486784, 193737.38601513, 320720.63296557,\n", | |
" 173427.84785577, 191288.24689397, 206885.85397759, 225357.23215959,\n", | |
" 167730.7506378 , 334524.48876432, 227114.71451519, 358127.33889031,\n", | |
" 323856.98096615, 199794.09197756, 161356.82036884, 242513.62637505,\n", | |
" 172790.38500845, 215661.26457241, 263853.41265434, 194724.58792855,\n", | |
" 228764.11311459, 219820.2275069 , 347993.33289005, 95103.16939647,\n", | |
" 194973.6469018 , 209514.04215248, 154238.47212537, 219739.17171234,\n", | |
" 202944.28413149, 236965.17972684, 163964.45083335, 240950.9058513 ,\n", | |
" 175336.52646008, 350177.06991306, 151302.94174306, 323320.40693806,\n", | |
" 218675.82773419, 94511.8276292 , 185080.00173497, 184918.59552592,\n", | |
" 214337.5625851 , 221841.81193038, 345235.31586771, 186248.70778755])" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y_pred" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "7944eb4c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mse = mean_squared_error(y_test, y_pred)\n", | |
"mae = mean_absolute_error(y_test, y_pred)\n", | |
"r2 = r2_score(y_test, y_pred)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "8f275d7e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Mean Squared Error: 2730624134.7515383\n", | |
"Mean Absolute Error: 38640.9023358776\n", | |
"R^2 Score: 0.6543735175593763\n" | |
] | |
} | |
], | |
"source": [ | |
"print(f'Mean Squared Error: {mse}')\n", | |
"print(f'Mean Absolute Error: {mae}')\n", | |
"print(f'R^2 Score: {r2}')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "43ae035f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"flats_data['predicted_price'] = model.predict(flats_data[features].fillna(0))\n", | |
"flats_data.to_excel('flats_with_predictions.xlsx', index=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "e80cc87e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>price_sq</th>\n", | |
" <th>predicted_price</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>176510.0</td>\n", | |
" <td>241056.806181</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>179404.0</td>\n", | |
" <td>249954.521618</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>91863.0</td>\n", | |
" <td>30791.284986</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>112903.0</td>\n", | |
" <td>70319.626989</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>167682.0</td>\n", | |
" <td>139234.317315</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6686</th>\n", | |
" <td>196000.0</td>\n", | |
" <td>236578.106069</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6687</th>\n", | |
" <td>225225.0</td>\n", | |
" <td>249090.977238</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6688</th>\n", | |
" <td>256157.0</td>\n", | |
" <td>260806.772063</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6689</th>\n", | |
" <td>176875.0</td>\n", | |
" <td>173515.296312</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6690</th>\n", | |
" <td>187725.0</td>\n", | |
" <td>267134.528082</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>6691 rows × 2 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" price_sq predicted_price\n", | |
"0 176510.0 241056.806181\n", | |
"1 179404.0 249954.521618\n", | |
"2 91863.0 30791.284986\n", | |
"3 112903.0 70319.626989\n", | |
"4 167682.0 139234.317315\n", | |
"... ... ...\n", | |
"6686 196000.0 236578.106069\n", | |
"6687 225225.0 249090.977238\n", | |
"6688 256157.0 260806.772063\n", | |
"6689 176875.0 173515.296312\n", | |
"6690 187725.0 267134.528082\n", | |
"\n", | |
"[6691 rows x 2 columns]" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"flats_data[['price_sq', 'predicted_price']]" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment