This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sklearn.model_selection import train_test_split | |
def custom_train_test_split(cur_data, random_state=42, cur_target="price", cur_boolean="is_brooklyn"): | |
if cur_boolean in cur_data.columns: | |
cur_train_1, cur_test_1 = _custom_train_test_split( | |
cur_data[cur_data[cur_boolean]], random_state, cur_target | |
) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
variable | $/unit | |
---|---|---|
bathrooms | 110.28 | |
bedrooms | 27.12 | |
accommodates | 10.95 | |
year_2019 | -6.48 | |
month_12 | -2.25 | |
beds | -1.99 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
from sklearn.base import BaseEstimator, TransformerMixin | |
class ReciprocalFeatures(BaseEstimator, TransformerMixin): | |
def __init__(self): | |
self.input_features = None | |
self.rename_lambda = \ | |
lambda input_feature: f"_inv_{input_feature}" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.compose import TransformedTargetRegressor | |
class LogTransformedTargetRegressor(TransformedTargetRegressor): | |
def __init__(self, regressor): | |
inverse_func = lambda cur_log_val: 10 ** cur_log_val | |
TransformedTargetRegressor.__init__( | |
self, regressor=regressor, | |
func=np.log10, inverse_func=inverse_func |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.base import BaseEstimator | |
from sklearn.feature_selection.base import SelectorMixin | |
class CleanFeatures(BaseEstimator, SelectorMixin): | |
def __init__(self):
    """Create the selector with an empty ``sieve`` list."""
    # Starts empty; nothing visible in this excerpt shows how it is filled.
    self.sieve = list()
def _get_support_mask(self): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.base import BaseEstimator, TransformerMixin | |
class PassThroughTransformer(BaseEstimator, TransformerMixin): | |
def __init__(self):
    """Create the transformer with no input feature names recorded yet."""
    # ``fit`` asserts this is still None on entry, so it must start unset.
    self.input_features = None
def fit(self, X, y=None): | |
assert self.input_features is None | |
if type(X) == np.ndarray : |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Join the price, rental, and location tables on their shared "id" column.
# The leading copy() is kept so the statement sequence matches the original.
cur_data = (
    price_data.copy()
    .merge(rental_data, on="id")
    .merge(location_data, on="id")
)

# Split into train/test features and targets using the helper's defaults.
X_train, X_test, y_train, y_test = custom_train_test_split(cur_data)

# Column-name groups; presumably consumed by a later preprocessing step
# that is not visible in this excerpt -- confirm against the full script.
drop_cols = ["year", "geometry", "zipcode"]
one_hot_cols = ["month"]
pass_cols = ["is_brooklyn", "density"]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
feature | coefficient | |
---|---|---|
accommodates | 1.0 | |
accommodates _per_bathrooms | -0.43 | |
is_brooklyn | -0.405 | |
accommodates _per_bedrooms | -0.338 | |
accommodates bedrooms | -0.305 | |
density | -0.115 | |
accommodates bathrooms | 0.111 | |
bedrooms _per_beds | -0.108 | |
accommodates _per_beds | 0.105 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| | name | lat | lon | price | is_com | is_bkn |
---|---|---|---|---|---|---|---|
0 | center | 40.7269845195146 | -73.96825439838058 | 170 | True | False | |
1 | wash sq | 40.73232616009288 | -74.00113694319073 | 197 | False | False | |
2 | flatiron | 40.74010002079383 | -73.99202965334189 | 555 | False | False | |
3 | bowery | 40.728100910208695 | -73.99392182445409 | 298 | False | False | |
4 | uptown | 40.79674265987271 | -73.95298422453986 | 302 | False | False | |
5 | midtown | 40.76134044954954 | -73.98369000821411 | 303 | False | False | |
6 | barclays | 40.68061129890911 | -73.97564856875336 | 267 | False | True | |
7 | bushwick | 40.688196501332044 | -73.92715263054822 | 148 | False | True | |
8 | williamsburg | 40.715948534974885 | -73.95220786983514 | 225 | False | True |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import geopandas | |
import pickle | |
from sklearn.cluster import KMeans | |
from sklearn.pipeline import Pipeline | |
from sklearn.preprocessing import StandardScaler |