Skip to content

Instantly share code, notes, and snippets.

View djsegal's full-sized avatar

djsegal

  • home
View GitHub Profile
import pandas as pd
from sklearn.model_selection import train_test_split
def custom_train_test_split(cur_data, random_state=42, cur_target="price", cur_boolean="is_brooklyn"):
if cur_boolean in cur_data.columns:
cur_train_1, cur_test_1 = _custom_train_test_split(
cur_data[cur_data[cur_boolean]], random_state, cur_target
)
variable $/unit
bathrooms 110.28
bedrooms 27.12
accommodates 10.95
year_2019 -6.48
month_12 -2.25
beds -1.99
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
class ReciprocalFeatures(BaseEstimator, TransformerMixin):
def __init__(self):
self.input_features = None
self.rename_lambda = \
lambda input_feature: f"_inv_{input_feature}"
import numpy as np
from sklearn.compose import TransformedTargetRegressor
class LogTransformedTargetRegressor(TransformedTargetRegressor):
def __init__(self, regressor):
inverse_func = lambda cur_log_val: 10 ** cur_log_val
TransformedTargetRegressor.__init__(
self, regressor=regressor,
func=np.log10, inverse_func=inverse_func
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.feature_selection.base import SelectorMixin
class CleanFeatures(BaseEstimator, SelectorMixin):
def __init__(self):
self.sieve = []
def _get_support_mask(self):
from sklearn.base import BaseEstimator, TransformerMixin
class PassThroughTransformer(BaseEstimator, TransformerMixin):
def __init__(self):
self.input_features = None
def fit(self, X, y=None):
assert self.input_features is None
if type(X) == np.ndarray :
# Assemble a single modelling table from three inputs that share an "id" column.
# NOTE(review): price_data, rental_data and location_data are defined outside
# this fragment; presumably pandas DataFrames -- confirm against the full script.
# Work on a copy so that price_data itself is not rebound or modified here.
cur_data = price_data.copy()
# Join the rental and location tables onto the price rows, matching on "id".
cur_data = cur_data.merge(rental_data, on="id")
cur_data = cur_data.merge(location_data, on="id")
# Split into train/test features and targets. Only the data is passed, so
# custom_train_test_split falls back to its own defaults for everything else.
X_train, X_test, y_train, y_test = \
custom_train_test_split(cur_data)
# Column-name groups used to route features downstream.
# NOTE(review): judging by the names, these are columns to pass through
# unchanged, to drop, and to one-hot encode -- the consuming code is not
# visible in this fragment, so confirm before relying on that reading.
pass_cols = ["is_brooklyn", "density"]
drop_cols = ["year", "geometry", "zipcode"]
one_hot_cols = ["month"]
feature coefficient
accommodates 1.0
accommodates _per_bathrooms -0.43
is_brooklyn -0.405
accommodates _per_bedrooms -0.338
accommodates bedrooms -0.305
density -0.115
accommodates bathrooms 0.111
bedrooms _per_beds -0.108
accommodates _per_beds 0.105
name lat lon price is_com is_bkn
0 center 40.7269845195146 -73.96825439838058 170 True False
1 wash sq 40.73232616009288 -74.00113694319073 197 False False
2 flatiron 40.74010002079383 -73.99202965334189 555 False False
3 bowery 40.728100910208695 -73.99392182445409 298 False False
4 uptown 40.79674265987271 -73.95298422453986 302 False False
5 midtown 40.76134044954954 -73.98369000821411 303 False False
6 barclays 40.68061129890911 -73.97564856875336 267 False True
7 bushwick 40.688196501332044 -73.92715263054822 148 False True
8 williamsburg 40.715948534974885 -73.95220786983514 225 False True
import numpy as np
import pandas as pd
import geopandas
import pickle
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler