This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.base import BaseEstimator | |
from sklearn.feature_selection.base import SelectorMixin | |
class CleanFeatures(BaseEstimator, SelectorMixin): | |
def __init__(self): | |
self.sieve = [] | |
def _get_support_mask(self): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.compose import TransformedTargetRegressor | |
class LogTransformedTargetRegressor(TransformedTargetRegressor): | |
def __init__(self, regressor): | |
inverse_func = lambda cur_log_val: 10 ** cur_log_val | |
TransformedTargetRegressor.__init__( | |
self, regressor=regressor, | |
func=np.log10, inverse_func=inverse_func |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
from sklearn.base import BaseEstimator, TransformerMixin | |
class ReciprocalFeatures(BaseEstimator, TransformerMixin): | |
def __init__(self): | |
self.input_features = None | |
self.rename_lambda = \ | |
lambda input_feature: f"_inv_{input_feature}" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
variable | $/unit | |
---|---|---|
bathrooms | 110.28 | |
bedrooms | 27.12 | |
accommodates | 10.95 | |
year_2019 | -6.48 | |
month_12 | -2.25 | |
beds | -1.99 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sklearn.model_selection import train_test_split | |
def custom_train_test_split(cur_data, random_state=42, cur_target="price", cur_boolean="is_brooklyn"): | |
if cur_boolean in cur_data.columns: | |
cur_train_1, cur_test_1 = _custom_train_test_split( | |
cur_data[cur_data[cur_boolean]], random_state, cur_target | |
) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
id | zipcode | price | month | year | accommodates | bathrooms | bedrooms | beds | geometry | is_brooklyn | |
---|---|---|---|---|---|---|---|---|---|---|---|
38553206 | 10002 | 160 | 11 | 2019 | 4 | 1 | 1 | 2 | POINT (-73.99462 40.71355) | False | |
38553206 | 10002 | 160 | 10 | 2019 | 4 | 1 | 1 | 2 | POINT (-73.99462 40.71355) | False | |
38553206 | 10002 | 160 | 9 | 2019 | 4 | 1 | 1 | 2 | POINT (-73.99462 40.71355) | False | |
38553206 | 10002 | 160 | 8 | 2019 | 4 | 1 | 1 | 2 | POINT (-73.99462 40.71355) | False | |
38529246 | 10036 | 450 | 11 | 2019 | 6 | 1 | 2 | 2 | POINT (-73.98781 40.7588) | False | |
38529246 | 10036 | 450 | 10 | 2019 | 6 | 1 | 2 | 2 | POINT (-73.98781 40.7588) | False | |
38529246 | 10036 | 450 | 9 | 2019 | 6 | 1 | 2 | 2 | POINT (-73.98781 40.7588) | False |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
month | year | url | |
---|---|---|---|
11 | 2019 | http://data.insideairbnb.com/united-states/ny/new-york-city/2019-12-04/data/listings.csv.gz | |
10 | 2019 | http://data.insideairbnb.com/united-states/ny/new-york-city/2019-11-01/data/listings.csv.gz | |
9 | 2019 | http://data.insideairbnb.com/united-states/ny/new-york-city/2019-10-14/data/listings.csv.gz | |
8 | 2019 | http://data.insideairbnb.com/united-states/ny/new-york-city/2019-09-12/data/listings.csv.gz | |
7 | 2019 | http://data.insideairbnb.com/united-states/ny/new-york-city/2019-08-06/data/listings.csv.gz | |
6 | 2019 | http://data.insideairbnb.com/united-states/ny/new-york-city/2019-07-08/data/listings.csv.gz | |
5 | 2019 | http://data.insideairbnb.com/united-states/ny/new-york-city/2019-06-02/data/listings.csv.gz |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import pandas as pd | |
import requests | |
import datetime | |
def get_airbnb_links(): | |
cur_url = "http://insideairbnb.com/get-the-data.html" | |
cur_response = requests.get(cur_url) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Playlist | Daft Punk | Bonobo | Hans Zimmer | ... | Billie Eilish | |
---|---|---|---|---|---|---|
Coding 💻 | 13 | 6 | 2 | ... | 0 | |
Pop Programming | 0 | 0 | 0 | ... | 3 | |
... | ... | ... | ... | ... | ... | |
Night Coding | 1 | 2 | 0 | ... | 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
"about", | |
"about-u", | |
"about-us", | |
"abouts", | |
"abuse", | |
"abuses", | |
"access", | |
"accesses", | |
"account", |