Skip to content

Instantly share code, notes, and snippets.

@djsegal
Last active January 1, 2020 04:34
Show Gist options
  • Save djsegal/25ca2a2526b5d13bcc86686a2d8aba67 to your computer and use it in GitHub Desktop.
Save djsegal/25ca2a2526b5d13bcc86686a2d8aba67 to your computer and use it in GitHub Desktop.
# Build one modelling table by joining the price, rental and location
# frames on their shared "id" column.
cur_data = price_data.copy()
cur_data = cur_data.merge(rental_data, on="id")
cur_data = cur_data.merge(location_data, on="id")

# NOTE(review): custom_train_test_split is defined elsewhere; presumably it
# separates the "price" target from the features -- confirm.
X_train, X_test, y_train, y_test = custom_train_test_split(cur_data)

# Column groups, one per treatment inside the ColumnTransformer below.
pass_cols = ["is_brooklyn", "density"]
# drop_cols is only referenced by the coverage check below; these columns are
# simply never handed to a transformer (ColumnTransformer's documented default
# is remainder="drop").
drop_cols = ["year", "geometry", "zipcode"]
one_hot_cols = ["month"]
# Every rental_data column except the join key goes through the
# reciprocal/polynomial sub-pipeline.
poly_cols = rental_data.columns.drop("id").tolist()

# Sanity check: each column of the merged frame must be assigned to exactly
# one group (or be "id" / "price"), so nothing is silently ignored or
# listed twice.
mentioned_cols = [
    "id", "price",
    *one_hot_cols, *drop_cols,
    *poly_cols, *pass_cols
]
assert sorted(cur_data.columns) == sorted(mentioned_cols), (
    "column groups do not match cur_data; names present on one side only: %s"
    % (sorted(set(cur_data.columns) ^ set(mentioned_cols)),)
)

cur_one_hot = OneHotEncoder(categories="auto")

# Pairwise interaction terms only: no squared terms and no bias column.
cur_poly_feats = PolynomialFeatures(
    degree=2, include_bias=False, interaction_only=True
)

# Feature engineering applied to poly_cols, in order.
# NOTE(review): ReciprocalFeatures and CleanFeatures are project-defined
# transformers not visible here. Box-Cox requires strictly positive inputs,
# so "clean" presumably guarantees that -- confirm.
cur_sub_pipeline = Pipeline([
    ("reciprocal", ReciprocalFeatures()),
    ("polynomial", cur_poly_feats),
    ("cancel", VarianceThreshold()),
    ("clean", CleanFeatures()),
    ("box_cox", PowerTransformer(method="box-cox"))
])

cur_col_transformers = [
    ("one_hot", cur_one_hot, one_hot_cols),
    ("poly_feats", cur_sub_pipeline, poly_cols),
    ("passthrough", PassThroughTransformer(), pass_cols)
]

cur_transformer = ColumnTransformer(cur_col_transformers)

# max_iter is given as an int: scikit-learn documents it as an integer and
# releases with parameter validation reject a float such as 5e4.
# NOTE(review): LogTransformedTargetRegressor is project-defined; presumably
# it fits the wrapped estimator on a log-transformed target -- confirm.
cur_selector = SelectFromModel(
    LogTransformedTargetRegressor(
        LassoCV(cv=4, n_jobs=-1, max_iter=50000)
    ), threshold=5e-4
)

cur_regressor = LogTransformedTargetRegressor(
    ElasticNetCV(cv=4, n_jobs=-1, max_iter=50000)
)

# Full model: column-wise feature engineering, standardisation, Lasso-based
# feature selection, then the final ElasticNet regressor.
cur_pipeline = Pipeline([
    ("transformer", cur_transformer),
    ("scalar", StandardScaler()),
    ("selector", cur_selector),
    ("regressor", cur_regressor)
])

cur_pipeline.fit(X_train, y_train)
# Left as a bare expression so a notebook cell displays the held-out score.
cur_pipeline.score(X_test, y_test)
@djsegal
Copy link
Author

djsegal commented Dec 30, 2019

This uses the following code:

And produces the following csv of feature importances:

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment