Skip to content

Instantly share code, notes, and snippets.

View kshirsagarsiddharth's full-sized avatar
🎯
Focusing

kshirsagarsiddharth

🎯
Focusing
View GitHub Profile
import plotly.express as px
import pandas as pd
from dash import Dash, dcc, html, Input, Output, State
from jupyter_dash import JupyterDash
import dash_bootstrap_components as dbc
import numpy as np
# Load the car price table, then discard the 'Unnamed: 0' column
# (presumably an index column written out by an earlier to_csv -- confirm).
df = pd.read_csv('car_price_data.csv')
df = df.drop(columns=['Unnamed: 0'])
# Hyper-parameter grid for the step named "regressor" inside `reg`
# (the `regressor__` prefix addresses a nested estimator's parameters).
# Each alpha is listed once: the original list repeated 0.01, which only
# made the search cross-validate identical candidates a second time.
# NOTE(review): if the repeated 0.01 was meant to be another value
# (e.g. 0.001), add it back here.
param_grid = {
    'regressor__alpha': [0.1, 1, 10, 0.01, 5],
    'regressor__l1_ratio': np.arange(0.40, 1.00, 0.10),
}

# Exhaustive search over the grid using 2-fold cross-validation.
grid_search = GridSearchCV(reg, param_grid, cv=2)
grid_search.fit(X_train, y_train)

# Predict with the best candidate found and report the root mean squared
# error on the test split.
preds = grid_search.best_estimator_.predict(X_test)
score = np.sqrt(mean_squared_error(y_test, preds))
from sklearn import set_config

# Ask scikit-learn to display estimators as diagrams.
set_config(display="diagram")

# Every column whose dtype is not `object` is treated as numeric and is
# passed through a standard scaler.
numeric_columns = X.select_dtypes(exclude=['object']).columns
numeric_transformer = Pipeline([("scalar", StandardScaler())])

# Categorical columns that get one-hot encoded; categories unseen during
# fitting are ignored instead of raising.
# NOTE(review): newer scikit-learn releases rename `sparse` to
# `sparse_output` -- confirm against the installed version.
unordered_columns = ['transmission', 'fuel_type']
unordered_transformer = Pipeline(
    [('onehot', OneHotEncoder(sparse=False, handle_unknown='ignore'))]
)

# Columns set aside for ordinal handling.
ordered_columns = ['year', 'model']
# Candidate hyper-parameters for ElasticNet. Each alpha is listed once:
# the original list repeated 0.01, which only made the search fit the
# same candidates twice.
# NOTE(review): if the repeated 0.01 was meant to be another value
# (e.g. 0.001), add it back here.
elastic_net_parag_grid = {
    'alpha': [0.1, 1, 10, 0.01, 5],
    'l1_ratio': np.arange(0.40, 1.00, 0.10),
}

elastic_net_regressor = ElasticNet()

# 5-fold cross-validated grid search; n_jobs=-1 uses all available cores.
# NOTE(review): the original call was left unclosed after `n_jobs`, which
# is a syntax error; it is closed here. Any further keyword arguments the
# author intended are not visible in this snippet -- confirm.
elastic_net_grid_search = GridSearchCV(
    elastic_net_regressor,
    param_grid=elastic_net_parag_grid,
    cv=5,
    n_jobs=-1,
)
### Helper for comparing models on the same train/validation split
def score_dataset(X_train, X_valid, y_train, y_valid, input_model):
    """Fit *input_model* and return its RMSE on the validation data.

    The model is fitted in place on ``(X_train, y_train)``, used to predict
    ``X_valid``, and the square root of the mean squared error against
    ``y_valid`` is returned.
    """
    input_model.fit(X_train, y_train)
    predictions = input_model.predict(X_valid)
    mse = mean_squared_error(y_valid, predictions)
    return np.sqrt(mse)
# Score a plain linear regression on the encoded train/validation frames.
input_model = LinearRegression()
score_dataset(
    X_train=encoded_X_train,
    X_valid=encoded_X_valid,
    y_train=y_train,
    y_valid=y_valid,
    input_model=input_model,
)
def transform_columns(transformer, columns_to_transform, X_train=X_train, X_valid=X_valid):
    """Apply *transformer* to the selected columns of both splits.

    The transformer is fitted on the training columns and then reused,
    without refitting, on the validation columns. Each result is wrapped in
    a DataFrame that carries the row index of the split it came from.

    The ``X_train`` / ``X_valid`` defaults are the module-level frames as
    they exist when this function is defined.

    Returns a ``(cols_train, cols_valid)`` tuple.
    """
    # Fit on the training split first; the validation split must only be
    # transformed with the already-fitted transformer.
    fitted_train = transformer.fit_transform(X_train[columns_to_transform])
    cols_train = pd.DataFrame(fitted_train).set_axis(X_train.index, axis=0)

    applied_valid = transformer.transform(X_valid[columns_to_transform])
    cols_valid = pd.DataFrame(applied_valid).set_axis(X_valid.index, axis=0)

    return cols_train, cols_valid
# The three column groups, listed as ordered, unordered, numeric.
columns_to_transform_list = [
    ordered_columns,
    unordered_columns,
    numeric_columns,
]

# Encoder instances; the one-hot encoder ignores categories it did not
# see while fitting and returns a dense array.
onehot_encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
ordinal_encoder = OrdinalEncoder()
# Build the model inputs by placing the one-hot, scaled and ordinal frames
# side by side, once for each split.
encoded_X_train = pd.concat(
    (oh_cols_train, sc_cols_train, ord_cols_train),
    axis='columns',
)
encoded_X_valid = pd.concat(
    (oh_cols_valid, sc_cols_valid, ord_cols_valid),
    axis='columns',
)
# Bare expression: shows the frame when run as the last line of a notebook cell.
encoded_X_train
# Standardise the numeric columns: the scaler is fitted on the training
# split only and then applied unchanged to the validation split.
sc = StandardScaler()
# The scaled frames take the row index of the split they came from.
sc_cols_train = pd.DataFrame(
    sc.fit_transform(X_train[numeric_columns]),
    index=X_train.index,
)
sc_cols_valid = pd.DataFrame(
    sc.transform(X_valid[numeric_columns]),
    index=X_valid.index,
)
# Ordinal-encode the ordered columns: fit on the training split, reuse the
# fitted encoder on the validation split, and keep each split's row index.
ord_encoder = OrdinalEncoder()
ord_cols_train = pd.DataFrame(
    ord_encoder.fit_transform(X_train[ordered_columns]),
    index=X_train.index,
)
ord_cols_valid = pd.DataFrame(
    ord_encoder.transform(X_valid[ordered_columns]),
    index=X_valid.index,
)
# One-hot encode the unordered columns. Categories unseen during fitting
# are ignored, and the encoder returns a dense array.
oh_encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
# Fit on the training split only; the resulting frames take the row index
# of the split they came from.
oh_cols_train = pd.DataFrame(
    oh_encoder.fit_transform(X_train[unordered_columns]),
    index=X_train.index,
)
oh_cols_valid = pd.DataFrame(
    oh_encoder.transform(X_valid[unordered_columns]),
    index=X_valid.index,
)