Skip to content

Instantly share code, notes, and snippets.

View aniruddha27's full-sized avatar

Aniruddha Bhandari aniruddha27

View GitHub Profile
from sklearn.model_selection import train_test_split
# Feature matrix and target vector used for modelling.
X, y = df, target

# Split into training and validation sets: 20% of the rows are held out,
# and the fixed random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=27,
)
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Baseline model: plain linear regression fitted on the training split.
lr = LinearRegression().fit(X_train, y_train)

# Root-mean-squared error of the baseline on the held-out split.
y_pred = lr.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(rmse)
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
# Candidate regularisation strengths to compare for Ridge regression.
alphas = [0.01, 0.1, 0.3, 1, 3, 5, 10, 15, 20]
for a in alphas:
    # The loop body must be indented; at column 0 it is an IndentationError.
    lr = Ridge(alpha=a)
    # Fit and score each candidate so the alphas can actually be compared,
    # using the same held-out RMSE as the LinearRegression baseline.
    lr.fit(X_train, y_train)
    rmse = np.sqrt(mean_squared_error(y_test, lr.predict(X_test)))
    print(a, rmse)
# Final model: Ridge regression with alpha=3, trained on the training split.
model = Ridge(alpha=3).fit(X_train, y_train)

# Predictions are exponentiated before being submitted.
# NOTE(review): presumably the target was log-transformed upstream, making
# np.exp the inverse; confirm it matches the transform actually used.
log_pred = model.predict(test)
actual_pred = np.exp(log_pred)

# Assemble the Id / SalePrice columns and write them out without the index.
data_dict = {
    'Id': test_id,
    'SalePrice': actual_pred,
}
submit = pd.DataFrame(data=data_dict)
submit.to_csv('submission.csv', index=False)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Apply the ggplot style to subsequent matplotlib figures.
plt.style.use('ggplot')

# Load the training data and preview its first rows.
df = pd.read_csv('drive/My Drive/AV/train.csv')
df.head()

# Remove the listed columns from df in place.
df.drop(columns=['PassengerId', 'Ticket', 'Name'], inplace=True)
# Pivot on a single index level: one row per distinct value of 'Sex',
# aggregated with pivot_table's default aggregation function.
table = df.pivot_table(index=['Sex'])
table