Skip to content

Instantly share code, notes, and snippets.

@ksv-muralidhar
Last active February 17, 2021 13:22
Show Gist options
  • Save ksv-muralidhar/2fa0904da39379fed5314c4065420548 to your computer and use it in GitHub Desktop.
Save ksv-muralidhar/2fa0904da39379fed5314c4065420548 to your computer and use it in GitHub Desktop.
cross val
import pandas as pd
from sklearn.model_selection import train_test_split,StratifiedKFold,cross_validate
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# Load the iris feature matrix and class labels with a single dataset read.
X, y = load_iris(return_X_y=True)

# First split: 70% of the samples for training, 30% held out ("test_val").
X_train, X_test_val, y_train, y_test_val = train_test_split(
    X, y, test_size=0.3, random_state=11
)

# Second split of the held-out part: 85% becomes the validation set and
# the remaining 15% becomes the final test set.
X_val, X_test, y_val, y_test = train_test_split(
    X_test_val, y_test_val, test_size=0.15, random_state=11
)

# Fit a logistic regression model on the training set only.
lr = LogisticRegression(random_state=11, max_iter=1000)
lr.fit(X_train, y_train)

# Holdout evaluation: accuracy on the validation set.
pred = lr.predict(X_val)
val_accuracy = accuracy_score(y_val, pred)
print(f"Validation accuracy: {val_accuracy}")

# Holdout evaluation: accuracy on the final test set.
f_pred = lr.predict(X_test)
test_accuracy = accuracy_score(y_test, f_pred)
print(f"Test accuracy: {test_accuracy}")

# 3-fold stratified cross-validation on the training set; shuffling is
# seeded so the fold assignment is reproducible.
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=11)
cv_result = cross_validate(
    estimator=lr,
    X=X_train,
    y=y_train,
    scoring="accuracy",
    cv=kfold,
    return_train_score=True,
)
print(f"cross_validate result keys: {list(cv_result.keys())}")

# Validation-fold scores of the three folds.
cv_fold_scores = cv_result["test_score"]
print(f"Per-fold validation accuracy: {cv_fold_scores}")

# Mean of the validation-fold scores.
cv_mean_score = cv_fold_scores.mean()
print(f"Mean cross-validation accuracy: {cv_mean_score}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment