rohanjoseph93’s gists

rohanjoseph93 / Grid6.py

Created December 29, 2018 20:10

	#Grid Search
	from sklearn.model_selection import GridSearchCV
	# The grid below tries both 'l1' and 'l2' penalties. Not every solver supports
	# 'l1', so the solver is pinned to liblinear, which accepts both; otherwise the
	# 'l1' candidates cannot be fitted with the library's current default solver.
	clf = LogisticRegression(solver='liblinear')
	# Candidate penalties and inverse regularisation strengths (C) to search over
	grid_values = {'penalty': ['l1', 'l2'],'C':[0.001,.009,0.01,.09,1,5,10,25]}
	# scoring='recall' ranks the candidates by recall
	grid_clf_acc = GridSearchCV(clf, param_grid = grid_values,scoring = 'recall')
	grid_clf_acc.fit(X_train, y_train)

	#Predict values based on new parameters
	y_pred_acc = grid_clf_acc.predict(X_test)

rohanjoseph93 / Grid5.py

Last active December 29, 2018 19:58

	# Fit a logistic regression on the training data and predict for X_test
	from sklearn.linear_model import LogisticRegression

	clf = LogisticRegression()
	clf.fit(X_train, y_train)
	y_pred = clf.predict(X_test)

	# Compare the predictions with y_test and report each metric
	from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
	accuracy = accuracy_score(y_test, y_pred)
	print('Accuracy Score : ' + str(accuracy))
	precision = precision_score(y_test, y_pred)
	print('Precision Score : ' + str(precision))

rohanjoseph93 / Grid4.py

Last active September 22, 2023 10:36

	# Compare the predictions in y_pred with y_test and report each metric in turn
	from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
	accuracy = accuracy_score(y_test, y_pred)
	print('Accuracy Score : ' + str(accuracy))
	precision = precision_score(y_test, y_pred)
	print('Precision Score : ' + str(precision))
	recall = recall_score(y_test, y_pred)
	print('Recall Score : ' + str(recall))
	f1 = f1_score(y_test, y_pred)
	print('F1 Score : ' + str(f1))

	# Dummy classifier confusion matrix, printed on the line after its label
	from sklearn.metrics import confusion_matrix
	matrix = confusion_matrix(y_test, y_pred)
	print('Confusion Matrix : \n' + str(matrix))

rohanjoseph93 / Grid3.py

Created December 27, 2018 19:59

	# Features are every column except 'Class'; 'Class' itself is the target
	X = data.drop(columns=['Class'])
	y = data['Class']

	# Partition into training and test sets; the fixed seed keeps the split repeatable
	from sklearn.model_selection import train_test_split
	split_parts = train_test_split(X, y, random_state=42)
	X_train, X_test, y_train, y_test = split_parts

	# Dummy classifier
	from sklearn.dummy import DummyClassifier

rohanjoseph93 / Grid2.py

Created December 27, 2018 19:46

	# Drop the sample identifier (the 1st column)
	data = data.drop(['Sample Code Number'],axis=1)
	# Remove rows whose 'Bare Nuclei' value is the '?' missing-data marker. The
	# .copy() makes the filtered result an independent frame, so the column
	# assignments below do not raise pandas' SettingWithCopyWarning.
	data = data[data['Bare Nuclei'] != '?'].copy()
	# The '?' rows were compared as strings above; once they are gone, store the
	# remaining values as integers instead of leaving the column as text.
	data['Bare Nuclei'] = data['Bare Nuclei'].astype(int)
	# Change the Class representation: 2 becomes 0, every other value becomes 1
	data['Class'] = np.where(data['Class'] == 2, 0, 1)
	data['Class'].value_counts() #Class distribution

rohanjoseph93 / Grid1.py

Created December 27, 2018 19:37

	# Names for the columns, in file order (the CSV is read without a header row)
	column_names = [
	    'Sample Code Number',
	    'Clump Thickness',
	    'Uniformity of Cell Size',
	    'Uniformity of Cell Shape',
	    'Marginal Adhesion',
	    'Single Epithelial Cell Size',
	    'Bare Nuclei',
	    'Bland Chromatin',
	    'Normal Nucleoli',
	    'Mitoses',
	    'Class',
	]

	# Read the raw file, then attach the column names
	data = pd.read_csv('breast-cancer-wisconsin.csv', header=None)
	data.columns = column_names

	# Show the first 10 rows
	data.head(10)

rohanjoseph93 / onesample_ttest.py

Last active December 20, 2018 06:59

	#import libraries
	import pandas as pd
	from scipy import stats
	import os
	# Switch the working directory to the folder holding the data file
	data_dir = 'C:\\Users\\rohan\\Documents\\Analytics\\Data'
	os.chdir(data_dir)

	# Load the workbook and run a one-sample t-test against a population mean of 14
	a = pd.read_excel('onesamplet.xlsx')
	stats.ttest_1samp(a, popmean=14)

rohanjoseph93 / CLT4.py

Created September 4, 2018 01:08

# Save `a` (presumably the animation object created elsewhere) as a GIF,
# written at 10 frames per second through the 'imagemagick' writer
gif_path = 'C:/Users/rohan/Documents/clt2.gif'
a.save(gif_path, writer='imagemagick', fps=10)

rohanjoseph93 / CLT3.py

Created September 4, 2018 01:04

	# Function that will plot the histogram, where current is the latest figure
	def clt(current):
	    """Redraw the histogram for frame number ``current``.

	    Clears the current axes, draws a histogram of the first ``current``
	    entries of the module-level ``avg``, and sets the plot title. When
	    ``current`` equals 1000 the event source of the module-level ``a`` is
	    stopped first.

	    Relies on ``plt``, ``a`` and ``avg`` being defined elsewhere in the
	    script.
	    """
	    plt.cla()
	    # if animation is at the last frame, stop it
	    if current == 1000:
	        a.event_source.stop()

	    plt.hist(avg[0:current])

	    plt.gca().set_title('Expected value of die rolls')

rohanjoseph93 / CLT3.py

Created September 3, 2018 23:41

	# Function that will plot the histogram, where current is the latest figure
	def clt(current):
	    """Redraw the histogram for frame number ``current``.

	    Clears the current axes, draws a histogram of the first ``current``
	    entries of the module-level ``avg``, and sets the plot title. When
	    ``current`` equals 1000 the event source of the module-level ``a`` is
	    stopped first.

	    Relies on ``plt``, ``a`` and ``avg`` being defined elsewhere in the
	    script.
	    """
	    plt.cla()
	    # if animation is at the last frame, stop it
	    if current == 1000:
	        a.event_source.stop()

	    plt.hist(avg[0:current])

	    plt.gca().set_title('Expected value of die rolls')

Rohan Joseph rohanjoseph93