Marco Santos marcosan93

Journeying towards the world of Data Science.

marcosan93 / con_mat.py

Last active January 15, 2020 02:35

	def plot_confusion_matrix(y_true, y_pred, labels=["Sell", "Buy", "Hold"],
	normalize=False, title=None, cmap=plt.cm.coolwarm):
	"""
	Creates a more visually appealing confusion matrix
	"""

	cm = confusion_matrix(y_true, y_pred)
	fig, ax = plt.subplots(figsize=(12,6))
	im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
	ax.figure.colorbar(im, ax=ax)

marcosan93 / stock_class.py

Created January 15, 2020 02:57

	### Importing libraries
	import _pickle as pickle
	import numpy as np
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import confusion_matrix, classification_report
	import matplotlib.pyplot as plt

	### Loading in the data (can be changed to the other pkl feature file)
	with open("top10_df.pkl",'rb') as fp:

marcosan93 / grid_search.py

Created January 15, 2020 17:14

	# Importing GridSearch
	from sklearn.model_selection import GridSearchCV

	# Hyperparameter grid to search over (model-specific: adjust the keys and
	# candidate values when tuning a different estimator)
	params = dict(
	    n_estimators=[50, 100, 200, 500],
	    learning_rate=[1, 0.1, 0.01],
	)

	# Build the grid search: 3-fold cross-validation scored by macro-averaged F1,
	# keeping the training scores and logging progress verbosely
	search = GridSearchCV(
	    estimator=clf,
	    param_grid=params,
	    scoring='f1_macro',
	    cv=3,
	    verbose=5,
	    return_train_score=True,
	)

marcosan93 / classify_new_imports.py

Last active January 16, 2020 20:38

	# Importing the necessary libraries and model
	import matplotlib.pyplot as plt
	from matplotlib import rcParams
	import _pickle as pickle
	import numpy as np
	import pandas as pd
	from joblib import load
	from sklearn.preprocessing import StandardScaler
	from xgboost import XGBClassifier

marcosan93 / classify_new_help.py

Created January 16, 2020 20:43

	def format_qr(df, df2):
	"""
	Formats the DF from the Stockpup dataset so that the values and columns line up with training and testing data
	Uses the second DF of our data to find the appropriate feature columns to use and will be appended to
	Scales the DF with the new data
	Returns the QR with the new scaled data
	"""
	# Dropping the class column as it is not needed
	df2 = df2.drop("Decision", axis=1)

marcosan93 / classify_new_amd.py

Created January 16, 2020 20:47

	# Build the scaled feature row for AMD's latest quarterly report (QR) by
	# passing the AMD entry of stock_df and top10_df to format_qr
	qr_df = format_qr(stock_df['AMD'], top10_df)
	# Bare expression: presumably rendered as notebook cell output -- TODO confirm
	qr_df

	# Predicting the class for our latest QR; [0] takes the first element of the
	# predictions (presumably qr_df holds a single row -- verify), which is then
	# handed to prediction()
	class_label = clf.predict(qr_df)[0]
	prediction(class_label)

	# Visualizing the prediction probabilities for our latest QR
	pie_stock(clf.predict_proba(qr_df), "AMD")

marcosan93 / pipe_imports.py

Last active January 24, 2020 23:33

	# Importing Libraries
	import _pickle as pickle
	import numpy as np
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import confusion_matrix, classification_report
	from sklearn.pipeline import Pipeline

	# Loading the Data
	## Uncomment the desired line to use its data

marcosan93 / pipe_split.py

Created January 24, 2020 22:27

	# Scaling the Data
	from sklearn.preprocessing import StandardScaler

	scaler = StandardScaler()

	# Drop the "Decision" column so only the feature columns remain. The axis is
	# passed by keyword: the positional form `drop(labels, 1)` was deprecated in
	# pandas 1.3 and raises a TypeError from pandas 2.0 onwards.
	features_df = df.drop(["Decision"], axis=1)

	# Fit the scaler on the features and rebuild a DataFrame around the scaled
	# values, preserving the original row index and column names
	scaled_df = pd.DataFrame(scaler.fit_transform(features_df),
	                         index=features_df.index,
	                         columns=features_df.columns)

marcosan93 / pipe_creation.py

Last active January 24, 2020 23:39

	# Importing the 10 models
	from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.dummy import DummyClassifier
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.linear_model import LogisticRegression
	from sklearn.naive_bayes import GaussianNB
	from sklearn.svm import SVC
	from xgboost import XGBClassifier

marcosan93 / pipe_reports.py

Created January 24, 2020 23:43

	# Dictionary containing the model names and their scores
	models_f1 = {}

	# Looping through each model's predictions and getting their classification reports
	for name, pipe in model_pipelines.items():
	print('\n'+ name + ' (Macro Avg - F1 Score):')

	# Classification Report
	report = classification_report(y_test, pipe.predict(X_test), target_names=['Sell', 'Buy', 'Hold'], output_dict=True)
	f1 = report['macro avg']['f1-score']