Marco Santos marcosan93

Journeying towards the world of Data Science.

marcosan93 / class_for_loops.py

Created January 9, 2020 02:42

	# Setting the index as the Date
	# (only existing keys are reassigned, so the dict does not change size while iterating)
	for key in tqdm(stocks_df):
	    stocks_df[key] = setting_index(stocks_df[key])

	# Replacing all "None" placeholder strings with 0
	# NOTE(review): the substitute value here is 0, not NaN -- confirm that 0 is the intended fill
	for key in tqdm(stocks_df):
	    stocks_df[key].replace("None", 0, inplace=True)

	# Creating a new dictionary that contains the numerical values, then converting all values to numeric values
	num_df = {}

marcosan93 / class_eda_lib.py

Last active January 9, 2020 16:49

	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import _pickle as pickle

	# Opening the .pkl file created
	# NOTE: unpickling can execute arbitrary code -- only load a file you created or trust
	with open("main_df.pkl", 'rb') as fp:
	    final_df = pickle.load(fp)

marcosan93 / class_bal.py

Created January 9, 2020 16:37

	# Boolean selector that keeps every column except the 'Decision' class label
	feature_cols = final_df.columns != 'Decision'
	decision = final_df['Decision']

	# One feature-only DataFrame per class value (1 -> buy, 2 -> hold, 0 -> sell), re-indexed from 0
	buy_df = final_df.loc[decision == 1, feature_cols].reset_index(drop=True)
	hold_df = final_df.loc[decision == 2, feature_cols].reset_index(drop=True)
	sell_df = final_df.loc[decision == 0, feature_cols].reset_index(drop=True)

	# Setting up the matplotlib figure and style
	plt.figure(figsize=(10, 6))
	plt.style.use('fivethirtyeight')

	# Plotting the count of each DataFrame of each class

marcosan93 / class_eda_corr.py

Created January 9, 2020 16:55

	def CorrMtx(df, dropDuplicates = True):
	"""
	Takes in a Correlation DF and excludes nonessential visuals.
	Creates a more visually pleasing correlation matrix
	"""

	# Exclude duplicate correlations by masking upper right values
	if dropDuplicates:
	mask = np.zeros_like(df, dtype=np.bool)
	mask[np.triu_indices_from(mask)] = True

marcosan93 / top10_corr.py

Created January 10, 2020 20:36

	# Correlation DF: last row of the correlation matrix with its last column dropped
	# (the sort below expects that row to be labelled 'Decision')
	corr = final_df.corr().iloc[[-1], :-1]

	# Ranking the features by absolute correlation with 'Decision' and keeping the top 10
	abs_corr = corr.T.abs()
	top10_corr = abs_corr.sort_values(by='Decision', ascending=False).head(10)

	# Creating a new DF with the features from top10_corr and joining the 'Decision' class labels
	top10_corr_df = final_df[top10_corr.index].join(final_df['Decision'])

	# Pickling the DF for use in our Classification models

marcosan93 / class_feat_imp.py

Last active January 11, 2020 18:27

	# Importing the necessary libraries
	from sklearn.ensemble import ExtraTreesClassifier

	# Instantiating the classifier
	forest = ExtraTreesClassifier(n_estimators=200)

	# Setting the corresponding variables for our classifier
	# (the column is dropped via `columns=`; passing the axis positionally is rejected by pandas >= 2.0)
	X = final_df.drop(columns=['Decision'])
	y = final_df.Decision

marcosan93 / class_feat_imp_plot.py

Created January 11, 2020 17:05

	# Matplotlib style to use
	# ('seaborn' was renamed 'seaborn-v0_8' in matplotlib 3.6 and the old name was later removed,
	# so pick whichever name this matplotlib installation provides)
	plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

	# Printing out the different features as a list
	print("Feature Rankings:")

	# Showing the top 10 features (slicing avoids an IndexError when fewer than 10 are available)
	for rank, idx in enumerate(indices[:10], start=1):
	    print(f"{rank}. {X.columns[idx]}: {importances[idx]}")

marcosan93 / dummy_lib.py

Last active January 14, 2020 20:59

	# Importing the necessary libraries
	import _pickle as pickle
	import numpy as np
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import confusion_matrix, classification_report
	import matplotlib.pyplot as plt
	from sklearn.dummy import DummyClassifier

	# Loading in the Data (can be changed to the other features .pkl file if needed)

marcosan93 / scale_split.py

Created January 14, 2020 21:12

	### Scaling the Data
	# Importing the Scaler
	from sklearn.preprocessing import StandardScaler

	# Instantiating the Scaler
	scaler = StandardScaler()

	# Removing the class labels from the dataset (because we do not scale the class labels)
	# (the column is dropped via `columns=`; passing the axis positionally is rejected by pandas >= 2.0)
	features_df = df.drop(columns=["Decision"])

marcosan93 / dum_class.py

Created January 14, 2020 21:28

	# Building the dummy classifier and fitting it on the training split in one step
	dummy = DummyClassifier(strategy='stratified').fit(X_train, y_train)

	# Predicting on the held-out test split
	dum_pred = dummy.predict(X_test)

	# Display names passed to the report as target_names, in this order
	class_names = ['Sell', 'Buy', 'Hold']

	# Building and printing the classification report
	report = classification_report(y_test, dum_pred, target_names=class_names)
	print(report)