Skip to content

Instantly share code, notes, and snippets.

View marcosan93's full-sized avatar

Marco Santos marcosan93

View GitHub Profile
# Re-index every DataFrame held in stocks_df via setting_index (defined
# elsewhere; per the original note it makes the Date the index).
for ticker in tqdm(stocks_df):
    stocks_df[ticker] = setting_index(stocks_df[ticker])

# Replace every literal "None" string with 0, in place.
# NOTE(review): this comment previously said the values become NaN, but the
# code has always written 0 -- confirm 0 is what the later steps expect.
for ticker in tqdm(stocks_df):
    stocks_df[ticker].replace("None", 0, inplace=True)

# New dictionary that will contain the numerical values once all values have
# been converted to numeric
num_df = {}
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import _pickle as pickle
# Load the DataFrame stored in the .pkl file created earlier.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
with open("main_df.pkl", 'rb') as fp:
    final_df = pickle.load(fp)

# Boolean mask that keeps every column except the class label
feature_cols = final_df.columns != 'Decision'

# One feature-only DataFrame per class label (1 -> buy, 2 -> hold, 0 -> sell),
# each re-indexed from zero
buy_df = final_df.loc[final_df['Decision'] == 1, feature_cols].reset_index(drop=True)
hold_df = final_df.loc[final_df['Decision'] == 2, feature_cols].reset_index(drop=True)
sell_df = final_df.loc[final_df['Decision'] == 0, feature_cols].reset_index(drop=True)

# Visualizing in matplotlib
# NOTE(review): the style is selected after the figure already exists, so
# figure-level style settings may not reach this figure -- confirm the order.
plt.figure(figsize=(10, 6))
plt.style.use('fivethirtyeight')

# Plotting the count of each DataFrame of each class
def CorrMtx(df, dropDuplicates = True):
    """
    Takes in a Correlation DF and excludes nonessential visuals.
    Creates a more visually pleasing correlation matrix.

    Parameters:
        df: square, 2-D correlation table (anything ``np.zeros_like`` accepts).
        dropDuplicates: when True, build a boolean mask that flags the upper
            triangle (diagonal included) so mirrored correlations can be hidden.

    NOTE(review): only the mask construction is visible in this excerpt; the
    part that would consume the mask is not shown here.
    """
    # Exclude duplicate correlations by masking upper right values
    if dropDuplicates:
        # Builtin bool: the np.bool alias was deprecated in NumPy 1.20 and
        # removed in 1.24, where it raises AttributeError.
        mask = np.zeros_like(df, dtype=bool)
        mask[np.triu_indices_from(mask)] = True
# Correlation DF of all classes: the last row of the correlation matrix with
# its final column dropped (presumably 'Decision' is the last column, since
# the result is sorted by that label below)
corr = final_df.corr().iloc[[-1], :-1]

# Rank the features by the absolute value of their correlation and keep the
# ten strongest
abs_corr = corr.T.abs()
top10_corr = abs_corr.sort_values(by='Decision', ascending=False).head(10)

# New DF holding the top10_corr features, with the 'Decision' class labels
# joined back on
top10_corr_df = final_df[top10_corr.index].join(final_df['Decision'])
# Pickling the DF for use in our Classification models
# Importing the necessary libraries
from sklearn.ensemble import ExtraTreesClassifier
# Instantiating the classifier
forest = ExtraTreesClassifier(n_estimators=200)

# Setting the corresponding variables for our classifier: every column except
# the label as features, the label column as target.
# `columns=` replaces the positional axis argument, which pandas 2.0 removed.
X = final_df.drop(columns=['Decision'])
y = final_df.Decision

# Matplotlib style to use. The bundled seaborn styles were renamed to
# 'seaborn-v0_8' in Matplotlib 3.6 and the old name was removed in 3.8, so use
# whichever name this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

# Printing out the different features as a list
print("Feature Rankings:")

# Showing the top 10 features (`indices` and `importances` are computed
# outside this excerpt). Slicing avoids an IndexError when there are fewer
# than ten features.
for rank, feature_idx in enumerate(indices[:10], start=1):
    print(f"{rank}. {X.columns[feature_idx]}: {importances[feature_idx]}")
# Importing the necessary libraries
import _pickle as pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
from sklearn.dummy import DummyClassifier
# Loading in the Data (can be changed to the other features .pkl file if needed)
### Scaling the Data
# Importing the Scaler
from sklearn.preprocessing import StandardScaler
# Instantiating the Scaler
scaler = StandardScaler()

# Removing the class labels from the dataset (because we do not scale the class labels).
# `columns=` replaces the positional axis argument, which pandas 2.0 removed.
features_df = df.drop(columns=["Decision"])
# Display names for the report, in the order passed as target_names
class_names = ['Sell', 'Buy', 'Hold']

# Build the dummy classifier, fit it on the training split and predict the
# test split in one chain (fit returns the fitted estimator itself)
dummy = DummyClassifier(strategy='stratified')
dum_pred = dummy.fit(X_train, y_train).predict(X_test)

# Summarise the dummy's predictions against the true test labels and print
report = classification_report(y_test, dum_pred, target_names=class_names)
print(report)