Skip to content

Instantly share code, notes, and snippets.

View marcosan93's full-sized avatar

Marco Santos marcosan93

View GitHub Profile
def plot_confusion_matrix(y_true, y_pred, labels=["Sell", "Buy", "Hold"],
                          normalize=False, title=None, cmap=plt.cm.coolwarm):
    """
    Creates a more visually appealing confusion matrix

    Computes the confusion matrix of ``y_true`` against ``y_pred`` and draws
    it as a colour-mapped image, with a colorbar, on a new 12x6 figure.

    Parameters:
        y_true: true class values, passed straight to ``confusion_matrix``.
        y_pred: predicted class values, passed straight to
            ``confusion_matrix``.
        labels: class display names. Not referenced by the statements
            visible in this snippet.
        normalize: not referenced by the statements visible in this snippet.
        title: not referenced by the statements visible in this snippet.
        cmap: colormap handed to ``imshow``; defaults to ``coolwarm``.
    """
    cm = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(12, 6))

    # 'nearest' keeps each matrix cell a solid block instead of blending
    # neighbouring cells together
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # NOTE(review): the snippet ends here; any tick labelling, annotation or
    # return value is not shown, so nothing further is assumed.
### Importing libraries
import _pickle as pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
### Loading in the data (can be changed to the other pkl feature file)
with open("top10_df.pkl",'rb') as fp:
# Grid search is used to tune the classifier's hyperparameters
from sklearn.model_selection import GridSearchCV

# Candidate values for each hyperparameter (the keys and values need to be
# adapted to whichever model is being tuned)
params = dict(
    n_estimators=[50, 100, 200, 500],
    learning_rate=[1, 0.1, 0.01],
)

# 3-fold grid search scored on macro-averaged F1, keeping the training scores
search = GridSearchCV(
    estimator=clf,
    param_grid=params,
    scoring='f1_macro',
    cv=3,
    verbose=5,
    return_train_score=True,
)
# Importing the necessary libraries and model
import matplotlib.pyplot as plt
from matplotlib import rcParams
import _pickle as pickle
import numpy as np
import pandas as pd
from joblib import load
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
def format_qr(df, df2):
    """
    Formats the DF from the Stockpup dataset so that the values and columns
    line up with training and testing data.

    Uses the second DF of our data to find the appropriate feature columns to
    use and will be appended to.
    Scales the DF with the new data.
    Returns the QR with the new scaled data.

    Parameters:
        df: the DF from the Stockpup dataset that is to be formatted.
        df2: our own data; must contain a "Decision" column.

    NOTE(review): only the first step (dropping the class column) is visible
    in this snippet; the column matching, scaling and return described above
    are not shown here.
    """
    # Dropping the class column as it is not needed. ``drop`` returns a new
    # frame, so the caller's DataFrame keeps its "Decision" column.
    df2 = df2.drop("Decision", axis=1)
# Build the scaled DF for AMD's latest QR and display it
qr_df = format_qr(stock_df['AMD'], top10_df)
qr_df

# Run the classifier on the latest QR and report the first predicted class
predicted_labels = clf.predict(qr_df)
class_label = predicted_labels[0]
prediction(class_label)

# Visualize the prediction probabilities for the same QR
class_probabilities = clf.predict_proba(qr_df)
pie_stock(class_probabilities, "AMD")
# Importing Libraries
import _pickle as pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.pipeline import Pipeline
# Loading the Data
## Uncomment the desired line to use its data
# Scaling the Data
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Separate the feature columns from the "Decision" class column.
# `axis` is passed by keyword: the positional form `drop(labels, 1)` was
# deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
features_df = df.drop(["Decision"], axis=1)

# fit_transform returns a bare array, so the original index and column names
# are re-attached to keep the scaled data aligned with the source DF
scaled_df = pd.DataFrame(scaler.fit_transform(features_df),
                         index=features_df.index,
                         columns=features_df.columns)
# Importing the 10 models
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from xgboost import XGBClassifier
# Dictionary containing the model names and their scores
models_f1 = {}
# Looping through each model's predictions and getting their classification reports
for name, pipe in model_pipelines.items():
print('\n'+ name + ' (Macro Avg - F1 Score):')
# Classification Report
report = classification_report(y_test, pipe.predict(X_test), target_names=['Sell', 'Buy', 'Hold'], output_dict=True)
f1 = report['macro avg']['f1-score']