This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Setting the index as the Date (done by setting_index, defined elsewhere)
for i in tqdm(stocks_df):
    stocks_df[i] = setting_index(stocks_df[i])

# Replacing all "None" string placeholders with 0, in place.
# NOTE(review): this comment used to say NaN, but the code writes 0 --
# confirm which value the later numeric conversion is meant to see.
for i in tqdm(stocks_df):
    stocks_df[i].replace("None", 0, inplace=True)

# Creating a new dictionary that contains the numerical values, then converting all values to numeric values
num_df = {}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import _pickle as pickle | |
# Load the object that was previously pickled to main_df.pkl
with open("main_df.pkl", mode="rb") as fp:
    final_df = pickle.load(fp)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split final_df into one feature-only DataFrame per 'Decision' value
# (1 -> buy_df, 2 -> hold_df, 0 -> sell_df); the label column is excluded
# and each result gets a fresh 0..n-1 index.
feature_cols = final_df.columns != 'Decision'
buy_df = final_df.loc[final_df['Decision'] == 1, feature_cols].reset_index(drop=True)
hold_df = final_df.loc[final_df['Decision'] == 2, feature_cols].reset_index(drop=True)
sell_df = final_df.loc[final_df['Decision'] == 0, feature_cols].reset_index(drop=True)

# Figure setup for the matplotlib visualization.
# NOTE(review): the style is applied after the figure is created, so
# figure-level style settings may not affect this figure -- confirm intent.
plt.figure(figsize=(10, 6))
plt.style.use('fivethirtyeight')

# Plotting the count of each DataFrame of each class
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def CorrMtx(df, dropDuplicates=True):
    """
    Takes in a Correlation DF and excludes nonessential visuals.

    Creates a more visually pleasing correlation matrix.

    Args:
        df: 2-D correlation matrix (for example the result of
            ``DataFrame.corr()``).
        dropDuplicates: When True, build a boolean mask that is True on the
            upper triangle of ``df``, diagonal included.
    """
    # Exclude duplicate correlations by masking upper right values
    if dropDuplicates:
        # Use the builtin bool: the np.bool alias was removed in NumPy 1.24
        # and raises AttributeError there.
        mask = np.zeros_like(df, dtype=bool)
        mask[np.triu_indices_from(mask)] = True
    # NOTE(review): the remainder of this function is not visible in this
    # excerpt; only the mask construction is reproduced here.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Correlation of the features with the class label: the last row of the
# correlation matrix, without its final column.
# NOTE(review): relies on 'Decision' being the last column of final_df -- confirm.
corr = final_df.corr().iloc[[-1], :-1]

# Rank the features by absolute correlation with 'Decision', keep the top 10
top10_corr = corr.T.abs().sort_values(by='Decision', ascending=False).head(10)

# New DF holding the top-10 features, joined with the 'Decision' class labels
top10_corr_df = final_df[top10_corr.index].join(final_df.Decision)

# Pickling the DF for use in our Classification models
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Importing the necessary libraries | |
| from sklearn.ensemble import ExtraTreesClassifier | |
# Instantiating the classifier
forest = ExtraTreesClassifier(n_estimators=200)

# Setting the corresponding variables for our classifier:
# X is every column except the class label, y is the label column.
# `columns=` replaces the positional axis argument (`drop([...], 1)`),
# which pandas 2.0 and later reject with a TypeError.
X = final_df.drop(columns=['Decision'])
y = final_df.Decision
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Matplotlib style to use. The bundled seaborn styles were renamed with a
# "seaborn-v0_8" prefix in matplotlib 3.6 and the old names were removed
# later, so use whichever name this matplotlib installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Printing out the different features as a list
print("Feature Rankings:")

# Showing the top 10 features (fewer when `indices` has fewer than 10 entries,
# instead of failing with an IndexError)
for rank, idx in enumerate(indices[:10], start=1):
    print(f"{rank}. {X.columns[idx]}: {importances[idx]}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Importing the necessary libraries | |
| import _pickle as pickle | |
| import numpy as np | |
| import pandas as pd | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import confusion_matrix, classification_report | |
| import matplotlib.pyplot as plt | |
| from sklearn.dummy import DummyClassifier | |
| # Loading in the Data (can be changed to the other features .pkl file if needed) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ### Scaling the Data | |
| # Importing the Scaler | |
| from sklearn.preprocessing import StandardScaler | |
# Instantiating the Scaler
scaler = StandardScaler()

# Removing the class labels from the dataset (because we do not scale the class labels).
# `columns=` replaces the positional axis argument (`drop([...], 1)`),
# which pandas 2.0 and later reject with a TypeError.
features_df = df.drop(columns=["Decision"])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Baseline model: a dummy classifier with the 'stratified' strategy,
# fitted on the training split
dummy = DummyClassifier(strategy='stratified').fit(X_train, y_train)

# Baseline predictions for the test split
dum_pred = dummy.predict(X_test)

# Build and print the per-class report.
# NOTE(review): the name order assumes labels 0/1/2 mean Sell/Buy/Hold -- confirm.
report = classification_report(
    y_test,
    dum_pred,
    target_names=['Sell', 'Buy', 'Hold'],
)
print(report)