Last active
April 1, 2024 19:31
-
-
Save MuslemRahimi/169c0decab03effc7736890b4c82c6cf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import yfinance as yf | |
import pandas as pd | |
from datetime import datetime, timedelta | |
#from sklearn.ensemble import RandomForestClassifier | |
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier | |
from xgboost import XGBClassifier | |
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score | |
from ta.utils import * | |
from ta.volatility import * | |
from ta.momentum import * | |
from ta.trend import * | |
from ta.volume import * | |
from tqdm import tqdm | |
from sklearn.feature_selection import SelectKBest, f_classif | |
class StockPredictor:
    """Train and evaluate an up/down classifier on one ticker's daily prices.

    The methods that fit or score the model read a ``Target`` column from the
    frame they are given; this class never creates that column, so callers
    must add it before calling ``feature_selection``, ``train_model`` or
    ``evaluate_model``.
    """

    def __init__(self, ticker, start_date, end_date, *, nth_day=60,
                 horizons=(10, 20, 50, 100), test_size=0.2):
        """Store the download window and the modelling configuration.

        Args:
            ticker: Symbol passed to ``yf.download``.
            start_date: First date of the download window.
            end_date: Last date of the download window.
            nth_day: Number of rows ahead the prediction refers to.
            horizons: Rolling window lengths used by ``generate_features``.
            test_size: Fraction of rows (taken from the end) held out for
                evaluation.
        """
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date
        self.nth_day = nth_day
        # Earlier experiments used RandomForestClassifier(n_estimators=5000,
        # max_depth=10, min_samples_split=100) and XGBClassifier(
        # n_estimators=200, max_depth=2, learning_rate=1).
        self.model = AdaBoostClassifier(algorithm="SAMME", random_state=42)
        self.horizons = list(horizons)
        self.test_size = test_size

    def _split_index(self, df):
        """Return the row position where the held-out tail of ``df`` starts."""
        return int(len(df) * (1 - self.test_size))

    def download_data(self):
        """Download daily price history for ``self.ticker``.

        Returns:
            The frame returned by ``yf.download`` with a datetime index.
        """
        df_original = yf.download(
            self.ticker,
            start=self.start_date,
            end=self.end_date,
            interval="1d",
        )
        columns = df_original.columns
        # NOTE(review): some yfinance releases return two-level columns even
        # for one ticker, which breaks df["Close"] as a Series. Drop the last
        # level only when it carries a single value, so nothing is lost.
        if (
            isinstance(columns, pd.MultiIndex)
            and columns.nlevels == 2
            and columns.get_level_values(-1).nunique() == 1
        ):
            df_original.columns = columns.droplevel(-1)
        df_original.index = pd.to_datetime(df_original.index)
        return df_original

    def generate_features(self, df):
        """Add indicator columns to ``df`` in place.

        For every window in ``self.horizons`` a set of ratio, return,
        volatility and ``ta`` indicator columns is written to ``df``; three
        MACD columns are added once at the end.

        Args:
            df: Frame with ``Close``, ``High`` and ``Low`` columns. Mutated.

        Returns:
            Names of the added columns that are meant to be model inputs.
        """
        close = df["Close"]
        high = df["High"]
        low = df["Low"]
        # One-row returns do not depend on the horizon; compute them once.
        daily_returns = close.pct_change()
        new_predictors = []

        def add(name, values, *, predictor=True):
            df[name] = values
            if predictor:
                new_predictors.append(name)

        for horizon in self.horizons:
            # Only the Close rolling mean is needed, so roll that column alone
            # instead of the whole (growing) frame.
            add(f"Close_Ratio_{horizon}", close / close.rolling(horizon).mean())
            add(f"Trend_{horizon}", close.pct_change(periods=horizon))
            add(f"Volatility_{horizon}", daily_returns.rolling(horizon).std())
            add(f"Volatility_Mean_{horizon}", daily_returns.rolling(horizon).mean())
            # SMA/EMA are stored on the frame but were never listed as
            # predictors. NOTE(review): confirm this omission is intentional.
            add(f"SMA_{horizon}", sma_indicator(close, window=horizon), predictor=False)
            add(f"EMA_{horizon}", ema_indicator(close, window=horizon), predictor=False)
            add(f"RSI_{horizon}", rsi(close, window=horizon))
            add(
                f"STOCH_RSI_{horizon}",
                stochrsi_k(close, window=horizon, smooth1=3, smooth2=3),
            )
            add(f"STOCH_{horizon}", stoch(high, low, close, window=horizon))
            add(f"ROC_{horizon}", roc(close, window=horizon))
            add(f"WMA_{horizon}", wma_indicator(close, window=horizon))
            # Additional features
            add(f"ATR_{horizon}", average_true_range(high, low, close, window=horizon))
            add(f"ADX_{horizon}", adx(high, low, close, window=horizon))
            add(f"BB_{horizon}", bollinger_hband(close, window=horizon) / close)

        # The MACD calls take no horizon argument, so they are computed once
        # here; running them per horizon would only duplicate predictor names.
        add("macd", macd(close))
        add("macd_signal", macd_signal(close))
        add("macd_hist", 2 * macd_diff(close))
        return new_predictors

    def feature_selection(self, df, predictors):
        """Pick the highest-scoring predictors with ``SelectKBest(f_classif)``.

        The selector is fitted on every row of ``df``; pass only training rows
        if the held-out split must not influence the choice.

        Args:
            df: Frame containing ``predictors`` and a ``Target`` column.
            predictors: Candidate column names.

        Returns:
            List of at most three selected column names, in ``predictors``
            order.
        """
        X = df[predictors]
        y = df["Target"]
        # k may not exceed the number of candidate columns.
        selector = SelectKBest(score_func=f_classif, k=min(3, X.shape[1]))
        selector.fit(X, y)
        return X.columns[selector.get_support()].tolist()

    def train_model(self, df, predictors):
        """Fit ``self.model`` on the leading ``1 - test_size`` share of ``df``."""
        train_df = df.iloc[:self._split_index(df)]
        self.model.fit(train_df[predictors], train_df["Target"])

    def evaluate_model(self, df, predictors):
        """Print classification metrics for the trailing ``test_size`` share.

        Args:
            df: Frame containing ``predictors``, ``Close`` and ``Target``.
            predictors: Columns the model was trained on.
        """
        test_df = df.iloc[self._split_index(df):]
        y_true = test_df["Target"]
        test_probabilities = self.model.predict_proba(test_df[predictors])[:, 1]
        # Keep the raw probabilities for ROC-AUC; threshold a copy for the
        # label-based metrics.
        test_predictions = (test_probabilities >= 0.5).astype(float)

        # Copy so the new column is not written into a slice of test_df.
        compare_df = test_df[["Close", "Target"]].tail(100).copy()
        compare_df["Predictions"] = test_predictions[-100:]
        print(compare_df)

        test_precision = precision_score(y_true, test_predictions)
        test_accuracy = accuracy_score(y_true, test_predictions)
        test_recall = recall_score(y_true, test_predictions)
        test_f1 = f1_score(y_true, test_predictions)
        # ROC-AUC ranks scores; hard 0/1 labels collapse the curve to one point.
        test_roc_auc = roc_auc_score(y_true, test_probabilities)

        print("Test Set Metrics:")
        print(f"Precision: {round(test_precision * 100)}%")
        print(f"Accuracy: {round(test_accuracy * 100)}%")
        print(f"Recall: {round(test_recall * 100)}%")
        print(f"F1-Score: {round(test_f1 * 100)}%")
        print(f"ROC-AUC: {round(test_roc_auc * 100)}%")
        print(pd.DataFrame(test_predictions).value_counts())

    def predict_next_value(self, df, predictors):
        """Print the model's prediction for the most recent row of ``df``.

        Args:
            df: Frame with a datetime index and the ``predictors`` columns.
            predictors: Columns the model was trained on.
        """
        # A one-row DataFrame keeps the column names the model was fitted with.
        latest_row = df[predictors].iloc[[-1]]
        next_value_prediction = self.model.predict(latest_row)[0]
        next_value_probability = self.model.predict_proba(latest_row)[0][1]
        print("Predicted next value:", next_value_prediction)
        print("Probability of predicted next value:", round(next_value_probability * 100, 2), "%")

        # nth_day counts rows (trading sessions), not calendar days, so step
        # by business days. Exchange holidays are not accounted for, so the
        # printed date is an approximation.
        latest_date_index = df.index[-1]
        next_prediction_date = latest_date_index + pd.offsets.BDay(self.nth_day)
        print("Corresponding date for the next prediction:", next_prediction_date)
def add_target(df, nth_day):
    """Add the look-ahead close and the up/down label to ``df`` in place.

    ``nth_day`` holds the close ``nth_day`` rows later (NaN for the trailing
    rows that have no such close yet). ``Target`` is 1 where that later close
    is higher than the current one and 0 otherwise; for the trailing rows it
    is 0 only because a comparison with NaN is false, not because the price
    fell.

    Args:
        df: Frame with a ``Close`` column. Mutated.
        nth_day: Number of rows to look ahead.

    Returns:
        The same ``df`` object.
    """
    df["nth_day"] = df["Close"].shift(-nth_day)
    df["Target"] = (df["nth_day"] > df["Close"]).astype(int)
    return df


def labeled_rows(df):
    """Return only the rows whose look-ahead close (``nth_day``) is known."""
    return df[df["nth_day"].notna()]


def main():
    """Download AMD history, then train, evaluate and print a prediction."""
    ticker = 'AMD'
    start_date = datetime(1970, 1, 1)
    end_date = datetime.today()
    predictor = StockPredictor(ticker, start_date, end_date)

    df = predictor.download_data()
    add_target(df, predictor.nth_day)

    # De-duplicate while keeping order, and keep only columns that exist.
    predictors = [
        name
        for name in dict.fromkeys(predictor.generate_features(df))
        if name in df.columns
    ]
    # nth_day is NaN for the newest rows by construction; those rows must
    # survive so the latest data point can still be scored.
    df = df.dropna(subset=df.columns[df.columns != "nth_day"])

    # Rows without a known outcome carry a placeholder Target of 0, so they
    # are excluded from feature selection, fitting and evaluation.
    known = labeled_rows(df)
    # Same split rule the predictor applies internally; selecting features on
    # the training part only keeps the held-out rows out of that choice.
    train_size = int(len(known) * (1 - predictor.test_size))
    best_features = predictor.feature_selection(known.iloc[:train_size], predictors)
    print("Best Features Selected:", best_features)

    predictor.train_model(known, best_features)
    predictor.evaluate_model(known, best_features)
    # The forecast uses the newest row, which is necessarily unlabeled.
    predictor.predict_next_value(df, best_features)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment