Skip to content

Instantly share code, notes, and snippets.

@MuslemRahimi
Last active April 1, 2024 19:31
Show Gist options
  • Save MuslemRahimi/169c0decab03effc7736890b4c82c6cf to your computer and use it in GitHub Desktop.
Save MuslemRahimi/169c0decab03effc7736890b4c82c6cf to your computer and use it in GitHub Desktop.
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
#from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score
from ta.utils import *
from ta.volatility import *
from ta.momentum import *
from ta.trend import *
from ta.volume import *
from tqdm import tqdm
from sklearn.feature_selection import SelectKBest, f_classif
class StockPredictor:
    """Binary up/down classifier for one ticker's daily price history.

    The workflow is: download daily bars, derive rolling/technical
    features, pick the best few features, fit the model on the oldest
    ``1 - test_size`` fraction of rows and score it on the newest
    ``test_size`` fraction. The ``Target`` column itself is expected to be
    added to the frame by the caller.
    """

    def __init__(self, ticker, start_date, end_date, *, nth_day=60,
                 horizons=(10, 20, 50, 100), test_size=0.2, model=None):
        """Store the configuration.

        Args:
            ticker: Symbol handed to ``yf.download``.
            start_date: First date to download.
            end_date: Last date to download.
            nth_day: How many rows ahead the prediction looks.
            horizons: Window lengths used for the rolling features.
            test_size: Fraction of the newest rows held out for evaluation.
            model: Any estimator exposing ``fit``, ``predict`` and
                ``predict_proba``. Defaults to the AdaBoost classifier
                below.
        """
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date
        self.nth_day = nth_day
        if model is None:
            # NOTE(review): the explicit algorithm="SAMME" argument is kept
            # from the original; newer scikit-learn releases deprecate it --
            # confirm against the installed version.
            model = AdaBoostClassifier(algorithm="SAMME", random_state=42)
        self.model = model
        self.horizons = list(horizons)
        self.test_size = test_size

    def _split_index(self, df):
        """Return the row position where the training part of *df* ends."""
        return int(len(df) * (1 - self.test_size))

    def download_data(self):
        """Download daily bars for ``self.ticker`` and return them as a DataFrame.

        Raises:
            ValueError: If the download comes back empty.
        """
        df_original = yf.download(
            self.ticker, start=self.start_date, end=self.end_date, interval="1d"
        )
        if df_original.empty:
            raise ValueError(f"No price data returned for {self.ticker!r}")
        # NOTE(review): some yfinance versions return two-level columns even
        # for a single ticker (presumably field name on level 0 -- confirm).
        # Flatten so that df["Close"] is a Series for the feature code.
        if isinstance(df_original.columns, pd.MultiIndex):
            df_original.columns = df_original.columns.get_level_values(0)
        df_original.index = pd.to_datetime(df_original.index)
        return df_original

    def generate_features(self, df):
        """Add feature columns to *df* in place and return the predictor names.

        Args:
            df: Frame with at least ``Close``, ``High`` and ``Low`` columns.

        Returns:
            List of the column names meant to be used as model inputs.
        """
        close = df["Close"]
        high = df["High"]
        low = df["Low"]
        # Loop-invariant: the one-day return series does not depend on horizon.
        daily_returns = close.pct_change()

        new_predictors = []
        for horizon in self.horizons:
            ratio_column = f"Close_Ratio_{horizon}"
            # Only the Close column's rolling mean is needed; rolling over the
            # whole (growing) frame would recompute every feature column.
            df[ratio_column] = close / close.rolling(horizon).mean()
            new_predictors.append(ratio_column)

            trend_column = f"Trend_{horizon}"
            df[trend_column] = close.pct_change(periods=horizon)
            new_predictors.append(trend_column)

            volatility_column = f"Volatility_{horizon}"
            df[volatility_column] = daily_returns.rolling(horizon).std()
            new_predictors.append(volatility_column)

            volatility_mean_column = f"Volatility_Mean_{horizon}"
            df[volatility_mean_column] = daily_returns.rolling(horizon).mean()
            new_predictors.append(volatility_mean_column)

            # NOTE(review): SMA/EMA are stored on the frame but, as in the
            # original, are not offered as predictors -- confirm intent.
            df[f"SMA_{horizon}"] = sma_indicator(close, window=horizon)
            df[f"EMA_{horizon}"] = ema_indicator(close, window=horizon)

            rsi_column = f"RSI_{horizon}"
            df[rsi_column] = rsi(close, window=horizon)
            new_predictors.append(rsi_column)

            stoch_rsi_column = f"STOCH_RSI_{horizon}"
            df[stoch_rsi_column] = stochrsi_k(close, window=horizon, smooth1=3, smooth2=3)
            new_predictors.append(stoch_rsi_column)

            stoch_column = f"STOCH_{horizon}"
            df[stoch_column] = stoch(high, low, close, window=horizon)
            new_predictors.append(stoch_column)

            roc_column = f"ROC_{horizon}"
            df[roc_column] = roc(close, window=horizon)
            new_predictors.append(roc_column)

            wma_column = f"WMA_{horizon}"
            df[wma_column] = wma_indicator(close, window=horizon)
            new_predictors.append(wma_column)

            # Additional features
            atr_column = f"ATR_{horizon}"
            df[atr_column] = average_true_range(high, low, close, window=horizon)
            new_predictors.append(atr_column)

            adx_column = f"ADX_{horizon}"
            df[adx_column] = adx(high, low, close, window=horizon)
            new_predictors.append(adx_column)

            bb_bands_column = f"BB_{horizon}"
            df[bb_bands_column] = bollinger_hband(close, window=horizon) / close
            new_predictors.append(bb_bands_column)

        # The MACD family does not depend on the horizon, so it is added once,
        # after the loop, to avoid duplicate predictor names.
        df['macd'] = macd(close)
        df['macd_signal'] = macd_signal(close)
        df['macd_hist'] = 2 * macd_diff(close)
        new_predictors.extend(['macd', 'macd_signal', 'macd_hist'])
        return new_predictors

    def feature_selection(self, df, predictors, k=3):
        """Return the *k* predictors scoring highest under ``f_classif``.

        The selector is fitted on the training part of *df* only (the same
        split ``train_model`` uses) so the hold-out rows do not influence
        which features are chosen.

        Args:
            df: Frame holding the predictor columns and a ``Target`` column.
            predictors: Candidate column names.
            k: Number of features to keep; capped at ``len(predictors)``.

        Returns:
            List of selected column names, in their original order.
        """
        train_df = df.iloc[:self._split_index(df)]
        X = train_df[predictors]
        y = train_df['Target']
        selector = SelectKBest(score_func=f_classif, k=min(k, X.shape[1]))
        selector.fit(X, y)
        support_mask = selector.get_support()
        return [col for col, keep in zip(X.columns, support_mask) if keep]

    def train_model(self, df, predictors):
        """Fit ``self.model`` on the oldest ``1 - test_size`` share of *df*."""
        train_df = df.iloc[:self._split_index(df)]
        self.model.fit(train_df[predictors], train_df["Target"])

    def evaluate_model(self, df, predictors):
        """Print hold-out metrics for the newest ``test_size`` share of *df*.

        Hard 0/1 predictions come from thresholding the positive-class
        probability at 0.5. ROC-AUC is computed from the probabilities
        themselves, since ranking metrics lose information once the scores
        are thresholded.
        """
        test_df = df.iloc[self._split_index(df):]
        y_true = test_df["Target"]
        up_probability = self.model.predict_proba(test_df[predictors])[:, 1]
        # Kept as floats so the printed tables look the same as before.
        test_predictions = (up_probability >= .5).astype(float)

        # Copy before adding a column so pandas does not warn about writing
        # to a slice of test_df.
        compare_df = test_df[['Close', 'Target']].iloc[-100:].copy()
        compare_df['Predictions'] = test_predictions[-100:]
        print(compare_df)

        test_precision = precision_score(y_true, test_predictions)
        test_accuracy = accuracy_score(y_true, test_predictions)
        test_recall = recall_score(y_true, test_predictions)
        test_f1 = f1_score(y_true, test_predictions)
        if y_true.nunique() > 1:
            roc_auc_text = f"{round(roc_auc_score(y_true, up_probability) * 100)}%"
        else:
            # ROC-AUC is undefined when the hold-out targets hold one class.
            roc_auc_text = "n/a"

        print("Test Set Metrics:")
        print(f"Precision: {round(test_precision * 100)}%")
        print(f"Accuracy: {round(test_accuracy * 100)}%")
        print(f"Recall: {round(test_recall * 100)}%")
        print(f"F1-Score: {round(test_f1 * 100)}%")
        print(f"ROC-AUC: {roc_auc_text}")
        print(pd.DataFrame(test_predictions).value_counts())

    def predict_next_value(self, df, predictors):
        """Print the model's prediction for the most recent row of *df*.

        The reported date is ``nth_day`` business days after the last index
        entry. That is an approximation of "``nth_day`` rows ahead" because
        exchange holidays are not taken into account.
        """
        # One-row DataFrame (not a Series) so the model receives the same
        # column names it was fitted with.
        latest_data_point = df.iloc[[-1]][predictors]
        next_value_prediction = self.model.predict(latest_data_point)[0]
        next_value_probability = self.model.predict_proba(latest_data_point)[0][1]
        print("Predicted next value:", next_value_prediction)
        print("Probability of predicted next value:", round(next_value_probability * 100, 2), "%")
        latest_date_index = df.index[-1]
        next_prediction_date = latest_date_index + pd.offsets.BDay(self.nth_day)
        print("Corresponding date for the next prediction:", next_prediction_date)
def main():
    """Download AMD history, then select features, train, evaluate and predict."""
    ticker = 'AMD'
    start_date = datetime(1970, 1, 1)
    end_date = datetime.today()
    predictor = StockPredictor(ticker, start_date, end_date)
    df = predictor.download_data()

    # Target is 1 when the close nth_day rows ahead is above today's close.
    df["nth_day"] = df["Close"].shift(-predictor.nth_day)
    df["Target"] = (df["nth_day"] > df["Close"]).astype(int)

    predictors = predictor.generate_features(df)
    predictors = [pred for pred in predictors if pred in df.columns]
    # nth_day is excluded from the NaN filter so the most recent rows (whose
    # future close is not known yet) survive for the final prediction.
    df = df.dropna(subset=df.columns[df.columns != "nth_day"])

    # Rows without a known future close have nth_day = NaN, and the
    # comparison above labels them Target = 0 regardless of what happens
    # next. Keep them out of selection, training and evaluation so those
    # placeholder labels cannot distort the hold-out metrics.
    labelled_df = df[df["nth_day"].notna()]
    if labelled_df.empty:
        raise SystemExit("Not enough history to build any labelled rows.")

    best_features = predictor.feature_selection(labelled_df, predictors)
    print("Best Features Selected:", best_features)
    predictor.train_model(labelled_df, best_features)
    predictor.evaluate_model(labelled_df, best_features)
    # Predict from the newest available row, which is unlabelled by design.
    predictor.predict_next_value(df, best_features)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment