Skip to content

Instantly share code, notes, and snippets.

@quantra-go-algo
Created April 8, 2025 01:42
Show Gist options
  • Save quantra-go-algo/97a041c49066ecd02b1298af835c40c8 to your computer and use it in GitHub Desktop.
# Walk-forward loop: for each month boundary, augment the training window
# with TGAN synthetic data, pick the best random-forest seed by test
# accuracy, then predict next-month signals with that best model.
# NOTE(review): indentation was reconstructed from the comment structure of
# the original paste; names such as `monthly_index`, `data`, `window`,
# `seeds_list`, `test_span`, `apple` and the helper functions are defined
# elsewhere in the notebook/script.
for i in range(1, len(monthly_index)):
    print('=' * 100)
    # Set the current month-end variable
    current_month_end = monthly_index[(i - 1)]
    # Set the next month-end variable
    next_month_end = monthly_index[i]
    # Number of rows spanned from the previous to the next month end
    # (pandas label slicing is inclusive at both ends)
    span = len(data.loc[current_month_end:next_month_end, :].index)
    datetime_now = dt.datetime.now().replace(microsecond=0)
    # Single quotes inside the f-string: reusing double quotes here is a
    # SyntaxError before Python 3.12 (PEP 701)
    print(f"Predictions for period {next_month_end.strftime('%b')}-{next_month_end.year} begin at {datetime_now}")
    # Set the data sample up to the next month end (last `window + span` rows)
    data_sample = data.loc[:next_month_end, :].iloc[-(window + span):, :].copy()
    # Per-seed accuracy scores and fitted models
    accuracy_scores = dict()
    models = dict()
    # Train data for the TGAN algorithm: only up to the previous month end
    tgan_train_data = data_sample.loc[:current_month_end, :].copy()
    # Create the synthetic data (one dataframe per seed)
    synthetic_data_dict = create_synthetic_data(seeds_list, 'AAPL', tgan_train_data, test_span)
    for seed in seeds_list:
        # Synthetic data generated with this seed
        synthetic_data = synthetic_data_dict[seed]
        # Update the first Open price with the real last Close price
        synthetic_data.loc[synthetic_data.index[0], 'Open'] = tgan_train_data['Close'].iloc[-1]
        # Concatenate the real data with the synthetic data
        whole_sample = pd.concat([tgan_train_data[tgan_train_data['stock'] == 'AAPL'], synthetic_data])
        # Sort the dataframe by the index
        whole_sample.sort_index(inplace=True)
        all_features, features = get_all_features(whole_sample)
        # Train sample for the ML model using embargo (drop the last test_span+1 rows)
        train_sample = all_features.iloc[:-(test_span + 1), :]
        # Test sample for the ML model using purging (keep the last test_span-1 rows)
        test_sample = all_features.iloc[-(test_span - 1):, :]
        X, y, X_test, y_test = get_input_and_prediction_features(train_sample, test_sample, features)
        # Set the ML model for this seed
        models[seed] = RandomForestClassifier(n_estimators=50, max_depth=20, max_features=1.0,
                                              random_state=seed, class_weight='balanced_subsample')
        # Fit the model
        models[seed].fit(X, y)
        # Save the accuracy score of the ML model in the score dictionary
        accuracy_scores[seed] = models[seed].score(X_test, y_test)
    # Select the best-model seed based on the maximum accuracy score
    best_model_seed = max(accuracy_scores, key=accuracy_scores.get)
    all_features, features = get_all_features(data_sample[data_sample['stock'] == 'AAPL'])
    # Train sample for the best ML model using embargo and purging
    train_sample = all_features.loc[:current_month_end, :].iloc[1:-1, :]
    # Test sample for the best ML model using embargo
    test_sample = all_features.loc[current_month_end:next_month_end, :].iloc[:-1, :]
    # Compute the test-sample predictions with the best model
    # (original comment said "train-sample" — the predict is on test_sample)
    apple.loc[test_sample.index, 'signal'] = models[best_model_seed].predict(test_sample[features])
    datetime_now = dt.datetime.now().replace(microsecond=0)
    print(f'\t Predictions for this period end at {datetime_now}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment