Created
April 8, 2025 01:42
-
-
Save quantra-go-algo/97a041c49066ecd02b1298af835c40c8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Walk-forward loop: for each consecutive pair of month-ends, train one
# RandomForest per TGAN seed on real data augmented with synthetic data,
# pick the best seed by held-out accuracy, then predict signals for the
# upcoming month on real data only.
for i in range(1, len(monthly_index)):
    print('=' * 100)
    # Current (previous) month-end: training data runs up to here
    current_month_end = monthly_index[(i-1)]
    # Next month-end: predictions cover up to here
    next_month_end = monthly_index[i]
    # Number of rows spanning the previous to the next month end
    span = len(data.loc[current_month_end:next_month_end,:].index)
    datetime_now = dt.datetime.now().replace(microsecond=0)
    # NOTE: single quotes inside the f-string — reusing double quotes here
    # is a SyntaxError on Python < 3.12 (nested same-quote f-strings only
    # became legal with PEP 701).
    print(f"Predictions for period {next_month_end.strftime('%b')}-{next_month_end.year} begin at {datetime_now}")
    # Data sample up to the next month end, limited to the last window+span rows
    data_sample = data.loc[:next_month_end,:].iloc[-(window+span):,:].copy()
    # Accuracy scores and fitted models, keyed by TGAN seed
    accuracy_scores = dict()
    models = dict()
    # Train data for the TGAN algorithm up to the previous month end
    tgan_train_data = data_sample.loc[:current_month_end,:].copy()
    # Create the synthetic data (one synthetic frame per seed)
    synthetic_data_dict = create_synthetic_data(seeds_list, 'AAPL', tgan_train_data, test_span)
    for seed in seeds_list:
        # Synthetic continuation generated for this seed
        synthetic_data = synthetic_data_dict[seed]
        # Stitch the series: first synthetic Open = real last Close
        synthetic_data.loc[synthetic_data.index[0],'Open'] = tgan_train_data['Close'].iloc[-1]
        # Concatenate the real AAPL data with the synthetic data
        whole_sample = pd.concat([tgan_train_data[tgan_train_data['stock']=='AAPL'], synthetic_data])
        # Keep chronological order after the concat
        whole_sample.sort_index(inplace=True)
        all_features, features = get_all_features(whole_sample)
        # Train sample for the ML model using embargo
        train_sample = all_features.iloc[:-(test_span+1),:]
        # Test sample for the ML model using purging
        test_sample = all_features.iloc[-(test_span-1):,:]
        X, y, X_test, y_test = get_input_and_prediction_features(train_sample, test_sample, features)
        # One forest per seed; balanced_subsample compensates class imbalance
        models[seed] = RandomForestClassifier(n_estimators=50, max_depth=20, max_features=1.0,
                                              random_state=seed, class_weight='balanced_subsample')
        models[seed].fit(X, y)
        # Save the accuracy score of this seed's model
        accuracy_scores[seed] = models[seed].score(X_test, y_test)
    # Select the best-model seed based on the maximum accuracy score
    best_model_seed = max(accuracy_scores, key=accuracy_scores.get)
    # Recompute features on real data only for the out-of-sample prediction
    all_features, features = get_all_features(data_sample[data_sample['stock']=='AAPL'])
    # Train sample for the best ML model using embargo and purging
    train_sample = all_features.loc[:current_month_end,:].iloc[1:-1,:]
    # Test sample for the best ML model using embargo
    test_sample = all_features.loc[current_month_end:next_month_end,:].iloc[:-1,:]
    # Write the month's predicted signals into the results frame
    apple.loc[test_sample.index,'signal'] = models[best_model_seed].predict(test_sample[features])
    datetime_now = dt.datetime.now().replace(microsecond=0)
    print(f'\t Predictions for this period end at {datetime_now}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment