Skip to content

Instantly share code, notes, and snippets.

@quantra-go-algo
Created April 8, 2025 01:42
Show Gist options
  • Save quantra-go-algo/97a041c49066ecd02b1298af835c40c8 to your computer and use it in GitHub Desktop.
# Walk-forward loop: for each month boundary, augment the training window
# with TGAN synthetic data, pick the best random-forest seed by test
# accuracy, then predict next-month signals with that best model.
# NOTE(review): indentation was reconstructed from the comment structure of
# the original paste; names such as `monthly_index`, `data`, `window`,
# `seeds_list`, `test_span`, `apple` and the helper functions are defined
# elsewhere in the notebook/script.
for i in range(1, len(monthly_index)):
    print('=' * 100)
    # Set the current month-end variable
    current_month_end = monthly_index[(i - 1)]
    # Set the next month-end variable
    next_month_end = monthly_index[i]
    # Number of rows spanned from the previous to the next month end
    # (pandas label slicing is inclusive at both ends)
    span = len(data.loc[current_month_end:next_month_end, :].index)
    datetime_now = dt.datetime.now().replace(microsecond=0)
    # Single quotes inside the f-string: reusing double quotes here is a
    # SyntaxError before Python 3.12 (PEP 701)
    print(f"Predictions for period {next_month_end.strftime('%b')}-{next_month_end.year} begin at {datetime_now}")
    # Set the data sample up to the next month end (last `window + span` rows)
    data_sample = data.loc[:next_month_end, :].iloc[-(window + span):, :].copy()
    # Per-seed accuracy scores and fitted models
    accuracy_scores = dict()
    models = dict()
    # Train data for the TGAN algorithm: only up to the previous month end
    tgan_train_data = data_sample.loc[:current_month_end, :].copy()
    # Create the synthetic data (one dataframe per seed)
    synthetic_data_dict = create_synthetic_data(seeds_list, 'AAPL', tgan_train_data, test_span)
    for seed in seeds_list:
        # Synthetic data generated with this seed
        synthetic_data = synthetic_data_dict[seed]
        # Update the first Open price with the real last Close price
        synthetic_data.loc[synthetic_data.index[0], 'Open'] = tgan_train_data['Close'].iloc[-1]
        # Concatenate the real data with the synthetic data
        whole_sample = pd.concat([tgan_train_data[tgan_train_data['stock'] == 'AAPL'], synthetic_data])
        # Sort the dataframe by the index
        whole_sample.sort_index(inplace=True)
        all_features, features = get_all_features(whole_sample)
        # Train sample for the ML model using embargo (drop the last test_span+1 rows)
        train_sample = all_features.iloc[:-(test_span + 1), :]
        # Test sample for the ML model using purging (keep the last test_span-1 rows)
        test_sample = all_features.iloc[-(test_span - 1):, :]
        X, y, X_test, y_test = get_input_and_prediction_features(train_sample, test_sample, features)
        # Set the ML model for this seed
        models[seed] = RandomForestClassifier(n_estimators=50, max_depth=20, max_features=1.0,
                                              random_state=seed, class_weight='balanced_subsample')
        # Fit the model
        models[seed].fit(X, y)
        # Save the accuracy score of the ML model in the score dictionary
        accuracy_scores[seed] = models[seed].score(X_test, y_test)
    # Select the best-model seed based on the maximum accuracy score
    best_model_seed = max(accuracy_scores, key=accuracy_scores.get)
    all_features, features = get_all_features(data_sample[data_sample['stock'] == 'AAPL'])
    # Train sample for the best ML model using embargo and purging
    train_sample = all_features.loc[:current_month_end, :].iloc[1:-1, :]
    # Test sample for the best ML model using embargo
    test_sample = all_features.loc[current_month_end:next_month_end, :].iloc[:-1, :]
    # Compute the test-sample predictions with the best model
    # (original comment said "train-sample" — the predict is on test_sample)
    apple.loc[test_sample.index, 'signal'] = models[best_model_seed].predict(test_sample[features])
    datetime_now = dt.datetime.now().replace(microsecond=0)
    print(f'\t Predictions for this period end at {datetime_now}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment