ChatGPT's best attempt at the two-stage reinforcement learning task analysis for the paper by Daw et al. 2011.
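For orientation (this note is a paraphrase added here, not part of the original gist): in Daw et al. (2011) the hybrid model mixes a model-based valuation of the first-stage options with a model-free SARSA(lambda) valuation, and turns the mixture into choice probabilities with a softmax rule, roughly

Q_net(a) = w * Q_MB(a) + (1 - w) * Q_TD(a)
P(choose a) ∝ exp(beta * (Q_net(a) + stay_bias * rep(a)))

where w is the model-based weight, beta the softmax inverse temperature, and rep(a) indicates whether a repeats the previous trial's first-stage choice. The code below is an incomplete attempt to estimate these parameters, along with the learning rate alpha and the eligibility decay lambda, from choice data.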
import torch
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
class LearningModel:
    def __init__(self, num_first_stage_choices=2, num_second_stage_choices=2, num_trials=2, num_epochs=1000):
        # Task structure: number of options at each stage, number of trials, and number of fitting epochs
        self.num_first_stage_choices = num_first_stage_choices
        self.num_second_stage_choices = num_second_stage_choices
        self.num_trials = num_trials
        self.num_epochs = num_epochs

        # Move parameter tensors to the GPU if available
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Free parameters of the hybrid model: learning rate (alpha), eligibility decay (lambda),
        # softmax inverse temperature (beta), choice stickiness (stay_bias), and model-based weight (w).
        # They are created with requires_grad=True so that fit() can optimize them.
        self.alpha = torch.tensor(0.1, device=self.device, requires_grad=True)
        self.lambda_val = torch.tensor(0.5, device=self.device, requires_grad=True)
        self.beta = torch.tensor(1.0, device=self.device, requires_grad=True)
        self.stay_bias = torch.tensor(0.2, device=self.device, requires_grad=True)
        self.weight = torch.tensor(0.5, device=self.device, requires_grad=True)
    def hybrid_algorithm(self, data):
        # Fixed transition probabilities of the two-step task (common = 0.7, rare = 0.3)
        transition_probs = torch.tensor([[0.7, 0.3], [0.3, 0.7]], dtype=torch.float32, device=self.device)

        # Initialize first-stage and second-stage stimulus values
        first_stage_values = torch.zeros(self.num_first_stage_choices, dtype=torch.float32, device=self.device)
        second_stage_values = torch.zeros((self.num_first_stage_choices, self.num_second_stage_choices),
                                          dtype=torch.float32, device=self.device)

        for trial in range(self.num_trials):
            # Reset the eligibility trace at the beginning of each trial
            eligibility_trace = torch.zeros((self.num_first_stage_choices, self.num_second_stage_choices),
                                            dtype=torch.float32, device=self.device)

            # Initialize first-stage and second-stage choices
            first_stage_choice = 0
            second_stage_choice = 0

            for step in range(len(data[trial])):
                # Read the first-stage and second-stage choices for this trial
                if step == 0:
                    first_stage_choice = data[trial][step][0]
                    second_stage_choice = data[trial][step][1]

                # Model-based prediction error: compare the reward against the transition-weighted
                # best second-stage value (max over second-stage choices, as in Daw et al. 2011)
                reward = data[trial][step][2]
                expected_reward = (transition_probs[first_stage_choice][0] * second_stage_values[0].max() +
                                   transition_probs[first_stage_choice][1] * second_stage_values[1].max())
                prediction_error_model_based = reward - expected_reward

                # SARSA(lambda) update of the second-stage values via the eligibility trace
                prediction_error_sarsa = reward - second_stage_values[first_stage_choice][second_stage_choice]
                eligibility_trace[first_stage_choice][second_stage_choice] += 1
                second_stage_values += self.alpha * prediction_error_sarsa * eligibility_trace
                eligibility_trace *= self.lambda_val

                # Update first-stage stimulus values: mix model-based and model-free
                # prediction errors with weight w, plus a stay-bias term
                stay_bias_term = self.stay_bias * first_stage_values[first_stage_choice]
                w = self.weight
                prediction_error_hybrid = w * prediction_error_model_based + (1 - w) * prediction_error_sarsa
                first_stage_values[first_stage_choice] += self.alpha * prediction_error_hybrid - stay_bias_term

            # Update second-stage values after the last step in the trial
            prediction_error = 0 - second_stage_values[first_stage_choice][second_stage_choice]
            second_stage_values += self.alpha * prediction_error * eligibility_trace

        # Convert first-stage values into action probabilities with a softmax choice rule
        first_stage_action_probs = F.softmax(self.beta * first_stage_values, dim=0)
        return first_stage_action_probs, second_stage_values
    def log_likelihood(self, data) -> torch.Tensor:
        # TODO Calculate the log-likelihood of the data given the model parameters
        # hint: use the hybrid algorithm to calculate the likelihood.
        pass
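    # A minimal sketch of the hint above (hypothetical helper, not part of the original gist):
    # run the hybrid algorithm and sum the log-probabilities of the observed first-stage choices,
    # assuming each trial in `data` is a sequence of (first_stage_choice, second_stage_choice, reward)
    # triples. A full fit would evaluate the choice probability trial by trial rather than once at the end.
    def example_log_likelihood(self, data) -> torch.Tensor:
        first_stage_action_probs, _ = self.hybrid_algorithm(data)
        log_lik = torch.tensor(0.0, device=self.device)
        for trial in range(self.num_trials):
            observed_choice = data[trial][0][0]  # first-stage choice on this trial
            log_lik = log_lik + torch.log(first_stage_action_probs[observed_choice])
        return log_lik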
    def log_prior(self) -> torch.Tensor:
        # TODO Calculate the log-prior of the model parameters (if using Bayesian approach)
        # hint: use the prior distribution of the parameters to calculate the log-prior.
        pass
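    # A minimal sketch of the hint above (hypothetical helper, not part of the original gist):
    # assumes Beta(1.1, 1.1) priors on the unit-interval parameters (alpha, lambda, w),
    # a Gamma(3, 1) prior on the inverse temperature beta, and a Normal(0, 1) prior on the
    # stay bias. These priors are illustrative, not taken from Daw et al. 2011.
    def example_log_prior(self) -> torch.Tensor:
        unit_prior = torch.distributions.Beta(torch.tensor(1.1, device=self.device),
                                              torch.tensor(1.1, device=self.device))
        temp_prior = torch.distributions.Gamma(torch.tensor(3.0, device=self.device),
                                               torch.tensor(1.0, device=self.device))
        stay_prior = torch.distributions.Normal(torch.tensor(0.0, device=self.device),
                                                torch.tensor(1.0, device=self.device))
        return (unit_prior.log_prob(self.alpha)
                + unit_prior.log_prob(self.lambda_val)
                + unit_prior.log_prob(self.weight)
                + temp_prior.log_prob(self.beta)
                + stay_prior.log_prob(self.stay_bias))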
    def log_posterior(self, data) -> torch.Tensor:
        # Calculate the log-posterior (log-likelihood + log-prior) of the model parameters
        log_likelihood = self.log_likelihood(data)
        log_prior = self.log_prior()
        return log_likelihood + log_prior
    def fit(self, data):
        # TODO this still depends on log_likelihood and log_prior being implemented
        # Define the objective function (negative log-posterior) and an optimizer over the free parameters
        parameters = [self.alpha, self.lambda_val, self.beta, self.stay_bias, self.weight]
        optimizer = optim.Adam(parameters, lr=0.01)

        for epoch in range(self.num_epochs):
            # Clear gradients
            optimizer.zero_grad()

            # Compute log-posterior and loss
            log_posterior = self.log_posterior(data)
            loss = -log_posterior

            # Backpropagation
            loss.backward()
            optimizer.step()

            # TODO Print progress or monitor convergence criteria

        # Return the estimated parameter values
        return self.get_parameters()
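
    # Helper added here (not in the original gist) so that fit() has something to return:
    # the current parameter estimates as plain Python floats.
    def get_parameters(self):
        return {
            "alpha": self.alpha.item(),
            "lambda": self.lambda_val.item(),
            "beta": self.beta.item(),
            "stay_bias": self.stay_bias.item(),
            "weight": self.weight.item(),
        }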
def fit_all_users():
    # Example usage
    data = pd.read_csv("Gtaskmb_E2_clean.csv")  # Replace with the path to your dataset
    num_first_stage_choices = 2
    num_second_stage_choices = 2
    num_epochs = 100

    # Create an instance of the learning model (num_epochs is passed by keyword so it is
    # not mistaken for num_trials)
    model = LearningModel(num_first_stage_choices, num_second_stage_choices, num_epochs=num_epochs)

    # TODO Run this estimation for each user in your dataset and save the estimated
    # parameters for each user (see the sketch after this function)
    estimated_parameters = model.fit(data)
    return estimated_parameters
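
# A sketch of the per-user TODO above (not part of the original gist), usable once
# log_likelihood and log_prior are implemented. It assumes the CSV has a subject
# identifier column named "subject_id" and that each user's rows can be passed to
# fit() in the format the model expects; both are assumptions about the dataset.
def example_fit_all_users(csv_path="Gtaskmb_E2_clean.csv"):
    data = pd.read_csv(csv_path)
    estimates = {}
    for user_id, user_data in data.groupby("subject_id"):
        model = LearningModel(num_epochs=100)
        estimates[user_id] = model.fit(user_data)
    return estimates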
if __name__ == "__main__":
    fit_all_users()
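
# A toy usage sketch (hypothetical, not part of the original gist) of the data format
# that hybrid_algorithm appears to assume: a list of trials, each a list of
# (first_stage_choice, second_stage_choice, reward) steps. It is not called above;
# import this file and call example_hybrid_demo() to try it.
def example_hybrid_demo():
    toy_data = [
        [(0, 1, 1.0)],  # trial 1: first-stage option 0, second-stage option 1, rewarded
        [(1, 0, 0.0)],  # trial 2: first-stage option 1, second-stage option 0, not rewarded
    ]
    demo_model = LearningModel(num_trials=len(toy_data))
    probs, values = demo_model.hybrid_algorithm(toy_data)
    print("first-stage action probabilities:", probs)
    print("second-stage values:", values)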