@Vivraan
Created June 26, 2023 01:11
ChatGPT's best attempt at the two-stage reinforcement learning task analysis from Daw et al. (2011).
import torch
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd

class LearningModel:
    def __init__(self, num_first_stage_choices=2, num_second_stage_choices=2, num_trials=2, num_epochs=1000):
        # Task dimensions and fitting settings
        self.num_first_stage_choices = num_first_stage_choices
        self.num_second_stage_choices = num_second_stage_choices
        self.num_trials = num_trials
        self.num_epochs = num_epochs

        # Place the parameter tensors on the GPU if one is available
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Free parameters of the hybrid model: learning rate (alpha), eligibility-trace
        # decay (lambda), inverse temperature (beta), perseveration (stay) bias, and the
        # model-based weight (w). They are created with requires_grad=True so the
        # optimizer in fit() can update them directly.
        self.alpha = torch.tensor(0.1, device=self.device, requires_grad=True)
        self.lambda_val = torch.tensor(0.5, device=self.device, requires_grad=True)
        self.beta = torch.tensor(1.0, device=self.device, requires_grad=True)
        self.stay_bias = torch.tensor(0.2, device=self.device, requires_grad=True)
        self.weight = torch.tensor(0.5, device=self.device, requires_grad=True)

    def hybrid_algorithm(self, data):
        # Fixed transition structure of the two-stage task (common = 0.7, rare = 0.3)
        transition_probs = torch.tensor([[0.7, 0.3], [0.3, 0.7]],
                                        dtype=torch.float32, device=self.device)

        # First-stage and second-stage stimulus values
        first_stage_values = torch.zeros(self.num_first_stage_choices,
                                         dtype=torch.float32, device=self.device)
        second_stage_values = torch.zeros((self.num_first_stage_choices, self.num_second_stage_choices),
                                          dtype=torch.float32, device=self.device)

        for trial in range(self.num_trials):
            # Reset the eligibility trace at the beginning of each trial
            eligibility_trace = torch.zeros((self.num_first_stage_choices, self.num_second_stage_choices),
                                            dtype=torch.float32, device=self.device)

            # Initialize first-stage and second-stage choices
            first_stage_choice = 0
            second_stage_choice = 0

            for step in range(len(data[trial])):
                # Record the choices made on the first step of the trial
                if step == 0:
                    first_stage_choice = data[trial][step][0]
                    second_stage_choice = data[trial][step][1]

                # Model-based expected value of the chosen first-stage action:
                # transition-weighted best second-stage value (cf. Daw et al., 2011)
                reward = data[trial][step][2]
                expected_reward = (transition_probs[first_stage_choice][0] * second_stage_values[0].max() +
                                   transition_probs[first_stage_choice][1] * second_stage_values[1].max())
                prediction_error_model_based = reward - expected_reward

                # Model-free (SARSA) prediction error with an eligibility trace
                prediction_error_sarsa = reward - second_stage_values[first_stage_choice][second_stage_choice]
                eligibility_trace[first_stage_choice][second_stage_choice] += 1
                second_stage_values += self.alpha * prediction_error_sarsa * eligibility_trace
                eligibility_trace *= self.lambda_val

                # Update first-stage stimulus values with a weighted mix of the two
                # prediction errors, minus a perseveration (stay-bias) term
                stay_bias_term = self.stay_bias * first_stage_values[first_stage_choice]
                w = self.weight
                prediction_error_hybrid = w * prediction_error_model_based + (1 - w) * prediction_error_sarsa
                first_stage_values[first_stage_choice] += self.alpha * prediction_error_hybrid - stay_bias_term

            # Update second-stage values after the last step in the trial
            prediction_error = 0 - second_stage_values[first_stage_choice][second_stage_choice]
            second_stage_values += self.alpha * prediction_error * eligibility_trace

        # Convert first-stage values to action probabilities with a softmax choice rule
        first_stage_action_probs = F.softmax(self.beta * first_stage_values, dim=0)
        return first_stage_action_probs, second_stage_values

    def log_likelihood(self, data) -> torch.Tensor:
        # TODO Calculate the log-likelihood of the data given the model parameters.
        # Hint: use the hybrid algorithm to calculate the likelihood.
        pass
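        # A rough sketch of one possibility (an assumption, not part of the original gist):
        # assuming `data` is a nested list of (first_choice, second_choice, reward) triples
        # per trial, sum the log-probability of each observed first-stage choice. Note that
        # hybrid_algorithm currently returns only the final softmax probabilities, so a
        # faithful trial-by-trial likelihood would need the probabilities computed inside
        # its trial loop.
        #
        #     first_stage_action_probs, _ = self.hybrid_algorithm(data)
        #     log_lik = torch.tensor(0.0, device=self.device)
        #     for trial in range(self.num_trials):
        #         observed_first_choice = data[trial][0][0]
        #         log_lik = log_lik + torch.log(first_stage_action_probs[observed_first_choice])
        #     return log_lik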

    def log_prior(self) -> torch.Tensor:
        # TODO Calculate the log-prior of the model parameters (if using a Bayesian approach).
        # Hint: use the prior distribution of the parameters to calculate the log-prior.
        pass
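        # A rough sketch of one possibility (an assumption, not part of the original gist):
        # e.g. Beta(2, 2) priors on the [0, 1]-bounded parameters and a Gamma(3, 1) prior
        # on the inverse temperature, using torch.distributions:
        #
        #     beta_prior = torch.distributions.Beta(2.0, 2.0)
        #     gamma_prior = torch.distributions.Gamma(3.0, 1.0)
        #     return (beta_prior.log_prob(self.alpha)
        #             + beta_prior.log_prob(self.lambda_val)
        #             + beta_prior.log_prob(self.weight)
        #             + gamma_prior.log_prob(self.beta))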

    def log_posterior(self, data) -> torch.Tensor:
        # Calculate the log-posterior (log-likelihood + log-prior) of the model parameters
        log_likelihood = self.log_likelihood(data)
        log_prior = self.log_prior()
        return log_likelihood + log_prior

    def fit(self, data):
        # Minimise the negative log-posterior over the free parameters with Adam.
        # The parameters were created with requires_grad=True in __init__, so they
        # can be handed to the optimizer directly.
        parameters = [self.alpha, self.lambda_val, self.beta, self.stay_bias, self.weight]
        optimizer = optim.Adam(parameters, lr=0.01)
        for epoch in range(self.num_epochs):
            # Clear gradients
            optimizer.zero_grad()
            # Compute log-posterior and loss
            log_posterior = self.log_posterior(data)
            loss = -log_posterior
            # Backpropagation
            loss.backward()
            optimizer.step()
            # TODO Print progress or monitor convergence criteria
        # Return the estimated parameter values
        return self.get_parameters()

    def get_parameters(self):
        # Return the fitted parameters as plain Python floats
        return {"alpha": self.alpha.item(), "lambda": self.lambda_val.item(),
                "beta": self.beta.item(), "stay_bias": self.stay_bias.item(),
                "weight": self.weight.item()}

def fit_all_users():
    # Example usage
    data = pd.read_csv("Gtaskmb_E2_clean.csv")  # Replace with the path to your dataset
    num_first_stage_choices = 2
    num_second_stage_choices = 2
    num_epochs = 100

    # Create an instance of the learning model (num_epochs is passed by keyword so it
    # does not fill the num_trials slot, which is the third positional parameter)
    model = LearningModel(num_first_stage_choices, num_second_stage_choices, num_epochs=num_epochs)

    # TODO Run this estimation for each user in your dataset and save the estimated
    # parameters for each user (a rough per-user loop is sketched below)
    estimated_parameters = model.fit(data)
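    # A rough sketch of the per-user loop (this assumes the CSV has a "subject" column
    # identifying each participant, which is an assumption about the dataset rather
    # than something stated in this gist):
    #
    #     all_parameters = {}
    #     for subject_id, subject_data in data.groupby("subject"):
    #         model = LearningModel(num_first_stage_choices, num_second_stage_choices,
    #                               num_epochs=num_epochs)
    #         all_parameters[subject_id] = model.fit(subject_data)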

if __name__ == "__main__":
    fit_all_users()