WhatIThinkAbout’s gists

WhatIThinkAbout / socket_charge.py

Created October 31, 2020 10:15

	def charge(self):
	""" return a random amount of charge

	NOTE(review): only the sampling step is visible in this excerpt; the statement
	that hands the value back to the caller is presumably further down - confirm. """

	# the reward is drawn from a Gaussian distribution with unit variance centred on the true value 'q':
	# a standard-normal sample is shifted by self.q
	value = np.random.randn() + self.q

WhatIThinkAbout / thompson_gaussian_sample.py

Created October 31, 2020 10:26

	def sample(self):
		"""Return one value drawn from the posterior normal distribution.

		The posterior is described by its mean ``self.μ_0`` and by ``self.τ_0``,
		which is used as a precision (1 / variance): a standard-normal draw is
		divided by ``sqrt(τ_0)`` to give it the posterior's standard deviation
		and is then shifted by the mean.
		"""
		return (np.random.randn() / np.sqrt(self.τ_0)) + self.μ_0

WhatIThinkAbout / Iterative_Policy_Evaluation.py

Created December 31, 2020 16:02

	def get_state_value( state_row, state_col, start_values ):
	''' calculate the state value for a single state

	state_row, state_col: passed to get_next_states to look up the successors of the state
	start_values: the current state values, indexed as start_values[row, col]

	NOTE(review): no return statement is visible in this excerpt - presumably
	state_value is returned further down; confirm. '''

	# get the list of next states of the current state (and, presumably, how many there are)
	next_states, number_of_states = get_next_states( state_row, state_col )

	# accumulate the contribution of each next state, every one weighted by 1/number_of_states
	state_value = 0
	for next_state in next_states:
	# add the reward for moving to the next state (always -1) and the value of the next state
	state_value += (1/number_of_states) * (-1 + start_values[next_state[0],next_state[1]])

WhatIThinkAbout / Bellman_Expectation.py

Created February 3, 2021 13:20

	def get_state_value( state , start_values ):
	''' calculate the value of the specified state using the supplied current state values
	- this implements equation 9

	NOTE(review): the loop body is cut off in this excerpt, so how start_values is
	used and what is returned cannot be seen from here. '''

	# iterate over all possible actions for the state
	state_value = 0
	for action in get_π( state ):

	# each entry yielded by get_π is indexed: [0] is the target state, [1] the probability of the action
	target_state = action[0]
	action_probability = action[1]

WhatIThinkAbout / UnknownMeanUnknownVarianceUpdate.py

Last active March 3, 2021 13:36

Update the hyper-parameters for unknown mean and unknown variance.

	def update(self,x):
		"""Fold the single new value 'x' into the gamma hyper-parameters α and β.

		Uses the normal-gamma update for one observation:

			α <- α + n/2
			β <- β + (n*v / (v + n)) * (x - μ_0)**2 / 2

		with n = 1 (one new value per call) and v = self.n (read before this
		call changes anything; presumably the number of values seen so far).

		The previous text of the β line had lost its multiplication operators
		(``nv`` was an undefined name and two parenthesised terms were simply
		juxtaposed), so it could not run; the products are written out here.

		NOTE(review): the steps that advance self.n and refresh self.μ_0 are
		not part of the lines visible here - confirm that they follow.
		"""
		n = 1
		v = self.n

		self.α = self.α + n/2
		self.β = self.β + ((n*v/(v + n)) * (((x - self.μ_0)**2)/2))

	# estimate the variance - calculate the mean from the gamma hyper-parameters

WhatIThinkAbout / UnknownMeanKnownVarianceUpdate.py

Last active March 3, 2021 13:30

Update function for the conjugate prior hyper-parameters for a normal distribution with unknown mean and known variance.

	def update(self,x):
		"""Record the new value 'x' and recompute the posterior mean.

		Increments the usage count ``self.n``, appends 'x' to the sample list
		``self.x`` and then sets

			μ_0 <- (τ_0 * μ_0 + τ * sum(samples)) / (τ_0 + n * τ)

		where ``self.τ_0`` and ``self.τ`` are used as the precisions weighting
		the current mean and the data respectively.

		NOTE(review): self.μ_0 is overwritten with the result and is then read
		again as the prior mean on the next call, while the sum over *all*
		samples is folded in each time - confirm that this is intended.
		"""
		self.n += 1
		self.x.append(x)  # append the new value to the list of samples

		# update the mean of the posterior
		sample_total = np.array(self.x).sum()
		weighted_prior = self.τ_0 * self.μ_0
		weighted_data = self.τ * sample_total
		total_precision = self.τ_0 + (self.n * self.τ)
		self.μ_0 = (weighted_prior + weighted_data) / total_precision

WhatIThinkAbout / KnownMeanUnknownVarianceUpdate.py

Last active March 5, 2021 16:56

Update function for conjugate prior hyper-parameters for normal distribution with known mean and unknown variance

	def update(self,x):
		"""Record the new value 'x' and recompute the gamma hyper-parameters.

		Increments the usage count ``self.n``, appends 'x' to the sample list
		``self.x`` and then rebuilds both hyper-parameters from scratch using
		every stored sample and the known mean ``self.μ``:

			α <- n / 2
			β <- sum((x_i - μ)**2) / 2

		Any previous values of α and β are overwritten, not accumulated.
		"""
		self.n += 1
		self.x.append(x)  # append the new value to the list of samples

		squared_deviations = (np.array(self.x) - self.μ)**2
		self.α = self.n/2
		self.β = squared_deviations.sum()/2

WhatIThinkAbout / UnknownMeanUnknownVarianceSample.py

Last active March 7, 2021 12:08

Sample from a normal-gamma distribution

	def sample(self):
		"""Sample a value from the currently estimated normal distribution.

		A precision is first drawn from a gamma distribution with shape
		``self.α`` and scale ``1/self.β`` (``np.random.gamma`` takes a scale,
		so β is inverted). The returned value is then drawn from a normal
		distribution with mean ``self.μ_0`` and variance ``1/precision``.
		"""
		precision = np.random.gamma(self.α, 1/self.β)

		# fall back to a small fixed precision (variance 1000) when nothing has
		# been observed yet, or when the draw is exactly zero and could not be
		# inverted below
		if precision == 0 or self.n == 0:
			precision = 0.001

		estimated_variance = 1/precision
		return np.random.normal( self.μ_0, np.sqrt(estimated_variance))

WhatIThinkAbout / Cartpole_OpenAI_Gym.py

Last active January 3, 2023 13:21

Cartpole OpenAI Gym

	import gymnasium as gym

	###########################################
	# Stage 1 - Initialization
	###########################################

	# create the cartpole environment ('CartPole-v1'); render_mode="human" is passed so that
	# the environment displays itself while it runs (see the gymnasium.make documentation)
	env = gym.make('CartPole-v1', render_mode="human")

	# run for 10 episodes
	# NOTE(review): the episode loop itself is not visible in this excerpt

WhatIThinkAbout / BabyRobotEnv_V0.py

Last active January 3, 2023 17:11

Baby Robot Environment V0

Steve Roberts WhatIThinkAbout