WhatIThinkAbout · February 3, 2021 13:20
diff --git a/Bellman_Expectation.py b/Bellman_Expectation.py
 def get_state_value( state , start_values ):
    ''' return the expected value of `state`, backed up one step from `start_values`
      - this implements equation 9

      Every entry produced by get_π( state ) is indexed as [0] = target state and
      [1] = probability of choosing that action. Every entry produced by
      get_p( state, target ) is a mapping with the keys 'next_state', 'reward'
      and 'probability'. The reward and the looked-up successor value are added
      as-is (no discount multiplier is applied in this function). '''

    expected_value = 0

    # outer sum: weight each action's value by the policy's probability of picking it
    for choice in get_π( state ):
        destination, choice_weight = choice[0], choice[1]

        # inner sum: expectation over the (next state, reward) outcomes of this action
        action_value = 0
        for outcome in get_p( state, destination ):
            successor, payoff, likelihood = (
                outcome['next_state'],
                outcome['reward'],
                outcome['probability'],
            )
            action_value += likelihood * (payoff + start_values[successor])

        expected_value += choice_weight * action_value

    return expected_value
	def get_state_value( state , start_values ):
		''' calculate the value of the specified state using the supplied current state values
		- this implements equation 9

		state        : the state whose value is being computed; passed to get_π and get_p
		start_values : current value estimates, indexed by next state

		Each entry from get_π( state ) is read as [0] = target state and
		[1] = action probability. Each entry from get_p( state, target_state )
		is read through the keys 'next_state', 'reward' and 'probability'.

		Returns the sum, over actions, of action probability times the action's
		q-value. No discount multiplier is applied to the next-state value here. '''

		# iterate over all possible actions for the state
		state_value = 0
		for action in get_π( state ):

			target_state = action[0]
			action_probability = action[1]

			# iterate over all possible next states and rewards for the action
			q_value = 0
			for action_dynamics in get_p( state, target_state ):
				next_state = action_dynamics['next_state']
				reward = action_dynamics['reward']
				probability = action_dynamics['probability']

				# one-step backup: immediate reward plus the current estimate of the successor
				q_value += probability * (reward + start_values[next_state])

			# weight the action's value by how likely the policy is to choose it
			state_value += action_probability * q_value

		return state_value
No results found