SAC example configuration (DeepCoord DRL agent)
# copied from our private repo: https://github.com/RealVNF/rl-coordination/blob/master/res/config/agent/sac/sac_obs1_combi_64hid_099gam_00001tau_001alp_001ent_005exp.yaml
# for the DeepCoord DRL agent:
# https://github.com/RealVNF/DeepCoord
# module for configuring the RL agent
# configuration parameters are loaded and used both when running the agent via the CLI and via the interface
# all parameters are required; defaults are in comments
# observation_space = ['ingress_traffic', 'node_load']
observation_space:
- ingress_traffic
# Agent type: SAC or DDPG
agent_type: 'SAC'
# Shuffle the order of nodes in state and action; slower but should be more effective. default = False
shuffle_nodes: False
# Number of steps per episode
episode_steps: 200
# NN config for actor and critic
hidden_layers: [64]
# Delay config for normalization of reward
# Reward weights
flow_reward_weight: 2
delay_reward_weight: 1
gamma: 0.99
learning_rate: 0.01
buffer_size: 10000  # same as mem_limit?
learning_starts: 0
train_freq: 1
batch_size: 64
tau: 0.0001
ent_coef: 0.01
target_update_interval: 1
gradient_steps: 1
target_entropy: 'auto'
action_noise: None
random_exploration: 0.05
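
For reference, here is a minimal sketch of how a config like this could be consumed. The hyperparameter keys (gamma, tau, ent_coef, target_entropy, random_exploration, etc.) mirror the constructor arguments of the SAC implementation in stable-baselines 2.x, which DeepCoord builds on, so the sketch loads the YAML with PyYAML and forwards the values to that constructor. The file name sac_config.yaml and the Pendulum-v0 stand-in environment are assumptions for illustration; DeepCoord trains on its own coordination environment. One gotcha worth noting: YAML parses the bare word None as the string "None", not a null value, so it has to be normalized explicitly.

```python
# Minimal sketch, not DeepCoord's actual loading code: read the config above
# and pass it to stable-baselines' SAC (stable-baselines 2.x, TensorFlow 1.x).
import gym
import yaml
from stable_baselines import SAC
from stable_baselines.sac.policies import MlpPolicy

with open("sac_config.yaml") as f:  # assumed file name
    cfg = yaml.safe_load(f)

# YAML reads the bare word `None` as the string "None"; map it to Python None.
action_noise = None if cfg["action_noise"] == "None" else cfg["action_noise"]

# Stand-in continuous-action env; DeepCoord uses its own coordination env.
env = gym.make("Pendulum-v0")

model = SAC(
    MlpPolicy,
    env,
    gamma=cfg["gamma"],
    learning_rate=cfg["learning_rate"],
    buffer_size=cfg["buffer_size"],
    learning_starts=cfg["learning_starts"],
    train_freq=cfg["train_freq"],
    batch_size=cfg["batch_size"],
    tau=cfg["tau"],
    ent_coef=cfg["ent_coef"],
    target_update_interval=cfg["target_update_interval"],
    gradient_steps=cfg["gradient_steps"],
    target_entropy=cfg["target_entropy"],
    action_noise=action_noise,
    random_exploration=cfg["random_exploration"],
    policy_kwargs={"layers": cfg["hidden_layers"]},  # one hidden layer of 64 units
)
model.learn(total_timesteps=cfg["episode_steps"])  # short demo run
```

The remaining keys (observation_space, shuffle_nodes, episode_steps, flow_reward_weight, delay_reward_weight) are not SAC hyperparameters; they configure DeepCoord's environment and reward shaping and would be consumed there rather than by the RL algorithm itself.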