import numpy as np

from linear_model import LinearModel


class HillClimbing:
    """Stochastic hill-climbing agent built around a ``LinearModel`` policy.

    The agent accumulates the reward of the running episode.  When an episode
    finishes, the episode return is compared with the best return seen so far
    and the model is either told to keep its current values
    (``set_best_vals``) or to roll back (``revert``).  Acceptance is
    probabilistic, so a return slightly below the current best can still be
    accepted.
    """

    def __init__(self, action_space, observation_space,
                 noise: float = 0.5, update_percent: float = 0.2):
        """Create the agent.

        Args:
            action_space: Stored on the instance; not otherwise used here.
            observation_space: Must expose a ``low`` attribute supporting
                ``len()``; that length is passed to ``LinearModel``.
            noise: Value handed to ``LinearModel.mutate`` by ``mutate()``.
            update_percent: Maximum fraction below the current best that an
                episode return may fall and still have a chance of being
                accepted.
        """
        self.name = "Hill Climbing"
        # NOTE(review): a trailing comment in the original suggests this id
        # was once "alg_WKinUO3TNabzwPeaD7A" -- confirm before relying on it.
        self.alg_id = "1"
        self.action_space = action_space
        self.observation_space = observation_space
        self.model = LinearModel(len(self.observation_space.low))
        self.noise = noise
        self.best_score = 0
        self.episode_reward = 0
        # Maximum percent below current best the update will accept.
        self.update_percent = update_percent

    def mutate(self):
        """Ask the model to perturb itself using the configured noise."""
        self.model.mutate(self.noise)

    def act(self, observation, reward, done):
        """Choose an action and, at episode end, accept or reject the model.

        Args:
            observation: Passed unchanged to ``self.model.score``.
            reward: Added to the running episode return.
            done: When true, the episode return is evaluated against
                ``best_score`` and the running return is reset to 0.

        Returns:
            Whatever ``self.model.score(observation)`` returns.
        """
        action = self.model.score(observation)
        self.episode_reward += reward

        if done:
            # Relative change of this episode's return versus the best so
            # far.  The magnitude of best_score is used as the scale so the
            # sign of the ratio always matches the sign of the improvement,
            # including when best_score is negative; the +0.01 keeps the
            # denominator strictly positive (no division by zero).
            # NOTE(review): best_score starts at 0, so a sufficiently
            # negative first episode return yields a negative threshold and
            # is never accepted -- confirm whether that is intended.
            improvement = self.episode_reward - self.best_score
            scale = abs(self.best_score) + 0.01
            update = self.update_percent + improvement / scale

            # Accept with probability clamp(update, 0, 1).
            if np.random.uniform() < update:
                self.best_score = self.episode_reward
                self.model.set_best_vals()
            else:
                self.model.revert()

            # Start the next episode with a fresh accumulator.
            self.episode_reward = 0

        return action