from linear_model import LinearModel
import numpy as np


class HillClimbing:
    """Stochastic hill-climbing agent driven by a ``LinearModel``.

    The agent accumulates reward over an episode.  When the episode ends, the
    episode total is compared with the best total seen so far and the model is
    either told to keep its current values (``set_best_vals``) or to roll back
    (``revert``).  Acceptance is probabilistic: the acceptance probability is
    ``update_percent`` plus the relative change against the best score.

    This class never calls ``mutate`` itself; the caller decides when the
    model is perturbed.
    """

    def __init__(self, action_space, observation_space, noise=0.5, update_percent=0.2):
        """Create the agent and its underlying linear model.

        Args:
            action_space: Stored on the instance; not otherwise used by this class.
            observation_space: Must expose a ``low`` attribute supporting
                ``len()``; that length is passed to ``LinearModel``.
            noise: Value forwarded to ``LinearModel.mutate`` by :meth:`mutate`.
            update_percent: Base acceptance probability.  An episode that ties
                the current best is accepted with this probability; with a
                positive best score, an episode that falls more than this
                fraction below the best is never accepted.
        """
        self.name = "Hill Climbing"
        self.alg_id = "1"  # alg_WKinUO3TNabzwPeaD7A (presumably a former ID; kept for reference)
        self.action_space = action_space
        self.observation_space = observation_space

        self.model = LinearModel(len(self.observation_space.low))
        self.noise = noise

        # NOTE(review): with a baseline of 0, a first episode whose total reward
        # is clearly negative yields a negative acceptance score and is always
        # reverted, so best_score never moves in negative-reward tasks.
        # Confirm whether that is intended.
        self.best_score = 0
        self.episode_reward = 0

        self.update_percent = update_percent  # Base acceptance probability (see act)

    def mutate(self):
        """Ask the model to mutate itself using the configured ``noise``."""
        self.model.mutate(self.noise)

    def act(self, observation, reward, done):
        """Score ``observation`` with the model and do end-of-episode bookkeeping.

        Args:
            observation: Passed unchanged to ``self.model.score``.
            reward: Added to the running episode total.
            done: When truthy, the episode total is judged against
                ``best_score`` and the running total is reset to 0.

        Returns:
            Whatever ``self.model.score(observation)`` returns.
        """
        # The action is computed before any accept/revert so it reflects the
        # model values that were in effect for this step.
        action = self.model.score(observation)

        self.episode_reward += reward

        if done:
            # Relative change versus the current best.  The magnitude of the
            # best score is used as the scale so the sign of the change is
            # preserved when best_score is negative, and the +0.01 keeps the
            # denominator strictly positive (no divide by zero).
            scale = abs(self.best_score) + 0.01
            relative_change = (self.episode_reward - self.best_score) / scale
            update = self.update_percent + relative_change

            # uniform() samples from [0, 1): update >= 1 always accepts,
            # update <= 0 never accepts.
            if np.random.uniform() < update:
                self.best_score = self.episode_reward
                self.model.set_best_vals()
            else:
                self.model.revert()

            self.episode_reward = 0

        return action