import os
import random
from collections import deque

import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

EPISODES = 5000


class DQNAgent:
    def __init__(self, stateSize, actionSize):
        self.stateSize = stateSize
        self.actionSize = actionSize
        self.memory = deque(maxlen=2000)  # replay buffer of past transitions

        # Hyperparameters
        self.gamma = 0.95          # discount factor
        self.epsilon = 1.0         # exploration rate, starts fully random
        self.epsilonMin = 0.01     # floor for epsilon
        self.epsilonDecay = 0.95   # multiplicative decay applied after each replay
        self.learningRate = 0.01
        self.model = self.buildModel()

    def buildModel(self):
        # Two-hidden-layer MLP mapping a state vector to one Q-value per action
        model = Sequential()
        model.add(Dense(24, input_dim=self.stateSize, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.actionSize, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learningRate))
        return model

    def remember(self, state, action, reward, next_state, done):
        # Remember Remember the 5th of November XD
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        # Epsilon-greedy: explore with probability epsilon, otherwise act greedily
        if np.random.random() <= self.epsilon:
            return random.randrange(self.actionSize)
        actionValues = self.model.predict(state)
        return np.argmax(actionValues[0])

    def replay(self, batchSize):
        # Experience replay: fit the network towards one-step Q-learning targets
        minibatch = random.sample(self.memory, batchSize)  # Benedict Minibatch :-P
        for state, action, reward, next_state, done in minibatch:
            if done:
                target = reward
            else:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            targetF = self.model.predict(state)
            targetF[0][action] = target
            self.model.fit(state, targetF, epochs=1, verbose=0)
        # Decay exploration once per replay
        if self.epsilon > self.epsilonMin:
            self.epsilon *= self.epsilonDecay
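
    # Worked example of the target above (illustrative numbers, not from a real run):
    # with gamma = 0.95, reward = 1.0 and max_a' Q(next_state, a') = 2.0,
    # the target for the chosen action is 1.0 + 0.95 * 2.0 = 2.9.
    # All other entries of targetF keep the network's current predictions,
    # so the MSE loss only moves the Q-value of the action actually taken.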

    def save(self, name):
        self.model.save_weights(name)

    def load(self, name):
        self.model.load_weights(name)


if __name__ == "__main__":
    env = gym.make('CartPole-v0')
    checkpointName = "./modelCheckpoints/cartpole-dqn.h5"
    # env = gym.make('MountainCar-v0')
    # checkpointName = "./modelCheckpoints/mountainCar-v0-dqn.h5"

    stateSize = env.observation_space.shape[0]
    actionSize = env.action_space.n
    agent = DQNAgent(stateSize, actionSize)

    # Resume from an earlier checkpoint if one exists
    os.makedirs(os.path.dirname(checkpointName), exist_ok=True)
    if os.path.isfile(checkpointName):
        agent.load(checkpointName)

    done = False
    batchSize = 32

    for e in range(EPISODES):
        state = env.reset()
        state = np.reshape(state, [1, stateSize])
        # score = 0
        for time in range(500):
            # env.render()
            action = agent.act(state)
            nextState, reward, done, _ = env.step(action)
            reward = reward if not done else -10  # penalise early termination
            # score += reward
            nextState = np.reshape(nextState, [1, stateSize])
            agent.remember(state, action, reward, nextState, done)
            state = nextState
            if done:
                print("Episode {}/{}, score: {}, e: {}".format(e, EPISODES, time, agent.epsilon))
                break
        if len(agent.memory) > batchSize:
            agent.replay(batchSize)
        if e % 10 == 0:
            agent.save(checkpointName)
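
Once training has produced a checkpoint, the saved weights can be reused for a quick greedy rollout. The snippet below is a minimal sketch, not part of the original gist: it assumes the checkpoint path used above already exists, that DQNAgent is defined or imported from this file, and the same old gym API (4-tuple step return, CartPole-v0). Exploration is switched off by setting epsilon to 0.

# Optional evaluation sketch (assumes the training checkpoint above exists).
import gym
import numpy as np

env = gym.make('CartPole-v0')
stateSize = env.observation_space.shape[0]
agent = DQNAgent(stateSize, env.action_space.n)
agent.load("./modelCheckpoints/cartpole-dqn.h5")
agent.epsilon = 0.0  # act greedily, no random exploration

state = np.reshape(env.reset(), [1, stateSize])
done = False
score = 0
while not done:
    action = agent.act(state)
    state, reward, done, _ = env.step(action)
    state = np.reshape(state, [1, stateSize])
    score += reward
print("Evaluation score: {}".format(score))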