@SoutrikBandyopadhyay
Created August 3, 2018 11:16
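
A minimal Deep Q-Network (DQN) agent for OpenAI Gym's CartPole-v0, built with Keras. The agent stores transitions in an experience-replay buffer, acts epsilon-greedily with a decaying exploration rate, and trains a small fully connected network to approximate the action-value function Q(s, a). Training runs on CartPole-v0 by default; the commented-out lines in the main block switch it to MountainCar-v0.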
import os
import random
from collections import deque

import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

EPISODES = 5000

class DQNAgent:
    def __init__(self, stateSize, actionSize):
        self.stateSize = stateSize
        self.actionSize = actionSize
        self.memory = deque(maxlen=2000)  # experience-replay buffer
        # Hyperparameters
        self.gamma = 0.95         # discount factor
        self.epsilon = 1.0        # exploration rate, starts fully random
        self.epsilonMin = 0.01
        self.epsilonDecay = 0.95  # multiplicative decay applied after each replay
        self.learningRate = 0.01
        self.model = self.buildModel()
    def buildModel(self):
        # Small MLP mapping a state vector to one Q-value per action.
        model = Sequential()
        model.add(Dense(24, input_dim=self.stateSize, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.actionSize, activation='linear'))
        # Note: newer Keras versions spell this Adam(learning_rate=...).
        model.compile(loss='mse', optimizer=Adam(lr=self.learningRate))
        return model
    def remember(self, state, action, reward, next_state, done):
        # Remember, remember the 5th of November XD
        # Store one transition for later replay.
        self.memory.append((state, action, reward, next_state, done))
    def act(self, state):
        # Epsilon-greedy: explore with probability epsilon, otherwise pick
        # the action with the highest predicted Q-value.
        if np.random.random() <= self.epsilon:
            return random.randrange(self.actionSize)
        actionValues = self.model.predict(state)
        return np.argmax(actionValues[0])
    def replay(self, batchSize):
        # Action Replay
        minibatch = random.sample(self.memory, batchSize)  # Benedict Minibatch :-P
        for state, action, reward, next_state, done in minibatch:
            # Q-learning target: r for terminal steps,
            # r + gamma * max_a' Q(s', a') otherwise.
            if done:
                target = reward
            else:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            targetF = self.model.predict(state)
            targetF[0][action] = target  # only the taken action's target changes
            self.model.fit(state, targetF, epochs=1, verbose=0)
        if self.epsilon > self.epsilonMin:
            self.epsilon *= self.epsilonDecay
    def save(self, name):
        self.model.save_weights(name)

    def load(self, name):
        self.model.load_weights(name)

if __name__ == "__main__":
    env = gym.make('CartPole-v0')
    checkpointName = "./modelCheckpoints/cartpole-dqn.h5"
    # env = gym.make('MountainCar-v0')
    # checkpointName = "./modelCheckpoints/mountainCar-v0-dqn.h5"
    stateSize = env.observation_space.shape[0]
    actionSize = env.action_space.n
    agent = DQNAgent(stateSize, actionSize)
    os.makedirs(os.path.dirname(checkpointName), exist_ok=True)
    # Resume from an earlier checkpoint if one exists.
    if os.path.isfile(checkpointName):
        agent.load(checkpointName)
    done = False
    batchSize = 32
    for e in range(EPISODES):
        state = env.reset()
        state = np.reshape(state, [1, stateSize])
        # score = 0
        for time in range(500):
            # env.render()
            action = agent.act(state)
            nextState, reward, done, _ = env.step(action)
            reward = reward if not done else -10  # penalize losing the pole
            # score += reward
            nextState = np.reshape(nextState, [1, stateSize])
            agent.remember(state, action, reward, nextState, done)
            state = nextState
            if done:
                print("Episode {}/{}, score: {}, e: {}".format(e, EPISODES, time, agent.epsilon))
                break
            if len(agent.memory) > batchSize:
                agent.replay(batchSize)
        if e % 10 == 0:
            agent.save(checkpointName)
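
For anyone who wants to watch what the agent learned, here is a minimal evaluation sketch (not part of the original gist): it reloads the checkpoint saved above and rolls out one greedy episode with epsilon forced to 0. The totalReward accumulator and the loop structure are illustrative assumptions, and the snippet assumes it runs in the same file or session as the DQNAgent class.

# Hypothetical evaluation sketch: one greedy rollout with the trained weights.
env = gym.make('CartPole-v0')
agent = DQNAgent(env.observation_space.shape[0], env.action_space.n)
agent.load("./modelCheckpoints/cartpole-dqn.h5")  # checkpoint saved by the training loop
agent.epsilon = 0.0  # always exploit the learned Q-values

state = np.reshape(env.reset(), [1, agent.stateSize])
totalReward = 0
done = False
while not done:
    # env.render()  # uncomment to visualize the rollout
    action = agent.act(state)
    state, reward, done, _ = env.step(action)
    state = np.reshape(state, [1, agent.stateSize])
    totalReward += reward
print("Evaluation reward:", totalReward)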