#import gym
#env = gym.make('MountainCar-v0')
#env = gym.make('CartPole-v0')
#env = gym.make('MsPacman-v0')
import gym     # get the environment from OpenAI
import curses  # for keypress
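The commented-out lines show the environments that were tried; curses is imported so the environment can be driven from the keyboard. Below is a minimal sketch of that idea, assuming MountainCar-v0 and an arrow-key mapping chosen here purely for illustration (it is not the gist's own control scheme):

import gym      # the environment suite
import curses   # non-blocking keyboard input in the terminal

# illustrative key map for MountainCar-v0 (0 = push left, 1 = no push, 2 = push right)
KEY_TO_ACTION = {curses.KEY_LEFT: 0, curses.KEY_RIGHT: 2}

def main(stdscr):
    stdscr.nodelay(True)                      # getch() returns -1 when no key is pressed
    env = gym.make('MountainCar-v0')
    env.reset()
    done = False
    while not done:
        env.render()
        action = KEY_TO_ACTION.get(stdscr.getch(), 1)   # default: no push
        _, _, done, _ = env.step(action)
    env.close()

if __name__ == '__main__':
    curses.wrapper(main)                      # sets up and restores the terminal state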
import gym
import numpy as np
import cv2, math
import logging
import os
import scipy
from numpy import linalg as LA
from matplotlib import pyplot as plt
%matplotlib inline
from poleCart_RL import EpisodicAgent  # get the RL agent
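These imports set up the CartPole experiment; EpisodicAgent comes from the local poleCart_RL module. Reusing the imports above, a rough sketch of how such an agent is typically wired into an episode loop, assuming it follows the classic Gym example interface act(observation, reward, done):

env = gym.make('CartPole-v0')
agent = EpisodicAgent(env.action_space)

for episode in range(100):
    ob = env.reset()
    reward, done = 0, False
    while not done:
        # nearest-neighbour lookup over previously seen states picks the action
        action = agent.act(ob, reward, done)
        ob, reward, done, _ = env.step(action)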
""" Quick script for an "Episodic Controller" Agent, i.e. nearest neighbor """ | |
import logging | |
import os | |
#import tempfile | |
import numpy as np | |
import gym | |
class EpisodicAgent(object): | |
def __init__(self, action_space): |
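The gist only shows the constructor signature. As a point of reference (not the gist's exact implementation), a stripped-down nearest-neighbour episodic controller keeps a memory of (state, action, return) tuples and, at each step, votes over the actions taken in the k most similar past states; all names and hyperparameters below are assumptions:

import numpy as np

class SimpleEpisodicAgent(object):
    """Illustrative nearest-neighbour agent, simplified for exposition."""
    def __init__(self, action_space, k=50, epsilon=0.1):
        self.action_space = action_space
        self.k = k                    # neighbours to vote over
        self.epsilon = epsilon        # exploration rate
        self.memory = []              # list of (state, action, discounted_return)

    def act(self, observation):
        if len(self.memory) < self.k or np.random.rand() < self.epsilon:
            return self.action_space.sample()            # explore
        states = np.array([s for s, _, _ in self.memory])
        nearest = np.argsort(np.linalg.norm(states - observation, axis=1))[:self.k]
        # average return of each action among the neighbours; pick the best
        returns = {}
        for idx in nearest:
            _, a, g = self.memory[idx]
            returns.setdefault(a, []).append(g)
        return max(returns, key=lambda a: np.mean(returns[a]))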
self.demo_batch_size = 128

def initDemoBuffer(self, demoDataFile, update_stats=True):
    # Initialize the demo buffer with the recorded demonstration data; the demo data is also normalized.

def sample_batch(self):
    if self.bc_loss:
        transitions = self.buffer.sample(self.batch_size - self.demo_batch_size)
        global demoBuffer
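        # The gist breaks off here. A hedged sketch of how sample_batch typically
        # continues in this DDPG-from-demonstrations setup (NOT the author's exact
        # code): the rest of the batch is drawn from the demonstration buffer and
        # concatenated, assuming both buffers return dicts of numpy arrays.
        demo_transitions = demoBuffer.sample(self.demo_batch_size)
        for key in transitions.keys():
            transitions[key] = np.concatenate(
                (transitions[key], demo_transitions[key]), axis=0)
    else:
        # without the behaviour-cloning loss, the whole batch is regular replay
        transitions = self.buffer.sample(self.batch_size)
    return transitions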
self.lambda1 = 0.001
self.lambda2 = 0.0078

def _create_network(self, reuse=False):
    mask = np.concatenate((np.zeros(self.batch_size - self.demo_batch_size), np.ones(self.demo_batch_size)), axis=0)
    target_Q_pi_tf = self.target.Q_pi_tf
    clip_range = (-self.clip_return, 0. if self.clip_pos_returns else np.inf)
    target_tf = tf.clip_by_value(batch_tf['r'] + self.gamma * target_Q_pi_tf, *clip_range)  # y = r + gamma * Q(pi)
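    # Sketch of how lambda1, lambda2 and the mask usually combine into the
    # behaviour-cloning auxiliary loss. This is an assumption, not the gist's
    # code: tensor names such as main.pi_tf, main.Q_pi_tf and batch_tf['u']
    # follow the OpenAI Baselines DDPG/HER conventions this snippet resembles.
    mask_tf = tf.constant(mask, dtype=tf.float32)   # 0 for replay samples, 1 for demo samples
    # on demo transitions only, penalise the distance between the policy action
    # pi(s) and the demonstrated action u stored in the batch
    per_sample_bc = tf.reduce_sum(tf.square(self.main.pi_tf - batch_tf['u']), axis=1)
    cloning_loss_tf = tf.reduce_sum(mask_tf * per_sample_bc)
    # combined actor loss: lambda1 weights the usual DDPG objective (maximise Q),
    # lambda2 weights the imitation term
    self.pi_loss_tf = (-self.lambda1 * tf.reduce_mean(self.main.Q_pi_tf)
                       + self.lambda2 * cloning_loss_tf)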
# IRL algorithm developed for the toy car obstacle avoidance problem, for testing.
import numpy as np
import logging
import scipy
from playing import play            # get the RL test agent; gives out feature expectations after 2000 frames
from nn import neural_net           # construct the NN and send it to playing
from cvxopt import matrix
from cvxopt import solvers          # convex optimization library
from flat_game import carmunk       # get the environment
from learning import IRL_helper     # get the reinforcement learner
if __name__ == '__main__':
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    randomPolicyFE = [7.74363107, 4.83296402, 6.1289194, 0.39292849, 2.0488831, 0.65611318, 6.90207523, 2.46475348]
    # ^the random policy feature expectations
    expertPolicyYellowFE = [7.5366e+00, 4.6350e+00, 7.4421e+00, 3.1817e-01, 8.3398e+00, 1.3710e-08, 1.3419e+00, 0.0000e+00]
    # ^feature expectations for the "follow yellow obstacles" behavior
    expertPolicyRedFE = [7.9100e+00, 5.3745e-01, 5.2363e+00, 2.8652e+00, 3.3120e+00, 3.6478e-06, 3.82276074e+00, 1.0219e-17]
    # ^feature expectations for the "follow red obstacles" behavior
    expertPolicyBrownFE = [5.2210e+00, 5.6980e+00, 7.7984e+00, 4.8440e-01, 2.0885e-04, 9.2215e+00, 2.9386e-01, 4.8498e-17]
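Each of these vectors is a feature expectation: the discounted sum of the state features visited while a given policy runs. For context, a hedged sketch of how such a vector can be accumulated over the 2000 frames mentioned above, assuming an 8-dimensional feature vector per frame; the callables here are hypothetical stand-ins for the carmunk environment and the policy being evaluated:

import numpy as np

def compute_feature_expectations(get_features, choose_action, step,
                                 gamma=0.9, num_frames=2000):
    """mu = sum_t gamma^t * phi(s_t); gamma and the helpers are illustrative."""
    feature_expectations = np.zeros(8)   # 8 features in the toy-car domain
    state = get_features()
    for t in range(num_frames):
        action = choose_action(state)
        state = step(action)             # advance the simulation by one frame
        feature_expectations += (gamma ** t) * np.array(state)
    return feature_expectations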
def optimization(self):  # implement the convex optimization, posed as an SVM problem
    m = len(self.expertPolicy)
    P = matrix(2.0 * np.eye(m), tc='d')  # min ||w||
    q = matrix(np.zeros(m), tc='d')
    policyList = [self.expertPolicy]
    h_list = [1]
    for i in self.policiesFE.keys():
        policyList.append(self.policiesFE[i])
        h_list.append(1)
    policyMat = np.matrix(policyList)
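    # The snippet cuts off after policyMat is built. One plausible way to finish
    # the SVM-style QP (a sketch, not necessarily the author's exact code):
    # min ||w||^2  s.t.  w·mu_expert >= 1  and  w·mu_pi <= -1, i.e. the expert is
    # labelled +1 and every learned policy -1, expressed as G w <= h for cvxopt.
    policyMat[0] = -1 * policyMat[0]               # flip the expert row so its constraint becomes >= 1
    G = matrix(policyMat, tc='d')
    h = matrix(-np.array(h_list, dtype='d'), tc='d')
    sol = solvers.qp(P, q, G, h)                   # solve the quadratic program
    weights = np.squeeze(np.asarray(sol['x']))
    return weights / np.linalg.norm(weights)       # normalised reward weights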
def policyListUpdater(self, W, i):  # add the new policy's FE to the list and compute the difference
    tempFE = self.getRLAgentFE(W, i)  # get feature expectations of a new policy trained with the input weights
    hyperDistance = np.abs(np.dot(W, np.asarray(self.expertPolicy) - np.asarray(tempFE)))  # hyperdistance = t
    self.policiesFE[hyperDistance] = tempFE
    return hyperDistance  # t = (weights.transpose) * (expert - newPolicy)
def getRLAgentFE(self, W, i):  # get the feature expectations of a new policy using the RL agent
    IRL_helper(W, self.behavior, self.num_frames, i)  # train the agent and save the model in a file used below
    saved_model = 'saved-models_' + self.behavior + str(i) + '/164-150-100-50000-' + str(self.num_frames) + '.h5'  # use the saved model to get the FE
    model = neural_net(self.num_states, [164, 150], saved_model)
    return play(model, W)  # return feature expectations by executing the learned policy
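Putting the pieces together, the outer apprenticeship-learning loop alternates between fitting new reward weights and training a policy under them until the margin t falls below a threshold. A hedged sketch of that driver loop; the method name optimalWeightFinder and the epsilon value are illustrative, while optimization and policyListUpdater refer to the methods shown above:

def optimalWeightFinder(self, epsilon=0.1):
    i = 1
    while True:
        W = self.optimization()            # new reward weights from the QP
        t = self.policyListUpdater(W, i)   # train a policy under W, measure margin t
        if t <= epsilon:                   # expert behaviour matched closely enough
            break
        i += 1
    return W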