#import gym
#env = gym.make('MountainCar-v0')
#env = gym.make('CartPole-v0')
#env = gym.make('MsPacman-v0')
import gym     # get the environment from OpenAI
import curses  # for keypress
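The commented-out lines show the environments that were tried; curses is imported so the environment can be driven from the keyboard. Below is a minimal sketch of that idea, assuming MountainCar-v0 and an arrow-key mapping chosen here purely for illustration (it is not the gist's own control scheme):

import gym      # the environment suite
import curses   # non-blocking keyboard input in the terminal

# illustrative key map for MountainCar-v0 (0 = push left, 1 = no push, 2 = push right)
KEY_TO_ACTION = {curses.KEY_LEFT: 0, curses.KEY_RIGHT: 2}

def main(stdscr):
    stdscr.nodelay(True)                      # getch() returns -1 when no key is pressed
    env = gym.make('MountainCar-v0')
    env.reset()
    done = False
    while not done:
        env.render()
        action = KEY_TO_ACTION.get(stdscr.getch(), 1)   # default: no push
        _, _, done, _ = env.step(action)
    env.close()

if __name__ == '__main__':
    curses.wrapper(main)                      # sets up and restores the terminal state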
import gym
import numpy as np
import cv2, math
import logging
import os
import scipy
from numpy import linalg as LA
from matplotlib import pyplot as plt
%matplotlib inline
from poleCart_RL import EpisodicAgent  # get the RL agent
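These imports set up the CartPole experiment; EpisodicAgent comes from the local poleCart_RL module. Reusing the imports above, a rough sketch of how such an agent is typically wired into an episode loop, assuming it follows the classic Gym example interface act(observation, reward, done):

env = gym.make('CartPole-v0')
agent = EpisodicAgent(env.action_space)

for episode in range(100):
    ob = env.reset()
    reward, done = 0, False
    while not done:
        # nearest-neighbour lookup over previously seen states picks the action
        action = agent.act(ob, reward, done)
        ob, reward, done, _ = env.step(action)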
""" Quick script for an "Episodic Controller" Agent, i.e. nearest neighbor """ | |
import logging | |
import os | |
#import tempfile | |
import numpy as np | |
import gym | |
class EpisodicAgent(object): | |
def __init__(self, action_space): |
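The gist only shows the constructor signature. As a point of reference (not the gist's exact implementation), a stripped-down nearest-neighbour episodic controller keeps a memory of (state, action, return) tuples and, at each step, votes over the actions taken in the k most similar past states; all names and hyperparameters below are assumptions:

import numpy as np

class SimpleEpisodicAgent(object):
    """Illustrative nearest-neighbour agent, simplified for exposition."""
    def __init__(self, action_space, k=50, epsilon=0.1):
        self.action_space = action_space
        self.k = k                    # neighbours to vote over
        self.epsilon = epsilon        # exploration rate
        self.memory = []              # list of (state, action, discounted_return)

    def act(self, observation):
        if len(self.memory) < self.k or np.random.rand() < self.epsilon:
            return self.action_space.sample()            # explore
        states = np.array([s for s, _, _ in self.memory])
        nearest = np.argsort(np.linalg.norm(states - observation, axis=1))[:self.k]
        # average return of each action among the neighbours; pick the best
        returns = {}
        for idx in nearest:
            _, a, g = self.memory[idx]
            returns.setdefault(a, []).append(g)
        return max(returns, key=lambda a: np.mean(returns[a]))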
self.demo_batch_size = 128

def initDemoBuffer(self, demoDataFile, update_stats=True):
    # Initialize the demo buffer with the recorded demonstration data; the demo data is also normalized.

def sample_batch(self):
    if self.bc_loss:
        transitions = self.buffer.sample(self.batch_size - self.demo_batch_size)
        global demoBuffer
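        # The gist breaks off here. A hedged sketch of how sample_batch typically
        # continues in this DDPG-from-demonstrations setup (NOT the author's exact
        # code): the rest of the batch is drawn from the demonstration buffer and
        # concatenated, assuming both buffers return dicts of numpy arrays.
        demo_transitions = demoBuffer.sample(self.demo_batch_size)
        for key in transitions.keys():
            transitions[key] = np.concatenate(
                (transitions[key], demo_transitions[key]), axis=0)
    else:
        # without the behaviour-cloning loss, the whole batch is regular replay
        transitions = self.buffer.sample(self.batch_size)
    return transitions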
self.lambda1 = 0.001
self.lambda2 = 0.0078

def _create_network(self, reuse=False):
    mask = np.concatenate((np.zeros(self.batch_size - self.demo_batch_size), np.ones(self.demo_batch_size)), axis=0)
    target_Q_pi_tf = self.target.Q_pi_tf
    clip_range = (-self.clip_return, 0. if self.clip_pos_returns else np.inf)
    target_tf = tf.clip_by_value(batch_tf['r'] + self.gamma * target_Q_pi_tf, *clip_range)  # y = r + gamma * Q(pi)
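    # Sketch of how lambda1, lambda2 and the mask usually combine into the
    # behaviour-cloning auxiliary loss. This is an assumption, not the gist's
    # code: tensor names such as main.pi_tf, main.Q_pi_tf and batch_tf['u']
    # follow the OpenAI Baselines DDPG/HER conventions this snippet resembles.
    mask_tf = tf.constant(mask, dtype=tf.float32)   # 0 for replay samples, 1 for demo samples
    # on demo transitions only, penalise the distance between the policy action
    # pi(s) and the demonstrated action u stored in the batch
    per_sample_bc = tf.reduce_sum(tf.square(self.main.pi_tf - batch_tf['u']), axis=1)
    cloning_loss_tf = tf.reduce_sum(mask_tf * per_sample_bc)
    # combined actor loss: lambda1 weights the usual DDPG objective (maximise Q),
    # lambda2 weights the imitation term
    self.pi_loss_tf = (-self.lambda1 * tf.reduce_mean(self.main.Q_pi_tf)
                       + self.lambda2 * cloning_loss_tf)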
# IRL algorithm developed for the toy car obstacle avoidance problem, for testing.
import numpy as np
import logging
import scipy
from playing import play            # get the RL test agent; gives out feature expectations after 2000 frames
from nn import neural_net           # construct the NN and send it to playing
from cvxopt import matrix
from cvxopt import solvers          # convex optimization library
from flat_game import carmunk       # get the environment
from learning import IRL_helper     # get the reinforcement learner
if __name__ == '__main__':
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    randomPolicyFE = [7.74363107, 4.83296402, 6.1289194, 0.39292849, 2.0488831, 0.65611318, 6.90207523, 2.46475348]
    # ^the random policy feature expectations
    expertPolicyYellowFE = [7.5366e+00, 4.6350e+00, 7.4421e+00, 3.1817e-01, 8.3398e+00, 1.3710e-08, 1.3419e+00, 0.0000e+00]
    # ^feature expectations for the "follow yellow obstacles" behavior
    expertPolicyRedFE = [7.9100e+00, 5.3745e-01, 5.2363e+00, 2.8652e+00, 3.3120e+00, 3.6478e-06, 3.82276074e+00, 1.0219e-17]
    # ^feature expectations for the "follow red obstacles" behavior
    expertPolicyBrownFE = [5.2210e+00, 5.6980e+00, 7.7984e+00, 4.8440e-01, 2.0885e-04, 9.2215e+00, 2.9386e-01, 4.8498e-17]
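Each of these vectors is a feature expectation: the discounted sum of the state features visited while a given policy runs. For context, a hedged sketch of how such a vector can be accumulated over the 2000 frames mentioned above, assuming an 8-dimensional feature vector per frame; the callables here are hypothetical stand-ins for the carmunk environment and the policy being evaluated:

import numpy as np

def compute_feature_expectations(get_features, choose_action, step,
                                 gamma=0.9, num_frames=2000):
    """mu = sum_t gamma^t * phi(s_t); gamma and the helpers are illustrative."""
    feature_expectations = np.zeros(8)   # 8 features in the toy-car domain
    state = get_features()
    for t in range(num_frames):
        action = choose_action(state)
        state = step(action)             # advance the simulation by one frame
        feature_expectations += (gamma ** t) * np.array(state)
    return feature_expectations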
def optimization(self):  # implement the convex optimization, posed as an SVM problem
    m = len(self.expertPolicy)
    P = matrix(2.0 * np.eye(m), tc='d')  # min ||w||
    q = matrix(np.zeros(m), tc='d')
    policyList = [self.expertPolicy]
    h_list = [1]
    for i in self.policiesFE.keys():
        policyList.append(self.policiesFE[i])
        h_list.append(1)
    policyMat = np.matrix(policyList)
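    # The snippet cuts off after policyMat is built. One plausible way to finish
    # the SVM-style QP (a sketch, not necessarily the author's exact code):
    # min ||w||^2  s.t.  w·mu_expert >= 1  and  w·mu_pi <= -1, i.e. the expert is
    # labelled +1 and every learned policy -1, expressed as G w <= h for cvxopt.
    policyMat[0] = -1 * policyMat[0]               # flip the expert row so its constraint becomes >= 1
    G = matrix(policyMat, tc='d')
    h = matrix(-np.array(h_list, dtype='d'), tc='d')
    sol = solvers.qp(P, q, G, h)                   # solve the quadratic program
    weights = np.squeeze(np.asarray(sol['x']))
    return weights / np.linalg.norm(weights)       # normalised reward weights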
def policyListUpdater(self, W, i):  # add the new policy's FE to the list and compute the difference
    tempFE = self.getRLAgentFE(W, i)  # get feature expectations of a new policy trained with the input weights
    hyperDistance = np.abs(np.dot(W, np.asarray(self.expertPolicy) - np.asarray(tempFE)))  # hyperdistance = t
    self.policiesFE[hyperDistance] = tempFE
    return hyperDistance  # t = (weights.transpose) * (expert - newPolicy)
def getRLAgentFE(self, W, i):  # get the feature expectations of a new policy using the RL agent
    IRL_helper(W, self.behavior, self.num_frames, i)  # train the agent and save the model in a file used below
    saved_model = 'saved-models_' + self.behavior + str(i) + '/164-150-100-50000-' + str(self.num_frames) + '.h5'  # use the saved model to get the FE
    model = neural_net(self.num_states, [164, 150], saved_model)
    return play(model, W)  # return feature expectations by executing the learned policy
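Putting the pieces together, the outer apprenticeship-learning loop alternates between fitting new reward weights and training a policy under them until the margin t falls below a threshold. A hedged sketch of that driver loop; the method name optimalWeightFinder and the epsilon value are illustrative, while optimization and policyListUpdater refer to the methods shown above:

def optimalWeightFinder(self, epsilon=0.1):
    i = 1
    while True:
        W = self.optimization()            # new reward weights from the QP
        t = self.policyListUpdater(W, i)   # train a policy under W, measure margin t
        if t <= epsilon:                   # expert behaviour matched closely enough
            break
        i += 1
    return W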