Skip to content

Instantly share code, notes, and snippets.

@jeiting
Created February 2, 2017 06:55
Show Gist options
  • Save jeiting/c381e195d6153eaf657c21f691c2e456 to your computer and use it in GitHub Desktop.
Save jeiting/c381e195d6153eaf657c21f691c2e456 to your computer and use it in GitHub Desktop.
import gym
import numpy as np
env = gym.make('CartPole-v0')
x = env.reset()
# x, xdot, theta, thetadot
gamma = (4.0 / 3.0 - env.masspole / env.masscart)
a = -env.gravity * env.masspole / (env.total_mass * gamma)
b = 1.0 / env.total_mass * (1 + env.masspole / (env.total_mass * gamma))
c = env.gravity / (env.length * gamma)
d = -1.0 / (env.total_mass * env.length * gamma)
tau = env.tau
F = np.array([
[1, tau, 0, 0, 0],
[0, 1, tau * a, 0, tau * b],
[0, 0, 1, tau, 0],
[0, 0, tau * c, 1, tau * d],
])
C = np.array([
[0, 0, 0, 0, 0],
[0, 10, 0, 0, 0],
[0, 0, 100, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 1000],
])
c = np.array([0, 0, 0, 0, 0]).T
frame = 0
done = False
while not done:
Ks = []
T = 100
V = np.zeros((4, 4))
v = np.zeros((4))
for t in xrange(T):
t = T - t
# Qt
Qt = C + np.matmul(F.T, np.matmul(V, F))
qt = c + np.matmul(F.T, v)
Quu = Qt[-1:,-1:]
Qux = Qt[-1:,:-1]
Qxu = Qt[:-1, -1:]
qu = qt[-1:]
Qut_inv = np.linalg.inv(Quu)
Kt = -np.matmul(Qut_inv, Qux)
kt = -np.matmul(Qut_inv, qu)
Ks.append((Kt, kt))
V = Qt[:4, :4] + np.matmul(Qxu, Kt) + np.matmul(Qux, Kt.T) + np.matmul(Kt.T, np.matmul(Quu, Kt))
v = qt[:4] + np.matmul(Qxu, kt) + np.matmul(Kt.T, Quu).reshape(-1) + np.matmul(Kt.T, np.matmul(Quu, kt))
Kt, kt = Ks[-1]
ut = np.matmul(Kt, x.reshape((1, -1)).T) + kt
if ut > 0.0:
ut = env.force_mag
action = 1
else:
ut = -env.force_mag
action = 0
xu = np.hstack([x, ut])
my_guess = np.matmul(F, xu.T)
x, reward, done, info = env.step(action)
frame += 1
env.render()
print frame
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment