#!/usr/bin/python
"""
I wrote this script to check whether the optimal-policy solution obtained by Linear Programming is the same as the one obtained by a gradient-based method.
The gradient-based approach is implemented with TensorFlow; the LP is solved with SciPy's linprog.
I found that n of the n*k (greater-than-or-equal) constraints become equalities in the solution returned by linprog, but that is not so for the solution found by the gradient-based approach. This shows that the constraints do not sufficiently specify the solution; it works in the LP case because of the way the solver finds its solution.
LP - Linear Programming with scipy.optimize.linprog
TF - TensorFlow's Gradient Descent
Run: python <script> [LP|TF]
"""
import tensorflow as tf
import numpy as np
import sys
n, k = 3, 3
np.random.seed(2)
# T[i][a][j]: transition probabilities, R[i][a][j]: rewards
T, R = np.reshape([np.random.sample() for i in range(n*k*n)], [n, k, n]), np.random.standard_normal([n, k, n])
T = T/np.expand_dims(np.sum(T, axis=2), axis=2)
assert np.all(np.sum(T, axis=2) > 0.99), "T array does not have valid probabilities"
g = .7  # discount factor
alg = str(sys.argv[1]) if len(sys.argv) > 1 else ''
algs = set(['LP', 'TF'])
solve_by = 'LP' if (alg not in algs) else alg
if solve_by == 'TF':
    # one scalar variable per state value V_i
    vs = [tf.get_variable("v%d" % i, shape=[], initializer=tf.random_normal_initializer) for i in range(n)]
    l = sum(vs)
    BIG = 1000
    sms = []
    for i in range(n):
        for a in range(k):
            # sm = sum_j T[i][a][j]*(R[i][a][j] + g*V_j) - V_i for this (state, action) pair
            sm = 0
            for j in range(n):
                sm += T[i][a][j]*(R[i][a][j] + g*vs[j])
            sm -= vs[i]
            sms.append(sm)
            # penalize constraint violations (sm > 0) with a large weight
            l -= tf.maximum(0., BIG*sm)
    lr = 0.1
    opt = tf.train.GradientDescentOptimizer(lr)
    op = opt.minimize(-l)
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    for i in range(100):
        x = sess.run([op, l] + sms)
        if i % 10 == 0:
            print x
    print sess.run(vs)
elif solve_by == 'LP':
    from scipy.optimize import linprog as lp
    A, b = [], []
    # linprog minimizes c.x, so c = -1 maximizes sum_i V_i
    c = [-1]*n
    for i in range(n):
        for a in range(k):
            # row encodes: g*sum_j T[i][a][j]*V_j - V_i <= -sum_j T[i][a][j]*R[i][a][j]
            a_coeffs, b_sum = [0]*n, 0
            for j in range(n):
                a_coeffs[j] = T[i][a][j]*g
                b_sum -= T[i][a][j]*R[i][a][j]
            a_coeffs[i] -= 1
            A.append(a_coeffs)
            b.append(b_sum)
    res = lp(c, A_ub=A, b_ub=b)
    print res
    x = res.x
    # constraints l*k .. l*k+k-1 correspond to the k actions of state l;
    # print the slack A.x - b of each constraint, grouped by state
    for l in range(n):
        print [sum([A[i][j]*x[j] for j in range(n)]) - b[i] for i in range(k*l, l*k + k)]
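
# A minimal value-iteration sketch (assumes the standard Bellman optimality backup;
# V_ref, Q and V_new are local names used only here) that reuses T, R and g from the
# top of the script and gives an independent reference for the state values above.
V_ref = np.zeros(n)
for _ in range(1000):
    # Q[i][a] = sum_j T[i][a][j]*(R[i][a][j] + g*V_ref[j])
    Q = np.sum(T*(R + g*V_ref), axis=2)
    V_new = np.max(Q, axis=1)
    if np.max(np.abs(V_new - V_ref)) < 1e-10:
        break
    V_ref = V_new
print V_ref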