elumixor · May 24, 2020 23:02
diff --git a/TRPO_update_2.py b/TRPO_update_2.py

    # Build the surrogate loss and the KL term whose gradients are taken below.
    # The surrogate loss is differentiated with respect to its first argument
    # (the new probabilities), so the second argument is passed detached and
    # is therefore treated as a constant.
    L = surrogate_loss(probabilities, probabilities.detach(), advantages)
    # NOTE(review): both arguments are the same object here. Presumably kl_div
    # detaches one of them internally so that the gradient flows through the
    # other only -- confirm against kl_div's definition.
    KL = kl_div(distribution, distribution)

    # Snapshot the actor's parameters into a list; the same list is passed to
    # both flat_grad calls below.
    parameters = list(actor.parameters())

    # Gradient of the surrogate loss with respect to the actor parameters.
    # retain_graph=True because the graph is used again after this call.
    g = flat_grad(L, parameters, retain_graph=True)  
    
    # Gradient of the KL term with respect to the same parameters.
    # create_graph=True because backward() will later be called on this
    # gradient itself (for the Hessian-vector product).
    d_kl = flat_grad(KL, parameters, create_graph=True)

	# Build the surrogate loss and the KL term whose gradients are taken below.
	# The surrogate loss is differentiated with respect to its first argument
	# (the new probabilities), so the second argument is passed detached and
	# is therefore treated as a constant.
	L = surrogate_loss(probabilities, probabilities.detach(), advantages)
	# NOTE(review): both arguments are the same object here. Presumably kl_div
	# detaches one of them internally so that the gradient flows through the
	# other only -- confirm against kl_div's definition.
	KL = kl_div(distribution, distribution)

	# Snapshot the actor's parameters into a list; the same list is passed to
	# both flat_grad calls below.
	parameters = list(actor.parameters())

	# Gradient of the surrogate loss with respect to the actor parameters.
	# retain_graph=True because the graph is used again after this call.
	g = flat_grad(L, parameters, retain_graph=True)

	# Gradient of the KL term with respect to the same parameters.
	# create_graph=True because backward() will later be called on this
	# gradient itself (for the Hessian-vector product).
	d_kl = flat_grad(KL, parameters, create_graph=True)