The only code in the entire package initially is
abstract type CommonEnv end
function reset! end
function step! end
function actions end
(of course there will be extensive documentation, etc.)
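For illustration, a concrete environment might plug into this interface roughly as follows. This is a hypothetical sketch, not part of the package: it assumes reset! returns the first observation and step! returns an (observation, reward, done) tuple, conventions the snippet above does not actually fix.

mutable struct LineWalkEnv <: CommonEnv
    pos::Int     # current position on a 1-D line
    goal::Int    # absorbing goal position
end

actions(::LineWalkEnv) = (-1, 1)                   # move left or right

reset!(env::LineWalkEnv) = (env.pos = 1; env.pos)  # back to the start, return the observation

function step!(env::LineWalkEnv, a)
    env.pos = clamp(env.pos + a, 1, env.goal)
    done = env.pos == env.goal
    return env.pos, done ? 1.0 : 0.0, done         # observation, reward, done
end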
using POMDPTools: SparseCat
using Distributions: pdf

module DecisionMaking

import Distributions
using Distributions: pdf

export
    transition,

module ModelerInterface

function transition end
function T end
from julia.CommonRLSpaces import Box
from julia.Main import Float64
from julia.POMDPs import solve, pdf, action
from julia.QMDP import QMDPSolver
from julia.POMCPOW import POMCPOWSolver
from julia.POMDPTools import stepthrough, alphavectors, Uniform, Deterministic
from julia.Distributions import Normal, AbstractMvNormal, MvNormal
from quickpomdps import QuickPOMDP
struct LunarLander <: POMDP{Vector{Float64}, Vector{Float64}, Vector{Float64}}
    dt::Float64
    m::Float64
    I::Float64
    Q::Vector{Float64}
    R::Vector{Float64}
end

function LunarLander(;dt::Float64=0.1, m::Float64=1.0, I::Float64=10.0)
    Q = [0.0, 0.0, 0.0, 0.1, 0.1, 0.01]
using BenchmarkTools

function operate!(shared, locks)
    i = rand(1:length(shared))
    lock(locks[i]) do
        shared[i] += 1
    end
end

function operate_many!(shared, locks, channel)
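    # (the original body is cut off here; what follows is a hedged, illustrative
    #  completion rather than the source code: repeatedly apply the locked update
    #  and then signal completion on the channel so a coordinating task can wait)
    for _ in 1:1000
        operate!(shared, locks)
    end
    put!(channel, true)
end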
using ReinforcementLearningZoo
using ReinforcementLearningBase
using ReinforcementLearningCore: NeuralNetworkApproximator, EpsilonGreedyExplorer, QBasedPolicy, CircularCompactSARTSATrajectory
using ReinforcementLearning
using Flux
using Flux: glorot_uniform, huber_loss
import Random
import BSON

RL = ReinforcementLearningBase
{
  "name": "MathOptFormat Model",
  "version": {
    "major": 0,
    "minor": 4
  },
  "variables": [
    {
      "name": "x[1,1]"
    },
Here are the two ways of augmenting the state space that I was referring to (these are illustrative rather than efficient or complete implementations):
struct VariableDiscountWrapper1{S, A, F<:Function} <: MDP{Union{S, TerminalState}, A}
    m::MDP{S, A}
    discount::F
end
using ExtendedKalmanFilters
using Distributions
using DelimitedFiles

# We may also want to look at DynamicalSystems.jl
# The package should accept AbstractArrays wherever possible so people can use StaticArrays

# Model semantics
# x_{t+1} = f(x_t, u_t) + w_t
# y_t = h(x_t) + v_t   # should the control be an argument of h?
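As a concrete reference point for these semantics, here is a minimal hand-rolled EKF predict/update step. It is my own sketch, independent of whatever API ExtendedKalmanFilters ends up exposing; W and V are the covariances of w_t and v_t, and the Jacobians are taken with ForwardDiff.

using LinearAlgebra
using ForwardDiff: jacobian

function ekf_step(f, h, x, P, u, y, W, V)
    # Predict: propagate the mean through f and the covariance through its Jacobian
    F = jacobian(xx -> f(xx, u), x)
    x_pred = f(x, u)
    P_pred = F * P * F' + W
    # Update: correct the prediction with the measurement y through h
    H = jacobian(h, x_pred)
    S = H * P_pred * H' + V
    K = P_pred * H' / S
    x_new = x_pred + K * (y - h(x_pred))
    P_new = (I - K * H) * P_pred
    return x_new, P_new
end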