ReinforcementLearning.jl Template

Remix this notebook to get started with ReinforcementLearning.jl.
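It installs the core packages, builds a basic DQN agent for the classic CartPole control task, trains it for 10,000 steps, and reports the average episode reward and training speed.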

]add ReinforcementLearningBase
]add ReinforcementLearningCore ReinforcementLearningEnvironments ReinforcementLearningZoo Flux StatsBase
]st
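The ] prefix switches the REPL into Pkg mode. If you would rather run this setup from a plain Julia script, the equivalent API calls are:

using Pkg
Pkg.add(["ReinforcementLearningBase", "ReinforcementLearningCore",
         "ReinforcementLearningEnvironments", "ReinforcementLearningZoo",
         "Flux", "StatsBase"])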
using ReinforcementLearningBase, ReinforcementLearningCore, ReinforcementLearningEnvironments, ReinforcementLearningZoo
using Flux
using StatsBase
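Flux supplies the neural-network layers and the ADAM optimizer used below; StatsBase provides the mean used to summarize the hook statistics at the end.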
# Create the CartPole environment and read off the state and action dimensions.
env = CartPoleEnv(; T = Float32, seed = 11)
ns, na = length(rand(get_observation_space(env))), length(get_action_space(env))
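For CartPole this gives ns == 4 (cart position, cart velocity, pole angle, pole angular velocity) and na == 2 (push the cart left or right).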
agent = Agent(
  policy = QBasedPolicy(
    # Basic DQN: a Q-network trained with one-step TD targets.
    learner = BasicDQNLearner(
      approximator = NeuralNetworkApproximator(
        # A small MLP mapping the state vector to one Q-value per action.
        model = Chain(
          Dense(ns, 128, relu; initW = seed_glorot_uniform(seed = 17)),
          Dense(128, 128, relu; initW = seed_glorot_uniform(seed = 23)),
          Dense(128, na; initW = seed_glorot_uniform(seed = 39)),
        ) |> gpu,
        optimizer = ADAM(),
      ),
      batch_size = 32,
      min_replay_history = 100,  # collect this many transitions before learning starts
      loss_func = huber_loss,
      seed = 22,
    ),
    # Exponentially decaying ϵ-greedy exploration.
    explorer = EpsilonGreedyExplorer(
      kind = :exp,
      ϵ_stable = 0.01,
      decay_steps = 500,
      seed = 33,
    ),
  ),
  # Circular replay buffer storing (state, action, reward, terminal, state', action') tuples.
  trajectory = CircularCompactSARTSATrajectory(
    capacity = 1000,
    state_type = Float32,
    state_size = (ns,),
  ),
)
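With kind = :exp, the exploration rate decays exponentially from 1.0 toward ϵ_stable. A minimal sketch of the schedule's shape (the exact expression inside EpsilonGreedyExplorer may differ slightly across versions):

# Illustrative only: ϵ decays toward ϵ_stable with time constant decay_steps.
ϵ(step; ϵ_stable = 0.01, decay_steps = 500) =
    ϵ_stable + (1 - ϵ_stable) * exp(-step / decay_steps)

ϵ(0)      # = 1.0: every action is random at the start
ϵ(500)    # ≈ 0.37: mostly greedy after decay_steps
ϵ(5000)   # ≈ 0.01: settled at the stable exploration rate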
# Track the total reward per episode and the wall-clock time per step.
hook = ComposedHook(TotalRewardPerEpisode(), TimePerStep())
run(agent, env, StopAfterStep(10000), hook)
@info "stats for BasicDQNLearner" avg_reward = mean(hook[1].rewards) avg_fps = 1 / mean(hook[2].times)
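To see the learning curve rather than just the average, plot the per-episode rewards collected by the hook (this sketch assumes Plots.jl has also been added to the environment):

using Plots

# An upward trend in episode reward indicates the agent is learning.
plot(hook[1].rewards, xlabel = "episode", ylabel = "total reward", legend = false)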