Jun · Apr 14, 2020
ReinforcementLearning.jl Template
Remix this to get started with ReinforcementLearning.jl
]add ReinforcementLearningBase
]add ReinforcementLearningCore ReinforcementLearningEnvironments ReinforcementLearningZoo Flux StatsBase
]st
using ReinforcementLearningBase, ReinforcementLearningCore, ReinforcementLearningEnvironments, ReinforcementLearningZoo
using Flux
using StatsBase
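With the packages loaded, a quick sanity check of the environment is possible before assembling the full agent. This sketch only reuses calls that already appear in the experiment cell below (CartPoleEnv, get_observation_space, get_action_space); the local variable names are illustrative.

# Sketch: inspect the CartPole spaces before building the agent.
env = CartPoleEnv(; T = Float32, seed = 11)
obs_space = get_observation_space(env)
act_space = get_action_space(env)
length(rand(obs_space)), length(act_space)  # expected (4, 2) for CartPole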
# Build the CartPole environment and read out state/action dimensions.
env = CartPoleEnv(; T = Float32, seed = 11)
ns, na = length(rand(get_observation_space(env))), length(get_action_space(env))

# A DQN agent: Q-network with two hidden layers, ϵ-greedy exploration,
# and a circular SARTSA replay buffer.
agent = Agent(
    policy = QBasedPolicy(
        learner = BasicDQNLearner(
            approximator = NeuralNetworkApproximator(
                model = Chain(
                    Dense(ns, 128, relu; initW = seed_glorot_uniform(seed = 17)),
                    Dense(128, 128, relu; initW = seed_glorot_uniform(seed = 23)),
                    Dense(128, na; initW = seed_glorot_uniform(seed = 39)),
                ) |> gpu,
                optimizer = ADAM(),
            ),
            batch_size = 32,
            min_replay_history = 100,
            loss_func = huber_loss,
            seed = 22,
        ),
        explorer = EpsilonGreedyExplorer(
            kind = :exp,
            ϵ_stable = 0.01,
            decay_steps = 500,
            seed = 33,
        ),
    ),
    trajectory = CircularCompactSARTSATrajectory(
        capacity = 1000,
        state_type = Float32,
        state_size = (ns,),
    ),
)

# Track per-episode rewards and per-step wall time, then train for 10_000 steps.
hook = ComposedHook(TotalRewardPerEpisode(), TimePerStep())
run(agent, env, StopAfterStep(10000), hook)

@info "stats for BasicDQNLearner" avg_reward = mean(hook[1].rewards) avg_fps = 1 / mean(hook[2].times)
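After the run, the hooks retain their raw measurements, so more than the two averages can be reported. A minimal follow-up sketch, assuming at least one episode finished within the 10,000 steps:

# hook[1] is TotalRewardPerEpisode and hook[2] is TimePerStep, as composed above.
rewards = hook[1].rewards                   # total reward of each finished episode
n = length(rewards)
recent = mean(rewards[max(1, n - 9):n])     # mean over (up to) the last 10 episodes
println("episodes: ", n, ", recent avg reward: ", recent)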