Jun / Sep 25 2019
Chapter06 Maximization Bias
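This example reproduces Figure 6.5 of Sutton & Barto (Example 6.7). The environment is a tiny MDP with two non-terminal states, A and B. Every episode starts in A, where right terminates immediately with reward 0 and left moves to B with reward 0; each action in B terminates the episode with a reward drawn from a normal distribution with mean -0.1 and variance 1.0. Since the expected return of left is negative, an unbiased learner should quickly prefer right, but the maximization bias of Q-learning makes it favor left at first. We count how often each algorithm takes left from A.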
using ReinforcementLearning, ReinforcementLearningEnvironments
using RLIntro.MaximizationBias
using Plots, StatsBase
# A hook that records, per episode, how many times the agent chose LEFT in
# the first state (state 1, i.e. state A).
mutable struct CountOfLeft <: AbstractHook
    counts::Vector{Int}
    count::Int
    CountOfLeft() = new([], 0)
end

function (f::CountOfLeft)(::PreActStage, agent, env, obs_action)
    obs, action = obs_action
    if get_state(obs) == 1 && action == MaximizationBias.LEFT
        f.count += 1
    end
end

function (f::CountOfLeft)(::PostEpisodeStage, agent, env, obs)
    push!(f.counts, f.count)
    f.count = 0
end
env = MaximizationBiasEnv()
# 3 states (A, B and a terminal state) and 10 actions (those available in B)
ns, na = length(observation_space(env)), length(action_space(env))
(3, 10)
init_double_Q_agent() = Agent(
    π=QBasedPolicy(
        learner=DoubleLearner(
            L1=TDLearner(
                approximator=TabularQApproximator(n_state=ns, n_action=na),
                optimizer=Descent(0.1),
                method=:SARS
            ),
            L2=TDLearner(
                approximator=TabularQApproximator(n_state=ns, n_action=na),
                optimizer=Descent(0.1),
                method=:SARS
            )
        ),
        selector=EpsilonGreedySelector(0.1)
    ),
    buffer=episode_RTSA_buffer()
)

init_Q_agent() = Agent(
    π=QBasedPolicy(
        learner=TDLearner(
            approximator=TabularQApproximator(n_state=ns, n_action=na),
            optimizer=Descent(0.1),
            method=:SARS
        ),
        selector=EpsilonGreedySelector(0.1)
    ),
    buffer=episode_RTSA_buffer()
)
init_Q_agent (generic function with 1 method)
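Conceptually, the DoubleLearner keeps two action-value tables and, on each step, updates one of them while using the other to evaluate the greedy action. Below is a minimal, library-independent sketch of that update (Sutton & Barto, Section 6.7); the function name and the (action, state) table layout are illustrative assumptions, not the RL.jl API.

# Minimal sketch of the double Q-learning update (not the RL.jl implementation).
# Q1 and Q2 are (n_action, n_state) matrices; s′ is the successor state.
# For a terminal s′ the bootstrap term would be dropped; it is kept for brevity.
function double_q_update!(Q1, Q2, s, a, r, s′; α=0.1, γ=1.0)
    if rand() < 0.5
        a_max = argmax(view(Q1, :, s′))                     # select with Q1 ...
        Q1[a, s] += α * (r + γ * Q2[a_max, s′] - Q1[a, s])  # ... evaluate with Q2
    else
        a_max = argmax(view(Q2, :, s′))
        Q2[a, s] += α * (r + γ * Q1[a_max, s′] - Q2[a, s])
    end
end

Selecting the greedy action with one table but evaluating it with the other decorrelates the two sources of estimation noise, which is what removes the positive bias of taking a max over noisy estimates.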
stats = []
for _ in 1:10000  # average the per-episode counts over 10,000 independent runs
    hook = CountOfLeft()
    run(init_double_Q_agent(), env, StopAfterEpisode(300); hook=hook)
    push!(stats, hook.counts)
end
plot(mean(stats), legend=:topright, label="double q")
stats = []
for _ in 1:10000
    hook = CountOfLeft()
    run(init_Q_agent(), env, StopAfterEpisode(300); hook=hook)
    push!(stats, hook.counts)
end
plot!(mean(stats), legend=:topright, label="q")
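In the book's Figure 6.5, Q-learning initially takes left from A far more often than the floor forced by ε-greedy exploration, while double Q-learning stays near that floor from the start. The curves here differ slightly from the book because all 10 actions are currently selectable in the first state; see the TODO below.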
TODO
We need to add a legal-action restriction here so that in the first state only actions 1 (left) and 2 (right) can be selected. With that in place, the figure would match the one in the book exactly.
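One possible starting point is to restrict both the greedy choice and the random exploration to a legal-action mask. The helper below is a hypothetical sketch, not RL.jl API, and would still need to be wired into the policy's selector.

# Hypothetical helper (not RL.jl API): greedy action restricted to legal ones.
function masked_argmax(q::AbstractVector, legal::AbstractVector{Bool})
    best_v, best_a = -Inf, 0
    for (a, v) in enumerate(q)
        if legal[a] && v > best_v
            best_v, best_a = v, a
        end
    end
    best_a
end

# In the first state only LEFT (1) and RIGHT (2) would be legal:
legal_in_A = [trues(2); falses(8)]  # hypothetical mask over the 10 actions
# ε-greedy exploration would likewise sample only from findall(legal_in_A).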