# Jun / Sep 25, 2019
#
# Chapter 6: Maximization Bias (Sutton & Barto, Example 6.7)

using ReinforcementLearning, ReinforcementLearningEnvironments
using RLIntro.MaximizationBias
using Plots, StatsBase
# Hook that records, for each episode, how many times the LEFT action was
# taken from state 1 (state A in the book's example).
mutable struct CountOfLeft <: AbstractHook
    counts::Vector{Int}  # one entry per finished episode
    count::Int           # running tally within the current episode
    # Use a typed empty vector (`Int[]`), not `[]` which is Vector{Any}.
    CountOfLeft() = new(Int[], 0)
end

# Before each action is taken: bump the tally when the agent picks LEFT
# while in state 1 (state A).
function (hook::CountOfLeft)(::PreActStage, agent, env, obs_action)
    observation, chosen = obs_action
    in_state_A = get_state(observation) == 1
    if in_state_A && chosen == MaximizationBias.LEFT
        hook.count += 1
    end
end

# After each episode: archive this episode's tally and reset for the next.
function (hook::CountOfLeft)(::PostEpisodeStage, agent, env, obs)
    push!(hook.counts, hook.count)  # store the episode total
    hook.count = 0                  # start the next episode from zero
end
env = MaximizationBiasEnv()
# Number of states and actions exposed by the environment.
ns, na = length(observation_space(env)), length(action_space(env))
# => (3, 10)   # stray REPL output, kept here as a comment
# Build a fresh Double Q-learning agent: two independent tabular SARS
# TD learners (L1, L2) combined by a DoubleLearner, with epsilon-greedy
# action selection (ε = 0.1) and step size 0.1.
init_double_Q_agent() = Agent(
    π=QBasedPolicy(
        learner=DoubleLearner(
            L1=TDLearner(
                approximator=TabularQApproximator(n_state=ns, n_action=na),
                optimizer=Descent(0.1),
                method=:SARS
                ),
            L2=TDLearner(
                approximator=TabularQApproximator(n_state=ns, n_action=na),
                optimizer=Descent(0.1),
                method=:SARS
                )
            ),
        selector=EpsilonGreedySelector(0.1)
        ),
    buffer=episode_RTSA_buffer()
)

# Build a fresh (single) Q-learning agent: one tabular SARS TD learner
# with epsilon-greedy selection (ε = 0.1) and step size 0.1. Counterpart
# of `init_double_Q_agent` for comparing maximization bias.
init_Q_agent() = Agent(
    π=QBasedPolicy(
        learner=TDLearner(
            approximator=TabularQApproximator(n_state=ns, n_action=na),
            optimizer=Descent(0.1),
            method=:SARS
            ),
        selector=EpsilonGreedySelector(0.1)
        ),
    buffer=episode_RTSA_buffer()
)
# init_Q_agent (generic function with 1 method)   # stray REPL output, commented out
# Run many independent trials of the Double-Q agent and plot the averaged
# per-episode count of LEFT actions taken from state A.
stats = Vector{Vector{Int}}()  # typed container instead of `[]` (Vector{Any})
for _ in 1:10000
    hook = CountOfLeft()
    run(init_double_Q_agent(), env, StopAfterEpisode(300);hook=hook)
    push!(stats, hook.counts)
end

# `mean` over a vector of vectors averages element-wise across trials.
plot(mean(stats), legend=:topright, label="double q")
# Same experiment for the plain Q-learning agent, overlaid on the figure.
stats = Vector{Vector{Int}}()  # typed container instead of `[]` (Vector{Any})
for _ in 1:10000
    hook = CountOfLeft()
    run(init_Q_agent(), env, StopAfterEpisode(300);hook=hook)
    push!(stats, hook.counts)
end
plot!(mean(stats), legend=:topright, label="q")

# TODO:
# Add a legal-action restriction so that on the first step only actions
# 1 (left) or 2 (right) may be chosen. With that restriction, the figure
# will match the one in the book exactly.