Jun / Sep 25 2019

Chapter05 Blackjack (Fig_5_1)
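This example reproduces Figure 5.1 of Sutton & Barto's Reinforcement Learning: An Introduction: Monte Carlo prediction of the blackjack policy that sticks only on a player sum of 20 or 21, with the estimated state values plotted after 10,000 and after 500,000 episodes, both with and without a usable ace.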

using ReinforcementLearning, ReinforcementLearningEnvironments
using RLIntro
using RLIntro.BlackJack
env = BlackJackEnv()

ns, na = length(observation_space(env)), length(action_space(env))  # (220, 2): 220 states, 2 actions (hit / stick)
stick_action = findall(x -> x == :stick, BlackJack.ACTIONS)[]  # index of the :stick action

# The fixed policy to be evaluated (as in Fig 5.1): hit everywhere except on
# a player sum of 20 or 21, where the player sticks.
table = fill(1, size(BlackJack.INDS)...)  # default to action 1 (hit) for every state
table[:, 10:11, :] .= stick_action        # stick on sums 20 and 21 (indices 10:11 of the player-sum dimension)
table = reshape(table, :);
# Monte Carlo prediction: the learner estimates V for the fixed policy,
# while the mapping `f` always acts according to the policy table above.
agent = Agent(
    VBasedPolicy(
        learner=MonteCarloLearner(approximator=TabularVApproximator(ns)),
        f=obs -> table[get_state(obs)]),
    episode_RTSA_buffer()
);
run(agent, env, StopAfterEpisode(10000))  # evaluate the policy for 10,000 episodes
using Plots
# Reshape the learned values back to the layout of BlackJack.INDS
# (usable ace x player sum x dealer card).
V = reshape(agent.π.learner.approximator.table, size(BlackJack.INDS)...)
V_with_usable_ace = V[1, 2:11, :]     # player sums 12-21, usable ace
V_without_usable_ace = V[2, 2:11, :]  # player sums 12-21, no usable ace

heatmap(V_with_usable_ace)     # value surface after 10,000 episodes, usable ace
heatmap(V_without_usable_ace)  # value surface after 10,000 episodes, no usable ace
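The bare heatmaps carry no axis labels. A minimal labeled sketch, assuming only the standard Plots.jl title/xlabel/ylabel keywords (the tick values still show array indices rather than actual card and sum values):

# Labeled versions of the 10,000-episode value surfaces (axis names follow Fig 5.1).
heatmap(V_with_usable_ace;
        title="Usable ace, 10,000 episodes",
        xlabel="Dealer showing", ylabel="Player sum")
heatmap(V_without_usable_ace;
        title="No usable ace, 10,000 episodes",
        xlabel="Dealer showing", ylabel="Player sum")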
# A fresh agent for the longer run; same fixed policy and Monte Carlo learner.
agent = Agent(
    VBasedPolicy(
        learner=MonteCarloLearner(approximator=TabularVApproximator(ns)),
        f=obs -> table[get_state(obs)]),
    episode_RTSA_buffer()
);
run(agent, env, StopAfterEpisode(500000))  # evaluate the policy for 500,000 episodes
V = reshape(agent.π.learner.approximator.table, size(BlackJack.INDS)...)
V_with_usable_ace = V[1, 2:11, :]     # player sums 12-21, usable ace
V_without_usable_ace = V[2, 2:11, :]  # player sums 12-21, no usable ace

heatmap(V_with_usable_ace)     # value surface after 500,000 episodes, usable ace
heatmap(V_without_usable_ace)  # value surface after 500,000 episodes, no usable ace
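To lay the four surfaces out as a single 2x2 figure in the style of Fig 5.1, the 10,000-episode slices have to be kept under separate names before the second run overwrites them. A sketch under that assumption, with V_ace_10k and V_noace_10k as hypothetical names for the saved copies:

# 2x2 layout in the style of Fig 5.1. V_ace_10k / V_noace_10k are hypothetical
# copies of the 10,000-episode slices saved before the 500,000-episode run.
p1 = heatmap(V_ace_10k,            title="10,000 episodes, usable ace")
p2 = heatmap(V_with_usable_ace,    title="500,000 episodes, usable ace")
p3 = heatmap(V_noace_10k,          title="10,000 episodes, no usable ace")
p4 = heatmap(V_without_usable_ace, title="500,000 episodes, no usable ace")
plot(p1, p2, p3, p4; layout=(2, 2), xlabel="Dealer showing", ylabel="Player sum")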