Jun / Sep 25 2019

Chapter05 Blackjack (Fig_5_1)
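This example reproduces Figure 5.1 of Sutton & Barto's Reinforcement Learning: An Introduction: Monte Carlo prediction of the blackjack policy that sticks only on a player sum of 20 or 21, with the estimated state values plotted after 10,000 and after 500,000 episodes, both with and without a usable ace.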

using ReinforcementLearning, ReinforcementLearningEnvironments
using RLIntro
using RLIntro.BlackJack
env = BlackJackEnv()

ns, na = length(observation_space(env)), length(action_space(env))  # (220, 2): 220 states, 2 actions (hit / stick)
stick_action = findall(x -> x == :stick, BlackJack.ACTIONS)[]  # index of the :stick action

# The fixed policy to be evaluated (as in Fig 5.1): hit everywhere except on
# a player sum of 20 or 21, where the player sticks.
table = fill(1, size(BlackJack.INDS)...)  # default to action 1 (hit) for every state
table[:, 10:11, :] .= stick_action        # stick on sums 20 and 21 (indices 10:11 of the player-sum dimension)
table = reshape(table, :);
# Monte Carlo prediction: the learner estimates V for the fixed policy,
# while the mapping `f` always acts according to the policy table above.
agent = Agent(
    VBasedPolicy(
        learner=MonteCarloLearner(approximator=TabularVApproximator(ns)),
        f=obs -> table[get_state(obs)]),
    episode_RTSA_buffer()
);
run(agent, env, StopAfterEpisode(10000))  # evaluate the policy for 10,000 episodes
using Plots
# Reshape the learned values back to the layout of BlackJack.INDS
# (usable ace x player sum x dealer card).
V = reshape(agent.π.learner.approximator.table, size(BlackJack.INDS)...)
V_with_usable_ace = V[1, 2:11, :]     # player sums 12-21, usable ace
V_without_usable_ace = V[2, 2:11, :]  # player sums 12-21, no usable ace

heatmap(V_with_usable_ace)     # value surface after 10,000 episodes, usable ace
heatmap(V_without_usable_ace)  # value surface after 10,000 episodes, no usable ace
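The bare heatmaps carry no axis labels. A minimal labeled sketch, assuming only the standard Plots.jl title/xlabel/ylabel keywords (the tick values still show array indices rather than actual card and sum values):

# Labeled versions of the 10,000-episode value surfaces (axis names follow Fig 5.1).
heatmap(V_with_usable_ace;
        title="Usable ace, 10,000 episodes",
        xlabel="Dealer showing", ylabel="Player sum")
heatmap(V_without_usable_ace;
        title="No usable ace, 10,000 episodes",
        xlabel="Dealer showing", ylabel="Player sum")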
# A fresh agent for the longer run; same fixed policy and Monte Carlo learner.
agent = Agent(
    VBasedPolicy(
        learner=MonteCarloLearner(approximator=TabularVApproximator(ns)),
        f=obs -> table[get_state(obs)]),
    episode_RTSA_buffer()
);
run(agent, env, StopAfterEpisode(500000))  # evaluate the policy for 500,000 episodes
V = reshape(agent.π.learner.approximator.table, size(BlackJack.INDS)...)
V_with_usable_ace = V[1, 2:11, :]     # player sums 12-21, usable ace
V_without_usable_ace = V[2, 2:11, :]  # player sums 12-21, no usable ace

heatmap(V_with_usable_ace)     # value surface after 500,000 episodes, usable ace
heatmap(V_without_usable_ace)  # value surface after 500,000 episodes, no usable ace
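To lay the four surfaces out as a single 2x2 figure in the style of Fig 5.1, the 10,000-episode slices have to be kept under separate names before the second run overwrites them. A sketch under that assumption, with V_ace_10k and V_noace_10k as hypothetical names for the saved copies:

# 2x2 layout in the style of Fig 5.1. V_ace_10k / V_noace_10k are hypothetical
# copies of the 10,000-episode slices saved before the 500,000-episode run.
p1 = heatmap(V_ace_10k,            title="10,000 episodes, usable ace")
p2 = heatmap(V_with_usable_ace,    title="500,000 episodes, usable ace")
p3 = heatmap(V_noace_10k,          title="10,000 episodes, no usable ace")
p4 = heatmap(V_without_usable_ace, title="500,000 episodes, no usable ace")
plot(p1, p2, p3, p4; layout=(2, 2), xlabel="Dealer showing", ylabel="Player sum")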