Chapter 5: Blackjack (Figure 5.1)
using ReinforcementLearning, ReinforcementLearningEnvironments
using RLIntro
using RLIntro.BlackJack
env = BlackJackEnv()
ns, na = length(observation_space(env)), length(action_space(env))  # sizes of the state and action spaces
(220, 2)
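The 220 states factor as 2 (usable ace or not) × 11 (player sum 11–21) × 10 (dealer's showing card, A–10). The snippet below is a sketch of that layout, assuming BlackJack.INDS is a LinearIndices over exactly those axes:

# A sketch of the state-space layout (assumes BlackJack.INDS spans
# (usable ace: 2) × (player sum 11–21: 11) × (dealer card A–10: 10)).
inds = LinearIndices((2, 11, 10))
length(inds)    # 220, matching ns above
inds[1, 10, 5]  # linear index of (usable ace, player sum 20, dealer shows 5)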
stick_action = findall(x -> x == :stick, BlackJack.ACTIONS)[]
# The fixed policy of Figure 5.1: hit (action 1) everywhere,
# except stick on player sums 20 and 21.
table = fill(1, size(BlackJack.INDS)...)
table[:, 10:11, :] .= stick_action
table = reshape(table, :);
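As a sanity check (a hypothetical snippet, not part of the original notebook), we can confirm the table sticks only on 20 and 21:

policy = reshape(table, size(BlackJack.INDS)...)
@assert all(policy[:, 10:11, :] .== stick_action)  # stick on 20 and 21
@assert all(policy[:, 1:9, :] .!= stick_action)    # hit on every lower sum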
agent = Agent(
    VBasedPolicy(
        learner=MonteCarloLearner(approximator=TabularVApproximator(ns)),
        f=obs -> table[get_state(obs)]),  # act according to the fixed policy table
    episode_RTSA_buffer()
);
run(agent, env, StopAfterEpisode(10000))
EmptyHook()
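Under the hood, the tabular Monte Carlo learner averages the returns observed after each visit to a state. A minimal sketch of that every-visit update, using hypothetical helper names rather than the library's internals:

function mc_update!(V, counts, states, rewards; γ=1.0)
    G = 0.0
    for t in length(states):-1:1        # sweep the episode backwards
        G = γ * G + rewards[t]          # return G_t following state s_t
        s = states[t]
        counts[s] += 1
        V[s] += (G - V[s]) / counts[s]  # incremental mean of observed returns
    end
    V
end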
using Plots
V = reshape(agent.π.learner.approximator.table, size(BlackJack.INDS)...)
V_with_usable_ace = V[1, 2:11, :]     # player sums 12–21 × dealer showing A–10
V_without_usable_ace = V[2, 2:11, :]
heatmap(V_with_usable_ace)
heatmap(V_without_usable_ace)
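The raw heatmaps use bare matrix indices. To get axes matching Figure 5.1 (player sum 12–21 against the dealer's showing card), coordinates can be passed explicitly; a sketch, assuming the slicing above:

heatmap(1:10, 12:21, V_with_usable_ace;
        xlabel="Dealer showing", ylabel="Player sum",
        title="Usable ace, 10,000 episodes")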
# Same setup, but evaluated with a fresh agent for 500,000 episodes.
agent = Agent(
    VBasedPolicy(
        learner=MonteCarloLearner(approximator=TabularVApproximator(ns)),
        f=obs -> table[get_state(obs)]),
    episode_RTSA_buffer()
);
run(agent, env, StopAfterEpisode(500_000))
EmptyHook()
V = reshape(agent.π.learner.approximator.table, size(BlackJack.INDS)...)
V_with_usable_ace = V[1, 2:11, :]
V_without_usable_ace = V[2, 2:11, :]
heatmap(V_with_usable_ace)
heatmap(V_without_usable_ace)
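To reproduce the two-panel layout of Figure 5.1, the two value surfaces can be drawn side by side (a sketch using Plots' subplot layout):

plot(heatmap(V_with_usable_ace, title="Usable ace"),
     heatmap(V_without_usable_ace, title="No usable ace"),
     layout=(1, 2))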