# Jun / Sep 25 2019
# Chapter 6: Windy Grid World
using ReinforcementLearning, ReinforcementLearningEnvironments
using RLIntro, RLIntro.WindyGridWorld

# Build the Windy Grid World environment and read the sizes of its observation
# and action spaces; they dimension the tabular Q approximator below.
env = WindyGridWorldEnv()
ns = length(observation_space(env))
na = length(action_space(env))

# Agent with a Q-based policy: a TDLearner over a tabular Q approximator
# (ns states x na actions) optimised with Descent(0.5), with actions picked by
# EpsilonGreedySelector(0.1). Transitions are stored in an episode RTSA buffer.
# The trailing `;` keeps the notebook/REPL from echoing the agent.
agent = Agent(
    π=QBasedPolicy(
        learner=TDLearner(
            approximator=TabularQApproximator(; n_state=ns, n_action=na),
            optimizer=Descent(0.5),
        ),
        selector=EpsilonGreedySelector(0.1),
    ),
    buffer=episode_RTSA_buffer(),
);

# Run the agent in the environment with StopAfterStep(8000) as the stop
# condition; the StepsPerEpisode hook collects the statistics plotted later.
hook = StepsPerEpisode()
run(agent, env, StopAfterStep(8000); hook=hook)
# Recorded output of the run above:
# StepsPerEpisode([1203, 74, 100, 154, 37, 199, 100, 90, 30, 71 … 15, 17, 23, 17, 18, 15, 15, 18, 15, 15], 16, "TRAINING")
using Plots

# Expand `hook.steps` into one entry per time step: the 1-based episode index `i`
# is repeated `x` times, where `x` is that episode's entry in `hook.steps`.
# Plotting the result therefore shows the episode index against the running step
# count (presumably the "episodes vs. time steps" curve of the Windy Grid World
# example -- confirm against the book figure).
plot([i for (i, x) in enumerate(hook.steps) for _ in 1:x])