Jun / Sep 27 2019
Chapter 4: Grid World
using ReinforcementLearning, ReinforcementLearningEnvironments
# Linear <-> Cartesian index conversions for the 4x4 grid
const GridWorldLinearIndices = LinearIndices((4, 4))
const GridWorldCartesianIndices = CartesianIndices((4, 4))

# The top-left and bottom-right cells are the terminal states
isterminal(s::CartesianIndex{2}) = s == CartesianIndex(1, 1) || s == CartesianIndex(4, 4)

# One deterministic transition: stepping off the grid (or starting from a
# terminal state) leaves the agent in place; every non-terminal step costs -1
function nextstep(s::CartesianIndex{2}, a::CartesianIndex{2})
    ns = s + a
    if isterminal(s) || ns[1] < 1 || ns[1] > 4 || ns[2] < 1 || ns[2] > 4
        ns = s
    end
    r = isterminal(s) ? 0.0 : -1.0
    [(nextstate=GridWorldLinearIndices[ns], reward=r, prob=1.0)]
end

# The four actions: one step in each grid direction, expressed as Cartesian offsets
const GridWorldActions = [CartesianIndex(-1, 0), CartesianIndex(1, 0), CartesianIndex(0, 1), CartesianIndex(0, -1)]

# Tabulate the transition for every (state, action) pair
const GridWorldEnvModel = DeterministicDistributionModel([nextstep(GridWorldCartesianIndices[s], a) for s in 1:16, a in GridWorldActions]);
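As a quick sanity check (not part of the original notebook, just a minimal sketch assuming the definitions above are loaded), you can query a single transition by hand. Starting from cell (2, 2) and moving one row up lands in (1, 2), which is linear state 5 under Julia's column-major indexing, at a cost of -1:

nextstep(CartesianIndex(2, 2), CartesianIndex(-1, 0))
# expected: [(nextstate = 5, reward = -1.0, prob = 1.0)]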
# A value table for the 16 states and the equiprobable random policy
# (probability 0.25 for each of the 4 actions in every state)
V, π = TabularVApproximator(16), TabularRandomPolicy(fill(0.25, 16, 4))
policy_evaluation!(V=V, π=π, model=GridWorldEnvModel, γ=1.0)
TabularVApproximator([0.0, -13.9993, -19.999, -21.9989, -13.9993, -17.9992, -19.9991, -19.9991, -19.999, -19.9991, -17.9992, -13.9994, -21.9989, -19.9991, -13.9994, 0.0])
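These are the converged values of iterative policy evaluation for the random policy, i.e. the fixed point of the Bellman expectation update V(s) = Σ_a π(a|s) Σ_{s',r} p(s', r|s, a) [r + γ V(s')] from Chapter 4 of the book. As a rough check (this snippet is an addition, not part of the original notebook), you can round the table and lay it out on the grid, which should reproduce the value pattern of Figure 4.1:

round.(Int, reshape(V.table, 4, 4))
# 4×4 Matrix{Int64}:
#    0  -14  -20  -22
#  -14  -18  -20  -20
#  -20  -20  -18  -14
#  -22  -20  -14    0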
# Visualize the state values on the grid; yflip puts row 1 at the top
using Plots
heatmap(1:4, 1:4, reshape(V.table, 4, 4), yflip=true)
You can compare it with the corresponding figure in the book:
