diff --git a/NEWS.md b/NEWS.md
index 2d6162f83..7ff7d6fd3 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -185,6 +185,10 @@
 
 ### ReinforcementLearningCore.jl
 
+#### v0.10.1
+
+- Fix hook issue with 'extra' call; always run `push!` at end of episode, regardless of whether stopped or terminated
+
 #### v0.10.0
 
 - Transition to `RLCore.forward`, `RLBase.act!`, `RLBase.plan!` and `Base.push!` syntax instead of functional objects for hooks, policies and environments
diff --git a/src/ReinforcementLearningCore/Project.toml b/src/ReinforcementLearningCore/Project.toml
index 1d85301b0..b6743add3 100644
--- a/src/ReinforcementLearningCore/Project.toml
+++ b/src/ReinforcementLearningCore/Project.toml
@@ -1,6 +1,6 @@
 name = "ReinforcementLearningCore"
 uuid = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
-version = "0.10.0"
+version = "0.10.1"
 
 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
diff --git a/src/ReinforcementLearningCore/src/core/hooks.jl b/src/ReinforcementLearningCore/src/core/hooks.jl
index 176ad7f2b..c75531470 100644
--- a/src/ReinforcementLearningCore/src/core/hooks.jl
+++ b/src/ReinforcementLearningCore/src/core/hooks.jl
@@ -90,9 +90,9 @@ Base.getindex(h::StepsPerEpisode) = h.steps
 
 Base.push!(hook::StepsPerEpisode, ::PostActStage, args...) = hook.count += 1
 
-Base.push!(hook::StepsPerEpisode, stage::Union{PostEpisodeStage,PostExperimentStage}, agent, env, ::Symbol) = Base.push!(hook, stage, agent, env)
+Base.push!(hook::StepsPerEpisode, stage::PostEpisodeStage, agent, env, ::Symbol) = Base.push!(hook, stage, agent, env)
 
-function Base.push!(hook::StepsPerEpisode, ::Union{PostEpisodeStage,PostExperimentStage}, agent, env)
+function Base.push!(hook::StepsPerEpisode, ::PostEpisodeStage, agent, env)
     Base.push!(hook.steps, hook.count)
     hook.count = 0
 end
diff --git a/src/ReinforcementLearningCore/src/core/run.jl b/src/ReinforcementLearningCore/src/core/run.jl
index 1e86d732d..1779bd4dc 100644
--- a/src/ReinforcementLearningCore/src/core/run.jl
+++ b/src/ReinforcementLearningCore/src/core/run.jl
@@ -112,10 +112,8 @@ function _run(policy::AbstractPolicy,
             end
         end # end of an episode
 
-        if is_terminated(env)
-            push!(policy, PostEpisodeStage(), env) # let the policy see the last observation
-            push!(hook, PostEpisodeStage(), policy, env)
-        end
+        push!(policy, PostEpisodeStage(), env) # let the policy see the last observation
+        push!(hook, PostEpisodeStage(), policy, env)
     end
     push!(policy, PostExperimentStage(), env)
     push!(hook, PostExperimentStage(), policy, env)
diff --git a/src/ReinforcementLearningCore/src/policies/agent/multi_agent.jl b/src/ReinforcementLearningCore/src/policies/agent/multi_agent.jl
index 7acb54834..a81158114 100644
--- a/src/ReinforcementLearningCore/src/policies/agent/multi_agent.jl
+++ b/src/ReinforcementLearningCore/src/policies/agent/multi_agent.jl
@@ -137,10 +137,8 @@ function Base.run(
             end
         end # end of an episode
 
-        if is_terminated(env)
-            push!(multiagent_policy, PostEpisodeStage(), env) # let the policy see the last observation
-            push!(multiagent_hook, PostEpisodeStage(), multiagent_policy, env)
-        end
+        push!(multiagent_policy, PostEpisodeStage(), env) # let the policy see the last observation
+        push!(multiagent_hook, PostEpisodeStage(), multiagent_policy, env)
     end
     push!(multiagent_policy, PostExperimentStage(), env)
     push!(multiagent_hook, PostExperimentStage(), multiagent_policy, env)
diff --git a/src/ReinforcementLearningCore/test/core/hooks.jl b/src/ReinforcementLearningCore/test/core/hooks.jl
index 4bde98d0e..c48d050ee 100644
--- a/src/ReinforcementLearningCore/test/core/hooks.jl
+++ b/src/ReinforcementLearningCore/test/core/hooks.jl
@@ -30,6 +30,12 @@ function test_noop!(hook::AbstractHook; stages=[PreActStage(), PostActStage(), P
     end
 end
 
+function test_run!(hook::AbstractHook)
+    hook_ = deepcopy(hook)
+    run(RandomPolicy(), RandomWalk1D(), StopAfterEpisode(10), hook_)
+    return hook_
+end
+
 @testset "TotalRewardPerEpisode" begin
     h_1 = TotalRewardPerEpisode(; is_display_on_exit=true)
     h_2 = TotalRewardPerEpisode(; is_display_on_exit=false)
@@ -42,6 +48,9 @@
     policy = RandomPolicy(legal_action_space(env))
 
     for h in (h_1, h_2, h_3, h_4, h_5)
+        h_ = test_run!(h)
+        @test length(h_.rewards) == 10
+
         push!(h, PostActStage(), policy, env)
         @test h.reward == 1
         push!(h, PostEpisodeStage(), policy, env)
@@ -65,6 +74,9 @@
     h_5 = TotalBatchRewardPerEpisode(10)
 
     for h in (h_1, h_2, h_3, h_4, h_5)
+        h_ = test_run!(h)
+        @test length(h_.rewards) == 10
+
         push!(h, PostActStage(), policy, env)
         @test h.reward == fill(1, 10)
         push!(h, PostEpisodeStage(), policy, env)
@@ -119,7 +131,7 @@
     @test h.steps == [100]
 
     push!(h, PostExperimentStage(), agent, env)
-    @test h.steps == [100, 0]
+    @test h.steps == [100]
 
     test_noop!(h, stages=[PreActStage(), PreEpisodeStage(), PreExperimentStage()])
 end
@@ -133,6 +145,9 @@
     h_3 = RewardsPerEpisode{Float16}()
 
     for h in (h_1, h_2, h_3)
+        h_ = test_run!(h)
+        @test length(h_.rewards) == 10
+
         push!(h, PreEpisodeStage(), agent, env)
         @test h.rewards == [[]]
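The net behavioral change: `PostEpisodeStage` hooks now fire at the end of every episode, even when a stop condition rather than termination ended it, and `StepsPerEpisode` no longer pushes a spurious trailing entry at `PostExperimentStage`. A minimal sketch of the expected behavior, mirroring the `test_run!` helper added in the tests above (assumes ReinforcementLearningCore v0.10.1, with `RandomWalk1D` available via the umbrella package):

```julia
using ReinforcementLearning

# Record the number of steps taken in each episode.
hook = StepsPerEpisode()
run(RandomPolicy(), RandomWalk1D(), StopAfterEpisode(10), hook)

# Exactly one entry per episode. Before this fix, the PostExperimentStage
# method on StepsPerEpisode pushed once more at experiment end, leaving a
# trailing 0 (compare the `[100, 0]` -> `[100]` test expectation above).
@assert length(hook.steps) == 10
```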