From fcb02871e8cce023f6ae1676b1e64b3c20ad4f55 Mon Sep 17 00:00:00 2001
From: Jeremiah Lewis <4462211+jeremiahpslewis@users.noreply.github.com>
Date: Tue, 14 Jan 2025 16:21:53 +0100
Subject: [PATCH] Tweak / fix

---
 src/AIAPC2020/env.jl         |  8 ++++++++
 src/DDDC2023/env.jl          |  7 +++++++
 test/policy.jl               |  1 +
 test/runtests.jl             | 14 +++++++++++---
 test/tabular_approximator.jl |  6 ------
 5 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/AIAPC2020/env.jl b/src/AIAPC2020/env.jl
index 02c4543f..e106ed80 100644
--- a/src/AIAPC2020/env.jl
+++ b/src/AIAPC2020/env.jl
@@ -174,3 +174,11 @@ RLBase.StateStyle(::AIAPCEnv) = Observation{Int64}()
 RLBase.RewardStyle(::AIAPCEnv) = STEP_REWARD
 RLBase.UtilityStyle(::AIAPCEnv) = GENERAL_SUM
 RLBase.ChanceStyle(::AIAPCEnv) = DETERMINISTIC
+
+
+# Need special handling of episodes and experiments for the AIAPC and DDDC environments: an episode is a single price-setting interaction and an experiment is a sequence of episodes, but the environment state is not reset between episodes. As a result, the state is initialized once, in the PreExperimentStage, and PreEpisodeStage becomes a no-op.
+Base.push!(agent::Agent, ::PreEpisodeStage, env::AIAPCEnv, player::Player) = nothing
+
+function Base.push!(agent::Agent, ::PreExperimentStage, env::AIAPCEnv, player::Player)
+    push!(agent.trajectory, (state = state(env, player),))
+end
diff --git a/src/DDDC2023/env.jl b/src/DDDC2023/env.jl
index 4f880c7d..09c30652 100644
--- a/src/DDDC2023/env.jl
+++ b/src/DDDC2023/env.jl
@@ -191,3 +191,10 @@ RLBase.StateStyle(::DDDCEnv) = Observation{Int64}()
 RLBase.RewardStyle(::DDDCEnv) = STEP_REWARD
 RLBase.UtilityStyle(::DDDCEnv) = GENERAL_SUM
 RLBase.ChanceStyle(::DDDCEnv) = DETERMINISTIC
+
+# Need special handling of episodes and experiments for the AIAPC and DDDC environments: an episode is a single price-setting interaction and an experiment is a sequence of episodes, but the environment state is not reset between episodes. As a result, the state is initialized once, in the PreExperimentStage, and PreEpisodeStage becomes a no-op.
+Base.push!(agent::Agent, ::PreEpisodeStage, env::DDDCEnv, player::Player) = nothing
+
+function Base.push!(agent::Agent, ::PreExperimentStage, env::DDDCEnv, player::Player)
+    push!(agent.trajectory, (state = state(env, player),))
+end
diff --git a/test/policy.jl b/test/policy.jl
index 5048aaa7..46ca52a2 100644
--- a/test/policy.jl
+++ b/test/policy.jl
@@ -60,6 +60,7 @@ end

     # First three rounds
     # t=1
+    push!(policy, PreExperimentStage(), env)
     push!(policy, PreEpisodeStage(), env)
     push!(policy, PreActStage(), env)
     @test length(policy.agents[Player(1)].trajectory.container) == 0
diff --git a/test/runtests.jl b/test/runtests.jl
index 0bcd2402..75a63e81 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -9,6 +9,7 @@ using ReinforcementLearning:
     PreActStage,
     PostEpisodeStage,
     PreEpisodeStage,
+    PreExperimentStage,
     state,
     reward,
     current_player,
@@ -20,7 +21,9 @@ using ReinforcementLearning:
     RLBase,
     AbstractPolicy,
     act!,
-    plan!
+    plan!,
+    Player
+
 import ReinforcementLearning: RLCore
 using Statistics
 using AlgorithmicCompetition:
@@ -43,8 +46,8 @@ using AlgorithmicCompetition:
     DataDemandDigitalParams,
     DDDCEnv,
     DDDCHyperParameters,
-    DDDCTotalRewardPerLastNEpisodes,
     DDDCPolicy,
+    DDDCTotalRewardPerLastNEpisodes,
     economic_summary,
     Experiment,
     extract_profit_vars,
@@ -58,15 +61,20 @@ using AlgorithmicCompetition:
     profit_gain,
     Q_i_0,
     Q,
+    QBasedPolicy,
     reward,
     run_and_extract,
     run,
     solve_bertrand,
     solve_monopolist,
+    TabularApproximator,
+    TabularQApproximator,
+    TabularVApproximator,
+    TDLearner,
     TDLearner,
     π
 using Distributed
-
+
 @testset "AlgorithmicCompetition.jl" begin
     @testset "Paramter tests" begin
         include("alpha_beta.jl")
diff --git a/test/tabular_approximator.jl b/test/tabular_approximator.jl
index e3a50816..41f4262b 100644
--- a/test/tabular_approximator.jl
+++ b/test/tabular_approximator.jl
@@ -1,9 +1,3 @@
-using Test
-using AlgorithmicCompetition:
-    TabularApproximator, TabularVApproximator, TabularQApproximator, TDLearner, QBasedPolicy
-import ReinforcementLearning: RLBase
-using ReinforcementLearning
-
 @testset "Constructors" begin
     @test TabularApproximator(fill(1, 10, 10)) isa TabularApproximator
     @test TabularVApproximator(n_state = 10) isa TabularApproximator{Vector{Float64}}
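
A note on the pattern used in the two env.jl hunks above: the fix relies on plain multiple dispatch over the stage type. The PreExperimentStage method seeds the agent's trajectory with the initial state, while the PreEpisodeStage method is deliberately a no-op, so episode boundaries never reset the carried-over state. The sketch below is a minimal, self-contained illustration of that dispatch pattern in Julia; ToyAgent and the Toy*Stage structs are made-up stand-ins, not the Agent, AIAPCEnv/DDDCEnv, or stage types from ReinforcementLearning.jl and AlgorithmicCompetition.jl.

using Test

# Stand-in stage markers and agent type, used only for this illustration.
struct ToyPreExperimentStage end
struct ToyPreEpisodeStage end

struct ToyAgent
    trajectory::Vector{Int}
end

# Seed the trajectory exactly once, at the start of the experiment.
Base.push!(agent::ToyAgent, ::ToyPreExperimentStage, state::Int) =
    push!(agent.trajectory, state)

# Episode boundaries reset nothing: the hook is a deliberate no-op.
Base.push!(agent::ToyAgent, ::ToyPreEpisodeStage, state::Int) = nothing

agent = ToyAgent(Int[])
push!(agent, ToyPreExperimentStage(), 1)   # state initialized once
push!(agent, ToyPreEpisodeStage(), 2)      # ignored; state carries across episodes
@test agent.trajectory == [1]

This is also why test/policy.jl now pushes PreExperimentStage() before PreEpisodeStage(): once the per-episode hook no longer seeds the state, the experiment-level hook has to run first so the trajectory starts from the initial state.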