Tweak / fix

jeremiahpslewis · Jan 14, 2025 · fcb0287 · fcb0287
1 parent b2a1865
commit fcb0287
Show file tree

Hide file tree

Showing 5 changed files with 27 additions and 9 deletions.
diff --git a/src/AIAPC2020/env.jl b/src/AIAPC2020/env.jl
@@ -174,3 +174,11 @@ RLBase.StateStyle(::AIAPCEnv) = Observation{Int64}()
 RLBase.RewardStyle(::AIAPCEnv) = STEP_REWARD
 RLBase.UtilityStyle(::AIAPCEnv) = GENERAL_SUM
 RLBase.ChanceStyle(::AIAPCEnv) = DETERMINISTIC
+
+
+# AIAPC and DDDC environments need special handling of episodes and experiments: an
+# episode is a single price-setting interaction and an experiment is a sequence of
+# episodes, but the environment state is not reset between episodes. The state is
+# therefore initialized once, in the PreExperimentStage, and PreEpisodeStage becomes a
+# no-op.
+# PreEpisodeStage hook for AIAPCEnv: intentionally does nothing and returns `nothing`,
+# so no state is pushed to the agent at the start of each episode.
+function Base.push!(agent::Agent, ::PreEpisodeStage, env::AIAPCEnv, player::Player)
+    return nothing
+end
+
+# PreExperimentStage hook for AIAPCEnv: reads `player`'s state from `env` and pushes
+# it onto `agent.trajectory` as a one-field named tuple `(state = ...,)`.
+# Returns whatever `push!` on the trajectory returns.
+function Base.push!(agent::Agent, ::PreExperimentStage, env::AIAPCEnv, player::Player)
+    initial_state = state(env, player)
+    return push!(agent.trajectory, (; state = initial_state))
+end
diff --git a/src/DDDC2023/env.jl b/src/DDDC2023/env.jl
@@ -191,3 +191,10 @@ RLBase.StateStyle(::DDDCEnv) = Observation{Int64}()
 RLBase.RewardStyle(::DDDCEnv) = STEP_REWARD
 RLBase.UtilityStyle(::DDDCEnv) = GENERAL_SUM
 RLBase.ChanceStyle(::DDDCEnv) = DETERMINISTIC
+
+# AIAPC and DDDC environments need special handling of episodes and experiments: an
+# episode is a single price-setting interaction and an experiment is a sequence of
+# episodes, but the environment state is not reset between episodes. The state is
+# therefore initialized once, in the PreExperimentStage, and PreEpisodeStage becomes a
+# no-op.
+# PreEpisodeStage hook for DDDCEnv: intentionally does nothing and returns `nothing`,
+# so no state is pushed to the agent at the start of each episode.
+function Base.push!(agent::Agent, ::PreEpisodeStage, env::DDDCEnv, player::Player)
+    return nothing
+end
+
+# PreExperimentStage hook for DDDCEnv: reads `player`'s state from `env` and pushes
+# it onto `agent.trajectory` as a one-field named tuple `(state = ...,)`.
+# Returns whatever `push!` on the trajectory returns.
+function Base.push!(agent::Agent, ::PreExperimentStage, env::DDDCEnv, player::Player)
+    initial_state = state(env, player)
+    return push!(agent.trajectory, (; state = initial_state))
+end
diff --git a/test/policy.jl b/test/policy.jl
@@ -60,6 +60,7 @@ end
     # First three rounds
 
     # t=1
+    push!(policy, PreExperimentStage(), env)
     push!(policy, PreEpisodeStage(), env)
     push!(policy, PreActStage(), env)
     @test length(policy.agents[Player(1)].trajectory.container) == 0

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -9,6 +9,7 @@ using ReinforcementLearning:
     PreActStage,
     PostEpisodeStage,
     PreEpisodeStage,
+    PreExperimentStage,
     state,
     reward,
     current_player,
@@ -20,7 +21,9 @@ using ReinforcementLearning:
     RLBase,
     AbstractPolicy,
     act!,
-    plan!
+    plan!,
+    Player
+
 import ReinforcementLearning: RLCore
 using Statistics
 using AlgorithmicCompetition:
@@ -43,8 +46,8 @@ using AlgorithmicCompetition:
     DataDemandDigitalParams,
     DDDCEnv,
     DDDCHyperParameters,
-    DDDCTotalRewardPerLastNEpisodes,
     DDDCPolicy,
+    DDDCTotalRewardPerLastNEpisodes,
     economic_summary,
     Experiment,
     extract_profit_vars,
@@ -58,15 +61,20 @@ using AlgorithmicCompetition:
     profit_gain,
     Q_i_0,
     Q,
+    QBasedPolicy,
     reward,
     run_and_extract,
     run,
     solve_bertrand,
     solve_monopolist,
+    TabularApproximator,
+    TabularQApproximator,
+    TabularVApproximator,
+    TDLearner,
     TDLearner,
     π
 using Distributed
-
+    
 @testset "AlgorithmicCompetition.jl" begin
     @testset "Paramter tests" begin
         include("alpha_beta.jl")

diff --git a/test/tabular_approximator.jl b/test/tabular_approximator.jl
@@ -1,9 +1,3 @@
-using Test
-using AlgorithmicCompetition:
-    TabularApproximator, TabularVApproximator, TabularQApproximator, TDLearner, QBasedPolicy
-import ReinforcementLearning: RLBase
-using ReinforcementLearning
-
 @testset "Constructors" begin
     @test TabularApproximator(fill(1, 10, 10)) isa TabularApproximator
     @test TabularVApproximator(n_state = 10) isa TabularApproximator{Vector{Float64}}