From fcb02871e8cce023f6ae1676b1e64b3c20ad4f55 Mon Sep 17 00:00:00 2001
From: Jeremiah Lewis <4462211+jeremiahpslewis@users.noreply.github.com>
Date: Tue, 14 Jan 2025 16:21:53 +0100
Subject: [PATCH] Tweak / fix

---
 src/AIAPC2020/env.jl         |  8 ++++++++
 src/DDDC2023/env.jl          |  7 +++++++
 test/policy.jl               |  1 +
 test/runtests.jl             | 14 +++++++++++---
 test/tabular_approximator.jl |  6 ------
 5 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/AIAPC2020/env.jl b/src/AIAPC2020/env.jl
index 02c4543f..e106ed80 100644
--- a/src/AIAPC2020/env.jl
+++ b/src/AIAPC2020/env.jl
@@ -174,3 +174,11 @@ RLBase.StateStyle(::AIAPCEnv) = Observation{Int64}()
 RLBase.RewardStyle(::AIAPCEnv) = STEP_REWARD
 RLBase.UtilityStyle(::AIAPCEnv) = GENERAL_SUM
 RLBase.ChanceStyle(::AIAPCEnv) = DETERMINISTIC
+
+
+# Need special handling of episodes and experiments for the AIAPC and DDDC environments: an episode is a single price-setting interaction and an experiment is a sequence of episodes, but the environment state is not reset between episodes. As a result, the state is initialized once, in the PreExperimentStage, and PreEpisodeStage becomes a no-op.
+Base.push!(agent::Agent, ::PreEpisodeStage, env::AIAPCEnv, player::Player) = nothing
+
+function Base.push!(agent::Agent, ::PreExperimentStage, env::AIAPCEnv, player::Player)
+    push!(agent.trajectory, (state = state(env, player),))
+end
diff --git a/src/DDDC2023/env.jl b/src/DDDC2023/env.jl
index 4f880c7d..09c30652 100644
--- a/src/DDDC2023/env.jl
+++ b/src/DDDC2023/env.jl
@@ -191,3 +191,10 @@ RLBase.StateStyle(::DDDCEnv) = Observation{Int64}()
 RLBase.RewardStyle(::DDDCEnv) = STEP_REWARD
 RLBase.UtilityStyle(::DDDCEnv) = GENERAL_SUM
 RLBase.ChanceStyle(::DDDCEnv) = DETERMINISTIC
+
+# Need special handling of episodes and experiments for the AIAPC and DDDC environments: an episode is a single price-setting interaction and an experiment is a sequence of episodes, but the environment state is not reset between episodes. As a result, the state is initialized once, in the PreExperimentStage, and PreEpisodeStage becomes a no-op.
+Base.push!(agent::Agent, ::PreEpisodeStage, env::DDDCEnv, player::Player) = nothing
+
+function Base.push!(agent::Agent, ::PreExperimentStage, env::DDDCEnv, player::Player)
+    push!(agent.trajectory, (state = state(env, player),))
+end
diff --git a/test/policy.jl b/test/policy.jl
index 5048aaa7..46ca52a2 100644
--- a/test/policy.jl
+++ b/test/policy.jl
@@ -60,6 +60,7 @@ end

     # First three rounds
     # t=1
+    push!(policy, PreExperimentStage(), env)
     push!(policy, PreEpisodeStage(), env)
     push!(policy, PreActStage(), env)
     @test length(policy.agents[Player(1)].trajectory.container) == 0
diff --git a/test/runtests.jl b/test/runtests.jl
index 0bcd2402..75a63e81 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -9,6 +9,7 @@ using ReinforcementLearning:
     PreActStage,
     PostEpisodeStage,
     PreEpisodeStage,
+    PreExperimentStage,
     state,
     reward,
     current_player,
@@ -20,7 +21,9 @@ using ReinforcementLearning:
     RLBase,
     AbstractPolicy,
     act!,
-    plan!
+    plan!,
+    Player
+
 import ReinforcementLearning: RLCore
 using Statistics
 using AlgorithmicCompetition:
@@ -43,8 +46,8 @@ using AlgorithmicCompetition:
     DataDemandDigitalParams,
     DDDCEnv,
     DDDCHyperParameters,
-    DDDCTotalRewardPerLastNEpisodes,
     DDDCPolicy,
+    DDDCTotalRewardPerLastNEpisodes,
     economic_summary,
     Experiment,
     extract_profit_vars,
@@ -58,15 +61,20 @@ using AlgorithmicCompetition:
     profit_gain,
     Q_i_0,
     Q,
+    QBasedPolicy,
     reward,
     run_and_extract,
     run,
     solve_bertrand,
     solve_monopolist,
+    TabularApproximator,
+    TabularQApproximator,
+    TabularVApproximator,
+    TDLearner,
     TDLearner,
     π
 using Distributed
-
+
 @testset "AlgorithmicCompetition.jl" begin
     @testset "Paramter tests" begin
         include("alpha_beta.jl")
diff --git a/test/tabular_approximator.jl b/test/tabular_approximator.jl
index e3a50816..41f4262b 100644
--- a/test/tabular_approximator.jl
+++ b/test/tabular_approximator.jl
@@ -1,9 +1,3 @@
-using Test
-using AlgorithmicCompetition:
-    TabularApproximator, TabularVApproximator, TabularQApproximator, TDLearner, QBasedPolicy
-import ReinforcementLearning: RLBase
-using ReinforcementLearning
-
 @testset "Constructors" begin
     @test TabularApproximator(fill(1, 10, 10)) isa TabularApproximator
     @test TabularVApproximator(n_state = 10) isa TabularApproximator{Vector{Float64}}
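
A note on the pattern used in the two env.jl hunks above: the fix relies on plain multiple dispatch over the stage type. The PreExperimentStage method seeds the agent's trajectory with the initial state, while the PreEpisodeStage method is deliberately a no-op, so episode boundaries never reset the carried-over state. The sketch below is a minimal, self-contained illustration of that dispatch pattern in Julia; ToyAgent and the Toy*Stage structs are made-up stand-ins, not the Agent, AIAPCEnv/DDDCEnv, or stage types from ReinforcementLearning.jl and AlgorithmicCompetition.jl.

using Test

# Stand-in stage markers and agent type, used only for this illustration.
struct ToyPreExperimentStage end
struct ToyPreEpisodeStage end

struct ToyAgent
    trajectory::Vector{Int}
end

# Seed the trajectory exactly once, at the start of the experiment.
Base.push!(agent::ToyAgent, ::ToyPreExperimentStage, state::Int) =
    push!(agent.trajectory, state)

# Episode boundaries reset nothing: the hook is a deliberate no-op.
Base.push!(agent::ToyAgent, ::ToyPreEpisodeStage, state::Int) = nothing

agent = ToyAgent(Int[])
push!(agent, ToyPreExperimentStage(), 1)   # state initialized once
push!(agent, ToyPreEpisodeStage(), 2)      # ignored; state carries across episodes
@test agent.trajectory == [1]

This is also why test/policy.jl now pushes PreExperimentStage() before PreEpisodeStage(): once the per-episode hook no longer seeds the state, the experiment-level hook has to run first so the trajectory starts from the initial state.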