Skip to content

Commit

Permalink
Tweak / fix
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremiahpslewis committed Jan 14, 2025
1 parent b2a1865 commit fcb0287
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 9 deletions.
8 changes: 8 additions & 0 deletions src/AIAPC2020/env.jl
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,11 @@ RLBase.StateStyle(::AIAPCEnv) = Observation{Int64}()
RLBase.RewardStyle(::AIAPCEnv) = STEP_REWARD
RLBase.UtilityStyle(::AIAPCEnv) = GENERAL_SUM
RLBase.ChanceStyle(::AIAPCEnv) = DETERMINISTIC


# The AIAPC and DDDC environments need special handling of episodes and experiments:
# an episode is a single price-setting interaction and an experiment is a sequence of
# episodes, but the environment state is not reset between episodes. As a result, the
# state is initialized once, in the PreExperimentStage, and the PreEpisodeStage becomes
# a no-op.
# Pre-episode hook for `AIAPCEnv`: intentionally does nothing. The trajectory is left
# untouched and `nothing` is returned.
function Base.push!(agent::Agent, ::PreEpisodeStage, env::AIAPCEnv, player::Player)
    return nothing
end

# Pre-experiment hook for `AIAPCEnv`: pushes a named tuple holding the value of
# `state(env, player)` under the key `state` onto the agent's trajectory, and returns
# whatever that inner `push!` returns.
function Base.push!(agent::Agent, ::PreExperimentStage, env::AIAPCEnv, player::Player)
    initial_state = state(env, player)
    return push!(agent.trajectory, (; state = initial_state))
end
7 changes: 7 additions & 0 deletions src/DDDC2023/env.jl
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,10 @@ RLBase.StateStyle(::DDDCEnv) = Observation{Int64}()
RLBase.RewardStyle(::DDDCEnv) = STEP_REWARD
RLBase.UtilityStyle(::DDDCEnv) = GENERAL_SUM
RLBase.ChanceStyle(::DDDCEnv) = DETERMINISTIC

# The AIAPC and DDDC environments need special handling of episodes and experiments:
# an episode is a single price-setting interaction and an experiment is a sequence of
# episodes, but the environment state is not reset between episodes. As a result, the
# state is initialized once, in the PreExperimentStage, and the PreEpisodeStage becomes
# a no-op.
# Pre-episode hook for `DDDCEnv`: intentionally does nothing. The trajectory is left
# untouched and `nothing` is returned.
function Base.push!(agent::Agent, ::PreEpisodeStage, env::DDDCEnv, player::Player)
    return nothing
end

# Pre-experiment hook for `DDDCEnv`: pushes a named tuple holding the value of
# `state(env, player)` under the key `state` onto the agent's trajectory, and returns
# whatever that inner `push!` returns.
function Base.push!(agent::Agent, ::PreExperimentStage, env::DDDCEnv, player::Player)
    initial_state = state(env, player)
    return push!(agent.trajectory, (; state = initial_state))
end
1 change: 1 addition & 0 deletions test/policy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ end
# First three rounds

# t=1
push!(policy, PreExperimentStage(), env)
push!(policy, PreEpisodeStage(), env)
push!(policy, PreActStage(), env)
@test length(policy.agents[Player(1)].trajectory.container) == 0
Expand Down
14 changes: 11 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ using ReinforcementLearning:
PreActStage,
PostEpisodeStage,
PreEpisodeStage,
PreExperimentStage,
state,
reward,
current_player,
Expand All @@ -20,7 +21,9 @@ using ReinforcementLearning:
RLBase,
AbstractPolicy,
act!,
plan!
plan!,
Player

import ReinforcementLearning: RLCore
using Statistics
using AlgorithmicCompetition:
Expand All @@ -43,8 +46,8 @@ using AlgorithmicCompetition:
DataDemandDigitalParams,
DDDCEnv,
DDDCHyperParameters,
DDDCTotalRewardPerLastNEpisodes,
DDDCPolicy,
DDDCTotalRewardPerLastNEpisodes,
economic_summary,
Experiment,
extract_profit_vars,
Expand All @@ -58,15 +61,20 @@ using AlgorithmicCompetition:
profit_gain,
Q_i_0,
Q,
QBasedPolicy,
reward,
run_and_extract,
run,
solve_bertrand,
solve_monopolist,
TabularApproximator,
TabularQApproximator,
TabularVApproximator,
TDLearner,
TDLearner,
π
using Distributed

@testset "AlgorithmicCompetition.jl" begin
@testset "Paramter tests" begin
include("alpha_beta.jl")
Expand Down
6 changes: 0 additions & 6 deletions test/tabular_approximator.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
using Test
using AlgorithmicCompetition:
TabularApproximator, TabularVApproximator, TabularQApproximator, TDLearner, QBasedPolicy
import ReinforcementLearning: RLBase
using ReinforcementLearning

@testset "Constructors" begin
@test TabularApproximator(fill(1, 10, 10)) isa TabularApproximator
@test TabularVApproximator(n_state = 10) isa TabularApproximator{Vector{Float64}}
Expand Down

0 comments on commit fcb0287

Please sign in to comment.