From 9f1d947f3891c3c9acf4053346c9d3cce2ca2241 Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Tue, 5 Dec 2023 10:52:50 -0500 Subject: [PATCH 1/3] use a highly optimistic initial competence until the second learning cycle --- predicators/competence_models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/predicators/competence_models.py b/predicators/competence_models.py index 36fb21c15d..e38b4af12a 100644 --- a/predicators/competence_models.py +++ b/predicators/competence_models.py @@ -66,7 +66,8 @@ def predict_competence(self, num_additional_data: int) -> float: # Highly naive: predict a constant improvement in competence. del num_additional_data # unused current_competence = self.get_current_competence() - return min(1.0, current_competence + 1e-2) + # Use a highly optimistic initial competence until the second cycle. + return min(1.0, current_competence + 0.5) class OptimisticSkillCompetenceModel(SkillCompetenceModel): From af727668408937d6e6c7fe742986cb2bcfb3e702 Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Tue, 5 Dec 2023 10:56:00 -0500 Subject: [PATCH 2/3] fix --- predicators/competence_models.py | 12 +++++++++--- predicators/settings.py | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/predicators/competence_models.py b/predicators/competence_models.py index e38b4af12a..6bd99959fa 100644 --- a/predicators/competence_models.py +++ b/predicators/competence_models.py @@ -67,7 +67,9 @@ def predict_competence(self, num_additional_data: int) -> float: del num_additional_data # unused current_competence = self.get_current_competence() # Use a highly optimistic initial competence until the second cycle. - return min(1.0, current_competence + 0.5) + return min( + 1.0, + current_competence + CFG.skill_competence_initial_prediction_bonus) class OptimisticSkillCompetenceModel(SkillCompetenceModel): @@ -101,7 +103,9 @@ def predict_competence(self, num_additional_data: int) -> float: nonempty_cycle_obs = self._get_nonempty_cycle_observations() current_competence = self.get_current_competence() if len(nonempty_cycle_obs) < 2: - return min(1.0, current_competence + 1e-2) # default + return min( + 1.0, current_competence + + CFG.skill_competence_initial_prediction_bonus) # default # Look at changes between individual cycles. inference_window = 1 recency_size = CFG.skill_competence_model_optimistic_recency_size @@ -144,7 +148,9 @@ def predict_competence(self, num_additional_data: int) -> float: # the LegacySkillCompetenceModel. if self._competence_regressor is None: current_competence = self.get_current_competence() - return min(1.0, current_competence + 1e-2) + return min( + 1.0, current_competence + + CFG.skill_competence_initial_prediction_bonus) # Use the regressor to predict future competence. current_num_data = self._get_current_num_data() current_rv = self._competence_regressor.predict_beta(current_num_data) diff --git a/predicators/settings.py b/predicators/settings.py index e2f6a5c6c2..b3fe92e663 100644 --- a/predicators/settings.py +++ b/predicators/settings.py @@ -563,6 +563,7 @@ class GlobalSettings: skill_competence_model_optimistic_window_size = 5 skill_competence_model_optimistic_recency_size = 5 skill_competence_default_alpha_beta = (10.0, 1.0) + skill_competence_initial_prediction_bonus = 0.5 # refinement cost estimation parameters refinement_estimator = "oracle" # default refinement cost estimator From b25d08beb697cb568c58e4d13b6fe87cfc5b26fd Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Tue, 5 Dec 2023 11:33:21 -0500 Subject: [PATCH 3/3] fix --- tests/test_competence_models.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tests/test_competence_models.py b/tests/test_competence_models.py index 6626cd51a7..8a7551405f 100644 --- a/tests/test_competence_models.py +++ b/tests/test_competence_models.py @@ -29,20 +29,25 @@ def test_legacy_skill_competence_model(): """Tests for LegacySkillCompetenceModel().""" utils.reset_config({ "skill_competence_default_alpha_beta": (1.0, 1.0), + "skill_competence_initial_prediction_bonus": 1e-2, }) model = create_competence_model("legacy", "test") assert isinstance(model, LegacySkillCompetenceModel) assert np.isclose(model.get_current_competence(), 0.5) - assert np.isclose(model.predict_competence(1), 0.5 + 1e-2) + assert np.isclose(model.predict_competence(1), + 0.5 + CFG.skill_competence_initial_prediction_bonus) model.observe(True) assert model.get_current_competence() > 0.5 - assert model.predict_competence(1) > 0.5 + 1e-2 + assert model.predict_competence( + 1) > 0.5 + CFG.skill_competence_initial_prediction_bonus model.observe(False) assert np.isclose(model.get_current_competence(), 0.5) - assert np.isclose(model.predict_competence(1), 0.5 + 1e-2) + assert np.isclose(model.predict_competence(1), + 0.5 + CFG.skill_competence_initial_prediction_bonus) model.advance_cycle() assert np.isclose(model.get_current_competence(), 0.5) - assert np.isclose(model.predict_competence(1), 0.5 + 1e-2) + assert np.isclose(model.predict_competence(1), + 0.5 + CFG.skill_competence_initial_prediction_bonus) model.observe(True) assert model.get_current_competence() > 0.5 @@ -53,10 +58,12 @@ def test_latent_variable_skill_competence_model_short(): "skill_competence_model_num_em_iters": 1, "skill_competence_model_max_train_iters": 10, "skill_competence_default_alpha_beta": (1.0, 1.0), + "skill_competence_initial_prediction_bonus": 1e-2, }) model = create_competence_model("latent_variable", "test") assert np.isclose(model.get_current_competence(), 0.5) - assert np.isclose(model.predict_competence(1), 0.5 + 1e-2) + assert np.isclose(model.predict_competence(1), + 0.5 + CFG.skill_competence_initial_prediction_bonus) model.observe(True) assert model.get_current_competence() > 0.5 assert model.predict_competence(1) > model.get_current_competence() @@ -72,12 +79,14 @@ def test_optimistic_skill_competence_model(): """Tests for OptimisticSkillCompetenceModel().""" utils.reset_config({ "skill_competence_default_alpha_beta": (1.0, 1.0), + "skill_competence_initial_prediction_bonus": 1e-2, }) h = CFG.skill_competence_model_lookahead model = create_competence_model("optimistic", "test") assert np.isclose(model.get_current_competence(), 0.5) - assert np.isclose(model.predict_competence(h), 0.5 + 1e-2) + assert np.isclose(model.predict_competence(h), + 0.5 + CFG.skill_competence_initial_prediction_bonus) # Test impossible skill. model = create_competence_model("optimistic", "impossible-skill") @@ -154,12 +163,14 @@ def test_latent_variable_skill_competence_model_long(): """Long tests for LatentVariableSkillCompetenceModel().""" utils.reset_config({ "skill_competence_default_alpha_beta": (1.0, 1.0), + "skill_competence_initial_prediction_bonus": 1e-2, }) h = CFG.skill_competence_model_lookahead model = create_competence_model("latent_variable", "test") assert np.isclose(model.get_current_competence(), 0.5) - assert np.isclose(model.predict_competence(h), 0.5 + 1e-2) + assert np.isclose(model.predict_competence(h), + 0.5 + CFG.skill_competence_initial_prediction_bonus) # Test impossible skill. model = create_competence_model("latent_variable", "impossible-skill")