Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use a highly optimistic initial competence until the second cycle #1595

Merged
3 commits merged on
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions predicators/competence_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,10 @@ def predict_competence(self, num_additional_data: int) -> float:
# Highly naive: predict a constant improvement in competence.
del num_additional_data # unused
current_competence = self.get_current_competence()
return min(1.0, current_competence + 1e-2)
# Use a highly optimistic initial competence until the second cycle.
return min(
1.0,
current_competence + CFG.skill_competence_initial_prediction_bonus)


class OptimisticSkillCompetenceModel(SkillCompetenceModel):
Expand Down Expand Up @@ -100,7 +103,9 @@ def predict_competence(self, num_additional_data: int) -> float:
nonempty_cycle_obs = self._get_nonempty_cycle_observations()
current_competence = self.get_current_competence()
if len(nonempty_cycle_obs) < 2:
return min(1.0, current_competence + 1e-2) # default
return min(
1.0, current_competence +
CFG.skill_competence_initial_prediction_bonus) # default
# Look at changes between individual cycles.
inference_window = 1
recency_size = CFG.skill_competence_model_optimistic_recency_size
Expand Down Expand Up @@ -143,7 +148,9 @@ def predict_competence(self, num_additional_data: int) -> float:
# the LegacySkillCompetenceModel.
if self._competence_regressor is None:
current_competence = self.get_current_competence()
return min(1.0, current_competence + 1e-2)
return min(
1.0, current_competence +
CFG.skill_competence_initial_prediction_bonus)
# Use the regressor to predict future competence.
current_num_data = self._get_current_num_data()
current_rv = self._competence_regressor.predict_beta(current_num_data)
Expand Down
1 change: 1 addition & 0 deletions predicators/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ class GlobalSettings:
skill_competence_model_optimistic_window_size = 5
skill_competence_model_optimistic_recency_size = 5
skill_competence_default_alpha_beta = (10.0, 1.0)
skill_competence_initial_prediction_bonus = 0.5

# refinement cost estimation parameters
refinement_estimator = "oracle" # default refinement cost estimator
Expand Down
25 changes: 18 additions & 7 deletions tests/test_competence_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,25 @@ def test_legacy_skill_competence_model():
"""Tests for LegacySkillCompetenceModel()."""
utils.reset_config({
"skill_competence_default_alpha_beta": (1.0, 1.0),
"skill_competence_initial_prediction_bonus": 1e-2,
})
model = create_competence_model("legacy", "test")
assert isinstance(model, LegacySkillCompetenceModel)
assert np.isclose(model.get_current_competence(), 0.5)
assert np.isclose(model.predict_competence(1), 0.5 + 1e-2)
assert np.isclose(model.predict_competence(1),
0.5 + CFG.skill_competence_initial_prediction_bonus)
model.observe(True)
assert model.get_current_competence() > 0.5
assert model.predict_competence(1) > 0.5 + 1e-2
assert model.predict_competence(
1) > 0.5 + CFG.skill_competence_initial_prediction_bonus
model.observe(False)
assert np.isclose(model.get_current_competence(), 0.5)
assert np.isclose(model.predict_competence(1), 0.5 + 1e-2)
assert np.isclose(model.predict_competence(1),
0.5 + CFG.skill_competence_initial_prediction_bonus)
model.advance_cycle()
assert np.isclose(model.get_current_competence(), 0.5)
assert np.isclose(model.predict_competence(1), 0.5 + 1e-2)
assert np.isclose(model.predict_competence(1),
0.5 + CFG.skill_competence_initial_prediction_bonus)
model.observe(True)
assert model.get_current_competence() > 0.5

Expand All @@ -53,10 +58,12 @@ def test_latent_variable_skill_competence_model_short():
"skill_competence_model_num_em_iters": 1,
"skill_competence_model_max_train_iters": 10,
"skill_competence_default_alpha_beta": (1.0, 1.0),
"skill_competence_initial_prediction_bonus": 1e-2,
})
model = create_competence_model("latent_variable", "test")
assert np.isclose(model.get_current_competence(), 0.5)
assert np.isclose(model.predict_competence(1), 0.5 + 1e-2)
assert np.isclose(model.predict_competence(1),
0.5 + CFG.skill_competence_initial_prediction_bonus)
model.observe(True)
assert model.get_current_competence() > 0.5
assert model.predict_competence(1) > model.get_current_competence()
Expand All @@ -72,12 +79,14 @@ def test_optimistic_skill_competence_model():
"""Tests for OptimisticSkillCompetenceModel()."""
utils.reset_config({
"skill_competence_default_alpha_beta": (1.0, 1.0),
"skill_competence_initial_prediction_bonus": 1e-2,
})
h = CFG.skill_competence_model_lookahead

model = create_competence_model("optimistic", "test")
assert np.isclose(model.get_current_competence(), 0.5)
assert np.isclose(model.predict_competence(h), 0.5 + 1e-2)
assert np.isclose(model.predict_competence(h),
0.5 + CFG.skill_competence_initial_prediction_bonus)

# Test impossible skill.
model = create_competence_model("optimistic", "impossible-skill")
Expand Down Expand Up @@ -154,12 +163,14 @@ def test_latent_variable_skill_competence_model_long():
"""Long tests for LatentVariableSkillCompetenceModel()."""
utils.reset_config({
"skill_competence_default_alpha_beta": (1.0, 1.0),
"skill_competence_initial_prediction_bonus": 1e-2,
})
h = CFG.skill_competence_model_lookahead

model = create_competence_model("latent_variable", "test")
assert np.isclose(model.get_current_competence(), 0.5)
assert np.isclose(model.predict_competence(h), 0.5 + 1e-2)
assert np.isclose(model.predict_competence(h),
0.5 + CFG.skill_competence_initial_prediction_bonus)

# Test impossible skill.
model = create_competence_model("latent_variable", "impossible-skill")
Expand Down
Loading