diff --git a/src/ontogpt/cli.py b/src/ontogpt/cli.py
index 552ba1b2e..ac0493b25 100644
--- a/src/ontogpt/cli.py
+++ b/src/ontogpt/cli.py
@@ -1450,6 +1450,7 @@ def eval_enrichment(genes, input_file, number_to_drop, annotations_path, model,
 @main.command()
 @recurse_option
+@model_option
 @output_option_txt
 @click.option(
     "--num-tests",
@@ -1466,11 +1467,16 @@ def eval_enrichment(genes, input_file, number_to_drop, annotations_path, model,
     " Otherwise the full input text is passed.",
 )
 @click.argument("evaluator")
-def eval(evaluator, num_tests, output, chunking, **kwargs):
+def eval(evaluator, num_tests, output, chunking, model, **kwargs):
     """Evaluate an extractor."""
     logging.info(f"Creating for {evaluator}")
     evaluator = create_evaluator(evaluator)
     evaluator.num_tests = num_tests
     evaluator.chunking = chunking
+    if model:
+        # Resolve the requested model to its canonical name. When --model is
+        # not given, leave evaluator.model untouched so the DEFAULT_MODEL
+        # fallback on SPIRESEvaluationEngine applies (avoids a NameError from
+        # referencing a name that was never bound).
+        selectmodel = get_model_by_name(model)
+        evaluator.model = selectmodel["alternative_names"][0]
     eos = evaluator.eval()
     output.write(dump_minimal_yaml(eos, minimize=False))
diff --git a/src/ontogpt/evaluation/ctd/eval_ctd.py b/src/ontogpt/evaluation/ctd/eval_ctd.py
index 06199a7a8..03b80c00e 100644
--- a/src/ontogpt/evaluation/ctd/eval_ctd.py
+++ b/src/ontogpt/evaluation/ctd/eval_ctd.py
@@ -136,7 +136,8 @@ class EvalCTD(SPIRESEvaluationEngine):
     object_prefix = "MESH"
 
     def __post_init__(self):
-        self.extractor = SPIRESEngine("ctd.ChemicalToDiseaseDocument")
+        self.extractor = SPIRESEngine(template="ctd.ChemicalToDiseaseDocument",
+                                      model=self.model)
         # synonyms are derived entirely from training set
         self.extractor.load_dictionary(DATABASE_DIR / "synonyms.yaml")
diff --git a/src/ontogpt/evaluation/evaluation_engine.py b/src/ontogpt/evaluation/evaluation_engine.py
index aa1a2241a..534f7b8bf 100644
--- a/src/ontogpt/evaluation/evaluation_engine.py
+++ b/src/ontogpt/evaluation/evaluation_engine.py
@@ -16,6 +16,7 @@
 from oaklib import BasicOntologyInterface
 from pydantic import BaseModel
 
+from ontogpt import DEFAULT_MODEL
 from ontogpt.engines.spires_engine import SPIRESEngine
@@ -99,3 +100,7 @@ class SPIRESEvaluationEngine(EvaluationEngine):
     chunking: bool = False
     """Whether to pre-process input texts by chunking. If True, each chunk gets its own
     prompt. Otherwise, pass the full text with each prompt."""
+
+    model: str = DEFAULT_MODEL
+    """Name of the model to use in evaluation. Defaults to the default model defined
+    in models.yaml, generally gpt-3.5-turbo."""