Fix for #215 (#216)

Ensure YAML output begins with a document separator (`---`) so that multiple documents written to the same output can be loaded back as separate documents. The separator must be written even when the output contains only a single document (note that ruamel.yaml provides a `dump_all` method, but it only emits separators when dumping multiple documents).
monarch-initiative · Sep 22, 2023 · 1125d83 · 1125d83
2 parents 4ac7322 + 941d250
commit 1125d83
Showing 4 changed files with 10 additions and 8 deletions.
diff --git a/src/ontogpt/cli.py b/src/ontogpt/cli.py
@@ -105,6 +105,7 @@ def write_extraction(
             exporter = HTMLExporter(output=output)
             exporter.export(results, output)
         elif output_format == "yaml":
+            output.write("---\n")  # type: ignore
             output.write(dump_minimal_yaml(results))  # type: ignore
         elif output_format == "turtle":
             exporter = RDFExporter()
@@ -119,6 +120,7 @@ def write_extraction(
                 for line in output_parser(obj=results, file=output):
                     secondoutput.write(line)
         else:
+            output.write("---\n")  # type: ignore
             output.write(dump_minimal_yaml(results))  # type: ignore
 
 
@@ -339,7 +341,6 @@ def extract(
     write_extraction(results, output, output_format, ke)
 
 
-# TODO: combine this command with pubmed_annotate - they are converging
 @main.command()
 @template_option
 @model_option
@@ -437,6 +438,7 @@ def iteratively_generate_extract(
         write_extraction(results, output, output_format)
 
 
+# TODO: combine this command with pubmed_annotate - they are converging
 @main.command()
 @template_option
 @model_option

diff --git a/src/ontogpt/engines/spires_engine.py b/src/ontogpt/engines/spires_engine.py
@@ -79,7 +79,7 @@ def extract_from_text(
                 logging.info(f"RAW TEXT: {raw_text}")
                 next_object = self.parse_completion_payload(
                     raw_text, cls, object=object  # type: ignore
-                )  
+                )
                 if extracted_object is None:
                     extracted_object = next_object
                 else:
@@ -96,7 +96,7 @@ def extract_from_text(
             logging.info(f"RAW TEXT: {raw_text}")
             extracted_object = self.parse_completion_payload(
                 raw_text, cls, object=object  # type: ignore
-            )  
+            )
         return ExtractionResult(
             input_text=text,
             raw_completion_output=raw_text,
@@ -516,13 +516,13 @@ def _parse_line_to_dict(
                 logging.debug(f"  RECURSING ON SLOT: {slot.name}, range={slot_range.name}")
                 vals = [
                     self._extract_from_text_to_dict(v, slot_range) for v in vals  # type: ignore
-                ]  
+                ]
             else:
                 for sep in [" - ", ":", "/", "*", "-"]:
                     if all([sep in v for v in vals]):
                         vals = [
                             dict(zip(slots_of_range, v.split(sep, 1))) for v in vals  # type: ignore
-                        ]  
+                        ]
                         for v in vals:
                             for k in v.keys():  # type: ignore
                                 v[k] = v[k].strip()  # type: ignore

diff --git a/src/ontogpt/evaluation/hpoa/eval_hpoa.py b/src/ontogpt/evaluation/hpoa/eval_hpoa.py
@@ -21,7 +21,7 @@
 DATABASE_DIR = Path(__file__).parent / "database"
 TEST_CASES_DIR = Path("tests").joinpath("input")
 TEST_HPOA_FILE = "test_sample.hpoa.tsv"
-NUM_TESTS = 3 # Note: each test requires input text; see provided test cases
+NUM_TESTS = 3  # Note: each test requires input text; see provided test cases
 
 DISEASE_ID = str
 TERM = str
@@ -182,7 +182,7 @@ def eval_against_pubs(self, num_tests=NUM_TESTS) -> EvaluationObjectSetHPOA:
         eos.training = []
         eos.predictions = []
         shuffle(eos.test)
-        for test_case in eos.test[0:num_tests-1]:
+        for test_case in eos.test[0 : num_tests - 1]:
             # text = self.disease_text(test_case.id)
             if len(test_case.publications) != 1:
                 raise ValueError(f"Expected 1 publication, got {len(test_case.publications)}")

diff --git a/src/ontogpt/io/yaml_wrapper.py b/src/ontogpt/io/yaml_wrapper.py
@@ -19,7 +19,7 @@ def eliminate_empty(obj: Any, preserve=False) -> Any:
     elif isinstance(obj, dict):
         return {k: eliminate_empty(v, preserve) for k, v in obj.items() if v or preserve}
     elif isinstance(obj, pydantic.BaseModel):
-        return eliminate_empty(obj.dict(), preserve)
+        return eliminate_empty(obj.model_dump(), preserve)
     elif isinstance(obj, tuple):
         return [eliminate_empty(x, preserve) for x in obj]
     elif isinstance(obj, str):