Skip to content

Commit

Permalink
Fix for #215 (#216)
Browse files Browse the repository at this point in the history
Ensure YAML output begins with a document separator (`---`) so that
multiple documents can be loaded from the same output.
The separator must be written even when the output contains only a single
document (note that ruamel.yaml has a method called dump_all, but it only
emits separators when dumping multiple documents).
caufieldjh authored Sep 22, 2023
2 parents 4ac7322 + 941d250 commit 1125d83
Showing 4 changed files with 10 additions and 8 deletions.
4 changes: 3 additions & 1 deletion src/ontogpt/cli.py
Original file line number Diff line number Diff line change
@@ -105,6 +105,7 @@ def write_extraction(
exporter = HTMLExporter(output=output)
exporter.export(results, output)
elif output_format == "yaml":
output.write("---\n") # type: ignore
output.write(dump_minimal_yaml(results)) # type: ignore
elif output_format == "turtle":
exporter = RDFExporter()
@@ -119,6 +120,7 @@ def write_extraction(
for line in output_parser(obj=results, file=output):
secondoutput.write(line)
else:
output.write("---\n") # type: ignore
output.write(dump_minimal_yaml(results)) # type: ignore


@@ -339,7 +341,6 @@ def extract(
write_extraction(results, output, output_format, ke)


# TODO: combine this command with pubmed_annotate - they are converging
@main.command()
@template_option
@model_option
@@ -437,6 +438,7 @@ def iteratively_generate_extract(
write_extraction(results, output, output_format)


# TODO: combine this command with pubmed_annotate - they are converging
@main.command()
@template_option
@model_option
8 changes: 4 additions & 4 deletions src/ontogpt/engines/spires_engine.py
Original file line number Diff line number Diff line change
@@ -79,7 +79,7 @@ def extract_from_text(
logging.info(f"RAW TEXT: {raw_text}")
next_object = self.parse_completion_payload(
raw_text, cls, object=object # type: ignore
)
)
if extracted_object is None:
extracted_object = next_object
else:
@@ -96,7 +96,7 @@ def extract_from_text(
logging.info(f"RAW TEXT: {raw_text}")
extracted_object = self.parse_completion_payload(
raw_text, cls, object=object # type: ignore
)
)
return ExtractionResult(
input_text=text,
raw_completion_output=raw_text,
@@ -516,13 +516,13 @@ def _parse_line_to_dict(
logging.debug(f" RECURSING ON SLOT: {slot.name}, range={slot_range.name}")
vals = [
self._extract_from_text_to_dict(v, slot_range) for v in vals # type: ignore
]
]
else:
for sep in [" - ", ":", "/", "*", "-"]:
if all([sep in v for v in vals]):
vals = [
dict(zip(slots_of_range, v.split(sep, 1))) for v in vals # type: ignore
]
]
for v in vals:
for k in v.keys(): # type: ignore
v[k] = v[k].strip() # type: ignore
4 changes: 2 additions & 2 deletions src/ontogpt/evaluation/hpoa/eval_hpoa.py
Original file line number Diff line number Diff line change
@@ -21,7 +21,7 @@
DATABASE_DIR = Path(__file__).parent / "database"
TEST_CASES_DIR = Path("tests").joinpath("input")
TEST_HPOA_FILE = "test_sample.hpoa.tsv"
NUM_TESTS = 3 # Note: each test requires input text; see provided test cases
NUM_TESTS = 3 # Note: each test requires input text; see provided test cases

DISEASE_ID = str
TERM = str
@@ -182,7 +182,7 @@ def eval_against_pubs(self, num_tests=NUM_TESTS) -> EvaluationObjectSetHPOA:
eos.training = []
eos.predictions = []
shuffle(eos.test)
for test_case in eos.test[0:num_tests-1]:
for test_case in eos.test[0 : num_tests - 1]:
# text = self.disease_text(test_case.id)
if len(test_case.publications) != 1:
raise ValueError(f"Expected 1 publication, got {len(test_case.publications)}")
2 changes: 1 addition & 1 deletion src/ontogpt/io/yaml_wrapper.py
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@ def eliminate_empty(obj: Any, preserve=False) -> Any:
elif isinstance(obj, dict):
return {k: eliminate_empty(v, preserve) for k, v in obj.items() if v or preserve}
elif isinstance(obj, pydantic.BaseModel):
return eliminate_empty(obj.dict(), preserve)
return eliminate_empty(obj.model_dump(), preserve)
elif isinstance(obj, tuple):
return [eliminate_empty(x, preserve) for x in obj]
elif isinstance(obj, str):

0 comments on commit 1125d83

Please sign in to comment.