From dc1a28356781959d9d857667aa4bb2e76ec258cb Mon Sep 17 00:00:00 2001 From: caufieldjh Date: Fri, 22 Sep 2023 15:17:18 -0400 Subject: [PATCH 1/7] use model_dump with BaseModel obj --- src/ontogpt/io/yaml_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ontogpt/io/yaml_wrapper.py b/src/ontogpt/io/yaml_wrapper.py index 99c8229f6..4a3c4aadf 100644 --- a/src/ontogpt/io/yaml_wrapper.py +++ b/src/ontogpt/io/yaml_wrapper.py @@ -19,7 +19,7 @@ def eliminate_empty(obj: Any, preserve=False) -> Any: elif isinstance(obj, dict): return {k: eliminate_empty(v, preserve) for k, v in obj.items() if v or preserve} elif isinstance(obj, pydantic.BaseModel): - return eliminate_empty(obj.dict(), preserve) + return eliminate_empty(obj.model_dump(), preserve) elif isinstance(obj, tuple): return [eliminate_empty(x, preserve) for x in obj] elif isinstance(obj, str): From 7bb16625eeaf254f60f724d927b5c21b4eb6e431 Mon Sep 17 00:00:00 2001 From: caufieldjh Date: Fri, 22 Sep 2023 15:19:21 -0400 Subject: [PATCH 2/7] Move a comment - was in the wrong place --- src/ontogpt/cli.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ontogpt/cli.py b/src/ontogpt/cli.py index 5035a6733..a3499b1ee 100644 --- a/src/ontogpt/cli.py +++ b/src/ontogpt/cli.py @@ -339,7 +339,6 @@ def extract( write_extraction(results, output, output_format, ke) -# TODO: combine this command with pubmed_annotate - they are converging @main.command() @template_option @model_option @@ -436,7 +435,7 @@ def iteratively_generate_extract( ): write_extraction(results, output, output_format) - +# TODO: combine this command with pubmed_annotate - they are converging @main.command() @template_option @model_option From 0b5fb133756e6973388c1bab17b1a622e5d1d607 Mon Sep 17 00:00:00 2001 From: caufieldjh Date: Fri, 22 Sep 2023 16:10:39 -0400 Subject: [PATCH 3/7] Add separator to yaml output --- src/ontogpt/io/yaml_wrapper.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/ontogpt/io/yaml_wrapper.py b/src/ontogpt/io/yaml_wrapper.py index 4a3c4aadf..06634daa1 100644 --- a/src/ontogpt/io/yaml_wrapper.py +++ b/src/ontogpt/io/yaml_wrapper.py @@ -5,6 +5,7 @@ import pydantic from ruamel.yaml import YAML, RoundTripRepresenter +from ruamel.yaml.comments import CommentedMap # import yaml # from yaml import SafeDumper @@ -42,13 +43,17 @@ def repr_str(dumper: RoundTripRepresenter, data: str): def dump_minimal_yaml(obj: Any, minimize=True, file: Optional[TextIO] = None) -> str: """Dump a YAML string, but eliminating Nones and empty lists and dicts.""" yaml = YAML() + separator = YAML().load("") yaml.representer.add_representer(str, repr_str) yaml.default_flow_style = False + yaml.default_style=None yaml.indent(sequence=4, offset=2) + # A bit of a hack here to ensure all yaml output has a separator, even + # if it's a single document if not file: file = io.StringIO() - yaml.dump(eliminate_empty(obj, not minimize), file) + yaml.dump_all(documents=[separator, eliminate_empty(obj, not minimize)], stream=file) return file.getvalue() else: - yaml.dump(eliminate_empty(obj, not minimize), file) + yaml.dump_all(documents=[separator, eliminate_empty(obj, not minimize)], stream=file) return "" From dc414853a3bcf9d32e970abb7d8a688b6dbde7a3 Mon Sep 17 00:00:00 2001 From: caufieldjh Date: Fri, 22 Sep 2023 16:21:20 -0400 Subject: [PATCH 4/7] Partial revert of changes to yaml_wrapper - there's an easier way --- src/ontogpt/io/yaml_wrapper.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/ontogpt/io/yaml_wrapper.py b/src/ontogpt/io/yaml_wrapper.py index 06634daa1..344508671 100644 --- a/src/ontogpt/io/yaml_wrapper.py +++ b/src/ontogpt/io/yaml_wrapper.py @@ -43,17 +43,13 @@ def repr_str(dumper: RoundTripRepresenter, data: str): def dump_minimal_yaml(obj: Any, minimize=True, file: Optional[TextIO] = None) -> str: """Dump a YAML string, but eliminating Nones and empty lists and dicts.""" yaml = YAML() - separator = YAML().load("") yaml.representer.add_representer(str, repr_str) yaml.default_flow_style = False - yaml.default_style=None yaml.indent(sequence=4, offset=2) - # A bit of a hack here to ensure all yaml output has a separator, even - # if it's a single document if not file: file = io.StringIO() - yaml.dump_all(documents=[separator, eliminate_empty(obj, not minimize)], stream=file) + yaml.dump(eliminate_empty(obj, not minimize), file) return file.getvalue() else: - yaml.dump_all(documents=[separator, eliminate_empty(obj, not minimize)], stream=file) + yaml.dump(eliminate_empty(obj, not minimize), file) return "" From be3acb875c7343a803deff9b4898dd17a335089a Mon Sep 17 00:00:00 2001 From: caufieldjh Date: Fri, 22 Sep 2023 16:26:55 -0400 Subject: [PATCH 5/7] A much more straightforward solution --- src/ontogpt/cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ontogpt/cli.py b/src/ontogpt/cli.py index a3499b1ee..0cfaeb81b 100644 --- a/src/ontogpt/cli.py +++ b/src/ontogpt/cli.py @@ -105,6 +105,7 @@ def write_extraction( exporter = HTMLExporter(output=output) exporter.export(results, output) elif output_format == "yaml": + output.write("---\n") output.write(dump_minimal_yaml(results)) # type: ignore elif output_format == "turtle": exporter = RDFExporter() @@ -119,6 +120,7 @@ def write_extraction( for line in output_parser(obj=results, file=output): secondoutput.write(line) else: + output.write("---\n") output.write(dump_minimal_yaml(results)) # type: ignore From 025d56befc07c8e305d3778cd0e992862fbc00e4 Mon Sep 17 00:00:00 2001 From: caufieldjh Date: Fri, 22 Sep 2023 16:27:55 -0400 Subject: [PATCH 6/7] Ignore a couple mypy errors - I think they're false pos --- src/ontogpt/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ontogpt/cli.py b/src/ontogpt/cli.py index 0cfaeb81b..16a7987bd 100644 --- a/src/ontogpt/cli.py +++ b/src/ontogpt/cli.py @@ -105,7 +105,7 @@ def write_extraction( exporter = HTMLExporter(output=output) exporter.export(results, output) elif output_format == "yaml": - output.write("---\n") + output.write("---\n") # type: ignore output.write(dump_minimal_yaml(results)) # type: ignore elif output_format == "turtle": exporter = RDFExporter() @@ -120,7 +120,7 @@ def write_extraction( for line in output_parser(obj=results, file=output): secondoutput.write(line) else: - output.write("---\n") + output.write("---\n") # type: ignore output.write(dump_minimal_yaml(results)) # type: ignore From 941d2503a63bcd02fec514a1671a163faf7ca296 Mon Sep 17 00:00:00 2001 From: caufieldjh Date: Fri, 22 Sep 2023 16:29:44 -0400 Subject: [PATCH 7/7] Lintin' --- src/ontogpt/cli.py | 1 + src/ontogpt/engines/spires_engine.py | 8 ++++---- src/ontogpt/evaluation/hpoa/eval_hpoa.py | 4 ++-- src/ontogpt/io/yaml_wrapper.py | 1 - 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ontogpt/cli.py b/src/ontogpt/cli.py index 16a7987bd..33eae4307 100644 --- a/src/ontogpt/cli.py +++ b/src/ontogpt/cli.py @@ -437,6 +437,7 @@ def iteratively_generate_extract( ): write_extraction(results, output, output_format) + # TODO: combine this command with pubmed_annotate - they are converging @main.command() @template_option diff --git a/src/ontogpt/engines/spires_engine.py b/src/ontogpt/engines/spires_engine.py index 43117de99..4096eeddc 100644 --- a/src/ontogpt/engines/spires_engine.py +++ b/src/ontogpt/engines/spires_engine.py @@ -79,7 +79,7 @@ def extract_from_text( logging.info(f"RAW TEXT: {raw_text}") next_object = self.parse_completion_payload( raw_text, cls, object=object # type: ignore - ) + ) if extracted_object is None: extracted_object = next_object else: @@ -96,7 +96,7 @@ def extract_from_text( logging.info(f"RAW TEXT: {raw_text}") extracted_object = self.parse_completion_payload( raw_text, cls, object=object # type: ignore - ) + ) return ExtractionResult( input_text=text, raw_completion_output=raw_text, @@ -516,13 +516,13 @@ def _parse_line_to_dict( logging.debug(f" RECURSING ON SLOT: {slot.name}, range={slot_range.name}") vals = [ self._extract_from_text_to_dict(v, slot_range) for v in vals # type: ignore - ] + ] else: for sep in [" - ", ":", "/", "*", "-"]: if all([sep in v for v in vals]): vals = [ dict(zip(slots_of_range, v.split(sep, 1))) for v in vals # type: ignore - ] + ] for v in vals: for k in v.keys(): # type: ignore v[k] = v[k].strip() # type: ignore diff --git a/src/ontogpt/evaluation/hpoa/eval_hpoa.py b/src/ontogpt/evaluation/hpoa/eval_hpoa.py index d7ac008cc..72c3eb676 100644 --- a/src/ontogpt/evaluation/hpoa/eval_hpoa.py +++ b/src/ontogpt/evaluation/hpoa/eval_hpoa.py @@ -21,7 +21,7 @@ DATABASE_DIR = Path(__file__).parent / "database" TEST_CASES_DIR = Path("tests").joinpath("input") TEST_HPOA_FILE = "test_sample.hpoa.tsv" -NUM_TESTS = 3 # Note: each test requires input text; see provided test cases +NUM_TESTS = 3 # Note: each test requires input text; see provided test cases DISEASE_ID = str TERM = str @@ -182,7 +182,7 @@ def eval_against_pubs(self, num_tests=NUM_TESTS) -> EvaluationObjectSetHPOA: eos.training = [] eos.predictions = [] shuffle(eos.test) - for test_case in eos.test[0:num_tests-1]: + for test_case in eos.test[0 : num_tests - 1]: # text = self.disease_text(test_case.id) if len(test_case.publications) != 1: raise ValueError(f"Expected 1 publication, got {len(test_case.publications)}") diff --git a/src/ontogpt/io/yaml_wrapper.py b/src/ontogpt/io/yaml_wrapper.py index 344508671..4a3c4aadf 100644 --- a/src/ontogpt/io/yaml_wrapper.py +++ b/src/ontogpt/io/yaml_wrapper.py @@ -5,7 +5,6 @@ import pydantic from ruamel.yaml import YAML, RoundTripRepresenter -from ruamel.yaml.comments import CommentedMap # import yaml # from yaml import SafeDumper