
Improvements to iterative generate-extract.
cmungall committed Aug 2, 2023
1 parent 78aa4d0 commit 95fe932
Showing 8 changed files with 405 additions and 187 deletions.
65 changes: 29 additions & 36 deletions poetry.lock

(Generated file; diff not rendered by default.)

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -14,13 +14,13 @@ oaklib = ">=0.5.12"
 gilda = ">=1.0.0"
 jsonlines = ">=3.1.0"
 python-multipart = "^0.0.5"
-linkml-owl = ">=0.2.7"
+linkml-owl = "^0.2.8"
 beautifulsoup4 = ">=4.11.1"
 eutils = ">=0.6.0"
 class-resolver = ">=0.4.2"
 inflect = ">=6.0.2"
 bioc = ">=2.0.post5"
-linkml = ">=1.4.10"
+linkml = "^1.5.6"
 wikipedia = ">=1.4.0"
 tiktoken = ">=0.3.3"
 airium = ">=0.2.5"
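The two version bumps above replace open-ended ">=" floors with Poetry caret constraints, which also cap the next breaking release: "^1.5.6" allows ">=1.5.6,<2.0.0" and "^0.2.8" allows ">=0.2.8,<0.3.0". The snippet below is a minimal sketch, not part of this commit, of how one might check an installed environment against those ranges; importlib.metadata and the packaging library are assumptions for illustration, not ontogpt APIs.

from importlib.metadata import version

from packaging.specifiers import SpecifierSet

# Ranges implied by the caret constraints in the diff above (illustrative check only).
constraints = {
    "linkml": SpecifierSet(">=1.5.6,<2.0.0"),      # "^1.5.6"
    "linkml-owl": SpecifierSet(">=0.2.8,<0.3.0"),  # "^0.2.8"
}

for package, spec in constraints.items():
    installed = version(package)
    status = "OK" if installed in spec else "outside the constraint"
    print(f"{package} {installed}: {status}")
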
59 changes: 49 additions & 10 deletions src/ontogpt/cli.py
@@ -22,16 +22,16 @@
 from sssom.util import to_mapping_set_dataframe
 
 import ontogpt.ontex.extractor as extractor
-from ontogpt import MODELS, DEFAULT_MODEL, __version__
+from ontogpt import DEFAULT_MODEL, MODELS, __version__
 from ontogpt.clients import OpenAIClient
 from ontogpt.clients.pubmed_client import PubmedClient
 from ontogpt.clients.soup_client import SoupClient
 from ontogpt.clients.wikipedia_client import WikipediaClient
 from ontogpt.engines import create_engine
 from ontogpt.engines.embedding_similarity_engine import SimilarityEngine
 from ontogpt.engines.enrichment import EnrichmentEngine
-from ontogpt.engines.ggml_engine import GGMLEngine
 from ontogpt.engines.generic_engine import GenericEngine, QuestionCollection
+from ontogpt.engines.ggml_engine import GGMLEngine
 from ontogpt.engines.halo_engine import HALOEngine
 from ontogpt.engines.hfhub_engine import HFHubEngine
 from ontogpt.engines.knowledge_engine import KnowledgeEngine
@@ -128,9 +128,7 @@ def write_extraction(
     help="Interactive mode - rather than call the LLM API it will prompt you do this.",
 )
 model_option = click.option(
-    "-m",
-    "--model",
-    help="Model name to use, e.g. openai-text-davinci-003."
+    "-m", "--model", help="Model name to use, e.g. openai-text-davinci-003."
 )
 prompt_template_option = click.option(
     "--prompt-template", help="Path to a file containing the prompt."
@@ -319,7 +317,50 @@ def generate_extract(entity, template, output, output_format, **kwargs):
     ke = SPIRESEngine(template, **kwargs)
     logging.debug(f"Input entity: {entity}")
     results = ke.generate_and_extract(entity)
-    write_extraction(results, output, output_format)
+    write_extraction(results, output, output_format, ke)
 
 
+@main.command()
+@template_option
+@model_option
+@recurse_option
+@output_option_wb
+@output_format_options
+@auto_prefix_option
+@click.option("--ontology", "-r", help="Ontology to use; use oaklib selector path")
+@click.option("--max-iterations", "-M", default=10, type=click.INT)
+@click.option("--iteration-slot", "-I", multiple=True, help="Slots to iterate over")
+@click.option("--db", "-D", help="Where the resulting yaml database is stored")
+@click.option(
+    "--clear/--no-clear", default=False, show_default=True, help="Clear the db before starting"
+)
+@click.argument("entity")
+def iteratively_generate_extract(
+    entity,
+    template,
+    output,
+    output_format,
+    db,
+    iteration_slot,
+    max_iterations,
+    clear,
+    ontology,
+    **kwargs,
+):
+    """Iterate through generate-extract."""
+    logging.info(f"Creating for {template}")
+    ke = SPIRESEngine(template, **kwargs)
+    logging.debug(f"Input entity: {entity}")
+    adapter = get_adapter(ontology)
+    for results in ke.iteratively_generate_and_extract(
+        entity,
+        db,
+        iteration_slots=list(iteration_slot),
+        max_iterations=max_iterations,
+        adapter=adapter,
+        clear=clear,
+    ):
+        write_extraction(results, output, output_format)
+
+
 @main.command()
@@ -350,10 +391,10 @@ def pubmed_extract(pmid, template, output, output_format, **kwargs):
 def pubmed_annotate(search, template, output, output_format, **kwargs):
     """Retrieve a collection of PubMed IDs for a search term; annotate them using a template."""
     logging.info(f"Creating for {template}")
-    pubmed_annotate_limit = 20 # TODO: make this a CLI argument
+    pubmed_annotate_limit = 20  # TODO: make this a CLI argument
     pmc = PubmedClient()
     pmids = pmc.get_pmids(search)
-    textlist = pmc.text(pmids[:pubmed_annotate_limit + 1])
+    textlist = pmc.text(pmids[: pubmed_annotate_limit + 1])
     for text in textlist:
         ke = SPIRESEngine(template, **kwargs)
         logging.debug(f"Input text: {text}")
@@ -1328,7 +1369,5 @@ def list_models():
         print(f"{primary_name}\t{provider}\t{alternative_names}\t{status}")
 
 
-
-
 if __name__ == "__main__":
     main()
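
For orientation, the sketch below drives the new command through click's test runner (the command name iteratively-generate-extract follows click's underscore-to-dash convention). The template name, ontology selector, iteration slot, database path, and seed entity are illustrative assumptions rather than values from this commit, and the --template flag is assumed to be what the shared template_option exposes; a real run also needs a configured OpenAI API key.

from click.testing import CliRunner

from ontogpt.cli import main

runner = CliRunner()
result = runner.invoke(
    main,
    [
        "iteratively-generate-extract",
        "--template", "gocam",          # assumed extraction template
        "--ontology", "sqlite:obo:go",  # assumed oaklib selector path
        "--db", "gocam-db.yaml",        # YAML database that accumulates results
        "--iteration-slot", "genes",    # assumed slot to expand on each pass
        "--max-iterations", "3",
        "--clear",                      # start from an empty database
        "nucleotide metabolism",        # assumed seed entity
    ],
)
print(result.exit_code)
print(result.output)

Each pass writes its extraction to the output as it is yielded by iteratively_generate_and_extract, while the accumulated results are persisted in the database given by --db.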
