From 10f4da264d9a70c6c6ad6bd88a8447f1c140f011 Mon Sep 17 00:00:00 2001 From: Valerio Arnaboldi Date: Wed, 7 Sep 2022 16:37:26 -0700 Subject: [PATCH] refactor(lit_processing): created new project structure and moved files --- .gitignore | 16 +- Makefile | 2 +- .../api/routers/reference_router.py | 4 +- agr_literature_service/api/s3/download.py | 2 +- .../{benchmark => data_export}/__init__.py | 0 .../export_all_mod_references_to_json.py} | 2 +- .../export_single_mod_references_to_json.py} | 6 +- .../lit_processing/data_ingest/__init__.py | 0 .../data_ingest/dqm_ingest/__init__.py | 0 .../dqm_ingest}/get_dqm_data.py | 0 .../dqm_ingest}/parse_dqm_json_reference.py | 5 +- .../dqm_ingest}/parse_dqm_json_resource.py | 9 +- .../sort_dqm_json_reference_updates.py | 21 +- .../sort_dqm_json_resource_updates.py | 9 +- .../data_ingest/dqm_ingest/utils/__init__.py | 0 .../dqm_ingest/utils/md5sum_utils.py} | 5 +- .../{ => data_ingest}/post_reference_to_db.py | 6 +- .../post_resource_to_db.py} | 7 +- .../data_ingest/pubmed_ingest/__init__.py | 0 .../FB_false_positive_pmids.txt | 0 .../SGD_false_positive_pmids.txt | 0 .../WB_false_positive_pmids.txt | 0 .../XB_false_positive_pmids.txt | 754 +++++++++--------- .../pubMedType2allianceCategory_mapping.tsv | 166 ++-- .../pubmed_ingest/fulltext/__init__.py | 0 .../fulltext}/expand_upload_tgz.py | 5 +- .../pubmed_ingest/fulltext}/get_pubmed_tgz.py | 2 +- .../generate_pubmed_nlm_resource.py | 5 +- .../pubmed_ingest}/process_single_pmid.py | 15 +- .../pubmed_search_new_references.py} | 19 +- .../pubmed_update_references_all_mods.py} | 10 +- .../pubmed_update_references_single_mod.py} | 14 +- .../pubmed_update_resources_nlm.py} | 16 +- .../pubmed_ingest}/sanitize_pubmed_json.py | 2 +- .../data_ingest/pubmed_ingest/xml/__init__.py | 0 .../pubmed_ingest/xml}/get_pubmed_xml.py | 0 .../pubmed_ingest/xml}/xml_to_json.py | 4 +- .../lit_processing/find_duplicate_doi.py | 113 --- .../lit_processing/find_pubmed_type.py | 231 ------ 
.../get_datatypes_cross_references.py | 43 - .../lit_processing/get_md5sum.py | 126 --- .../lit_processing/oneoff_scripts/__init__.py | 0 .../{oneoff => oneoff_scripts}/cleanup_DOI.py | 2 +- ...eanup_modCorpusAssoForCommentCorrection.py | 2 +- .../cleanup_userEmailToOktaUid.py | 2 +- .../delete_duplicate_commentCorrection.py | 2 +- .../delete_duplicate_modRefType.py | 2 +- .../delete_noPmid_dateLastModifiedInPubmed.py | 2 +- .../fix_modCorpusAssoForCommentCorrection.py | 2 +- .../generate_chunk_files.py | 2 +- .../get_pubmed_nlm_resource_unmatched.py | 2 +- .../populate_auditedColumns.py | 2 +- .../process_mod_corpus_association_to_api.py | 4 +- .../post_comments_corrections_to_api.py | 181 ----- .../pubmed_sample/32542232.json | 52 -- .../lit_processing/pubmed_sample/32542232.xml | 250 ------ .../pubmed_sample/32644453.json | 50 -- .../lit_processing/pubmed_sample/32644453.xml | 152 ---- .../pubmed_sample/33002525.json | 191 ----- .../lit_processing/pubmed_sample/33002525.xml | 245 ------ .../pubmed_sample/33408224.json | 83 -- .../lit_processing/pubmed_sample/33408224.xml | 733 ----------------- .../pubmed_sample/33410237.json | 100 --- .../lit_processing/pubmed_sample/33410237.xml | 146 ---- .../pubmed_sample/33440160.json | 89 --- .../lit_processing/pubmed_sample/33440160.xml | 140 ---- .../sample_reference_populate_load.sh | 15 - .../sort_not_found_pmids_by_mod.py | 87 -- .../lit_processing/tests/functional_tests.py | 14 +- .../{ => tests}/generate_dqm_json_test_set.py | 2 +- .../{ => tests}/mod_populate_load.py | 5 +- .../parse_pubmed_json_reference.py | 5 +- .../{ => tests}/process_many_pmids_to_json.py | 4 +- .../tests/sample_reference_populate_load.sh | 13 + .../sample_reference_populate_update.sh | 0 .../lit_processing/utils/__init__.py | 0 .../{helper_email.py => utils/email_utils.py} | 0 .../file_processing_utils.py} | 4 +- .../okta_utils.py} | 0 .../{helper_s3.py => utils/s3_utils.py} | 0 .../sqlalchemy_utils.py} | 2 +- 
.../7d68b38fe026_add_citation_raw_sql.py | 2 +- cleanup.sh | 1 + crontab | 6 +- .../lit_processing/chunking_pmids/README | 0 .../agr_lit_processing_cytoscape.html | 0 .../cytoscape/cytoscape-dagre.js | 0 .../cytoscape/cytoscape.min.js | 0 examples/__init__.py | 0 examples/lit_processing/__init__.py | 0 examples/lit_processing/benchmark/__init__.py | 0 .../lit_processing/benchmark/benchmark.sh | 0 .../benchmark/benchmark_python.py | 0 .../benchmark/benchmark_read_json.py | 0 .../benchmark/benchmark_xml_open_copy.py | 0 .../lit_processing/benchmark/split_xml.py | 0 96 files changed, 586 insertions(+), 3629 deletions(-) rename agr_literature_service/lit_processing/{benchmark => data_export}/__init__.py (100%) rename agr_literature_service/lit_processing/{dump_all_json_data.py => data_export/export_all_mod_references_to_json.py} (82%) rename agr_literature_service/lit_processing/{dump_json_data.py => data_export/export_single_mod_references_to_json.py} (98%) create mode 100644 agr_literature_service/lit_processing/data_ingest/__init__.py create mode 100644 agr_literature_service/lit_processing/data_ingest/dqm_ingest/__init__.py rename agr_literature_service/lit_processing/{ => data_ingest/dqm_ingest}/get_dqm_data.py (100%) rename agr_literature_service/lit_processing/{ => data_ingest/dqm_ingest}/parse_dqm_json_reference.py (99%) rename agr_literature_service/lit_processing/{ => data_ingest/dqm_ingest}/parse_dqm_json_resource.py (98%) rename agr_literature_service/lit_processing/{ => data_ingest/dqm_ingest}/sort_dqm_json_reference_updates.py (98%) rename agr_literature_service/lit_processing/{ => data_ingest/dqm_ingest}/sort_dqm_json_resource_updates.py (98%) create mode 100644 agr_literature_service/lit_processing/data_ingest/dqm_ingest/utils/__init__.py rename agr_literature_service/lit_processing/{filter_dqm_md5sum.py => data_ingest/dqm_ingest/utils/md5sum_utils.py} (97%) rename agr_literature_service/lit_processing/{ => data_ingest}/post_reference_to_db.py (99%) 
rename agr_literature_service/lit_processing/{post_resource_to_api.py => data_ingest/post_resource_to_db.py} (97%) create mode 100644 agr_literature_service/lit_processing/data_ingest/pubmed_ingest/__init__.py rename agr_literature_service/lit_processing/{pubmed_searches => data_ingest/pubmed_ingest/data_for_pubmed_processing}/FB_false_positive_pmids.txt (100%) rename agr_literature_service/lit_processing/{pubmed_searches => data_ingest/pubmed_ingest/data_for_pubmed_processing}/SGD_false_positive_pmids.txt (100%) rename agr_literature_service/lit_processing/{pubmed_searches => data_ingest/pubmed_ingest/data_for_pubmed_processing}/WB_false_positive_pmids.txt (100%) rename agr_literature_service/lit_processing/{pubmed_searches => data_ingest/pubmed_ingest/data_for_pubmed_processing}/XB_false_positive_pmids.txt (93%) rename agr_literature_service/lit_processing/{pubmed_searches => data_ingest/pubmed_ingest/data_for_pubmed_processing}/pubMedType2allianceCategory_mapping.tsv (97%) create mode 100644 agr_literature_service/lit_processing/data_ingest/pubmed_ingest/fulltext/__init__.py rename agr_literature_service/lit_processing/{ => data_ingest/pubmed_ingest/fulltext}/expand_upload_tgz.py (97%) rename agr_literature_service/lit_processing/{ => data_ingest/pubmed_ingest/fulltext}/get_pubmed_tgz.py (99%) rename agr_literature_service/lit_processing/{ => data_ingest/pubmed_ingest}/generate_pubmed_nlm_resource.py (97%) rename agr_literature_service/lit_processing/{ => data_ingest/pubmed_ingest}/process_single_pmid.py (81%) rename agr_literature_service/lit_processing/{query_pubmed_mod_updates.py => data_ingest/pubmed_ingest/pubmed_search_new_references.py} (97%) rename agr_literature_service/lit_processing/{update_all_pubmed_papers.py => data_ingest/pubmed_ingest/pubmed_update_references_all_mods.py} (88%) rename agr_literature_service/lit_processing/{update_pubmed_papers.py => data_ingest/pubmed_ingest/pubmed_update_references_single_mod.py} (98%) rename 
agr_literature_service/lit_processing/{update_resource_pubmed_nlm.py => data_ingest/pubmed_ingest/pubmed_update_resources_nlm.py} (73%) rename agr_literature_service/lit_processing/{ => data_ingest/pubmed_ingest}/sanitize_pubmed_json.py (97%) create mode 100644 agr_literature_service/lit_processing/data_ingest/pubmed_ingest/xml/__init__.py rename agr_literature_service/lit_processing/{ => data_ingest/pubmed_ingest/xml}/get_pubmed_xml.py (100%) rename agr_literature_service/lit_processing/{ => data_ingest/pubmed_ingest/xml}/xml_to_json.py (99%) delete mode 100644 agr_literature_service/lit_processing/find_duplicate_doi.py delete mode 100644 agr_literature_service/lit_processing/find_pubmed_type.py delete mode 100644 agr_literature_service/lit_processing/get_datatypes_cross_references.py delete mode 100644 agr_literature_service/lit_processing/get_md5sum.py create mode 100644 agr_literature_service/lit_processing/oneoff_scripts/__init__.py rename agr_literature_service/lit_processing/{oneoff => oneoff_scripts}/cleanup_DOI.py (91%) rename agr_literature_service/lit_processing/{oneoff => oneoff_scripts}/cleanup_modCorpusAssoForCommentCorrection.py (98%) rename agr_literature_service/lit_processing/{oneoff => oneoff_scripts}/cleanup_userEmailToOktaUid.py (99%) rename agr_literature_service/lit_processing/{oneoff => oneoff_scripts}/delete_duplicate_commentCorrection.py (93%) rename agr_literature_service/lit_processing/{oneoff => oneoff_scripts}/delete_duplicate_modRefType.py (93%) rename agr_literature_service/lit_processing/{oneoff => oneoff_scripts}/delete_noPmid_dateLastModifiedInPubmed.py (96%) rename agr_literature_service/lit_processing/{oneoff => oneoff_scripts}/fix_modCorpusAssoForCommentCorrection.py (91%) rename agr_literature_service/lit_processing/{ => oneoff_scripts}/generate_chunk_files.py (99%) rename agr_literature_service/lit_processing/{ => oneoff_scripts}/get_pubmed_nlm_resource_unmatched.py (98%) rename agr_literature_service/lit_processing/{oneoff 
=> oneoff_scripts}/populate_auditedColumns.py (98%) rename agr_literature_service/lit_processing/{ => oneoff_scripts}/process_mod_corpus_association_to_api.py (95%) delete mode 100644 agr_literature_service/lit_processing/post_comments_corrections_to_api.py delete mode 100644 agr_literature_service/lit_processing/pubmed_sample/32542232.json delete mode 100644 agr_literature_service/lit_processing/pubmed_sample/32542232.xml delete mode 100644 agr_literature_service/lit_processing/pubmed_sample/32644453.json delete mode 100644 agr_literature_service/lit_processing/pubmed_sample/32644453.xml delete mode 100644 agr_literature_service/lit_processing/pubmed_sample/33002525.json delete mode 100644 agr_literature_service/lit_processing/pubmed_sample/33002525.xml delete mode 100644 agr_literature_service/lit_processing/pubmed_sample/33408224.json delete mode 100644 agr_literature_service/lit_processing/pubmed_sample/33408224.xml delete mode 100644 agr_literature_service/lit_processing/pubmed_sample/33410237.json delete mode 100644 agr_literature_service/lit_processing/pubmed_sample/33410237.xml delete mode 100644 agr_literature_service/lit_processing/pubmed_sample/33440160.json delete mode 100644 agr_literature_service/lit_processing/pubmed_sample/33440160.xml delete mode 100755 agr_literature_service/lit_processing/sample_reference_populate_load.sh delete mode 100644 agr_literature_service/lit_processing/sort_not_found_pmids_by_mod.py rename agr_literature_service/lit_processing/{ => tests}/generate_dqm_json_test_set.py (98%) rename agr_literature_service/lit_processing/{ => tests}/mod_populate_load.py (96%) rename agr_literature_service/lit_processing/{ => tests}/parse_pubmed_json_reference.py (93%) rename agr_literature_service/lit_processing/{ => tests}/process_many_pmids_to_json.py (95%) create mode 100755 agr_literature_service/lit_processing/tests/sample_reference_populate_load.sh rename agr_literature_service/lit_processing/{ => 
tests}/sample_reference_populate_update.sh (100%) create mode 100644 agr_literature_service/lit_processing/utils/__init__.py rename agr_literature_service/lit_processing/{helper_email.py => utils/email_utils.py} (100%) rename agr_literature_service/lit_processing/{helper_file_processing.py => utils/file_processing_utils.py} (99%) rename agr_literature_service/lit_processing/{helper_post_to_api.py => utils/okta_utils.py} (100%) rename agr_literature_service/lit_processing/{helper_s3.py => utils/s3_utils.py} (100%) rename agr_literature_service/lit_processing/{helper_sqlalchemy.py => utils/sqlalchemy_utils.py} (97%) create mode 100644 cleanup.sh rename {agr_literature_service => docs}/lit_processing/chunking_pmids/README (100%) rename docs/{xml_processing => lit_processing}/cytoscape/agr_lit_processing_cytoscape.html (100%) rename docs/{xml_processing => lit_processing}/cytoscape/cytoscape-dagre.js (100%) rename docs/{xml_processing => lit_processing}/cytoscape/cytoscape.min.js (100%) create mode 100644 examples/__init__.py create mode 100644 examples/lit_processing/__init__.py create mode 100644 examples/lit_processing/benchmark/__init__.py rename {agr_literature_service => examples}/lit_processing/benchmark/benchmark.sh (100%) rename {agr_literature_service => examples}/lit_processing/benchmark/benchmark_python.py (100%) rename {agr_literature_service => examples}/lit_processing/benchmark/benchmark_read_json.py (100%) rename {agr_literature_service => examples}/lit_processing/benchmark/benchmark_xml_open_copy.py (100%) rename {agr_literature_service => examples}/lit_processing/benchmark/split_xml.py (100%) diff --git a/.gitignore b/.gitignore index 225a2daaf..e675af4e0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,21 +1,7 @@ __pycache__/ Pipfile.lock -junk htmlcov -agr_literature_service/lit_processing/pubmed_json -agr_literature_service/lit_processing/pubmed_xml -agr_literature_service/lit_processing/reference_primary_id_to_curie 
-src/xml_processing/resource_primary_id_to_curie -src/xml_processing/junk -src/xml_processing/past_runs -agr_literature_service/lit_processing/inputs -src/xml_processing/J_Medline.txt -src/xml_processing/auth0_token -agr_literature_service/lit_processing/okta_token -src/xml_processing/pmids_by_mods -agr_literature_service/lit_processing/pmids_not_found - -src/xml_processing/code/venv/ +agr_literature_service/lit_processing/data_ingest/tmp/* *.coverage *.coverage.* .idea/* diff --git a/Makefile b/Makefile index 5018ea3f8..c3def4ac1 100644 --- a/Makefile +++ b/Makefile @@ -70,7 +70,7 @@ run-functest: sleep 5 # load the mods - docker-compose --env-file .env.test run test_runner python3 agr_literature_service/lit_processing/mod_populate_load.py + docker-compose --env-file .env.test run test_runner python3 agr_literature_service/lit_processing/tests/mod_populate_load.py # load the data docker-compose --env-file .env.test run test_runner /bin/bash agr_literature_service/lit_processing/sample_reference_populate_load.sh diff --git a/agr_literature_service/api/routers/reference_router.py b/agr_literature_service/api/routers/reference_router.py index 048f85bcc..d9abe00ac 100644 --- a/agr_literature_service/api/routers/reference_router.py +++ b/agr_literature_service/api/routers/reference_router.py @@ -15,8 +15,8 @@ import logging -from agr_literature_service.lit_processing.process_single_pmid import process_pmid -from agr_literature_service.lit_processing.dump_json_data import dump_data +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.process_single_pmid import process_pmid +from agr_literature_service.lit_processing.data_export.export_single_mod_references_to_json import dump_data logger = logging.getLogger(__name__) diff --git a/agr_literature_service/api/s3/download.py b/agr_literature_service/api/s3/download.py index eef150897..e1c22b14a 100644 --- a/agr_literature_service/api/s3/download.py +++ b/agr_literature_service/api/s3/download.py @@ -2,7 
+2,7 @@ from fastapi import HTTPException from fastapi.encoders import jsonable_encoder from os import environ, getcwd, path -from agr_literature_service.lit_processing.helper_s3 import download_file_from_s3 +from agr_literature_service.lit_processing.utils.s3_utils import download_file_from_s3 from fastapi.responses import FileResponse from agr_literature_service.api.config import config diff --git a/agr_literature_service/lit_processing/benchmark/__init__.py b/agr_literature_service/lit_processing/data_export/__init__.py similarity index 100% rename from agr_literature_service/lit_processing/benchmark/__init__.py rename to agr_literature_service/lit_processing/data_export/__init__.py diff --git a/agr_literature_service/lit_processing/dump_all_json_data.py b/agr_literature_service/lit_processing/data_export/export_all_mod_references_to_json.py similarity index 82% rename from agr_literature_service/lit_processing/dump_all_json_data.py rename to agr_literature_service/lit_processing/data_export/export_all_mod_references_to_json.py index ec3cb6f3f..b8827236d 100644 --- a/agr_literature_service/lit_processing/dump_all_json_data.py +++ b/agr_literature_service/lit_processing/data_export/export_all_mod_references_to_json.py @@ -1,7 +1,7 @@ import logging import time -from agr_literature_service.lit_processing.dump_json_data import dump_data +from agr_literature_service.lit_processing.data_export.export_single_mod_references_to_json import dump_data logging.basicConfig(format='%(message)s') log = logging.getLogger() diff --git a/agr_literature_service/lit_processing/dump_json_data.py b/agr_literature_service/lit_processing/data_export/export_single_mod_references_to_json.py similarity index 98% rename from agr_literature_service/lit_processing/dump_json_data.py rename to agr_literature_service/lit_processing/data_export/export_single_mod_references_to_json.py index 48920ba72..a8a5e4f3f 100644 --- a/agr_literature_service/lit_processing/dump_json_data.py +++ 
b/agr_literature_service/lit_processing/data_export/export_single_mod_references_to_json.py @@ -7,9 +7,9 @@ import gzip import shutil -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_engine -from agr_literature_service.lit_processing.helper_s3 import upload_file_to_s3 -from agr_literature_service.lit_processing.helper_email import send_email +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_engine +from agr_literature_service.lit_processing.utils.s3_utils import upload_file_to_s3 +from agr_literature_service.lit_processing.utils.email_utils import send_email logging.basicConfig(format='%(message)s') log = logging.getLogger() diff --git a/agr_literature_service/lit_processing/data_ingest/__init__.py b/agr_literature_service/lit_processing/data_ingest/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agr_literature_service/lit_processing/data_ingest/dqm_ingest/__init__.py b/agr_literature_service/lit_processing/data_ingest/dqm_ingest/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agr_literature_service/lit_processing/get_dqm_data.py b/agr_literature_service/lit_processing/data_ingest/dqm_ingest/get_dqm_data.py similarity index 100% rename from agr_literature_service/lit_processing/get_dqm_data.py rename to agr_literature_service/lit_processing/data_ingest/dqm_ingest/get_dqm_data.py diff --git a/agr_literature_service/lit_processing/parse_dqm_json_reference.py b/agr_literature_service/lit_processing/data_ingest/dqm_ingest/parse_dqm_json_reference.py similarity index 99% rename from agr_literature_service/lit_processing/parse_dqm_json_reference.py rename to agr_literature_service/lit_processing/data_ingest/dqm_ingest/parse_dqm_json_reference.py index aa5fff0d0..779b0b7fc 100644 --- a/agr_literature_service/lit_processing/parse_dqm_json_reference.py +++ b/agr_literature_service/lit_processing/data_ingest/dqm_ingest/parse_dqm_json_reference.py @@ 
-1,6 +1,5 @@ import argparse import json -import logging import logging.config import re import sys @@ -11,8 +10,8 @@ import bs4 from dotenv import load_dotenv -from agr_literature_service.lit_processing.helper_file_processing import (clean_up_keywords, split_identifier, - write_json) +from agr_literature_service.lit_processing.utils.file_processing_utils import (clean_up_keywords, split_identifier, + write_json) warnings.filterwarnings("ignore", category=UserWarning, module='bs4') diff --git a/agr_literature_service/lit_processing/parse_dqm_json_resource.py b/agr_literature_service/lit_processing/data_ingest/dqm_ingest/parse_dqm_json_resource.py similarity index 98% rename from agr_literature_service/lit_processing/parse_dqm_json_resource.py rename to agr_literature_service/lit_processing/data_ingest/dqm_ingest/parse_dqm_json_resource.py index f5e4ecd52..fadba2130 100644 --- a/agr_literature_service/lit_processing/parse_dqm_json_resource.py +++ b/agr_literature_service/lit_processing/data_ingest/dqm_ingest/parse_dqm_json_resource.py @@ -1,19 +1,18 @@ import json -import logging import logging.config import re from os import environ, makedirs, path from dotenv import load_dotenv -from agr_literature_service.lit_processing.helper_file_processing import (load_pubmed_resource_basic, - save_resource_file, split_identifier, - write_json) +from agr_literature_service.lit_processing.utils.file_processing_utils import (load_pubmed_resource_basic, + save_resource_file, split_identifier, + write_json) load_dotenv() -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') +log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../../../logging.conf') logging.config.fileConfig(log_file_path) logger = logging.getLogger('literature logger') diff --git a/agr_literature_service/lit_processing/sort_dqm_json_reference_updates.py b/agr_literature_service/lit_processing/data_ingest/dqm_ingest/sort_dqm_json_reference_updates.py similarity 
index 98% rename from agr_literature_service/lit_processing/sort_dqm_json_reference_updates.py rename to agr_literature_service/lit_processing/data_ingest/dqm_ingest/sort_dqm_json_reference_updates.py index 791f17773..4c187f53d 100644 --- a/agr_literature_service/lit_processing/sort_dqm_json_reference_updates.py +++ b/agr_literature_service/lit_processing/data_ingest/dqm_ingest/sort_dqm_json_reference_updates.py @@ -1,29 +1,28 @@ import argparse import json import sys -import logging import logging.config import warnings from os import environ, makedirs, path from dotenv import load_dotenv from fastapi.encoders import jsonable_encoder -from agr_literature_service.lit_processing.filter_dqm_md5sum import load_s3_md5data,\ +from agr_literature_service.lit_processing.data_ingest.dqm_ingest.utils.md5sum_utils import load_s3_md5data,\ generate_new_md5, save_s3_md5data from agr_literature_service.api.models import ReferenceModel, ModReferenceTypeModel,\ ModCorpusAssociationModel, AuthorModel, CrossReferenceModel, ModModel -from agr_literature_service.lit_processing.helper_file_processing import compare_authors_or_editors,\ +from agr_literature_service.lit_processing.utils.file_processing_utils import compare_authors_or_editors,\ split_identifier, write_json -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_session,\ +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_session,\ create_postgres_engine, sqlalchemy_load_ref_xref -from agr_literature_service.lit_processing.helper_email import send_email -from agr_literature_service.lit_processing.parse_dqm_json_reference import generate_pmid_data,\ +from agr_literature_service.lit_processing.utils.email_utils import send_email +from agr_literature_service.lit_processing.data_ingest.dqm_ingest.parse_dqm_json_reference import generate_pmid_data,\ aggregate_dqm_with_pubmed -from agr_literature_service.lit_processing.get_pubmed_xml import download_pubmed_xml 
-from agr_literature_service.lit_processing.xml_to_json import generate_json -from agr_literature_service.lit_processing.post_reference_to_db import post_references -from agr_literature_service.lit_processing.update_resource_pubmed_nlm import update_resource_pubmed_nlm -from agr_literature_service.lit_processing.get_dqm_data import download_dqm_json +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.xml.get_pubmed_xml import download_pubmed_xml +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.xml.xml_to_json import generate_json +from agr_literature_service.lit_processing.data_ingest.post_reference_to_db import post_references +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.pubmed_update_resources_nlm import update_resource_pubmed_nlm +from agr_literature_service.lit_processing.data_ingest.dqm_ingest.get_dqm_data import download_dqm_json from agr_literature_service.api.user import set_global_user_id # For WB needing 57578 references checked for updating, diff --git a/agr_literature_service/lit_processing/sort_dqm_json_resource_updates.py b/agr_literature_service/lit_processing/data_ingest/dqm_ingest/sort_dqm_json_resource_updates.py similarity index 98% rename from agr_literature_service/lit_processing/sort_dqm_json_resource_updates.py rename to agr_literature_service/lit_processing/data_ingest/dqm_ingest/sort_dqm_json_resource_updates.py index ae5b39b18..50ec76e2d 100644 --- a/agr_literature_service/lit_processing/sort_dqm_json_resource_updates.py +++ b/agr_literature_service/lit_processing/data_ingest/dqm_ingest/sort_dqm_json_resource_updates.py @@ -1,5 +1,4 @@ import json -import logging import logging.config import warnings from os import environ, makedirs, path @@ -8,10 +7,10 @@ from fastapi.encoders import jsonable_encoder from agr_literature_service.api.models import ResourceModel, CrossReferenceModel -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_session,\ 
+from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_session,\ sqlalchemy_load_ref_xref -from agr_literature_service.lit_processing.helper_file_processing import (compare_authors_or_editors, - save_resource_file, split_identifier) +from agr_literature_service.lit_processing.utils.file_processing_utils import (compare_authors_or_editors, + save_resource_file, split_identifier) from agr_literature_service.api.user import set_global_user_id warnings.filterwarnings("ignore", category=UserWarning, module='bs4') @@ -32,7 +31,7 @@ # keep working off of lit-4003, comparing data from 20211025 files (loaded at lit-4005) -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') +log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../../../logging.conf') logging.config.fileConfig(log_file_path) logger = logging.getLogger('literature logger') diff --git a/agr_literature_service/lit_processing/data_ingest/dqm_ingest/utils/__init__.py b/agr_literature_service/lit_processing/data_ingest/dqm_ingest/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agr_literature_service/lit_processing/filter_dqm_md5sum.py b/agr_literature_service/lit_processing/data_ingest/dqm_ingest/utils/md5sum_utils.py similarity index 97% rename from agr_literature_service/lit_processing/filter_dqm_md5sum.py rename to agr_literature_service/lit_processing/data_ingest/dqm_ingest/utils/md5sum_utils.py index b010d20d6..915b192f1 100644 --- a/agr_literature_service/lit_processing/filter_dqm_md5sum.py +++ b/agr_literature_service/lit_processing/data_ingest/dqm_ingest/utils/md5sum_utils.py @@ -4,11 +4,10 @@ import argparse import sys from os import environ, path, makedirs, listdir -import logging import logging.config -from agr_literature_service.lit_processing.helper_file_processing import split_identifier, write_json -from agr_literature_service.lit_processing.helper_s3 import upload_file_to_s3, 
download_file_from_s3 +from agr_literature_service.lit_processing.utils.file_processing_utils import split_identifier, write_json +from agr_literature_service.lit_processing.utils.s3_utils import upload_file_to_s3, download_file_from_s3 from dotenv import load_dotenv diff --git a/agr_literature_service/lit_processing/post_reference_to_db.py b/agr_literature_service/lit_processing/data_ingest/post_reference_to_db.py similarity index 99% rename from agr_literature_service/lit_processing/post_reference_to_db.py rename to agr_literature_service/lit_processing/data_ingest/post_reference_to_db.py index dfdb355b2..cea336301 100644 --- a/agr_literature_service/lit_processing/post_reference_to_db.py +++ b/agr_literature_service/lit_processing/data_ingest/post_reference_to_db.py @@ -6,10 +6,8 @@ from agr_literature_service.api.models import CrossReferenceModel, ReferenceModel,\ AuthorModel, ModCorpusAssociationModel, ModReferenceTypeModel, ModModel,\ ReferenceCommentAndCorrectionModel, MeshDetailModel -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_session,\ - create_postgres_engine -from agr_literature_service.api.crud.reference_crud import get_citation_from_args,\ - get_next_curie +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_session, create_postgres_engine +from agr_literature_service.api.crud.reference_crud import get_citation_from_args, get_next_curie logging.basicConfig(format='%(message)s') log = logging.getLogger() diff --git a/agr_literature_service/lit_processing/post_resource_to_api.py b/agr_literature_service/lit_processing/data_ingest/post_resource_to_db.py similarity index 97% rename from agr_literature_service/lit_processing/post_resource_to_api.py rename to agr_literature_service/lit_processing/data_ingest/post_resource_to_db.py index e84cf9495..4dede59af 100644 --- a/agr_literature_service/lit_processing/post_resource_to_api.py +++ 
b/agr_literature_service/lit_processing/data_ingest/post_resource_to_db.py @@ -1,6 +1,5 @@ import argparse import json -import logging import logging.config import sqlalchemy import sys @@ -8,11 +7,11 @@ from dotenv import load_dotenv -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_session +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_session, \ + sqlalchemy_load_ref_xref from agr_literature_service.api.models import ResourceModel, CrossReferenceModel, EditorModel from agr_literature_service.api.crud.resource_crud import create_next_curie -from agr_literature_service.lit_processing.helper_sqlalchemy import sqlalchemy_load_ref_xref -from agr_literature_service.lit_processing.helper_file_processing import split_identifier +from agr_literature_service.lit_processing.utils.file_processing_utils import split_identifier load_dotenv() diff --git a/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/__init__.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agr_literature_service/lit_processing/pubmed_searches/FB_false_positive_pmids.txt b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/data_for_pubmed_processing/FB_false_positive_pmids.txt similarity index 100% rename from agr_literature_service/lit_processing/pubmed_searches/FB_false_positive_pmids.txt rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/data_for_pubmed_processing/FB_false_positive_pmids.txt diff --git a/agr_literature_service/lit_processing/pubmed_searches/SGD_false_positive_pmids.txt b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/data_for_pubmed_processing/SGD_false_positive_pmids.txt similarity index 100% rename from agr_literature_service/lit_processing/pubmed_searches/SGD_false_positive_pmids.txt rename to 
agr_literature_service/lit_processing/data_ingest/pubmed_ingest/data_for_pubmed_processing/SGD_false_positive_pmids.txt diff --git a/agr_literature_service/lit_processing/pubmed_searches/WB_false_positive_pmids.txt b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/data_for_pubmed_processing/WB_false_positive_pmids.txt similarity index 100% rename from agr_literature_service/lit_processing/pubmed_searches/WB_false_positive_pmids.txt rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/data_for_pubmed_processing/WB_false_positive_pmids.txt diff --git a/agr_literature_service/lit_processing/pubmed_searches/XB_false_positive_pmids.txt b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/data_for_pubmed_processing/XB_false_positive_pmids.txt similarity index 93% rename from agr_literature_service/lit_processing/pubmed_searches/XB_false_positive_pmids.txt rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/data_for_pubmed_processing/XB_false_positive_pmids.txt index 5aa7d03fc..d5d1aa67b 100644 --- a/agr_literature_service/lit_processing/pubmed_searches/XB_false_positive_pmids.txt +++ b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/data_for_pubmed_processing/XB_false_positive_pmids.txt @@ -1,377 +1,377 @@ -PMID -PMID:0 -PMID:14 -PMID:15 -PMID:16 -PMID:37 -PMID:47 -PMID:86 -PMID:100 -PMID:118 -PMID:129 -PMID:145 -PMID:183 -PMID:276 -PMID:284 -PMID:290 -PMID:334 -PMID:335 -PMID:336 -PMID:350 -PMID:360 -PMID:382 -PMID:383 -PMID:387 -PMID:393 -PMID:417 -PMID:436 -PMID:471 -PMID:472 -PMID:523 -PMID:598 -PMID:599 -PMID:635 -PMID:645 -PMID:646 -PMID:707 -PMID:725 -PMID:732 -PMID:753 -PMID:758 -PMID:785 -PMID:796 -PMID:1361 -PMID:2050686 -PMID:2491924 -PMID:2498837 -PMID:2717293 -PMID:2773952 -PMID:3831303 -PMID:4115930 -PMID:4152599 -PMID:4212417 -PMID:4545128 -PMID:5613991 -PMID:7477384 -PMID:7591991 -PMID:7671795 -PMID:7678491 -PMID:7683786 -PMID:7763770 -PMID:8218399 -PMID:8558229 
-PMID:8596715 -PMID:8736869 -PMID:8879047 -PMID:8927522 -PMID:9045617 -PMID:9056773 -PMID:9072809 -PMID:9072810 -PMID:9072811 -PMID:9072812 -PMID:9261568 -PMID:9303554 -PMID:9307438 -PMID:9654145 -PMID:9705932 -PMID:9915701 -PMID:10548684 -PMID:10617464 -PMID:10903181 -PMID:10936415 -PMID:11096085 -PMID:11118493 -PMID:11152639 -PMID:11168591 -PMID:11195933 -PMID:11753368 -PMID:11827163 -PMID:11959693 -PMID:12115642 -PMID:12117810 -PMID:12392175 -PMID:12511087 -PMID:12617815 -PMID:12617832 -PMID:12617836 -PMID:12617841 -PMID:12617853 -PMID:14103790 -PMID:14103796 -PMID:14642568 -PMID:15456730 -PMID:15829515 -PMID:16100089 -PMID:16444610 -PMID:16510506 -PMID:16530180 -PMID:16572174 -PMID:16645828 -PMID:16649018 -PMID:16676306 -PMID:16691421 -PMID:16696592 -PMID:16738323 -PMID:16758255 -PMID:16775137 -PMID:16891776 -PMID:16973872 -PMID:17143855 -PMID:17216612 -PMID:17303458 -PMID:17355257 -PMID:17625566 -PMID:17654706 -PMID:17797911 -PMID:17900564 -PMID:18265022 -PMID:18624825 -PMID:18956193 -PMID:18995805 -PMID:19000768 -PMID:19036800 -PMID:19060336 -PMID:19144720 -PMID:19303046 -PMID:19371574 -PMID:19441059 -PMID:19479989 -PMID:19496177 -PMID:19705128 -PMID:19705574 -PMID:19924829 -PMID:19936929 -PMID:19936948 -PMID:19965875 -PMID:20161793 -PMID:20236224 -PMID:20238476 -PMID:20351263 -PMID:20357968 -PMID:20627848 -PMID:20733350 -PMID:20946532 -PMID:21069798 -PMID:21089032 -PMID:21148004 -PMID:21162080 -PMID:21165941 -PMID:21165943 -PMID:21489457 -PMID:22155207 -PMID:22201122 -PMID:22311656 -PMID:22351237 -PMID:22394725 -PMID:22949618 -PMID:23076718 -PMID:23132664 -PMID:23150334 -PMID:23264447 -PMID:23341352 -PMID:23443939 -PMID:23614018 -PMID:24095732 -PMID:24105205 -PMID:24214099 -PMID:24389818 -PMID:24668461 -PMID:24943846 -PMID:25063841 -PMID:25109271 -PMID:25127088 -PMID:25145968 -PMID:25350749 -PMID:25507799 -PMID:26324981 -PMID:26450748 -PMID:26452007 -PMID:26856261 -PMID:27039280 -PMID:27291418 -PMID:27436043 -PMID:27641515 -PMID:27832124 -PMID:28366786 
-PMID:28487477 -PMID:28830978 -PMID:28869924 -PMID:29585001 -PMID:29585266 -PMID:29585314 -PMID:29585375 -PMID:29585439 -PMID:29585810 -PMID:29585827 -PMID:29587098 -PMID:29587108 -PMID:29587111 -PMID:29590810 -PMID:29590862 -PMID:29590933 -PMID:29590942 -PMID:29591063 -PMID:29591146 -PMID:29591271 -PMID:29591377 -PMID:29591622 -PMID:29591649 -PMID:29592094 -PMID:29592254 -PMID:29781295 -PMID:29795182 -PMID:29900546 -PMID:30031069 -PMID:30302808 -PMID:30466149 -PMID:30723090 -PMID:30805668 -PMID:30837868 -PMID:30877126 -PMID:30980349 -PMID:30988061 -PMID:30998355 -PMID:31024335 -PMID:31028742 -PMID:31221732 -PMID:31234412 -PMID:31249377 -PMID:31302115 -PMID:31307394 -PMID:31337682 -PMID:31349189 -PMID:31387748 -PMID:31399534 -PMID:31550387 -PMID:31776516 -PMID:31872463 -PMID:32014250 -PMID:32037086 -PMID:32040304 -PMID:32110806 -PMID:32113684 -PMID:32146351 -PMID:32163155 -PMID:32235388 -PMID:32260357 -PMID:32284126 -PMID:32284139 -PMID:32356447 -PMID:32375909 -PMID:32396161 -PMID:32436842 -PMID:32448414 -PMID:32453026 -PMID:32515307 -PMID:32528489 -PMID:32540451 -PMID:32636307 -PMID:32648330 -PMID:32688718 -PMID:32689047 -PMID:32690627 -PMID:32716968 -PMID:32805257 -PMID:32806654 -PMID:32814817 -PMID:32820366 -PMID:32823677 -PMID:32849692 -PMID:32865276 -PMID:32880369 -PMID:32954620 -PMID:32955735 -PMID:32980012 -PMID:33103046 -PMID:33110737 -PMID:33114777 -PMID:33155802 -PMID:33203744 -PMID:33209252 -PMID:33236980 -PMID:33288503 -PMID:33321094 -PMID:33355332 -PMID:33407100 -PMID:33413079 -PMID:33442958 -PMID:33444715 -PMID:33449379 -PMID:33450225 -PMID:33451482 -PMID:33451765 -PMID:33453414 -PMID:33460944 -PMID:33479385 -PMID:33499862 -PMID:33506773 -PMID:33531499 -PMID:33544769 -PMID:33562460 -PMID:33580260 -PMID:33597302 -PMID:33600484 -PMID:33602819 -PMID:33629581 -PMID:33637875 -PMID:33656557 -PMID:33667331 -PMID:33716172 -PMID:33724033 -PMID:33737538 -PMID:33741526 -PMID:33746991 -PMID:33750901 -PMID:33751023 -PMID:33775243 -PMID:33793218 -PMID:33837730 
-PMID:33885891 -PMID:33912908 -PMID:33983390 -PMID:33988713 -PMID:33994962 -PMID:34004851 -PMID:34004857 -PMID:34004898 -PMID:34004944 -PMID:34004994 -PMID:34005281 -PMID:34005480 -PMID:34051231 -PMID:34081905 -PMID:34111210 -PMID:34195804 -PMID:34234379 -PMID:34282161 -PMID:34299083 -PMID:34329593 -PMID:34358124 -PMID:34374537 -PMID:34424964 -PMID:34434116 -PMID:34437699 -PMID:34445719 -PMID:34460912 -PMID:34487959 -PMID:34489418 -PMID:34523752 -PMID:34531858 -PMID:34552471 -PMID:34555899 -PMID:34557389 -PMID:34619015 -PMID:34684325 -PMID:34768066 -PMID:34790700 -PMID:34796173 -PMID:34800681 -PMID:34841092 -PMID:34842138 -PMID:34858215 -PMID:35082026 -PMID:35082033 -PMID:35153658 -PMID:35322911 -PMID:35354863 +PMID +PMID:0 +PMID:14 +PMID:15 +PMID:16 +PMID:37 +PMID:47 +PMID:86 +PMID:100 +PMID:118 +PMID:129 +PMID:145 +PMID:183 +PMID:276 +PMID:284 +PMID:290 +PMID:334 +PMID:335 +PMID:336 +PMID:350 +PMID:360 +PMID:382 +PMID:383 +PMID:387 +PMID:393 +PMID:417 +PMID:436 +PMID:471 +PMID:472 +PMID:523 +PMID:598 +PMID:599 +PMID:635 +PMID:645 +PMID:646 +PMID:707 +PMID:725 +PMID:732 +PMID:753 +PMID:758 +PMID:785 +PMID:796 +PMID:1361 +PMID:2050686 +PMID:2491924 +PMID:2498837 +PMID:2717293 +PMID:2773952 +PMID:3831303 +PMID:4115930 +PMID:4152599 +PMID:4212417 +PMID:4545128 +PMID:5613991 +PMID:7477384 +PMID:7591991 +PMID:7671795 +PMID:7678491 +PMID:7683786 +PMID:7763770 +PMID:8218399 +PMID:8558229 +PMID:8596715 +PMID:8736869 +PMID:8879047 +PMID:8927522 +PMID:9045617 +PMID:9056773 +PMID:9072809 +PMID:9072810 +PMID:9072811 +PMID:9072812 +PMID:9261568 +PMID:9303554 +PMID:9307438 +PMID:9654145 +PMID:9705932 +PMID:9915701 +PMID:10548684 +PMID:10617464 +PMID:10903181 +PMID:10936415 +PMID:11096085 +PMID:11118493 +PMID:11152639 +PMID:11168591 +PMID:11195933 +PMID:11753368 +PMID:11827163 +PMID:11959693 +PMID:12115642 +PMID:12117810 +PMID:12392175 +PMID:12511087 +PMID:12617815 +PMID:12617832 +PMID:12617836 +PMID:12617841 +PMID:12617853 +PMID:14103790 +PMID:14103796 +PMID:14642568 
+PMID:15456730 +PMID:15829515 +PMID:16100089 +PMID:16444610 +PMID:16510506 +PMID:16530180 +PMID:16572174 +PMID:16645828 +PMID:16649018 +PMID:16676306 +PMID:16691421 +PMID:16696592 +PMID:16738323 +PMID:16758255 +PMID:16775137 +PMID:16891776 +PMID:16973872 +PMID:17143855 +PMID:17216612 +PMID:17303458 +PMID:17355257 +PMID:17625566 +PMID:17654706 +PMID:17797911 +PMID:17900564 +PMID:18265022 +PMID:18624825 +PMID:18956193 +PMID:18995805 +PMID:19000768 +PMID:19036800 +PMID:19060336 +PMID:19144720 +PMID:19303046 +PMID:19371574 +PMID:19441059 +PMID:19479989 +PMID:19496177 +PMID:19705128 +PMID:19705574 +PMID:19924829 +PMID:19936929 +PMID:19936948 +PMID:19965875 +PMID:20161793 +PMID:20236224 +PMID:20238476 +PMID:20351263 +PMID:20357968 +PMID:20627848 +PMID:20733350 +PMID:20946532 +PMID:21069798 +PMID:21089032 +PMID:21148004 +PMID:21162080 +PMID:21165941 +PMID:21165943 +PMID:21489457 +PMID:22155207 +PMID:22201122 +PMID:22311656 +PMID:22351237 +PMID:22394725 +PMID:22949618 +PMID:23076718 +PMID:23132664 +PMID:23150334 +PMID:23264447 +PMID:23341352 +PMID:23443939 +PMID:23614018 +PMID:24095732 +PMID:24105205 +PMID:24214099 +PMID:24389818 +PMID:24668461 +PMID:24943846 +PMID:25063841 +PMID:25109271 +PMID:25127088 +PMID:25145968 +PMID:25350749 +PMID:25507799 +PMID:26324981 +PMID:26450748 +PMID:26452007 +PMID:26856261 +PMID:27039280 +PMID:27291418 +PMID:27436043 +PMID:27641515 +PMID:27832124 +PMID:28366786 +PMID:28487477 +PMID:28830978 +PMID:28869924 +PMID:29585001 +PMID:29585266 +PMID:29585314 +PMID:29585375 +PMID:29585439 +PMID:29585810 +PMID:29585827 +PMID:29587098 +PMID:29587108 +PMID:29587111 +PMID:29590810 +PMID:29590862 +PMID:29590933 +PMID:29590942 +PMID:29591063 +PMID:29591146 +PMID:29591271 +PMID:29591377 +PMID:29591622 +PMID:29591649 +PMID:29592094 +PMID:29592254 +PMID:29781295 +PMID:29795182 +PMID:29900546 +PMID:30031069 +PMID:30302808 +PMID:30466149 +PMID:30723090 +PMID:30805668 +PMID:30837868 +PMID:30877126 +PMID:30980349 +PMID:30988061 +PMID:30998355 +PMID:31024335 
+PMID:31028742 +PMID:31221732 +PMID:31234412 +PMID:31249377 +PMID:31302115 +PMID:31307394 +PMID:31337682 +PMID:31349189 +PMID:31387748 +PMID:31399534 +PMID:31550387 +PMID:31776516 +PMID:31872463 +PMID:32014250 +PMID:32037086 +PMID:32040304 +PMID:32110806 +PMID:32113684 +PMID:32146351 +PMID:32163155 +PMID:32235388 +PMID:32260357 +PMID:32284126 +PMID:32284139 +PMID:32356447 +PMID:32375909 +PMID:32396161 +PMID:32436842 +PMID:32448414 +PMID:32453026 +PMID:32515307 +PMID:32528489 +PMID:32540451 +PMID:32636307 +PMID:32648330 +PMID:32688718 +PMID:32689047 +PMID:32690627 +PMID:32716968 +PMID:32805257 +PMID:32806654 +PMID:32814817 +PMID:32820366 +PMID:32823677 +PMID:32849692 +PMID:32865276 +PMID:32880369 +PMID:32954620 +PMID:32955735 +PMID:32980012 +PMID:33103046 +PMID:33110737 +PMID:33114777 +PMID:33155802 +PMID:33203744 +PMID:33209252 +PMID:33236980 +PMID:33288503 +PMID:33321094 +PMID:33355332 +PMID:33407100 +PMID:33413079 +PMID:33442958 +PMID:33444715 +PMID:33449379 +PMID:33450225 +PMID:33451482 +PMID:33451765 +PMID:33453414 +PMID:33460944 +PMID:33479385 +PMID:33499862 +PMID:33506773 +PMID:33531499 +PMID:33544769 +PMID:33562460 +PMID:33580260 +PMID:33597302 +PMID:33600484 +PMID:33602819 +PMID:33629581 +PMID:33637875 +PMID:33656557 +PMID:33667331 +PMID:33716172 +PMID:33724033 +PMID:33737538 +PMID:33741526 +PMID:33746991 +PMID:33750901 +PMID:33751023 +PMID:33775243 +PMID:33793218 +PMID:33837730 +PMID:33885891 +PMID:33912908 +PMID:33983390 +PMID:33988713 +PMID:33994962 +PMID:34004851 +PMID:34004857 +PMID:34004898 +PMID:34004944 +PMID:34004994 +PMID:34005281 +PMID:34005480 +PMID:34051231 +PMID:34081905 +PMID:34111210 +PMID:34195804 +PMID:34234379 +PMID:34282161 +PMID:34299083 +PMID:34329593 +PMID:34358124 +PMID:34374537 +PMID:34424964 +PMID:34434116 +PMID:34437699 +PMID:34445719 +PMID:34460912 +PMID:34487959 +PMID:34489418 +PMID:34523752 +PMID:34531858 +PMID:34552471 +PMID:34555899 +PMID:34557389 +PMID:34619015 +PMID:34684325 +PMID:34768066 +PMID:34790700 +PMID:34796173 
+PMID:34800681 +PMID:34841092 +PMID:34842138 +PMID:34858215 +PMID:35082026 +PMID:35082033 +PMID:35153658 +PMID:35322911 +PMID:35354863 diff --git a/agr_literature_service/lit_processing/pubmed_searches/pubMedType2allianceCategory_mapping.tsv b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/data_for_pubmed_processing/pubMedType2allianceCategory_mapping.tsv similarity index 97% rename from agr_literature_service/lit_processing/pubmed_searches/pubMedType2allianceCategory_mapping.tsv rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/data_for_pubmed_processing/pubMedType2allianceCategory_mapping.tsv index 97bf8579b..f649e6b65 100644 --- a/agr_literature_service/lit_processing/pubmed_searches/pubMedType2allianceCategory_mapping.tsv +++ b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/data_for_pubmed_processing/pubMedType2allianceCategory_mapping.tsv @@ -1,84 +1,84 @@ -# column1 = one of pubmed_types -# column2 = Alliance category -# column3 = filter: -# 1 = use this Alliance Category if the pubmed_type is in the pubmed_type list -# Secondary = if there is another category that is more appropriate then use the other category -# Last = don't use these types to map to Alliance category unless there is no other option -Adaptive Clinical Trial Research_Article -Autobiography Other -Bibliography Other -Biography Other -Case Reports Research_Article -Classical Article Research_Article -Clinical Conference Research_Article -Clinical Study Research_Article -Clinical Trial Research_Article -Clinical Trial Protocol Research_Article -Clinical Trial, Phase I Research_Article -Clinical Trial, Phase II Research_Article -Clinical Trial, Phase III Research_Article -Clinical Trial, Phase IV Research_Article -Clinical Trial, Veterinary Research_Article -Collected Work -Comment Other -Comparative Study Research_Article -Congress -Consensus Development Conference -Consensus Development Conference, NIH -Controlled Clinical Trial 
-Corrected and Republished Article Correction -Dataset -Dictionary -Directory -Duplicate Publication Research_Article -Editorial Other -Electronic Supplementary Materials -English Abstract -Equivalence Trial -Evaluation Study Research_Article -Expression of Concern Other -Festschrift Other -Government Publication -Guideline -Historical Article -Interactive Tutorial Other -Interview Other -Introductory Journal Article -Journal Article Research_Article secondary -Lecture -Legal Case -Legislation -Letter -Meta-Analysis Research_Article -Multicenter Study Research_Article -News Other -Newspaper Article -Observational Study Research_Article -Observational Study, Veterinary -Overall -Patient Education Handout -Periodical Index -Personal Narrative -Portrait Other -Practice Guideline -Preprint Preprint -Pragmatic Clinical Trial -Published Erratum Correction -Randomized Controlled Trial Research_Article Last -Randomized Controlled Trial, Veterinary Research_Article Last -Research Support, American Recovery and Reinvestment Act Research_Article Last -Research Support, N.I.H., Extramural Research_Article Last -Research Support, N.I.H., Intramural Research_Article Last -Research Support, Non-U.S. Gov't Research_Article Last -Research Support, U.S. Gov't, Non-P.H.S. Research_Article Last -Research Support, U.S. Gov't, P.H.S. 
Research_Article Last -Retracted Publication Retraction -Retraction of Publication Retraction -Review Review_Article 1 -Scientific Integrity Review -Systematic Review Review_Article -Technical Report Research_Article -Twin Study Research_Article Secondary -Validation Study Research_Article -Video-Audio Media Research_Article +# column1 = one of pubmed_types +# column2 = Alliance category +# column3 = filter: +# 1 = use this Alliance Category if the pubmed_type is in the pubmed_type list +# Secondary = if there is another category that is more appropriate then use the other category +# Last = don't use these types to map to Alliance category unless there is no other option +Adaptive Clinical Trial Research_Article +Autobiography Other +Bibliography Other +Biography Other +Case Reports Research_Article +Classical Article Research_Article +Clinical Conference Research_Article +Clinical Study Research_Article +Clinical Trial Research_Article +Clinical Trial Protocol Research_Article +Clinical Trial, Phase I Research_Article +Clinical Trial, Phase II Research_Article +Clinical Trial, Phase III Research_Article +Clinical Trial, Phase IV Research_Article +Clinical Trial, Veterinary Research_Article +Collected Work +Comment Other +Comparative Study Research_Article +Congress +Consensus Development Conference +Consensus Development Conference, NIH +Controlled Clinical Trial +Corrected and Republished Article Correction +Dataset +Dictionary +Directory +Duplicate Publication Research_Article +Editorial Other +Electronic Supplementary Materials +English Abstract +Equivalence Trial +Evaluation Study Research_Article +Expression of Concern Other +Festschrift Other +Government Publication +Guideline +Historical Article +Interactive Tutorial Other +Interview Other +Introductory Journal Article +Journal Article Research_Article secondary +Lecture +Legal Case +Legislation +Letter +Meta-Analysis Research_Article +Multicenter Study Research_Article +News Other +Newspaper Article 
+Observational Study Research_Article +Observational Study, Veterinary +Overall +Patient Education Handout +Periodical Index +Personal Narrative +Portrait Other +Practice Guideline +Preprint Preprint +Pragmatic Clinical Trial +Published Erratum Correction +Randomized Controlled Trial Research_Article Last +Randomized Controlled Trial, Veterinary Research_Article Last +Research Support, American Recovery and Reinvestment Act Research_Article Last +Research Support, N.I.H., Extramural Research_Article Last +Research Support, N.I.H., Intramural Research_Article Last +Research Support, Non-U.S. Gov't Research_Article Last +Research Support, U.S. Gov't, Non-P.H.S. Research_Article Last +Research Support, U.S. Gov't, P.H.S. Research_Article Last +Retracted Publication Retraction +Retraction of Publication Retraction +Review Review_Article 1 +Scientific Integrity Review +Systematic Review Review_Article +Technical Report Research_Article +Twin Study Research_Article Secondary +Validation Study Research_Article +Video-Audio Media Research_Article Webcast Other \ No newline at end of file diff --git a/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/fulltext/__init__.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/fulltext/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agr_literature_service/lit_processing/expand_upload_tgz.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/fulltext/expand_upload_tgz.py similarity index 97% rename from agr_literature_service/lit_processing/expand_upload_tgz.py rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/fulltext/expand_upload_tgz.py index 3d7969d1f..763daf1ad 100644 --- a/agr_literature_service/lit_processing/expand_upload_tgz.py +++ b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/fulltext/expand_upload_tgz.py @@ -11,19 +11,18 @@ import hashlib -import logging import logging.config import tarfile from os import 
environ, listdir, makedirs, path, rename, walk from shutil import copy2 -from agr_literature_service.lit_processing.helper_s3 import upload_file_to_s3 +from agr_literature_service.lit_processing.utils.s3_utils import upload_file_to_s3 from dotenv import load_dotenv load_dotenv() -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') +log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../../../../logging.conf') logging.config.fileConfig(log_file_path) logger = logging.getLogger('literature logger') logging.getLogger("s3transfer.utils").setLevel(logging.WARNING) diff --git a/agr_literature_service/lit_processing/get_pubmed_tgz.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/fulltext/get_pubmed_tgz.py similarity index 99% rename from agr_literature_service/lit_processing/get_pubmed_tgz.py rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/fulltext/get_pubmed_tgz.py index a855db7d9..a12f84180 100644 --- a/agr_literature_service/lit_processing/get_pubmed_tgz.py +++ b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/fulltext/get_pubmed_tgz.py @@ -19,7 +19,7 @@ # if file already exists in pmids_found, skip it. 
-log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') +log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../../../../logging.conf') logging.config.fileConfig(log_file_path) logger = logging.getLogger('literature logger') diff --git a/agr_literature_service/lit_processing/generate_pubmed_nlm_resource.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/generate_pubmed_nlm_resource.py similarity index 97% rename from agr_literature_service/lit_processing/generate_pubmed_nlm_resource.py rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/generate_pubmed_nlm_resource.py index 2c0e5bf18..d274d87a1 100644 --- a/agr_literature_service/lit_processing/generate_pubmed_nlm_resource.py +++ b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/generate_pubmed_nlm_resource.py @@ -1,13 +1,12 @@ import argparse import json -import logging import logging.config import re import urllib from os import environ, makedirs, path from dotenv import load_dotenv -from agr_literature_service.lit_processing.helper_s3 import upload_file_to_s3 +from agr_literature_service.lit_processing.utils.s3_utils import upload_file_to_s3 # generate from local file and do not upload to s3 # pipenv run python generate_pubmed_nlm_resource.py -l @@ -23,7 +22,7 @@ load_dotenv() -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') +log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../../../logging.conf') logging.config.fileConfig(log_file_path) logger = logging.getLogger('literature logger') diff --git a/agr_literature_service/lit_processing/process_single_pmid.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/process_single_pmid.py similarity index 81% rename from agr_literature_service/lit_processing/process_single_pmid.py rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/process_single_pmid.py index 
cfbe5f3bc..08347a123 100644 --- a/agr_literature_service/lit_processing/process_single_pmid.py +++ b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/process_single_pmid.py @@ -1,21 +1,20 @@ import argparse -import logging import logging.config from os import environ, path import requests -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_session -from agr_literature_service.lit_processing.get_pubmed_xml import download_pubmed_xml -from agr_literature_service.lit_processing.post_reference_to_db import post_references -from agr_literature_service.lit_processing.sanitize_pubmed_json import sanitize_pubmed_json_list -from agr_literature_service.lit_processing.xml_to_json import generate_json -from agr_literature_service.lit_processing.helper_s3 import upload_xml_file_to_s3 +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_session +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.xml.get_pubmed_xml import download_pubmed_xml +from agr_literature_service.lit_processing.data_ingest.post_reference_to_db import post_references +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.sanitize_pubmed_json import sanitize_pubmed_json_list +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.xml.xml_to_json import generate_json +from agr_literature_service.lit_processing.utils.s3_utils import upload_xml_file_to_s3 from agr_literature_service.api.user import set_global_user_id # pipenv run python process_single_pmid.py -c 12345678 # enter a single pmid as an argument, download xml, convert to json, sanitize, post to api -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') +log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../../../logging.conf') logging.config.fileConfig(log_file_path) logger = logging.getLogger('literature logger') diff --git 
a/agr_literature_service/lit_processing/query_pubmed_mod_updates.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_search_new_references.py similarity index 97% rename from agr_literature_service/lit_processing/query_pubmed_mod_updates.py rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_search_new_references.py index 13ae551e9..b6bc33181 100644 --- a/agr_literature_service/lit_processing/query_pubmed_mod_updates.py +++ b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_search_new_references.py @@ -1,5 +1,4 @@ import argparse -import logging import logging.config import re import time @@ -11,17 +10,17 @@ import requests from dotenv import load_dotenv -from agr_literature_service.lit_processing.get_pubmed_xml import download_pubmed_xml -from agr_literature_service.lit_processing.xml_to_json import generate_json -from agr_literature_service.lit_processing.sanitize_pubmed_json import sanitize_pubmed_json_list -from agr_literature_service.lit_processing.post_reference_to_db import post_references -from agr_literature_service.lit_processing.helper_s3 import upload_xml_file_to_s3 -from agr_literature_service.lit_processing.update_resource_pubmed_nlm import update_resource_pubmed_nlm +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.xml.get_pubmed_xml import download_pubmed_xml +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.xml.xml_to_json import generate_json +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.sanitize_pubmed_json import sanitize_pubmed_json_list +from agr_literature_service.lit_processing.data_ingest.post_reference_to_db import post_references +from agr_literature_service.lit_processing.utils.s3_utils import upload_xml_file_to_s3 +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.pubmed_update_resources_nlm import update_resource_pubmed_nlm from agr_literature_service.api.database.main import 
get_db from agr_literature_service.api.models import ReferenceModel, CrossReferenceModel,\ ModCorpusAssociationModel, ModModel -from helper_sqlalchemy import sqlalchemy_load_ref_xref -from agr_literature_service.lit_processing.helper_email import send_email +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import sqlalchemy_load_ref_xref +from agr_literature_service.lit_processing.utils.email_utils import send_email from agr_literature_service.api.user import set_global_user_id load_dotenv() @@ -124,7 +123,7 @@ # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=zebrafish[Title/Abstract]+OR+zebra+fish[Title/Abstract]+OR+danio[Title/Abstract]+OR+zebrafish[keyword]+OR+zebra+fish[keyword]+OR+danio[keyword]+OR+zebrafish[Mesh+Terms]+OR+zebra+fish[Mesh+Terms]+OR+danio[Mesh+Terms]&retmax=100000000 -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') +log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../../../logging.conf') logging.config.fileConfig(log_file_path) logger = logging.getLogger('literature logger') diff --git a/agr_literature_service/lit_processing/update_all_pubmed_papers.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_update_references_all_mods.py similarity index 88% rename from agr_literature_service/lit_processing/update_all_pubmed_papers.py rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_update_references_all_mods.py index 200b7fa93..ee4a09535 100644 --- a/agr_literature_service/lit_processing/update_all_pubmed_papers.py +++ b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_update_references_all_mods.py @@ -5,11 +5,11 @@ import shutil from agr_literature_service.api.models import CrossReferenceModel, ReferenceModel -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_session -from agr_literature_service.lit_processing.update_resource_pubmed_nlm 
import update_resource_pubmed_nlm -from agr_literature_service.lit_processing.get_pubmed_xml import download_pubmed_xml -from agr_literature_service.lit_processing.update_pubmed_papers import update_data -from agr_literature_service.lit_processing.filter_dqm_md5sum import load_s3_md5data, save_s3_md5data +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_session +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.pubmed_update_resources_nlm import update_resource_pubmed_nlm +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.xml.get_pubmed_xml import download_pubmed_xml +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.pubmed_update_references_single_mod import update_data +from agr_literature_service.lit_processing.data_ingest.dqm_ingest.utils.md5sum_utils import load_s3_md5data, save_s3_md5data from datetime import datetime, timedelta logging.basicConfig(format='%(message)s') diff --git a/agr_literature_service/lit_processing/update_pubmed_papers.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_update_references_single_mod.py similarity index 98% rename from agr_literature_service/lit_processing/update_pubmed_papers.py rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_update_references_single_mod.py index ef0502feb..0f486bedb 100644 --- a/agr_literature_service/lit_processing/update_pubmed_papers.py +++ b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_update_references_single_mod.py @@ -11,14 +11,14 @@ ModModel, ModCorpusAssociationModel, ReferenceCommentAndCorrectionModel, \ AuthorModel, MeshDetailModel, ResourceModel from agr_literature_service.api.crud.reference_crud import get_citation_from_args -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_session, \ +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import 
create_postgres_session, \ create_postgres_engine -from agr_literature_service.lit_processing.update_resource_pubmed_nlm import update_resource_pubmed_nlm -from agr_literature_service.lit_processing.get_pubmed_xml import download_pubmed_xml -from agr_literature_service.lit_processing.xml_to_json import generate_json -from agr_literature_service.lit_processing.filter_dqm_md5sum import load_s3_md5data -from agr_literature_service.lit_processing.helper_s3 import upload_xml_file_to_s3 -from agr_literature_service.lit_processing.helper_email import send_email +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.pubmed_update_resources_nlm import update_resource_pubmed_nlm +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.xml.get_pubmed_xml import download_pubmed_xml +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.xml.xml_to_json import generate_json +from agr_literature_service.lit_processing.data_ingest.dqm_ingest.utils.md5sum_utils import load_s3_md5data +from agr_literature_service.lit_processing.utils.s3_utils import upload_xml_file_to_s3 +from agr_literature_service.lit_processing.utils.email_utils import send_email from agr_literature_service.api.user import set_global_user_id logging.basicConfig(format='%(message)s') diff --git a/agr_literature_service/lit_processing/update_resource_pubmed_nlm.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_update_resources_nlm.py similarity index 73% rename from agr_literature_service/lit_processing/update_resource_pubmed_nlm.py rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_update_resources_nlm.py index 43c8de640..c6609d202 100644 --- a/agr_literature_service/lit_processing/update_resource_pubmed_nlm.py +++ b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_update_resources_nlm.py @@ -1,16 +1,14 @@ - -import logging import logging.config import sys from os import environ, path -from 
agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_session -from agr_literature_service.lit_processing.generate_pubmed_nlm_resource import (populate_from_url, populate_nlm_info, - generate_json) -from agr_literature_service.lit_processing.helper_file_processing import load_pubmed_resource_basic -from agr_literature_service.lit_processing.parse_dqm_json_resource import (save_resource_file, create_storage_path) -from agr_literature_service.lit_processing.helper_sqlalchemy import sqlalchemy_load_ref_xref -from agr_literature_service.lit_processing.post_resource_to_api import post_resources +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_session +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.generate_pubmed_nlm_resource import (populate_from_url, populate_nlm_info, + generate_json) +from agr_literature_service.lit_processing.utils.file_processing_utils import load_pubmed_resource_basic +from agr_literature_service.lit_processing.data_ingest.dqm_ingest.parse_dqm_json_resource import (save_resource_file, create_storage_path) +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import sqlalchemy_load_ref_xref +from agr_literature_service.lit_processing.data_ingest.post_resource_to_db import post_resources from agr_literature_service.api.user import set_global_user_id logging.basicConfig(level=logging.INFO, diff --git a/agr_literature_service/lit_processing/sanitize_pubmed_json.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/sanitize_pubmed_json.py similarity index 97% rename from agr_literature_service/lit_processing/sanitize_pubmed_json.py rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/sanitize_pubmed_json.py index 4dcebbc4b..72c1eb9f7 100644 --- a/agr_literature_service/lit_processing/sanitize_pubmed_json.py +++ b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/sanitize_pubmed_json.py @@ -1,7 +1,7 @@ 
import json from os import environ, makedirs, path -from agr_literature_service.lit_processing.helper_file_processing import write_json +from agr_literature_service.lit_processing.utils.file_processing_utils import write_json def sanitize_pubmed_json_list(pmids, inject_list): diff --git a/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/xml/__init__.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/xml/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agr_literature_service/lit_processing/get_pubmed_xml.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/xml/get_pubmed_xml.py similarity index 100% rename from agr_literature_service/lit_processing/get_pubmed_xml.py rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/xml/get_pubmed_xml.py diff --git a/agr_literature_service/lit_processing/xml_to_json.py b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/xml/xml_to_json.py similarity index 99% rename from agr_literature_service/lit_processing/xml_to_json.py rename to agr_literature_service/lit_processing/data_ingest/pubmed_ingest/xml/xml_to_json.py index 8c94f6597..72fba5014 100644 --- a/agr_literature_service/lit_processing/xml_to_json.py +++ b/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/xml/xml_to_json.py @@ -5,8 +5,8 @@ import urllib.request from os import environ, makedirs, path from typing import List, Set -from agr_literature_service.lit_processing.filter_dqm_md5sum import load_s3_md5data, save_s3_md5data, generate_md5sum_from_dict -from agr_literature_service.lit_processing.helper_file_processing import write_json +from agr_literature_service.lit_processing.data_ingest.dqm_ingest.utils.md5sum_utils import load_s3_md5data, save_s3_md5data, generate_md5sum_from_dict +from agr_literature_service.lit_processing.utils.file_processing_utils import write_json # pipenv run python xml_to_json.py -f 
/home/azurebrd/git/agr_literature_service_demo/src/xml_processing/inputs/sample_set diff --git a/agr_literature_service/lit_processing/find_duplicate_doi.py b/agr_literature_service/lit_processing/find_duplicate_doi.py deleted file mode 100644 index 3a099edce..000000000 --- a/agr_literature_service/lit_processing/find_duplicate_doi.py +++ /dev/null @@ -1,113 +0,0 @@ - -import json -import logging.config -import warnings -from os import environ, listdir, path - -from dotenv import load_dotenv - -from agr_literature_service.lit_processing.helper_file_processing import split_identifier - -warnings.filterwarnings("ignore", category=UserWarning, module='bs4') - -load_dotenv() - -# pipenv run python3 find_duplicate_doi.py - - -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') -logging.config.fileConfig(log_file_path) -logger = logging.getLogger('literature logger') - -base_path = environ.get('XML_PATH') - - -def find_doi_duplicates(): - """ - - :return: - """ - - json_storage_path = base_path + 'sanitized_reference_json/' - - files_to_process = [] - dir_list = listdir(json_storage_path) - for filename in dir_list: - # logger.info("%s", filename) - if 'REFERENCE_' in filename and '.REFERENCE_' not in filename: - # logger.info("%s", filename) - files_to_process.append(json_storage_path + filename) - - duplicate_dois_file = base_path + 'duplicate_dois' - with open(duplicate_dois_file, 'w') as duplicate_fh: - xrefs = dict() - primary_id_in_file = dict() - for filepath in files_to_process: - # only test one file for run - # if filepath != json_storage_path + 'REFERENCE_PUBMED_ZFIN_1.json': - # continue - logger.info("opening file\t%s", filepath) - f = open(filepath) - reference_data = json.load(f) - - filename = filepath.replace(json_storage_path, '') - - # counter = 0 - for entry in reference_data: - # counter += 1 - # if counter > 2: - # break - - # output what we get from the file before converting for the API - # json_object = 
json.dumps(entry, indent=4) - # print(json_object) - - primary_id = entry['primaryId'] - # if primary_id != 'PMID:9643811': - # continue - - if primary_id in primary_id_in_file: - primary_id_in_file[primary_id].append(filename) - else: - primary_id_in_file[primary_id] = [] - primary_id_in_file[primary_id].append(filename) - - if 'crossReferences' in entry: - for xref in entry['crossReferences']: - prefix, identifier, separator = split_identifier(xref['id']) - if prefix == 'NLM' or prefix == 'ISSN': - continue - - ident = xref['id'] - if ident in xrefs: - xrefs[ident].add(primary_id) - else: - xrefs[ident] = set() - xrefs[ident].add(primary_id) - for ident in xrefs: - if len(xrefs[ident]) > 1: - sorted_ids = sorted(xrefs[ident]) - duplicate_list = [] - for primary_id in sorted_ids: - files = ", ".join(primary_id_in_file[primary_id]) - duplicate_text = primary_id + " (" + files + ")" - duplicate_list.append(duplicate_text) - primary_ids = "; ".join(duplicate_list) - duplicate_fh.write(ident + "\t" + primary_ids + "\n") - # logger.info("ident %s\tset %s", ident, primary_ids) - - duplicate_fh.close - - -if __name__ == "__main__": - """ - call main start function - """ - - logger.info("starting find_duplicate_doi.py") - - find_doi_duplicates() - -# pipenv run python3 find_duplicate_doi.py - - logger.info("ending parse_dqm_json_reference.py") diff --git a/agr_literature_service/lit_processing/find_pubmed_type.py b/agr_literature_service/lit_processing/find_pubmed_type.py deleted file mode 100644 index f8cd383f8..000000000 --- a/agr_literature_service/lit_processing/find_pubmed_type.py +++ /dev/null @@ -1,231 +0,0 @@ -import argparse -import logging -import logging.config -import re -import urllib.request -from os import environ, makedirs, path -from typing import List, Set - -# search all pubmed_xml/*.xml for types of -# -# -# for curators to decide what we want to capture - -# pipenv run python find_pubmed_type.py -f 
/home/azurebrd/git/agr_literature_service_demo/src/xml_processing/inputs/sample_set - -pmids = [] # type: List - - -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') -logging.config.fileConfig(log_file_path) -logger = logging.getLogger('literature logger') - -base_path = environ.get('XML_PATH', "") - -publication_type_set = set() # type: Set -comments_ref_type_set = set() # type: Set - - -def represents_int(s): - """ - - :param s: - :return: - """ - - try: - int(s) - return True - except ValueError: - return False - - -def month_name_to_number_string(string): - """ - - :param string: - :return: - """ - - m = { - 'jan': '01', - 'feb': '02', - 'mar': '03', - 'apr': '04', - 'may': '05', - 'jun': '06', - 'jul': '07', - 'aug': '08', - 'sep': '09', - 'oct': '10', - 'nov': '11', - 'dec': '12'} - s = string.strip()[:3].lower() - - try: - out = m[s] - return out - except ValueError: - raise ValueError(string + ' is not a month') - - -def get_year_month_day_from_xml_date(pub_date): - """ - - :param pub_date: - :return: - """ - - date_list = [] - year = '' - month = '01' - day = '01' - year_re_output = re.search("(.+?)", pub_date) - if year_re_output is not None: - year = year_re_output.group(1) - month_re_output = re.search("(.+?)", pub_date) - if month_re_output is not None: - month_text = month_re_output.group(1) - if represents_int(month_text): - month = month_text - else: - month = month_name_to_number_string(month_text) - day_re_output = re.search("(.+?)", pub_date) - if day_re_output is not None: - day = day_re_output.group(1) - date_list.append(year) - date_list.append(month) - date_list.append(day) - return date_list - - -def get_medline_date_from_xml_date(pub_date): - """ - - :param pub_date: - :return: - """ - - medline_re_output = re.search("(.+?)", pub_date) - if medline_re_output is not None: - return medline_re_output.group(1) - - -def generate_json(): - """ - - open input xml file and read data in form of python dictionary 
using xmltodict module - storage_path = base_path + 'pubmed_xml_20210322/' - json_storage_path = base_path + 'pubmed_json_20210322/' - - :return: - """ - - storage_path = base_path + 'pubmed_xml/' - json_storage_path = base_path + 'pubmed_json/' - if not path.exists(storage_path): - makedirs(storage_path) - if not path.exists(json_storage_path): - makedirs(json_storage_path) - for pmid in pmids: - filename = storage_path + pmid + '.xml' - # if getting pmids from directories split into multiple sub-subdirectories - # filename = get_path_from_pmid(pmid, 'xml') - if not path.exists(filename): - continue - # logger.info("processing %s", filename) - with open(filename) as xml_file: - - xml = xml_file.read() - # print(xml) - - # xmltodict is treating html markup like text as xml, which is creating mistaken structure in the conversion. - # may be better to parse full xml instead. - # data_dict = xmltodict.parse(xml_file.read()) - xml_file.close() - - # print (pmid) - - if re.findall("(.+?)", xml): - types_group = re.findall("(.+?)", xml) - for type in types_group: - publication_type_set.add(type) - elif re.findall("(.+?)", xml): - types_group = re.findall("(.+?)", xml) - for type in types_group: - publication_type_set.add(type) - # publication_type_set.add(types_group) - - if re.findall("", xml): - types_group = re.findall("", xml) - for type in types_group: - comments_ref_type_set.add(type) - - for comments_ref_type in comments_ref_type_set: - logger.info("comments_ref_type %s", comments_ref_type) - - for publication_type in publication_type_set: - logger.info("publication_type %s", publication_type) - - -if __name__ == "__main__": - """ - call main start function - """ - - parser = argparse.ArgumentParser() - parser.add_argument('-c', '--commandline', nargs='*', action='store', help='take input from command line flag') - parser.add_argument('-d', '--database', action='store_true', help='take input from database query') - parser.add_argument('-f', '--file', 
action='store', help='take input from entries in file with full path') - parser.add_argument('-r', '--restapi', action='store', help='take input from rest api') - parser.add_argument('-s', '--sample', action='store_true', help='test sample input from hardcoded entries') - parser.add_argument('-u', '--url', action='store', help='take input from entries in file at url') - - args = vars(parser.parse_args()) - -# python find_pubmed_type.py -d - if args['database']: - logger.info("Processing database entries") - - elif args['restapi']: - logger.info("Processing rest api entries") - -# python find_pubmed_type.py -f /home/azurebrd/git/agr_literature_service_demo/src/xml_processing/inputs/sample_set - elif args['file']: - logger.info("Processing file input from %s", args['file']) - with open(args['file'], 'r') as fp: - pmid = fp.readline() - while pmid: - pmids.append(pmid.rstrip()) - pmid = fp.readline() - -# python find_pubmed_type.py -u http://tazendra.caltech.edu/~azurebrd/var/work/pmid_sample - elif args['url']: - logger.info("Processing url input from %s", args['url']) - req = urllib.request.urlopen(args['url']) - data = req.read() - lines = data.splitlines() - for pmid in lines: - pmids.append(str(int(pmid))) - -# python find_pubmed_type.py -c 1234 4576 1828 - elif args['commandline']: - logger.info("Processing commandline input") - for pmid in args['commandline']: - pmids.append(pmid) - -# python find_pubmed_type.py -s - elif args['sample']: - logger.info("Processing hardcoded sample input") - pmid = '12345678' - pmids.append(pmid) - pmid = '12345679' - pmids.append(pmid) - pmid = '12345680' - pmids.append(pmid) - - else: - logger.info("Processing database entries") - - generate_json() - logger.info("Done converting XML to JSON") diff --git a/agr_literature_service/lit_processing/get_datatypes_cross_references.py b/agr_literature_service/lit_processing/get_datatypes_cross_references.py deleted file mode 100644 index 983808c68..000000000 --- 
a/agr_literature_service/lit_processing/get_datatypes_cross_references.py +++ /dev/null @@ -1,43 +0,0 @@ -import argparse -import logging -import logging.config -from os import path - -from agr_literature_service.lit_processing.helper_file_processing import generate_cross_references_file - -# pipenv run python get_datatypes_cross_references.py -d resource -# pipenv run python get_datatypes_cross_references.py -d reference - -# about 1 minute 13 seconds to generate file with cross_references and is_obsolete -# about 45 seconds to generate file when it only had cross_references without is_obsolete -# generate reference_curie_to_xref file mapping alliance reference curies to cross_references identifiers from database - - -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') -logging.config.fileConfig(log_file_path) -logger = logging.getLogger('get_datatypes_cross_references') - - -if __name__ == "__main__": - """ - This script generates bulk cross_reference data from the API and database. - 4 seconds for resource - 88 seconds for reference - - call main start function - """ - - parser = argparse.ArgumentParser() - parser.add_argument('-d', '--datatype', action='store', help='take input from RESOURCE files in full path') - - args = vars(parser.parse_args()) - - logger.info("starting get_datatypes_cross_references.py") - - if args['datatype']: - generate_cross_references_file(args['datatype']) - - else: - logger.info("No flag passed in. 
Use -h for help.") - - logger.info("ending get_datatypes_cross_references.py") diff --git a/agr_literature_service/lit_processing/get_md5sum.py b/agr_literature_service/lit_processing/get_md5sum.py deleted file mode 100644 index c13d15814..000000000 --- a/agr_literature_service/lit_processing/get_md5sum.py +++ /dev/null @@ -1,126 +0,0 @@ -import argparse -import hashlib -import logging -import logging.config -import urllib -from os import environ, path -from typing import List - -from dotenv import load_dotenv - -# pipenv run python get_md5sum.py -x -f /home/azurebrd/git/agr_literature_service_demo/src/xml_processing/inputs/alliance_pmids -# pipenv run python get_md5sum.py -j -f /home/azurebrd/git/agr_literature_service_demo/src/xml_processing/inputs/alliance_pmids - -# generate file mapping directory's pmid files to their md5sums, taking as input xml or json directories. - -# 5 minutes 5 seconds for 649073 xml - -load_dotenv() - -pmids = [] # type: List - - -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') -logging.config.fileConfig(log_file_path) -logger = logging.getLogger('literature logger') - -base_path = environ.get('XML_PATH', "") - - -def generate_md5sums(file_type): - """ - - :param file_type: - :return: - """ - - storage_path = base_path + 'pubmed_' + file_type + '/' - md5data = '' - for pmid in pmids: - filename = storage_path + pmid + '.' 
+ file_type - if not path.exists(filename): - continue - md5_hash = hashlib.md5() - with open(filename, "rb") as f: - # Read and update hash in chunks of 4K - for byte_block in iter(lambda: f.read(4096), b""): - md5_hash.update(byte_block) - # logger.info("Found %s %s %s", file_type, md5_hash.hexdigest(), filename) - md5data += pmid + "\t" + md5_hash.hexdigest() + "\n" - md5file = storage_path + 'md5sum' - with open(md5file, "w") as md5file_fh: - md5file_fh.write(md5data) - - -if __name__ == "__main__": - """ - call main start function - """ - parser = argparse.ArgumentParser() - parser.add_argument('-x', '--xml', action='store_true', help='process xml files') - parser.add_argument('-j', '--json', action='store_true', help='process json files') - parser.add_argument('-c', '--commandline', nargs='*', action='store', help='take input from command line flag') - parser.add_argument('-d', '--database', action='store_true', help='take input from database query') - parser.add_argument('-f', '--file', action='store', help='take input from entries in file with full path') - parser.add_argument('-r', '--restapi', action='store', help='take input from rest api') - parser.add_argument('-s', '--sample', action='store_true', help='test sample input from hardcoded entries') - parser.add_argument('-u', '--url', action='store', help='take input from entries in file at url') - - args = vars(parser.parse_args()) - - file_type = 'xml' - - if args['xml']: - file_type = 'xml' - logger.info("generating md5sums of xml directory") - - elif args['json']: - file_type = 'json' - logger.info("generating md5sums of json directory") - -# python get_md5sum.py -d - if args['database']: - logger.info("Processing database entries") - - elif args['restapi']: - logger.info("Processing rest api entries") - -# python get_md5sum.py -f /home/azurebrd/git/agr_literature_service_demo/src/xml_processing/inputs/sample_set - elif args['file']: - logger.info("Processing file input from %s", args['file']) - with 
open(args['file'], 'r') as fp: - pmid = fp.readline() - while pmid: - pmids.append(pmid.rstrip()) - pmid = fp.readline() - -# python get_md5sum.py -u http://tazendra.caltech.edu/~azurebrd/var/work/pmid_sample - elif args['url']: - logger.info("Processing url input from %s", args['url']) - with urllib.request.urlopen(args["url"]) as req: - data = req.read() - lines = data.splitlines() - for pmid in lines: - pmids.append(pmid) - -# python get_md5sum.py -c 1234 4576 1828 - elif args['commandline']: - logger.info("Processing commandline input") - for pmid in args['commandline']: - pmids.append(pmid) - -# python get_md5sum.py -s - elif args['sample']: - logger.info("Processing hardcoded sample input") - pmid = '12345678' - pmids.append(pmid) - pmid = '12345679' - pmids.append(pmid) - pmid = '12345680' - pmids.append(pmid) - - else: - logger.info("Processing database entries") - - generate_md5sums(file_type) - logger.info("Done generating md5sum of %s files", file_type) diff --git a/agr_literature_service/lit_processing/oneoff_scripts/__init__.py b/agr_literature_service/lit_processing/oneoff_scripts/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agr_literature_service/lit_processing/oneoff/cleanup_DOI.py b/agr_literature_service/lit_processing/oneoff_scripts/cleanup_DOI.py similarity index 91% rename from agr_literature_service/lit_processing/oneoff/cleanup_DOI.py rename to agr_literature_service/lit_processing/oneoff_scripts/cleanup_DOI.py index 0dbd616c8..d5cee4a73 100644 --- a/agr_literature_service/lit_processing/oneoff/cleanup_DOI.py +++ b/agr_literature_service/lit_processing/oneoff_scripts/cleanup_DOI.py @@ -1,5 +1,5 @@ from agr_literature_service.api.models import CrossReferenceModel -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_session +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_session import logging logging.basicConfig(format='%(message)s') diff 
--git a/agr_literature_service/lit_processing/oneoff/cleanup_modCorpusAssoForCommentCorrection.py b/agr_literature_service/lit_processing/oneoff_scripts/cleanup_modCorpusAssoForCommentCorrection.py similarity index 98% rename from agr_literature_service/lit_processing/oneoff/cleanup_modCorpusAssoForCommentCorrection.py rename to agr_literature_service/lit_processing/oneoff_scripts/cleanup_modCorpusAssoForCommentCorrection.py index 459bbfde5..9c35c09ef 100644 --- a/agr_literature_service/lit_processing/oneoff/cleanup_modCorpusAssoForCommentCorrection.py +++ b/agr_literature_service/lit_processing/oneoff_scripts/cleanup_modCorpusAssoForCommentCorrection.py @@ -1,4 +1,4 @@ -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_engine, \ +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_engine, \ create_postgres_session from agr_literature_service.api.models import ModCorpusAssociationModel import logging diff --git a/agr_literature_service/lit_processing/oneoff/cleanup_userEmailToOktaUid.py b/agr_literature_service/lit_processing/oneoff_scripts/cleanup_userEmailToOktaUid.py similarity index 99% rename from agr_literature_service/lit_processing/oneoff/cleanup_userEmailToOktaUid.py rename to agr_literature_service/lit_processing/oneoff_scripts/cleanup_userEmailToOktaUid.py index e05ae70d5..01637d67a 100644 --- a/agr_literature_service/lit_processing/oneoff/cleanup_userEmailToOktaUid.py +++ b/agr_literature_service/lit_processing/oneoff_scripts/cleanup_userEmailToOktaUid.py @@ -1,4 +1,4 @@ -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_engine +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_engine import logging logging.basicConfig(format='%(message)s') diff --git a/agr_literature_service/lit_processing/oneoff/delete_duplicate_commentCorrection.py 
b/agr_literature_service/lit_processing/oneoff_scripts/delete_duplicate_commentCorrection.py similarity index 93% rename from agr_literature_service/lit_processing/oneoff/delete_duplicate_commentCorrection.py rename to agr_literature_service/lit_processing/oneoff_scripts/delete_duplicate_commentCorrection.py index 3e10d0006..ccb008957 100644 --- a/agr_literature_service/lit_processing/oneoff/delete_duplicate_commentCorrection.py +++ b/agr_literature_service/lit_processing/oneoff_scripts/delete_duplicate_commentCorrection.py @@ -1,4 +1,4 @@ -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_engine, \ +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_engine, \ create_postgres_session from agr_literature_service.api.models import ReferenceCommentAndCorrectionModel import logging diff --git a/agr_literature_service/lit_processing/oneoff/delete_duplicate_modRefType.py b/agr_literature_service/lit_processing/oneoff_scripts/delete_duplicate_modRefType.py similarity index 93% rename from agr_literature_service/lit_processing/oneoff/delete_duplicate_modRefType.py rename to agr_literature_service/lit_processing/oneoff_scripts/delete_duplicate_modRefType.py index 8c821db2a..d3153b7aa 100644 --- a/agr_literature_service/lit_processing/oneoff/delete_duplicate_modRefType.py +++ b/agr_literature_service/lit_processing/oneoff_scripts/delete_duplicate_modRefType.py @@ -1,4 +1,4 @@ -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_engine, \ +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_engine, \ create_postgres_session from agr_literature_service.api.models import ModReferenceTypeModel import logging diff --git a/agr_literature_service/lit_processing/oneoff/delete_noPmid_dateLastModifiedInPubmed.py b/agr_literature_service/lit_processing/oneoff_scripts/delete_noPmid_dateLastModifiedInPubmed.py similarity index 96% rename from 
agr_literature_service/lit_processing/oneoff/delete_noPmid_dateLastModifiedInPubmed.py rename to agr_literature_service/lit_processing/oneoff_scripts/delete_noPmid_dateLastModifiedInPubmed.py index 58e6eb05f..7e6cad1dd 100644 --- a/agr_literature_service/lit_processing/oneoff/delete_noPmid_dateLastModifiedInPubmed.py +++ b/agr_literature_service/lit_processing/oneoff_scripts/delete_noPmid_dateLastModifiedInPubmed.py @@ -1,4 +1,4 @@ -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_engine, \ +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_engine, \ create_postgres_session from agr_literature_service.api.models import ReferenceModel import logging diff --git a/agr_literature_service/lit_processing/oneoff/fix_modCorpusAssoForCommentCorrection.py b/agr_literature_service/lit_processing/oneoff_scripts/fix_modCorpusAssoForCommentCorrection.py similarity index 91% rename from agr_literature_service/lit_processing/oneoff/fix_modCorpusAssoForCommentCorrection.py rename to agr_literature_service/lit_processing/oneoff_scripts/fix_modCorpusAssoForCommentCorrection.py index 461f25b39..9e4745667 100644 --- a/agr_literature_service/lit_processing/oneoff/fix_modCorpusAssoForCommentCorrection.py +++ b/agr_literature_service/lit_processing/oneoff_scripts/fix_modCorpusAssoForCommentCorrection.py @@ -1,4 +1,4 @@ -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_session +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_session from agr_literature_service.api.models import ModCorpusAssociationModel import logging diff --git a/agr_literature_service/lit_processing/generate_chunk_files.py b/agr_literature_service/lit_processing/oneoff_scripts/generate_chunk_files.py similarity index 99% rename from agr_literature_service/lit_processing/generate_chunk_files.py rename to 
agr_literature_service/lit_processing/oneoff_scripts/generate_chunk_files.py index 1825c4785..090abc56d 100644 --- a/agr_literature_service/lit_processing/generate_chunk_files.py +++ b/agr_literature_service/lit_processing/oneoff_scripts/generate_chunk_files.py @@ -15,7 +15,7 @@ load_dotenv() -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') +log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../../logging.conf') logging.config.fileConfig(log_file_path) logger = logging.getLogger('literature logger') diff --git a/agr_literature_service/lit_processing/get_pubmed_nlm_resource_unmatched.py b/agr_literature_service/lit_processing/oneoff_scripts/get_pubmed_nlm_resource_unmatched.py similarity index 98% rename from agr_literature_service/lit_processing/get_pubmed_nlm_resource_unmatched.py rename to agr_literature_service/lit_processing/oneoff_scripts/get_pubmed_nlm_resource_unmatched.py index 85a7deab0..ed908cae8 100644 --- a/agr_literature_service/lit_processing/get_pubmed_nlm_resource_unmatched.py +++ b/agr_literature_service/lit_processing/oneoff_scripts/get_pubmed_nlm_resource_unmatched.py @@ -18,7 +18,7 @@ # for cleanup, see which dqm resourceAbbreviations don't match NLM data from J_Medline.txt # and query pubmed from Kimberly's query to try to find info. 
-log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') +log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../../logging.conf') logging.config.fileConfig(log_file_path) logger = logging.getLogger('literature logger') diff --git a/agr_literature_service/lit_processing/oneoff/populate_auditedColumns.py b/agr_literature_service/lit_processing/oneoff_scripts/populate_auditedColumns.py similarity index 98% rename from agr_literature_service/lit_processing/oneoff/populate_auditedColumns.py rename to agr_literature_service/lit_processing/oneoff_scripts/populate_auditedColumns.py index 949237417..63dc0c386 100644 --- a/agr_literature_service/lit_processing/oneoff/populate_auditedColumns.py +++ b/agr_literature_service/lit_processing/oneoff_scripts/populate_auditedColumns.py @@ -1,4 +1,4 @@ -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_engine +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_engine import logging import sys diff --git a/agr_literature_service/lit_processing/process_mod_corpus_association_to_api.py b/agr_literature_service/lit_processing/oneoff_scripts/process_mod_corpus_association_to_api.py similarity index 95% rename from agr_literature_service/lit_processing/process_mod_corpus_association_to_api.py rename to agr_literature_service/lit_processing/oneoff_scripts/process_mod_corpus_association_to_api.py index bdbb41b2b..8f34493db 100644 --- a/agr_literature_service/lit_processing/process_mod_corpus_association_to_api.py +++ b/agr_literature_service/lit_processing/oneoff_scripts/process_mod_corpus_association_to_api.py @@ -10,9 +10,9 @@ from agr_literature_service.api.models import ModCorpusAssociationModel, ReferenceModel, ModModel import time -from agr_literature_service.lit_processing.helper_sqlalchemy import sqlalchemy_load_ref_xref +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import 
sqlalchemy_load_ref_xref -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') +log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../../logging.conf') logging.config.fileConfig(log_file_path) logger = logging.getLogger('literature logger') diff --git a/agr_literature_service/lit_processing/post_comments_corrections_to_api.py b/agr_literature_service/lit_processing/post_comments_corrections_to_api.py deleted file mode 100644 index 08b9121c2..000000000 --- a/agr_literature_service/lit_processing/post_comments_corrections_to_api.py +++ /dev/null @@ -1,181 +0,0 @@ -import argparse -import sys -import json -import logging -import logging.config -from os import environ - -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_session -from agr_literature_service.api.models import ReferenceModel, CrossReferenceModel,\ - ReferenceCommentAndCorrectionModel - -# pipenv run python post_comments_corrections_to_api.py -f /home/azurebrd/git/agr_literature_service_demo/src/xml_processing/inputs/all_pmids > log_post_comments_corrections_to_api -# enter a file of pmids as an argument, sanitize, post to api -# 1 hour 19 minutes for 669998 pmids and 6268 rows created - - -logging.basicConfig(level=logging.INFO, - stream=sys.stdout, - format= '%(asctime)s - %(levelname)s - {%(module)s %(funcName)s:%(lineno)d} - %(message)s', # noqa E251 - datefmt='%Y-%m-%d %H:%M:%S') -logger = logging.getLogger(__name__) - - -def get_pmid_to_reference(db_session, pmids): - - query = db_session.query( - CrossReferenceModel.curie, - ReferenceModel.curie - ).join( - ReferenceModel.cross_reference - ).filter( - CrossReferenceModel.curie.in_(pmids) - ) - - results = query.all() - - pmid_curie_dict = {} - - for result in results: - if result[0] not in pmid_curie_dict or pmid_curie_dict[result[0]] is None: - pmid_curie_dict[result[0]] = result[1] - # json_object = json.dumps(pmid_curie_dict, indent=4) - # print(json_object) - - 
return pmid_curie_dict - - -def post_comments_corrections(pmids_wanted): # noqa: C901 - """ - - :param pmids_wanted: - :return: - """ - - logger.info(pmids_wanted) - - allowed_com_cor_types = ['CommentOn', 'ErratumFor', 'ExpressionOfConcernFor', 'ReprintOf', - 'RepublishedFrom', 'RetractionOf', 'UpdateOf'] - remap_com_cor_types = dict() - remap_com_cor_types['CommentIn'] = 'CommentOn' - remap_com_cor_types['ErratumIn'] = 'ErratumFor' - remap_com_cor_types['ExpressionOfConcernIn'] = 'ExpressionOfConcernFor' - remap_com_cor_types['ReprintIn'] = 'ReprintOf' - remap_com_cor_types['RepublishedIn'] = 'RepublishedFrom' - remap_com_cor_types['RetractionIn'] = 'RetractionOf' - remap_com_cor_types['UpdateIn'] = 'UpdateOf' - - mappings_set = set() - pmids_in_xml = set() - for pmid in pmids_wanted: - pubmed_json_filepath = base_path + 'pubmed_json/' + pmid + '.json' - try: - pubmed_data = dict() - with open(pubmed_json_filepath, 'r') as f: - pubmed_data = json.load(f) - f.close() - if 'commentsCorrections' in pubmed_data: - for com_cor_type in pubmed_data['commentsCorrections']: - reverse = False - for other_pmid in pubmed_data['commentsCorrections'][com_cor_type]: - if com_cor_type in remap_com_cor_types: - reverse = True - com_cor_type = remap_com_cor_types[com_cor_type] - if com_cor_type in allowed_com_cor_types: - primary = 'PMID:' + pmid - secondary = 'PMID:' + other_pmid - pmids_in_xml.add(primary) - pmids_in_xml.add(secondary) - if reverse is True: - primary = 'PMID:' + other_pmid - secondary = 'PMID:' + pmid - mappings_set.add(primary + '\t' + secondary + '\t' + com_cor_type) - except IOError: - print(f"{pubmed_json_filepath} not found in filesystem") - - reference_to_curie = dict() - - # generating only needed pmid mappings of xref to reference curie through sqlalchemy - - db_session = create_postgres_session(False) - - reference_to_curie = get_pmid_to_reference(db_session, list(pmids_in_xml)) - - mappings = sorted(mappings_set) - # counter = 0 - for mapping in 
mappings: - map_data = mapping.split("\t") - primary_pmid = map_data[0] - secondary_pmid = map_data[1] - com_cor_type = map_data[2] - primary_curie = '' - secondary_curie = '' - if primary_pmid in reference_to_curie: - primary_curie = reference_to_curie[primary_pmid] - if secondary_pmid in reference_to_curie: - secondary_curie = reference_to_curie[secondary_pmid] - if primary_curie == '': - logger.info(f"ERROR {mapping} : {primary_pmid} does not map to an AGR Reference curie") - if secondary_curie == '': - logger.info(f"ERROR {secondary_pmid} does not map to an AGR Reference curie") - if primary_curie != '' and secondary_curie != '': - new_entry = dict() - new_entry['reference_curie_from'] = primary_curie - new_entry['reference_curie_to'] = secondary_curie - new_entry['reference_comment_and_correction_type'] = com_cor_type - - # debug: output what is sent to API after converting file data - # json_object = json.dumps(new_entry, indent=4) - # print(json_object) - - try: - x = ReferenceCommentAndCorrectionModel(**new_entry) - db_session.add(x) - logger.info("The comment/correction row has been added into database for PMID " + str(primary_pmid)) - except Exception as e: - logger.info("An error occurred when adding a comment/correction row into database for PMID " + str(primary_pmid) + " " + str(e)) - - db_session.commit() - db_session.close() - - -if __name__ == "__main__": - """ - call main start function - """ - parser = argparse.ArgumentParser() - parser.add_argument('-p', '--generate-pmid-data', action='store_true', help='generate pmid outputs') - parser.add_argument('-f', '--file', action='store', help='take input from REFERENCE files in full path') - parser.add_argument('-m', '--mod', action='store', help='which mod, use all or leave blank for all') - parser.add_argument('-c', '--commandline', nargs='*', action='store', help='take input from command line flag') - - args = vars(parser.parse_args()) - - pmids_wanted = [] - - # python 
post_comments_corrections_to_api.py -c 1234 4576 1828 - if args['commandline']: - logger.info("Processing commandline input") - for pmid in args['commandline']: - pmids_wanted.append(pmid) - - elif args['file']: - logger.info("Processing file input from %s", args['file']) - base_path = environ.get('XML_PATH') - filename = base_path + args['file'] - try: - with open(filename, 'r') as fp: - pmid = fp.readline() - while pmid: - pmids_wanted.append(pmid.rstrip()) - pmid = fp.readline() - fp.close() - except IOError: - logger.info("No input file at %s", filename) - - else: - logger.info("Must enter a PMID through command line") - - post_comments_corrections(pmids_wanted) - - logger.info("Done Processing") diff --git a/agr_literature_service/lit_processing/pubmed_sample/32542232.json b/agr_literature_service/lit_processing/pubmed_sample/32542232.json deleted file mode 100644 index 2c09780c3..000000000 --- a/agr_literature_service/lit_processing/pubmed_sample/32542232.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "authors": [ - { - "affiliation": [ - "Department of Biological Sciences, University of Southern California, Los Angeles, California, United States of America." - ], - "authorRank": 1, - "firstname": "Alicia Kathryn", - "lastname": "Rogers", - "name": "Alicia Kathryn Rogers" - }, - { - "affiliation": [ - "Department of Biological Sciences, University of Southern California, Los Angeles, California, United States of America." 
- ], - "authorRank": 2, - "firstname": "Carolyn Marie", - "lastname": "Phillips", - "name": "Carolyn Marie Phillips" - } - ], - "dateLastModified": { - "date_string": "2020-09-28", - "day": "28", - "month": "09", - "year": "2020" - }, - "datePublished": { - "date_string": "2020-01-01", - "day": "01", - "month": "01", - "year": "2020" - }, - "issn": "2578-9430", - "issueDate": { - "date_string": "2020-01-01", - "day": "01", - "month": "01", - "year": "2020" - }, - "journal": "MicroPubl Biol", - "mid": "NIHMS1596474", - "nlm": "101759238", - "pmc": "PMC7295153", - "pubMedType": [ - "Journal Article" - ], - "pubmed": "32542232", - "resourceAbbreviation": "MicroPubl Biol", - "title": "Disruption of the mutator complex triggers a low penetrance larval arrest phenotype.", - "volume": "2020" -} \ No newline at end of file diff --git a/agr_literature_service/lit_processing/pubmed_sample/32542232.xml b/agr_literature_service/lit_processing/pubmed_sample/32542232.xml deleted file mode 100644 index 2883fe865..000000000 --- a/agr_literature_service/lit_processing/pubmed_sample/32542232.xml +++ /dev/null @@ -1,250 +0,0 @@ - - - - - - 32542232 - - 2020 - 09 - 28 - -
- - 2578-9430 - - 2020 - - 2020 - - - microPublication biology - MicroPubl Biol - - Disruption of the mutator complex triggers a low penetrance larval arrest phenotype. - 252 - - - Rogers - Alicia Kathryn - AK - - Department of Biological Sciences, University of Southern California, Los Angeles, California, United States of America. - - - - Phillips - Carolyn Marie - CM - - Department of Biological Sciences, University of Southern California, Los Angeles, California, United States of America. - - - - eng - - - R35 GM119656 - GM - NIGMS NIH HHS - United States - - - - Journal Article - -
- - United States - MicroPubl Biol - 101759238 - 2578-9430 - -
- - - - 2020 - 6 - 17 - 6 - 0 - - - 2020 - 6 - 17 - 6 - 0 - - - 2020 - 6 - 17 - 6 - 1 - - - ppublish - - 32542232 - PMC7295153 - NIHMS1596474 - - - - - Nucleic Acids Res. 2005 Jan 13;33(1):347-55 - - 15653635 - - - - Genes Dev. 2012 Jul 1;26(13):1433-44 - - 22713602 - - - - Nucleic Acids Res. 2020 May 7;48(8):4256-4273 - - 32187370 - - - - Mol Cell. 2009 Oct 23;36(2):231-44 - - 19800275 - - - - Dev Biol. 2005 Oct 15;286(2):452-63 - - 16154558 - - - - Cell. 1999 Oct 15;99(2):133-41 - - 10535732 - - - - Development. 1999 May;126(10):2227-39 - - 10207147 - - - - Curr Biol. 2005 Feb 22;15(4):378-83 - - 15723801 - - - - Mol Cell. 2010 Mar 12;37(5):679-89 - - 20116306 - - - - J Cell Sci. 2003 May 1;116(Pt 9):1797-804 - - 12665560 - - - - Mech Ageing Dev. 1984 Nov;28(1):23-40 - - 6542614 - - - - Proc Natl Acad Sci U S A. 2010 Feb 23;107(8):3582-7 - - 20133583 - - - - PLoS Genet. 2018 Jul 23;14(7):e1007542 - - 30036386 - - - - Biol Chem. 2002 Jul-Aug;383(7-8):1263-6 - - 12437114 - - - - Proc Natl Acad Sci U S A. 2011 Jan 25;108(4):1201-8 - - 21245313 - - - - Development. 2003 Jul;130(14):3319-30 - - 12783801 - - - - Cell. 2015 Jan 29;160(3):407-19 - - 25635455 - - - - Genetics. 1974 May;77(1):71-94 - - 4366476 - - - - Science. 2007 Jan 12;315(5809):244-7 - - 17158288 - - - - Science. 2007 Jan 12;315(5809):241-4 - - 17124291 - - - - WormBook. 2007 Jan 22;:1-26 - - 18050503 - - - - Curr Biol. 2003 Aug 5;13(15):1311-6 - - 12906791 - - - - Science. 2002 Jan 25;295(5555):694-7 - - 11809977 - - - - WormBook. 2005 Sep 21;:1-16 - - 18050422 - - - - Curr Biol. 2014 Apr 14;24(8):839-44 - - 24684932 - - - - -
- -
\ No newline at end of file diff --git a/agr_literature_service/lit_processing/pubmed_sample/32644453.json b/agr_literature_service/lit_processing/pubmed_sample/32644453.json deleted file mode 100644 index 24ef10ab4..000000000 --- a/agr_literature_service/lit_processing/pubmed_sample/32644453.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "abstract": "Onchocerciasis, also known as the African river blindness, is the second most important cause of infectious blindness worldwide after trachoma. It is caused by the filarial nematode, Onchocerca volvulus, and transmitted by repeated bites of the vector, female black fly of the genus Simulium damnosum. The vector breeds in fast-flowing and oxygen-rich rivers in affected areas with transmission and disease prevalence usually stretching along these river basins and thereby the name river blindness.[1] Aside from blindness, onchocerciasis results in a troubling chronic dermatitis.[1]", - "authors": [ - { - "affiliation": [ - "St. Thomas Eye Hospital" - ], - "authorRank": 1, - "firstname": "Michael E.", - "lastname": "Gyasi", - "name": "Michael E. Gyasi" - }, - { - "affiliation": [ - "St. Thomas Eye Hospital" - ], - "authorRank": 2, - "firstname": "Ogugua N.", - "lastname": "Okonkwo", - "name": "Ogugua N. Okonkwo" - }, - { - "affiliation": [ - "St. 
Thomas Eye Hospital" - ], - "authorRank": 3, - "firstname": "Koushik", - "lastname": "Tripathy", - "name": "Koushik Tripathy" - } - ], - "bookaccession": "NBK559027", - "datePublished": { - "date_string": "2020-01-01", - "day": "01", - "month": "01", - "year": "2020" - }, - "issueDate": { - "date_string": "2020-01-01", - "day": "01", - "month": "01", - "year": "2020" - }, - "pubMedType": [ - "Review" - ], - "publisher": "StatPearls Publishing", - "title": "Onchocerciasis" -} \ No newline at end of file diff --git a/agr_literature_service/lit_processing/pubmed_sample/32644453.xml b/agr_literature_service/lit_processing/pubmed_sample/32644453.xml deleted file mode 100644 index 814773339..000000000 --- a/agr_literature_service/lit_processing/pubmed_sample/32644453.xml +++ /dev/null @@ -1,152 +0,0 @@ - - - - - - 32644453 - - NBK559027 - - - - StatPearls Publishing - Treasure Island (FL) - - StatPearls - - 2020 - 01 - - - 2020 - 01 - - Internet - - Onchocerciasis - eng - - - Gyasi - Michael E. - ME - - St. Thomas Eye Hospital - - - - Okonkwo - Ogugua N. - ON - - Eye Foundation Hospital - - - - Tripathy - Koushik - K - - - Review - - Onchocerciasis, also known as the African river blindness, is the second most important cause of infectious blindness worldwide after trachoma. It is caused by the filarial nematode, Onchocerca volvulus, and transmitted by repeated bites of the vector, female black fly of the genus Simulium damnosum. The vector breeds in fast-flowing and oxygen-rich rivers in affected areas with transmission and disease prevalence usually stretching along these river basins and thereby the name river blindness.[1] Aside from blindness, onchocerciasis results in a troubling chronic dermatitis.[1] - Copyright © 2020, StatPearls Publishing LLC. - - -
- Introduction -
-
- Etiology -
-
- Epidemiology -
-
- Pathophysiology -
-
- Histopathology -
-
- History and Physical -
-
- Evaluation -
-
- Treatment / Management -
-
- Differential Diagnosis -
-
- Pertinent Studies and Ongoing Trials -
-
- Treatment Planning -
-
- Toxicity and Side Effect Management -
-
- Prognosis -
-
- Complications -
-
- Deterrence and Patient Education -
-
- Pearls and Other Issues -
-
- Enhancing Healthcare Team Outcomes -
-
- Continuing Education / Review Questions -
-
- References -
-
- - 2020 - 11 - 8 - -
- - - - 2020 - 7 - 10 - 6 - 1 - - - 2020 - 7 - 10 - 6 - 1 - - - 2020 - 7 - 10 - 6 - 1 - - - ppublish - - 32644453 - - -
- -
\ No newline at end of file diff --git a/agr_literature_service/lit_processing/pubmed_sample/33002525.json b/agr_literature_service/lit_processing/pubmed_sample/33002525.json deleted file mode 100644 index 5a7c48723..000000000 --- a/agr_literature_service/lit_processing/pubmed_sample/33002525.json +++ /dev/null @@ -1,191 +0,0 @@ -{ - "abstract": "Beauvericin is an ubiquitous mycotoxin with relevant occurrence in food and feed. It causes a high toxicity in several cell lines, but its general mechanism of action is not fully understood and only limited in vivo studies have been performed. We used Caenorhabditis elegans as a model organism to investigate effects of beauvericin. The mycotoxin displays a moderate acute toxicity at 100 \u03bcM; at this concentration also reproductive toxicity occurred (reduction of total progeny to 32.1 %), developmental toxicity was detectable at 250 \u03bcM. However, even lower concentrations were capable to reduce stress resistance and life span of the nematode: A significant reduction was detected at 10 \u03bcM beauvericin (decrease in mean survival time of 4.3 % and reduction in life span of 12.9 %). An increase in lipofuscin fluorescence was demonstrated starting at 10 \u03bcM suggesting oxidative stress as a mechanism of beauvericin toxicity. Beauvericin (100 \u03bcM) increases the number of apoptotic germ cells comparable to the positive control UV-C (400 J/m2). Conclusion: Low concentrations of beauvericin are capable to cause adverse effects in C. elegans, which may be relevant for hazard identification of this compound.", - "authors": [ - { - "affiliation": [ - "Martin-Luther-University Halle-Wittenberg, Institute of Agricultural and Nutritional Sciences, Weinbergweg 22, 06120 Halle/Saale, Germany." 
- ], - "authorRank": 1, - "firstname": "Christian", - "lastname": "B\u00fcchter", - "name": "Christian B\u00fcchter" - }, - { - "affiliation": [ - "Martin-Luther-University Halle-Wittenberg, Institute of Agricultural and Nutritional Sciences, Weinbergweg 22, 06120 Halle/Saale, Germany." - ], - "authorRank": 2, - "firstname": "Karoline", - "lastname": "Koch", - "name": "Karoline Koch" - }, - { - "affiliation": [ - "Martin-Luther-University Halle-Wittenberg, Institute of Agricultural and Nutritional Sciences, Weinbergweg 22, 06120 Halle/Saale, Germany." - ], - "authorRank": 3, - "firstname": "Martin", - "lastname": "Freyer", - "name": "Martin Freyer" - }, - { - "affiliation": [ - "Martin-Luther-University Halle-Wittenberg, Institute of Agricultural and Nutritional Sciences, Weinbergweg 22, 06120 Halle/Saale, Germany." - ], - "authorRank": 4, - "firstname": "Sabrina", - "lastname": "Baier", - "name": "Sabrina Baier" - }, - { - "affiliation": [ - "Martin-Luther-University Halle-Wittenberg, Institute of Agricultural and Nutritional Sciences, Weinbergweg 22, 06120 Halle/Saale, Germany." - ], - "authorRank": 5, - "firstname": "Christina", - "lastname": "Saier", - "name": "Christina Saier" - }, - { - "affiliation": [ - "Martin-Luther-University Halle-Wittenberg, Institute of Agricultural and Nutritional Sciences, Weinbergweg 22, 06120 Halle/Saale, Germany." - ], - "authorRank": 6, - "firstname": "Sebastian", - "lastname": "Honnen", - "name": "Sebastian Honnen" - }, - { - "affiliation": [ - "Martin-Luther-University Halle-Wittenberg, Institute of Agricultural and Nutritional Sciences, Weinbergweg 22, 06120 Halle/Saale, Germany." 
- ], - "authorRank": 7, - "firstname": "Wim", - "lastname": "W\u00e4tjen", - "name": "Wim W\u00e4tjen" - } - ], - "dateArrivedInPubmed": { - "date_string": "2020-04-09", - "day": "09", - "month": "04", - "year": "2020" - }, - "dateLastModified": { - "date_string": "2020-11-04", - "day": "04", - "month": "11", - "year": "2020" - }, - "datePublished": { - "date_string": "2020-11-01", - "day": "01", - "month": "11", - "year": "2020" - }, - "doi": "10.1016/j.toxlet.2020.09.016", - "issn": "0378-4274", - "issueDate": { - "date_string": "2020-11-01", - "day": "01", - "month": "11", - "year": "2020" - }, - "journal": "Toxicol Lett", - "keywords": [ - "Apoptosis", - "Beauvericin", - "Caenorhabditis elegans", - "Development", - "Fertility", - "Mycotoxin" - ], - "meshTerms": [ - { - "meshHeadingTerm": "Animals", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Apoptosis", - "meshQualfierTerm": "drug effects", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Caenorhabditis elegans", - "meshQualfierTerm": "drug effects", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Caenorhabditis elegans", - "meshQualfierTerm": "growth & development", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Caenorhabditis elegans", - "meshQualfierTerm": "metabolism", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Depsipeptides", - "meshQualfierTerm": "toxicity", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Dose-Response Relationship, Drug", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Fertility", - "meshQualfierTerm": "drug effects", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Food Contamination", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Germ Cells", - "meshQualfierTerm": "drug effects", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Germ Cells", - "meshQualfierTerm": "pathology", - "referenceId": "PMID:33002525" - }, - { - 
"meshHeadingTerm": "Lipofuscin", - "meshQualfierTerm": "metabolism", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Longevity", - "meshQualfierTerm": "drug effects", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Motor Activity", - "meshQualfierTerm": "drug effects", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Mycotoxins", - "meshQualfierTerm": "toxicity", - "referenceId": "PMID:33002525" - }, - { - "meshHeadingTerm": "Toxicity Tests, Acute", - "referenceId": "PMID:33002525" - } - ], - "nlm": "7709027", - "pages": "102-109", - "pii": "S0378-4274(20)30426-4", - "pubMedType": [ - "Journal Article" - ], - "pubmed": "33002525", - "resourceAbbreviation": "Toxicol Lett", - "title": "The mycotoxin beauvericin impairs development, fertility and life span in the nematode Caenorhabditis elegans accompanied by increased germ cell apoptosis and lipofuscin accumulation.", - "volume": "334" -} \ No newline at end of file diff --git a/agr_literature_service/lit_processing/pubmed_sample/33002525.xml b/agr_literature_service/lit_processing/pubmed_sample/33002525.xml deleted file mode 100644 index 9e6868be2..000000000 --- a/agr_literature_service/lit_processing/pubmed_sample/33002525.xml +++ /dev/null @@ -1,245 +0,0 @@ - - - - - - 33002525 - - 2020 - 11 - 04 - - - 2020 - 11 - 04 - -
- - 1879-3169 - - 334 - - 2020 - Nov - 01 - - - Toxicology letters - Toxicol Lett - - The mycotoxin beauvericin impairs development, fertility and life span in the nematode Caenorhabditis elegans accompanied by increased germ cell apoptosis and lipofuscin accumulation. - - 102-109 - - S0378-4274(20)30426-4 - 10.1016/j.toxlet.2020.09.016 - - Beauvericin is an ubiquitous mycotoxin with relevant occurrence in food and feed. It causes a high toxicity in several cell lines, but its general mechanism of action is not fully understood and only limited in vivo studies have been performed. We used Caenorhabditis elegans as a model organism to investigate effects of beauvericin. The mycotoxin displays a moderate acute toxicity at 100 μM; at this concentration also reproductive toxicity occurred (reduction of total progeny to 32.1 %), developmental toxicity was detectable at 250 μM. However, even lower concentrations were capable to reduce stress resistance and life span of the nematode: A significant reduction was detected at 10 μM beauvericin (decrease in mean survival time of 4.3 % and reduction in life span of 12.9 %). An increase in lipofuscin fluorescence was demonstrated starting at 10 μM suggesting oxidative stress as a mechanism of beauvericin toxicity. Beauvericin (100 μM) increases the number of apoptotic germ cells comparable to the positive control UV-C (400 J/m2). Conclusion: Low concentrations of beauvericin are capable to cause adverse effects in C. elegans, which may be relevant for hazard identification of this compound. - Copyright © 2020 Elsevier B.V. All rights reserved. - - - - Büchter - Christian - C - - Martin-Luther-University Halle-Wittenberg, Institute of Agricultural and Nutritional Sciences, Weinbergweg 22, 06120 Halle/Saale, Germany. - - - - Koch - Karoline - K - - Martin-Luther-University Halle-Wittenberg, Institute of Agricultural and Nutritional Sciences, Weinbergweg 22, 06120 Halle/Saale, Germany. 
- - - - Freyer - Martin - M - - Martin-Luther-University Halle-Wittenberg, Institute of Agricultural and Nutritional Sciences, Weinbergweg 22, 06120 Halle/Saale, Germany. - - - - Baier - Sabrina - S - - Martin-Luther-University Halle-Wittenberg, Institute of Agricultural and Nutritional Sciences, Weinbergweg 22, 06120 Halle/Saale, Germany. - - - - Saier - Christina - C - - Martin-Luther-University Halle-Wittenberg, Institute of Agricultural and Nutritional Sciences, Weinbergweg 22, 06120 Halle/Saale, Germany. - - - - Honnen - Sebastian - S - - Heinrich Heine University, Medical Faculty, Institute of Toxicology, Moorenstrasse 5, 40225 Düsseldorf, Germany. - - - - Wätjen - Wim - W - - Martin-Luther-University Halle-Wittenberg, Institute of Agricultural and Nutritional Sciences, Weinbergweg 22, 06120 Halle/Saale, Germany. Electronic address: wim.waetjen@landw.uni-halle.de. - - - - eng - - Journal Article - - - 2020 - 09 - 28 - -
- - Netherlands - Toxicol Lett - 7709027 - 0378-4274 - - - - 0 - Depsipeptides - - - 0 - Lipofuscin - - - 0 - Mycotoxins - - - 26S048LS2R - beauvericin - - - IM - - - Animals - - - Apoptosis - drug effects - - - Caenorhabditis elegans - drug effects - growth & development - metabolism - - - Depsipeptides - toxicity - - - Dose-Response Relationship, Drug - - - Fertility - drug effects - - - Food Contamination - - - Germ Cells - drug effects - pathology - - - Lipofuscin - metabolism - - - Longevity - drug effects - - - Motor Activity - drug effects - - - Mycotoxins - toxicity - - - Toxicity Tests, Acute - - - - Apoptosis - Beauvericin - Caenorhabditis elegans - Development - Fertility - Mycotoxin - - Declaration of Competing Interest None. -
- - - - 2020 - 04 - 09 - - - 2020 - 09 - 14 - - - 2020 - 09 - 18 - - - 2020 - 10 - 2 - 6 - 0 - - - 2020 - 11 - 5 - 6 - 0 - - - 2020 - 10 - 1 - 20 - 11 - - - ppublish - - 33002525 - S0378-4274(20)30426-4 - 10.1016/j.toxlet.2020.09.016 - - -
- -
\ No newline at end of file diff --git a/agr_literature_service/lit_processing/pubmed_sample/33408224.json b/agr_literature_service/lit_processing/pubmed_sample/33408224.json deleted file mode 100644 index 74901fc5c..000000000 --- a/agr_literature_service/lit_processing/pubmed_sample/33408224.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "abstract": "Traditionally, treatments for bacterial infection have focused on killing the microbe or preventing its growth. As antimicrobial resistance becomes more ubiquitous, the feasibility of this approach is beginning to wane and attention has begun to shift toward disrupting the host-pathogen interaction by improving the host defense. Using a high-throughput, fragment-based screen to identify compounds that alleviate Pseudomonas aeruginosa-mediated killing of Caenorhabditis elegans, we identified over 20 compounds that stimulated host defense gene expression. Five of these molecules were selected for further characterization. Four of five compounds showed little toxicity against mammalian cells or worms, consistent with their identification in a phenotypic, high-content screen. Each of the compounds activated several host defense pathways, but the pathways were generally dispensable for compound-mediated rescue in liquid killing, suggesting redundancy or that the activation of unknown pathway(s) may be driving compound effects. A genetic mechanism was identified for LK56, which required the Mediator subunit MDT-15/MED15 and NHR-49/HNF4 for its function. Interestingly, LK32, LK34, LK38, and LK56 also rescued C. elegans from P. aeruginosa in an agar-based assay, which uses different virulence factors and defense mechanisms. Rescue in an agar-based assay for LK38 entirely depended upon the PMK-1/p38 MAPK pathway. 
Three compounds-LK32, LK34, and LK56-also conferred resistance to Enterococcus faecalis, and the two lattermost, LK34 and LK56, also reduced pathogenesis from Staphylococcus aureus This study supports a growing role for MDT-15 and NHR-49 in immune response and identifies five molecules that have significant potential for use as tools in the investigation of innate immunity.IMPORTANCE Trends moving in opposite directions (increasing antimicrobial resistance and declining novel antimicrobial development) have precipitated a looming crisis: the nearly complete inability to safely and effectively treat bacterial infections. To avert this, new approaches are needed. One idea is to stimulate host defense pathways to improve the clearance of bacterial infection. Here, we describe five small molecules that promote resistance to infectious bacteria by activating C. elegans' innate immune pathways. Several are effective against both Gram-positive and Gram-negative pathogens. One of the compounds was mapped to the action of MDT-15/MED15 and NHR-49/HNF4, a pair of transcriptional regulators more generally associated with fatty acid metabolism, potentially highlighting a new link between these biological functions. These studies pave the way for future characterization of the anti-infective activity of the molecules in higher organisms and highlight the compounds' potential utility for further investigation of immune modulation as a novel therapeutic approach.", - "authors": [ - { - "affiliation": [ - "Department of BioSciences, Rice University, Houston, Texas, USA." - ], - "authorRank": 1, - "firstname": "Nicholas A", - "lastname": "Hummell", - "name": "Nicholas A Hummell" - }, - { - "affiliation": [ - "Department of BioSciences, Rice University, Houston, Texas, USA." - ], - "authorRank": 2, - "firstname": "Alexey V", - "lastname": "Revtovich", - "name": "Alexey V Revtovich" - }, - { - "affiliation": [ - "Department of BioSciences, Rice University, Houston, Texas, USA." 
- ], - "authorRank": 3, - "crossReferences": [ - { - "id": "ORCID:https://orcid.org/0000-0002-1537-4967", - "pages": [ - "person/orcid" - ] - } - ], - "firstname": "Natalia V", - "lastname": "Kirienko", - "name": "Natalia V Kirienko" - } - ], - "dateLastModified": { - "date_string": "2021-02-02", - "day": "02", - "month": "02", - "year": "2021" - }, - "datePublished": { - "date_string": "2021-01-06", - "day": "06", - "month": "01", - "year": "2021" - }, - "doi": "10.1128/mSphere.00950-20", - "issn": "2379-5042", - "issueDate": { - "date_string": "2021-01-06", - "day": "06", - "month": "01", - "year": "2021" - }, - "issueName": "1", - "journal": "mSphere", - "keywords": [ - "C. elegans", - "E. faecalis", - "MDT-15/MED15", - "NHR-49/HNF4", - "P. aeruginosa", - "PMK-1/p38 MAPK", - "S. aureus", - "high-throughput screen", - "immune modulators" - ], - "nlm": "101674533", - "pii": "6/1/e00950-20", - "pmc": "PMC7845594", - "pubMedType": [ - "Journal Article" - ], - "pubmed": "33408224", - "resourceAbbreviation": "mSphere", - "title": "Novel Immune Modulators Enhance Caenorhabditis elegans Resistance to Multiple Pathogens.", - "volume": "6" -} \ No newline at end of file diff --git a/agr_literature_service/lit_processing/pubmed_sample/33408224.xml b/agr_literature_service/lit_processing/pubmed_sample/33408224.xml deleted file mode 100644 index 99839e149..000000000 --- a/agr_literature_service/lit_processing/pubmed_sample/33408224.xml +++ /dev/null @@ -1,733 +0,0 @@ - - - - - - 33408224 - - 2021 - 01 - 12 - -
- - 2379-5042 - - 6 - 1 - - 2021 - Jan - 06 - - - mSphere - mSphere - - Novel Immune Modulators Enhance Caenorhabditis elegans Resistance to Multiple Pathogens. - e00950-20 - 10.1128/mSphere.00950-20 - - Traditionally, treatments for bacterial infection have focused on killing the microbe or preventing its growth. As antimicrobial resistance becomes more ubiquitous, the feasibility of this approach is beginning to wane and attention has begun to shift toward disrupting the host-pathogen interaction by improving the host defense. Using a high-throughput, fragment-based screen to identify compounds that alleviate Pseudomonas aeruginosa-mediated killing of Caenorhabditis elegans, we identified over 20 compounds that stimulated host defense gene expression. Five of these molecules were selected for further characterization. Four of five compounds showed little toxicity against mammalian cells or worms, consistent with their identification in a phenotypic, high-content screen. Each of the compounds activated several host defense pathways, but the pathways were generally dispensable for compound-mediated rescue in liquid killing, suggesting redundancy or that the activation of unknown pathway(s) may be driving compound effects. A genetic mechanism was identified for LK56, which required the Mediator subunit MDT-15/MED15 and NHR-49/HNF4 for its function. Interestingly, LK32, LK34, LK38, and LK56 also rescued C. elegans from P. aeruginosa in an agar-based assay, which uses different virulence factors and defense mechanisms. Rescue in an agar-based assay for LK38 entirely depended upon the PMK-1/p38 MAPK pathway. 
Three compounds-LK32, LK34, and LK56-also conferred resistance to Enterococcus faecalis, and the two lattermost, LK34 and LK56, also reduced pathogenesis from Staphylococcus aureus This study supports a growing role for MDT-15 and NHR-49 in immune response and identifies five molecules that have significant potential for use as tools in the investigation of innate immunity.IMPORTANCE Trends moving in opposite directions (increasing antimicrobial resistance and declining novel antimicrobial development) have precipitated a looming crisis: the nearly complete inability to safely and effectively treat bacterial infections. To avert this, new approaches are needed. One idea is to stimulate host defense pathways to improve the clearance of bacterial infection. Here, we describe five small molecules that promote resistance to infectious bacteria by activating C. elegans' innate immune pathways. Several are effective against both Gram-positive and Gram-negative pathogens. One of the compounds was mapped to the action of MDT-15/MED15 and NHR-49/HNF4, a pair of transcriptional regulators more generally associated with fatty acid metabolism, potentially highlighting a new link between these biological functions. These studies pave the way for future characterization of the anti-infective activity of the molecules in higher organisms and highlight the compounds' potential utility for further investigation of immune modulation as a novel therapeutic approach. - Copyright © 2021 Hummell et al. - - - - Hummell - Nicholas A - NA - - Department of BioSciences, Rice University, Houston, Texas, USA. - - - - Revtovich - Alexey V - AV - - Department of BioSciences, Rice University, Houston, Texas, USA. - - - - Kirienko - Natalia V - NV - https://orcid.org/0000-0002-1537-4967 - - Department of BioSciences, Rice University, Houston, Texas, USA kirienko@rice.edu. - - - - eng - - - R35 GM129294 - GM - NIGMS NIH HHS - United States - - - - Journal Article - - - 2021 - 01 - 06 - -
- - United States - mSphere - 101674533 - 2379-5042 - - IM - - C. elegans - E. faecalis - MDT-15/MED15 - NHR-49/HNF4 - P. aeruginosa - PMK-1/p38 MAPK - S. aureus - high-throughput screen - immune modulators - -
- - - - 2021 - 1 - 7 - 6 - 0 - - - 2021 - 1 - 8 - 6 - 0 - - - 2021 - 1 - 8 - 6 - 0 - - - epublish - - 33408224 - 6/1/e00950-20 - 10.1128/mSphere.00950-20 - - - - Bhagirath AY, Li Y, Somayajula D, Dadashi M, Badr S, Duan K. Cystic fibrosis lung environment and Pseudomonas aeruginosa infection. BMC Pulm Med. 2016;16:174. - - 10.1186/s12890-016-0339-5 - - - - Naqvi SHA, Naqvi SHS. Pseudomonas aeruginosa burn wound infection in a dedicated paediatric burns unit. S Afr J Surg. 2013;51:151–152. - - 10.7196/sajs.1811 - - - - Michael CA, Dominey-Howes D, Labbate M. The antimicrobial resistance crisis: causes, consequences, and management. Front Public Health. 2014;2:145. - - 10.3389/fpubh.2014.00145 - - - - Ventola CL. The antibiotic resistance crisis. 1. Causes and threats. P T. 2015;40:277–283. - - - Sasaki R, Kanda T, Nakamoto S, Haga Y, Nakamura M, Yasui S, Jiang X, Wu S, Arai M, Yokosuka O. Natural interferon-beta treatment for patients with chronic hepatitis C in Japan. World J Hepatol. 2015;7:1125–1132. - - 10.4254/wjh.v7.i8.1125 - - - - Moriyama M, Arakawa Y. Treatment of interferon-alpha for chronic hepatitis C. Expert Opin Pharmacother. 2006;7:1163–1179. - - 10.1517/14656566.7.9.1163 - - - - Yokogawa M, Takaishi M, Nakajima K, Kamijima R, Fujimoto C, Kataoka S, Terada Y, Sano S. Epicutaneous application of Toll-like receptor 7 agonists leads to systemic autoimmunity in wild-type mice: a new model of systemic Lupus erythematosus. Arthritis Rheumatol. 2014;66:694–706. - - 10.1002/art.38298 - - - - Arandjus C, Black PN, Poole PJ, Wood Baker R, Steurer-Stey C. Oral bacterial vaccines for the prevention of acute exacerbations in chronic obstructive pulmonary disease and chronic bronchitis. Respir Med. 2006;100:1671–1681. - - 10.1016/j.rmed.2006.06.029 - - - - Hughes JP, Rees S, Kalindjian SB, Philpott KL. Principles of early drug discovery. Br J Pharmacol. 2011;162:1239–1249. 
- - 10.1111/j.1476-5381.2010.01127.x - - - - Okesli-Armlovich A, Gupta A, Jimenez M, Auld D, Liu Q, Bassik MC, Khosla C. Discovery of small molecule inhibitors of human uridine-cytidine kinase 2 by high-throughput screening. Bioorg Med Chem Lett. 2019;29:2559–2564. - - 10.1016/j.bmcl.2019.08.010 - - - - Guo W, Yao S, Sun P, Yang TB, Tang CP, Zheng MY, Ye Y, Meng LH. Discovery and characterization of natural products as novel indoleamine 2,3-dioxygenase 1 inhibitors through high-throughput screening. Acta Pharmacol Sin. 2020;41:423–431. - - 10.1038/s41401-019-0246-4 - - - - Baell J, Walters MA. Chemistry: chemical con artists foil drug discovery. Nature. 2014;513:481–483. - - 10.1038/513481a - - - - Aldrich C, Bertozzi C, Georg GI, Kiessling L, Lindsley C, Liotta D, Merz KM Jr, Schepartz A, Wang S. The ecstasy and agony of assay interference compounds. ACS Med Chem Lett. 2017;8:379–382. - - 10.1021/acsmedchemlett.7b00056 - - - - Kirienko DR, Revtovich AV, Kirienko NV. A high-content, phenotypic screen identifies fluorouridine as an inhibitor of pyoverdine biosynthesis and Pseudomonas aeruginosa Virulence. mSphere. 2016;1:e00217-26. - - 10.1128/mSphere.00217-16 - - - - Pukkila-Worley R, Feinbaum R, Kirienko NV, Larkins-Ford J, Conery AL, Ausubel FM. Stimulation of host immune defenses by a small molecule protects Caenorhabditis elegans from bacterial infection. PLoS Genet. 2012;8:e1002733. - - 10.1371/journal.pgen.1002733 - - - - Pukkila-Worley R, Ausubel FM. Immune defense mechanisms in the Caenorhabditis elegans intestinal epithelium. Curr Opin Immunol. 2012;24:3–9. - - 10.1016/j.coi.2011.10.004 - - - - Ermolaeva MA, Schumacher B. Insights from the worm: the Caenorhabditis elegans model for innate immunity. Semin Immunol. 2014;26:303–309. - - 10.1016/j.smim.2014.04.005 - - - - Kirienko DR, Kang D, Kirienko NV. Novel pyoverdine inhibitors mitigate Pseudomonas aeruginosa pathogenesis. Front Microbiol. 2018;9:3317. 
- - 10.3389/fmicb.2018.03317 - - - - Wang X, Kleerekoper Q, Revtovich AV, Kang D, Kirienko NV. Identification and validation of a novel anti-virulent that binds to pyoverdine and inhibits its function. Virulence. 2020;11:1293–1309. - - 10.1080/21505594.2020.1819144 - - - - Lipinski CA, Lombardo F, Dominy BW, Feeney PJ. Experimental and computational approaches to estimate solubility and permeability in drug discovery and development settings. Adv Drug Deliv Rev. 2001;46:3–26. - - 10.1016/s0169-409x(00)00129-0 - - - - Lamoree B, Hubbard RE. Current perspectives in fragment-based lead discovery (FBLD). Essays Biochem. 2017;61:453–464. - - 10.1042/EBC20170028 - - - - Kang D, Revtovich AV, Chen Q, Shah KN, Cannon CL, Kirienko NV. Pyoverdine-dependent virulence of Pseudomonas aeruginosa isolates from cystic fibrosis patients. Front Microbiol. 2019;10:2048. - - 10.3389/fmicb.2019.02048 - - - - Kang D, Kirienko DR, Webster P, Fisher AL, Kirienko NV. Pyoverdine, a siderophore from Pseudomonas aeruginosa, translocates into Caenorhabditis elegans, removes iron, and activates a distinct host response. Virulence. 2018;9:804–817. - - 10.1080/21505594.2018.1449508 - - - - Sood S, Malhotra M, Das BK, Kapil A. Enterococcal infections and antimicrobial resistance. Indian J Med Res. 2008;128:111–121. - - - Deurenberg RH, Stobberingh EE. The evolution of Staphylococcus aureus. Infect Genet Evol. 2008;8:747–763. - - 10.1016/j.meegid.2008.07.007 - - - - Rajamuthiah R, Fuchs BB, Jayamani E, Kim Y, Larkins-Ford J, Conery A, Ausubel FM, Mylonakis E. Whole animal automated platform for drug discovery against multidrug resistant Staphylococcus aureus. PLoS One. 2014;9:e89189. - - 10.1371/journal.pone.0089189 - - - - Anderson QL, Revtovich AV, Kirienko NV. A high-throughput, high-content, liquid-based Caenorhabditis elegans pathosystem. J Vis Exp. 2018. - - 10.3791/58068 - - - - Kim SM, Escorbar I, Lee K, Fuchs BB, Mylonakis E, Kim W. 
Anti-MRSA agent discovery using Caenorhabditis elegans-based high-throughput screening. J Microbiol. 2020;58:431–444. - - 10.1007/s12275-020-0163-8 - - - - Garsin DA, Sifri CD, Mylonakis E, Qin X, Singh KV, Murray BE, Calderwood SB, Ausubel FM. A simple model host for identifying Gram-positive virulence factors. Proc Natl Acad Sci U S A. 2001;98:10892–10897. - - 10.1073/pnas.191378698 - - - - Kirienko NV, Ausubel FM, Ruvkun G. Mitophagy confers resistance to siderophore-mediated killing by Pseudomonas aeruginosa. Proc Natl Acad Sci U S A. 2015;112:1821–1826. - - 10.1073/pnas.1424954112 - - - - Kirienko NV, Kirienko DR, Larkins-Ford J, Wahlby C, Ruvkun G, Ausubel FM. Pseudomonas aeruginosa disrupts Caenorhabditis elegans iron homeostasis, causing a hypoxic response and death. Cell Host Microbe. 2013;13:406–416. - - 10.1016/j.chom.2013.03.003 - - - - Sifri CD, Begun J, Ausubel FM, Calderwood SB. Caenorhabditis elegans as a model host for Staphylococcus aureus pathogenesis. Infect Immun. 2003;71:2208–2217. - - 10.1128/iai.71.4.2208-2217.2003 - - - - Irazoqui JE, Troemel ER, Feinbaum RL, Luhachack LG, Cezairliyan BO, Ausubel FM. Distinct pathogenesis and host responses during infection of Caenorhabditis elegans by Pseudomonas aeruginosa and Staphylococcus aureus. PLoS Pathog. 2010;6:e1000982. - - 10.1371/journal.ppat.1000982 - - - - Ewald CY. Redox signaling of NADPH oxidases regulates oxidative stress responses, immunity and aging. Antioxidants (Basel). 2018;7:130. - - 10.3390/antiox7100130 - - - - Kim DH. Bacteria and the aging and longevity of Caenorhabditis elegans. Annu Rev Genet. 2013;47:233–246. - - 10.1146/annurev-genet-111212-133352 - - - - Benedetto A, Bambade T, Au C, Tullet JMA, Monkhouse J, Dang H, Cetnar K, Chan B, Cabreiro F, Gems D. New label-free automated survival assays reveal unexpected stress resistance patterns during Caenorhabditis elegans aging. Aging Cell. 2019;18:e12998. 
- - 10.1111/acel.12998 - - - - Jha A, Mukherjee C, Prasad AK, Parmar VS, Vadaparti M, Das U, De Clercq E, Balzarini J, Stables JP, Shrivastav A, Sharma RK, Dimmock JR. Derivatives of aryl amines containing the cytotoxic 1,4-dioxo-2-butenyl pharmacophore. Bioorg Med Chem Lett. 2010;20:1510–1515. - - 10.1016/j.bmcl.2010.01.098 - - - - Tjahjono E, McAnena AP, Kirienko NV. The evolutionarily conserved ESRE stress response network is activated by ROS and mitochondrial damage. BMC Biol. 2020;18:74. - - 10.1186/s12915-020-00812-5 - - - - Tjahjono E, Kirienko NV. A conserved mitochondrial surveillance pathway is required for defense against Pseudomonas aeruginosa. PLoS Genet. 2017;13:e1006876. - - 10.1371/journal.pgen.1006876 - - - - Hong M, Kwon JY, Shim J, Lee J. Differential hypoxia response of hsp-16 genes in the nematode. J Mol Biol. 2004;344:369–381. - - 10.1016/j.jmb.2004.09.077 - - - - Kwon JY, Hong M, Choi MS, Kang S, Duke K, Kim S, Lee S, Lee J. Ethanol-response genes and their regulation analyzed by a microarray and comparative genomic approach in the nematode Caenorhabditis elegans. Genomics. 2004;83:600–614. - - 10.1016/j.ygeno.2003.10.008 - - - - Kirienko NV, Fay DS. SLR-2 and JMJC-1 regulate an evolutionarily conserved stress-response network. EMBO J. 2010;29:727–739. - - 10.1038/emboj.2009.387 - - - - Govindan JA, Jayamani E, Zhang X, Breen P, Larkins-Ford J, Mylonakis E, Ruvkun G. Lipid signalling couples translational surveillance to systemic detoxification in Caenorhabditis elegans. Nat Cell Biol. 2015;17:1294–1303. - - 10.1038/ncb3229 - - - - Mir DA, Balamurugan K. Global proteomic response of Caenorhabditis elegans against PemKSa toxin. Front Cell Infect Microbiol. 2019;9:172. - - 10.3389/fcimb.2019.00172 - - - - Sinha A, Rae R. A functional genomic screen for evolutionarily conserved genes required for lifespan and immunity in germline-deficient Caenorhabditis elegans. PLoS One. 2014;9:e101970. 
- - 10.1371/journal.pone.0101970 - - - - Niu W, Lu ZJ, Zhong M, Sarov M, Murray JI, Brdlik CM, Janette J, Chen C, Alves P, Preston E, Slightham C, Jiang L, Hyman AA, Kim SK, Waterston RH, Gerstein M, Snyder M, Reinke V. Diverse transcription factor binding features revealed by genome-wide ChIP-seq in Caenorhabditis elegans. Genome Res. 2011;21:245–254. - - 10.1101/gr.114587.110 - - - - Troemel ER, Chu SW, Reinke V, Lee SS, Ausubel FM, Kim DH. p38 MAPK regulates expression of immune response genes and contributes to longevity in Caenorhabditis elegans. PLoS Genet. 2006;2:e183. - - 10.1371/journal.pgen.0020183 - - - - Evans EA, Kawli T, Tan MW. Pseudomonas aeruginosa suppresses host immunity by activating the DAF-2 insulin-like signaling pathway in Caenorhabditis elegans. PLoS Pathog. 2008;4:e1000175. - - 10.1371/journal.ppat.1000175 - - - - Papp D, Csermely P, Sőti C. A role for SKN-1/Nrf in pathogen resistance and immunosenescence in Caenorhabditis elegans. PLoS Pathog. 2012;8:e1002673. - - 10.1371/journal.ppat.1002673 - - - - Shapira M, Hamlin BJ, Rong J, Chen K, Ronen M, Tan MW. A conserved role for a GATA transcription factor in regulating epithelial innate immune responses. Proc Natl Acad Sci U S A. 2006;103:14086–14091. - - 10.1073/pnas.0603424103 - - - - Singh V, Aballay A. Regulation of DAF-16-mediated innate immunity in Caenorhabditis elegans. J Biol Chem. 2009;284:35580–35587. - - 10.1074/jbc.M109.060905 - - - - McEwan DL, Kirienko NV, Ausubel FM. Host translational inhibition by Pseudomonas aeruginosa exotoxin A triggers an immune response in Caenorhabditis elegans. Cell Host Microbe. 2012;11:364–374. - - 10.1016/j.chom.2012.02.007 - - - - Maggiora G, Vogt M, Stumpfe D, Bajorath J. Molecular similarity in medicinal chemistry. J Med Chem. 2014;57:3186–3204. 
- - 10.1021/jm401411z - - - - Yang F, Vought BW, Satterlee JS, Walker AK, Jim Sun ZY, Watts JL, DeBeaumont R, Saito RM, Hyberts SG, Yang S, Macol C, Iyer L, Tjian R, van den Heuvel S, Hart AC, Wagner G, Naar AM. An ARC/Mediator subunit required for SREBP control of cholesterol and lipid homeostasis. Nature. 2006;442:700–704. - - 10.1038/nature04942 - - - - Mao K, Ji F, Breen P, Sewell A, Han M, Sadreyev R, Ruvkun G. Mitochondrial dysfunction in Caenorhabditis elegans activates mitochondrial relocalization and nuclear hormone receptor-dependent detoxification genes. Cell Metab. 2019;29:1182–1191 e4. - - 10.1016/j.cmet.2019.01.022 - - - - Taubert S, Ward JD, Yamamoto KR. Nuclear hormone receptors in nematodes: evolution and function. Mol Cell Endocrinol. 2011;334:49–55. - - 10.1016/j.mce.2010.04.021 - - - - Taubert S, Hansen M, Van Gilst MR, Cooper SB, Yamamoto KR. The Mediator subunit MDT-15 confers metabolic adaptation to ingested material. PLoS Genet. 2008;4:e1000021. - - 10.1371/journal.pgen.1000021 - - - - Pukkila-Worley R, Feinbaum RL, McEwan DL, Conery AL, Ausubel FM. The evolutionarily conserved mediator subunit MDT-15/MED15 links protective innate immune responses and xenobiotic detoxification. PLoS Pathog. 2014;10:e1004143. - - 10.1371/journal.ppat.1004143 - - - - Sim S, Hibberd ML. Caenorhabditis elegans susceptibility to gut Enterococcus faecalis infection is associated with fat metabolism and epithelial junction integrity. BMC Microbiol. 2016;16:6. - - 10.1186/s12866-016-0624-8 - - - - Dasgupta M, Shashikanth M, Gupta A, Sandhu A, De A, Javed S, Singh V. NHR-49 transcription factor regulates immunometabolic response and survival of Caenorhabditis elegans during Enterococcus faecalis infection. Infect Immun. 2020;88. - - 10.1128/IAI.00130-20 - - - - Goh GYS, Winter JJ, Bhanshali F, Doering KRS, Lai R, Lee K, Veal EA, Taubert S. 
NHR-49/HNF4 integrates regulation of fatty acid metabolism with a protective transcriptional response to oxidative stress and fasting. Aging Cell. 2018;17:e12743. - - 10.1111/acel.12743 - - - - Moreno-Arriola E, El Hafidi M, Ortega-Cuellar D, Carvajal K. AMP-activated protein kinase regulates oxidative metabolism in Caenorhabditis elegans through the NHR-49 and MDT-15 transcriptional regulators. PLoS One. 2016;11:e0148089. - - 10.1371/journal.pone.0148089 - - - - Shivers RP, Pagano DJ, Kooistra T, Richardson CE, Reddy KC, Whitney JK, Kamanzi O, Matsumoto K, Hisamoto N, Kim DH. Phosphorylation of the conserved transcription factor ATF-7 by PMK-1 p38 MAPK regulates innate immunity in Caenorhabditis elegans. PLoS Genet. 2010;6:e1000892. - - 10.1371/journal.pgen.1000892 - - - - Fletcher M, Tillman EJ, Butty VL, Levine SS, Kim DH. Global transcriptional regulation of innate immunity by ATF-7 in Caenorhabditis elegans. PLoS Genet. 2019;15:e1007830. - - 10.1371/journal.pgen.1007830 - - - - Kim DH, Feinbaum R, Alloing G, Emerson FE, Garsin DA, Inoue H, Tanaka-Hino M, Hisamoto N, Matsumoto K, Tan MW, Ausubel FM. A conserved p38 MAP kinase pathway in Caenorhabditis elegans innate immunity. Science. 2002;297:623–626. - - 10.1126/science.1073759 - - - - Giagulli C, Noerder M, Avolio M, Becker PD, Fiorentini S, Guzman CA, Caruso A. Pidotimod promotes functional maturation of dendritic cells and displays adjuvant properties at the nasal mucosa level. Int Immunopharmacol. 2009;9:1366–1373. - - 10.1016/j.intimp.2009.08.010 - - - - Moy TI, Conery AL, Larkins-Ford J, Wu G, Mazitschek R, Casadei G, Lewis K, Carpenter AE, Ausubel FM. High-throughput screen for novel antimicrobials using a whole animal infection model. ACS Chem Biol. 2009;4:527–533. - - 10.1021/cb900084v - - - - Moy TI, Ball AR, Anklesaria Z, Casadei G, Lewis K, Ausubel FM. Identification of novel antimicrobials using a live-animal infection model. Proc Natl Acad Sci U S A. 2006;103:10414–10419. 
- - 10.1073/pnas.0604055103 - - - - Hummell NA, Kirienko NV. Repurposing bioactive compounds for treating multidrug-resistant pathogens. J Med Microbiol. 2020;69:881–894. - - 10.1099/jmm.0.001172 - - - - Bolz DD, Tenor JL, Aballay A. A conserved PMK-1/p38 MAPK is required in Caenorhabditis elegans tissue-specific immune response to Yersinia pestis infection. J Biol Chem. 2010;285:10832–10840. - - 10.1074/jbc.M109.091629 - - - - Vellasco WT, Gomes CRB, Vasconcelos TRA. Chemistry and biological activities of 1,3-benzoxathiazol-2-ones. Mini-Rev Org Chem. 2011;8:103–109. - - 10.2174/157019311793979882 - - - - Shadyro OI, Timoshchuk VA, Polozov GI, Povalishev VN, Andreeva OT, Zhelobkovich VE. Synthesis and antiviral activity of spatially-screened phenols: 1,3-benzoxathiolan-2-one derivatives. Pharm Chem J. 1999;33:366–369. - - 10.1007/BF02508708 - - - - Konieczny MT, Konieczny W, Sabisz M, Skladanowski A, Wakiec R, Augustynowicz-Kopec E, Zwolska Z. Synthesis of isomeric, oxathiolone fused chalcones, and comparison of their activity toward various microorganisms and human cancer cells line. Chem Pharm Bull (Tokyo). 2007;55:817–820. - - 10.1248/cpb.55.817 - - - - Konieczny MT, Konieczny W, Sabisz M, Skladanowski A, Wakiec R, Augustynowicz-Kopec E, Zwolska Z. Acid-catalyzed synthesis of oxathiolone fused chalcones: comparison of their activity toward various microorganisms and human cancer cells line. Eur J Med Chem. 2007;42:729–733. - - 10.1016/j.ejmech.2006.12.014 - - - - Abdeen S, Salim N, Mammadova N, Summers CM, Frankson R, Ambrose AJ, Anderson GG, Schultz PG, Horwich AL, Chapman E, Johnson SM. GroEL/ES inhibitors as potential antibiotics. Bioorg Med Chem Lett. 2016;26:3127–3134. - - 10.1016/j.bmcl.2016.04.089 - - - - Johnson SM, Sharif O, Mak PA, Wang HT, Engels IH, Brinker A, Schultz PG, Horwich AL, Chapman E. A biochemical screen for GroEL/GroES inhibitors. Bioorg Med Chem Lett. 2014;24:786–789. 
- - 10.1016/j.bmcl.2013.12.100 - - - - Tisdale MJGiraldi T, Connors TA, Cartei G. Triazenes: chemical, biological, and clinical aspects. 1989;15–22. - - - Foster BJ, Newell DR, Carmichael J, Harris AL, Gumbrell LA, Jones M, Goodard PM, Calvert AH. Preclinical, phase I, and pharmacokinetic studies with the dimethyl phenyltriazene CB10-277. Br J Cancer. 1993;67:362–368. - - 10.1038/bjc.1993.66 - - - - Kanugula S, Pegg AE. Novel DNA repair alkyltransferase from Caenorhabditis elegans. Environ Mol Mutagen. 2001;38:235–243. - - 10.1002/em.1077 - - - - Ermolaeva MA, Segref A, Dakhovnik A, Ou HL, Schneider JI, Utermohlen O, Hoppe T, Schumacher B. DNA damage in germ cells induces an innate immune response that triggers systemic stress resistance. Nature. 2013;501:416–420. - - 10.1038/nature12452 - - - - Jeanneau-Nicolle E, Benoit-Guyod M, Namil A, Leclerc G. New thaizolo[3,2-a]pyrimidine derivatives, synthesis, and structure-activity relationships. Eur J Med Chem. 1992;27:115–120. - - 10.1016/0223-5234(92)90099-M - - - - Veretennikov EA, Pavlov AV. Synthesis of 5H-[1,3]thiazolo[3,2-a]pyrimidin-5-one derivatives. Russ J Org Chem. 2013;49:575–579. - - 10.1134/S1070428013040143 - - - - Brenner S. The genetics of Caenorhabditis elegans. Genetics. 1974;77:71–94. - - - Beanan MJ, Strome S. Characterization of a germ-line proliferation mutation in C. elegans. Development. 1992;116:755–766. - - - Leiers B, Kampkotter A, Grevelding CG, Link CD, Johnson TE, Henkle-Duhrsen K. A stress-responsive glutathione S-transferase confers resistance to oxidative stress in Caenorhabditis elegans. Free Radic Biol Med. 2003;34:1405–1415. - - 10.1016/s0891-5849(03)00102-3 - - - - Tsialikas JAY. xbp-1 mRNA splicing is attenuated under prolonged exposure to ER stress. 2017. - - - Lehrbach NJ, Ruvkun G. Proteasome dysfunction triggers activation of SKN-1A/Nrf1 by the aspartic protease DDI-1. Elife. 2016;5:e17721. - - 10.7554/eLife.17721 - - - - Tan MW, Mahajan-Miklos S, Ausubel FM. 
Killing of Caenorhabditis elegans by Pseudomonas aeruginosa used to model mammalian bacterial pathogenesis. Proc Natl Acad Sci U S A. 1999;96:715–720. - - 10.1073/pnas.96.2.715 - - - - Schroth MN, Cho JJ, Green SK, Kominos SD. Epidemiology of Pseudomonas aeruginosa in agricultural areas. J Med Microbiol. 2018;67:1191–1201. - - 10.1099/jmm.0.000758 - - - - Beabout K, McCurry MD, Mehta H, Shah AA, Pulukuri KK, Rigol S, Wang Y, Nicolaou KC, Shamoo Y. Experimental evolution of diverse strains as a method for the determination of biochemical mechanisms of action for novel pyrrolizidinone antibiotics. ACS Infect Dis. 2017;3:854–865. - - 10.1021/acsinfecdis.7b00135 - - - - Conery AL, Larkins-Ford J, Ausubel FM, Kirienko NV. High-throughput screening for novel anti-infectives using a Caenorhabditis elegans pathogenesis model. Curr Protoc Chem Biol. 2014;6:25–37. - - 10.1002/9780470559277.ch130160 - - - - Kirienko NV, Cezairliyan BO, Ausubel FM, Powell JR. Pseudomonas aeruginosa PA14 pathogenesis in Caenorhabditis elegans. Methods Mol Biol. 2014;1149:653–669. - - 10.1007/978-1-4939-0473-0_50 - - - - Revtovich AV, Lee R, Kirienko NV. Interplay between mitochondria and diet mediates pathogen and stress resistance in Caenorhabditis elegans. PLoS Genet. 2019;15:e1008011. - - 10.1371/journal.pgen.1008011 - - - - Steinbaugh MJ, Narasimhan SD, Robida-Stubbs S, Moronetti Mazzeo LE, Dreyfuss JM, Hourihan JM, Raghavan P, Operana TN, Esmaillie R, Blackwell TK. Lipid-mediated regulation of SKN-1/Nrf in response to germ cell absence. Elife. 2015;4:e07836. - - 10.7554/eLife.07836 - - - - Estes KA, Dunbar TL, Powell JR, Ausubel FM, Troemel ER. bZIP transcription factor zip-2 mediates an early response to Pseudomonas aeruginosa infection in Caenorhabditis elegans. Proc Natl Acad Sci U S A. 2010;107:2153–2158. - - 10.1073/pnas.0914643107 - - - - Pukkila-Worley R, Ausubel FM, Mylonakis E. Candida albicans infection of Caenorhabditis elegans induces antifungal immune defenses. PLoS Pathog. 
2011;7:e1002074. - - 10.1371/journal.ppat.1002074 - - - - Huffman DL, Abrami L, Sasik R, Corbeil J, van der Goot FG, Aroian RV. Mitogen-activated protein kinase pathways defend against bacterial pore-forming toxins. Proc Natl Acad Sci U S A. 2004;101:10995–11000. - - 10.1073/pnas.0404073101 - - - - de Hoon MJ, Imoto S, Nolan J, Miyano S. Open source clustering software. Bioinformatics. 2004;20:1453–1454. - - 10.1093/bioinformatics/bth078 - - - - Saldanha AJ. Java Treeview: extensible visualization of microarray data. Bioinformatics. 2004;20:3246–3248. - - 10.1093/bioinformatics/bth349 - - - - Yang W, Dierking K, Schulenburg H. WormEXP: a web-based application for a Caenorhabditis elegans-specific gene expression enrichment analysis. Bioinformatics. 2016;32:943–945. - - 10.1093/bioinformatics/btv667 - - - - Huang da W, Sherman BT, Lempicki RA. Systematic and integrative analysis of large gene lists using DAVID bioinformatics resources. Nat Protoc. 2009;4:44–57. - - 10.1038/nprot.2008.211 - - - - Huang da W, Sherman BT, Lempicki RA. Bioinformatics enrichment tools: paths toward the comprehensive functional analysis of large gene lists. Nucleic Acids Res. 2009;37:1–13. - - 10.1093/nar/gkn923 - - - - O’Boyle NM, Banck M, James CA, Morley C, Vandermeersch T, Hutchison GR. Open Babel: an open chemical toolbox. J Cheminform. 2011;3:33. - - 10.1186/1758-2946-3-33 - - - - -
- -
\ No newline at end of file diff --git a/agr_literature_service/lit_processing/pubmed_sample/33410237.json b/agr_literature_service/lit_processing/pubmed_sample/33410237.json deleted file mode 100644 index 99e331aa6..000000000 --- a/agr_literature_service/lit_processing/pubmed_sample/33410237.json +++ /dev/null @@ -1,100 +0,0 @@ -{ - "abstract": "The transparent epidermis of Caenorhabditis elegans makes it an attractive model to study sperm motility and migration within an intact reproductive tract. C elegans synthesize specific F-series prostaglandins (PGFs) that are important for guiding sperm toward the spermatheca. These PGFs are synthesized from polyunsaturated fatty acid (PUFA) precursors, such as arachidonic acid (AA), via a novel pathway, independent of the classical cyclooxygenases (Cox) responsible for most PG synthesis. While the enzyme(s) responsible for PG synthesis has yet to be identified, the DAF-7 TGF\u00df pathway has been implicated in modulating PG levels and sperm guidance. We find that the reduced PGF levels in daf-1 type I receptor mutants are responsible for the sperm guidance defect. The lower level of PGs in daf-1 mutants is due in part to the inaccessibility of AA. Finally, lipid analysis and assessment of sperm guidance in daf-1;daf-3 double mutants suggest DAF-3 suppresses PG production and sperm accumulation at the spermatheca. Our data suggest that DAF-3 functions in the nervous system, and possibly the germline, to affect sperm guidance. The C elegans TGF\u00df pathway regulates many pathways to modulate PG metabolism and sperm guidance. These pathways likely function in the nervous system and possibly the germline.", - "authors": [ - { - "affiliation": [ - "Department of Cell Development and Integrative Biology, University of Alabama at Birmingham, Birmingham, Alabama, USA." 
- ], - "authorRank": 1, - "crossReferences": [ - { - "id": "ORCID:https://orcid.org/0000-0003-4571-4405", - "pages": [ - "person/orcid" - ] - } - ], - "firstname": "Muhan", - "lastname": "Hu", - "name": "Muhan Hu" - }, - { - "affiliation": [ - "Department of Cell Development and Integrative Biology, University of Alabama at Birmingham, Birmingham, Alabama, USA." - ], - "authorRank": 2, - "firstname": "Ekta", - "lastname": "Tiwary", - "name": "Ekta Tiwary" - }, - { - "affiliation": [ - "Department of Cell Development and Integrative Biology, University of Alabama at Birmingham, Birmingham, Alabama, USA." - ], - "authorRank": 3, - "firstname": "Jeevan K", - "lastname": "Prasain", - "name": "Jeevan K Prasain" - }, - { - "affiliation": [ - "Department of Cell Development and Integrative Biology, University of Alabama at Birmingham, Birmingham, Alabama, USA." - ], - "authorRank": 4, - "firstname": "Michael", - "lastname": "Miller", - "name": "Michael Miller" - }, - { - "affiliation": [ - "Department of Cell Development and Integrative Biology, University of Alabama at Birmingham, Birmingham, Alabama, USA." 
- ], - "authorRank": 5, - "firstname": "Rosa", - "lastname": "Serra", - "name": "Rosa Serra" - } - ], - "dateArrivedInPubmed": { - "date_string": "2020-06-23", - "day": "23", - "month": "06", - "year": "2020" - }, - "dateLastModified": { - "date_string": "2021-01-29", - "day": "29", - "month": "01", - "year": "2021" - }, - "datePublished": { - "date_string": "2021-01-07", - "day": "07", - "month": "01", - "year": "2021" - }, - "doi": "10.1002/dvdy.296", - "issn": "1058-8388", - "issueDate": { - "date_string": "2021-01-07", - "day": "07", - "month": "01", - "year": "2021" - }, - "journal": "Dev Dyn", - "keywords": [ - "Caenorhabditis elegans", - "daf-1", - "daf-3", - "daf-7", - "oocyte", - "sperm" - ], - "nlm": "9201927", - "pubMedType": [ - "Journal Article" - ], - "pubmed": "33410237", - "resourceAbbreviation": "Dev Dyn", - "title": "Mechanisms of TGF\u00df in prostaglandin synthesis and sperm guidance in Caenorhabditis elegans." -} \ No newline at end of file diff --git a/agr_literature_service/lit_processing/pubmed_sample/33410237.xml b/agr_literature_service/lit_processing/pubmed_sample/33410237.xml deleted file mode 100644 index 800f6aeb8..000000000 --- a/agr_literature_service/lit_processing/pubmed_sample/33410237.xml +++ /dev/null @@ -1,146 +0,0 @@ - - - - - - 33410237 - - 2021 - 01 - 07 - -
- - 1097-0177 - - - 2021 - Jan - 07 - - - Developmental dynamics : an official publication of the American Association of Anatomists - Dev Dyn - - Mechanisms of TGFß in prostaglandin synthesis and sperm guidance in C. elegans. - 10.1002/dvdy.296 - - The transparent epidermis of C. elegans makes it an attractive model to study sperm motility and migration within an intact reproductive tract. C. elegans synthesize specific F-series prostaglandins (PGFs) that are important for guiding sperm toward the spermatheca. These PGFs are synthesized from polyunsaturated fatty acid (PUFA) precursors, such as arachidonic acid (AA), via a novel pathway, independent of the classical cyclooxygenases (Cox) responsible for most PG synthesis. While the enzyme(s) responsible for PG synthesis has yet to be identified, the DAF-7 TGFß pathway has been implicated in modulating PG levels and sperm guidance. - We find that the reduced PGF levels in daf-1 Type I receptor mutants are responsible for the sperm guidance defect. The lower level of PGs in daf-1 mutants is due in part to the inaccessibility of AA. Finally, lipid analysis and assessment of sperm guidance in daf-1;daf-3 double mutants suggest DAF-3 suppresses PG production and sperm accumulation at the spermatheca. Our data suggest that DAF-3 functions in the nervous system, and possibly the germline, to affect sperm guidance. - The C. elegans TGFß pathway regulates many pathways to modulate PG metabolism and sperm guidance. These pathways likely function in the nervous system and possibly the germline. This article is protected by copyright. All rights reserved. - © 2021 Wiley Periodicals, Inc. - - - - Hu - Muhan - M - https://orcid.org/0000-0003-4571-4405 - - Department of Cell Development and Integrative Biology, University of Alabama at Birmingham, Birmingham, AL, USA. - - - - Tiwary - Ekta - E - - Department of Pharmacology and Toxicology, University of Alabama at Birmingham, Birmingham, AL, USA. 
- - - - Prasain - Jeevan K - JK - - Department of Pharmacology and Toxicology, University of Alabama at Birmingham, Birmingham, AL, USA. - - - - Miller - Michael - M - - - Serra - Rosa - R - - Department of Cell Development and Integrative Biology, University of Alabama at Birmingham, Birmingham, AL, USA. - - - - eng - - Journal Article - - - 2021 - 01 - 07 - -
- - United States - Dev Dyn - 9201927 - 1058-8388 - - IM - - C. elegans - daf-1 - daf-3 - daf-7 - oocyte - sperm - -
- - - - 2020 - 06 - 23 - - - 2020 - 12 - 21 - - - 2020 - 12 - 25 - - - 2021 - 1 - 7 - 6 - 16 - - - 2021 - 1 - 8 - 6 - 0 - - - 2021 - 1 - 8 - 6 - 0 - - - aheadofprint - - 33410237 - 10.1002/dvdy.296 - - -
- -
\ No newline at end of file diff --git a/agr_literature_service/lit_processing/pubmed_sample/33440160.json b/agr_literature_service/lit_processing/pubmed_sample/33440160.json deleted file mode 100644 index f10c8c0d8..000000000 --- a/agr_literature_service/lit_processing/pubmed_sample/33440160.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "abstract": "Animal nervous systems remodel following stress. Although global stress-dependent changes are well documented, contributions of individual neuron remodeling events to animal behavior modification are challenging to study. In response to environmental insults, C. elegans become stress-resistant dauers. Dauer entry induces amphid sensory organ remodeling in which bilateral AMsh glial cells expand and fuse, allowing embedded AWC chemosensory neurons to extend sensory receptive endings. We show that amphid remodeling correlates with accelerated dauer exit upon exposure to favorable conditions and identify a G protein-coupled receptor, REMO-1, driving AMsh glia fusion, AWC neuron remodeling, and dauer exit. REMO-1 is expressed in and localizes to AMsh glia tips, is dispensable for other remodeling events, and promotes stress-induced expression of the remodeling receptor tyrosine kinase VER-1. Our results demonstrate how single-neuron structural changes affect animal behavior, identify key glial roles in stress-induced nervous system plasticity, and demonstrate that remodeling primes animals to respond to favorable conditions.", - "authors": [ - { - "affiliation": [ - "Laboratory of Developmental Genetics, The Rockefeller University, 1230 York Avenue, New York, NY 10065, USA." - ], - "authorRank": 1, - "firstname": "In Hae", - "lastname": "Lee", - "name": "In Hae Lee" - }, - { - "affiliation": [ - "Laboratory of Developmental Genetics, The Rockefeller University, 1230 York Avenue, New York, NY 10065, USA." 
- ], - "authorRank": 2, - "firstname": "Carl", - "lastname": "Procko", - "name": "Carl Procko" - }, - { - "affiliation": [ - "Laboratory of Developmental Genetics, The Rockefeller University, 1230 York Avenue, New York, NY 10065, USA." - ], - "authorRank": 3, - "firstname": "Yun", - "lastname": "Lu", - "name": "Yun Lu" - }, - { - "affiliation": [ - "Laboratory of Developmental Genetics, The Rockefeller University, 1230 York Avenue, New York, NY 10065, USA." - ], - "authorRank": 4, - "firstname": "Shai", - "lastname": "Shaham", - "name": "Shai Shaham" - } - ], - "dateArrivedInPubmed": { - "date_string": "2020-06-22", - "day": "22", - "month": "06", - "year": "2020" - }, - "dateLastModified": { - "date_string": "2021-01-30", - "day": "30", - "month": "01", - "year": "2021" - }, - "datePublished": { - "date_string": "2021-01-12", - "day": "12", - "month": "01", - "year": "2021" - }, - "doi": "10.1016/j.celrep.2020.108607", - "issn": "", - "issueDate": { - "date_string": "2021-01-12", - "day": "12", - "month": "01", - "year": "2021" - }, - "issueName": "2", - "journal": "Cell Rep", - "keywords": [ - "AWC neuron", - "C. elegans", - "REMO-1", - "dauer", - "glia", - "nervous system remodeling" - ], - "mid": "NIHMS1662655", - "nlm": "101573691", - "pages": "108607", - "pii": "S2211-1247(20)31596-5", - "pmc": "PMC7845533", - "pubMedType": [ - "Journal Article" - ], - "pubmed": "33440160", - "resourceAbbreviation": "Cell Rep", - "title": "Stress-Induced Neural Plasticity Mediated by Glial GPCR REMO-1 Promotes C. elegans Adaptive Behavior.", - "volume": "34" -} \ No newline at end of file diff --git a/agr_literature_service/lit_processing/pubmed_sample/33440160.xml b/agr_literature_service/lit_processing/pubmed_sample/33440160.xml deleted file mode 100644 index d9f20441a..000000000 --- a/agr_literature_service/lit_processing/pubmed_sample/33440160.xml +++ /dev/null @@ -1,140 +0,0 @@ - - - - - - 33440160 - - 2021 - 01 - 13 - -
- - 2211-1247 - - 34 - 2 - - 2021 - Jan - 12 - - - Cell reports - Cell Rep - - Stress-Induced Neural Plasticity Mediated by Glial GPCR REMO-1 Promotes C. elegans Adaptive Behavior. - - 108607 - - S2211-1247(20)31596-5 - 10.1016/j.celrep.2020.108607 - - Animal nervous systems remodel following stress. Although global stress-dependent changes are well documented, contributions of individual neuron remodeling events to animal behavior modification are challenging to study. In response to environmental insults, C. elegans become stress-resistant dauers. Dauer entry induces amphid sensory organ remodeling in which bilateral AMsh glial cells expand and fuse, allowing embedded AWC chemosensory neurons to extend sensory receptive endings. We show that amphid remodeling correlates with accelerated dauer exit upon exposure to favorable conditions and identify a G protein-coupled receptor, REMO-1, driving AMsh glia fusion, AWC neuron remodeling, and dauer exit. REMO-1 is expressed in and localizes to AMsh glia tips, is dispensable for other remodeling events, and promotes stress-induced expression of the remodeling receptor tyrosine kinase VER-1. Our results demonstrate how single-neuron structural changes affect animal behavior, identify key glial roles in stress-induced nervous system plasticity, and demonstrate that remodeling primes animals to respond to favorable conditions. - Copyright © 2020 The Author(s). Published by Elsevier Inc. All rights reserved. - - - - Lee - In Hae - IH - - Laboratory of Developmental Genetics, The Rockefeller University, 1230 York Avenue, New York, NY 10065, USA. - - - - Procko - Carl - C - - Laboratory of Developmental Genetics, The Rockefeller University, 1230 York Avenue, New York, NY 10065, USA. - - - - Lu - Yun - Y - - Laboratory of Developmental Genetics, The Rockefeller University, 1230 York Avenue, New York, NY 10065, USA. 
- - - - Shaham - Shai - S - - Laboratory of Developmental Genetics, The Rockefeller University, 1230 York Avenue, New York, NY 10065, USA. Electronic address: shaham@rockefeller.edu. - - - - eng - - Journal Article - -
- - United States - Cell Rep - 101573691 - - IM - - AWC neuron - C. elegans - REMO-1 - dauer - glia - nervous system remodeling - - Declaration of Interests The authors declare no competing interests. -
- - - - 2020 - 06 - 22 - - - 2020 - 10 - 30 - - - 2020 - 12 - 15 - - - 2021 - 1 - 13 - 20 - 6 - - - 2021 - 1 - 14 - 6 - 0 - - - 2021 - 1 - 14 - 6 - 0 - - - ppublish - - 33440160 - S2211-1247(20)31596-5 - 10.1016/j.celrep.2020.108607 - - -
- -
\ No newline at end of file diff --git a/agr_literature_service/lit_processing/sample_reference_populate_load.sh b/agr_literature_service/lit_processing/sample_reference_populate_load.sh deleted file mode 100755 index 9638a62ea..000000000 --- a/agr_literature_service/lit_processing/sample_reference_populate_load.sh +++ /dev/null @@ -1,15 +0,0 @@ -# 22 seconds to call this shell script - -echo $PWD -echo "Running parse_dqm_json_reference.py -p -d ./ -f dqm_load_sample/" -python3 ./agr_literature_service/lit_processing/parse_dqm_json_reference.py -p -d ./ -f dqm_load_sample/ > /logs/log_parse_dqm_json_reference_load_pmid_list -echo "Running process_many_pmids_to_json.py -s -f inputs/alliance_pmids" -python3 ./agr_literature_service/lit_processing/process_many_pmids_to_json.py -s -f inputs/alliance_pmids > /logs/log_process_many_pmids_to_json_load -echo "Running parse_dqm_json_reference.py -d ./ -f dqm_load_sample/ -m all" -python3 ./agr_literature_service/lit_processing/parse_dqm_json_reference.py -d ./ -f dqm_load_sample/ -m all > /logs/log_parse_dqm_json_reference_load_sanitize -echo "Running parse_pubmed_json_reference.py -f inputs/pubmed_only_pmids" -python3 ./agr_literature_service/lit_processing/parse_pubmed_json_reference.py -f inputs/pubmed_only_pmids > /logs/log_parse_pubmed_json_reference_load -echo "Running post_reference_to_api.py" -python3 ./agr_literature_service/lit_processing/post_reference_to_api.py > /logs/log_post_reference_to_api_load # 16 seconds -echo "Running post_comments_corrections_to_api.py -f inputs/all_pmids" -python3 ./agr_literature_service/lit_processing/post_comments_corrections_to_api.py -f inputs/all_pmids > /logs/log_post_comments_corrections_to_api_load diff --git a/agr_literature_service/lit_processing/sort_not_found_pmids_by_mod.py b/agr_literature_service/lit_processing/sort_not_found_pmids_by_mod.py deleted file mode 100644 index 9ffa55242..000000000 --- a/agr_literature_service/lit_processing/sort_not_found_pmids_by_mod.py 
+++ /dev/null @@ -1,87 +0,0 @@ - -# takes pmids_not_found from get_pubmed_xml.py, and pmids_by_mods from parse_dqm_json.py, and -# generates a set sorted by MODs of pmids that were not found in pubmed. -# -# pipenv run python sort_not_found_pmids_by_mod.py - - -import logging.config -from os import environ, path - -from dotenv import load_dotenv - -load_dotenv() - -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') -logging.config.fileConfig(log_file_path) -logger = logging.getLogger('literature logger') - - -# base_path = '/home/azurebrd/git/agr_literature_service_demo/src/xml_processing/' -base_path = environ.get('XML_PATH') - - -def sort_not_found_pmids_by_mod(): - """ - - :return: - """ - - mod_to_pmids = dict() - - pmids_by_mods_file = base_path + 'pmids_by_mods' - pmid_to_mod = dict() - with open(pmids_by_mods_file) as mods_file: - mods_data = mods_file.read() - mods_split = mods_data.split("\n") - for line in mods_split: - if line == '': - continue - tabs = line.split("\t") - pmid = tabs[0] - if len(tabs) < 2: - print("line %s short" % (line)) - mods = tabs[2].split(", ") - for mod in mods: - try: - pmid_to_mod[pmid].append(mod) - except KeyError: - pmid_to_mod[pmid] = [mod] - mods_file.close() - - pmids_not_found_file = base_path + 'pmids_not_found' - with open(pmids_not_found_file) as not_found_file: - not_found_data = not_found_file.read() - not_found_split = not_found_data.split("\n") - for pmid in not_found_split: - if pmid == '': - continue - for mod in pmid_to_mod[pmid]: - # print("%s\t%s" % (mod, pmid)) - try: - mod_to_pmids[mod].append(pmid) - except KeyError: - mod_to_pmids[mod] = [pmid] - not_found_file.close() - - output_pmids_not_found_by_mod_file = base_path + 'pmids_not_found_by_mod' - with open(output_pmids_not_found_by_mod_file, "w") as pmids_not_found_by_mod_file: - for mod in mod_to_pmids: - count = len(mod_to_pmids[mod]) - pmids = ", ".join(mod_to_pmids[mod]) - logger.info("mod %s has %s pmids not in 
PubMed %s" % (mod, count, pmids)) - pmids_not_found_by_mod_file.write("mod %s has %s pmids not in PubMed %s\n" % (mod, count, pmids)) - pmids_not_found_by_mod_file.close() - - -# for pmid in pmid_to_mod: -# for mod in pmid_to_mod[pmid]: -# print("mod %s pmid %s" % (mod, pmid)) - - -if __name__ == "__main__": - """ - call main start function - """ - - sort_not_found_pmids_by_mod() diff --git a/agr_literature_service/lit_processing/tests/functional_tests.py b/agr_literature_service/lit_processing/tests/functional_tests.py index d36b472f3..f00aecd3d 100644 --- a/agr_literature_service/lit_processing/tests/functional_tests.py +++ b/agr_literature_service/lit_processing/tests/functional_tests.py @@ -5,16 +5,16 @@ import logging import logging.config -from agr_literature_service.lit_processing.helper_sqlalchemy import sqlalchemy_load_ref_xref -from agr_literature_service.lit_processing.helper_file_processing import split_identifier +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import sqlalchemy_load_ref_xref +from agr_literature_service.lit_processing.utils.file_processing_utils import split_identifier -from agr_literature_service.lit_processing.generate_dqm_json_test_set import load_sample_json -from agr_literature_service.lit_processing.xml_to_json import generate_json +from agr_literature_service.lit_processing.tests.generate_dqm_json_test_set import load_sample_json +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.xml.xml_to_json import generate_json -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_session +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_session from agr_literature_service.api.models import CrossReferenceModel, AuthorModel -from agr_literature_service.lit_processing.process_single_pmid import process_pmid -from agr_literature_service.lit_processing.update_pubmed_papers import update_data +from 
agr_literature_service.lit_processing.data_ingest.pubmed_ingest.process_single_pmid import process_pmid +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.pubmed_update_references_single_mod import update_data from dotenv import load_dotenv diff --git a/agr_literature_service/lit_processing/generate_dqm_json_test_set.py b/agr_literature_service/lit_processing/tests/generate_dqm_json_test_set.py similarity index 98% rename from agr_literature_service/lit_processing/generate_dqm_json_test_set.py rename to agr_literature_service/lit_processing/tests/generate_dqm_json_test_set.py index 528899fa1..40586e892 100644 --- a/agr_literature_service/lit_processing/generate_dqm_json_test_set.py +++ b/agr_literature_service/lit_processing/tests/generate_dqm_json_test_set.py @@ -8,7 +8,7 @@ from dotenv import load_dotenv -from agr_literature_service.lit_processing.helper_file_processing import split_identifier +from agr_literature_service.lit_processing.utils.file_processing_utils import split_identifier load_dotenv() diff --git a/agr_literature_service/lit_processing/mod_populate_load.py b/agr_literature_service/lit_processing/tests/mod_populate_load.py similarity index 96% rename from agr_literature_service/lit_processing/mod_populate_load.py rename to agr_literature_service/lit_processing/tests/mod_populate_load.py index 39ef04ea3..fbbee3306 100644 --- a/agr_literature_service/lit_processing/mod_populate_load.py +++ b/agr_literature_service/lit_processing/tests/mod_populate_load.py @@ -1,12 +1,11 @@ -import logging import logging.config from os import path -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_session +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_session from agr_literature_service.api.models import ModModel from agr_literature_service.api.user import set_global_user_id -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf')
+log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../../logging.conf') logging.config.fileConfig(log_file_path) logger = logging.getLogger('literature logger') diff --git a/agr_literature_service/lit_processing/parse_pubmed_json_reference.py b/agr_literature_service/lit_processing/tests/parse_pubmed_json_reference.py similarity index 93% rename from agr_literature_service/lit_processing/parse_pubmed_json_reference.py rename to agr_literature_service/lit_processing/tests/parse_pubmed_json_reference.py index 906b32c2a..330b0b7c3 100644 --- a/agr_literature_service/lit_processing/parse_pubmed_json_reference.py +++ b/agr_literature_service/lit_processing/tests/parse_pubmed_json_reference.py @@ -1,14 +1,13 @@ import argparse -import logging import logging.config from os import environ, path -from agr_literature_service.lit_processing.sanitize_pubmed_json import sanitize_pubmed_json_list +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.sanitize_pubmed_json import sanitize_pubmed_json_list # pipenv run python parse_pubmed_json_reference.py -f /home/azurebrd/git/agr_literature_service_demo/src/xml_processing/inputs/pubmed_only_pmids # enter a file of pmids as an argument, sanitize, post to api -log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../logging.conf') +log_file_path = path.join(path.dirname(path.abspath(__file__)), '../../../logging.conf') logging.config.fileConfig(log_file_path) logger = logging.getLogger('parse_pubmed_json_reference') diff --git a/agr_literature_service/lit_processing/process_many_pmids_to_json.py b/agr_literature_service/lit_processing/tests/process_many_pmids_to_json.py similarity index 95% rename from agr_literature_service/lit_processing/process_many_pmids_to_json.py rename to agr_literature_service/lit_processing/tests/process_many_pmids_to_json.py index 2ac02a940..3c4c6dbd2 100644 --- a/agr_literature_service/lit_processing/process_many_pmids_to_json.py +++ 
b/agr_literature_service/lit_processing/tests/process_many_pmids_to_json.py @@ -4,8 +4,8 @@ import time from os import environ, makedirs, path -from agr_literature_service.lit_processing.get_pubmed_xml import download_pubmed_xml -from agr_literature_service.lit_processing.xml_to_json import generate_json +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.xml.get_pubmed_xml import download_pubmed_xml +from agr_literature_service.lit_processing.data_ingest.pubmed_ingest.xml.xml_to_json import generate_json # pipenv run python process_many_pmids_to_json.py -f inputs/alliance_pmids # diff --git a/agr_literature_service/lit_processing/tests/sample_reference_populate_load.sh b/agr_literature_service/lit_processing/tests/sample_reference_populate_load.sh new file mode 100755 index 000000000..8d1847c70 --- /dev/null +++ b/agr_literature_service/lit_processing/tests/sample_reference_populate_load.sh @@ -0,0 +1,13 @@ +# 22 seconds to call this shell script + +echo $PWD +echo "Running parse_dqm_json_reference.py -p -d ./ -f dqm_load_sample/" +python3 ./agr_literature_service/lit_processing/data_ingest/dqm_ingest/parse_dqm_json_reference.py -p -d ./ -f dqm_load_sample/ > /logs/log_parse_dqm_json_reference_load_pmid_list +echo "Running process_many_pmids_to_json.py -s -f inputs/alliance_pmids" +python3 ./agr_literature_service/lit_processing/tests/process_many_pmids_to_json.py -s -f inputs/alliance_pmids > /logs/log_process_many_pmids_to_json_load +echo "Running parse_dqm_json_reference.py -d ./ -f dqm_load_sample/ -m all" +python3 ./agr_literature_service/lit_processing/data_ingest/dqm_ingest/parse_dqm_json_reference.py -d ./ -f dqm_load_sample/ -m all > /logs/log_parse_dqm_json_reference_load_sanitize +echo "Running parse_pubmed_json_reference.py -f inputs/pubmed_only_pmids" +python3 ./agr_literature_service/lit_processing/tests/parse_pubmed_json_reference.py -f inputs/pubmed_only_pmids > /logs/log_parse_pubmed_json_reference_load +echo "Running 
post_reference_to_db.py" +python3 ./agr_literature_service/lit_processing/data_ingest/post_reference_to_db.py > /logs/log_post_reference_to_api_load # 16 seconds diff --git a/agr_literature_service/lit_processing/sample_reference_populate_update.sh b/agr_literature_service/lit_processing/tests/sample_reference_populate_update.sh similarity index 100% rename from agr_literature_service/lit_processing/sample_reference_populate_update.sh rename to agr_literature_service/lit_processing/tests/sample_reference_populate_update.sh diff --git a/agr_literature_service/lit_processing/utils/__init__.py b/agr_literature_service/lit_processing/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agr_literature_service/lit_processing/helper_email.py b/agr_literature_service/lit_processing/utils/email_utils.py similarity index 100% rename from agr_literature_service/lit_processing/helper_email.py rename to agr_literature_service/lit_processing/utils/email_utils.py diff --git a/agr_literature_service/lit_processing/helper_file_processing.py b/agr_literature_service/lit_processing/utils/file_processing_utils.py similarity index 99% rename from agr_literature_service/lit_processing/helper_file_processing.py rename to agr_literature_service/lit_processing/utils/file_processing_utils.py index f6be15aac..1c28d0453 100644 --- a/agr_literature_service/lit_processing/helper_file_processing.py +++ b/agr_literature_service/lit_processing/utils/file_processing_utils.py @@ -5,8 +5,8 @@ import bs4 import requests -from agr_literature_service.lit_processing.helper_post_to_api import (generate_headers, get_authentication_token, - update_token) +from agr_literature_service.lit_processing.utils.okta_utils import (generate_headers, get_authentication_token, + update_token) warnings.filterwarnings("ignore", category=UserWarning, module='bs4') diff --git a/agr_literature_service/lit_processing/helper_post_to_api.py b/agr_literature_service/lit_processing/utils/okta_utils.py
similarity index 100% rename from agr_literature_service/lit_processing/helper_post_to_api.py rename to agr_literature_service/lit_processing/utils/okta_utils.py diff --git a/agr_literature_service/lit_processing/helper_s3.py b/agr_literature_service/lit_processing/utils/s3_utils.py similarity index 100% rename from agr_literature_service/lit_processing/helper_s3.py rename to agr_literature_service/lit_processing/utils/s3_utils.py diff --git a/agr_literature_service/lit_processing/helper_sqlalchemy.py b/agr_literature_service/lit_processing/utils/sqlalchemy_utils.py similarity index 97% rename from agr_literature_service/lit_processing/helper_sqlalchemy.py rename to agr_literature_service/lit_processing/utils/sqlalchemy_utils.py index 762decdd4..2f48706b4 100644 --- a/agr_literature_service/lit_processing/helper_sqlalchemy.py +++ b/agr_literature_service/lit_processing/utils/sqlalchemy_utils.py @@ -1,4 +1,4 @@ -from agr_literature_service.lit_processing.helper_file_processing import split_identifier +from agr_literature_service.lit_processing.utils.file_processing_utils import split_identifier from os import environ diff --git a/alembic/scripts/7d68b38fe026_add_citation_raw_sql.py b/alembic/scripts/7d68b38fe026_add_citation_raw_sql.py index e7f82b688..ee2ed97ff 100644 --- a/alembic/scripts/7d68b38fe026_add_citation_raw_sql.py +++ b/alembic/scripts/7d68b38fe026_add_citation_raw_sql.py @@ -1,4 +1,4 @@ -from agr_literature_service.lit_processing.helper_sqlalchemy import create_postgres_engine +from agr_literature_service.lit_processing.utils.sqlalchemy_utils import create_postgres_engine from datetime import datetime import re diff --git a/cleanup.sh b/cleanup.sh new file mode 100644 index 000000000..219982fa5 --- /dev/null +++ b/cleanup.sh @@ -0,0 +1 @@ +rm -rf agr_literature_service/lit_processing/data_ingest/tmp/* \ No newline at end of file diff --git a/crontab b/crontab index e3e3d1556..1f5e6a4aa 100644 --- a/crontab +++ b/crontab @@ -1,5 +1,5 @@ SHELL=/bin/bash
BASH_ENV=/container.env -0 4 * * * python3 /usr/src/app/agr_literature_service/lit_processing/dump_all_json_data.py >> /var/log/automated_scripts/dump_all_json_data.log 2>&1 -0 7 * * * python3 /usr/src/app/agr_literature_service/lit_processing/query_pubmed_mod_updates.py >> /var/log/automated_scripts/query_pubmed_mod_updates.log 2>&1 -0 10 * * 6 python3 /usr/src/app/agr_literature_service/lit_processing/update_all_pubmed_papers.py >> /var/log/automated_scripts/update_all_pubmed_papers.log 2>&1 +0 4 * * * python3 /usr/src/app/agr_literature_service/lit_processing/data_export/export_all_mod_references_to_json.py >> /var/log/automated_scripts/export_all_mod_references_to_json.log 2>&1 +0 7 * * * python3 /usr/src/app/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_search_new_references.py >> /var/log/automated_scripts/pubmed_search_new_references.log 2>&1 +0 10 * * 6 python3 /usr/src/app/agr_literature_service/lit_processing/data_ingest/pubmed_ingest/pubmed_update_references_all_mods.py >> /var/log/automated_scripts/pubmed_update_references_all_mods.log 2>&1 diff --git a/agr_literature_service/lit_processing/chunking_pmids/README b/docs/lit_processing/chunking_pmids/README similarity index 100% rename from agr_literature_service/lit_processing/chunking_pmids/README rename to docs/lit_processing/chunking_pmids/README diff --git a/docs/xml_processing/cytoscape/agr_lit_processing_cytoscape.html b/docs/lit_processing/cytoscape/agr_lit_processing_cytoscape.html similarity index 100% rename from docs/xml_processing/cytoscape/agr_lit_processing_cytoscape.html rename to docs/lit_processing/cytoscape/agr_lit_processing_cytoscape.html diff --git a/docs/xml_processing/cytoscape/cytoscape-dagre.js b/docs/lit_processing/cytoscape/cytoscape-dagre.js similarity index 100% rename from docs/xml_processing/cytoscape/cytoscape-dagre.js rename to docs/lit_processing/cytoscape/cytoscape-dagre.js diff --git a/docs/xml_processing/cytoscape/cytoscape.min.js 
b/docs/lit_processing/cytoscape/cytoscape.min.js similarity index 100% rename from docs/xml_processing/cytoscape/cytoscape.min.js rename to docs/lit_processing/cytoscape/cytoscape.min.js diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/lit_processing/__init__.py b/examples/lit_processing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/lit_processing/benchmark/__init__.py b/examples/lit_processing/benchmark/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agr_literature_service/lit_processing/benchmark/benchmark.sh b/examples/lit_processing/benchmark/benchmark.sh similarity index 100% rename from agr_literature_service/lit_processing/benchmark/benchmark.sh rename to examples/lit_processing/benchmark/benchmark.sh diff --git a/agr_literature_service/lit_processing/benchmark/benchmark_python.py b/examples/lit_processing/benchmark/benchmark_python.py similarity index 100% rename from agr_literature_service/lit_processing/benchmark/benchmark_python.py rename to examples/lit_processing/benchmark/benchmark_python.py diff --git a/agr_literature_service/lit_processing/benchmark/benchmark_read_json.py b/examples/lit_processing/benchmark/benchmark_read_json.py similarity index 100% rename from agr_literature_service/lit_processing/benchmark/benchmark_read_json.py rename to examples/lit_processing/benchmark/benchmark_read_json.py diff --git a/agr_literature_service/lit_processing/benchmark/benchmark_xml_open_copy.py b/examples/lit_processing/benchmark/benchmark_xml_open_copy.py similarity index 100% rename from agr_literature_service/lit_processing/benchmark/benchmark_xml_open_copy.py rename to examples/lit_processing/benchmark/benchmark_xml_open_copy.py diff --git a/agr_literature_service/lit_processing/benchmark/split_xml.py b/examples/lit_processing/benchmark/split_xml.py similarity index 100% rename from 
agr_literature_service/lit_processing/benchmark/split_xml.py rename to examples/lit_processing/benchmark/split_xml.py