diff --git a/README.md b/README.md index 33aeab2..96a2a7b 100644 --- a/README.md +++ b/README.md @@ -166,7 +166,7 @@ To extract month and day information from PubDate, set `year_info_only=True`. We ### Parse MEDLINE Grant ID -Use `parse_medline_grant_id` in order to parse MEDLINE grant IDs from XML file. This will return a list of dictionaries, each containing +Use `parse_grant_id` to parse MEDLINE grant IDs from an XML file. This will return a list of dictionaries, each containing * `pmid` : PubMed ID * `grant_id` : Grant ID diff --git a/docs/api.rst b/docs/api.rst index 84192fc..04a5781 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -11,7 +11,7 @@ Parse MEDLINE XML ----------------- .. autofunction:: parse_medline_xml -.. autofunction:: parse_medline_grant_id +.. autofunction:: parse_grant_id Parse PubMed OA XML ------------------- diff --git a/docs/conf.py b/docs/conf.py index b087a53..bfce9c1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,7 +16,6 @@ import sys import os import sphinx -import pubmed_parser import sphinx_gallery diff --git a/pubmed_parser/__init__.py b/pubmed_parser/__init__.py index 9a6804b..8a91845 100644 --- a/pubmed_parser/__init__.py +++ b/pubmed_parser/__init__.py @@ -12,7 +12,11 @@ parse_pubmed_caption, parse_pubmed_table, ) -from .medline_parser import parse_medline_xml, split_mesh +from .medline_parser import ( + parse_medline_xml, + parse_grant_id, + split_mesh, +) from .pubmed_web_parser import ( parse_xml_web, parse_citation_web, diff --git a/pubmed_parser/medline_parser.py b/pubmed_parser/medline_parser.py index 3cacdff..c465dd1 100644 --- a/pubmed_parser/medline_parser.py +++ b/pubmed_parser/medline_parser.py @@ -9,7 +9,7 @@ from collections import defaultdict from pubmed_parser.utils import read_xml, stringify_children, month_or_day_formater -__all__ = ["parse_medline_xml", "split_mesh"] +__all__ = ["parse_medline_xml", "parse_grant_id", "split_mesh"] def parse_pmid(pubmed_article): diff --git a/scripts/medline_spark.py 
b/scripts/medline_spark.py index d00e2f5..db60056 100644 --- a/scripts/medline_spark.py +++ b/scripts/medline_spark.py @@ -70,7 +70,7 @@ def process_file(date_update): mode='overwrite') # parse grant database - parse_grant_rdd = path_rdd.flatMap(lambda x: pp.parse_medline_grant_id(x))\ + parse_grant_rdd = path_rdd.flatMap(lambda x: pp.parse_grant_id(x))\ .filter(lambda x: x is not None)\ .map(lambda x: Row(**x)) grant_df = parse_grant_rdd.toDF() diff --git a/tests/test_medline_parser.py b/tests/test_medline_parser.py index 1cf43c5..5532fa8 100644 --- a/tests/test_medline_parser.py +++ b/tests/test_medline_parser.py @@ -206,18 +206,6 @@ def test_parse_medline_xml(): assert parsed_medline[0]["vernacular_title"] == "" -def test_parse_medline_grant_id(): - """ - Test parsing grants from MEDLINE XML - """ - grants = pp.parse_medline_grant_id(os.path.join("data", "pubmed20n0014.xml.gz")) - assert isinstance(grants, list) - assert isinstance(grants[0], dict) - assert grants[0]["pmid"] == "399300" - assert grants[0]["grant_id"] == "HL17731" - assert len(grants) == 484, "Expect number of grants in a given file to be 484" - - def test_parse_medline_mesh_terms(): """ Test parsing MeSH headings from MEDLINE XML