Skip to content

Commit

Permalink
Update Parse Grant ID function name
Browse files Browse the repository at this point in the history
  • Loading branch information
titipata authored Apr 13, 2024
1 parent 8df87bb commit e516be9
Show file tree
Hide file tree
Showing 7 changed files with 9 additions and 18 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ To extract month and day information from PubDate, set `year_info_only=True`. We

### Parse MEDLINE Grant ID

Use `parse_medline_grant_id` in order to parse MEDLINE grant IDs from XML file. This will return a list of dictionaries, each containing
Use `parse_grant_id` to parse MEDLINE grant IDs from an XML file. This will return a list of dictionaries, each containing

* `pmid` : PubMed ID
* `grant_id` : Grant ID
Expand Down
2 changes: 1 addition & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Parse MEDLINE XML
-----------------

.. autofunction:: parse_medline_xml
.. autofunction:: parse_medline_grant_id
.. autofunction:: parse_grant_id

Parse PubMed OA XML
-------------------
Expand Down
1 change: 0 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import sys
import os
import sphinx
import pubmed_parser
import sphinx_gallery


Expand Down
6 changes: 5 additions & 1 deletion pubmed_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@
parse_pubmed_caption,
parse_pubmed_table,
)
from .medline_parser import parse_medline_xml, split_mesh
from .medline_parser import (
parse_medline_xml,
parse_grant_id,
split_mesh,
)
from .pubmed_web_parser import (
parse_xml_web,
parse_citation_web,
Expand Down
2 changes: 1 addition & 1 deletion pubmed_parser/medline_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from collections import defaultdict
from pubmed_parser.utils import read_xml, stringify_children, month_or_day_formater

__all__ = ["parse_medline_xml", "split_mesh"]
__all__ = ["parse_medline_xml", "parse_grant_id", "split_mesh"]


def parse_pmid(pubmed_article):
Expand Down
2 changes: 1 addition & 1 deletion scripts/medline_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def process_file(date_update):
mode='overwrite')

# parse grant database
parse_grant_rdd = path_rdd.flatMap(lambda x: pp.parse_medline_grant_id(x))\
parse_grant_rdd = path_rdd.flatMap(lambda x: pp.parse_grant_id(x))\
.filter(lambda x: x is not None)\
.map(lambda x: Row(**x))
grant_df = parse_grant_rdd.toDF()
Expand Down
12 changes: 0 additions & 12 deletions tests/test_medline_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,18 +206,6 @@ def test_parse_medline_xml():
assert parsed_medline[0]["vernacular_title"] == ""


def test_parse_medline_grant_id():
"""
Test parsing grants from MEDLINE XML
"""
grants = pp.parse_medline_grant_id(os.path.join("data", "pubmed20n0014.xml.gz"))
assert isinstance(grants, list)
assert isinstance(grants[0], dict)
assert grants[0]["pmid"] == "399300"
assert grants[0]["grant_id"] == "HL17731"
assert len(grants) == 484, "Expect number of grants in a given file to be 484"


def test_parse_medline_mesh_terms():
"""
Test parsing MeSH headings from MEDLINE XML
Expand Down

0 comments on commit e516be9

Please sign in to comment.