Skip to content

Commit

Permalink
#112 Parse epub instead of a mix of ppub and epub
Browse files Browse the repository at this point in the history
  • Loading branch information
nils-herrmann committed May 16, 2024
1 parent 0eb7114 commit 28d5032
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
6 changes: 3 additions & 3 deletions pubmed_parser/pubmed_oa_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,11 @@ def parse_pubmed_xml(path, include_path=False, nxml=False):
journal = ""

dict_article_meta = parse_article_meta(tree)
pub_year_node = tree.find(".//pub-date/year")
pub_year_node = tree.find(".//pub-date[@pub-type='epub']/year")
pub_year = pub_year_node.text if pub_year_node is not None else ""
pub_month_node = tree.find(".//pub-date/month")
pub_month_node = tree.find(".//pub-date[@pub-type='epub']/month")
pub_month = pub_month_node.text if pub_month_node is not None else "01"
pub_day_node = tree.find(".//pub-date/day")
pub_day_node = tree.find(".//pub-date[@pub-type='epub']/day")
pub_day = pub_day_node.text if pub_day_node is not None else "01"

subjects_node = tree.findall(".//article-categories//subj-group/subject")
Expand Down
1 change: 1 addition & 0 deletions tests/test_pubmed_oa_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def test_parse_pubmed_xml():
assert parsed_xml.get("doi") == "10.1371/journal.pone.0046493"
assert parsed_xml.get("subjects") == "Research Article; Biology; Biochemistry; Enzymes; Enzyme Metabolism; Lipids; Fatty Acids; Glycerides; Lipid Metabolism; Neutral Lipids; Metabolism; Lipid Metabolism; Proteins; Globular Proteins; Protein Classes; Recombinant Proteins; Biotechnology; Microbiology; Bacterial Pathogens; Bacteriology; Emerging Infectious Diseases; Host-Pathogen Interaction; Microbial Growth and Development; Microbial Metabolism; Microbial Pathogens; Microbial Physiology; Proteomics; Sequence Analysis; Spectrometric Identification of Proteins" # noqa
assert "Competing Interests: " in parsed_xml.get("coi_statement")
assert parsed_xml.get('publication_date') == '28-9-2012'


def test_parse_pubmed_paragraph():
Expand Down

0 comments on commit 28d5032

Please sign in to comment.