diff --git a/pubmed_parser/pubmed_oa_parser.py b/pubmed_parser/pubmed_oa_parser.py index 5cfd0d9..07add1d 100644 --- a/pubmed_parser/pubmed_oa_parser.py +++ b/pubmed_parser/pubmed_oa_parser.py @@ -161,11 +161,11 @@ def parse_pubmed_xml(path, include_path=False, nxml=False): journal = "" dict_article_meta = parse_article_meta(tree) - pub_year_node = tree.find(".//pub-date/year") + pub_year_node = tree.find(".//pub-date[@pub-type='epub']/year") pub_year = pub_year_node.text if pub_year_node is not None else "" - pub_month_node = tree.find(".//pub-date/month") + pub_month_node = tree.find(".//pub-date[@pub-type='epub']/month") pub_month = pub_month_node.text if pub_month_node is not None else "01" - pub_day_node = tree.find(".//pub-date/day") + pub_day_node = tree.find(".//pub-date[@pub-type='epub']/day") pub_day = pub_day_node.text if pub_day_node is not None else "01" subjects_node = tree.findall(".//article-categories//subj-group/subject") diff --git a/tests/test_pubmed_oa_parser.py b/tests/test_pubmed_oa_parser.py index db5e158..dd8e484 100644 --- a/tests/test_pubmed_oa_parser.py +++ b/tests/test_pubmed_oa_parser.py @@ -39,6 +39,7 @@ def test_parse_pubmed_xml(): assert parsed_xml.get("doi") == "10.1371/journal.pone.0046493" assert parsed_xml.get("subjects") == "Research Article; Biology; Biochemistry; Enzymes; Enzyme Metabolism; Lipids; Fatty Acids; Glycerides; Lipid Metabolism; Neutral Lipids; Metabolism; Lipid Metabolism; Proteins; Globular Proteins; Protein Classes; Recombinant Proteins; Biotechnology; Microbiology; Bacterial Pathogens; Bacteriology; Emerging Infectious Diseases; Host-Pathogen Interaction; Microbial Growth and Development; Microbial Metabolism; Microbial Pathogens; Microbial Physiology; Proteomics; Sequence Analysis; Spectrometric Identification of Proteins" # noqa assert "Competing Interests: " in parsed_xml.get("coi_statement") + assert parsed_xml.get('publication_date') == '28-9-2012' def test_parse_pubmed_paragraph():