From d656ad6531bd117aa2dc077d35f7df9c996376d4 Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Wed, 16 Oct 2024 10:17:01 -0400 Subject: [PATCH] Accommodate date-type in parse_date 180/955 articles in oa_comm_xml.incr.2024-10-04 had dates recorded on a `pub-date` element carrying a `date-type` attribute, like: ```92024``` instead of a `pub-type` attribute, like: ``` 92024 ``` (the XML tags in these examples appear to have been stripped; only the text content remains). These changes will now properly parse those dates and not error on parsing the whole document. --- pubmed_parser/pubmed_oa_parser.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pubmed_parser/pubmed_oa_parser.py b/pubmed_parser/pubmed_oa_parser.py index 8b079f6..49154eb 100644 --- a/pubmed_parser/pubmed_oa_parser.py +++ b/pubmed_parser/pubmed_oa_parser.py @@ -92,10 +92,15 @@ def parse_date(tree, date_type): def get_text(node): return node.text if node is not None else None - pub_date_path = f".//pub-date[@pub-type=\"{date_type}\"]" + pub_date_path = f".//pub-date[@pub-type='{date_type}' or @date-type='{date_type}']" + date_node = tree.xpath(pub_date_path) + + if not date_node: + return {} + date_dict = {} for part in ["year", "month", "day"]: - text = get_text(tree.find(f"{pub_date_path}/{part}")) + text = get_text(date_node[0].find(part)) if text is not None: date_dict[part] = text