From 1dfc9ee8e04a048c4717b7408e835aa51aa1bc8d Mon Sep 17 00:00:00 2001 From: Alex Younger Date: Mon, 16 Dec 2024 21:10:59 -0600 Subject: [PATCH] feat: fewer function calls around whitespace When checking whitespace, we should convert the node children to a list to avoid repeated generation. We should also add checks for early elimination of function calls. --- markdownify/__init__.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 5bd6f84..59bc696 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -140,19 +140,24 @@ def process_tag(self, node, convert_as_inline, children_only=False): # Remove whitespace-only textnodes just before, after or # inside block-level elements. should_remove_inside = should_remove_whitespace_inside(node) - for el in node.children: - # Only extract (remove) whitespace-only text node if any of the - # conditions is true: - # - el is the first element in its parent (block-level) - # - el is the last element in its parent (block-level) - # - el is adjacent to a block-level node - can_extract = (should_remove_inside and (not el.previous_sibling - or not el.next_sibling) - or should_remove_whitespace_outside(el.previous_sibling) - or should_remove_whitespace_outside(el.next_sibling)) - if (isinstance(el, NavigableString) - and six.text_type(el).strip() == '' - and can_extract): + children = list(node.children) # Convert to list to avoid repeated iteration + for i, el in enumerate(children): + # Quick type check first to avoid unnecessary function calls + if not isinstance(el, NavigableString): + continue + + # Check if the text is entirely whitespace first + text = six.text_type(el) + if text.strip(): + continue + + # Determine if we can extract based on position and adjacency + can_extract = ( + (should_remove_inside and (i == 0 or i == len(children) - 1)) or (i > 0 and should_remove_whitespace_outside(children[i - 1])) or (i < len(children) - 1 and should_remove_whitespace_outside(children[i + 1])) + ) + + # Extract if conditions are met + if can_extract: el.extract() # Convert the children first