Skip to content

Commit

Permalink
feat: fewer function calls around whitespace
Browse files Browse the repository at this point in the history
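When removing whitespace-only text nodes, we should convert the node's
children to a list once, so that neighboring nodes can be looked up by index
instead of being regenerated on every iteration. We should also run the cheap
checks first (is the node a text node, and is its text only whitespace) so
that the should_remove_whitespace_outside() calls are skipped whenever possible.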
  • Loading branch information
AlextheYounga committed Dec 17, 2024
1 parent 7dd9a1a commit 1dfc9ee
Showing 1 changed file with 18 additions and 13 deletions.
31 changes: 18 additions & 13 deletions markdownify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,19 +140,24 @@ def process_tag(self, node, convert_as_inline, children_only=False):
# Remove whitespace-only textnodes just before, after or
# inside block-level elements.
should_remove_inside = should_remove_whitespace_inside(node)
for el in node.children:
# Only extract (remove) whitespace-only text node if any of the
# conditions is true:
# - el is the first element in its parent (block-level)
# - el is the last element in its parent (block-level)
# - el is adjacent to a block-level node
can_extract = (should_remove_inside and (not el.previous_sibling
or not el.next_sibling)
or should_remove_whitespace_outside(el.previous_sibling)
or should_remove_whitespace_outside(el.next_sibling))
if (isinstance(el, NavigableString)
and six.text_type(el).strip() == ''
and can_extract):
children = list(node.children) # Convert to list to avoid repeated iteration
for i, el in enumerate(children):
# Quick type check first to avoid unnecessary function calls
if not isinstance(el, NavigableString):
continue

# Check if the text is entirely whitespace first
text = six.text_type(el)
if text.strip():
continue

# Determine if we can extract based on position and adjacency
can_extract = (
(should_remove_inside and (i == 0 or i == len(children) - 1)) or (i > 0 and should_remove_whitespace_outside(children[i - 1])) or (i < len(children) - 1 and should_remove_whitespace_outside(children[i + 1]))
)

# Extract if conditions are met
if can_extract:
el.extract()

# Convert the children first
Expand Down

0 comments on commit 1dfc9ee

Please sign in to comment.