Skip to content

Commit

Permalink
optimize empty-line handling for li and blockquote content
Browse files Browse the repository at this point in the history
Signed-off-by: chrispy <[email protected]>
  • Loading branch information
chrispy-snps committed Dec 30, 2024
1 parent 6258f5c commit 4fd03de
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 14 deletions.
42 changes: 31 additions & 11 deletions markdownify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


 convert_heading_re = re.compile(r'convert_h(\d+)')
-line_beginning_re = re.compile(r'^', re.MULTILINE)
+line_with_content_re = re.compile(r'^(.*)', flags=re.MULTILINE)
 whitespace_re = re.compile(r'[\t ]+')
 all_whitespace_re = re.compile(r'[\t \r\n]+')
 newline_whitespace_re = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
Expand Down Expand Up @@ -256,9 +256,6 @@ def escape(self, text):
text = text.replace('_', r'\_')
return text

-    def indent(self, text, columns):
-        return line_beginning_re.sub(' ' * columns, text) if text else ''
-
     def underline(self, text, pad_char):
         text = (text or '').rstrip()
         return '\n\n%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''
Expand All @@ -284,11 +281,20 @@ def convert_a(self, el, text, convert_as_inline):
     convert_b = abstract_inline_conversion(lambda self: 2 * self.options['strong_em_symbol'])

     def convert_blockquote(self, el, text, convert_as_inline):
-
+        # handle some early-exit scenarios
+        text = (text or '').strip()
         if convert_as_inline:
-            return ' ' + text.strip() + ' '
+            return ' ' + text + ' '
+        if not text:
+            return "\n"

+        # indent lines with blockquote marker
+        def _indent_for_blockquote(match):
+            line_content = match.group(1)
+            return '> ' + line_content if line_content else '>'
+        text = line_with_content_re.sub(_indent_for_blockquote, text)
+
-        return '\n' + (line_beginning_re.sub('> ', text.strip()) + '\n\n') if text else ''
+        return '\n' + text + '\n\n'

     def convert_br(self, el, text, convert_as_inline):
         if convert_as_inline:
Expand Down Expand Up @@ -369,6 +375,12 @@ def convert_list(self, el, text, convert_as_inline):
     convert_ol = convert_list

     def convert_li(self, el, text, convert_as_inline):
+        # handle some early-exit scenarios
+        text = (text or '').strip()
+        if not text:
+            return "\n"
+
+        # determine list item bullet character to use
         parent = el.parent
         if parent is not None and parent.name == 'ol':
             if parent.get("start") and str(parent.get("start")).isnumeric():
Expand All @@ -385,10 +397,18 @@ def convert_li(self, el, text, convert_as_inline):
bullets = self.options['bullets']
bullet = bullets[depth % len(bullets)]
bullet = bullet + ' '
-        text = (text or '').strip()
-        text = self.indent(text, len(bullet))
-        if text:
-            text = bullet + text[len(bullet):]
+        bullet_width = len(bullet)
+        bullet_indent = ' ' * bullet_width
+
+        # indent content lines by bullet width
+        def _indent_for_li(match):
+            line_content = match.group(1)
+            return bullet_indent + line_content if line_content else ''
+        text = line_with_content_re.sub(_indent_for_li, text)
+
+        # insert bullet into first-line indent whitespace
+        text = bullet + text[bullet_width:]
+
         return '%s\n' % text

def convert_p(self, el, text, convert_as_inline):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_blockquote():

 def test_blockquote_with_nested_paragraph():
     assert md('<blockquote><p>Hello</p></blockquote>') == '\n> Hello\n\n'
-    assert md('<blockquote><p>Hello</p><p>Hello again</p></blockquote>') == '\n> Hello\n> \n> Hello again\n\n'
+    assert md('<blockquote><p>Hello</p><p>Hello again</p></blockquote>') == '\n> Hello\n>\n> Hello again\n\n'


def test_blockquote_with_paragraph():
Expand Down
4 changes: 2 additions & 2 deletions tests/test_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_ol():
     assert md('<ol start="-1"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
     assert md('<ol start="foo"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
     assert md('<ol start="1.5"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
-    assert md('<ol start="1234"><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ol>') == '\n\n1234. first para\n      \n      second para\n1235. third para\n      \n      fourth para\n'
+    assert md('<ol start="1234"><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ol>') == '\n\n1234. first para\n\n      second para\n1235. third para\n\n      fourth para\n'


def test_nested_ols():
Expand All @@ -64,7 +64,7 @@ def test_ul():
<li> c
</li>
</ul>""") == '\n\n* a\n* b\n* c\n'
-    assert md('<ul><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ul>') == '\n\n* first para\n  \n  second para\n* third para\n  \n  fourth para\n'
+    assert md('<ul><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ul>') == '\n\n* first para\n\n  second para\n* third para\n\n  fourth para\n'


def test_inline_ul():
Expand Down

0 comments on commit 4fd03de

Please sign in to comment.