Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexVonB committed Apr 13, 2022
2 parents eb0330b + 87b9f6c commit d375116
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 18 deletions.
26 changes: 25 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,34 @@ code_language
should be annotated with `````python`` or similar.
Defaults to ``''`` (empty string) and can be any string.

code_language_callback
When the HTML code contains ``pre`` tags that in some way provide the code
language, for example as a class, this callback can be used to extract the
language from the tag and prefix it to the converted ``pre`` tag.
The callback gets a single argument, a BeautifulSoup object, and returns
a string containing the code language, or ``None``.
An example that uses the class name as the code language could be::

    def callback(el):
        return el['class'][0] if el.has_attr('class') else None

Defaults to ``None``.

escape_asterisks
If set to ``False``, do not escape ``*`` to ``\*`` in text.
Defaults to ``True``.

escape_underscores
If set to ``False``, do not escape ``_`` to ``\_`` in text.
Defaults to ``True``.

keep_inline_images_in
Images are converted to their alt-text when the images are located inside
headlines or table cells. If some inline images should be converted to
markdown images instead, this option can be set to a list of parent tags
that should be allowed to contain inline images, for example ``['td']``.
Defaults to an empty list.

Options may be specified as kwargs to the ``markdownify`` function, or as a
nested ``Options`` class in ``MarkdownConverter`` subclasses.

Expand All @@ -119,7 +143,7 @@ Converting BeautifulSoup objects
# Create shorthand method for conversion
def md(soup, **options):
return ImageBlockConverter(**options).convert_soup(soup)
return MarkdownConverter(**options).convert_soup(soup)
Creating Custom Converters
Expand Down
32 changes: 21 additions & 11 deletions markdownify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,6 @@
UNDERSCORE = '_'


def escape(text, escape_underscores):
if not text:
return ''
if escape_underscores:
return text.replace('_', r'\_')
return text


def chomp(text):
"""
If the text in an inline tag like b, a, or em contains a leading or trailing
Expand Down Expand Up @@ -71,10 +63,13 @@ class DefaultOptions:
autolinks = True
bullets = '*+-' # An iterable of bullet types.
code_language = ''
code_language_callback = None
convert = None
default_title = False
escape_asterisks = True
escape_underscores = True
heading_style = UNDERLINED
keep_inline_images_in = []
newline_style = SPACES
strip = None
strong_em_symbol = ASTERISK
Expand Down Expand Up @@ -161,7 +156,7 @@ def process_text(self, el):
text = whitespace_re.sub(' ', text)

if el.parent.name != 'code':
text = escape(text, self.options['escape_underscores'])
text = self.escape(text)

# remove trailing whitespaces if any of the following condition is true:
# - current text node is the last node in li
Expand Down Expand Up @@ -199,6 +194,15 @@ def should_convert_tag(self, tag):
else:
return True

def escape(self, text):
if not text:
return ''
if self.options['escape_asterisks']:
text = text.replace('*', r'\*')
if self.options['escape_underscores']:
text = text.replace('_', r'\_')
return text

def indent(self, text, level):
return line_beginning_re.sub('\t' * level, text) if text else ''

Expand Down Expand Up @@ -278,7 +282,8 @@ def convert_img(self, el, text, convert_as_inline):
src = el.attrs.get('src', None) or ''
title = el.attrs.get('title', None) or ''
title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
if convert_as_inline:
if (convert_as_inline
and el.parent.name not in self.options['keep_inline_images_in']):
return alt

return '![%s](%s%s)' % (alt, src, title_part)
Expand Down Expand Up @@ -331,7 +336,12 @@ def convert_p(self, el, text, convert_as_inline):
def convert_pre(self, el, text, convert_as_inline):
if not text:
return ''
return '\n```%s\n%s\n```\n' % (self.options['code_language'], text)
code_language = self.options['code_language']

if self.options['code_language_callback']:
code_language = self.options['code_language_callback'](el) or code_language

return '\n```%s\n%s\n```\n' % (code_language, text)

convert_s = convert_del

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
pkgmeta = {
'__title__': 'markdownify',
'__author__': 'Matthew Tretter',
'__version__': '0.10.3',
'__version__': '0.11.0',
}


Expand Down
20 changes: 15 additions & 5 deletions tests/test_conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,13 @@ def test_hn_nested_simple_tag():

def test_hn_nested_img():
image_attributes_to_markdown = [
("", ""),
("alt='Alt Text'", "Alt Text"),
("alt='Alt Text' title='Optional title'", "Alt Text"),
("", "", ""),
("alt='Alt Text'", "Alt Text", ""),
("alt='Alt Text' title='Optional title'", "Alt Text", " \"Optional title\""),
]
for image_attributes, markdown in image_attributes_to_markdown:
assert md('<h3>A <img src="/path/to/img.jpg " ' + image_attributes + '/> B</h3>') == '### A ' + markdown + ' B\n\n'
for image_attributes, markdown, title in image_attributes_to_markdown:
assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>') == '### A ' + markdown + ' B\n\n'
assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>', keep_inline_images_in=['h3']) == '### A ![' + markdown + '](/path/to/img.jpg' + title + ') B\n\n'


def test_hn_atx_headings():
Expand Down Expand Up @@ -215,3 +216,12 @@ def test_sup():
def test_lang():
assert md('<pre>test\n foo\nbar</pre>', code_language='python') == '\n```python\ntest\n foo\nbar\n```\n'
assert md('<pre><code>test\n foo\nbar</code></pre>', code_language='javascript') == '\n```javascript\ntest\n foo\nbar\n```\n'


def test_lang_callback():
def callback(el):
return el['class'][0] if el.has_attr('class') else None

assert md('<pre class="python">test\n foo\nbar</pre>', code_language_callback=callback) == '\n```python\ntest\n foo\nbar\n```\n'
assert md('<pre class="javascript"><code>test\n foo\nbar</code></pre>', code_language_callback=callback) == '\n```javascript\ntest\n foo\nbar\n```\n'
assert md('<pre class="javascript"><code class="javascript">test\n foo\nbar</code></pre>', code_language_callback=callback) == '\n```javascript\ntest\n foo\nbar\n```\n'
5 changes: 5 additions & 0 deletions tests/test_escaping.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from markdownify import markdownify as md


def test_asterisks():
assert md('*hey*dude*') == r'\*hey\*dude\*'
assert md('*hey*dude*', escape_asterisks=False) == r'*hey*dude*'


def test_underscore():
assert md('_hey_dude_') == r'\_hey\_dude\_'
assert md('_hey_dude_', escape_underscores=False) == r'_hey_dude_'
Expand Down

0 comments on commit d375116

Please sign in to comment.