Merge branch 'develop'

matthewwithanm · Apr 13, 2022 · d375116 · d375116
2 parents eb0330b + 87b9f6c
commit d375116
Show file tree

Hide file tree

Showing 5 changed files with 67 additions and 18 deletions.
diff --git a/README.rst b/README.rst
@@ -102,10 +102,34 @@ code_language
   should be annotated with `````python`` or similar.
   Defaults to ``''`` (empty string) and can be any string.
 
+code_language_callback
+  When the HTML code contains ``pre`` tags that in some way provide the code
+  language, for example as class, this callback can be used to extract the
+  language from the tag and prefix it to the converted ``pre`` tag.
+  The callback gets one single argument, a BeautifulSoup object, and returns
+  a string containing the code language, or ``None``.
+  An example to use the class name as code language could be::
+
+    def callback(el):
+        return el['class'][0] if el.has_attr('class') else None
+
+  Defaults to ``None``.
+
+escape_asterisks
+  If set to ``False``, do not escape ``*`` to ``\*`` in text.
+  Defaults to ``True``.
+
 escape_underscores
   If set to ``False``, do not escape ``_`` to ``\_`` in text.
   Defaults to ``True``.
 
+keep_inline_images_in
+  Images are converted to their alt-text when the images are located inside
+  headings or table cells. If some inline images should be converted to
+  markdown images instead, this option can be set to a list of parent tags
+  that should be allowed to contain inline images, for example ``['td']``.
+  Defaults to an empty list.
+
 Options may be specified as kwargs to the ``markdownify`` function, or as a
 nested ``Options`` class in ``MarkdownConverter`` subclasses.
 
@@ -119,7 +143,7 @@ Converting BeautifulSoup objects
 
     # Create shorthand method for conversion
     def md(soup, **options):
-        return ImageBlockConverter(**options).convert_soup(soup)
+        return MarkdownConverter(**options).convert_soup(soup)
 
 
 Creating Custom Converters

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
@@ -25,14 +25,6 @@
 UNDERSCORE = '_'
 
 
-def escape(text, escape_underscores):
-    if not text:
-        return ''
-    if escape_underscores:
-        return text.replace('_', r'\_')
-    return text
-
-
 def chomp(text):
     """
     If the text in an inline tag like b, a, or em contains a leading or trailing
@@ -71,10 +63,13 @@ class DefaultOptions:
         autolinks = True
         bullets = '*+-'  # An iterable of bullet types.
         code_language = ''
+        code_language_callback = None
         convert = None
         default_title = False
+        escape_asterisks = True
         escape_underscores = True
         heading_style = UNDERLINED
+        keep_inline_images_in = []
         newline_style = SPACES
         strip = None
         strong_em_symbol = ASTERISK
@@ -161,7 +156,7 @@ def process_text(self, el):
             text = whitespace_re.sub(' ', text)
 
         if el.parent.name != 'code':
-            text = escape(text, self.options['escape_underscores'])
+            text = self.escape(text)
 
         # remove trailing whitespaces if any of the following condition is true:
         # - current text node is the last node in li
@@ -199,6 +194,15 @@ def should_convert_tag(self, tag):
         else:
             return True
 
+    def escape(self, text):
+        if not text:
+            return ''
+        if self.options['escape_asterisks']:
+            text = text.replace('*', r'\*')
+        if self.options['escape_underscores']:
+            text = text.replace('_', r'\_')
+        return text
+
     def indent(self, text, level):
         return line_beginning_re.sub('\t' * level, text) if text else ''
 
@@ -278,7 +282,8 @@ def convert_img(self, el, text, convert_as_inline):
         src = el.attrs.get('src', None) or ''
         title = el.attrs.get('title', None) or ''
         title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
-        if convert_as_inline:
+        if (convert_as_inline
+                and el.parent.name not in self.options['keep_inline_images_in']):
             return alt
 
         return '![%s](%s%s)' % (alt, src, title_part)
@@ -331,7 +336,12 @@ def convert_p(self, el, text, convert_as_inline):
     def convert_pre(self, el, text, convert_as_inline):
         if not text:
             return ''
-        return '\n```%s\n%s\n```\n' % (self.options['code_language'], text)
+        code_language = self.options['code_language']
+
+        if self.options['code_language_callback']:
+            code_language = self.options['code_language_callback'](el) or code_language
+
+        return '\n```%s\n%s\n```\n' % (code_language, text)
 
     convert_s = convert_del
 

diff --git a/setup.py b/setup.py
@@ -10,7 +10,7 @@
 pkgmeta = {
     '__title__': 'markdownify',
     '__author__': 'Matthew Tretter',
-    '__version__': '0.10.3',
+    '__version__': '0.11.0',
 }
 
 

diff --git a/tests/test_conversions.py b/tests/test_conversions.py
@@ -133,12 +133,13 @@ def test_hn_nested_simple_tag():
 
 def test_hn_nested_img():
     image_attributes_to_markdown = [
-        ("", ""),
-        ("alt='Alt Text'", "Alt Text"),
-        ("alt='Alt Text' title='Optional title'", "Alt Text"),
+        ("", "", ""),
+        ("alt='Alt Text'", "Alt Text", ""),
+        ("alt='Alt Text' title='Optional title'", "Alt Text", " \"Optional title\""),
     ]
-    for image_attributes, markdown in image_attributes_to_markdown:
-        assert md('<h3>A <img src="/path/to/img.jpg " ' + image_attributes + '/> B</h3>') == '### A ' + markdown + ' B\n\n'
+    for image_attributes, markdown, title in image_attributes_to_markdown:
+        assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>') == '### A ' + markdown + ' B\n\n'
+        assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>', keep_inline_images_in=['h3']) == '### A ![' + markdown + '](/path/to/img.jpg' + title + ') B\n\n'
 
 
 def test_hn_atx_headings():
@@ -215,3 +216,12 @@ def test_sup():
 def test_lang():
     assert md('<pre>test\n    foo\nbar</pre>', code_language='python') == '\n```python\ntest\n    foo\nbar\n```\n'
     assert md('<pre><code>test\n    foo\nbar</code></pre>', code_language='javascript') == '\n```javascript\ntest\n    foo\nbar\n```\n'
+
+
+def test_lang_callback():
+    def callback(el):
+        return el['class'][0] if el.has_attr('class') else None
+
+    assert md('<pre class="python">test\n    foo\nbar</pre>', code_language_callback=callback) == '\n```python\ntest\n    foo\nbar\n```\n'
+    assert md('<pre class="javascript"><code>test\n    foo\nbar</code></pre>', code_language_callback=callback) == '\n```javascript\ntest\n    foo\nbar\n```\n'
+    assert md('<pre class="javascript"><code class="javascript">test\n    foo\nbar</code></pre>', code_language_callback=callback) == '\n```javascript\ntest\n    foo\nbar\n```\n'
diff --git a/tests/test_escaping.py b/tests/test_escaping.py
@@ -1,6 +1,11 @@
 from markdownify import markdownify as md
 
 
+def test_asterisks():
+    assert md('*hey*dude*') == r'\*hey\*dude\*'
+    assert md('*hey*dude*', escape_asterisks=False) == r'*hey*dude*'
+
+
 def test_underscore():
     assert md('_hey_dude_') == r'\_hey\_dude\_'
     assert md('_hey_dude_', escape_underscores=False) == r'_hey_dude_'