-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathmistletoe_renderer.py
192 lines (150 loc) · 6.4 KB
/
mistletoe_renderer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
"""
This module contains a class CustomHTMLRenderer, which uses
mistletoe to generate HTML from markdown.
Extra features include:
- Linkifying raw URLs
- Managing LaTeX so that MathJax will be able to process it in the browser
- Syntax highlighting with Pygments
"""
import re
from mistletoe import Document, HTMLRenderer, BaseRenderer, span_token, block_token
from pygments import highlight
from pygments.lexers import get_lexer_by_name as get_lexer
from pygments.formatters.html import HtmlFormatter
from mathjax_editing import remove_math, replace_math
class RawUrl(span_token.SpanToken):
    """
    Span token that detects raw (bare) URLs in Markdown text.

    The renderers in this module register this token and render it through
    their `render_raw_url` method. The full matched URL is exposed as
    `self.url`.
    """
    # The matched text is used verbatim as the URL; presumably this flag tells
    # mistletoe not to tokenize the match any further (see mistletoe docs).
    parse_inner = False

    # regex to extract raw URLs from Markdown from:
    # https://github.com/trentm/python-markdown2/wiki/link-patterns#converting-links-into-links-automatically
    #
    # Group 1 is the whole URL. The optional port group must be `(:[0-9]+)?`:
    # with an escaped bracket (`\[0-9]+`) it would only match the literal text
    # ":[0-9]" and a URL such as `http://localhost:8080/x` would be truncated
    # to `http://localhost`.
    pattern = re.compile(
        r'((([A-Za-z]{3,9}:(?:\/\/)?)'  # scheme
        r'(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+(:[0-9]+)?'  # user@hostname:port
        r'|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)'  # www.|user@hostname
        r'((?:\/[\+~%\/\.\w\-]*)?'  # path
        r'\??(?:[\-\+=&;%@\.\w]*)'  # query parameters
        r'#?(?:[\.\!\/\\\w\-]*))?)'  # fragment
        r'(?![^<]*?(?:<\/\w+>|\/?>))'  # ignore anchor HTML tags
        r'(?![^\(]*?\))'  # ignore links in brackets (Markdown links and images)
    )

    def __init__(self, match):
        """Store the complete matched URL (capture group 1) on the token."""
        self.url = match.group(1)
class CustomHTMLRenderer(HTMLRenderer):
    """
    HTML renderer that linkifies raw URLs, keeps LaTeX intact for MathJax
    and highlights code blocks with Pygments.

    The main rendering function is `render_md`.
    """

    def __init__(self):
        super().__init__(RawUrl)
        # Math fragments removed by `render_md` and spliced back in later.
        # Initialised here so `render_block_code` does not fail with an
        # AttributeError when `render()` is used without `render_md`.
        # NOTE(review): assumes `replace_math` accepts an empty list - confirm.
        self.math = []

    def render_md(self, ds):
        """
        A wrapper for this class's .render() function.

        Input is a string containing markdown with LaTeX,
        Output is a string containing HTML.

        Uses `mathjax_editing` to strip out sections of the text
        which potentially contain LaTeX and then splice them back in.
        """
        ds_no_math, math = remove_math(ds, '$')
        # We have to run `mathjax_editing.replace_math` on the text in code
        # blocks before passing it to Pygments (see `render_block_code`),
        # otherwise `replace_math` will be confused by the added syntax
        # highlighting `<span>`s and won't be able to splice in those blocks.
        self.math = math
        html = self.render(Document(ds_no_math))
        return replace_math(html, self.math)

    def render_heading(self, token) -> str:
        """
        Override the default heading to provide links like in GitHub.

        Returns an `<h{level}>` element whose `id` is derived from the
        rendered heading content, followed by a `#` hover link to itself.

        TODO: populate a list of table of contents in the `.toc_html` field of the body
        """
        template = '<h{level} id="{anchor}" class="markdown-heading">{inner} <a class="hover-link" href="#{anchor}">#</a></h{level}>'
        inner: str = self.render_inner(token)
        # generate anchor following what github does
        # See info and links at https://gist.github.com/asabaylus/3071099
        # NOTE(review): the anchor is computed from the rendered inner HTML, so
        # tag names of inline markup end up in it; kept as-is because changing
        # it would alter existing anchors.
        anchor = inner.strip().lower()
        # Drop everything except word characters, hyphens and spaces, then
        # turn the spaces into hyphens.
        anchor = re.sub(r'[^\w\- ]+', '', anchor).replace(' ', '-')
        return template.format(level=token.level, inner=inner, anchor=anchor)

    # Use pygments highlighting.
    # https://github.com/miyuchina/mistletoe/blob/8f2f0161b2af92f8dd25a0a55cb7d437a67938bc/contrib/pygments_renderer.py
    # HTMLCodeFormatter class copied from markdown2:
    # https://github.com/trentm/python-markdown2/blob/2c58d70da0279fe19d04b3269b04d360a56c01ce/lib/markdown2.py#L1826
    class HtmlCodeFormatter(HtmlFormatter):
        """Pygments HTML formatter that emits `<div><pre><code>...`."""

        def _wrap_code(self, inner):
            """A function for use in a Pygments Formatter which
            wraps in <code> tags.
            """
            yield 0, "<code>"
            yield from inner
            yield 0, "</code>"

        def wrap(self, source, outfile=None):
            """Return the source with a code, pre, and div.

            Pygments >= 2.12 calls `wrap(source)` and adds the outer `<div>`
            itself afterwards; older versions call `wrap(source, outfile)`
            and expect the `<div>` to be added here. `outfile` is therefore
            optional and only used to tell the two call styles apart.
            """
            wrapped = self._wrap_pre(self._wrap_code(source))
            if outfile is None:
                # Newer Pygments: the caller applies `_wrap_div`.
                return wrapped
            return self._wrap_div(wrapped)

    # `cssclass` here should agree with what we have in pygments.css
    formatter = HtmlCodeFormatter(cssclass='codehilite')

    def render_block_code(self, token):
        """
        Render a code block as Pygments-highlighted HTML.

        The lexer is chosen from `token.language`, defaulting to 'lean' when
        no language is given and falling back to plain 'text' when the lexer
        lookup fails.
        """
        # replace math before highlighting
        code = replace_math(token.children[0].content, self.math)
        try:
            # default to 'lean' if no language is specified
            lexer = get_lexer(token.language or 'lean')
        except Exception:
            # Best-effort fallback (e.g. unknown language name). Not a bare
            # `except:` so KeyboardInterrupt/SystemExit still propagate.
            lexer = get_lexer('text')
        return highlight(code, lexer, self.formatter)

    @staticmethod
    def _raw_url_href(url):
        """Return a usable `href` for a URL matched by `RawUrl.pattern`."""
        if url.startswith('www.'):
            return 'http://' + url
        if '@' in url and not re.match(r'[A-Za-z]{3,9}:', url):
            # Scheme-less `user@hostname` match.
            return 'mailto:' + url
        return url

    def render_raw_url(self, token):
        """
        Linkify raw URLs.

        `RawUrl.pattern` also matches scheme-less `www.` and `user@hostname`
        text; emitted verbatim as `href` those would be resolved as links
        relative to the current page, so a scheme is prepended for them.
        The link text is always the URL exactly as written.
        """
        href = self._raw_url_href(token.url)
        return f'<a href="{href}">{token.url}</a>'
class PlaintextSummaryRenderer(BaseRenderer):
    """
    A renderer for single-line plaintext summaries, such as those used in
    social previews.

    Inline formatting is dropped (only the inner text is kept), block
    elements are joined with single spaces, and content that cannot be shown
    as plain text is replaced by a placeholder such as "<image>", "<link>",
    "<code>" or "<table>".
    """

    def __init__(self):
        # NOTE(review): `HTMLBlock` is registered but this class defines no
        # `render_html_block`; how such blocks are rendered depends on
        # mistletoe's fallback - confirm this is intended.
        super().__init__(RawUrl, block_token.HTMLBlock, span_token.HTMLSpan)

    # Inline markup that is reduced to its inner text.
    render_strong = BaseRenderer.render_inner
    render_emphasis = BaseRenderer.render_inner

    def render_inline_code(self, token):
        """Keep inline code, wrapped in backticks."""
        return f"`{self.render_inner(token)}`"

    def render_raw_text(self, token):
        """Return the text content unchanged."""
        return token.content

    render_strikethrough = BaseRenderer.render_inner

    def render_image(self, token):
        """Images are replaced by a placeholder."""
        return "<image>"

    # Links keep only their link text.
    render_link = BaseRenderer.render_inner

    def render_auto_link(self, token):
        """Autolinks are replaced by a placeholder."""
        return "<link>"

    render_escape_sequence = BaseRenderer.render_inner

    def render_heading(self, token):
        """Render a heading as "<text>: "."""
        return f"{self.render_inner(token)}: "

    def render_quote(self, token):
        """
        Render a block quote wrapped in typographic quotes.

        The inner text is stripped because paragraphs rendered by this class
        end with a separating space, which would otherwise land inside the
        closing quote; the separating space is added after the quote instead.
        """
        return f'“{self.render_inner(token).strip()}” '

    def render_paragraph(self, token):
        """Render a paragraph followed by a single separating space."""
        return f"{self.render_inner(token)} "

    def render_code_fence(self, token):
        """Fenced code blocks are replaced by a placeholder."""
        return "<code>"

    def render_block_code(self, token):
        """Code blocks are replaced by a placeholder."""
        return "<code>"

    render_list = BaseRenderer.render_inner

    def render_list_item(self, token):
        """
        Render a list item on the same line as its neighbours.

        Trailing whitespace is removed; if the item does not already end in
        '.', ',' or ';' a ';' is appended, and in every case a separating
        space follows.
        """
        text = self.render_inner(token).rstrip()
        separator = ' ' if text.endswith(('.', ',', ';')) else '; '
        return text + separator

    def render_table(self, token):
        """Tables are replaced by a placeholder."""
        return "<table>"

    def render_thematic_break(self, token):
        """Render a thematic break as an inline " // " separator."""
        return " // "

    def render_line_break(self, token):
        """Line breaks collapse to a single space."""
        return " "

    render_document = BaseRenderer.render_inner

    def render_link_definition(self, token):
        """Link definitions produce no output."""
        return ""

    def render_footnote(self, token):
        """Footnotes produce no output."""
        return ""

    def render_raw_url(self, token):
        """Raw URLs are replaced by a placeholder."""
        return "<link>"

    def render_html_span(self, token):
        """Inline HTML is passed through as its raw content."""
        return token.content