-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathinline_parser.dart
395 lines (324 loc) · 12.2 KB
/
inline_parser.dart
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
/// Maintains the internal state needed to parse inline span elements in
/// markdown.
///
/// The parser walks [source] from left to right. At each position it first
/// gives every open tag on the stack a chance to close, then tries each entry
/// of [syntaxes] in order, and otherwise treats the character as plain text.
class InlineParser {
  /// The inline syntaxes tried at each position, in priority order.
  ///
  /// The list is built on first access and stored in a static field, so every
  /// parser sees the same instances.
  static List<InlineSyntax> get syntaxes() {
    // Lazy initialize.
    if (_syntaxes == null) {
      _syntaxes = <InlineSyntax>[
        new AutolinkSyntax(),
        new LinkSyntax(),
        // "*" surrounded by spaces is left alone.
        new TextSyntax(@' \* '),
        // "_" surrounded by spaces is left alone.
        new TextSyntax(@' _ '),
        // Leave already-encoded HTML entities alone. Ensures we don't turn
        // "&amp;" into "&amp;amp;". Must come before the bare "&" rule below.
        new TextSyntax(@'&[#a-zA-Z0-9]*;'),
        // Encode "&".
        new TextSyntax(@'&', sub: '&amp;'),
        // Encode "<". (Why not encode ">" too? Gruber is toying with us.)
        new TextSyntax(@'<', sub: '&lt;'),
        // Parse "**strong**" tags.
        new TagSyntax(@'\*\*', tag: 'strong'),
        // Parse "__strong__" tags.
        new TagSyntax(@'__', tag: 'strong'),
        // Parse "*emphasis*" tags.
        new TagSyntax(@'\*', tag: 'em'),
        // Parse "_emphasis_" tags.
        // TODO(rnystrom): Underscores in the middle of a word should not be
        // parsed as emphasis like_in_this.
        new TagSyntax(@'_', tag: 'em'),
        // Parse inline code within double backticks: "``code``".
        new CodeSyntax(@'``\s?((?:.|\n)*?)\s?``'),
        // Parse inline code within backticks: "`code`".
        new CodeSyntax(@'`([^`]*)`')
      ];
    }

    return _syntaxes;
  }

  /// Backing store for [syntaxes]; `null` until first requested.
  static List<InlineSyntax> _syntaxes;

  /// The string of markdown being parsed.
  final String source;

  /// The markdown document this parser is parsing.
  final Document document;

  /// The current read position.
  int pos = 0;

  /// Starting position of the last unconsumed text.
  int start = 0;

  /// The currently open tags, innermost last. Entry 0 is a placeholder that
  /// collects the top-level results while [parse] runs.
  final List<TagState> _stack;

  InlineParser(this.source, this.document)
    : _stack = <TagState>[];

  /// Parses [source] and returns the resulting top-level nodes.
  List<Node> parse() {
    // Make a fake top tag to hold the results.
    _stack.add(new TagState(0, 0, null));

    while (!isDone) {
      bool matched = false;

      // See if any of the current tags on the stack match. We don't allow tags
      // of the same kind to nest, so this takes priority over other possible
      // matches. The fake top tag at index 0 has no syntax, so the loop stops
      // before reaching it.
      for (int i = _stack.length - 1; i > 0; i--) {
        if (_stack[i].tryMatch(this)) {
          matched = true;
          break;
        }
      }
      if (matched) continue;

      // See if the current text matches any defined markdown syntax.
      for (final syntax in syntaxes) {
        if (syntax.tryMatch(this)) {
          matched = true;
          break;
        }
      }
      if (matched) continue;

      // If we got here, it's just text.
      advanceBy(1);
    }

    // Unwind any unmatched tags and get the results.
    return _stack[0].close(this, null);
  }

  /// Emits the pending text between [start] and [pos] to the innermost open
  /// tag and marks it as written.
  void writeText() {
    writeTextRange(start, pos);
    start = pos;
  }

  /// Emits `source[start, end)` as text to the innermost open tag.
  ///
  /// Does nothing for an empty range. Does not touch the parser's own
  /// [InlineParser.start] or [pos].
  void writeTextRange(int start, int end) {
    if (end > start) {
      final text = source.substring(start, end);
      final nodes = _stack.last().children;

      // If the previous node is text too, just append.
      if ((nodes.length > 0) && (nodes.last() is Text)) {
        final newNode = new Text('${nodes.last().text}$text');
        nodes[nodes.length - 1] = newNode;
      } else {
        nodes.add(new Text(text));
      }
    }
  }

  /// Appends [node] to the children of the innermost open tag.
  void addNode(Node node) {
    _stack.last().children.add(node);
  }

  // TODO(rnystrom): Only need this because RegExp doesn't let you start
  // searching from a given offset.
  /// The part of [source] from [pos] to the end.
  String get currentSource() => source.substring(pos, source.length);

  /// Whether the read position has reached the end of [source].
  bool get isDone() => pos == source.length;

  /// Moves the read position forward by [length], leaving the skipped
  /// characters pending so a later [writeText] emits them as text.
  void advanceBy(int length) {
    pos += length;
  }

  /// Moves the read position forward by [length] and discards the skipped
  /// characters, so they are never emitted as text.
  void consume(int length) {
    pos += length;
    start = pos;
  }
}
/// Base class for a single kind of inline markdown construct.
///
/// Holds the regular expression that recognises the construct and leaves the
/// handling of a successful match to [onMatch].
class InlineSyntax {
  /// Expression that must match at the parser's current position.
  final RegExp pattern;

  InlineSyntax(String pattern)
    : pattern = new RegExp(pattern, true);
  // TODO(rnystrom): Should use named arg for RegExp multiLine.

  /// Tests this syntax at the parser's current position.
  ///
  /// Returns `false` without side effects unless [pattern] matches at offset
  /// zero of the remaining source. On a match, pending plain text is written
  /// out, [onMatch] is invoked, the matched text is consumed when [onMatch]
  /// returns `true`, and `true` is returned.
  bool tryMatch(InlineParser parser) {
    final found = pattern.firstMatch(parser.currentSource);

    // A hit further along in the remaining source does not count.
    if ((found == null) || (found.start() != 0)) return false;

    // Write any existing plain text up to this point.
    parser.writeText();

    final shouldConsume = onMatch(parser, found);
    if (shouldConsume) {
      parser.consume(found[0].length);
    }
    return true;
  }

  /// Handles a successful [match].
  ///
  /// Return `true` to have [tryMatch] consume the matched text, or `false` if
  /// the implementation has moved the parser itself.
  abstract bool onMatch(InlineParser parser, Match match);
}
/// Matches text that is passed through as-is, or swapped for a fixed
/// replacement.
class TextSyntax extends InlineSyntax {
  /// Replacement text, or `null` to keep the matched text unchanged.
  String substitute;

  TextSyntax(String pattern, [String sub])
    : super(pattern),
      substitute = sub;

  /// Emits [substitute] in place of the match when one was given; otherwise
  /// steps over the match so that it stays part of the pending plain text.
  bool onMatch(InlineParser parser, Match match) {
    if (substitute != null) {
      // Insert the substitution and let the caller consume the original.
      parser.addNode(new Text(substitute));
      return true;
    }

    // Just use the original matched text.
    parser.advanceBy(match[0].length);
    return false;
  }
}
/// Matches autolinks like `<http://foo.com>`.
class AutolinkSyntax extends InlineSyntax {
  AutolinkSyntax()
    : super(@'<((http|https|ftp)://[^>]*)>');
  // TODO(rnystrom): Make case insensitive.

  /// Emits an `a` element whose text and `href` are both the matched URL.
  ///
  /// The URL pattern accepts any character other than `>`, so the URL is
  /// passed through `escapeHtml` before it is used for either the text or the
  /// attribute, matching how [LinkSyntax] treats its `href`.
  bool onMatch(InlineParser parser, Match match) {
    final url = escapeHtml(match[1]);

    final anchor = new Element.text('a', url);
    anchor.attributes['href'] = url;
    parser.addNode(anchor);

    return true;
  }
}
/// Matches syntax made of an opening and a closing marker that together
/// become an element, like `*` for `<em>`. Allows nested tags.
class TagSyntax extends InlineSyntax {
  /// Expression that recognises the closing marker.
  final RegExp endPattern;

  /// Name of the element produced when the tag closes.
  final String tag;

  /// Creates a tag syntax opened by [pattern]. When [end] is omitted the
  /// closing marker uses the same expression as the opening one.
  TagSyntax(String pattern, [String tag, String end = null])
    : super(pattern),
      endPattern = new RegExp((end == null) ? pattern : end, true),
      tag = tag;
  // TODO(rnystrom): Doing this.field doesn't seem to work with named args.
  // TODO(rnystrom): Should use named arg for RegExp multiLine.

  /// Opens the tag by pushing a [TagState] that records where the opening
  /// marker begins and ends in the source.
  bool onMatch(InlineParser parser, Match match) {
    final markerStart = parser.pos;
    final markerEnd = markerStart + match[0].length;
    parser._stack.add(new TagState(markerStart, markerEnd, this));
    return true;
  }

  /// Closes the tag by wrapping the children collected in [state] in a [tag]
  /// element and handing it to the parser.
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    final element = new Element(tag, state.children);
    parser.addNode(element);
    return true;
  }
}
/// Matches inline links like `[blah] [id]` and `[blah] (url)`.
class LinkSyntax extends TagSyntax {
  /// The regex for the end of a link needs to handle both reference style and
  /// inline styles as well as optional titles for inline links. To make that
  /// a bit more palatable, this breaks it into pieces.
  ///
  /// The groups matched by the result are:
  ///
  /// 1. Non-empty if it's either a ref or inline link. Empty if it's just a
  ///    bare pair of square brackets with nothing after them.
  /// 2. The id inside `[]` for a reference-style link.
  /// 3. The URL for an inline link.
  /// 4. The title, if present, for an inline link.
  static get linkPattern() {
    // "[id]" reflink id.
    final refLink = @'\s?\[([^\]]*)\]';
    // Optional title in quotes.
    final title = @'(?:[ ]*"([^"]+)"|)';
    // "(url "title")" link.
    final inlineLink = '\\s?\\(([^ )]+)$title\\)';

    return '\](?:($refLink|$inlineLink)|)';
  }

  LinkSyntax()
    : super(@'\[', end: linkPattern);

  /// Turns the bracketed content in [state] into an anchor element.
  ///
  /// Returns `false` when no link can be produced: a bare `[text]` with no
  /// resolver or unsuitable content, or a reference to an unknown id.
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    final suffix = match[1];

    // If we didn't match refLink or inlineLink, then it means there was
    // nothing after the first square bracket, so it isn't a normal markdown
    // link at all. Instead, we allow users of the library to specify a special
    // resolver function ([setImplicitLinkResolver]) that may choose to handle
    // this. Otherwise, it's just treated as plain text.
    if ((suffix == null) || (suffix == '')) {
      // Only allow implicit links if the content is just text.
      // TODO(rnystrom): Do we want to relax this?
      if ((_implicitLinkResolver == null) ||
          (state.children.length != 1) ||
          (state.children[0] is! Text)) {
        return false;
      }

      Text link = state.children[0];

      // See if we have a resolver that will generate a link for us.
      final resolved = _implicitLinkResolver(link.text);
      if (resolved == null) return false;

      parser.addNode(resolved);
      return true;
    }

    var url;
    var title;

    final inlineUrl = match[3];
    if ((inlineUrl == null) || (inlineUrl == '')) {
      // Reference link like [foo] [bar].
      var id = match[2];
      if (id == '') {
        // The id is empty ("[]") so infer it from the contents.
        id = parser.source.substring(state.startPos + 1, parser.pos);
      }

      // Look up the link. If it's unknown just emit plaintext.
      final link = parser.document.refLinks[id];
      if (link == null) return false;

      url = link.url;
      title = link.title;
    } else {
      // Inline link like [foo](url).
      url = inlineUrl;
      title = match[4];

      // For whatever reason, markdown allows angle-bracketed URLs here.
      if (url.startsWith('<') && url.endsWith('>')) {
        url = url.substring(1, url.length - 1);
      }
    }

    final anchor = new Element('a', state.children);
    anchor.attributes['href'] = escapeHtml(url);
    if ((title != null) && (title != '')) {
      anchor.attributes['title'] = escapeHtml(title);
    }

    parser.addNode(anchor);
    return true;
  }
}
/// Matches backtick-enclosed inline code blocks.
class CodeSyntax extends InlineSyntax {
  /// Creates a code syntax from [pattern], whose first capture group is used
  /// as the code text.
  CodeSyntax(String pattern)
    : super(pattern);

  /// Emits a `code` element containing the HTML-escaped first capture group.
  bool onMatch(InlineParser parser, Match match) {
    final escaped = escapeHtml(match[1]);
    final code = new Element.text('code', escaped);
    parser.addNode(code);
    return true;
  }
}
/// Keeps track of a currently open tag while it is being parsed. The parser
/// maintains a stack of these so it can handle nested tags.
class TagState {
  /// The point in the original source where this tag started.
  int startPos;

  /// The point in the original source where open tag ended.
  int endPos;

  /// The syntax that created this node.
  final TagSyntax syntax;

  /// The nodes collected inside this tag so far.
  final List<Node> children;

  TagState(this.startPos, this.endPos, this.syntax)
    : children = <Node>[];

  /// Attempts to close this tag by matching the current text against its end
  /// pattern.
  ///
  /// Returns whether the end pattern matched at the parser's current position.
  bool tryMatch(InlineParser parser) {
    Match closer = syntax.endPattern.firstMatch(parser.currentSource);

    // The end marker has to sit exactly at the current position.
    if ((closer == null) || (closer.start() != 0)) return false;

    // Close the tag.
    close(parser, closer);
    return true;
  }

  /// Pops this tag off the stack, completes it, and adds it to the output.
  /// Will discard any unmatched tags that happen to be above it on the stack.
  /// If this is the last node in the stack, returns its children; otherwise
  /// returns `null`.
  List<Node> close(InlineParser parser, Match endMatch) {
    final stack = parser._stack;

    // If there are unclosed tags on top of this one when it's closed, that
    // means they are mismatched. Mismatched tags are treated as plain text in
    // markdown. So for each tag above this one, we write its start tag as text
    // and then add its children to this one's children.
    final firstAbove = stack.indexOf(this) + 1;
    final aboveCount = stack.length - firstAbove;

    // Remove the unmatched children.
    final orphans = stack.getRange(firstAbove, aboveCount);
    stack.removeRange(firstAbove, aboveCount);

    // Flatten them out onto this tag, which is now the top of the stack.
    for (final orphan in orphans) {
      // Write the start tag as text.
      parser.writeTextRange(orphan.startPos, orphan.endPos);

      // Bequeath its children unto this tag.
      children.addAll(orphan.children);
    }

    // Flush pending text into this tag, then pop it off the stack.
    parser.writeText();
    stack.removeLast();

    // If the stack is empty now, this is the special "results" node.
    if (stack.length == 0) return children;

    // We are still parsing, so add this to its parent's children.
    final closedCleanly = syntax.onMatchEnd(parser, endMatch, this);
    if (!closedCleanly) {
      // Didn't close correctly so revert to text.
      parser.start = startPos;
      parser.advanceBy(endMatch[0].length);
    } else {
      parser.consume(endMatch[0].length);
    }

    return null;
  }
}