Skip to content

Commit

Permalink
Assign line numbers to all block tokens during parsing. Added unit tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
anderskaplan committed Aug 14, 2023
1 parent 21cb864 commit 639e1fd
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 25 deletions.
21 changes: 13 additions & 8 deletions mistletoe/block_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class BlockToken(token.Token):
Attributes:
children (list): inner tokens.
line_number (int): starting line (1-based).
"""
def __init__(self, lines, tokenize_func):
self.children = tokenize_func(lines)
Expand Down Expand Up @@ -680,16 +681,17 @@ class Table(BlockToken):
repr_attributes = ("column_align",)
interrupt_paragraph = True

def __init__(self, lines):
def __init__(self, match):
lines, start_line = match
if '---' in lines[1]:
self.column_align = [self.parse_align(column)
for column in self.split_delimiter(lines[1])]
self.header = TableRow(lines[0], self.column_align)
self.children = [TableRow(line, self.column_align) for line in lines[2:]]
self.header = TableRow(lines[0], start_line, self.column_align)
self.children = [TableRow(line, start_line + offset, self.column_align) for offset, line in enumerate(lines[2:], start=2)]
else:
# note: not reachable, because read() guarantees the presence of three dashes
self.column_align = [None]
self.children = [TableRow(line) for line in lines]
self.children = [TableRow(line, start_line + offset) for offset, line in enumerate(lines)]

@staticmethod
def split_delimiter(delimiter):
Expand Down Expand Up @@ -733,12 +735,13 @@ def check_interrupts_paragraph(cls, lines):
def read(lines):
anchor = lines.get_pos()
line_buffer = [next(lines)]
start_line = lines.line_number()
while lines.peek() is not None and '|' in lines.peek():
line_buffer.append(next(lines))
if len(line_buffer) < 2 or '---' not in line_buffer[1]:
lines.set_pos(anchor)
return None
return line_buffer
return line_buffer, start_line


class TableRow(BlockToken):
Expand All @@ -757,10 +760,11 @@ class TableRow(BlockToken):
split_pattern = re.compile(r"(?<!\\)\|")
escaped_pipe_pattern = re.compile(r"(?<!\\)(\\\\)*\\\|")

def __init__(self, line, row_align=None):
def __init__(self, line, line_number, row_align=None):
self.row_align = row_align or [None]
self.line_number = line_number
cells = filter(None, self.split_pattern.split(line.strip()))
self.children = [TableCell(self.escaped_pipe_pattern.sub('\\1|', cell.strip()) if cell else '', align)
self.children = [TableCell(self.escaped_pipe_pattern.sub('\\1|', cell.strip()) if cell else '', line_number, align)
for cell, align in zip_longest(cells, self.row_align)]


Expand All @@ -775,8 +779,9 @@ class TableCell(BlockToken):
align (bool): align option for current cell (default to None).
"""
repr_attributes = ("align",)
def __init__(self, content, align=None):
def __init__(self, content, line_number, align=None):
self.align = align
self.line_number = line_number
super().__init__(content, span_token.tokenize_inner)


Expand Down
6 changes: 4 additions & 2 deletions mistletoe/block_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,10 @@ def tokenize_block(iterable, token_types, start_line=1):
while line is not None:
for token_type in token_types:
if token_type.start(line):
line_number = lines.line_number() + 1
result = token_type.read(lines)
if result is not None:
parse_buffer.append((token_type, result))
parse_buffer.append((token_type, result, line_number))
break
else: # unmatched newlines
next(lines)
Expand All @@ -99,9 +100,10 @@ def make_tokens(parse_buffer):
and span-level parsing is started here.
"""
tokens = []
for token_type, result in parse_buffer:
for token_type, result, line_number in parse_buffer:
token = token_type(result)
if token is not None:
token.line_number = line_number
tokens.append(token)
return tokens

Expand Down
2 changes: 1 addition & 1 deletion mistletoe/markdown_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def __init__(self, match):
class LinkReferenceDefinitionBlock(block_token.Footnote):
"""
A sequence of link reference definitions.
This is a container block token. Its children are link reference definition tokens.
This is a leaf block token. Its children are link reference definition tokens.
This class inherits from `Footnote` and modifies the behavior of the constructor,
to keep the tokens in the AST.
Expand Down
37 changes: 37 additions & 0 deletions test/samples/line_numbers.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Test document for line numbers

See test_line_numbers.py.
Every number written as inline code should match the line number of its nearest
parent block token. Same with titles of link reference definitions.

## Heading `7`

Basic paragraph.

> Block quote (with a paragraph inside)
> It's the start line that counts! `11`
> * a list inside the quote
> * with two items `14`
>
> Still the same block quote, but another paragraph `16`
1. List item `18`
2. Nested list
* item
* another item `21`
3. List item with a nested `22`
> block quote `23`
| Table `25` | Columns |
| ---------- | ------- |
| ? | ! `27` |

Paragraph with <p>inline HTML</p> `29`

Setext
heading `31`
------------

Paragraph with [ref] to a link reference definition.

[ref]: /url (37)
28 changes: 14 additions & 14 deletions test/test_block_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ def test_match(self):
self.assertTrue(hasattr(token, 'header'))
self.assertEqual(token.column_align, [None, None, None])
token.children
calls = [call(line, [None, None, None]) for line in lines[:1]+lines[2:]]
calls = [call(line, line_number, [None, None, None]) for line_number, line in enumerate(lines, start=1) if line_number != 2]
mock.assert_has_calls(calls)

def test_easy_table(self):
Expand All @@ -403,7 +403,7 @@ def test_easy_table(self):
self.assertTrue(hasattr(token, 'header'))
self.assertEqual(token.column_align, [1, None])
token.children
calls = [call(line, [1, None]) for line in lines[:1] + lines[2:]]
calls = [call(line, line_number, [1, None]) for line_number, line in enumerate(lines, start=1) if line_number != 2]
mock.assert_has_calls(calls)

def test_not_easy_table(self):
Expand Down Expand Up @@ -433,44 +433,44 @@ class TestTableRow(unittest.TestCase):
def test_match(self):
with patch('mistletoe.block_token.TableCell') as mock:
line = '| cell 1 | cell 2 |\n'
token = block_token.TableRow(line)
token = block_token.TableRow(line, 10)
self.assertEqual(token.row_align, [None])
mock.assert_has_calls([call('cell 1', None), call('cell 2', None)])
mock.assert_has_calls([call('cell 1', 10, None), call('cell 2', 10, None)])

def test_easy_table_row(self):
with patch('mistletoe.block_token.TableCell') as mock:
line = 'cell 1 | cell 2\n'
token = block_token.TableRow(line)
token = block_token.TableRow(line, 10)
self.assertEqual(token.row_align, [None])
mock.assert_has_calls([call('cell 1', None), call('cell 2', None)])
mock.assert_has_calls([call('cell 1', 10, None), call('cell 2', 10, None)])

def test_short_row(self):
with patch('mistletoe.block_token.TableCell') as mock:
line = '| cell 1 |\n'
token = block_token.TableRow(line, [None, None])
token = block_token.TableRow(line, 10, [None, None])
self.assertEqual(token.row_align, [None, None])
mock.assert_has_calls([call('cell 1', None), call('', None)])
mock.assert_has_calls([call('cell 1', 10, None), call('', 10, None)])

def test_escaped_pipe_in_cell(self):
with patch('mistletoe.block_token.TableCell') as mock:
line = '| pipe: `\\|` | cell 2\n'
token = block_token.TableRow(line, [None, None])
token = block_token.TableRow(line, 10, [None, None])
self.assertEqual(token.row_align, [None, None])
mock.assert_has_calls([call('pipe: `|`', None), call('cell 2', None)])
mock.assert_has_calls([call('pipe: `|`', 10, None), call('cell 2', 10, None)])

@unittest.skip('Even GitHub fails in here, workaround: always put a space before `|`')
def test_not_really_escaped_pipe_in_cell(self):
with patch('mistletoe.block_token.TableCell') as mock:
line = '|ending with a \\\\|cell 2\n'
token = block_token.TableRow(line, [None, None])
token = block_token.TableRow(line, 10, [None, None])
self.assertEqual(token.row_align, [None, None])
mock.assert_has_calls([call('ending with a \\\\', None), call('cell 2', None)])
mock.assert_has_calls([call('ending with a \\\\', 10, None), call('cell 2', 10, None)])


class TestTableCell(TestToken):
def test_match(self):
token = block_token.TableCell('cell 2')
self._test_token(token, 'cell 2', align=None)
token = block_token.TableCell('cell 2', 13)
self._test_token(token, 'cell 2', line_number=13, align=None)


class TestFootnote(unittest.TestCase):
Expand Down
57 changes: 57 additions & 0 deletions test/test_line_numbers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import unittest

import mistletoe.block_token as block_token
import mistletoe.span_token as span_token
from mistletoe.markdown_renderer import (
LinkReferenceDefinition,
LinkReferenceDefinitionBlock,
)


class TestLineNumbers(unittest.TestCase):
    """Check the ``line_number`` attribute that parsing assigns to block tokens.

    The sample document annotates itself: a number written as inline code
    (or as the title of a link reference definition) states the line number
    expected on the block token that directly contains it.
    """

    def setUp(self) -> None:
        # Enable the HTML tokens, and swap Footnote for the block type that
        # keeps link reference definitions in the AST.
        block_token.add_token(block_token.HTMLBlock)
        span_token.add_token(span_token.HTMLSpan)
        block_token.remove_token(block_token.Footnote)
        block_token.add_token(LinkReferenceDefinitionBlock)
        return super().setUp()

    def tearDown(self) -> None:
        # Undo the token registrations made in setUp.
        span_token.reset_tokens()
        block_token.reset_tokens()
        return super().tearDown()

    def test_main(self):
        """Parse the sample document and verify every annotated line number."""
        # See line_numbers.md for a description of how the test works.
        expected_checks = 13
        with open("test/samples/line_numbers.md", "r") as sample:
            document = block_token.Document(sample)
        # Comparing the tally against a fixed total catches annotations that
        # were silently not checked.
        self.assertEqual(self.check_line_numbers(document), expected_checks)

    def check_line_numbers(self, token: block_token.BlockToken):
        """Verify ``token`` and, recursively, its block-token descendants.

        Returns the number of tokens for which an expected line number was
        found and asserted.
        """
        expected = self.get_expected_line_number(token)
        checked = 0
        if expected:
            self.assertEqual(token.line_number, expected)
            checked = 1

        # A table's header row is stored separately from its children,
        # so it has to be visited explicitly.
        if isinstance(token, block_token.Table):
            checked += self.check_line_numbers(token.header)

        checked += sum(
            self.check_line_numbers(child)
            for child in token.children
            if isinstance(child, block_token.BlockToken)
        )
        return checked

    def get_expected_line_number(self, token: block_token.BlockToken):
        """Return the line number annotated directly on ``token``, if any.

        The annotation is taken from the first immediate child that is either
        an inline code span (its text content) or a link reference definition
        (its title). Returns ``None`` when no such child exists.
        """
        for child in token.children:
            if isinstance(child, span_token.InlineCode):
                return int(child.children[0].content)
            elif isinstance(child, LinkReferenceDefinition):
                return int(child.title)
        return None

0 comments on commit 639e1fd

Please sign in to comment.