From 59ead98e6afb0ac19c5745162b04be0a5a8f96e3 Mon Sep 17 00:00:00 2001 From: Damian Tarnawski Date: Sat, 2 Mar 2024 00:24:32 +0100 Subject: [PATCH 1/4] Progress on blockquote --- dev.js | 16 +-- mds/mds.js | 210 ++++++++++++++++++++++++------- mds/t.d.ts | 2 + test.js | 363 +++++++++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 480 insertions(+), 111 deletions(-) diff --git a/dev.js b/dev.js index 212cdd0..68b6379 100644 --- a/dev.js +++ b/dev.js @@ -19,7 +19,7 @@ const MESSAGE_RELOAD = "reload" function main() { const server = makeHttpServer(requestListener) - const wss = makeWebSocketServer() + const wss = new ws.WebSocketServer({port: WEB_SOCKET_PORT}) const watched_paths = /** @type {Set} */(new Set()) @@ -109,20 +109,6 @@ function makeHttpServer(requestListener) { return server } -/** @returns {ws.WebSocketServer} */ -function makeWebSocketServer() { - const wss = new ws.WebSocketServer({port: WEB_SOCKET_PORT}) - - // eslint-disable-next-line no-console - console.log( - `#` +`\n`+ - `# WebSocket server running at http://127.0.0.1:` + WEB_SOCKET_PORT +`\n`+ - `#` - ) - - return wss -} - /** @typedef {Parameters[0]} BufferLike */ /** @returns {void} */ diff --git a/mds/mds.js b/mds/mds.js index 6efce9a..75384f4 100644 --- a/mds/mds.js +++ b/mds/mds.js @@ -8,7 +8,7 @@ https://github.com/thetarnav/streaming-markdown export * from "./t.js" export const - ROOT = 1, // 1 + DOCUMENT = 1, // 1 PARAGRAPH = 2, // 2 HEADING_1 = 4, // 3 HEADING_2 = 8, // 4 @@ -18,7 +18,7 @@ export const HEADING_6 = 128, // 8 CODE_BLOCK = 256, // 9 CODE_FENCE = 512, // 10 - CODE_INLINE = 1024, // 11 + CODE_INLINE = 1024, // 11 ITALIC_AST = 2048, // 12 ITALIC_UND = 4096, // 13 STRONG_AST = 8192, // 14 @@ -26,6 +26,8 @@ export const STRIKE = 32768, // 16 LINK = 65536, // 17 IMAGE = 131072, // 18 + BLOCKQUOTE = 262144, // 19 + LINE_BREAK = 524288, // 20 /** `HEADING_1 | HEADING_2 | HEADING_3 | HEADING_4 | HEADING_5 | HEADING_6` */ ANY_HEADING = 252, /** `CODE_BLOCK | CODE_FENCE | CODE_INLINE` */ @@ -39,11 +41,15 @@ export const /** `STRONG_UND | ITALIC_UND` */ ANY_UND = 20480, /** `CODE | IMAGE` */ - NO_NESTING = 132864 + NO_NESTING = 132864, + /** `DOCUMENT | BLOCKQUOTE` */ + ANY_ROOT = 262145 /** @enum {(typeof Token_Type)[keyof typeof Token_Type]} */ export const Token_Type = /** @type {const} */({ - Root: ROOT, + Document: DOCUMENT, + Blockquote: BLOCKQUOTE, + Line_Break: LINE_BREAK, Paragraph: PARAGRAPH, Heading_1: HEADING_1, Heading_2: HEADING_2, @@ -68,8 +74,10 @@ export const Token_Type = /** @type {const} */({ * @returns {string } */ export function token_type_to_string(type) { switch (type) { - case ROOT: return "Root" + case DOCUMENT: return "Document" + case BLOCKQUOTE: return "Blockquote" case PARAGRAPH: return "Paragraph" + case LINE_BREAK: return "Line_Break" case HEADING_1: return "Heading_1" case HEADING_2: return "Heading_2" case HEADING_3: return "Heading_3" @@ -126,9 +134,10 @@ export function parser(renderer) { renderer : renderer, text : "", pending : "", - types : /**@type {*}*/([ROOT,,,,,]), + types : /**@type {*}*/([DOCUMENT,,,,,]), len : 0, code_fence: "", + newline_blockquote_idx: 0, } } @@ -173,12 +182,27 @@ export function parser_add_token(p, type) { p.renderer.add_node(p.renderer.data, type) } +/** + * @param {Parser } p + * @param {Token_Type} type + * @returns {void } */ +export function parser_add_block_token(p, type) { + while (!(p.types[p.len] & ANY_ROOT)) { + parser_end_token(p) + } + p.pending = "" + p.len += 1 + p.types[p.len] = type + p.renderer.add_node(p.renderer.data, type) +} + /** * Parse and render another chunk of markdown. * @param {Parser} p * @param {string} chunk * @returns {void } */ export function parser_write(p, chunk) { + char_loop: for (const char of chunk) { const in_token = p.types[p.len] const pending_with_char = p.pending + char @@ -187,7 +211,8 @@ export function parser_write(p, chunk) { Token specific checks */ switch (in_token) { - case ROOT: { + case DOCUMENT: + case BLOCKQUOTE: console.assert(p.text.length === 0, "Root should not have any text") switch (pending_with_char) { @@ -198,6 +223,8 @@ export function parser_write(p, chunk) { case "##### ": parser_add_token(p, HEADING_5) ;continue case "###### ": parser_add_token(p, HEADING_6) ;continue case "```": parser_add_token(p, CODE_FENCE) ;continue + case "> ": + case ">": parser_add_token(p, BLOCKQUOTE) ;continue case " ": case " \t": case " \t": @@ -233,7 +260,6 @@ export function parser_write(p, chunk) { case " ": case " ": case " ": - parser_add_token(p, PARAGRAPH) p.pending = char continue default: @@ -242,8 +268,74 @@ export function parser_write(p, chunk) { p.pending = char continue } + case LINE_BREAK: + console.assert(p.text.length === 0, "Text when in line break") - } + outer: + switch (pending_with_char) { + case "# ": parser_add_block_token(p, HEADING_1) ;continue + case "## ": parser_add_block_token(p, HEADING_2) ;continue + case "### ": parser_add_block_token(p, HEADING_3) ;continue + case "#### ": parser_add_block_token(p, HEADING_4) ;continue + case "##### ": parser_add_block_token(p, HEADING_5) ;continue + case "###### ": parser_add_block_token(p, HEADING_6) ;continue + case "```": parser_add_block_token(p, CODE_FENCE) ;continue + case " ": + case " \t": + case " \t": + case " \t": + case "\t": parser_add_block_token(p, CODE_BLOCK) ;continue + case "#": + case "##": + case "###": + case "####": + case "#####": + case "######": + case "#######": + case "`": + case "``": + case " ": + case " ": + case " ": + p.pending = pending_with_char + continue + case "\n": + console.log("-----------") + continue + case "> ": + case ">": + for (;p.newline_blockquote_idx < p.len; p.newline_blockquote_idx += 1) { + if (p.types[p.newline_blockquote_idx] === BLOCKQUOTE) { + p.newline_blockquote_idx += 1 + p.pending = "" + continue char_loop + } + } + + parser_add_block_token(p, BLOCKQUOTE) + continue + } + + switch (p.pending) { + /* `Code Inline` */ + case "`": + parser_end_token(p) + parser_add_text(p) + parser_add_token(p, CODE_INLINE) + p.text = char + continue + /* Trim leading spaces */ + case " ": + case " ": + case " ": + p.pending = char + continue + default: + p.text = p.pending + parser_end_token(p) + p.pending = char + continue + } case CODE_BLOCK: switch (pending_with_char) { case "\n ": @@ -271,8 +363,6 @@ export function parser_write(p, chunk) { continue } case CODE_FENCE: { - console.assert(p.len === 1, "Code block is always a top-level token") - switch (p.code_fence) { case 1: /* can end */ switch (pending_with_char) { @@ -325,6 +415,11 @@ export function parser_write(p, chunk) { p.pending = char continue } + if ('`' === p.pending) { + parser_add_text(p) + parser_end_token(p) + continue + } if ('`' === char) { p.text += p.pending parser_add_text(p) @@ -449,19 +544,6 @@ export function parser_write(p, chunk) { Common checks */ switch (p.pending) { - /* Newline */ - case "\n": - parser_add_text(p) - /* Paragraph */ - if ('\n' === char) { - while (p.len > 0) parser_end_token(p) - } - /* Line break */ - else { - p.renderer.add_text(p.renderer.data, '\n') - p.pending = char - } - continue /* Escape character */ case "\\": if (in_token & ANY_CODE) break @@ -472,12 +554,48 @@ export function parser_write(p, chunk) { } else { const char_code = char.charCodeAt(0) p.pending = "" - p.text += (char_code >= 48 && char_code <= 90) || // 0-9 A-Z - (char_code >= 97 && char_code <= 122) // a-z - ? pending_with_char - : char + p.text += (char_code >= 48 && char_code <= 57) || // 0-9 + (char_code >= 65 && char_code <= 90) || // A-Z + (char_code >= 97 && char_code <= 122) // a-z + ? pending_with_char + : char } continue + /* Newline */ + case "\n": + switch (char) { + /* Trim leading spaces */ + case ' ': + continue + /* Paragraph */ + case '\n': + parser_add_text(p) + while (p.len > 0) parser_end_token(p) + continue + /* Line break */ + default: + parser_add_text(p) + parser_add_token(p, LINE_BREAK) + // p.renderer.add_text(p.renderer.data, '\n') + // p.after_line_break = true + p.newline_blockquote_idx = 1 + p.pending = char + continue + } + // /* > Blockquote */ + // case ">": + // if (!p.after_line_break || in_token & NO_NESTING) break + + // for (;p.newline_blockquote_idx < p.len; p.newline_blockquote_idx += 1) { + // if (p.types[p.newline_blockquote_idx] === BLOCKQUOTE) { + // p.newline_blockquote_idx += 1 + // continue + // } + // } + + // parser_add_token(p, BLOCKQUOTE) + // p.pending = char + // continue /* `Code Inline` */ case "`": if (!(in_token & NO_NESTING) && @@ -605,22 +723,24 @@ export function default_add_node(data, type) { /**@type {HTMLElement}*/ let slot switch (type) { - case ROOT: return // node is already root - case PARAGRAPH: mount = slot = document.createElement("p") ;break - case HEADING_1: mount = slot = document.createElement("h1") ;break - case HEADING_2: mount = slot = document.createElement("h2") ;break - case HEADING_3: mount = slot = document.createElement("h3") ;break - case HEADING_4: mount = slot = document.createElement("h4") ;break - case HEADING_5: mount = slot = document.createElement("h5") ;break - case HEADING_6: mount = slot = document.createElement("h6") ;break + case DOCUMENT: return // node is already a document + case BLOCKQUOTE: mount = slot = document.createElement("blockquote");break + case PARAGRAPH: mount = slot = document.createElement("p") ;break + case LINE_BREAK: mount = slot = document.createElement("br") ;break + case HEADING_1: mount = slot = document.createElement("h1") ;break + case HEADING_2: mount = slot = document.createElement("h2") ;break + case HEADING_3: mount = slot = document.createElement("h3") ;break + case HEADING_4: mount = slot = document.createElement("h4") ;break + case HEADING_5: mount = slot = document.createElement("h5") ;break + case HEADING_6: mount = slot = document.createElement("h6") ;break case ITALIC_AST: - case ITALIC_UND: mount = slot = document.createElement("em") ;break + case ITALIC_UND: mount = slot = document.createElement("em") ;break case STRONG_AST: - case STRONG_UND: mount = slot = document.createElement("strong");break - case STRIKE: mount = slot = document.createElement("s") ;break - case CODE_INLINE:mount = slot = document.createElement("code") ;break - case LINK: mount = slot = document.createElement("a") ;break - case IMAGE: mount = slot = document.createElement("img") ;break + case STRONG_UND: mount = slot = document.createElement("strong") ;break + case STRIKE: mount = slot = document.createElement("s") ;break + case CODE_INLINE:mount = slot = document.createElement("code") ;break + case LINK: mount = slot = document.createElement("a") ;break + case IMAGE: mount = slot = document.createElement("img") ;break case CODE_BLOCK: case CODE_FENCE: mount = document.createElement("pre") @@ -641,11 +761,7 @@ export function default_end_node(data) { /** @type {Default_Renderer_Add_Text} */ export function default_add_text(data, text) { - switch (text) { - case "" : break - case "\n": data.nodes[data.index].appendChild(document.createElement("br")) ;break - default : data.nodes[data.index].appendChild(document.createTextNode(text)) - } + data.nodes[data.index].appendChild(document.createTextNode(text)) } /** @type {Default_Renderer_Set_Attr} */ diff --git a/mds/t.d.ts b/mds/t.d.ts index 783a462..ab86812 100644 --- a/mds/t.d.ts +++ b/mds/t.d.ts @@ -17,6 +17,8 @@ export type Parser = { * 0 : cannot end */ code_fence: string | 0 | 1 + /* For Blockquote parsing */ + newline_blockquote_idx: number } export type Renderer_Add_Node = (data: TData, type: Token_Type) => void diff --git a/test.js b/test.js index a09c2f1..7acc94a 100644 --- a/test.js +++ b/test.js @@ -29,7 +29,7 @@ import * as mds from "./mds/mds.js" function test_renderer() { /** @type {Test_Renderer_Node} */ const root = { - type : mds.Token_Type.Root, + type : mds.Token_Type.Document, children: [] } return { @@ -55,12 +55,7 @@ function test_renderer_add_node(data, type) { } /** @type {Test_Add_Text} */ function test_renderer_add_text(data, text) { - if (text === "") return - - if (text !== "\n" && - typeof data.node.children[data.node.children.length - 1] === "string" && - data.node.children[data.node.children.length - 1] !== "\n" - ) { + if (typeof data.node.children[data.node.children.length - 1] === "string") { data.node.children[data.node.children.length - 1] += text } else { data.node.children.push(text) @@ -83,6 +78,163 @@ function test_renderer_set_attr(data, type, value) { } } +/** @type {Test_Renderer_Node} */ +const br = { + type : mds.Token_Type.Line_Break, + children: [] +} + +/** + * @param {number} len + * @param {number} h + * @returns {string} */ +function compare_pad(len, h) { + let txt = "" + if (h < 0) { + txt += "\u001b[31m-" + } else if (h > 0) { + txt += "\u001b[32m+" + } + else { + txt += " " + } + txt += " " + for (let i = 0; i < len; i += 1) { + txt += " " + } + txt += "\u001b[0m" + return txt +} + +/** + * @param {string } text + * @param {string[]} lines + * @param {number } len + * @param {number} h + * @returns {void } */ +function compare_push_text(text, lines, len, h) { + lines.push(compare_pad(len, h) + JSON.stringify(text)) +} + +/** + * @param {Test_Renderer_Node} node + * @param {string[]} lines + * @param {number} len + * @param {number} h + * @returns {void} */ +function compare_push_node(node, lines, len, h) { + compare_push_type(node.type, lines, len, h) + // if (node.attrs !== undefined) { + // lines.push(h + compare_pad(len) + JSON.stringify(node.attrs)) + // } + for (const child of node.children) { + if (typeof child === "string") { + compare_push_text(child, lines, len + 1, h) + } else { + compare_push_node(child, lines, len + 1, h) + } + } +} + +/** + * @param {mds.Token_Type} type + * @param {string[]} lines + * @param {number} len + * @param {number} h + * @returns {void} */ +function compare_push_type(type, lines, len, h) { + lines.push(compare_pad(len, h) + mds.token_type_to_string(type)) +} + +/** + * @param {Children} children + * @param {Children} expected_children + * @param {string[]} lines + * @param {number} len + * @returns {boolean} */ +function compare_children(children, expected_children, lines, len) { + const path = /** @type {Test_Renderer_Node[]} */(new Array(10)) + let result = true + + let i = 0 + for (; i < children.length; i += 1) { + const child = children[i] + /** @type {string | Test_Renderer_Node | undefined} */ + const expected = (expected_children[i]) + + if (typeof child === "string") { + if (typeof expected === "string") { + if (child === expected) { + compare_push_text(expected, lines, len, 0) + } else { + compare_push_text(child, lines, len, +1) + compare_push_text(expected, lines, len, -1) + result = false + } + } + else if (expected === undefined) { + compare_push_text(child, lines, len, +1) + result = false + } + else { + compare_push_text(child, lines, len, +1) + compare_push_node(expected, lines, len, -1) + result = false + } + } else { + if (typeof expected === "string") { + compare_push_text(expected, lines, len, -1) + compare_push_node(child, lines, len, +1) + result = false + } + else if (expected === undefined) { + compare_push_node(child, lines, len, +1) + result = false + } + else { + if (child.type === expected.type) { + compare_push_type(child.type, lines, len, 0) + } else { + compare_push_type(child.type, lines, len, +1) + compare_push_type(expected.type, lines, len, -1) + result = false + } + path[len] = child + result = compare_children(child.children, expected.children, lines, len + 1) && result + } + } + } + + for (; i < expected_children.length; i += 1) { + const expected = expected_children[i] + if (typeof expected === "string") { + compare_push_text(expected, lines, len, -1) + } else { + compare_push_node(expected, lines, len, -1) + result = false + } + } + + return result +} + +/** + * @param {Children} children + * @param {Children} expected_children + * @returns {void} */ +function assert_children(children, expected_children) { + /** @type {string[]} */ + const lines = [] + const result = compare_children(children, expected_children, lines, 0) + if (!result) { + const stl = Error.stackTraceLimit + Error.stackTraceLimit = 0 + const e = new Error("Children not equal:\n" + lines.join("\n") + "\n") + Error.stackTraceLimit = stl + throw e + } +} + /** * @param {string } title * @param {string } markdown @@ -97,7 +249,7 @@ function test_single_write(title, markdown, expected_children) { mds.parser_write(parser, markdown) mds.parser_end(parser) - assert.deepEqual(renderer.data.root.children, expected_children) + assert_children(renderer.data.root.children, expected_children) }) t.test(title + " (by char)", () => { @@ -109,7 +261,7 @@ function test_single_write(title, markdown, expected_children) { } mds.parser_end(parser) - assert.deepEqual(renderer.data.root.children, expected_children) + assert_children(renderer.data.root.children, expected_children) }) } @@ -151,7 +303,7 @@ test_single_write("Line Breaks", "foo\nbar", [{ type : mds.Token_Type.Paragraph, - children: ["foo", "\n", "bar"], + children: ["foo", br, "bar"], }] ) @@ -161,16 +313,16 @@ test_single_write("Line Breaks with Italic", type : mds.Token_Type.Paragraph, children: [{ type : mds.Token_Type.Italic_Ast, - children: ["a", "\n", "b"] + children: ["a", br, "b"] }], }] ) test_single_write("Escaped Line Breaks", - 'a'+'\\'+'\n'+'b', + "a\\\nb", [{ type : mds.Token_Type.Paragraph, - children: ['a', '\n', 'b'], + children: ["a", br, "b"], }] ) @@ -240,7 +392,7 @@ test_single_write("Code with line break", type : mds.Token_Type.Paragraph, children: [{ type : mds.Token_Type.Code_Inline, - children: ["a", "\n", "b"] + children: ["a", br, "b"] }], }] ) @@ -314,41 +466,15 @@ for (const indent of [ children: [" foo"] }] ) - - { - const title = "Code_Block multiple lines" - const markdown = - indent + "foo\n"+ - indent + "bar" - - t.test(title, () => { - const renderer = test_renderer() - const parser = mds.parser(renderer) - - mds.parser_write(parser, markdown) - mds.parser_end(parser) - - assert.deepEqual(renderer.data.root.children, [{ - type : mds.Token_Type.Code_Block, - children: ["foo\nbar"] - }]) - }) - - t.test(title + " (by char)", () => { - const renderer = test_renderer() - const parser = mds.parser(renderer) - - for (const char of markdown) { - mds.parser_write(parser, char) - } - mds.parser_end(parser) - - assert.deepEqual(renderer.data.root.children, [{ - type : mds.Token_Type.Code_Block, - children: ["foo", "\n", "bar"] - }]) - }) - } + + test_single_write("Code_Block multiple lines", + indent + "foo\n" + + indent + "bar", + [{ + type : mds.Token_Type.Code_Block, + children: ["foo\nbar"] + }] + ) test_single_write("Code_Block end", indent+"foo\n" + @@ -617,3 +743,142 @@ test_single_write("Un-Escaped link Both", }] }] ) + +test_single_write("Blockquote", + "> foo", + [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: ["foo"], + }] + }] +) + +test_single_write("Blockquote no-space", + ">foo", + [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: ["foo"], + }] + }] +) + +test_single_write("Blockquote Escape", + "\\> foo", + [{ + type : mds.Token_Type.Paragraph, + children: ["> foo"], + }] +) + +test_single_write("Blockquote line break", + "> foo\nbar", + [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: ["foo", br, "bar"], + }] + }] +) + +test_single_write("Blockquote continued", + "> foo\n> bar", + [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: ["foo", br, "bar"], + }] + }] +) + +test_single_write("Blockquote end", + "> foo\n\nbar", + [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: ["foo"], + }] + }, { + type : mds.Token_Type.Paragraph, + children: ["bar"], + }] +) + +test_single_write("Blockquote heading", + "> # foo", + [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Heading_1, + children: ["foo"], + }] + }] +) + +test_single_write("Blockquote codeblock", + "> ```\nfoo\n```", + [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Code_Fence, + children: ["foo"], + }] + }] +) + +test_single_write("Blockquote blockquote", + "> > foo", + [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: ["foo"], + }] + }] + }] +) + +test_single_write("Blockquote up blockquote", + "> foo\n"+ + "> > bar", + [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: ["foo", br], + }, { + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: ["bar"], + }] + }] + }] +) + +test_single_write("Blockquote down blockquote", + "> > foo\n"+ + "> \n"+ + "> bar", + [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: ["foo"], + }] + }, { + type : mds.Token_Type.Paragraph, + children: ["bar"], + }] + }] +) \ No newline at end of file From 0387f575060ba9a39c10fc82607db8dbc3e283fd Mon Sep 17 00:00:00 2001 From: Damian Tarnawski Date: Sat, 2 Mar 2024 16:19:57 +0100 Subject: [PATCH 2/4] Blockquote is working --- mds/mds.js | 153 +++++++++++++++++++---------------------------------- mds/t.d.ts | 1 + test.js | 87 ++++++++++++++++++++++++++---- 3 files changed, 131 insertions(+), 110 deletions(-) diff --git a/mds/mds.js b/mds/mds.js index 75384f4..13efda3 100644 --- a/mds/mds.js +++ b/mds/mds.js @@ -138,6 +138,7 @@ export function parser(renderer) { len : 0, code_fence: "", newline_blockquote_idx: 0, + line_break: false, } } @@ -146,7 +147,6 @@ export function parser(renderer) { * @param {Parser} p * @returns {void } */ export function parser_end(p) { - if (p.len === 0) return parser_write(p, "\n") parser_add_text(p) } @@ -207,6 +207,48 @@ export function parser_write(p, chunk) { const in_token = p.types[p.len] const pending_with_char = p.pending + char + if (p.line_break) { + console.assert(p.text.length === 0, "Text when in line break") + + switch (p.pending) { + case " ": + p.pending = char + continue char_loop + case ">": + p.pending = char + + while (p.newline_blockquote_idx+1 < p.len) { + p.newline_blockquote_idx += 1 + if (p.types[p.newline_blockquote_idx] === BLOCKQUOTE) { + continue char_loop + } + } + + p.line_break=false; + while (p.newline_blockquote_idx < p.len) { + parser_end_token(p) + } + p.newline_blockquote_idx += 1 + parser_add_token(p, BLOCKQUOTE) + continue char_loop + case "\n": + while (p.newline_blockquote_idx < p.len) { + parser_end_token(p) + } + + p.pending = char + p.line_break=false + p.newline_blockquote_idx = 0 + continue char_loop + default: + p.line_break=false + parser_add_text(p) + p.renderer.add_node(p.renderer.data, LINE_BREAK) + p.renderer.end_node(p.renderer.data) + break + } + } + /* Token specific checks */ @@ -223,8 +265,6 @@ export function parser_write(p, chunk) { case "##### ": parser_add_token(p, HEADING_5) ;continue case "###### ": parser_add_token(p, HEADING_6) ;continue case "```": parser_add_token(p, CODE_FENCE) ;continue - case "> ": - case ">": parser_add_token(p, BLOCKQUOTE) ;continue case " ": case " \t": case " \t": @@ -246,80 +286,25 @@ export function parser_write(p, chunk) { continue case "\n": continue - } - - switch (p.pending) { - /* `Code Inline` */ - case "`": - parser_add_token(p, PARAGRAPH) - parser_add_text(p) - parser_add_token(p, CODE_INLINE) - p.text = char - continue - /* Trim leading spaces */ - case " ": - case " ": - case " ": - p.pending = char - continue - default: - p.text = p.pending - parser_add_token(p, PARAGRAPH) - p.pending = char - continue - } - case LINE_BREAK: - console.assert(p.text.length === 0, "Text when in line break") - - outer: - switch (pending_with_char) { - case "# ": parser_add_block_token(p, HEADING_1) ;continue - case "## ": parser_add_block_token(p, HEADING_2) ;continue - case "### ": parser_add_block_token(p, HEADING_3) ;continue - case "#### ": parser_add_block_token(p, HEADING_4) ;continue - case "##### ": parser_add_block_token(p, HEADING_5) ;continue - case "###### ": parser_add_block_token(p, HEADING_6) ;continue - case "```": parser_add_block_token(p, CODE_FENCE) ;continue - case " ": - case " \t": - case " \t": - case " \t": - case "\t": parser_add_block_token(p, CODE_BLOCK) ;continue - case "#": - case "##": - case "###": - case "####": - case "#####": - case "######": - case "#######": - case "`": - case "``": - case " ": - case " ": - case " ": - p.pending = pending_with_char - continue - case "\n": - console.log("-----------") - continue case "> ": case ">": - for (;p.newline_blockquote_idx < p.len; p.newline_blockquote_idx += 1) { + while (p.newline_blockquote_idx+1 <= p.len) { + p.newline_blockquote_idx += 1 if (p.types[p.newline_blockquote_idx] === BLOCKQUOTE) { - p.newline_blockquote_idx += 1 p.pending = "" continue char_loop } } - parser_add_block_token(p, BLOCKQUOTE) + p.newline_blockquote_idx += 1 + parser_add_token(p, BLOCKQUOTE) continue } switch (p.pending) { /* `Code Inline` */ case "`": - parser_end_token(p) + parser_add_token(p, PARAGRAPH) parser_add_text(p) parser_add_token(p, CODE_INLINE) p.text = char @@ -332,7 +317,7 @@ export function parser_write(p, chunk) { continue default: p.text = p.pending - parser_end_token(p) + parser_add_token(p, PARAGRAPH) p.pending = char continue } @@ -563,43 +548,14 @@ export function parser_write(p, chunk) { continue /* Newline */ case "\n": - switch (char) { - /* Trim leading spaces */ - case ' ': - continue - /* Paragraph */ - case '\n': - parser_add_text(p) - while (p.len > 0) parser_end_token(p) - continue - /* Line break */ - default: - parser_add_text(p) - parser_add_token(p, LINE_BREAK) - // p.renderer.add_text(p.renderer.data, '\n') - // p.after_line_break = true - p.newline_blockquote_idx = 1 - p.pending = char - continue - } - // /* > Blockquote */ - // case ">": - // if (!p.after_line_break || in_token & NO_NESTING) break - - // for (;p.newline_blockquote_idx < p.len; p.newline_blockquote_idx += 1) { - // if (p.types[p.newline_blockquote_idx] === BLOCKQUOTE) { - // p.newline_blockquote_idx += 1 - // continue - // } - // } - - // parser_add_token(p, BLOCKQUOTE) - // p.pending = char - // continue + p.line_break = true + p.newline_blockquote_idx = 0 + p.pending = char + parser_add_text(p) + continue /* `Code Inline` */ case "`": if (!(in_token & NO_NESTING) && - '\n'!== char && '`' !== char ) { parser_add_text(p) @@ -649,7 +605,6 @@ export function parser_write(p, chunk) { /* [Image](url) */ case "[": if (!(in_token & (NO_NESTING | LINK)) && - '\n'!== char && ']' !== char ) { parser_add_text(p) diff --git a/mds/t.d.ts b/mds/t.d.ts index ab86812..be8747e 100644 --- a/mds/t.d.ts +++ b/mds/t.d.ts @@ -19,6 +19,7 @@ export type Parser = { code_fence: string | 0 | 1 /* For Blockquote parsing */ newline_blockquote_idx: number + line_break: boolean } export type Renderer_Add_Node = (data: TData, type: Token_Type) => void diff --git a/test.js b/test.js index 7acc94a..906ce50 100644 --- a/test.js +++ b/test.js @@ -91,16 +91,14 @@ const br = { function compare_pad(len, h) { let txt = "" if (h < 0) { - txt += "\u001b[31m-" + txt += "\u001b[31m" } else if (h > 0) { - txt += "\u001b[32m+" - } - else { - txt += " " + txt += "\u001b[32m" + } else { + txt += "\u001b[30m" } - txt += " " - for (let i = 0; i < len; i += 1) { - txt += " " + for (let i = 0; i <= len; i += 1) { + txt += ": " } txt += "\u001b[0m" return txt @@ -143,7 +141,7 @@ function compare_push_node(node, lines, len, h) { * @param {number} h * @returns {void} */ function compare_push_type(type, lines, len, h) { - lines.push(compare_pad(len, h) + mds.token_type_to_string(type)) + lines.push(compare_pad(len, h) + "\u001b[36m" + mds.token_type_to_string(type) + "\u001b[0m") } /** @@ -853,7 +851,7 @@ test_single_write("Blockquote up blockquote", type : mds.Token_Type.Blockquote, children: [{ type : mds.Token_Type.Paragraph, - children: ["foo", br], + children: ["foo"], }, { type : mds.Token_Type.Blockquote, children: [{ @@ -864,7 +862,7 @@ test_single_write("Blockquote up blockquote", }] ) -test_single_write("Blockquote down blockquote", +test_single_write("Blockquote blockquote down", "> > foo\n"+ "> \n"+ "> bar", @@ -881,4 +879,71 @@ test_single_write("Blockquote down blockquote", children: ["bar"], }] }] +) + +test_single_write("Blockquote blockquote continued", + "> > foo\n"+ + "> >\n"+ + "> > bar", + [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: ["foo"], + }, { + type : mds.Token_Type.Paragraph, + children: ["bar"], + }] + }] + }] +) + +test_single_write("Blockquote up down", + "> > foo\n"+ + ">\n"+ + "> > bar", + [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: ["foo"], + }] + }, { + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: ["bar"], + }] + }] + }] +) + +test_single_write("Blockquote with code and line break", + "> > `a\n"+ + "b`\n"+ + ">\n"+ + "> > c", + [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: [{ + type : mds.Token_Type.Code_Inline, + children: ["a", br, "b"], + }] + }] + }, { + type : mds.Token_Type.Blockquote, + children: [{ + type : mds.Token_Type.Paragraph, + children: ["c"], + }], + }] + }] ) \ No newline at end of file From cb55e58ad701cc00cabab8f155b41d611c43827f Mon Sep 17 00:00:00 2001 From: Damian Tarnawski Date: Sat, 2 Mar 2024 16:36:42 +0100 Subject: [PATCH 3/4] Improve comparing nodes --- test.js | 128 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 72 insertions(+), 56 deletions(-) diff --git a/test.js b/test.js index 906ce50..36e9dbf 100644 --- a/test.js +++ b/test.js @@ -122,9 +122,6 @@ function compare_push_text(text, lines, len, h) { * @returns {void} */ function compare_push_node(node, lines, len, h) { compare_push_type(node.type, lines, len, h) - // if (node.attrs !== undefined) { - // lines.push(h + compare_pad(len) + JSON.stringify(node.attrs)) - // } for (const child of node.children) { if (typeof child === "string") { compare_push_text(child, lines, len + 1, h) @@ -144,6 +141,75 @@ function compare_push_type(type, lines, len, h) { lines.push(compare_pad(len, h) + "\u001b[36m" + mds.token_type_to_string(type) + "\u001b[0m") } +/** + * @param {string | Test_Renderer_Node | undefined} actual + * @param {string | Test_Renderer_Node | undefined} expected + * @param {string[]} lines + * @param {number} len + * @returns {boolean} */ +function compare_child(actual, expected, lines, len) { + if (actual === undefined) { + if (expected === undefined) return true + + if (typeof expected === "string") { + compare_push_text(expected, lines, len, -1) + } else { + compare_push_node(expected, lines, len, -1) + } + + return false + } + + if (expected === undefined) { + if (typeof actual === "string") { + compare_push_text(actual, lines, len, +1) + } else { + compare_push_node(actual, lines, len, +1) + } + + return false + } + + if (typeof actual === "string") { + if (typeof expected === "string") { + if (actual === expected) { + compare_push_text(expected, lines, len, 0) + return true + } + + compare_push_text(actual, lines, len, +1) + compare_push_text(expected, lines, len, -1) + return false + } + + compare_push_text(actual, lines, len, +1) + compare_push_node(expected, lines, len, -1) + return false + } + + if (typeof expected === "string") { + compare_push_text(expected, lines, len, -1) + compare_push_node(actual, lines, len, +1) + return false + } + + if (actual.type === expected.type) { + compare_push_type(actual.type, lines, len, 0) + } else { + compare_push_type(actual.type, lines, len, +1) + compare_push_type(expected.type, lines, len, -1) + return false + } + + if (JSON.stringify(actual.attrs) !== JSON.stringify(expected.attrs)) { + compare_push_text(JSON.stringify(actual.attrs), lines, len + 1, +1) + compare_push_text(JSON.stringify(expected.attrs), lines, len + 1, -1) + return false + } + + return compare_children(actual.children, expected.children, lines, len + 1) +} + /** * @param {Children} children * @param {Children} expected_children @@ -151,66 +217,16 @@ function compare_push_type(type, lines, len, h) { * @param {number} len * @returns {boolean} */ function compare_children(children, expected_children, lines, len) { - const path = /** @type {Test_Renderer_Node[]} */(new Array(10)) let result = true let i = 0 for (; i < children.length; i += 1) { - const child = children[i] - /** @type {string | Test_Renderer_Node | undefined} */ - const expected = (expected_children[i]) - - if (typeof child === "string") { - if (typeof expected === "string") { - if (child === expected) { - compare_push_text(expected, lines, len, 0) - } else { - compare_push_text(child, lines, len, +1) - compare_push_text(expected, lines, len, -1) - result = false - } - } - else if (expected === undefined) { - compare_push_text(child, lines, len, +1) - result = false - } - else { - compare_push_text(child, lines, len, +1) - compare_push_node(expected, lines, len, -1) - result = false - } - } else { - if (typeof expected === "string") { - compare_push_text(expected, lines, len, -1) - compare_push_node(child, lines, len, +1) - result = false - } - else if (expected === undefined) { - compare_push_node(child, lines, len, +1) - result = false - } - else { - if (child.type === expected.type) { - compare_push_type(child.type, lines, len, 0) - } else { - compare_push_type(child.type, lines, len, +1) - compare_push_type(expected.type, lines, len, -1) - result = false - } - path[len] = child - result = compare_children(child.children, expected.children, lines, len + 1) && result - } - } + result = compare_child(children[i], expected_children[i], lines, len) && result } for (; i < expected_children.length; i += 1) { - const expected = expected_children[i] - if (typeof expected === "string") { - compare_push_text(expected, lines, len, -1) - } else { - compare_push_node(expected, lines, len, -1) - result = false - } + compare_child(undefined, expected_children[i], lines, len) + result = false } return result From 47f0f110eb175d3ebb89548b2e49a3182568bc91 Mon Sep 17 00:00:00 2001 From: Damian Tarnawski Date: Sat, 2 Mar 2024 16:41:12 +0100 Subject: [PATCH 4/4] Change enums --- mds/mds.js | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/mds/mds.js b/mds/mds.js index 13efda3..f540a72 100644 --- a/mds/mds.js +++ b/mds/mds.js @@ -10,40 +10,40 @@ export * from "./t.js" export const DOCUMENT = 1, // 1 PARAGRAPH = 2, // 2 - HEADING_1 = 4, // 3 - HEADING_2 = 8, // 4 - HEADING_3 = 16, // 5 - HEADING_4 = 32, // 6 - HEADING_5 = 64, // 7 - HEADING_6 = 128, // 8 - CODE_BLOCK = 256, // 9 - CODE_FENCE = 512, // 10 - CODE_INLINE = 1024, // 11 - ITALIC_AST = 2048, // 12 - ITALIC_UND = 4096, // 13 - STRONG_AST = 8192, // 14 - STRONG_UND = 16384, // 15 - STRIKE = 32768, // 16 - LINK = 65536, // 17 - IMAGE = 131072, // 18 - BLOCKQUOTE = 262144, // 19 - LINE_BREAK = 524288, // 20 + LINE_BREAK = 4, // 3 + HEADING_1 = 8, // 4 + HEADING_2 = 16, // 5 + HEADING_3 = 32, // 6 + HEADING_4 = 64, // 7 + HEADING_5 = 128, // 8 + HEADING_6 = 256, // 9 + CODE_BLOCK = 512, // 10 + CODE_FENCE = 1024, // 11 + CODE_INLINE = 2048, // 12 + ITALIC_AST = 4096, // 13 + ITALIC_UND = 8192, // 14 + STRONG_AST = 16384, // 15 + STRONG_UND = 32768, // 16 + STRIKE = 65536, // 17 + LINK = 131072, // 18 + IMAGE = 262144, // 19 + BLOCKQUOTE = 524288, // 20 /** `HEADING_1 | HEADING_2 | HEADING_3 | HEADING_4 | HEADING_5 | HEADING_6` */ - ANY_HEADING = 252, + ANY_HEADING = 504, /** `CODE_BLOCK | CODE_FENCE | CODE_INLINE` */ - ANY_CODE = 1792, + ANY_CODE = 3584, /** `ITALIC_AST | ITALIC_UND` */ - ANY_ITALIC = 6144, + ANY_ITALIC = 12288, /** `STRONG_AST | STRONG_UND` */ - ANY_STRONG = 24576, + ANY_STRONG = 49152, /** `STRONG_AST | ITALIC_AST` */ - ANY_AST = 10240, + ANY_AST = 20480, /** `STRONG_UND | ITALIC_UND` */ - ANY_UND = 20480, - /** `CODE | IMAGE` */ - NO_NESTING = 132864, + ANY_UND = 40960, + /** `ANY_CODE | IMAGE` */ + NO_NESTING = 265728, /** `DOCUMENT | BLOCKQUOTE` */ - ANY_ROOT = 262145 + ANY_ROOT = 524289 /** @enum {(typeof Token_Type)[keyof typeof Token_Type]} */ export const Token_Type = /** @type {const} */({