diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..4c0ad9406 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,83 @@ +name: CI tests + +on: [push, workflow_dispatch] + +jobs: + linux: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + cmake_opts: + - '-DCMARK_SHARED=ON' + - '' + compiler: + - c: 'clang' + cpp: 'clang++' + - c: 'gcc' + cpp: 'g++' + env: + CMAKE_OPTIONS: ${{ matrix.cmake_opts }} + CC: ${{ matrix.compiler.c }} + CXX: ${{ matrix.compiler.cpp }} + + steps: + - uses: actions/checkout@v1 + - name: Install valgrind + run: | + sudo apt install -y valgrind + - name: Build and test + run: | + make + make test + make leakcheck + + macos: + + runs-on: macOS-latest + strategy: + fail-fast: false + matrix: + cmake_opts: + - '-DCMARK_SHARED=ON' + - '' + compiler: + - c: 'clang' + cpp: 'clang++' + - c: 'gcc' + cpp: 'g++' + env: + CMAKE_OPTIONS: ${{ matrix.cmake_opts }} + CC: ${{ matrix.compiler.c }} + CXX: ${{ matrix.compiler.cpp }} + + steps: + - uses: actions/checkout@v1 + - name: Build and test + env: + CMAKE_OPTIONS: -DCMARK_SHARED=OFF + run: | + make + make test + + windows: + + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + cmake_opts: + - '-DCMARK_SHARED=ON' + - '' + env: + CMAKE_OPTIONS: ${{ matrix.cmake_opts }} + + steps: + - uses: actions/checkout@v1 + - uses: ilammy/msvc-dev-cmd@v1 + - name: Build and test + run: | + chcp 65001 + nmake.exe /nologo /f Makefile.nmake test + shell: cmd diff --git a/CMakeLists.txt b/CMakeLists.txt index e30278cb2..bb4976a27 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(cmark-gfm) set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 29) set(PROJECT_VERSION_PATCH 0) -set(PROJECT_VERSION_GFM 0) +set(PROJECT_VERSION_GFM 2) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM}) include("FindAsan.cmake") diff --git a/Makefile b/Makefile index a05ba97a6..b84e9e741 100644 --- a/Makefile +++ b/Makefile @@ -101,7 +101,7 @@ mingw: cmake .. -DCMAKE_TOOLCHAIN_FILE=../toolchain-mingw32.cmake -DCMAKE_INSTALL_PREFIX=$(MINGW_INSTALLDIR) ;\ $(MAKE) && $(MAKE) install -man/man3/cmark-gfm.3: src/cmark-gfm.h | $(CMARK) +man/man3/cmark-gfm.3: src/include/cmark-gfm.h | $(CMARK) python man/make_man_page.py $< > $@ \ archive: diff --git a/api_test/main.c b/api_test/main.c index 1051c2904..bc7d39c6b 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1417,6 +1417,114 @@ static void parser_interrupt(test_batch_runner *runner) { cmark_syntax_extension_free(cmark_get_default_mem_allocator(), my_ext); } +static void render_spoiler(test_batch_runner *runner) { + cmark_gfm_core_extensions_ensure_registered(); + + cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); + cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("spoiler")); + + { + static const char markdown[] = "we have some ||spicy text|| here"; + cmark_parser_feed(parser, markdown, sizeof(markdown) - 1); + + cmark_node *doc = cmark_parser_finish(parser); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " we have some \n" + " \n" + " spicy text\n" + " \n" + " here\n" + " \n" + "\n", "rendering spoilers should appear correctly"); + } + { + static const char markdown[] = "we have some |non-spicy text| here"; + cmark_parser_feed(parser, markdown, sizeof(markdown) - 1); + + cmark_node *doc = cmark_parser_finish(parser); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " we have some |non-spicy text| here\n" + " \n" + "\n", "rendering spoilers without proper delimiters should appear correctly"); + } + { + static const char markdown[] = "we have some >!incorrectly spicy text!< here"; + cmark_parser_feed(parser, markdown, sizeof(markdown) - 1); + + cmark_node *doc = cmark_parser_finish(parser); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " we have some >!incorrectly spicy text!< here\n" + " \n" + "\n", "rendering spoilers without proper delimiters should appear correctly"); + } +} + +static void render_spoiler_reddit(test_batch_runner *runner) { + cmark_gfm_core_extensions_ensure_registered(); + + cmark_parser *parser = cmark_parser_new(CMARK_OPT_SPOILER_REDDIT_STYLE); + cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("spoiler")); + + { + static const char markdown[] = "we have some >!spicy text!< here"; + cmark_parser_feed(parser, markdown, sizeof(markdown) - 1); + + cmark_node *doc = cmark_parser_finish(parser); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " we have some \n" + " \n" + " spicy text\n" + " \n" + " here\n" + " \n" + "\n", "rendering spoilers should appear correctly"); + } + { + static const char markdown[] = "we have some !non-spicy text! here"; + cmark_parser_feed(parser, markdown, sizeof(markdown) - 1); + + cmark_node *doc = cmark_parser_finish(parser); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " we have some !non-spicy text! here\n" + " \n" + "\n", "rendering spoilers without proper delimiters should appear correctly"); + } + { + static const char markdown[] = "we have some ||incorrectly spicy text|| here"; + cmark_parser_feed(parser, markdown, sizeof(markdown) - 1); + + cmark_node *doc = cmark_parser_finish(parser); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " we have some ||incorrectly spicy text|| here\n" + " \n" + "\n", "rendering spoilers without proper delimiters should appear correctly"); + } +} + int main() { int retval; test_batch_runner *runner = test_batch_runner_new(); @@ -1452,6 +1560,7 @@ int main() { verify_custom_attributes_node(runner); verify_custom_attributes_node_with_footnote(runner); parser_interrupt(runner); + render_spoiler(runner); test_print_summary(runner); retval = test_ok(runner) ? 0 : 1; diff --git a/bin/main.c b/bin/main.c index 2bab06284..87aaf619b 100644 --- a/bin/main.c +++ b/bin/main.c @@ -64,6 +64,9 @@ void print_usage() { " instead of align attributes.\n"); printf(" --full-info-string Include remainder of code block info\n" " string in a separate attribute.\n"); + printf(" --spoiler-reddit-style Use Reddit-style spoiler delimiters,\n" + " i.e. >!spoilertext!< instead of\n" + " ||spoilertext||\n"); printf(" --help, -h Print usage information\n"); printf(" --version Print version\n"); } @@ -185,6 +188,8 @@ int main(int argc, char *argv[]) { options |= CMARK_OPT_VALIDATE_UTF8; } else if (strcmp(argv[i], "--liberal-html-tag") == 0) { options |= CMARK_OPT_LIBERAL_HTML_TAG; + } else if (strcmp(argv[i], "--spoiler-reddit-style") == 0) { + options |= CMARK_OPT_SPOILER_REDDIT_STYLE; } else if ((strcmp(argv[i], "--help") == 0) || (strcmp(argv[i], "-h") == 0)) { print_usage(); diff --git a/changelog.txt b/changelog.txt index b86a41a22..a8f7bb1b1 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,18 @@ +[0.29.0.gfm.2] + * Fixed issues with footnote rendering when used with the autolinker (#121), + and when footnotes are adjacent (#139). + * We now allow footnotes to be referenced from inside a footnote definition, + we use the footnote label for the fnref href text when rendering html, and + we insert multiple backrefs when a footnote has been referenced multiple + times (#229, #230) + * We added new data- attributes to footnote html rendering to make them + easier to style (#234) + +[0.29.0.gfm.1] + + * Fixed denial of service bug in GFM's table extension + per https://github.com/github/cmark-gfm/security/advisories/GHSA-7gc6-9qr5-hc85 + [0.29.0] * Update spec to 0.29. diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index 4977c98c3..62984af5c 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -11,6 +11,7 @@ set(LIBRARY_SOURCES ext_scanners.re ext_scanners.h tasklist.c + spoiler.c ) include_directories( diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index 131cdf402..c9bff9471 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -2,6 +2,7 @@ #include "autolink.h" #include "mutex.h" #include "strikethrough.h" +#include "spoiler.h" #include "table.h" #include "tagfilter.h" #include "tasklist.h" @@ -15,6 +16,7 @@ static int core_extensions_registration(cmark_plugin *plugin) { cmark_plugin_register_syntax_extension(plugin, create_autolink_extension()); cmark_plugin_register_syntax_extension(plugin, create_tagfilter_extension()); cmark_plugin_register_syntax_extension(plugin, create_tasklist_extension()); + cmark_plugin_register_syntax_extension(plugin, create_spoiler_extension()); return 1; } diff --git a/extensions/spoiler.c b/extensions/spoiler.c new file mode 100644 index 000000000..532ea7df8 --- /dev/null +++ b/extensions/spoiler.c @@ -0,0 +1,194 @@ +#include "spoiler.h" +#include +#include + +cmark_node_type CMARK_NODE_SPOILER; + +static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser, + cmark_node *parent, unsigned char character, + cmark_inline_parser *inline_parser) { + cmark_node *res = NULL; + int left_flanking, right_flanking, punct_before, punct_after, delims; + char buffer[101]; + + if ((parser->options & CMARK_OPT_SPOILER_REDDIT_STYLE)) { + // Reddit-style spoilers - flanked by angle brackets and exclamation marks, + // e.g. >!this is a spoiler!< + int pos = cmark_inline_parser_get_offset(inline_parser); + char *txt = NULL; + bool opener = false; + bool closer = false; + if (cmark_inline_parser_peek_at(inline_parser, pos) == '>' && + cmark_inline_parser_peek_at(inline_parser, pos + 1) == '!') { + txt = ">!"; + opener = true; + } else if (cmark_inline_parser_peek_at(inline_parser, pos) == '!' && + cmark_inline_parser_peek_at(inline_parser, pos + 1) == '<') { + txt = "!<"; + closer = true; + } + + if (opener && pos > 0 && !cmark_isspace(cmark_inline_parser_peek_at(inline_parser, pos - 1))) { + opener = false; + } + + if (closer) { + cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); + bufsize_t len = chunk->len; + if (pos + 2 < len && !cmark_isspace(cmark_inline_parser_peek_at(inline_parser, pos + 2))) { + closer = false; + } + } + + if ((!opener && !closer) || !txt) + return NULL; + + res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + cmark_node_set_literal(res, txt); + res->start_line = cmark_inline_parser_get_line(inline_parser); + res->start_column = cmark_inline_parser_get_column(inline_parser); + + cmark_inline_parser_set_offset(inline_parser, pos + 2); + + res->end_line = cmark_inline_parser_get_line(inline_parser); + res->end_column = cmark_inline_parser_get_column(inline_parser); + + // Set the character for this delimiter to `!`, since it's a heterogenous + // delimiter and the delimiter API assumes single repeated characters. + cmark_inline_parser_push_delimiter(inline_parser, '!', opener, closer, res); + } else { + // Discord-style spoilers - flanked on both sides by two pipes, + // e.g. ||this is a spoiler|| + if (character != '|') + return NULL; + + delims = cmark_inline_parser_scan_delimiters( + inline_parser, sizeof(buffer) - 1, '|', + &left_flanking, + &right_flanking, &punct_before, &punct_after); + + memset(buffer, '|', delims); + buffer[delims] = 0; + + res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + cmark_node_set_literal(res, buffer); + res->start_line = res->end_line = cmark_inline_parser_get_line(inline_parser); + res->start_column = cmark_inline_parser_get_column(inline_parser) - delims; + + if ((left_flanking || right_flanking) && (delims == 2)) { + cmark_inline_parser_push_delimiter(inline_parser, character, left_flanking, + right_flanking, res); + } + } + + return res; +} + +static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser, + cmark_inline_parser *inline_parser, delimiter *opener, + delimiter *closer) { + cmark_node *spoiler; + cmark_node *tmp, *next; + delimiter *delim, *tmp_delim; + delimiter *res = closer->next; + + spoiler = opener->inl_text; + + if (opener->inl_text->as.literal.len != closer->inl_text->as.literal.len) + goto done; + + if (!cmark_node_set_type(spoiler, CMARK_NODE_SPOILER)) + goto done; + + cmark_node_set_syntax_extension(spoiler, self); + + tmp = cmark_node_next(opener->inl_text); + + while (tmp) { + if (tmp == closer->inl_text) + break; + next = cmark_node_next(tmp); + cmark_node_append_child(spoiler, tmp); + tmp = next; + } + + spoiler->end_column = closer->inl_text->start_column + closer->inl_text->as.literal.len - 1; + cmark_node_free(closer->inl_text); + + delim = closer; + while (delim != NULL && delim != opener) { + tmp_delim = delim->previous; + cmark_inline_parser_remove_delimiter(inline_parser, delim); + delim = tmp_delim; + } + + cmark_inline_parser_remove_delimiter(inline_parser, opener); + +done: + return res; +} + +static const char *get_type_string(cmark_syntax_extension *extension, + cmark_node *node) { + return node->type == CMARK_NODE_SPOILER ? "spoiler" : ""; +} + +static int can_contain(cmark_syntax_extension *extension, cmark_node *node, + cmark_node_type child_type) { + if (node->type != CMARK_NODE_SPOILER) + return false; + + return CMARK_NODE_TYPE_INLINE_P(child_type); +} + +static void commonmark_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + if (options & CMARK_OPT_SPOILER_REDDIT_STYLE) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + renderer->out(renderer, node, ">!", false, LITERAL); + } else { + renderer->out(renderer, node, "!<", false, LITERAL); + } + } else { + renderer->out(renderer, node, "||", false, LITERAL); + } +} + +static void html_render(cmark_syntax_extension *extension, + cmark_html_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + cmark_strbuf_puts(renderer->html, ""); + } else { + cmark_strbuf_puts(renderer->html, ""); + } +} + +cmark_syntax_extension *create_spoiler_extension(void) { + cmark_syntax_extension *ext = cmark_syntax_extension_new("spoiler"); + cmark_llist *special_chars = NULL; + + cmark_syntax_extension_set_get_type_string_func(ext, get_type_string); + cmark_syntax_extension_set_can_contain_func(ext, can_contain); + cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render); + cmark_syntax_extension_set_html_render_func(ext, html_render); + cmark_syntax_extension_set_plaintext_render_func(ext, commonmark_render); + CMARK_NODE_SPOILER = cmark_syntax_extension_add_node(1); + + cmark_syntax_extension_set_match_inline_func(ext, match); + cmark_syntax_extension_set_inline_from_delim_func(ext, insert); + + cmark_mem *mem = cmark_get_default_mem_allocator(); + special_chars = cmark_llist_append(mem, special_chars, (void *)'|'); + special_chars = cmark_llist_append(mem, special_chars, (void *)'>'); + special_chars = cmark_llist_append(mem, special_chars, (void *)'<'); + special_chars = cmark_llist_append(mem, special_chars, (void *)'!'); + cmark_syntax_extension_set_special_inline_chars(ext, special_chars); + + cmark_syntax_extension_set_emphasis(ext, 1); + + return ext; +} diff --git a/extensions/spoiler.h b/extensions/spoiler.h new file mode 100644 index 000000000..a86bbbf39 --- /dev/null +++ b/extensions/spoiler.h @@ -0,0 +1,9 @@ +#ifndef CMARK_GFM_SPOILER_H +#define CMARK_GFM_SPOILER_H + +#include "cmark-gfm-core-extensions.h" + +extern cmark_node_type CMARK_NODE_SPOILER; +cmark_syntax_extension *create_spoiler_extension(void); + +#endif /* CMARK_GFM_SPOILER_H */ diff --git a/src/blocks.c b/src/blocks.c index 6e87f19cb..99da25eb3 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -494,7 +494,6 @@ static void process_footnotes(cmark_parser *parser) { while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_DEFINITION) { - cmark_node_unlink(cur); cmark_footnote_create(map, cur); } } @@ -511,6 +510,15 @@ static void process_footnotes(cmark_parser *parser) { if (!footnote->ix) footnote->ix = ++ix; + // store a reference to this footnote reference's footnote definition + // this is used by renderers when generating label ids + cur->parent_footnote_def = footnote->node; + + // keep track of a) count of how many times this footnote def has been + // referenced, and b) which reference index this footnote ref is at. + // this is used by renderers when generating links and backreferences. + cur->footnote.ref_ix = ++footnote->node->footnote.def_count; + char n[32]; snprintf(n, sizeof(n), "%d", footnote->ix); cmark_chunk_free(parser->mem, &cur->as.literal); @@ -541,13 +549,16 @@ static void process_footnotes(cmark_parser *parser) { qsort(map->sorted, map->size, sizeof(cmark_map_entry *), sort_footnote_by_ix); for (unsigned int i = 0; i < map->size; ++i) { cmark_footnote *footnote = (cmark_footnote *)map->sorted[i]; - if (!footnote->ix) + if (!footnote->ix) { + cmark_node_unlink(footnote->node); continue; + } cmark_node_append_child(parser->root, footnote->node); footnote->node = NULL; } } + cmark_unlink_footnotes_map(map); cmark_map_free(map); } diff --git a/src/commonmark.c b/src/commonmark.c index 94fd4388f..328da12a3 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -488,7 +488,13 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, case CMARK_NODE_FOOTNOTE_REFERENCE: if (entering) { LIT("[^"); - OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL); + + char *footnote_label = renderer->mem->calloc(node->parent_footnote_def->as.literal.len + 1, sizeof(char)); + memmove(footnote_label, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len); + + OUT(footnote_label, false, LITERAL); + renderer->mem->free(footnote_label); + LIT("]"); } break; @@ -497,9 +503,13 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, if (entering) { renderer->footnote_ix += 1; LIT("[^"); - char n[32]; - snprintf(n, sizeof(n), "%d", renderer->footnote_ix); - OUT(n, false, LITERAL); + + char *footnote_label = renderer->mem->calloc(node->as.literal.len + 1, sizeof(char)); + memmove(footnote_label, node->as.literal.data, node->as.literal.len); + + OUT(footnote_label, false, LITERAL); + renderer->mem->free(footnote_label); + LIT("]:\n"); cmark_strbuf_puts(renderer->prefix, " "); diff --git a/src/footnotes.c b/src/footnotes.c index f2d2765f4..c2b745f79 100644 --- a/src/footnotes.c +++ b/src/footnotes.c @@ -38,3 +38,26 @@ void cmark_footnote_create(cmark_map *map, cmark_node *node) { cmark_map *cmark_footnote_map_new(cmark_mem *mem) { return cmark_map_new(mem, footnote_free); } + +// Before calling `cmark_map_free` on a map with `cmark_footnotes`, first +// unlink all of the footnote nodes before freeing their memory. +// +// Sometimes, two (unused) footnote nodes can end up referencing each other, +// which as they get freed up by calling `cmark_map_free` -> `footnote_free` -> +// etc, can lead to a use-after-free error. +// +// Better to `unlink` every footnote node first, setting their next, prev, and +// parent pointers to NULL, and only then walk thru & free them up. +void cmark_unlink_footnotes_map(cmark_map *map) { + cmark_map_entry *ref; + cmark_map_entry *next; + + ref = map->refs; + while(ref) { + next = ref->next; + if (((cmark_footnote *)ref)->node) { + cmark_node_unlink(((cmark_footnote *)ref)->node); + } + ref = next; + } +} diff --git a/src/html.c b/src/html.c index 5959d7a0b..96daa18e2 100644 --- a/src/html.c +++ b/src/html.c @@ -59,16 +59,30 @@ static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size cmark_strbuf_put(html, data, (bufsize_t)len); } -static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html) { +static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html, cmark_node *node) { if (renderer->written_footnote_ix >= renderer->footnote_ix) return false; renderer->written_footnote_ix = renderer->footnote_ix; - cmark_strbuf_puts(html, "footnote_ix); - cmark_strbuf_puts(html, n); - cmark_strbuf_puts(html, "\" class=\"footnote-backref\">↩"); + cmark_strbuf_puts(html, "as.literal.data, node->as.literal.len); + cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩"); + + if (node->footnote.def_count > 1) + { + for(int i = 2; i <= node->footnote.def_count; i++) { + char n[32]; + snprintf(n, sizeof(n), "%d", i); + + cmark_strbuf_puts(html, " as.literal.data, node->as.literal.len); + cmark_strbuf_puts(html, "-"); + cmark_strbuf_puts(html, n); + cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩"); + cmark_strbuf_puts(html, n); + cmark_strbuf_puts(html, ""); + } + } return true; } @@ -273,7 +287,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, } else { if (parent->type == CMARK_NODE_FOOTNOTE_DEFINITION && node->next == NULL) { cmark_strbuf_putc(html, ' '); - S_put_footnote_backref(renderer, html); + S_put_footnote_backref(renderer, html, parent); } cmark_strbuf_puts(html, "

\n"); } @@ -405,16 +419,15 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, case CMARK_NODE_FOOTNOTE_DEFINITION: if (entering) { if (renderer->footnote_ix == 0) { - cmark_strbuf_puts(html, "
\n
    \n"); + cmark_strbuf_puts(html, "
    \n
      \n"); } ++renderer->footnote_ix; - cmark_strbuf_puts(html, "
    1. footnote_ix); - cmark_strbuf_puts(html, n); + + cmark_strbuf_puts(html, "
    2. as.literal.data, node->as.literal.len); cmark_strbuf_puts(html, "\">\n"); } else { - if (S_put_footnote_backref(renderer, html)) { + if (S_put_footnote_backref(renderer, html, node)) { cmark_strbuf_putc(html, '\n'); } cmark_strbuf_puts(html, "
    3. \n"); @@ -423,12 +436,20 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, case CMARK_NODE_FOOTNOTE_REFERENCE: if (entering) { - cmark_strbuf_puts(html, "as.literal.data, node->as.literal.len); - cmark_strbuf_puts(html, "\" id=\"fnref"); - cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); - cmark_strbuf_puts(html, "\">"); - cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); + cmark_strbuf_puts(html, "parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len); + cmark_strbuf_puts(html, "\" id=\"fnref-"); + houdini_escape_href(html, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len); + + if (node->footnote.ref_ix > 1) { + char n[32]; + snprintf(n, sizeof(n), "%d", node->footnote.ref_ix); + cmark_strbuf_puts(html, "-"); + cmark_strbuf_puts(html, n); + } + + cmark_strbuf_puts(html, "\" data-footnote-ref>"); + houdini_escape_href(html, node->as.literal.data, node->as.literal.len); cmark_strbuf_puts(html, ""); } break; diff --git a/src/include/cmark-gfm.h b/src/include/cmark-gfm.h index 14ab95f4a..a4ee03c42 100644 --- a/src/include/cmark-gfm.h +++ b/src/include/cmark-gfm.h @@ -780,6 +780,11 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar */ #define CMARK_OPT_PRESERVE_WHITESPACE ((1 << 19) | CMARK_OPT_INLINE_ONLY) +/** Parse spoiler text with Reddit-style delimiters (`>!this is a spoiler!<`). Without + * this option, spoilers are parsed with Discord-style delimiters (`||this is a spoiler||`). + */ +#define CMARK_OPT_SPOILER_REDDIT_STYLE (1 << 20) + /** * ## Version information */ diff --git a/src/include/footnotes.h b/src/include/footnotes.h index 43dd64ff6..64e2901e3 100644 --- a/src/include/footnotes.h +++ b/src/include/footnotes.h @@ -18,6 +18,8 @@ typedef struct cmark_footnote cmark_footnote; void cmark_footnote_create(cmark_map *map, cmark_node *node); cmark_map *cmark_footnote_map_new(cmark_mem *mem); +void cmark_unlink_footnotes_map(cmark_map *map); + #ifdef __cplusplus } #endif diff --git a/src/include/node.h b/src/include/node.h index e2c0216d1..0f9482fb3 100644 --- a/src/include/node.h +++ b/src/include/node.h @@ -81,6 +81,13 @@ struct cmark_node { cmark_syntax_extension *extension; + union { + int ref_ix; + int def_count; + } footnote; + + cmark_node *parent_footnote_def; + union { cmark_chunk literal; cmark_list list; diff --git a/src/inlines.c b/src/inlines.c index ddfb7814c..ec238e4be 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1283,19 +1283,77 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { // What if we're a footnote link? if (parser->options & CMARK_OPT_FOOTNOTES && opener->inl_text->next && - opener->inl_text->next->type == CMARK_NODE_TEXT && - !opener->inl_text->next->next) { + opener->inl_text->next->type == CMARK_NODE_TEXT) { + cmark_chunk *literal = &opener->inl_text->next->as.literal; - if (literal->len > 1 && literal->data[0] == '^') { - inl = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE); - inl->as.literal = cmark_chunk_dup(literal, 1, literal->len - 1); - inl->start_line = inl->end_line = subj->line; - inl->start_column = opener->inl_text->start_column; - inl->end_column = subj->pos + subj->column_offset + subj->block_offset; - cmark_node_insert_before(opener->inl_text, inl); - cmark_node_free(opener->inl_text->next); - cmark_node_free(opener->inl_text); + + // look back to the opening '[', and skip ahead to the next character + // if we're looking at a '[^' sequence, and there is other text or nodes + // after the ^, let's call it a footnote reference. + if ((literal->len > 0 && literal->data[0] == '^') && (literal->len > 1 || opener->inl_text->next->next)) { + + // Before we got this far, the `handle_close_bracket` function may have + // advanced the current state beyond our footnote's actual closing + // bracket, ie if it went looking for a `link_label`. + // Let's just rewind the subject's position: + subj->pos = initial_pos; + + cmark_node *fnref = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE); + + // the start and end of the footnote ref is the opening and closing brace + // i.e. the subject's current position, and the opener's start_column + int fnref_end_column = subj->pos + subj->column_offset + subj->block_offset; + int fnref_start_column = opener->inl_text->start_column; + + // any given node delineates a substring of the line being processed, + // with the remainder of the line being pointed to thru its 'literal' + // struct member. + // here, we copy the literal's pointer, moving it past the '^' character + // for a length equal to the size of footnote reference text. + // i.e. end_col minus start_col, minus the [ and the ^ characters + // + // this copies the footnote reference string, even if between the + // `opener` and the subject's current position there are other nodes + // + // (first, check for underflows) + if ((fnref_start_column + 2) <= fnref_end_column) { + fnref->as.literal = cmark_chunk_dup(literal, 1, (fnref_end_column - fnref_start_column) - 2); + } else { + fnref->as.literal = cmark_chunk_dup(literal, 1, 0); + } + + fnref->start_line = fnref->end_line = subj->line; + fnref->start_column = fnref_start_column; + fnref->end_column = fnref_end_column; + + // we then replace the opener with this new fnref node, the net effect + // being replacing the opening '[' text node with a `^footnote-ref]` node. + cmark_node_insert_before(opener->inl_text, fnref); + process_emphasis(parser, subj, opener->previous_delimiter); + // sometimes, the footnote reference text gets parsed into multiple nodes + // i.e. '[^example]' parsed into '[', '^exam', 'ple]'. + // this happens for ex with the autolink extension. when the autolinker + // finds the 'w' character, it will split the text into multiple nodes + // in hopes of being able to match a 'www.' substring. + // + // because this function is called one character at a time via the + // `parse_inlines` function, and the current subj->pos is pointing at the + // closing ] brace, and because we copy all the text between the [ ] + // braces, we should be able to safely ignore and delete any nodes after + // the opener->inl_text->next. + // + // therefore, here we walk thru the list and free them all up + cmark_node *next_node; + cmark_node *current_node = opener->inl_text->next; + while(current_node) { + next_node = current_node->next; + cmark_node_free(current_node); + current_node = next_node; + } + + cmark_node_free(opener->inl_text); + pop_bracket(subj); return NULL; } @@ -1458,6 +1516,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, cmark_chunk contents; unsigned char c; bufsize_t startpos, endpos; + bufsize_t initpos = subj->pos; c = peek_char(subj); if (c == 0) { return 0; @@ -1505,43 +1564,49 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, new_inl = handle_close_bracket(parser, subj); break; case '!': - advance(subj); - if (peek_char(subj) == '[' && peek_char_n(subj, 1) != '^') { + if (peek_char_n(subj, 1) == '[' && peek_char_n(subj, 2) != '^') { + advance(subj); advance(subj); new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("![")); push_bracket(subj, IMAGE, new_inl); - } else { - new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!")); } break; case '^': - advance(subj); // TODO: Support a name between ^ and [ - if (peek_char(subj) == '[') { + if (peek_char_n(subj, 1) == '[') { + advance(subj); advance(subj); new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("^[")); push_bracket(subj, ATTRIBUTE, new_inl); - } else { - new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("^")); } break; - default: - new_inl = try_extensions(parser, parent, c, subj); - if (new_inl != NULL) - break; + } - endpos = subject_find_special_char(parser, subj, options); - contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos); - startpos = subj->pos; - subj->pos = endpos; + if (subj->pos == initpos) { + if (!new_inl) + new_inl = try_extensions(parser, parent, c, subj); - // if we're at a newline, strip trailing spaces. - if ((options & CMARK_OPT_PRESERVE_WHITESPACE) == 0 && S_is_line_end_char(peek_char(subj))) { - cmark_chunk_rtrim(&contents); - } + if (!new_inl) { + if (c == '^' || c == '!') { + advance(subj); + new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); + } else { + endpos = subject_find_special_char(parser, subj, options); + contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos); + startpos = subj->pos; + subj->pos = endpos; + + // if we're at a newline, strip trailing spaces. + if ((options & CMARK_OPT_PRESERVE_WHITESPACE) == 0 && S_is_line_end_char(peek_char(subj))) { + cmark_chunk_rtrim(&contents); + } - new_inl = make_str(subj, startpos, endpos - 1, contents); + if (endpos > startpos) + new_inl = make_str(subj, startpos, endpos - 1, contents); + } + } } + if (new_inl != NULL) { cmark_node_append_child(parent, new_inl); } diff --git a/test/extensions.txt b/test/extensions.txt index 0d9993782..37033b1ca 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -672,31 +672,68 @@ Hi! [^unused]: This is unused. . -

      This is some text!1. Other text.2.

      -

      Here's a thing3.

      -

      And another thing4.

      +

      This is some text!1. Other text.2.

      +

      Here's a thing3.

      +

      And another thing4.

      This doesn't have a referent[^nope].

      Hi!

      -
      +
        -
      1. -

        Some bolded footnote definition.

        +
      2. +

        Some bolded footnote definition.

      3. -
      4. +
      5. Blockquotes can be in a footnote.

        as well as code blocks
         
        -

        or, naturally, simple paragraphs.

        +

        or, naturally, simple paragraphs.

      6. -
      7. -

        no code block here (spaces are stripped away)

        +
      8. +

        no code block here (spaces are stripped away)

      9. -
      10. +
      11. this is now a code block (8 spaces indentation)
         
        - + +
      12. +
      +
      +```````````````````````````````` + +## When a footnote is used multiple times, we insert multiple backrefs. + +```````````````````````````````` example +This is some text. It has a footnote[^a-footnote]. + +This footnote is referenced[^a-footnote] multiple times, in lots of different places.[^a-footnote] + +[^a-footnote]: This footnote definition should have three backrefs. +. +

      This is some text. It has a footnote1.

      +

      This footnote is referenced1 multiple times, in lots of different places.1

      +
      +
        +
      1. +

        This footnote definition should have three backrefs. 2 3

        +
      2. +
      +
      +```````````````````````````````` + +## Footnote reference labels are href escaped + +```````````````````````````````` example +Hello[^">] + +[^">]: pwned +. +

      Hello1

      +
      +
        +
      1. +

        pwned

      diff --git a/test/normalize.py b/test/normalize.py index 6073bf011..b7fd9b245 100644 --- a/test/normalize.py +++ b/test/normalize.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from html.parser import HTMLParser import urllib +import html try: from html.parser import HTMLParseError @@ -13,7 +14,6 @@ class HTMLParseError(Exception): from html.entities import name2codepoint import sys import re -import cgi # Normalization code, adapted from # https://github.com/karlcow/markdown-testsuite/ @@ -66,7 +66,7 @@ def handle_starttag(self, tag, attrs): self.output += ("=" + '"' + urllib.quote(urllib.unquote(v), safe='/') + '"') elif v != None: - self.output += ("=" + '"' + cgi.escape(v,quote=True) + '"') + self.output += ("=" + '"' + html.escape(v,quote=True) + '"') self.output += ">" self.last_tag = tag self.last = "starttag" diff --git a/test/regression.txt b/test/regression.txt index d033a4003..d4fbe3102 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -4,7 +4,8 @@ Issue #113: EOL character weirdness on Windows (Important: first line ends with CR + CR + LF) ```````````````````````````````` example -line1 +line1 + line2 .

      line1

      @@ -175,7 +176,7 @@ A footnote in a paragraph[^1] [^1]: a footnote . -

      A footnote in a paragraph1

      +

      A footnote in a paragraph1

      @@ -185,15 +186,15 @@ A footnote in a paragraph[^1] - +
      foot 1foot 1 note
      -
      +
        -
      1. -

        a footnote

        +
      2. +

        a footnote 2

      @@ -269,3 +270,99 @@ Pull request #128 - Buffer overread in tables extension

      | -|

      ```````````````````````````````` + +Footnotes may be nested inside other footnotes. + +```````````````````````````````` example footnotes +This is some text. It has a citation.[^citation] + +[^another-citation]: My second citation. + +[^citation]: This is a long winded parapgraph that also has another citation.[^another-citation] +. +

      This is some text. It has a citation.1

      +
      +
        +
      1. +

        This is a long winded parapgraph that also has another citation.2

        +
      2. +
      3. +

        My second citation.

        +
      4. +
      +
      +```````````````````````````````` + +Footnotes are similar to, but should not be confused with, link references + +```````````````````````````````` example footnotes +This is some text. It has two footnotes references, side-by-side without any spaces,[^footnote1][^footnote2] which are definitely not link references. + +[^footnote1]: Hello. + +[^footnote2]: Goodbye. +. +

      This is some text. It has two footnotes references, side-by-side without any spaces,12 which are definitely not link references.

      +
      +
        +
      1. +

        Hello.

        +
      2. +
      3. +

        Goodbye.

        +
      4. +
      +
      +```````````````````````````````` + +Footnotes may begin with or have a 'w' or a '_' in their reference label. + +```````````````````````````````` example footnotes autolink +This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a `w`.[^widely-cited] + +It has another footnote that contains many different characters (the autolinker was also breaking on `_`).[^sphinx-of-black-quartz_judge-my-vow-0123456789] + +[^sphinx-of-black-quartz_judge-my-vow-0123456789]: so does this. + +[^widely-cited]: this renders properly. +. +

      This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a w.1

      +

      It has another footnote that contains many different characters (the autolinker was also breaking on _).2

      +
      +
        +
      1. +

        this renders properly.

        +
      2. +
      3. +

        so does this.

        +
      4. +
      +
      +```````````````````````````````` + +Footnotes interacting with strikethrough should not lead to a use-after-free + +```````````````````````````````` example footnotes autolink strikethrough table +|Tot.....[^_a_]| +. +

      |Tot.....[^_a_]|

      +```````````````````````````````` + +Footnotes interacting with strikethrough should not lead to a use-after-free pt2 + +```````````````````````````````` example footnotes autolink strikethrough table +[^~~is~~1] +. +

      [^~~is~~1]

      +```````````````````````````````` + +Adjacent unused footnotes definitions should not lead to a use after free + +```````````````````````````````` example footnotes autolink strikethrough table +Hello world + + +[^a]:[^b]: +. +

      Hello world

      +```````````````````````````````` diff --git a/test/spec.txt b/test/spec.txt index 582131d70..ff93f3c77 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -7734,6 +7734,35 @@ new paragraph~~. +
      + +## Spoiler text (extension) + +Swift-cmark GFM provides the `spoiler` extension, providing an additional emphasis +type for text that should be hidden and revealed by user interaction. + +Spoiler text is wrapped in two pipes (`|`). + +As the use of spoiler text depends on user interaction, GFM only emits a generic +`` tag that must be styled by the client. + +```````````````````````````````` example spoiler +Spoiler alert: ||Hello, world!|| +. +

      Spoiler alert: Hello, world!

      +```````````````````````````````` + +The `spoiler` extension also optionally allows alternate "Reddit-style" delimiters, +which use angle brackets and exclamation marks to mark spoiler text: + +```````````````````````````````` example disabled +Spoiler alert: >!Hello, world!!< +. +

      Spoiler alert: Hello, world!

      +```````````````````````````````` + +
      + ## Links A link contains [link text] (the visible text), a [link destination]