From 6ceecd85d6dd111ae8eb62bfeaca737662d073c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Sat, 21 Dec 2024 18:33:50 -0800 Subject: [PATCH] Release KDL 2.0.0 (#434) * Release KDL 2.0.0 * fix grammar for multiline quoted strings to allow escaped whitespace on closing line * Add unicode-space to raw string * Remove nonexistent equals-sign from the grammar (#435) * fix multiline string tests * grammar: fix disallowed-keyword-identifiers and string-character (#436) * Back out "fix multiline string tests" This backs out commit 0c5604b462bfa5955edeac4a9bcb8d960ee39519. * add extra javascript implementation (#437) * reword interaction multiline + whitespace escape (#439) * More tests for backslash behaviour (#438) * More tests for backslash behaviour * Incorrect example of escaped final newline * Test with non-literal indent * Make line-space a superset of node-space (#440) * Allow escline everywhere * escline tests * Always escape \ inside single quotes in the grammar text (#441) to match the other uses of it and the metalanguage description below * Add tests for mandatory whitespace between arguments or properties (#442) * Add an optional version marker (#444) * Add version marker to the grammar * Add version marker to the Changelog * Update SPEC.md Co-authored-by: eilvelia * add a mandatory newline after the version marker * add mandatory space between version number --------- Co-authored-by: eilvelia * Fix a changelog line erroneously truncated in #444 (#445) * fix: move vertical tab to the line-breaking whitespace to match Unicode (#446) * add vertical tab change test * final tweaks before release --------- Co-authored-by: eilvelia Co-authored-by: Bram Gotink Co-authored-by: Thomas Jollans Co-authored-by: Evgeny --- CHANGELOG.md | 109 ++++++++-------- README.md | 118 +++++++++++------- SPEC.md | 84 +++++++------ SPEC_v1.md | 4 + .../expected_kdl/esc_multiple_newlines.kdl | 1 + .../expected_kdl/escline_after_semicolon.kdl | 2 + 
.../test_cases/expected_kdl/escline_alone.kdl | 1 + .../expected_kdl/escline_empty_line.kdl | 1 + .../expected_kdl/escline_end_of_node.kdl | 2 + .../expected_kdl/escline_in_child_block.kdl | 4 + .../expected_kdl/escline_node_type.kdl | 1 + .../expected_kdl/escline_slashdash.kdl | 1 + .../multiline_string_double_backslash.kdl | 1 + .../multiline_string_escape_delimiter.kdl | 1 + ...ultiline_string_escape_in_closing_line.kdl | 1 + ..._string_escape_in_closing_line_shallow.kdl | 1 + ...multiline_string_escape_newline_at_end.kdl | 1 + .../input/esc_multiple_newlines.kdl | 4 + .../input/escline_after_semicolon.kdl | 2 + tests/test_cases/input/escline_alone.kdl | 1 + tests/test_cases/input/escline_empty_line.kdl | 3 + .../test_cases/input/escline_end_of_node.kdl | 3 + .../input/escline_in_child_block.kdl | 5 + tests/test_cases/input/escline_node.kdl | 1 + tests/test_cases/input/escline_node_type.kdl | 2 + tests/test_cases/input/escline_slashdash.kdl | 4 + .../multiline_string_double_backslash.kdl | 4 + .../multiline_string_escape_delimiter.kdl | 3 + ...ultiline_string_escape_in_closing_line.kdl | 5 + ..._string_escape_in_closing_line_shallow.kdl | 5 + ...multiline_string_escape_newline_at_end.kdl | 4 + ...line_string_escape_newline_at_end_fail.kdl | 4 + ...ne_string_final_whitespace_escape_fail.kdl | 4 + ...ltiline_string_non_literal_prefix_fail.kdl | 4 + .../zero_space_before_first_arg_fail.kdl | 1 + .../input/zero_space_before_prop_fail.kdl | 1 + .../zero_space_before_second_arg_fail.kdl | 1 + .../zero_space_before_slashdash_arg_fail.kdl | 1 + tests/vertical_tab_whitespace.kdl | 2 + 39 files changed, 265 insertions(+), 132 deletions(-) create mode 100644 tests/test_cases/expected_kdl/esc_multiple_newlines.kdl create mode 100644 tests/test_cases/expected_kdl/escline_after_semicolon.kdl create mode 100644 tests/test_cases/expected_kdl/escline_alone.kdl create mode 100644 tests/test_cases/expected_kdl/escline_empty_line.kdl create mode 100644 
tests/test_cases/expected_kdl/escline_end_of_node.kdl create mode 100644 tests/test_cases/expected_kdl/escline_in_child_block.kdl create mode 100644 tests/test_cases/expected_kdl/escline_node_type.kdl create mode 100644 tests/test_cases/expected_kdl/escline_slashdash.kdl create mode 100644 tests/test_cases/expected_kdl/multiline_string_double_backslash.kdl create mode 100644 tests/test_cases/expected_kdl/multiline_string_escape_delimiter.kdl create mode 100644 tests/test_cases/expected_kdl/multiline_string_escape_in_closing_line.kdl create mode 100644 tests/test_cases/expected_kdl/multiline_string_escape_in_closing_line_shallow.kdl create mode 100644 tests/test_cases/expected_kdl/multiline_string_escape_newline_at_end.kdl create mode 100644 tests/test_cases/input/esc_multiple_newlines.kdl create mode 100644 tests/test_cases/input/escline_after_semicolon.kdl create mode 100644 tests/test_cases/input/escline_alone.kdl create mode 100644 tests/test_cases/input/escline_empty_line.kdl create mode 100644 tests/test_cases/input/escline_end_of_node.kdl create mode 100644 tests/test_cases/input/escline_in_child_block.kdl create mode 100644 tests/test_cases/input/escline_node_type.kdl create mode 100644 tests/test_cases/input/escline_slashdash.kdl create mode 100644 tests/test_cases/input/multiline_string_double_backslash.kdl create mode 100644 tests/test_cases/input/multiline_string_escape_delimiter.kdl create mode 100644 tests/test_cases/input/multiline_string_escape_in_closing_line.kdl create mode 100644 tests/test_cases/input/multiline_string_escape_in_closing_line_shallow.kdl create mode 100644 tests/test_cases/input/multiline_string_escape_newline_at_end.kdl create mode 100644 tests/test_cases/input/multiline_string_escape_newline_at_end_fail.kdl create mode 100644 tests/test_cases/input/multiline_string_final_whitespace_escape_fail.kdl create mode 100644 tests/test_cases/input/multiline_string_non_literal_prefix_fail.kdl create mode 100644 
tests/test_cases/input/zero_space_before_first_arg_fail.kdl create mode 100644 tests/test_cases/input/zero_space_before_prop_fail.kdl create mode 100644 tests/test_cases/input/zero_space_before_second_arg_fail.kdl create mode 100644 tests/test_cases/input/zero_space_before_slashdash_arg_fail.kdl create mode 100644 tests/vertical_tab_whitespace.kdl diff --git a/CHANGELOG.md b/CHANGELOG.md index b6414c9..2f8a655 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,52 +1,6 @@ # KDL Changelog -## 2.0.0-draft.8 (2024-12-14) - -* Some details have been clarified around the treatment of whitespace in - multiline strings. -* `raw-string` productions have been updated to be explicitly non-greedy and - "fallible". -* Some tests have been added, others adjusted, some removed, after a cleanup pass. - -## 2.0.0-draft.7 (2024-12-10) - -* `node-space` is now allowed as whitespace after a `slashdash`, meaning line - continuations will work now. -* One or two consecutive double-quotes are now allowed in the bodies of - multi-line quoted strings, without needing to be escaped. -* Grammar has been fixed to disallow raw strings like `#"""#`, which are now - properly treated as invalid multi-line raw strings (instead of the equivalent of - `"\""`). -* Test suite has been updated to include a `_fail` suffix in all test cases - which are expected to fail. -* A slew of additional slashdash and multi-line string compliance tests have - been added. Have fun. :) -* The organization of string types in the spec prose has been updated to a - hopefully more helpful structure. - - -## 2.0.0-draft.6 (2024-12-04) - -* Multiline strings, both Raw and Quoted, must now use `"""` instead of a single `"`. Using `"""` for a single-line string is a syntax error. -* Fixed an issue with the `unicode_silly` test case. -* Some rewordings and clarification in the spec prose. -* Slight grammar tweak where the pre-terminator `node-space*` for `node` and `final-node` have been moved into `base-node`. 
- - -## 2.0.0-draft.5 (2024-11-28) - -* Equals signs other than `=` are no longer supported in properties. -* 128-bit integer type annotations have been added to the list of "well-known" - type annotations. -* Multiline string escape rules have been tweaked significantly. -* `\s` is now a valid escape within a string, representing a space character. -* Slashdash (`/-`)-compatible locations and related grammar adjusted to be more - clear and intuitive. This includes some changes relating to whitespace, - including comments and newlines, which are breaking changes. -* Various updates to test suite to reflect changes. - - -## 2.0.0 (Unreleased) +## 2.0.0 (2024-12-21) ### Grammar @@ -55,7 +9,7 @@ escape. * Single line comments (`//`) can now be immediately followed by a newline. * All literal whitespace following a `\` in a string is now discarded. -* Vertical tabs (`U+000B`) are now considered to be whitespace. +* Vertical tabs (`U+000B`) are now considered to be newlines. * The grammar syntax itself has been described, and some confusing definitions in the grammar have been fixed accordingly (mostly related to escaped characters). @@ -71,6 +25,7 @@ improvement. * Raw strings no longer require an `r` prefix: they are now specified by using `#""#`. +* Raw string productions are now explicitly non-greedy (and "fallible"). * Line continuations can be followed by an EOF now, instead of requiring a newline (or comment). `node \` is now a legal KDL document. * `#` is no longer a legal identifier character. @@ -91,7 +46,7 @@ should be valid UTF-8 now, as was intended. * The last node in a child block no longer needs to be terminated with `;`, even if the closing `}` is on the same line, so this is now a legal node: - `node {foo;bar;baz}` + `node{foo;bar;baz}` * More places allow whitespace (node-spaces, specifically) now. 
With great power comes great responsibility: * Inside `(foo)` annotations (so, `( foo )` would be legal (`( f oo )` would @@ -120,9 +75,14 @@ * Furthermore, The ordering of slashdashed elements has been restricted such that a slashdashed child block cannot go before an entry (including slashdashed entries). +* Optional version marker `/- kdl-version 2` (or `1`) as the first line in a document, optionally preceded by the BOM. ### KQL +> [!INFO] Note: these are provided for convenience, but as of the 2.0.0 KDL spec release, +> KQL itself is not finalized and should be considered a separate specification, +> alongside the Schema spec and others. + * There's now a _required_ descendant selector (`>>`), instead of using plain spaces for that purpose. * The "any sibling" selector is now `++` instead of `~`, for consistency with @@ -131,3 +91,54 @@ * Multi- and single-line comments are now supported, as well as line continuations with `\`. * Map operators have been removed entirely. + +--- + +## 2.0.0 Draft Changelogs + +### 2.0.0-draft.8 (2024-12-14) + +* Some details have been clarified around the treatment of whitespace in + multiline strings. +* `raw-string` productions have been updated to be explicitly non-greedy and + "fallible". +* Some tests have been added, others adjusted, some removed, after a cleanup pass. + + +### 2.0.0-draft.7 (2024-12-10) + +* `node-space` is now allowed as whitespace after a `slashdash`, meaning line + continuations will work now. +* One or two consecutive double-quotes are now allowed in the bodies of + multi-line quoted strings, without needing to be escaped. +* Grammar has been fixed to disallow raw strings like `#"""#`, which are now + properly treated as invalid multi-line raw strings (instead of the equivalent of + `"\""`). +* Test suite has been updated to include a `_fail` suffix in all test cases + which are expected to fail. +* A slew of additional slashdash and multi-line string compliance tests have + been added. Have fun. 
:) +* The organization of string types in the spec prose has been updated to a + hopefully more helpful structure. + + +### 2.0.0-draft.6 (2024-12-04) + +* Multiline strings, both Raw and Quoted, must now use `"""` instead of a single `"`. Using `"""` for a single-line string is a syntax error. +* Fixed an issue with the `unicode_silly` test case. +* Some rewordings and clarification in the spec prose. +* Slight grammar tweak where the pre-terminator `node-space*` for `node` and `final-node` have been moved into `base-node`. + + +### 2.0.0-draft.5 (2024-11-28) + +* Equals signs other than `=` are no longer supported in properties. +* 128-bit integer type annotations have been added to the list of "well-known" + type annotations. +* Multiline string escape rules have been tweaked significantly. +* `\s` is now a valid escape within a string, representing a space character. +* Slashdash (`/-`)-compatible locations and related grammar adjusted to be more + clear and intuitive. This includes some changes relating to whitespace, + including comments and newlines, which are breaking changes. +* Various updates to test suite to reflect changes. + diff --git a/README.md b/README.md index 561fa7c..1bf1005 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,9 @@ # The KDL Document Language -> [!WARNING] -> The main branch of this repository shows the latest v2.0.0 draft, which is a -> work in progress and not considered the "mainline" KDL yet. Most KDL -> implementations in the wild are based on the [v1.0.0 -> spec](https://github.com/kdl-org/kdl/tree/1.0.0) instead, so you may want to -> refer to that if you're using KDL today. - KDL is a small, pleasant document language with XML-like node semantics that looks like you're invoking a bunch of CLI commands! It's meant to be used both -as a serialization format and a configuration language, much like JSON, YAML, -or XML. It looks like this: +as a serialization format and a configuration language, much like JSON, YAML, or +XML. 
It looks like this: ```kdl package { @@ -51,27 +44,37 @@ package { } ``` +For more details, see the [overview below](#overview). + There's a living [specification](SPEC.md), as well as various [implementations](#implementations). You can also check out the [FAQ](#faq) to answer all your burning questions! -The current version of the KDL spec is `2.0.0-draft.8`. +The current version of the KDL spec is +[KDL 2.0.0](https://github.com/kdl-org/kdl/blob/2.0.0/SPEC.md). For legacy KDL, +please refer to the [KDL 1.0.0 +spec](https://github.com/kdl-org/kdl/blob/2.0.0/SPEC_v1.md). All users are +encouraged to migrate. [Migration is forward-and-backward-compatible and +safe](https://github.com/kdl-org/kdl/blob/2.0.0/SPEC.md#compatibility), and can +be automated. -In addition to a spec for KDL itself, there are also standard specs for [a KDL -Query Language](QUERY-SPEC.md) based on CSS selectors, and [a KDL Schema +In addition to a spec for KDL itself, there are specifications for [a KDL Query +Language](QUERY-SPEC.md) based on CSS selectors, and [a KDL Schema Language](SCHEMA-SPEC.md) loosely based on JSON Schema. The language is based on [SDLang](https://sdlang.org), with a [number of modifications and clarifications on its syntax and behavior](#why-not-sdlang). +We are grateful for their work as an inspiration to ours. -[Play with it in your browser!](https://kdl-play.danini.dev/) +[Play with it in your browser (currently v1 only)!](https://kdl-play.danini.dev/) ## Design and Discussion -KDL 2.0 design is still in progress. Discussions and questions about the format -should happen over on the [discussions -page](https://github.com/kdl-org/kdl/discussions). Feel free to jump in and give -us your 2 cents! +KDL 2.0.0 has been finalized, and no further changes are expected. For questions +about KDL and discussions, please see the [discussions +page](https://github.com/kdl-org/kdl/discussions). 
For minor editorial fixes or +critical spec errata, please feel free to [file an +issue](https://github.com/kdl-org/kdl/issues). ## Used By @@ -92,26 +95,38 @@ of some examples of KDL in the wild (either v1, v2, or both): ## Implementations -* Rust: [kdl-rs](https://github.com/kdl-org/kdl-rs), [knuffel](https://crates.io/crates/knuffel/) (latter includes derive macro), and [kaydle](https://github.com/Lucretiel/kaydle) (serde-based) -* JavaScript: [kdljs](https://github.com/kdl-org/kdljs), [@virtualstate/kdl](https://github.com/virtualstate/kdl) (query only, JSX based) -* Ruby: [kdl-rb](https://github.com/danini-the-panini/kdl-rb) -* Dart: [kdl-dart](https://github.com/danini-the-panini/kdl-dart) -* Java: [kdl4j](https://github.com/hkolbeck/kdl4j) -* PHP: [kdl-php](https://github.com/kdl-org/kdl-php) -* Python: [kdl-py](https://github.com/tabatkins/kdlpy), [cuddle](https://github.com/djmattyg007/python-cuddle), [ckdl](https://github.com/tjol/ckdl) -* Elixir: [kuddle](https://github.com/IceDragon200/kuddle) -* XSLT: [xml2kdl](https://github.com/Devasta/XML2KDL) -* Haskell: [Hustle](https://github.com/fuzzypixelz/Hustle) -* .NET: [Kadlet](https://github.com/oledfish/Kadlet) -* C: [ckdl](https://github.com/tjol/ckdl) -* C++: [kdlpp](https://github.com/tjol/ckdl) (part of ckdl, requires C++20) -* OCaml: [ocaml-kdl](https://github.com/Bannerets/ocaml-kdl) -* Nim: [kdl-nim](https://github.com/Patitotective/kdl-nim) -* Common Lisp: [kdlcl](https://github.com/chee/kdlcl) -* Go: [gokdl](https://github.com/lunjon/gokdl), [kdl-go](https://github.com/sblinch/kdl-go) -* Swift: [kdl-swift](https://github.com/danini-the-panini/kdl-swift) -* Crystal: [kdl-cr](https://github.com/danini-the-panini/kdl-cr) -* Lua: [kdlua](https://github.com/danini-the-panini/kdlua) +> [!INFO] There are two major versions of KDL. 
Different libraries may support one or the +> other, or even provide a "hybrid" mode where both versions are attempted, since +> there's no data ambiguity between v1 and v2 documents. + +| Language | Implementation | v1 | v2 | Notes | +|---|---|---|---|---| +| C | [ckdl](https://github.com/tjol/ckdl) | ✅ | ✅ | | +| C#/.NET | [Kadlet](https://github.com/oledfish/Kadlet) | ✅ | ✖️ | | +| C++ | [kdlpp](https://github.com/tjol/ckdl) | ✅ | ✅ | part of ckdl, requires C++20 | +| Common Lisp | [kdlcl](https://github.com/chee/kdlcl) | ✅ | ✖️ | | +| Crystal | [kdl-cr](https://github.com/danini-the-panini/kdl-cr) | ✅ | ✖️ | | +| Dart | [kdl-dart](https://github.com/danini-the-panini/kdl-dart) | ✅ | ✖️ | | +| Elixir | [kuddle](https://github.com/IceDragon200/kuddle) | ✅ | ✅ | | +| Go | [gokdl](https://github.com/lunjon/gokdl) | ✅ | ✖️ | | +| Go | [kdl-go](https://github.com/sblinch/kdl-go) | ✅ | ✖️ | | +| Haskell | [Hustle](https://github.com/fuzzypixelz/Hustle) | ✅ | ✖️ | | +| Java | [kdl4j](https://github.com/hkolbeck/kdl4j) | ✅ | ✖️ | | +| JavaScript | [@bgotink/kdl](https://github.com/bgotink/kdl) | ✅ | ✅ | Format/comment-preserving parser | +| JavaScript | [@virtualstate/kdl](https://github.com/virtualstate/kdl) | ✅ | ✖️ | query only, JSX based | +| JavaScript | [kdljs](https://github.com/kdl-org/kdljs) | ✅ | ✖️ | | +| Lua | [kdlua](https://github.com/danini-the-panini/kdlua) | ✅ | ✖️ | | +| Nim | [kdl-nim](https://github.com/Patitotective/kdl-nim) | ✅ | ✖️ | | +| OCaml | [ocaml-kdl](https://github.com/Bannerets/ocaml-kdl) | ✅ | ✖️ | | +| PHP | [kdl-php](https://github.com/kdl-org/kdl-php) | ✅ | ✖️ | | +| Python | [ckdl](https://github.com/tjol/ckdl) | ✅ | ✅ | | +| Python | [cuddle](https://github.com/djmattyg007/python-cuddle) | ✅ | ✖️ | | +| Python | [kdl-py](https://github.com/tabatkins/kdlpy) | ✅ | ✅ | | +| Ruby | [kdl-rb](https://github.com/danini-the-panini/kdl-rb) | ✅ | ✖️ | | +| Rust | [kdl-rs](https://github.com/kdl-org/kdl-rs) | ✅ | ✅ | Format/comment-preserving 
parser | +| Rust | [knus](https://crates.io/crates/knus/) | ✅ | ✖️ | Serde-_style_ derive macros (not actual Serde) | +| Swift | [kdl-swift](https://github.com/danini-the-panini/kdl-swift) | ✅ | ✖️ | | +| XSLT | [xml2kdl](https://github.com/Devasta/XML2KDL) | ✅ | ✖️ | | ## Compatibility Test Suite @@ -123,11 +138,13 @@ entirety, but in the future, may be required to in order to be included here. ## Editor Support -* [VS Code](https://marketplace.visualstudio.com/items?itemName=kdl-org.kdl&ssr=false#review-details) +* [Intellij IDEA](https://plugins.jetbrains.com/plugin/20136-kdl-document-language) * [Sublime Text](https://packagecontrol.io/packages/KDL) +* [TreeSitter](https://github.com/tree-sitter-grammars/tree-sitter-kdl) (neovim, among others) +* [VS Code](https://marketplace.visualstudio.com/items?itemName=kdl-org.kdl&ssr=false#review-details)\* * [vim](https://github.com/imsnif/kdl.vim) -* [neovim](https://github.com/tree-sitter-grammars/tree-sitter-kdl) -* [Intellij IDEA](https://plugins.jetbrains.com/plugin/20136-kdl-document-language) + +\* Supports KDL 2.0.0 ## Overview @@ -167,7 +184,7 @@ Nodes without children are terminated by a newline, a semicolon, or the end of a file stream: ```kdl -node1; node2; node3; +node1; node2; node3 ``` ### Values @@ -175,13 +192,13 @@ node1; node2; node3; KDL supports 4 data types: * Strings: `unquoted`, `"hello world"`, or `#"hello world"#` -* Numbers: `123.45` +* Numbers: `123.45`, `0xdeadbeef`, `#inf`, `#-inf`, `#nan` * Booleans: `#true` and `#false` * Null: `#null` #### Strings -It supports three different formats for string input: identifiers, quoted, and raw. +It supports three different formats for string input: unquoted, quoted, and raw. ```kdl node1 this-is-a-string @@ -234,10 +251,10 @@ other-raw ##"hello#"world"## #### Numbers -There are 4 ways to represent numbers in KDL. 
KDL does not prescribe any -representation for these numbers, and it's entirely up to individual -implementations whether to represent all numbers with a single type, or to -have different representations for different forms. +There are 4 ways to represent numbers in KDL, plus 3 float keywords. KDL does +not prescribe any representation for these numbers, and it's entirely up to +individual implementations whether to represent all numbers with a single type, +or to have different representations for different forms. KDL has regular decimal-radix numbers, with optional decimal part, as well as an optional exponent. @@ -255,6 +272,13 @@ my-octal 0o755 my-binary 0b10101101 ``` +If you're intending to represent IEEE 754 floats, there are three special +keywords you can use: + +```kdl +special-floats #inf #-inf #nan +``` + Finally, all numbers can have underscores to help readability: ```kdl diff --git a/SPEC.md b/SPEC.md index ba43cb5..83e8dfa 100644 --- a/SPEC.md +++ b/SPEC.md @@ -1,20 +1,25 @@ # KDL Spec -This is the semi-formal specification for KDL, including the intended data -model and the grammar. +This is the formal specification for KDL, including the intended data model and +the grammar. -This document describes KDL version `2.0.0-draft.8`. It was released on -2024-12-14. +This document describes KDL version 2.0.0. It was released on 2024-12-21. It +is the latest stable version of the language, and will only be edited for minor +copyedits or major errata. ## Compatibility -KDL v2 is designed such that for any given KDL document written as [KDL +KDL 2.0 is designed such that for any given KDL document written as [KDL 1.0](./SPEC_v1.md) or KDL 2.0, the parse will either fail completely, or, if the parse succeeds, the data represented by a v1 or v2 parser will be identical. This means that it's safe to use a fallback parsing strategy in order to support both v1 and v2 simultaneously. 
For example, `node "foo"` is a valid node in both versions, and should be represented identically by parsers. +A version marker `/- kdl-version 2` (or `1`) _MAY_ be added to the beginning of +a KDL document, optionally preceded by the BOM, and parsers _MAY_ use that as a +hint as to which version to parse the document as. + ## Introduction KDL is a node-oriented document language. Its niche and purpose overlaps with @@ -422,7 +427,6 @@ such) are retained. For example, these strings are all semantically identical: Except as described in the escapes table, above, `\` *MUST NOT* precede any other characters in a string. - ### Multi-line String Multi-Line Strings support multiple lines with literal, non-escaped @@ -576,22 +580,28 @@ multi-line """[\n] #### Interaction with Whitespace Escapes -Multi-line strings support the same mechanism for escaping whitespace -as Quoted Strings. -When processing a Multi-line String, implementations MUST dedent the string _after_ -resolving all whitespace escapes, but _before_ resolving other backslash escapes. -Furthermore, a whitespace escape that attempts to escape the final line's newline -and/or whitespace prefix is invalid since the multi-line string has to still be -valid with the escaped whitespace removed. +Multi-line strings support the same mechanism for escaping whitespace as Quoted +Strings. + +When processing a Multi-line String, implementations MUST dedent the string +_after_ resolving all whitespace escapes, but _before_ resolving other backslash +escapes. This means a whitespace escape that attempts to escape the final line's +newline and/or whitespace prefix can be invalid: if removing escaped whitespace +places the closing `"""` on a line with non-whitespace characters, this escape +is invalid. For example, the following example is illegal: ```kdl - // Equivalent to trying to write a string containing `foo\nbar\`. 
""" foo bar\ """ + + // equivalent to + """ + foo + bar""" ``` while the following example is allowed @@ -611,20 +621,19 @@ bar ### Raw String -Both [Quoted](#quoted-string) and [Multi-Line Strings](#multi-line-string) -have Raw String variants, -which are identical in syntax except they do not support `\`-escapes. -They otherwise share the same properties as far as -literal [Newline](#newline) characters go, multi-line rules, and the requirement -of UTF-8 representation. +Both [Quoted](#quoted-string) and [Multi-Line Strings](#multi-line-string) have +Raw String variants, which are identical in syntax except they do not support +`\`-escapes. This includes line-continuation escapes (`\` + `ws` collapsing to +nothing). They otherwise share the same properties as far as literal +[Newline](#newline) characters go, multi-line rules, and the requirement of +UTF-8 representation. The Raw String variants are indicated by preceding the strings's opening quotes -with one or more `#` characters. -The string is then closed by its normal closing quotes, -followed by a _matching_ number of `#` characters. -This means that the string may contain any combination of `"` and `#` characters -other than its closing delimiter (e.g., if a raw string starts with `##"`, it can -contain `"` or `"#`, but not `"##` or `"###`). +with one or more `#` characters. The string is then closed by its normal closing +quotes, followed by a _matching_ number of `#` characters. This means that the +string may contain any combination of `"` and `#` characters other than its +closing delimiter (e.g., if a raw string starts with `##"`, it can contain `"` +or `"#`, but not `"##` or `"###`). 
Like other Strings, Raw Strings _MUST NOT_ include any of the [disallowed literal code-points](#disallowed-literal-code-points) as code points in their @@ -737,7 +746,6 @@ space](https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt): | Name | Code Pt | |----------------------|---------| | Character Tabulation | `U+0009` | -| Line Tabulation | `U+000B` | | Space | `U+0020` | | No-Break Space | `U+00A0` | | Ogham Space Mark | `U+1680` | @@ -800,6 +808,7 @@ lines](https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf): | CR | Carriage Return | `U+000D` | | LF | Line Feed | `U+000A` | | NEL | Next Line | `U+0085` | +| VT | Vertical tab | `U+000B` | | FF | Form Feed | `U+000C` | | LS | Line Separator | `U+2028` | | PS | Paragraph Separator | `U+2029` | @@ -830,7 +839,7 @@ authoritative if something seems to disagree with the text above. The [grammar language syntax](#grammar-language) is defined below. ``` -document := bom? nodes +document := bom? version? nodes // Nodes nodes := (line-space* node)* line-space* @@ -861,18 +870,18 @@ identifier-string := unambiguous-ident | signed-ident | dotted-ident unambiguous-ident := ((identifier-char - digit - sign - '.') identifier-char*) - disallowed-keyword-strings signed-ident := sign ((identifier-char - digit - '.') identifier-char*)? dotted-ident := sign? '.' ((identifier-char - digit) identifier-char*)? 
-identifier-char := unicode - unicode-space - newline - [\\/(){};\[\]"#=] - disallowed-literal-code-points - equals-sign -disallowed-keyword-identifiers := 'true' - 'false' - 'null' - 'inf' - '-inf' - 'nan' +identifier-char := unicode - unicode-space - newline - [\\/(){};\[\]"#=] - disallowed-literal-code-points +disallowed-keyword-identifiers := 'true' | 'false' | 'null' | 'inf' | '-inf' | 'nan' -quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline unicode-space* '"""' +quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline (unicode-space | ws-escape)* '"""' single-line-string-body := (string-character - newline)* multi-line-string-body := (('"' | '""')? string-character)* -string-character := '\' escape | [^\\"] - disallowed-literal-code-points -escape := ["\\bfnrts] | 'u{' hex-digit{1, 6} '}' | (unicode-space | newline)+ +string-character := '\\' (["\\bfnrts] | 'u{' hex-digit{1, 6} '}') | ws-escape | [^\\"] - disallowed-literal-code-points +ws-escape := '\\' (unicode-space | newline)+ hex-digit := [0-9a-fA-F] raw-string := '#' raw-string-quotes '#' | '#' raw-string '#' -raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body '"""' +raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body newline unicode-space* '"""' single-line-raw-string-body := '' | (single-line-raw-string-char - '"') single-line-raw-string-char*? | '"' (single-line-raw-string-char - '"') single-line-raw-string-char*? single-line-raw-string-char := unicode - newline - disallowed-literal-code-points multi-line-raw-string-body := (unicode - disallowed-literal-code-points)*? 
@@ -905,16 +914,19 @@ unicode-space := See Table (All White_Space unicode characters which are not `ne single-line-comment := '//' ^newline* (newline | eof) multi-line-comment := '/*' commented-block commented-block := '*/' | (multi-line-comment | '*' | '/' | [^*/]+) commented-block -slashdash := '/-' (node-space | line-space)* +slashdash := '/-' line-space* // Whitespace ws := unicode-space | multi-line-comment escline := '\\' ws* (single-line-comment | newline | eof) newline := See Table (All Newline White_Space) // Whitespace where newlines are allowed. -line-space := newline | ws | single-line-comment +line-space := node-space | newline | single-line-comment // Whitespace within nodes, where newline-ish things must be esclined. node-space := ws* escline ws* | ws+ + +// Version marker +version := '/-' unicode-space* 'kdl-version' unicode-space+ ('1' | '2') unicode-space* newline ``` ### Grammar language diff --git a/SPEC_v1.md b/SPEC_v1.md index 60d83a3..22258bd 100644 --- a/SPEC_v1.md +++ b/SPEC_v1.md @@ -28,6 +28,10 @@ This means that it's safe to use a fallback parsing strategy in order to support both v1 and v2 simultaneously. For example, `node "foo"` is a valid node in both versions, and should be represented identically by parsers. +A version marker `/- kdl-version 1` (or `2`) _MAY_ be added to the beginning of +a KDL document, optionally preceded by the BOM, and parsers _MAY_ use that as a +hint as to which version to parse the document as. + ## Introduction KDL is a node-oriented document language. 
Its niche and purpose overlaps with diff --git a/tests/test_cases/expected_kdl/esc_multiple_newlines.kdl b/tests/test_cases/expected_kdl/esc_multiple_newlines.kdl new file mode 100644 index 0000000..032ed17 --- /dev/null +++ b/tests/test_cases/expected_kdl/esc_multiple_newlines.kdl @@ -0,0 +1 @@ +node "12" diff --git a/tests/test_cases/expected_kdl/escline_after_semicolon.kdl b/tests/test_cases/expected_kdl/escline_after_semicolon.kdl new file mode 100644 index 0000000..3e545b1 --- /dev/null +++ b/tests/test_cases/expected_kdl/escline_after_semicolon.kdl @@ -0,0 +1,2 @@ +node +node diff --git a/tests/test_cases/expected_kdl/escline_alone.kdl b/tests/test_cases/expected_kdl/escline_alone.kdl new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/test_cases/expected_kdl/escline_alone.kdl @@ -0,0 +1 @@ + diff --git a/tests/test_cases/expected_kdl/escline_empty_line.kdl b/tests/test_cases/expected_kdl/escline_empty_line.kdl new file mode 100644 index 0000000..64f5a0a --- /dev/null +++ b/tests/test_cases/expected_kdl/escline_empty_line.kdl @@ -0,0 +1 @@ +node diff --git a/tests/test_cases/expected_kdl/escline_end_of_node.kdl b/tests/test_cases/expected_kdl/escline_end_of_node.kdl new file mode 100644 index 0000000..422c2b7 --- /dev/null +++ b/tests/test_cases/expected_kdl/escline_end_of_node.kdl @@ -0,0 +1,2 @@ +a +b diff --git a/tests/test_cases/expected_kdl/escline_in_child_block.kdl b/tests/test_cases/expected_kdl/escline_in_child_block.kdl new file mode 100644 index 0000000..9b05e30 --- /dev/null +++ b/tests/test_cases/expected_kdl/escline_in_child_block.kdl @@ -0,0 +1,4 @@ +parent { + child + child +} diff --git a/tests/test_cases/expected_kdl/escline_node_type.kdl b/tests/test_cases/expected_kdl/escline_node_type.kdl new file mode 100644 index 0000000..c790643 --- /dev/null +++ b/tests/test_cases/expected_kdl/escline_node_type.kdl @@ -0,0 +1 @@ +(type)node diff --git a/tests/test_cases/expected_kdl/escline_slashdash.kdl 
b/tests/test_cases/expected_kdl/escline_slashdash.kdl new file mode 100644 index 0000000..64f5a0a --- /dev/null +++ b/tests/test_cases/expected_kdl/escline_slashdash.kdl @@ -0,0 +1 @@ +node diff --git a/tests/test_cases/expected_kdl/multiline_string_double_backslash.kdl b/tests/test_cases/expected_kdl/multiline_string_double_backslash.kdl new file mode 100644 index 0000000..38ad0e5 --- /dev/null +++ b/tests/test_cases/expected_kdl/multiline_string_double_backslash.kdl @@ -0,0 +1 @@ +node "a\\ b\na\\b" diff --git a/tests/test_cases/expected_kdl/multiline_string_escape_delimiter.kdl b/tests/test_cases/expected_kdl/multiline_string_escape_delimiter.kdl new file mode 100644 index 0000000..025d655 --- /dev/null +++ b/tests/test_cases/expected_kdl/multiline_string_escape_delimiter.kdl @@ -0,0 +1 @@ +node "\"\"\"" diff --git a/tests/test_cases/expected_kdl/multiline_string_escape_in_closing_line.kdl b/tests/test_cases/expected_kdl/multiline_string_escape_in_closing_line.kdl new file mode 100644 index 0000000..e7186a6 --- /dev/null +++ b/tests/test_cases/expected_kdl/multiline_string_escape_in_closing_line.kdl @@ -0,0 +1 @@ +node "foo bar\nbaz" diff --git a/tests/test_cases/expected_kdl/multiline_string_escape_in_closing_line_shallow.kdl b/tests/test_cases/expected_kdl/multiline_string_escape_in_closing_line_shallow.kdl new file mode 100644 index 0000000..314069e --- /dev/null +++ b/tests/test_cases/expected_kdl/multiline_string_escape_in_closing_line_shallow.kdl @@ -0,0 +1 @@ +node " foo bar\n baz" diff --git a/tests/test_cases/expected_kdl/multiline_string_escape_newline_at_end.kdl b/tests/test_cases/expected_kdl/multiline_string_escape_newline_at_end.kdl new file mode 100644 index 0000000..56fe48f --- /dev/null +++ b/tests/test_cases/expected_kdl/multiline_string_escape_newline_at_end.kdl @@ -0,0 +1 @@ +node " a" diff --git a/tests/test_cases/input/esc_multiple_newlines.kdl b/tests/test_cases/input/esc_multiple_newlines.kdl new file mode 100644 index 0000000..f3d91c4 
--- /dev/null +++ b/tests/test_cases/input/esc_multiple_newlines.kdl @@ -0,0 +1,4 @@ +node "1\ + + +2" diff --git a/tests/test_cases/input/escline_after_semicolon.kdl b/tests/test_cases/input/escline_after_semicolon.kdl new file mode 100644 index 0000000..59a4ab8 --- /dev/null +++ b/tests/test_cases/input/escline_after_semicolon.kdl @@ -0,0 +1,2 @@ +node; \ +node diff --git a/tests/test_cases/input/escline_alone.kdl b/tests/test_cases/input/escline_alone.kdl new file mode 100644 index 0000000..57ddad2 --- /dev/null +++ b/tests/test_cases/input/escline_alone.kdl @@ -0,0 +1 @@ +\ diff --git a/tests/test_cases/input/escline_empty_line.kdl b/tests/test_cases/input/escline_empty_line.kdl new file mode 100644 index 0000000..1777a83 --- /dev/null +++ b/tests/test_cases/input/escline_empty_line.kdl @@ -0,0 +1,3 @@ +\ + +node diff --git a/tests/test_cases/input/escline_end_of_node.kdl b/tests/test_cases/input/escline_end_of_node.kdl new file mode 100644 index 0000000..87dd42b --- /dev/null +++ b/tests/test_cases/input/escline_end_of_node.kdl @@ -0,0 +1,3 @@ +a \ + +b diff --git a/tests/test_cases/input/escline_in_child_block.kdl b/tests/test_cases/input/escline_in_child_block.kdl new file mode 100644 index 0000000..dfbe682 --- /dev/null +++ b/tests/test_cases/input/escline_in_child_block.kdl @@ -0,0 +1,5 @@ +parent { + child + \ // comment + child +} diff --git a/tests/test_cases/input/escline_node.kdl b/tests/test_cases/input/escline_node.kdl index 1c5b5f3..215f634 100644 --- a/tests/test_cases/input/escline_node.kdl +++ b/tests/test_cases/input/escline_node.kdl @@ -1,2 +1,3 @@ node1 +\ node2 diff --git a/tests/test_cases/input/escline_node_type.kdl b/tests/test_cases/input/escline_node_type.kdl new file mode 100644 index 0000000..fb22096 --- /dev/null +++ b/tests/test_cases/input/escline_node_type.kdl @@ -0,0 +1,2 @@ +\ +(type)node diff --git a/tests/test_cases/input/escline_slashdash.kdl b/tests/test_cases/input/escline_slashdash.kdl new file mode 100644 index 
0000000..8cb0956 --- /dev/null +++ b/tests/test_cases/input/escline_slashdash.kdl @@ -0,0 +1,4 @@ +node +\ +/- +node diff --git a/tests/test_cases/input/multiline_string_double_backslash.kdl b/tests/test_cases/input/multiline_string_double_backslash.kdl new file mode 100644 index 0000000..7289aa9 --- /dev/null +++ b/tests/test_cases/input/multiline_string_double_backslash.kdl @@ -0,0 +1,4 @@ +node """ +a\\ b +a\\\ b +""" diff --git a/tests/test_cases/input/multiline_string_escape_delimiter.kdl b/tests/test_cases/input/multiline_string_escape_delimiter.kdl new file mode 100644 index 0000000..d873e8c --- /dev/null +++ b/tests/test_cases/input/multiline_string_escape_delimiter.kdl @@ -0,0 +1,3 @@ +node """ +\""" +""" diff --git a/tests/test_cases/input/multiline_string_escape_in_closing_line.kdl b/tests/test_cases/input/multiline_string_escape_in_closing_line.kdl new file mode 100644 index 0000000..23a5796 --- /dev/null +++ b/tests/test_cases/input/multiline_string_escape_in_closing_line.kdl @@ -0,0 +1,5 @@ +node """ + foo \ +bar + baz + \ """ diff --git a/tests/test_cases/input/multiline_string_escape_in_closing_line_shallow.kdl b/tests/test_cases/input/multiline_string_escape_in_closing_line_shallow.kdl new file mode 100644 index 0000000..fba22a4 --- /dev/null +++ b/tests/test_cases/input/multiline_string_escape_in_closing_line_shallow.kdl @@ -0,0 +1,5 @@ +node """ + foo \ +bar + baz +\ """ diff --git a/tests/test_cases/input/multiline_string_escape_newline_at_end.kdl b/tests/test_cases/input/multiline_string_escape_newline_at_end.kdl new file mode 100644 index 0000000..045bf6c --- /dev/null +++ b/tests/test_cases/input/multiline_string_escape_newline_at_end.kdl @@ -0,0 +1,4 @@ +node """ + a + \ +""" diff --git a/tests/test_cases/input/multiline_string_escape_newline_at_end_fail.kdl b/tests/test_cases/input/multiline_string_escape_newline_at_end_fail.kdl new file mode 100644 index 0000000..692c8c2 --- /dev/null +++ 
b/tests/test_cases/input/multiline_string_escape_newline_at_end_fail.kdl @@ -0,0 +1,4 @@ +node """ +a + \ +""" diff --git a/tests/test_cases/input/multiline_string_final_whitespace_escape_fail.kdl b/tests/test_cases/input/multiline_string_final_whitespace_escape_fail.kdl new file mode 100644 index 0000000..84310da --- /dev/null +++ b/tests/test_cases/input/multiline_string_final_whitespace_escape_fail.kdl @@ -0,0 +1,4 @@ +node """ + foo + bar\ + """ \ No newline at end of file diff --git a/tests/test_cases/input/multiline_string_non_literal_prefix_fail.kdl b/tests/test_cases/input/multiline_string_non_literal_prefix_fail.kdl new file mode 100644 index 0000000..bb7f188 --- /dev/null +++ b/tests/test_cases/input/multiline_string_non_literal_prefix_fail.kdl @@ -0,0 +1,4 @@ +node """ +\s escaped prefix + literal prefix + """ diff --git a/tests/test_cases/input/zero_space_before_first_arg_fail.kdl b/tests/test_cases/input/zero_space_before_first_arg_fail.kdl new file mode 100644 index 0000000..250d1f9 --- /dev/null +++ b/tests/test_cases/input/zero_space_before_first_arg_fail.kdl @@ -0,0 +1 @@ +node"string" diff --git a/tests/test_cases/input/zero_space_before_prop_fail.kdl b/tests/test_cases/input/zero_space_before_prop_fail.kdl new file mode 100644 index 0000000..4497205 --- /dev/null +++ b/tests/test_cases/input/zero_space_before_prop_fail.kdl @@ -0,0 +1 @@ +node foo="value"bar=5 diff --git a/tests/test_cases/input/zero_space_before_second_arg_fail.kdl b/tests/test_cases/input/zero_space_before_second_arg_fail.kdl new file mode 100644 index 0000000..604500e --- /dev/null +++ b/tests/test_cases/input/zero_space_before_second_arg_fail.kdl @@ -0,0 +1 @@ +node "string"1 diff --git a/tests/test_cases/input/zero_space_before_slashdash_arg_fail.kdl b/tests/test_cases/input/zero_space_before_slashdash_arg_fail.kdl new file mode 100644 index 0000000..5a2a6b3 --- /dev/null +++ b/tests/test_cases/input/zero_space_before_slashdash_arg_fail.kdl @@ -0,0 +1 @@ +node "string"/-1 
diff --git a/tests/vertical_tab_whitespace.kdl b/tests/vertical_tab_whitespace.kdl new file mode 100644 index 0000000..42d333e --- /dev/null +++ b/tests/vertical_tab_whitespace.kdl @@ -0,0 +1,2 @@ +node arg +node2 arg2