From 1edc35f651331e3a228eebe8a8d2e52395a470e7 Mon Sep 17 00:00:00 2001 From: Carson McManus Date: Thu, 9 Jan 2025 09:57:09 -0500 Subject: [PATCH] feat(parser,format/html): add CDATA sections (#4859) --- .../src/generated/node_factory.rs | 14 ++ .../src/generated/syntax_factory.rs | 33 +++++ crates/biome_html_formatter/src/generated.rs | 38 ++++++ .../src/html/any/element.rs | 1 + .../src/html/auxiliary/cdata_section.rs | 23 ++++ .../src/html/auxiliary/mod.rs | 1 + crates/biome_html_parser/src/lexer/mod.rs | 53 ++++++++ crates/biome_html_parser/src/lexer/tests.rs | 11 ++ crates/biome_html_parser/src/syntax/mod.rs | 14 ++ crates/biome_html_parser/src/token_source.rs | 2 + .../tests/html_specs/ok/cdata.html | 4 + .../tests/html_specs/ok/cdata.html.snap | 73 ++++++++++ .../biome_html_syntax/src/generated/kind.rs | 18 ++- .../biome_html_syntax/src/generated/macros.rs | 4 + .../biome_html_syntax/src/generated/nodes.rs | 127 ++++++++++++++++++ .../src/generated/nodes_mut.rs | 20 +++ xtask/codegen/html.ungram | 8 ++ xtask/codegen/src/generate_nodes.rs | 4 +- xtask/codegen/src/generate_syntax_kinds.rs | 2 +- xtask/codegen/src/html_kinds_src.rs | 3 + xtask/codegen/src/js_kinds_src.rs | 2 + 21 files changed, 450 insertions(+), 5 deletions(-) create mode 100644 crates/biome_html_formatter/src/html/auxiliary/cdata_section.rs create mode 100644 crates/biome_html_parser/tests/html_specs/ok/cdata.html create mode 100644 crates/biome_html_parser/tests/html_specs/ok/cdata.html.snap diff --git a/crates/biome_html_factory/src/generated/node_factory.rs b/crates/biome_html_factory/src/generated/node_factory.rs index 90af9c92128f..01e871947e37 100644 --- a/crates/biome_html_factory/src/generated/node_factory.rs +++ b/crates/biome_html_factory/src/generated/node_factory.rs @@ -44,6 +44,20 @@ pub fn html_attribute_initializer_clause( ], )) } +pub fn html_cdata_section( + cdata_start_token: SyntaxToken, + content_token: SyntaxToken, + cdata_end_token: SyntaxToken, +) -> HtmlCdataSection { + 
HtmlCdataSection::unwrap_cast(SyntaxNode::new_detached( + HtmlSyntaxKind::HTML_CDATA_SECTION, + [ + Some(SyntaxElement::Token(cdata_start_token)), + Some(SyntaxElement::Token(content_token)), + Some(SyntaxElement::Token(cdata_end_token)), + ], + )) +} pub fn html_closing_element( l_angle_token: SyntaxToken, slash_token: SyntaxToken, diff --git a/crates/biome_html_factory/src/generated/syntax_factory.rs b/crates/biome_html_factory/src/generated/syntax_factory.rs index a1a7f9b754df..9c286b01721a 100644 --- a/crates/biome_html_factory/src/generated/syntax_factory.rs +++ b/crates/biome_html_factory/src/generated/syntax_factory.rs @@ -69,6 +69,39 @@ impl SyntaxFactory for HtmlSyntaxFactory { } slots.into_node(HTML_ATTRIBUTE_INITIALIZER_CLAUSE, children) } + HTML_CDATA_SECTION => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); + if let Some(element) = ¤t_element { + if element.kind() == T![""] { + slots.mark_present(); + current_element = elements.next(); + } + } + slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + HTML_CDATA_SECTION.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(HTML_CDATA_SECTION, children) + } HTML_CLOSING_ELEMENT => { let mut elements = (&children).into_iter(); let mut slots: RawNodeSlots<4usize> = RawNodeSlots::default(); diff --git a/crates/biome_html_formatter/src/generated.rs b/crates/biome_html_formatter/src/generated.rs index 5acae9a40442..8c08c0a20b43 100644 --- a/crates/biome_html_formatter/src/generated.rs +++ b/crates/biome_html_formatter/src/generated.rs @@ -75,6 +75,44 @@ impl IntoFormat for biome_html_syntax::HtmlAttributeInitializ FormatOwnedWithRule :: new (self , crate :: html :: auxiliary :: attribute_initializer_clause :: FormatHtmlAttributeInitializerClause :: default ()) } } +impl FormatRule + for crate::html::auxiliary::cdata_section::FormatHtmlCdataSection +{ + 
type Context = HtmlFormatContext; + #[inline(always)] + fn fmt( + &self, + node: &biome_html_syntax::HtmlCdataSection, + f: &mut HtmlFormatter, + ) -> FormatResult<()> { + FormatNodeRule::::fmt(self, node, f) + } +} +impl AsFormat for biome_html_syntax::HtmlCdataSection { + type Format<'a> = FormatRefWithRule< + 'a, + biome_html_syntax::HtmlCdataSection, + crate::html::auxiliary::cdata_section::FormatHtmlCdataSection, + >; + fn format(&self) -> Self::Format<'_> { + FormatRefWithRule::new( + self, + crate::html::auxiliary::cdata_section::FormatHtmlCdataSection::default(), + ) + } +} +impl IntoFormat for biome_html_syntax::HtmlCdataSection { + type Format = FormatOwnedWithRule< + biome_html_syntax::HtmlCdataSection, + crate::html::auxiliary::cdata_section::FormatHtmlCdataSection, + >; + fn into_format(self) -> Self::Format { + FormatOwnedWithRule::new( + self, + crate::html::auxiliary::cdata_section::FormatHtmlCdataSection::default(), + ) + } +} impl FormatRule for crate::html::auxiliary::closing_element::FormatHtmlClosingElement { diff --git a/crates/biome_html_formatter/src/html/any/element.rs b/crates/biome_html_formatter/src/html/any/element.rs index a3a8862826c4..df4c21f4d92e 100644 --- a/crates/biome_html_formatter/src/html/any/element.rs +++ b/crates/biome_html_formatter/src/html/any/element.rs @@ -9,6 +9,7 @@ impl FormatRule for FormatAnyHtmlElement { fn fmt(&self, node: &AnyHtmlElement, f: &mut HtmlFormatter) -> FormatResult<()> { match node { AnyHtmlElement::HtmlBogusElement(node) => node.format().fmt(f), + AnyHtmlElement::HtmlCdataSection(node) => node.format().fmt(f), AnyHtmlElement::HtmlComment(node) => node.format().fmt(f), AnyHtmlElement::HtmlContent(node) => node.format().fmt(f), AnyHtmlElement::HtmlElement(node) => node.format().fmt(f), diff --git a/crates/biome_html_formatter/src/html/auxiliary/cdata_section.rs b/crates/biome_html_formatter/src/html/auxiliary/cdata_section.rs new file mode 100644 index 000000000000..37828e18bdbd --- /dev/null +++ 
b/crates/biome_html_formatter/src/html/auxiliary/cdata_section.rs @@ -0,0 +1,23 @@ +use crate::prelude::*; +use biome_formatter::write; +use biome_html_syntax::{HtmlCdataSection, HtmlCdataSectionFields}; +#[derive(Debug, Clone, Default)] +pub(crate) struct FormatHtmlCdataSection; +impl FormatNodeRule for FormatHtmlCdataSection { + fn fmt_fields(&self, node: &HtmlCdataSection, f: &mut HtmlFormatter) -> FormatResult<()> { + let HtmlCdataSectionFields { + cdata_start_token, + content_token, + cdata_end_token, + } = node.as_fields(); + + write!( + f, + [ + cdata_start_token.format(), + content_token.format(), + cdata_end_token.format() + ] + ) + } +} diff --git a/crates/biome_html_formatter/src/html/auxiliary/mod.rs b/crates/biome_html_formatter/src/html/auxiliary/mod.rs index 021b788091a7..8deb77568267 100644 --- a/crates/biome_html_formatter/src/html/auxiliary/mod.rs +++ b/crates/biome_html_formatter/src/html/auxiliary/mod.rs @@ -2,6 +2,7 @@ pub(crate) mod attribute; pub(crate) mod attribute_initializer_clause; +pub(crate) mod cdata_section; pub(crate) mod closing_element; pub(crate) mod comment; pub(crate) mod content; diff --git a/crates/biome_html_parser/src/lexer/mod.rs b/crates/biome_html_parser/src/lexer/mod.rs index d9826fd9057d..d2c7596a6f3e 100644 --- a/crates/biome_html_parser/src/lexer/mod.rs +++ b/crates/biome_html_parser/src/lexer/mod.rs @@ -159,6 +159,24 @@ impl<'src> HtmlLexer<'src> { } } + /// Consume a token in the [HtmlLexContext::CdataSection] context. + fn consume_inside_cdata(&mut self, current: u8) -> HtmlSyntaxKind { + match current { + b'<' if self.at_start_cdata() => self.consume_cdata_start(), + b']' if self.at_end_cdata() => self.consume_cdata_end(), + _ => { + while let Some(char) = self.current_byte() { + if self.at_end_cdata() { + // eat ]]> + break; + } + self.advance_byte_or_char(char); + } + HTML_LITERAL + } + } + } + /// Bumps the current byte and creates a lexed token of the passed in kind. 
#[inline] fn consume_byte(&mut self, tok: HtmlSyntaxKind) -> HtmlSyntaxKind { @@ -358,6 +376,8 @@ impl<'src> HtmlLexer<'src> { if self.at_start_comment() { self.consume_comment_start() + } else if self.at_start_cdata() { + self.consume_cdata_start() } else { self.consume_byte(T![<]) } @@ -376,6 +396,24 @@ impl<'src> HtmlLexer<'src> { && self.byte_at(2) == Some(b'>') } + fn at_start_cdata(&mut self) -> bool { + self.current_byte() == Some(b'<') + && self.byte_at(1) == Some(b'!') + && self.byte_at(2) == Some(b'[') + && self.byte_at(3) == Some(b'C') + && self.byte_at(4) == Some(b'D') + && self.byte_at(5) == Some(b'A') + && self.byte_at(6) == Some(b'T') + && self.byte_at(7) == Some(b'A') + && self.byte_at(8) == Some(b'[') + } + + fn at_end_cdata(&mut self) -> bool { + self.current_byte() == Some(b']') + && self.byte_at(1) == Some(b']') + && self.byte_at(2) == Some(b'>') + } + fn consume_comment_start(&mut self) -> HtmlSyntaxKind { debug_assert!(self.at_start_comment()); @@ -390,6 +428,20 @@ impl<'src> HtmlLexer<'src> { T![-->] } + fn consume_cdata_start(&mut self) -> HtmlSyntaxKind { + debug_assert!(self.at_start_cdata()); + + self.advance(9); + T![" HtmlSyntaxKind { + debug_assert!(self.at_end_cdata()); + + self.advance(3); + T!["]]>"] + } + /// Lexes a `\u0000` escape sequence. Assumes that the lexer is positioned at the `u` token. /// /// A unicode escape sequence must consist of 4 hex characters. 
@@ -517,6 +569,7 @@ impl<'src> Lexer<'src> for HtmlLexer<'src> { self.consume_token_embedded_language(current, lang) } HtmlLexContext::Comment => self.consume_inside_comment(current), + HtmlLexContext::CdataSection => self.consume_inside_cdata(current), }, None => EOF, } diff --git a/crates/biome_html_parser/src/lexer/tests.rs b/crates/biome_html_parser/src/lexer/tests.rs index 1eca6ad5a9b4..f8fa150f37c7 100644 --- a/crates/biome_html_parser/src/lexer/tests.rs +++ b/crates/biome_html_parser/src/lexer/tests.rs @@ -347,3 +347,14 @@ fn comment_full() { COMMENT_END: 3, } } + +#[test] +fn cdata_full() { + assert_lex! { + HtmlLexContext::CdataSection, + "", + CDATA_START: 9, + HTML_LITERAL: 1, + CDATA_END: 3, + } +} diff --git a/crates/biome_html_parser/src/syntax/mod.rs b/crates/biome_html_parser/src/syntax/mod.rs index 3b5b50959e49..0c9983fb3a52 100644 --- a/crates/biome_html_parser/src/syntax/mod.rs +++ b/crates/biome_html_parser/src/syntax/mod.rs @@ -159,6 +159,7 @@ impl ParseNodeList for ElementList { fn parse_element(&mut self, p: &mut Self::Parser<'_>) -> ParsedSyntax { match p.cur() { T![]); Present(m.complete(p, HTML_COMMENT)) } + +fn parse_cdata_section(p: &mut HtmlParser) -> ParsedSyntax { + if !p.at(T![""]) && !p.at(EOF) { + p.bump_with_context(HTML_LITERAL, HtmlLexContext::CdataSection); + } + p.expect(T!["]]>"]); + Present(m.complete(p, HTML_CDATA_SECTION)) +} diff --git a/crates/biome_html_parser/src/token_source.rs b/crates/biome_html_parser/src/token_source.rs index d5a80699fa50..1f3cc1f237d1 100644 --- a/crates/biome_html_parser/src/token_source.rs +++ b/crates/biome_html_parser/src/token_source.rs @@ -35,6 +35,8 @@ pub(crate) enum HtmlLexContext { EmbeddedLanguage(HtmlEmbededLanguage), /// Comments are treated as text until the closing comment tag is encountered. Comment, + /// CDATA Sections are treated as text until the closing CDATA token is encountered. 
+ CdataSection, } #[derive(Copy, Clone, Debug)] diff --git a/crates/biome_html_parser/tests/html_specs/ok/cdata.html b/crates/biome_html_parser/tests/html_specs/ok/cdata.html new file mode 100644 index 000000000000..3ba13a82273d --- /dev/null +++ b/crates/biome_html_parser/tests/html_specs/ok/cdata.html @@ -0,0 +1,4 @@ + + + +]]> diff --git a/crates/biome_html_parser/tests/html_specs/ok/cdata.html.snap b/crates/biome_html_parser/tests/html_specs/ok/cdata.html.snap new file mode 100644 index 000000000000..82e3a4871a67 --- /dev/null +++ b/crates/biome_html_parser/tests/html_specs/ok/cdata.html.snap @@ -0,0 +1,73 @@ +--- +source: crates/biome_html_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +```html + + + +]]> + +``` + + +## AST + +``` +HtmlRoot { + bom_token: missing (optional), + directive: missing (optional), + html: HtmlElementList [ + HtmlCdataSection { + cdata_start_token: CDATA_START@0..9 "" [] [], + }, + HtmlCdataSection { + cdata_start_token: CDATA_START@19..29 "" [] [], + }, + HtmlCdataSection { + cdata_start_token: CDATA_START@35..45 "" [] [], + }, + HtmlCdataSection { + cdata_start_token: CDATA_START@151..161 "" [] [], + cdata_end_token: CDATA_END@200..203 "]]>" [] [], + }, + ], + eof_token: EOF@203..204 "" [Newline("\n")] [], +} +``` + +## CST + +``` +0: HTML_ROOT@0..204 + 0: (empty) + 1: (empty) + 2: HTML_ELEMENT_LIST@0..203 + 0: HTML_CDATA_SECTION@0..19 + 0: CDATA_START@0..9 "" [] [] + 1: HTML_CDATA_SECTION@19..35 + 0: CDATA_START@19..29 "" [] [] + 2: HTML_CDATA_SECTION@35..151 + 0: CDATA_START@35..45 "" [] [] + 3: HTML_CDATA_SECTION@151..203 + 0: CDATA_START@151..161 "" [] [] + 2: CDATA_END@200..203 "]]>" [] [] + 3: EOF@203..204 "" [Newline("\n")] [] + +``` diff --git a/crates/biome_html_syntax/src/generated/kind.rs b/crates/biome_html_syntax/src/generated/kind.rs index 3dae9444dccd..496a5c0d8908 100644 --- a/crates/biome_html_syntax/src/generated/kind.rs +++ b/crates/biome_html_syntax/src/generated/kind.rs @@ -20,6 +20,8 @@ pub 
enum HtmlSyntaxKind { MINUS, COMMENT_START, COMMENT_END, + CDATA_START, + CDATA_END, NULL_KW, TRUE_KW, FALSE_KW, @@ -47,6 +49,7 @@ pub enum HtmlSyntaxKind { HTML_ATTRIBUTE_LIST, HTML_CONTENT, HTML_COMMENT, + HTML_CDATA_SECTION, HTML_BOGUS, HTML_BOGUS_ELEMENT, HTML_BOGUS_ATTRIBUTE, @@ -58,7 +61,16 @@ impl HtmlSyntaxKind { pub const fn is_punct(self) -> bool { matches!( self, - L_ANGLE | R_ANGLE | SLASH | EQ | BANG | MINUS | COMMENT_START | COMMENT_END + L_ANGLE + | R_ANGLE + | SLASH + | EQ + | BANG + | MINUS + | COMMENT_START + | COMMENT_END + | CDATA_START + | CDATA_END ) } pub const fn is_literal(self) -> bool { @@ -88,6 +100,8 @@ impl HtmlSyntaxKind { MINUS => "-", COMMENT_START => "", + CDATA_START => " "]]>", NULL_KW => "null", TRUE_KW => "true", FALSE_KW => "false", @@ -101,4 +115,4 @@ impl HtmlSyntaxKind { } #[doc = r" Utility macro for creating a SyntaxKind through simple macro syntax"] #[macro_export] -macro_rules ! T { [<] => { $ crate :: HtmlSyntaxKind :: L_ANGLE } ; [>] => { $ crate :: HtmlSyntaxKind :: R_ANGLE } ; [/] => { $ crate :: HtmlSyntaxKind :: SLASH } ; [=] => { $ crate :: HtmlSyntaxKind :: EQ } ; [!] => { $ crate :: HtmlSyntaxKind :: BANG } ; [-] => { $ crate :: HtmlSyntaxKind :: MINUS } ; [] => { $ crate :: HtmlSyntaxKind :: COMMENT_END } ; [null] => { $ crate :: HtmlSyntaxKind :: NULL_KW } ; [true] => { $ crate :: HtmlSyntaxKind :: TRUE_KW } ; [false] => { $ crate :: HtmlSyntaxKind :: FALSE_KW } ; [doctype] => { $ crate :: HtmlSyntaxKind :: DOCTYPE_KW } ; [html] => { $ crate :: HtmlSyntaxKind :: HTML_KW } ; [ident] => { $ crate :: HtmlSyntaxKind :: IDENT } ; [EOF] => { $ crate :: HtmlSyntaxKind :: EOF } ; [UNICODE_BOM] => { $ crate :: HtmlSyntaxKind :: UNICODE_BOM } ; [#] => { $ crate :: HtmlSyntaxKind :: HASH } ; } +macro_rules ! T { [<] => { $ crate :: HtmlSyntaxKind :: L_ANGLE } ; [>] => { $ crate :: HtmlSyntaxKind :: R_ANGLE } ; [/] => { $ crate :: HtmlSyntaxKind :: SLASH } ; [=] => { $ crate :: HtmlSyntaxKind :: EQ } ; [!] 
=> { $ crate :: HtmlSyntaxKind :: BANG } ; [-] => { $ crate :: HtmlSyntaxKind :: MINUS } ; [] => { $ crate :: HtmlSyntaxKind :: COMMENT_END } ; [" { $ crate :: HtmlSyntaxKind :: CDATA_START } ; ["]]>"] => { $ crate :: HtmlSyntaxKind :: CDATA_END } ; [null] => { $ crate :: HtmlSyntaxKind :: NULL_KW } ; [true] => { $ crate :: HtmlSyntaxKind :: TRUE_KW } ; [false] => { $ crate :: HtmlSyntaxKind :: FALSE_KW } ; [doctype] => { $ crate :: HtmlSyntaxKind :: DOCTYPE_KW } ; [html] => { $ crate :: HtmlSyntaxKind :: HTML_KW } ; [ident] => { $ crate :: HtmlSyntaxKind :: IDENT } ; [EOF] => { $ crate :: HtmlSyntaxKind :: EOF } ; [UNICODE_BOM] => { $ crate :: HtmlSyntaxKind :: UNICODE_BOM } ; [#] => { $ crate :: HtmlSyntaxKind :: HASH } ; } diff --git a/crates/biome_html_syntax/src/generated/macros.rs b/crates/biome_html_syntax/src/generated/macros.rs index 20801883a1a6..f392e67772e6 100644 --- a/crates/biome_html_syntax/src/generated/macros.rs +++ b/crates/biome_html_syntax/src/generated/macros.rs @@ -25,6 +25,10 @@ macro_rules! 
map_syntax_node { unsafe { $crate::HtmlAttributeInitializerClause::new_unchecked(node) }; $body } + $crate::HtmlSyntaxKind::HTML_CDATA_SECTION => { + let $pattern = unsafe { $crate::HtmlCdataSection::new_unchecked(node) }; + $body + } $crate::HtmlSyntaxKind::HTML_CLOSING_ELEMENT => { let $pattern = unsafe { $crate::HtmlClosingElement::new_unchecked(node) }; $body diff --git a/crates/biome_html_syntax/src/generated/nodes.rs b/crates/biome_html_syntax/src/generated/nodes.rs index 20551851fab5..72f34a2d13e2 100644 --- a/crates/biome_html_syntax/src/generated/nodes.rs +++ b/crates/biome_html_syntax/src/generated/nodes.rs @@ -100,6 +100,51 @@ pub struct HtmlAttributeInitializerClauseFields { pub value: SyntaxResult, } #[derive(Clone, PartialEq, Eq, Hash)] +pub struct HtmlCdataSection { + pub(crate) syntax: SyntaxNode, +} +impl HtmlCdataSection { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> HtmlCdataSectionFields { + HtmlCdataSectionFields { + cdata_start_token: self.cdata_start_token(), + content_token: self.content_token(), + cdata_end_token: self.cdata_end_token(), + } + } + pub fn cdata_start_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 0usize) + } + pub fn content_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 1usize) + } + pub fn cdata_end_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 2usize) + } +} +impl Serialize for HtmlCdataSection { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct HtmlCdataSectionFields { + pub cdata_start_token: SyntaxResult, + pub 
content_token: SyntaxResult, + pub cdata_end_token: SyntaxResult, +} +#[derive(Clone, PartialEq, Eq, Hash)] pub struct HtmlClosingElement { pub(crate) syntax: SyntaxNode, } @@ -591,6 +636,7 @@ impl AnyHtmlAttribute { #[derive(Clone, PartialEq, Eq, Hash, Serialize)] pub enum AnyHtmlElement { HtmlBogusElement(HtmlBogusElement), + HtmlCdataSection(HtmlCdataSection), HtmlComment(HtmlComment), HtmlContent(HtmlContent), HtmlElement(HtmlElement), @@ -603,6 +649,12 @@ impl AnyHtmlElement { _ => None, } } + pub fn as_html_cdata_section(&self) -> Option<&HtmlCdataSection> { + match &self { + AnyHtmlElement::HtmlCdataSection(item) => Some(item), + _ => None, + } + } pub fn as_html_comment(&self) -> Option<&HtmlComment> { match &self { AnyHtmlElement::HtmlComment(item) => Some(item), @@ -727,6 +779,64 @@ impl From for SyntaxElement { n.syntax.into() } } +impl AstNode for HtmlCdataSection { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(HTML_CDATA_SECTION as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == HTML_CDATA_SECTION + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for HtmlCdataSection { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use std::sync::atomic::{AtomicUsize, Ordering}; + static DEPTH: AtomicUsize = AtomicUsize::new(0); + let current_depth = DEPTH.fetch_add(1, Ordering::Relaxed); + let result = if current_depth < 16 { + f.debug_struct("HtmlCdataSection") + .field( + "cdata_start_token", + &support::DebugSyntaxResult(self.cdata_start_token()), + ) + .field( + "content_token", + &support::DebugSyntaxResult(self.content_token()), + ) + .field( + "cdata_end_token", + &support::DebugSyntaxResult(self.cdata_end_token()), + ) + .finish() + } else { + 
f.debug_struct("HtmlCdataSection").finish() + }; + DEPTH.fetch_sub(1, Ordering::Relaxed); + result + } +} +impl From for SyntaxNode { + fn from(n: HtmlCdataSection) -> SyntaxNode { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: HtmlCdataSection) -> SyntaxElement { + n.syntax.into() + } +} impl AstNode for HtmlClosingElement { type Language = Language; const KIND_SET: SyntaxKindSet = @@ -1363,6 +1473,11 @@ impl From for AnyHtmlElement { AnyHtmlElement::HtmlBogusElement(node) } } +impl From for AnyHtmlElement { + fn from(node: HtmlCdataSection) -> AnyHtmlElement { + AnyHtmlElement::HtmlCdataSection(node) + } +} impl From for AnyHtmlElement { fn from(node: HtmlComment) -> AnyHtmlElement { AnyHtmlElement::HtmlComment(node) @@ -1386,6 +1501,7 @@ impl From for AnyHtmlElement { impl AstNode for AnyHtmlElement { type Language = Language; const KIND_SET: SyntaxKindSet = HtmlBogusElement::KIND_SET + .union(HtmlCdataSection::KIND_SET) .union(HtmlComment::KIND_SET) .union(HtmlContent::KIND_SET) .union(HtmlElement::KIND_SET) @@ -1394,6 +1510,7 @@ impl AstNode for AnyHtmlElement { matches!( kind, HTML_BOGUS_ELEMENT + | HTML_CDATA_SECTION | HTML_COMMENT | HTML_CONTENT | HTML_ELEMENT @@ -1403,6 +1520,7 @@ impl AstNode for AnyHtmlElement { fn cast(syntax: SyntaxNode) -> Option { let res = match syntax.kind() { HTML_BOGUS_ELEMENT => AnyHtmlElement::HtmlBogusElement(HtmlBogusElement { syntax }), + HTML_CDATA_SECTION => AnyHtmlElement::HtmlCdataSection(HtmlCdataSection { syntax }), HTML_COMMENT => AnyHtmlElement::HtmlComment(HtmlComment { syntax }), HTML_CONTENT => AnyHtmlElement::HtmlContent(HtmlContent { syntax }), HTML_ELEMENT => AnyHtmlElement::HtmlElement(HtmlElement { syntax }), @@ -1416,6 +1534,7 @@ impl AstNode for AnyHtmlElement { fn syntax(&self) -> &SyntaxNode { match self { AnyHtmlElement::HtmlBogusElement(it) => &it.syntax, + AnyHtmlElement::HtmlCdataSection(it) => &it.syntax, AnyHtmlElement::HtmlComment(it) => &it.syntax, AnyHtmlElement::HtmlContent(it) => 
&it.syntax, AnyHtmlElement::HtmlElement(it) => &it.syntax, @@ -1425,6 +1544,7 @@ impl AstNode for AnyHtmlElement { fn into_syntax(self) -> SyntaxNode { match self { AnyHtmlElement::HtmlBogusElement(it) => it.syntax, + AnyHtmlElement::HtmlCdataSection(it) => it.syntax, AnyHtmlElement::HtmlComment(it) => it.syntax, AnyHtmlElement::HtmlContent(it) => it.syntax, AnyHtmlElement::HtmlElement(it) => it.syntax, @@ -1436,6 +1556,7 @@ impl std::fmt::Debug for AnyHtmlElement { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { AnyHtmlElement::HtmlBogusElement(it) => std::fmt::Debug::fmt(it, f), + AnyHtmlElement::HtmlCdataSection(it) => std::fmt::Debug::fmt(it, f), AnyHtmlElement::HtmlComment(it) => std::fmt::Debug::fmt(it, f), AnyHtmlElement::HtmlContent(it) => std::fmt::Debug::fmt(it, f), AnyHtmlElement::HtmlElement(it) => std::fmt::Debug::fmt(it, f), @@ -1447,6 +1568,7 @@ impl From for SyntaxNode { fn from(n: AnyHtmlElement) -> SyntaxNode { match n { AnyHtmlElement::HtmlBogusElement(it) => it.into(), + AnyHtmlElement::HtmlCdataSection(it) => it.into(), AnyHtmlElement::HtmlComment(it) => it.into(), AnyHtmlElement::HtmlContent(it) => it.into(), AnyHtmlElement::HtmlElement(it) => it.into(), @@ -1480,6 +1602,11 @@ impl std::fmt::Display for HtmlAttributeInitializerClause { std::fmt::Display::fmt(self.syntax(), f) } } +impl std::fmt::Display for HtmlCdataSection { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} impl std::fmt::Display for HtmlClosingElement { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) diff --git a/crates/biome_html_syntax/src/generated/nodes_mut.rs b/crates/biome_html_syntax/src/generated/nodes_mut.rs index fb793fd99740..5b6db033d550 100644 --- a/crates/biome_html_syntax/src/generated/nodes_mut.rs +++ b/crates/biome_html_syntax/src/generated/nodes_mut.rs @@ -31,6 +31,26 @@ impl 
HtmlAttributeInitializerClause { ) } } +impl HtmlCdataSection { + pub fn with_cdata_start_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into()))), + ) + } + pub fn with_content_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(1usize..=1usize, once(Some(element.into()))), + ) + } + pub fn with_cdata_end_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(2usize..=2usize, once(Some(element.into()))), + ) + } +} impl HtmlClosingElement { pub fn with_l_angle_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( diff --git a/xtask/codegen/html.ungram b/xtask/codegen/html.ungram index 2c2976cf80f2..4dc985b1e173 100644 --- a/xtask/codegen/html.ungram +++ b/xtask/codegen/html.ungram @@ -69,6 +69,7 @@ AnyHtmlElement = | HtmlElement | HtmlContent | HtmlComment + | HtmlCdataSection | HtmlBogusElement @@ -107,6 +108,13 @@ HtmlComment = content: 'html_literal' '-->' +// <![CDATA[ ... ]]> +// Reference: https://html.spec.whatwg.org/multipage/syntax.html#cdata-sections +HtmlCdataSection = + '<![CDATA[' + content: 'html_literal' + ']]>' + // ================================== // Attributes // ================================== diff --git a/xtask/codegen/src/generate_nodes.rs b/xtask/codegen/src/generate_nodes.rs index 4f5fc6bcdea2..8b18cd943881 100644 --- a/xtask/codegen/src/generate_nodes.rs +++ b/xtask/codegen/src/generate_nodes.rs @@ -989,9 +989,9 @@ pub(crate) fn token_kind_to_code(name: &str, language_kind: LanguageKind) -> Tok let token: TokenStream = token.parse().unwrap(); quote! { T![#token] } } else { - // $ is valid syntax in rust and it's part of macros, + // `$`, `[`, and `]` are valid syntax in Rust and are part of macros, // so we need to decorate the tokens with quotes - if matches!(name, "$=" | "$_") { + if matches!(name, "$=" | "$_" | "<![CDATA[" | "]]>") { let token = Literal::string(name); quote!
{ T![#token] } } else { diff --git a/xtask/codegen/src/generate_syntax_kinds.rs b/xtask/codegen/src/generate_syntax_kinds.rs index 0b749a6431ef..955878d6222b 100644 --- a/xtask/codegen/src/generate_syntax_kinds.rs +++ b/xtask/codegen/src/generate_syntax_kinds.rs @@ -14,7 +14,7 @@ pub fn generate_syntax_kinds(grammar: KindsSrc, language_kind: LanguageKind) -> if "{}[]()`".contains(token) { let c = token.chars().next().unwrap(); quote! { #c } - } else if matches!(*token, "$=" | "$_") { + } else if matches!(*token, "$=" | "$_" | "<![CDATA[" | "]]>") { let token = Literal::string(token); quote! { #token } } else { diff --git a/xtask/codegen/src/html_kinds_src.rs b/xtask/codegen/src/html_kinds_src.rs index ac14e54acf87..7e53202f4837 100644 --- a/xtask/codegen/src/html_kinds_src.rs +++ b/xtask/codegen/src/html_kinds_src.rs @@ -10,6 +10,8 @@ pub const HTML_KINDS_SRC: KindsSrc = KindsSrc { ("-", "MINUS"), ("<!--", "COMMENT_START"), ("-->", "COMMENT_END"), + ("<![CDATA[", "CDATA_START"), + ("]]>", "CDATA_END"), ], keywords: &["null", "true", "false", "doctype", "html"], literals: &["HTML_STRING_LITERAL", "HTML_LITERAL"], @@ -36,6 +38,7 @@ pub const HTML_KINDS_SRC: KindsSrc = KindsSrc { "HTML_ATTRIBUTE_LIST", "HTML_CONTENT", "HTML_COMMENT", + "HTML_CDATA_SECTION", // Bogus nodes "HTML_BOGUS", "HTML_BOGUS_ELEMENT", diff --git a/xtask/codegen/src/js_kinds_src.rs b/xtask/codegen/src/js_kinds_src.rs index c6df087c9054..3c5abea8f90d 100644 --- a/xtask/codegen/src/js_kinds_src.rs +++ b/xtask/codegen/src/js_kinds_src.rs @@ -695,6 +695,8 @@ impl Field { ("---", LanguageKind::Yaml) => "dashdashdash", ("<!--", LanguageKind::Html) => "comment_start", ("-->", LanguageKind::Html) => "comment_end", + ("<![CDATA[", LanguageKind::Html) => "cdata_start", + ("]]>", LanguageKind::Html) => "cdata_end", _ => name, };