Skip to content

Commit

Permalink
feat(parser,format/html): add CDATA sections (#4859)
Browse files Browse the repository at this point in the history
  • Loading branch information
dyc3 authored Jan 9, 2025
1 parent 016046e commit 1edc35f
Show file tree
Hide file tree
Showing 21 changed files with 450 additions and 5 deletions.
14 changes: 14 additions & 0 deletions crates/biome_html_factory/src/generated/node_factory.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions crates/biome_html_factory/src/generated/syntax_factory.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions crates/biome_html_formatter/src/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,44 @@ impl IntoFormat<HtmlFormatContext> for biome_html_syntax::HtmlAttributeInitializ
FormatOwnedWithRule :: new (self , crate :: html :: auxiliary :: attribute_initializer_clause :: FormatHtmlAttributeInitializerClause :: default ())
}
}
impl FormatRule<biome_html_syntax::HtmlCdataSection>
for crate::html::auxiliary::cdata_section::FormatHtmlCdataSection
{
type Context = HtmlFormatContext;
#[inline(always)]
fn fmt(
&self,
node: &biome_html_syntax::HtmlCdataSection,
f: &mut HtmlFormatter,
) -> FormatResult<()> {
FormatNodeRule::<biome_html_syntax::HtmlCdataSection>::fmt(self, node, f)
}
}
/// Pairs a borrowed CDATA section node with its formatting rule.
impl AsFormat<HtmlFormatContext> for biome_html_syntax::HtmlCdataSection {
    type Format<'a> = FormatRefWithRule<
        'a,
        Self,
        crate::html::auxiliary::cdata_section::FormatHtmlCdataSection,
    >;

    fn format(&self) -> Self::Format<'_> {
        let rule = crate::html::auxiliary::cdata_section::FormatHtmlCdataSection::default();
        FormatRefWithRule::new(self, rule)
    }
}
/// Pairs an owned CDATA section node with its formatting rule.
impl IntoFormat<HtmlFormatContext> for biome_html_syntax::HtmlCdataSection {
    type Format = FormatOwnedWithRule<
        Self,
        crate::html::auxiliary::cdata_section::FormatHtmlCdataSection,
    >;

    fn into_format(self) -> Self::Format {
        let rule = crate::html::auxiliary::cdata_section::FormatHtmlCdataSection::default();
        FormatOwnedWithRule::new(self, rule)
    }
}
impl FormatRule<biome_html_syntax::HtmlClosingElement>
for crate::html::auxiliary::closing_element::FormatHtmlClosingElement
{
Expand Down
1 change: 1 addition & 0 deletions crates/biome_html_formatter/src/html/any/element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ impl FormatRule<AnyHtmlElement> for FormatAnyHtmlElement {
fn fmt(&self, node: &AnyHtmlElement, f: &mut HtmlFormatter) -> FormatResult<()> {
match node {
AnyHtmlElement::HtmlBogusElement(node) => node.format().fmt(f),
AnyHtmlElement::HtmlCdataSection(node) => node.format().fmt(f),
AnyHtmlElement::HtmlComment(node) => node.format().fmt(f),
AnyHtmlElement::HtmlContent(node) => node.format().fmt(f),
AnyHtmlElement::HtmlElement(node) => node.format().fmt(f),
Expand Down
23 changes: 23 additions & 0 deletions crates/biome_html_formatter/src/html/auxiliary/cdata_section.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
use crate::prelude::*;
use biome_formatter::write;
use biome_html_syntax::{HtmlCdataSection, HtmlCdataSectionFields};
/// Formatting rule for [`HtmlCdataSection`] nodes, i.e. `<![CDATA[` ... `]]>`.
#[derive(Debug, Clone, Default)]
pub(crate) struct FormatHtmlCdataSection;
impl FormatNodeRule<HtmlCdataSection> for FormatHtmlCdataSection {
    /// Writes the three tokens of a CDATA section (opening delimiter, content, closing
    /// delimiter) in source order, with no other format elements between them.
    fn fmt_fields(&self, node: &HtmlCdataSection, f: &mut HtmlFormatter) -> FormatResult<()> {
        // Every field is named in the pattern, so adding a field to the node later fails to
        // compile here instead of being silently left out of the output.
        let HtmlCdataSectionFields {
            cdata_start_token: opening,
            content_token: text,
            cdata_end_token: closing,
        } = node.as_fields();

        write!(f, [opening.format(), text.format(), closing.format()])
    }
}
1 change: 1 addition & 0 deletions crates/biome_html_formatter/src/html/auxiliary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
pub(crate) mod attribute;
pub(crate) mod attribute_initializer_clause;
pub(crate) mod cdata_section;
pub(crate) mod closing_element;
pub(crate) mod comment;
pub(crate) mod content;
Expand Down
53 changes: 53 additions & 0 deletions crates/biome_html_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,24 @@ impl<'src> HtmlLexer<'src> {
}
}

/// Lexes one token while in the [HtmlLexContext::CdataSection] context.
///
/// Yields the `<![CDATA[` token or the `]]>` token when the lexer sits exactly on one of
/// them. Otherwise everything up to, but not including, the next `]]>` becomes a single
/// `HTML_LITERAL` token; if no `]]>` follows, the literal runs until there is no current
/// byte left.
fn consume_inside_cdata(&mut self, current: u8) -> HtmlSyntaxKind {
    if current == b'<' && self.at_start_cdata() {
        return self.consume_cdata_start();
    }
    if current == b']' && self.at_end_cdata() {
        return self.consume_cdata_end();
    }

    // Raw text. A lone `<` or `]` that is not part of a delimiter also ends up here.
    loop {
        match self.current_byte() {
            Some(byte) if !self.at_end_cdata() => {
                self.advance_byte_or_char(byte);
            }
            // Either nothing is left to read or `]]>` comes next. The terminator is not
            // consumed here; it is left in place to be lexed as its own token.
            _ => break,
        }
    }
    HTML_LITERAL
}

/// Bumps the current byte and creates a lexed token of the passed in kind.
#[inline]
fn consume_byte(&mut self, tok: HtmlSyntaxKind) -> HtmlSyntaxKind {
Expand Down Expand Up @@ -358,6 +376,8 @@ impl<'src> HtmlLexer<'src> {

if self.at_start_comment() {
self.consume_comment_start()
} else if self.at_start_cdata() {
self.consume_cdata_start()
} else {
self.consume_byte(T![<])
}
Expand All @@ -376,6 +396,24 @@ impl<'src> HtmlLexer<'src> {
&& self.byte_at(2) == Some(b'>')
}

/// Returns `true` when the bytes starting at the current position spell `<![CDATA[`.
fn at_start_cdata(&mut self) -> bool {
    // The first byte is read through `current_byte`; the remaining eight are compared at
    // offsets 1 through 8. `all` stops at the first mismatch, like the `&&` chain it replaces.
    self.current_byte() == Some(b'<')
        && b"![CDATA["
            .iter()
            .zip(1..)
            .all(|(&expected, offset)| self.byte_at(offset) == Some(expected))
}

/// Returns `true` when the bytes starting at the current position spell `]]>`.
fn at_end_cdata(&mut self) -> bool {
    if self.current_byte() != Some(b']') {
        return false;
    }
    if self.byte_at(1) != Some(b']') {
        return false;
    }
    self.byte_at(2) == Some(b'>')
}

fn consume_comment_start(&mut self) -> HtmlSyntaxKind {
debug_assert!(self.at_start_comment());

Expand All @@ -390,6 +428,20 @@ impl<'src> HtmlLexer<'src> {
T![-->]
}

/// Advances past the `<![CDATA[` delimiter and returns its token kind.
///
/// The caller must already be positioned on the delimiter; this is only checked in debug
/// builds.
fn consume_cdata_start(&mut self) -> HtmlSyntaxKind {
    debug_assert!(self.at_start_cdata());

    // Length in bytes of `<![CDATA[`.
    let delimiter_len = 9;
    self.advance(delimiter_len);
    T!["<![CDATA["]
}

/// Advances past the `]]>` delimiter and returns its token kind.
///
/// The caller must already be positioned on the delimiter; this is only checked in debug
/// builds.
fn consume_cdata_end(&mut self) -> HtmlSyntaxKind {
    debug_assert!(self.at_end_cdata());

    // Length in bytes of `]]>`.
    let delimiter_len = 3;
    self.advance(delimiter_len);
    T!["]]>"]
}

/// Lexes a `\u0000` escape sequence. Assumes that the lexer is positioned at the `u` token.
///
/// A unicode escape sequence must consist of 4 hex characters.
Expand Down Expand Up @@ -517,6 +569,7 @@ impl<'src> Lexer<'src> for HtmlLexer<'src> {
self.consume_token_embedded_language(current, lang)
}
HtmlLexContext::Comment => self.consume_inside_comment(current),
HtmlLexContext::CdataSection => self.consume_inside_cdata(current),
},
None => EOF,
}
Expand Down
11 changes: 11 additions & 0 deletions crates/biome_html_parser/src/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,3 +347,14 @@ fn comment_full() {
COMMENT_END: 3,
}
}

// Lexes a complete CDATA section in the `CdataSection` context and expects three tokens:
// the 9-byte opening delimiter, a 1-byte literal for the content, and the 3-byte closing
// delimiter.
#[test]
fn cdata_full() {
assert_lex! {
HtmlLexContext::CdataSection,
"<![CDATA[1]]>",
CDATA_START: 9,
HTML_LITERAL: 1,
CDATA_END: 3,
}
}
14 changes: 14 additions & 0 deletions crates/biome_html_parser/src/syntax/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ impl ParseNodeList for ElementList {
fn parse_element(&mut self, p: &mut Self::Parser<'_>) -> ParsedSyntax {
match p.cur() {
T![<!--] => parse_comment(p),
T!["<![CDATA["] => parse_cdata_section(p),
T![<] => parse_element(p),
HTML_LITERAL => {
let m = p.start();
Expand Down Expand Up @@ -277,3 +278,16 @@ fn parse_comment(p: &mut HtmlParser) -> ParsedSyntax {
p.expect(T![-->]);
Present(m.complete(p, HTML_COMMENT))
}

/// Parses a CDATA section: `<![CDATA[`, its content, then `]]>`.
///
/// Returns `Absent` without consuming anything when the parser is not positioned on
/// `<![CDATA[`. Otherwise returns a completed `HTML_CDATA_SECTION` node, even when the
/// closing `]]>` is never found before `EOF`.
fn parse_cdata_section(p: &mut HtmlParser) -> ParsedSyntax {
    if !p.at(T!["<![CDATA["]) {
        return Absent;
    }

    let m = p.start();
    // Everything bumped from here on passes the CDATA lexing context along.
    p.bump_with_context(T!["<![CDATA["], HtmlLexContext::CdataSection);

    // NOTE(review): this assumes every token before `]]>` is an `HTML_LITERAL`. Confirm how
    // `bump_with_context` reacts if the lexer yields another kind here, e.g. for a nested
    // `<![CDATA[`.
    loop {
        let finished = p.at(T!["]]>"]) || p.at(EOF);
        if finished {
            break;
        }
        p.bump_with_context(HTML_LITERAL, HtmlLexContext::CdataSection);
    }

    p.expect(T!["]]>"]);
    let section = m.complete(p, HTML_CDATA_SECTION);
    Present(section)
}
2 changes: 2 additions & 0 deletions crates/biome_html_parser/src/token_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ pub(crate) enum HtmlLexContext {
EmbeddedLanguage(HtmlEmbededLanguage),
/// Comments are treated as text until the closing comment tag is encountered.
Comment,
/// CDATA Sections are treated as text until the closing CDATA token is encountered.
CdataSection,
}

#[derive(Copy, Clone, Debug)]
Expand Down
4 changes: 4 additions & 0 deletions crates/biome_html_parser/tests/html_specs/ok/cdata.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<![CDATA[example]]>
<![CDATA[1<3]]>
<![CDATA[lorem ipsum dolar 33 bottles of beer on the wall, 33 bottles of beer! Take one down, pass it around ...]]>
<![CDATA[<div attributes="some-attribute"></div>]]>
73 changes: 73 additions & 0 deletions crates/biome_html_parser/tests/html_specs/ok/cdata.html.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
---
source: crates/biome_html_parser/tests/spec_test.rs
expression: snapshot
---
## Input

```html
<![CDATA[example]]>
<![CDATA[1<3]]>
<![CDATA[lorem ipsum dolar 33 bottles of beer on the wall, 33 bottles of beer! Take one down, pass it around ...]]>
<![CDATA[<div attributes="some-attribute"></div>]]>
```


## AST

```
HtmlRoot {
bom_token: missing (optional),
directive: missing (optional),
html: HtmlElementList [
HtmlCdataSection {
cdata_start_token: CDATA_START@0..9 "<![CDATA[" [] [],
content_token: HTML_LITERAL@9..16 "example" [] [],
cdata_end_token: CDATA_END@16..19 "]]>" [] [],
},
HtmlCdataSection {
cdata_start_token: CDATA_START@19..29 "<![CDATA[" [Newline("\n")] [],
content_token: HTML_LITERAL@29..32 "1<3" [] [],
cdata_end_token: CDATA_END@32..35 "]]>" [] [],
},
HtmlCdataSection {
cdata_start_token: CDATA_START@35..45 "<![CDATA[" [Newline("\n")] [],
content_token: HTML_LITERAL@45..148 "lorem ipsum dolar 33 bottles of beer on the wall, 33 bottles of beer! Take one down, pass it around ..." [] [],
cdata_end_token: CDATA_END@148..151 "]]>" [] [],
},
HtmlCdataSection {
cdata_start_token: CDATA_START@151..161 "<![CDATA[" [Newline("\n")] [],
content_token: HTML_LITERAL@161..200 "<div attributes=\"some-attribute\"></div>" [] [],
cdata_end_token: CDATA_END@200..203 "]]>" [] [],
},
],
eof_token: EOF@203..204 "" [Newline("\n")] [],
}
```

## CST

```
0: [email protected]
0: (empty)
1: (empty)
2: [email protected]
0: [email protected]
0: [email protected] "<![CDATA[" [] []
1: [email protected] "example" [] []
2: [email protected] "]]>" [] []
1: [email protected]
0: [email protected] "<![CDATA[" [Newline("\n")] []
1: [email protected] "1<3" [] []
2: [email protected] "]]>" [] []
2: [email protected]
0: [email protected] "<![CDATA[" [Newline("\n")] []
1: [email protected] "lorem ipsum dolar 33 bottles of beer on the wall, 33 bottles of beer! Take one down, pass it around ..." [] []
2: [email protected] "]]>" [] []
3: [email protected]
0: [email protected] "<![CDATA[" [Newline("\n")] []
1: [email protected] "<div attributes=\"some-attribute\"></div>" [] []
2: [email protected] "]]>" [] []
3: [email protected] "" [Newline("\n")] []
```
Loading

0 comments on commit 1edc35f

Please sign in to comment.