Skip to content

Commit

Permalink
Use explicit cold paths when checking non-ASCII qualified names
Browse files Browse the repository at this point in the history
  • Loading branch information
adamreichold committed Jan 12, 2025
1 parent 18adb7c commit 54a078f
Showing 1 changed file with 40 additions and 11 deletions.
51 changes: 40 additions & 11 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ impl XmlCharExt for char {
fn is_xml_name_start(&self) -> bool {
// Check for ASCII first.
if *self as u32 <= 128 {
return matches!(*self as u8, b'A'..=b'Z' | b'a'..=b'z' | b':' | b'_');
return (*self as u8).is_xml_name_start();
}

matches!(*self as u32,
Expand Down Expand Up @@ -85,6 +85,10 @@ trait XmlByteExt {
/// `[ \r\n\t]`
fn is_xml_space(&self) -> bool;

/// Checks if the byte is within the ASCII subset of the
/// [NameStartChar](https://www.w3.org/TR/xml/#NT-NameStartChar) range.
///
/// `[A-Za-z:_]`
fn is_xml_name_start(&self) -> bool;

/// Checks if the byte is within the ASCII subset of the
/// [NameChar](https://www.w3.org/TR/xml/#NT-NameChar) range.
///
/// `[A-Za-z0-9:_.-]`
fn is_xml_name(&self) -> bool;
Expand All @@ -96,6 +100,11 @@ impl XmlByteExt for u8 {
matches!(*self, b' ' | b'\t' | b'\n' | b'\r')
}

#[inline]
fn is_xml_name_start(&self) -> bool {
    // ASCII subset of `NameStartChar`: a letter, a colon or an underscore.
    let byte = *self;
    byte.is_ascii_alphabetic() || byte == b':' || byte == b'_'
}

#[inline]
fn is_xml_name(&self) -> bool {
matches!(*self, b'A'..=b'Z' | b'a'..=b'z'| b'0'..=b'9'| b':' | b'_' | b'-' | b'.')
Expand Down Expand Up @@ -981,7 +990,6 @@ impl<'input> Stream<'input> {
/// Consumes a qualified XML name and returns it.
///
/// Consumes according to: <https://www.w3.org/TR/xml-names/#ns-qualnames>
#[inline(never)]
fn consume_qname(&mut self) -> Result<(&'input str, &'input str)> {
let start = self.pos();

Expand All @@ -1005,12 +1013,21 @@ impl<'input> Stream<'input> {
break;
}
} else {
// Fallback to Unicode code point.
match self.chars().nth(0) {
Some(c) if c.is_xml_name() => {
self.advance(c.len_utf8());
#[cold]
#[inline(never)]
fn check_uncode_xml_name<'input>(s: &mut Stream<'input>) -> bool {
// Fallback to Unicode code point.
match s.chars().next() {
Some(c) if c.is_xml_name() => {
s.advance(c.len_utf8());
false
}
_ => true,
}
_ => break,
}

if check_uncode_xml_name(self) {
break;
}
}
}
Expand All @@ -1025,16 +1042,28 @@ impl<'input> Stream<'input> {
(self.span.slice_region(start, start), local)
};

// Slow-path check of the first character of `name`.
//
// Note the inverted sense: returns `true` when the first character is
// NOT a valid `NameStartChar`, and `false` when it is valid or when
// `name` is empty.
#[cold]
#[inline(never)]
fn check_unicode_xml_name_start(name: &str) -> bool {
    matches!(name.chars().next(), Some(first) if !first.is_xml_name_start())
}

// Prefix must start with a `NameStartChar`.
if let Some(c) = prefix.chars().nth(0) {
if !c.is_xml_name_start() {
if let Some(&b) = prefix.as_bytes().first() {
if (b < 128 && !b.is_xml_name_start()) || (b >= 128 && check_unicode_xml_name_start(prefix)) {
return Err(Error::InvalidName(self.gen_text_pos_from(start)));
}
}

// Local name must start with a `NameStartChar`.
if let Some(c) = local.chars().nth(0) {
if !c.is_xml_name_start() {
if let Some(&b) = local.as_bytes().first() {
if (b < 128 && !b.is_xml_name_start()) || (b >= 128 && check_unicode_xml_name_start(local)) {
return Err(Error::InvalidName(self.gen_text_pos_from(start)));
}
} else {
Expand Down

0 comments on commit 54a078f

Please sign in to comment.