diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6d6baf4..285823b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -24,6 +24,9 @@ jobs: toolchain: ${{ matrix.rust }} override: true + - name: Use MSRV-compatible version of memchr + run: cargo update --package memchr --precise 2.6.2 + - name: Run tests run: cargo test diff --git a/Cargo.toml b/Cargo.toml index 4841c66..878cf16 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,8 +18,11 @@ exclude = ["testing-tools"] [features] default = ["std", "positions"] -std = [] +std = ["memchr/std"] # Enables Nodes and Attributes position in the original document preserving. # Increases memory usage by `Range` for each Node. # Increases memory usage by `Range` + `u16` + `u8` for each Attribute. positions = [] + +[dependencies] +memchr = { version = "2", default-features = false } diff --git a/src/parse.rs b/src/parse.rs index f95baf7..4f07c3f 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -1,6 +1,7 @@ use alloc::string::{String, ToString}; use alloc::{vec, vec::Vec}; use core::ops::Range; +use memchr::{memchr, memchr_iter}; use crate::{ AttributeData, Document, ExpandedNameIndexed, NamespaceIdx, Namespaces, NodeData, NodeId, @@ -555,8 +556,8 @@ impl<'input> Context<'input> { fn parse(text: &str, opt: ParsingOptions) -> Result { // Trying to guess rough nodes and attributes amount. - let nodes_capacity = text.bytes().filter(|c| *c == b'<').count(); - let attributes_capacity = text.bytes().filter(|c| *c == b'=').count(); + let nodes_capacity = memchr_iter(b'<', text.as_bytes()).count(); + let attributes_capacity = memchr_iter(b'=', text.as_bytes()).count(); // Init document. let mut doc = Document { @@ -1002,30 +1003,40 @@ fn process_text<'input>( // While the whole purpose of CDATA is to indicate to an XML library that this text // has to be stored as is, carriage return (`\r`) is still has to be replaced with `\n`. fn process_cdata<'input>( - text: &'input str, + mut text: &'input str, range: Range, ctx: &mut Context<'input>, ) -> Result<()> { + let mut pos = memchr(b'\r', text.as_bytes()); + // Add text as is if it has only valid characters. - if !text.as_bytes().contains(&b'\r') { + if pos.is_none() { append_text(StringStorage::Borrowed(text), range, ctx)?; ctx.after_text = true; return Ok(()); } - let mut text_buffer = TextBuffer::new(); - let count = text.chars().count(); - for (i, c) in text.chars().enumerate() { - for b in CharToBytes::new(c) { - text_buffer.push_from_text(b, i + 1 == count); - } - } + let mut buf = String::new(); - if !text_buffer.is_empty() { - append_text(StringStorage::new_owned(text_buffer.finish()), range, ctx)?; - ctx.after_text = true; + while let Some(pos1) = pos { + let (line, rest) = text.split_at(pos1); + + buf.push_str(line); + buf.push('\n'); + + text = if rest.as_bytes().get(1) == Some(&b'\n') { + &rest[2..] + } else { + &rest[1..] + }; + + pos = memchr(b'\r', text.as_bytes()); } + buf.push_str(text); + + append_text(StringStorage::new_owned(buf), range, ctx)?; + ctx.after_text = true; Ok(()) }