From c6b199e87119828baedc4024f16a8ecbe25eb26d Mon Sep 17 00:00:00 2001 From: Stef Busking Date: Thu, 19 Oct 2023 11:16:47 +0200 Subject: [PATCH] Fix prefix issues This fixes attributes being allowed to adopt the default namespace (with empty prefix) and ancestor declarations not being considered as candidates. --- bench.cjs | 43 +++++++ src/dom-parsing/NamespacePrefixMap.ts | 43 +++++-- src/dom-parsing/serializationAlgorithms.ts | 136 +++++---------------- test/dom-parsing/XMLSerializer.tests.ts | 28 +++++ 4 files changed, 133 insertions(+), 117 deletions(-) create mode 100644 bench.cjs diff --git a/bench.cjs b/bench.cjs new file mode 100644 index 0000000..dfe8692 --- /dev/null +++ b/bench.cjs @@ -0,0 +1,43 @@ +const { Document, serializeToWellFormedString } = require('.'); + +const TABLE_SIZE = 1000; + +function createTable() { + const doc = new Document(); + const table = doc.createElementNS("http://example.com", "table"); + doc.appendChild(table); + + for (let num = 1; num < TABLE_SIZE; ++num) { + const newRow = doc.createElementNS("http://example.com", "tr"); + table.appendChild(newRow); + + for (let i = 1; i < num; ++i) { + const newCell = doc.createElementNS("http://example.com", "td"); + newRow.appendChild(newCell); + } + + for (const row of table.childNodes) { + const newCell = doc.createElementNS("http://example.com", "td"); + row.appendChild(newCell); + } + } + + return doc; +} + +console.group('createTable'); +for (let i = 0; i < 20; ++i) { + console.time(); + createTable(); + console.timeEnd(); +} +console.groupEnd(); + +console.group('serializeTable'); +const doc = createTable(); +for (let i = 0; i < 20; ++i) { + console.time(); + serializeToWellFormedString(doc); + console.timeEnd(); +} +console.groupEnd(); diff --git a/src/dom-parsing/NamespacePrefixMap.ts b/src/dom-parsing/NamespacePrefixMap.ts index 6870950..a3cbd8c 100644 --- a/src/dom-parsing/NamespacePrefixMap.ts +++ b/src/dom-parsing/NamespacePrefixMap.ts @@ -107,18 +107,31 @@ export class 
NamespacePrefixMap { // An existing declaration attribute should be skipped if it doesn't // match the local scope. It can be skipped if it doesn't change the // inherited value. - return this.prefixToNamespace(prefix) === ns && this._inheritedPrefixToNamespace(prefix) !== ns; + return ( + this.prefixToNamespace(prefix) === ns && this._inheritedPrefixToNamespace(prefix) !== ns + ); } - private _getCandidatePrefix(namespaceUri: string | null): string | null | undefined { - const candidates = this._prefixCandidatesByNs.get(namespaceUri); - if (candidates !== undefined) { - for (let i = candidates.length - 1; i >= 0; --i) { - const candidate = candidates[i]; - if (this.prefixToNamespace(candidate) === namespaceUri) { - return candidate; + private _getCandidatePrefix( + namespaceUri: string | null, + allowDefault: boolean + ): string | null | undefined { + let ancestor: NamespacePrefixMap | null = this; + while (ancestor) { + const candidates = ancestor._prefixCandidatesByNs.get(namespaceUri); + if (candidates !== undefined) { + for (let i = candidates.length - 1; i >= 0; --i) { + const candidate = candidates[i]; + if (!allowDefault && candidate === null) { + continue; + } + // Check if this candidate prefix is valid in the current scope + if (this.prefixToNamespace(candidate) === namespaceUri) { + return candidate; + } } } + ancestor = ancestor._parent; } return undefined; } @@ -158,23 +171,29 @@ export class NamespacePrefixMap { // If the authored prefix resolves to the requested namespace in scope, // we can use it, except that attributes in a namespace can't use an // empty prefix. - if ((!isAttr || node.prefix !== null) && this.prefixToNamespace(node.prefix) === node.namespaceURI) { + if ( + (!isAttr || node.prefix !== null) && + this.prefixToNamespace(node.prefix) === node.namespaceURI + ) { return node.prefix; } // If any prefixes in scope resolve to the requested namespace, use the // most recent one. 
- const candidatePrefix = this._getCandidatePrefix(node.namespaceURI); + const candidatePrefix = this._getCandidatePrefix(node.namespaceURI, !isAttr); if (candidatePrefix !== undefined) { return candidatePrefix; } // No suitable existing declaration, try to use the authored prefix - // Attributes can't use the authored prefix if it conflicts with an existing local declaration + // Attributes can't use the authored prefix if it is null or conflicts + // with an existing local declaration if (isAttr) { const namespaceForPrefix = this._localPrefixToNamespace(node.prefix); - const isValidPrefix = node.prefix !== null && (namespaceForPrefix === undefined || namespaceForPrefix === node.namespaceURI); + const isValidPrefix = + node.prefix !== null && + (namespaceForPrefix === undefined || namespaceForPrefix === node.namespaceURI); if (!isValidPrefix) { // Collision - generate a new prefix diff --git a/src/dom-parsing/serializationAlgorithms.ts b/src/dom-parsing/serializationAlgorithms.ts index 512c8df..f6d31f6 100644 --- a/src/dom-parsing/serializationAlgorithms.ts +++ b/src/dom-parsing/serializationAlgorithms.ts @@ -14,10 +14,7 @@ import { import { throwInvalidStateError } from '../util/errorHelpers'; import { HTML_NAMESPACE, XML_NAMESPACE, XMLNS_NAMESPACE } from '../util/namespaceHelpers'; import { NodeType } from '../util/NodeType'; -import { - NamespacePrefixMap, - PrefixIndex, -} from './NamespacePrefixMap'; +import { NamespacePrefixMap, PrefixIndex } from './NamespacePrefixMap'; const HTML_VOID_ELEMENTS = [ 'area', @@ -111,13 +108,7 @@ export function produceXmlSerialization( // the execution of the algorithm, then catch that exception and throw an "InvalidStateError" // DOMException. 
try { - runXmlSerializationAlgorithm( - node, - prefixMap, - prefixIndex, - requireWellFormed, - result - ); + runXmlSerializationAlgorithm(node, prefixMap, prefixIndex, requireWellFormed, result); } catch (error) { return throwInvalidStateError((error as Error).message); } @@ -147,33 +138,17 @@ function runXmlSerializationAlgorithm( switch (node.nodeType) { // Element: Run the algorithm for XML serializing an Element node node. case NodeType.ELEMENT_NODE: - serializeElementNode( - node, - prefixMap, - prefixIndex, - requireWellFormed, - result - ); + serializeElementNode(node, prefixMap, prefixIndex, requireWellFormed, result); return; // Document: Run the algorithm for XML serializing a Document node node. case NodeType.DOCUMENT_NODE: - serializeDocumentNode( - node, - prefixMap, - prefixIndex, - requireWellFormed, - result - ); + serializeDocumentNode(node, prefixMap, prefixIndex, requireWellFormed, result); return; // Comment: Run the algorithm for XML serializing a Comment node node. case NodeType.COMMENT_NODE: - serializeCommentNode( - node, - requireWellFormed, - result - ); + serializeCommentNode(node, requireWellFormed, result); return; // CDATASection: Run the algorithm for XML serializing a CDATASection node node. @@ -181,11 +156,7 @@ function runXmlSerializationAlgorithm( // spec which removed the CDATASection interface. It seems the interface has been restored // in the DOM living standard, so we'll implement its serialization as specced previously. case NodeType.CDATA_SECTION_NODE: - serializeCDATASectionNode( - node, - requireWellFormed, - result - ); + serializeCDATASectionNode(node, requireWellFormed, result); return; // Text: Run the algorithm for XML serializing a Text node node. @@ -195,32 +166,18 @@ function runXmlSerializationAlgorithm( // DocumentFragment: Run the algorithm for XML serializing a DocumentFragment node node. 
case NodeType.DOCUMENT_FRAGMENT_NODE: - serializeDocumentFragmentNode( - node, - prefixMap, - prefixIndex, - requireWellFormed, - result - ); + serializeDocumentFragmentNode(node, prefixMap, prefixIndex, requireWellFormed, result); return; // DocumentType: Run the algorithm for XML serializing a DocumentType node node. case NodeType.DOCUMENT_TYPE_NODE: - serializeDocumentTypeNode( - node, - requireWellFormed, - result - ); + serializeDocumentTypeNode(node, requireWellFormed, result); return; // ProcessingInstruction: Run the algorithm for XML serializing a ProcessingInstruction node // node. case NodeType.PROCESSING_INSTRUCTION_NODE: - serializeProcessingInstructionNode( - node, - requireWellFormed, - result - ); + serializeProcessingInstructionNode(node, requireWellFormed, result); return; // An Attr object: Return an empty string. @@ -319,7 +276,7 @@ function serializeElementNode( const prefix = map.getPreferredPrefix(element, prefixIndex); if (prefix !== null) { - qualifiedName += `${prefix}:` + qualifiedName += `${prefix}:`; } qualifiedName += element.localName; result.push(qualifiedName); @@ -339,13 +296,7 @@ function serializeElementNode( // 13. Append to markup the result of the XML serialization of node's attributes given map, // prefix index, local prefixes map, ignore namespace definition attribute flag, and require // well-formed flag. - serializeAttributes( - element, - map, - prefixIndex, - requireWellFormed, - result - ); + serializeAttributes(element, map, prefixIndex, requireWellFormed, result); // 14. If ns is the HTML namespace, and the node's list of children is empty, and the node's // localName matches any one of the following void elements: "area", "base", "basefont", @@ -393,13 +344,7 @@ function serializeElementNode( // of node's children, in tree order, providing inherited ns, map, prefix index, and the require // well-formed flag. 
for (const child of node.childNodes) { - runXmlSerializationAlgorithm( - child, - map, - prefixIndex, - requireWellFormed, - result - ); + runXmlSerializationAlgorithm(child, map, prefixIndex, requireWellFormed, result); } // 20. Append the following to markup, in the order listed: @@ -453,7 +398,7 @@ function serializeAttributes( if (attr.namespaceURI === XMLNS_NAMESPACE) { // Namespace declaration attribute - const declaredNamespaceUri = attr.value === "" ? null : attr.value; + const declaredNamespaceUri = attr.value === '' ? null : attr.value; // 3.5.2.2. If the require well-formed flag is set (its value is // true), and the value of attr's value attribute matches the XMLNS @@ -463,7 +408,7 @@ function serializeAttributes( // XML parsing. // NOTE: DOM APIs do allow creation of elements in the XMLNS // namespace but with strict qualifications. - if (requireWellFormed && attr.value === XMLNS_NAMESPACE) { + if (requireWellFormed && declaredNamespaceUri === XMLNS_NAMESPACE) { throw new Error( 'The serialization of this attribute would produce invalid XML because ' + 'the XMLNS namespace is reserved and cannot be applied as an ' + @@ -472,7 +417,10 @@ function serializeAttributes( } // Don't declare the XML or XMLNS namespaces - if (declaredNamespaceUri === XML_NAMESPACE || declaredNamespaceUri === XMLNS_NAMESPACE) { + if ( + declaredNamespaceUri === XML_NAMESPACE || + declaredNamespaceUri === XMLNS_NAMESPACE + ) { continue; } @@ -538,7 +486,13 @@ function serializeAttributes( // Do we need a declaration? 
if (prefix !== null && map.prefixToNamespace(prefix) !== attr.namespaceURI) { - result.push(' xmlns:', prefix, '="', serializeAttributeValue(attr.namespaceURI, requireWellFormed), '"'); + result.push( + ' xmlns:', + prefix, + '="', + serializeAttributeValue(attr.namespaceURI, requireWellFormed), + '"' + ); map.add(prefix, attr.namespaceURI); } @@ -644,13 +598,7 @@ function serializeDocumentNode( // and after the Document's documentElement node, including at most one DocumentType node. (Text // nodes are not allowed as children of the Document.) for (const child of document.childNodes) { - runXmlSerializationAlgorithm( - child, - prefixMap, - prefixIndex, - requireWellFormed, - result - ); + runXmlSerializationAlgorithm(child, prefixMap, prefixIndex, requireWellFormed, result); } // 2.3. Return the value of serialized document. @@ -663,11 +611,7 @@ function serializeDocumentNode( * @param requireWellFormed - Determines whether the result needs to be well-formed * @param result - Array of strings in which to construct the result */ -function serializeCommentNode( - node: Node, - requireWellFormed: boolean, - result: string[] -): void { +function serializeCommentNode(node: Node, requireWellFormed: boolean, result: string[]): void { const comment = node as Comment; // 1. If the require well-formed flag is set (its value is true), and node's data contains // characters that are not matched by the XML Char production or contains "--" (two adjacent @@ -693,11 +637,7 @@ function serializeCommentNode( * @param requireWellFormed - Determines whether the result needs to be well-formed * @param result - Array of strings in which to construct the result */ -function serializeCDATASectionNode( - node: Node, - requireWellFormed: boolean, - result: string[] -): void { +function serializeCDATASectionNode(node: Node, requireWellFormed: boolean, result: string[]): void { const cs = node as CDATASection; // 1. Let markup be the concatenation of "<![CDATA[", node's data, and "]]>". 
@@ -713,11 +653,7 @@ function serializeCDATASectionNode( * @param requireWellFormed - Determines whether the result needs to be well-formed * @param result - Array of strings in which to construct the result */ -function serializeTextNode( - node: Node, - requireWellFormed: boolean, - result: string[] -): void { +function serializeTextNode(node: Node, requireWellFormed: boolean, result: string[]): void { const text = node as Text; // 1. If the require well-formed flag is set (its value is true), and node's data contains // characters that are not matched by the XML Char production, then throw an exception; the @@ -765,13 +701,7 @@ function serializeDocumentFragmentNode( // child given namespace, prefix map, a reference to prefix index, and flag require well-formed. // Concatenate the result to markup. for (const child of node.childNodes) { - runXmlSerializationAlgorithm( - child, - prefixMap, - prefixIndex, - requireWellFormed, - result - ); + runXmlSerializationAlgorithm(child, prefixMap, prefixIndex, requireWellFormed, result); } // 3. Return the value of markup. @@ -784,11 +714,7 @@ function serializeDocumentFragmentNode( * @param requireWellFormed - Determines whether the result needs to be well-formed * @param result - Array of strings in which to construct the result */ -function serializeDocumentTypeNode( - node: Node, - requireWellFormed: boolean, - result: string[] -): void { +function serializeDocumentTypeNode(node: Node, requireWellFormed: boolean, result: string[]): void { const dt = node as DocumentType; // 1. 
If the require well-formed flag is true and the node's publicId attribute contains // characters that are not matched by the XML PubidChar production, then throw an exception; the diff --git a/test/dom-parsing/XMLSerializer.tests.ts b/test/dom-parsing/XMLSerializer.tests.ts index 1d14875..a2a723c 100644 --- a/test/dom-parsing/XMLSerializer.tests.ts +++ b/test/dom-parsing/XMLSerializer.tests.ts @@ -504,4 +504,32 @@ describe('serializeToWellFormedString', () => { `""` ); }); + + it('uses an existing prefix declared on an ancestor element', () => { + const root = document.appendChild(document.createElementNS('ns1', 'root')); + root.setAttributeNS(XMLNS_NAMESPACE, 'xmlns:pre', 'ns2'); + const child = root.appendChild(document.createElementNS('ns1', 'child')); + child.appendChild(document.createElementNS('ns2', 'grandChild')); + expect(slimdom.serializeToWellFormedString(document)).toBe( + '' + ); + }); + + it('does not use an ancestor-defined prefix that is no longer in scope', () => { + // see https://github.com/w3c/DOM-Parsing/issues/75 + const root = document.appendChild(document.createElementNS('ns1', 'pre:root')); + const child = root.appendChild(document.createElementNS('ns2', 'pre:child')); + child.appendChild(document.createElementNS('ns1', 'other:grandChild')); + expect(slimdom.serializeToWellFormedString(document)).toBe( + '' + ); + }); + + it('does not use the empty prefix for a namespaced attribute', () => { + const root = document.appendChild(document.createElementNS('ns1', 'root')); + root.setAttributeNS('ns1', 'attr', 'value'); + expect(slimdom.serializeToWellFormedString(document)).toMatchInlineSnapshot( + `""` + ); + }); });