From a4f5c76349c26848b29b16f93de5a363c08ab128 Mon Sep 17 00:00:00 2001
From: Stef Busking <stef.busking@gmail.com>
Date: Thu, 21 Dec 2023 09:16:46 +0100
Subject: [PATCH] Also guard against entity expansion attacks in attribute
 values (#149)

Looks like I missed this in the earlier implementation. This refactors the implementation a bit into a shape that also applies to any entities expanded as part of normalizing attribute values, both normal and default.
---
 src/dom-parsing/EntityExpansionGuard.ts    | 55 +++++++++++++++
 src/dom-parsing/parsingAlgorithms.ts       | 57 +++++++---------
 test/dom-parsing/parseXmlDocument.tests.ts | 78 ++++++++++++++++++++++
 3 files changed, 158 insertions(+), 32 deletions(-)
 create mode 100644 src/dom-parsing/EntityExpansionGuard.ts

diff --git a/src/dom-parsing/EntityExpansionGuard.ts b/src/dom-parsing/EntityExpansionGuard.ts
new file mode 100644
index 0000000..1b93450
--- /dev/null
+++ b/src/dom-parsing/EntityExpansionGuard.ts
@@ -0,0 +1,55 @@
+import { EntityRefEvent } from './parserEvents';
+import { throwErrorWithContext } from './parsingAlgorithms';
+
+/*
+ * Guard against entity expansion attacks by keeping track of the initial input
+ * length vs. the expanded input length. The latter includes the length of the
+ * replacement text for each processed entity reference. An attack is likely if
+ * the ratio between the two exceeds the maximum amplification factor AND the
+ * expanded input length exceeds a threshold. This approach and defaults are
+ * taken from libexpat's billion laughs attack protection.
+ */
+export default class EntityExpansionGuard {
+	private readonly _initialInputLength: number;
+
+	private _expandedInputLength: number;
+
+	private readonly _entityExpansionThreshold: number;
+
+	private readonly _entityExpansionMaxAmplification: number;
+
+	private _topLevelEntityRef: EntityRefEvent | null = null;
+
+	private _depth = 0;
+
+	public constructor(
+		initialInputLength: number,
+		entityExpansionThreshold: number,
+		entityExpansionMaxAmplification: number
+	) {
+		this._initialInputLength = initialInputLength;
+		this._expandedInputLength = initialInputLength;
+		this._entityExpansionThreshold = entityExpansionThreshold;
+		this._entityExpansionMaxAmplification = entityExpansionMaxAmplification;
+	}
+
+	public enter(event: EntityRefEvent, replacementTextLength: number): void {
+		const topLevelEntityRef = this._topLevelEntityRef ?? event;
+		this._expandedInputLength += replacementTextLength;
+		if (this._expandedInputLength > this._entityExpansionThreshold) {
+			const amplification = this._expandedInputLength / this._initialInputLength;
+			if (amplification > this._entityExpansionMaxAmplification) {
+				throwErrorWithContext('too much entity expansion', topLevelEntityRef);
+			}
+		}
+		this._topLevelEntityRef = topLevelEntityRef;
+		this._depth += 1;
+	}
+
+	public exit(): void {
+		this._depth -= 1;
+		if (this._depth === 0) {
+			this._topLevelEntityRef = null;
+		}
+	}
+}
diff --git a/src/dom-parsing/parsingAlgorithms.ts b/src/dom-parsing/parsingAlgorithms.ts
index 21f790a..19d21a2 100644
--- a/src/dom-parsing/parsingAlgorithms.ts
+++ b/src/dom-parsing/parsingAlgorithms.ts
@@ -7,6 +7,7 @@ import { insertNode } from '../util/mutationAlgorithms';
 import { XMLNS_NAMESPACE, XML_NAMESPACE } from '../util/namespaceHelpers';
 import { isElement } from '../util/NodeType';
 import { getNodeDocument } from '../util/treeHelpers';
+import EntityExpansionGuard from './EntityExpansionGuard';
 import {
 	CompleteChars,
 	CompleteName,
@@ -343,7 +344,8 @@ function normalizeAndIncludeEntities(
 	normalized: string[],
 	value: AttValueEvent[],
 	dtd: Dtd | null,
-	ancestorEntities: string[] | null
+	ancestorEntities: string[] | null,
+	expansionGuard: EntityExpansionGuard
 ) {
 	for (const event of value) {
 		if (typeof event === 'string') {
@@ -372,6 +374,7 @@ function normalizeAndIncludeEntities(
 				event
 			);
 		}
+		expansionGuard.enter(event, replacementText.length);
 		const result = EntityReplacementTextInLiteral(replacementText, 0);
 		if (!result.success) {
 			throwParseError(
@@ -386,18 +389,21 @@ function normalizeAndIncludeEntities(
 			normalized,
 			result.value,
 			dtd,
-			ancestorEntities ? [event.name, ...ancestorEntities] : [event.name]
+			ancestorEntities ? [event.name, ...ancestorEntities] : [event.name],
+			expansionGuard
 		);
+		expansionGuard.exit();
 	}
 }
 
 function normalizeAttributeValue(
 	value: AttValueEvent[],
 	attDef: AttDefEvent | undefined,
-	dtd: Dtd | null
+	dtd: Dtd | null,
+	expansionGuard: EntityExpansionGuard
 ): string {
 	const normalized: string[] = [];
-	normalizeAndIncludeEntities(normalized, value, dtd, null);
+	normalizeAndIncludeEntities(normalized, value, dtd, null, expansionGuard);
 	if (attDef && !attDef.isCData) {
 		return normalized
 			.join('')
@@ -506,7 +512,8 @@ class Namespaces {
 		event: STagEvent | EmptyElemTagEvent,
 		attlist: Map<string, AttDefEvent> | undefined,
 		dtd: Dtd | null,
-		qualifiedNameCache: QualifiedNameCache
+		qualifiedNameCache: QualifiedNameCache,
+		expansionGuard: EntityExpansionGuard
 	): Namespaces {
 		let ns = parent;
 		let hasDeclarations = false;
@@ -558,7 +565,7 @@ class Namespaces {
 				localName === 'xmlns' &&
 				(!hasDeclarations || !ns._byPrefix.has(null))
 			) {
-				const namespace = normalizeAttributeValue(value, def, dtd) || null;
+				const namespace = normalizeAttributeValue(value, def, dtd, expansionGuard) || null;
 				add(null, namespace, nameEvent);
 			} else if (prefix === 'xmlns' && (!hasDeclarations || !ns._byPrefix.has(localName))) {
 				if (localName === 'xmlns') {
@@ -567,7 +574,7 @@ class Namespaces {
 						nameEvent
 					);
 				}
-				const namespace = normalizeAttributeValue(value, def, dtd) || null;
+				const namespace = normalizeAttributeValue(value, def, dtd, expansionGuard) || null;
 				add(localName, namespace, nameEvent);
 			}
 		};
@@ -719,15 +726,11 @@ export function parseXml(
 	input = input.replace(/^\ufeff/, '');
 	input = normalizeLineEndings(input);
 
-	// Guard against entity expansion attacks by keeping track of the initial input length vs. the
-	// expanded input length. The latter includes the length of the replacement text for each
-	// processed entity reference. An attack is likely if the ratio between the two exceeds the
-	// maximum amplification factor AND the expanded input length exceeds a threshold. This approach
-	// and defaults are taken from libexpat's billion laughs attack protection.
-	const initialInputLength = input.length;
-	let expandedInputLength = initialInputLength;
-	let topLevelEntityRef: EntityRefEvent | null = null;
-
+	const expansionGuard = new EntityExpansionGuard(
+		input.length,
+		entityExpansionThreshold,
+		entityExpansionMaxAmplification
+	);
 	let entityContext: EntityContext | null = {
 		parent: null,
 		entity: null,
@@ -778,16 +781,7 @@ export function parseXml(
 							event
 						);
 					}
-					if (topLevelEntityRef === null) {
-						topLevelEntityRef = event;
-					}
-					expandedInputLength += replacementText.length;
-					if (expandedInputLength > entityExpansionThreshold) {
-						const amplification = expandedInputLength / initialInputLength;
-						if (amplification > entityExpansionMaxAmplification) {
-							throwErrorWithContext('too much entity expansion', topLevelEntityRef);
-						}
-					}
+					expansionGuard.enter(event, replacementText.length);
 					domContext = {
 						parent: domContext,
 						root: domContext.root,
@@ -861,7 +855,8 @@ export function parseXml(
 						event,
 						attlist,
 						dtd,
-						qualifiedNameCache
+						qualifiedNameCache,
+						expansionGuard
 					);
 					const { prefix, localName } = splitQualifiedName(
 						event.name,
@@ -888,7 +883,7 @@ export function parseXml(
 							namespace,
 							prefix,
 							localName,
-							normalizeAttributeValue(attr.value, def, dtd),
+							normalizeAttributeValue(attr.value, def, dtd, expansionGuard),
 							element
 						);
 						appendAttribute(attrNode, element, true);
@@ -917,7 +912,7 @@ export function parseXml(
 								namespace,
 								prefix,
 								localName,
-								normalizeAttributeValue(def.value, attr, dtd),
+								normalizeAttributeValue(def.value, attr, dtd, expansionGuard),
 								element
 							);
 							appendAttribute(attrNode, element, true);
@@ -981,10 +976,8 @@ export function parseXml(
 
 		entityContext = entityContext.parent;
 		if (entityContext) {
+			expansionGuard.exit();
 			domContext = domContext.parent!;
-			if (entityContext.entity === null) {
-				topLevelEntityRef = null;
-			}
 		}
 	}
 
diff --git a/test/dom-parsing/parseXmlDocument.tests.ts b/test/dom-parsing/parseXmlDocument.tests.ts
index 937d27f..8e0734b 100644
--- a/test/dom-parsing/parseXmlDocument.tests.ts
+++ b/test/dom-parsing/parseXmlDocument.tests.ts
@@ -831,6 +831,84 @@ describe('parseXmlDocument', () => {
 	`);
 	});
 
+	it('protects against entity expansion attacks in attribute values', () => {
+		// Each level expands to 10 times the next, leading to 10^10 expansions
+		const xml = `<!DOCTYPE root [
+			<!ENTITY one "&two;&two;&two;&two;&two;&two;&two;&two;&two;&two;&two;">
+			<!ENTITY two "&three;&three;&three;&three;&three;&three;&three;&three;&three;&three;&three;">
+			<!ENTITY three "&four;&four;&four;&four;&four;&four;&four;&four;&four;&four;&four;">
+			<!ENTITY four "&five;&five;&five;&five;&five;&five;&five;&five;&five;&five;&five;">
+			<!ENTITY five "&six;&six;&six;&six;&six;&six;&six;&six;&six;&six;&six;">
+			<!ENTITY six "&seven;&seven;&seven;&seven;&seven;&seven;&seven;&seven;&seven;&seven;&seven;">
+			<!ENTITY seven "&eight;&eight;&eight;&eight;&eight;&eight;&eight;&eight;&eight;&eight;&eight;">
+			<!ENTITY eight "&nine;&nine;&nine;&nine;&nine;&nine;&nine;&nine;&nine;&nine;&nine;">
+			<!ENTITY nine "&ten;&ten;&ten;&ten;&ten;&ten;&ten;&ten;&ten;&ten;&ten;">
+			<!ENTITY ten "a">
+		]><root attr="&one;"/>`;
+		expect(() => {
+			console.log(slimdom.parseXmlDocument(xml));
+		}).toThrowErrorMatchingInlineSnapshot(`
+		"too much entity expansion
+		At line 12, character 17:
+
+				]><root attr="&one;"/>
+				              ^^^^^"
+	`);
+	});
+
+	it('protects against entity expansion attacks in default attribute values', () => {
+		// Each level expands to 10 times the next, leading to 10^10 expansions
+		const xml = `<!DOCTYPE root [
+			<!ENTITY one "&two;&two;&two;&two;&two;&two;&two;&two;&two;&two;&two;">
+			<!ENTITY two "&three;&three;&three;&three;&three;&three;&three;&three;&three;&three;&three;">
+			<!ENTITY three "&four;&four;&four;&four;&four;&four;&four;&four;&four;&four;&four;">
+			<!ENTITY four "&five;&five;&five;&five;&five;&five;&five;&five;&five;&five;&five;">
+			<!ENTITY five "&six;&six;&six;&six;&six;&six;&six;&six;&six;&six;&six;">
+			<!ENTITY six "&seven;&seven;&seven;&seven;&seven;&seven;&seven;&seven;&seven;&seven;&seven;">
+			<!ENTITY seven "&eight;&eight;&eight;&eight;&eight;&eight;&eight;&eight;&eight;&eight;&eight;">
+			<!ENTITY eight "&nine;&nine;&nine;&nine;&nine;&nine;&nine;&nine;&nine;&nine;&nine;">
+			<!ENTITY nine "&ten;&ten;&ten;&ten;&ten;&ten;&ten;&ten;&ten;&ten;&ten;">
+			<!ENTITY ten "a">
+			<!ATTLIST root attr CDATA "&one;">
+		]><root/>`;
+		expect(() => {
+			console.log(slimdom.parseXmlDocument(xml));
+		}).toThrowErrorMatchingInlineSnapshot(`
+		"too much entity expansion
+		At line 12, character 31:
+
+					<!ATTLIST root attr CDATA "&one;">
+					                           ^^^^^"
+	`);
+	});
+
+	it('protects against entity expansion attacks in default attributes on elements generated from an entity', () => {
+		// Each level expands to 10 times the next, leading to 10^10 expansions
+		const xml = `<!DOCTYPE root [
+			<!ENTITY one "&two;&two;&two;&two;&two;&two;&two;&two;&two;&two;&two;">
+			<!ENTITY two "&three;&three;&three;&three;&three;&three;&three;&three;&three;&three;&three;">
+			<!ENTITY three "&four;&four;&four;&four;&four;&four;&four;&four;&four;&four;&four;">
+			<!ENTITY four "&five;&five;&five;&five;&five;&five;&five;&five;&five;&five;&five;">
+			<!ENTITY five "&six;&six;&six;&six;&six;&six;&six;&six;&six;&six;&six;">
+			<!ENTITY six "&seven;&seven;&seven;&seven;&seven;&seven;&seven;&seven;&seven;&seven;&seven;">
+			<!ENTITY seven "&eight;&eight;&eight;&eight;&eight;&eight;&eight;&eight;&eight;&eight;&eight;">
+			<!ENTITY eight "&nine;&nine;&nine;&nine;&nine;&nine;&nine;&nine;&nine;&nine;&nine;">
+			<!ENTITY nine "&ten;&ten;&ten;&ten;&ten;&ten;&ten;&ten;&ten;&ten;&ten;">
+			<!ENTITY ten "a">
+			<!ENTITY ele "&#60;element/>">
+			<!ATTLIST element attr CDATA "&one;">
+		]><root>&ele;</root>`;
+		expect(() => {
+			console.log(slimdom.parseXmlDocument(xml));
+		}).toThrowErrorMatchingInlineSnapshot(`
+		"too much entity expansion
+		At line 14, character 11:
+
+				]><root>&ele;</root>
+				        ^^^^^"
+	`);
+	});
+
 	it('can optionally treat CDATA sections as text', () => {
 		const xml = '<xml>before<![CDATA[inside]]>after</xml>';
 		const doc1 = slimdom.parseXmlDocument(xml);