Skip to content

Commit

Permalink
Run W3C XML Conformance Test Suite
Browse files Browse the repository at this point in the history
Fix a number of issues found due to failing tests, including:

* Enforce well-formedness for PE references.
* Character references must reference a valid Char.

Also run these tests in GitHub actions
  • Loading branch information
bwrrp committed Jun 23, 2022
1 parent 512369d commit d81dbea
Show file tree
Hide file tree
Showing 9 changed files with 7,825 additions and 38 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,7 @@ jobs:
with:
node-version: ${{ matrix.node-version }}
- run: npm install
- run: npm run download-xmlconf
- run: npm test
env:
XMLCONF_PATH: './temp/xmlconf'
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"build:docs": "api-documenter markdown -i api -o docs",
"build": "npm-run-all build:clean build:bundle build:api build:api-copy build:docs",
"prepare": "npm run build",
"download-xmlconf": "node test/dom-parsing/downloadXmlConf.js",
"test": "jest --coverage --verbose",
"test:debug": "node --inspect --debug-brk node_modules/jest/bin/jest.js --runInBand"
},
Expand All @@ -43,6 +44,7 @@
"@microsoft/api-extractor": "^7.24.2",
"@rollup/plugin-node-resolve": "^13.3.0",
"@types/jest": "^27.5.1",
"@types/node": "^18.0.0",
"copyfiles": "^2.4.1",
"dom-treeadapter": "^0.2.2",
"jest": "^28.1.0",
Expand Down
76 changes: 49 additions & 27 deletions src/dom-parsing/grammar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -228,23 +228,33 @@ const Nmtoken = recognize(codepoints(isValidNameChar, ['valid name character']))
// | '&#x' [0-9a-fA-F]+ ';'
const CharRef: Parser<CharRefEvent> = withPosition(
map(
or([
map(
delimited(CHARREF_HEX_START, recognize(plusConsumed(HEX_DIGIT)), SEMICOLON, true),
(n) => parseInt(n, 16)
),
map(delimited(CHARREF_START, recognize(plusConsumed(DIGIT)), SEMICOLON), (n) =>
parseInt(n, 10)
),
]),
filter(
or([
map(
delimited(
CHARREF_HEX_START,
recognize(plusConsumed(HEX_DIGIT)),
SEMICOLON,
true
),
(n) => parseInt(n, 16)
),
map(
delimited(CHARREF_START, recognize(plusConsumed(DIGIT)), SEMICOLON, true),
(n) => parseInt(n, 10)
),
]),
(cp) => isValidChar(cp),
['character reference must reference a valid character']
),
(cp) => ({ type: ParserEventType.CharRef, cp })
)
);

// [68] EntityRef ::= '&' Name ';'
// Namespaces spec makes this an NCName
const EntityRef: Parser<EntityRefEvent> = withPosition(
map(delimited(AMPERSAND, NCName, SEMICOLON), (name) => ({
map(delimited(AMPERSAND, NCName, cut(SEMICOLON)), (name) => ({
type: ParserEventType.EntityRef,
name,
}))
Expand Down Expand Up @@ -372,18 +382,22 @@ export const EntityReplacementTextInLiteral = complete(
);

// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
const SystemLiteral = or([
delimited(
DOUBLE_QUOTE,
recognize(codepoints((cp) => cp !== DOUBLE_QUOTE_CP && isValidChar(cp))),
DOUBLE_QUOTE
),
delimited(
SINGLE_QUOTE,
recognize(codepoints((cp) => cp !== SINGLE_QUOTE_CP && isValidChar(cp))),
SINGLE_QUOTE
),
]);
const SystemLiteral = filter(
or([
delimited(
DOUBLE_QUOTE,
recognize(codepoints((cp) => cp !== DOUBLE_QUOTE_CP && isValidChar(cp))),
DOUBLE_QUOTE
),
delimited(
SINGLE_QUOTE,
recognize(codepoints((cp) => cp !== SINGLE_QUOTE_CP && isValidChar(cp))),
SINGLE_QUOTE
),
]),
(systemId) => !systemId.includes('#'),
['system identifier must not contain a fragment identifier']
);

// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
function isValidPubidChar(cp: number): boolean {
Expand Down Expand Up @@ -703,7 +717,7 @@ const Mixed = preceded(
consume(
followed(
starConsumed(preceded(delimited(optional(S), VERTICAL_BAR, optional(S)), Name)),
followed(PARENTHESIS_CLOSE, ASTERISK)
preceded(optional(S), followed(PARENTHESIS_CLOSE, ASTERISK))
)
),
consume(followed(optional(S), PARENTHESIS_CLOSE)),
Expand Down Expand Up @@ -892,21 +906,29 @@ const EntityDef = or<EntityValueEvent[] | ExternalEntityEvent>([
const GEDecl: Parser<EntityDeclEvent | void> = delimited(
ENTITY_DECL_START,
then(preceded(S, NCName), cut(preceded(S, EntityDef)), (name, value) => ({
type: MarkupdeclEventType.EntityDecl,
type: MarkupdeclEventType.GEDecl,
name,
value,
})),
preceded(optional(S), ANGLE_BRACKET_CLOSE)
);

// [74] PEDef ::= EntityValue | ExternalID
const PEDef = or([consume(EntityValue), consume(ExternalID)]);
const PEDef = or<EntityValueEvent[] | void>([EntityValue, consume(ExternalID)]);

// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
// Namespaces spec makes this an NCName
const PEDecl = delimited(
const PEDecl: Parser<EntityDeclEvent | void> = delimited(
followed(ENTITY_DECL_START, preceded(S, PERCENT)),
then(preceded(S, NCName), preceded(S, PEDef), () => undefined),
then(preceded(S, NCName), cut(preceded(S, PEDef)), (name, value) =>
value
? {
type: MarkupdeclEventType.PEDecl,
name,
value,
}
: undefined
),
preceded(optional(S), ANGLE_BRACKET_CLOSE),
true
);
Expand Down
5 changes: 3 additions & 2 deletions src/dom-parsing/parserEvents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ export type ExternalIDEvent = { publicId: string | null; systemId: string | null

export const enum MarkupdeclEventType {
AttlistDecl,
EntityDecl,
GEDecl,
PEDecl,
}

export const enum DefaultDeclType {
Expand Down Expand Up @@ -94,7 +95,7 @@ export type EntityValueEvent = TextEvent | ReferenceEvent | PEReferenceEvent;
export type ExternalEntityEvent = { ids: ExternalIDEvent; ndata: string | null };

export type EntityDeclEvent = {
type: MarkupdeclEventType.EntityDecl;
type: MarkupdeclEventType.GEDecl | MarkupdeclEventType.PEDecl;
name: string;
value: EntityValueEvent[] | ExternalEntityEvent;
};
Expand Down
41 changes: 32 additions & 9 deletions src/dom-parsing/parsingAlgorithms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,9 @@ export function throwErrorWithContext(message: string, event: WithPosition<unkno
}

function throwParseError(what: string, input: string, expected: string[], offset: number): never {
const quoted = expected.map((str) => (str.includes('"') ? `'${str}'` : `"${str}"`));
const quoted = Array.from(new Set(expected), (str) =>
str.includes('"') ? `'${str}'` : `"${str}"`
);
const cp = input.codePointAt(offset);
const actual = cp ? String.fromCodePoint(cp) : '';
throwErrorWithContext(
Expand Down Expand Up @@ -164,9 +166,6 @@ function isWhitespace(value: string): boolean {
return CompleteWhitespace(value, 0).success;
}

// TODO: add line / column info (and some context) to all parser errors
// TODO: add same info to all other errors

function constructReplacementText(value: EntityValueEvent[]): string {
const replacementText: string[] = [];
for (const event of value) {
Expand Down Expand Up @@ -216,10 +215,10 @@ class Dtd {
for (const attr of decl.attdefs) {
if (attr.def.type === DefaultDeclType.VALUE) {
for (const event of attr.def.value) {
if (
typeof event !== 'string' &&
event.type === ParserEventType.EntityRef
) {
if (typeof event === 'string') {
continue;
}
if (event.type === ParserEventType.EntityRef) {
if (
!this._entityReplacementTextByName.has(event.name) &&
!this._externalEntityNames.has(event.name) &&
Expand All @@ -230,6 +229,12 @@ class Dtd {
event
);
}
if (this._externalEntityNames.has(event.name)) {
throwErrorWithContext(
`default value of attribute "${attr.name.name}" must not contain reference to external entity "${event.name}"`,
event
);
}
}
}
}
Expand All @@ -250,7 +255,25 @@ class Dtd {
break;
}

case MarkupdeclEventType.EntityDecl: {
case MarkupdeclEventType.PEDecl: {
// We don't support these, but still need to validate well-formedness
if (Array.isArray(decl.value)) {
for (const event of decl.value) {
if (
typeof event !== 'string' &&
event.type === ParserEventType.PEReference
) {
throwErrorWithContext(
`reference to parameter entity "${event.name}" must not occur in an entity declaration in the internal subset`,
event
);
}
}
}
break;
}

case MarkupdeclEventType.GEDecl: {
// First declaration is binding
if (
this._entityReplacementTextByName.has(decl.name) ||
Expand Down
Loading

0 comments on commit d81dbea

Please sign in to comment.