From 637ee6736f6eea52e4aea7e147fea1acea669c5f Mon Sep 17 00:00:00 2001 From: Michael Schmidt Date: Fri, 4 Jun 2021 04:46:44 +0200 Subject: [PATCH] Improved `regexp/strict` rule (#225) * Improved `regexp/strict` rule * Update * Disable validator for patterns with named backrefs --- README.md | 2 +- docs/rules/README.md | 2 +- docs/rules/strict.md | 3 +- lib/rules/strict.ts | 231 ++++++++++++++++++++++++++++++++++-- tests/lib/rules/strict.ts | 241 +++++++++++++++++++++++++++++++++++--- 5 files changed, 449 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 55485267d..bd480fd4a 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco | [regexp/no-useless-assertions](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-assertions.html) | disallow assertions that are known to always accept (or reject) | | | [regexp/no-useless-backreference](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-backreference.html) | disallow useless backreferences in regular expressions | :star: | | [regexp/no-useless-dollar-replacements](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-dollar-replacements.html) | disallow useless `$` replacements in replacement string | | -| [regexp/strict](https://ota-meshi.github.io/eslint-plugin-regexp/rules/strict.html) | disallow not strictly valid regular expressions | | +| [regexp/strict](https://ota-meshi.github.io/eslint-plugin-regexp/rules/strict.html) | disallow not strictly valid regular expressions | :wrench: | ### Best Practices diff --git a/docs/rules/README.md b/docs/rules/README.md index b1599c2d6..18f07ff7c 100644 --- a/docs/rules/README.md +++ b/docs/rules/README.md @@ -25,7 +25,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco | [regexp/no-useless-assertions](./no-useless-assertions.md) | disallow assertions that are known to always accept (or reject) | | | [regexp/no-useless-backreference](./no-useless-backreference.md) | disallow useless backreferences in regular expressions | :star: | | [regexp/no-useless-dollar-replacements](./no-useless-dollar-replacements.md) | disallow useless `$` replacements in replacement string | | -| [regexp/strict](./strict.md) | disallow not strictly valid regular expressions | | +| [regexp/strict](./strict.md) | disallow not strictly valid regular expressions | :wrench: | ### Best Practices diff --git a/docs/rules/strict.md b/docs/rules/strict.md index edaab300c..6f96c2034 100644 --- a/docs/rules/strict.md +++ b/docs/rules/strict.md @@ -9,6 +9,7 @@ description: "disallow not strictly valid regular expressions" > disallow not strictly valid regular expressions - :exclamation: ***This rule has not been released yet.*** +- :wrench: The `--fix` option on the [command line](https://eslint.org/docs/user-guide/command-line-interface#fixing-problems) can automatically fix some of the problems reported by this rule. ## :book: Rule Details @@ -20,7 +21,7 @@ Depending on the syntax defined in [Annex B] of the ECMAScript specification, so [Annex B]: https://tc39.es/ecma262/#sec-regular-expressions-patterns - + ```js /* eslint regexp/strict: "error" */ diff --git a/lib/rules/strict.ts b/lib/rules/strict.ts index e9c8b425b..0e7cc4c9f 100644 --- a/lib/rules/strict.ts +++ b/lib/rules/strict.ts @@ -1,15 +1,24 @@ import { RegExpValidator } from "regexpp" +import type { CharacterClassElement, Element } from "regexpp/ast" import type { RegExpVisitor } from "regexpp/visitor" import type { RegExpContext } from "../utils" -import { createRule, defineRegexpVisitor } from "../utils" +import { + isOctalEscape, + createRule, + defineRegexpVisitor, + isEscapeSequence, +} from "../utils" -const validator = new RegExpValidator({ strict: true }) +const validator = new RegExpValidator({ strict: true, ecmaVersion: 2020 }) /** * Check syntax error in a given pattern. - * @returns {string|null} The syntax error. + * @returns The syntax error. */ -function validateRegExpPattern(pattern: string, uFlag?: boolean) { +function validateRegExpPattern( + pattern: string, + uFlag?: boolean, +): string | null { try { validator.validatePattern(pattern, undefined, undefined, uFlag) return null @@ -18,6 +27,9 @@ function validateRegExpPattern(pattern: string, uFlag?: boolean) { } } +const CHARACTER_CLASS_SYNTAX_CHARACTERS = new Set("\\/()[]{}^$.|-+*?".split("")) +const SYNTAX_CHARACTERS = new Set("\\/()[]{}^$.|+*?".split("")) + export default createRule("strict", { meta: { docs: { @@ -27,8 +39,33 @@ export default createRule("strict", { // recommended: true, recommended: false, }, + fixable: "code", schema: [], messages: { + // character escape + invalidControlEscape: + "Invalid or incomplete control escape sequence. Either use a valid control escape sequence or escaping the standalone backslash.", + incompleteEscapeSequence: + "Incomplete escape sequence '{{expr}}'. Either use a valid escape sequence or remove the useless escaping.", + invalidPropertyEscape: + "Invalid property escape sequence '{{expr}}'. Either use a valid property escape sequence or remove the useless escaping.", + incompleteBackreference: + "Incomplete backreference '{{expr}}'. Either use a valid backreference or remove the useless escaping.", + unescapedSourceCharacter: "Unescaped source character '{{expr}}'.", + octalEscape: + "Invalid legacy octal escape sequence '{{expr}}'. Use a hexadecimal escape instead.", + uselessEscape: + "Useless identity escapes with non-syntax characters are forbidden.", + + // character class + invalidRange: + "Invalid character class range. A character set cannot be the minimum or maximum of a character class range. Either escape the `-` or fix the character class range.", + + // assertion + quantifiedAssertion: + "Assertion are not allowed to be quantified directly.", + + // validator regexMessage: "{{message}}.", }, type: "suggestion", @@ -40,21 +77,195 @@ export default createRule("strict", { function createVisitor( regexpContext: RegExpContext, ): RegExpVisitor.Handlers { - const { node, flags, pattern } = regexpContext + const { + node, + flags, + pattern, + getRegexpLocation, + fixReplaceNode, + } = regexpContext + + if (flags.unicode) { + // the Unicode flag enables strict parsing mode automatically + return {} + } - const message = validateRegExpPattern(pattern, flags.unicode) + let reported = false + let hasNamedBackreference = false + + /** Report */ + function report( + messageId: string, + element: Element, + fix?: string | null, + ): void { + reported = true - if (message) { context.report({ node, - messageId: "regexMessage", + loc: getRegexpLocation(element), + messageId, data: { - message, + expr: element.raw, }, + fix: fix ? fixReplaceNode(element, fix) : null, }) } - return {} + return { + // eslint-disable-next-line complexity -- x + onCharacterEnter(cNode) { + if (cNode.raw === "\\") { + // e.g. \c5 or \c + report("invalidControlEscape", cNode) + return + } + if (cNode.raw === "\\u" || cNode.raw === "\\x") { + // e.g. \u000; + report("incompleteEscapeSequence", cNode) + return + } + if (cNode.raw === "\\p" || cNode.raw === "\\P") { + // e.g. \p{H} or \p + report("invalidPropertyEscape", cNode) + return + } + if (cNode.value !== 0 && isOctalEscape(cNode.raw)) { + // e.g. \023 + report( + "octalEscape", + cNode, + `\\x${cNode.value.toString(16).padStart(2, "0")}`, + ) + return + } + + const insideCharClass = + cNode.parent.type === "CharacterClass" || + cNode.parent.type === "CharacterClassRange" + + if (!insideCharClass) { + if (cNode.raw === "\\k") { + // e.g. \k)\k/`, + String.raw`/\p{L}/u`, + String.raw`/ \( \) \[ \] \{ \} \| \* \+ \? \^ \$ \\ \/ \./`, + String.raw`/[\( \) \[ \] \{ \} \| \* \+ \? \^ \$ \\ \/ \. \-]/`, + "/\\u000f/", + "/\\x000f/", + ], invalid: [ + // source characters + { + code: String.raw`/]/`, + output: String.raw`/\]/`, + errors: [ + { + message: "Unescaped source character ']'.", + column: 2, + }, + ], + }, + { + code: String.raw`/{/`, + output: String.raw`/\{/`, + errors: [ + { + message: "Unescaped source character '{'.", + column: 2, + }, + ], + }, { - code: `/]/`, + code: String.raw`/}/`, + output: String.raw`/\}/`, + errors: [ + { + message: "Unescaped source character '}'.", + column: 2, + }, + ], + }, + + // invalid or incomplete escape sequences + { + code: String.raw`/\u{42}/`, + output: null, errors: [ { message: - "Invalid regular expression: /]/: Lone quantifier brackets.", - line: 1, - column: 1, + "Incomplete escape sequence '\\u'. Either use a valid escape sequence or remove the useless escaping.", + column: 2, }, ], }, { - code: `/{/`, + code: "/\\u000;/", + output: null, errors: [ { message: - "Invalid regular expression: /{/: Lone quantifier brackets.", - line: 1, - column: 1, + "Incomplete escape sequence '\\u'. Either use a valid escape sequence or remove the useless escaping.", + column: 2, }, ], }, { - code: `/}/`, + code: "/\\x4/", + output: null, errors: [ { message: - "Invalid regular expression: /}/: Lone quantifier brackets.", - line: 1, - column: 1, + "Incomplete escape sequence '\\x'. Either use a valid escape sequence or remove the useless escaping.", + column: 2, }, ], }, { - code: String.raw`/\u{42}/`, + code: "/\\c;/", + output: null, + errors: [ + { + message: + "Invalid or incomplete control escape sequence. Either use a valid control escape sequence or escaping the standalone backslash.", + column: 2, + }, + ], + }, + { + code: "/\\p/", + output: null, + errors: [ + { + message: + "Invalid property escape sequence '\\p'. Either use a valid property escape sequence or remove the useless escaping.", + column: 2, + }, + ], + }, + { + code: "/\\p{H}/", + output: "/\\p\\{H\\}/", + errors: [ + { + message: + "Invalid property escape sequence '\\p'. Either use a valid property escape sequence or remove the useless escaping.", + column: 2, + }, + { + message: "Unescaped source character '{'.", + column: 4, + }, + { + message: "Unescaped source character '}'.", + column: 6, + }, + ], + }, + { + code: "/\\012/", + output: "/\\x0a/", + errors: [ + { + message: + "Invalid legacy octal escape sequence '\\012'. Use a hexadecimal escape instead.", + column: 2, + }, + ], + }, + + // incomplete backreference + { + code: "/\\k/", + output: null, + errors: [ + { + message: + "Incomplete backreference '\\k'. Either use a valid backreference or remove the useless escaping.", + column: 2, + }, + ], + }, + + // useless escape + { + code: "/\\; \\_ \\a \\- \\'/", + output: "/; _ a - '/", + errors: [ + { + message: + "Useless identity escapes with non-syntax characters are forbidden.", + column: 2, + }, + { + message: + "Useless identity escapes with non-syntax characters are forbidden.", + column: 5, + }, + { + message: + "Useless identity escapes with non-syntax characters are forbidden.", + column: 8, + }, + { + message: + "Useless identity escapes with non-syntax characters are forbidden.", + column: 11, + }, + { + message: + "Useless identity escapes with non-syntax characters are forbidden.", + column: 14, + }, + ], + }, + { + code: "/[\\; \\_ \\a \\']/", + output: "/[; _ a ']/", + errors: [ + { + message: + "Useless identity escapes with non-syntax characters are forbidden.", + column: 3, + }, + { + message: + "Useless identity escapes with non-syntax characters are forbidden.", + column: 6, + }, + { + message: + "Useless identity escapes with non-syntax characters are forbidden.", + column: 9, + }, + { + message: + "Useless identity escapes with non-syntax characters are forbidden.", + column: 12, + }, + ], + }, + + // invalid ranges + { + code: String.raw`/[\w-a]/`, + output: null, + errors: [ + { + message: + "Invalid character class range. A character set cannot be the minimum or maximum of a character class range. Either escape the `-` or fix the character class range.", + column: 3, + }, + ], + }, + { + code: String.raw`/[a-\w]/`, + output: null, + errors: [ + { + message: + "Invalid character class range. A character set cannot be the minimum or maximum of a character class range. Either escape the `-` or fix the character class range.", + column: 5, + }, + ], + }, + + // quantified assertions + { + code: String.raw`/(?!a)+/`, + output: String.raw`/(?:(?!a))+/`, errors: [ { message: - "Invalid regular expression: /\\u{42}/: Invalid unicode escape.", - line: 1, - column: 1, + "Assertion are not allowed to be quantified directly.", + column: 2, }, ], },