Skip to content

Commit

Permalink
Improved regexp/strict rule (#225)
Browse files Browse the repository at this point in the history
* Improved `regexp/strict` rule

* Update

* Disable validator for patterns with named backrefs
  • Loading branch information
RunDevelopment authored Jun 4, 2021
1 parent 90d0ce1 commit 637ee67
Show file tree
Hide file tree
Showing 5 changed files with 449 additions and 30 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco
| [regexp/no-useless-assertions](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-assertions.html) | disallow assertions that are known to always accept (or reject) | |
| [regexp/no-useless-backreference](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-backreference.html) | disallow useless backreferences in regular expressions | :star: |
| [regexp/no-useless-dollar-replacements](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-dollar-replacements.html) | disallow useless `$` replacements in replacement string | |
| [regexp/strict](https://ota-meshi.github.io/eslint-plugin-regexp/rules/strict.html) | disallow not strictly valid regular expressions | |
| [regexp/strict](https://ota-meshi.github.io/eslint-plugin-regexp/rules/strict.html) | disallow not strictly valid regular expressions | :wrench: |

### Best Practices

Expand Down
2 changes: 1 addition & 1 deletion docs/rules/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco
| [regexp/no-useless-assertions](./no-useless-assertions.md) | disallow assertions that are known to always accept (or reject) | |
| [regexp/no-useless-backreference](./no-useless-backreference.md) | disallow useless backreferences in regular expressions | :star: |
| [regexp/no-useless-dollar-replacements](./no-useless-dollar-replacements.md) | disallow useless `$` replacements in replacement string | |
| [regexp/strict](./strict.md) | disallow not strictly valid regular expressions | |
| [regexp/strict](./strict.md) | disallow not strictly valid regular expressions | :wrench: |

### Best Practices

Expand Down
3 changes: 2 additions & 1 deletion docs/rules/strict.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ description: "disallow not strictly valid regular expressions"
> disallow not strictly valid regular expressions
- :exclamation: <badge text="This rule has not been released yet." vertical="middle" type="error"> ***This rule has not been released yet.*** </badge>
- :wrench: The `--fix` option on the [command line](https://eslint.org/docs/user-guide/command-line-interface#fixing-problems) can automatically fix some of the problems reported by this rule.

## :book: Rule Details

Expand All @@ -20,7 +21,7 @@ Depending on the syntax defined in [Annex B] of the ECMAScript specification, so

[Annex B]: https://tc39.es/ecma262/#sec-regular-expressions-patterns

<eslint-code-block>
<eslint-code-block fix>

```js
/* eslint regexp/strict: "error" */
Expand Down
231 changes: 221 additions & 10 deletions lib/rules/strict.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
import { RegExpValidator } from "regexpp"
import type { CharacterClassElement, Element } from "regexpp/ast"
import type { RegExpVisitor } from "regexpp/visitor"
import type { RegExpContext } from "../utils"
import { createRule, defineRegexpVisitor } from "../utils"
import {
isOctalEscape,
createRule,
defineRegexpVisitor,
isEscapeSequence,
} from "../utils"

const validator = new RegExpValidator({ strict: true })
const validator = new RegExpValidator({ strict: true, ecmaVersion: 2020 })

/**
* Check syntax error in a given pattern.
* @returns {string|null} The syntax error.
* @returns The syntax error.
*/
function validateRegExpPattern(pattern: string, uFlag?: boolean) {
function validateRegExpPattern(
pattern: string,
uFlag?: boolean,
): string | null {
try {
validator.validatePattern(pattern, undefined, undefined, uFlag)
return null
Expand All @@ -18,6 +27,9 @@ function validateRegExpPattern(pattern: string, uFlag?: boolean) {
}
}

const CHARACTER_CLASS_SYNTAX_CHARACTERS = new Set("\\/()[]{}^$.|-+*?".split(""))
const SYNTAX_CHARACTERS = new Set("\\/()[]{}^$.|+*?".split(""))

export default createRule("strict", {
meta: {
docs: {
Expand All @@ -27,8 +39,33 @@ export default createRule("strict", {
// recommended: true,
recommended: false,
},
fixable: "code",
schema: [],
messages: {
// character escape
invalidControlEscape:
"Invalid or incomplete control escape sequence. Either use a valid control escape sequence or escaping the standalone backslash.",
incompleteEscapeSequence:
"Incomplete escape sequence '{{expr}}'. Either use a valid escape sequence or remove the useless escaping.",
invalidPropertyEscape:
"Invalid property escape sequence '{{expr}}'. Either use a valid property escape sequence or remove the useless escaping.",
incompleteBackreference:
"Incomplete backreference '{{expr}}'. Either use a valid backreference or remove the useless escaping.",
unescapedSourceCharacter: "Unescaped source character '{{expr}}'.",
octalEscape:
"Invalid legacy octal escape sequence '{{expr}}'. Use a hexadecimal escape instead.",
uselessEscape:
"Useless identity escapes with non-syntax characters are forbidden.",

// character class
invalidRange:
"Invalid character class range. A character set cannot be the minimum or maximum of a character class range. Either escape the `-` or fix the character class range.",

// assertion
quantifiedAssertion:
"Assertion are not allowed to be quantified directly.",

// validator
regexMessage: "{{message}}.",
},
type: "suggestion",
Expand All @@ -40,21 +77,195 @@ export default createRule("strict", {
function createVisitor(
regexpContext: RegExpContext,
): RegExpVisitor.Handlers {
const { node, flags, pattern } = regexpContext
const {
node,
flags,
pattern,
getRegexpLocation,
fixReplaceNode,
} = regexpContext

if (flags.unicode) {
// the Unicode flag enables strict parsing mode automatically
return {}
}

const message = validateRegExpPattern(pattern, flags.unicode)
let reported = false
let hasNamedBackreference = false

/** Report */
function report(
messageId: string,
element: Element,
fix?: string | null,
): void {
reported = true

if (message) {
context.report({
node,
messageId: "regexMessage",
loc: getRegexpLocation(element),
messageId,
data: {
message,
expr: element.raw,
},
fix: fix ? fixReplaceNode(element, fix) : null,
})
}

return {}
return {
// eslint-disable-next-line complexity -- x
onCharacterEnter(cNode) {
if (cNode.raw === "\\") {
// e.g. \c5 or \c
report("invalidControlEscape", cNode)
return
}
if (cNode.raw === "\\u" || cNode.raw === "\\x") {
// e.g. \u000;
report("incompleteEscapeSequence", cNode)
return
}
if (cNode.raw === "\\p" || cNode.raw === "\\P") {
// e.g. \p{H} or \p
report("invalidPropertyEscape", cNode)
return
}
if (cNode.value !== 0 && isOctalEscape(cNode.raw)) {
// e.g. \023
report(
"octalEscape",
cNode,
`\\x${cNode.value.toString(16).padStart(2, "0")}`,
)
return
}

const insideCharClass =
cNode.parent.type === "CharacterClass" ||
cNode.parent.type === "CharacterClassRange"

if (!insideCharClass) {
if (cNode.raw === "\\k") {
// e.g. \k<foo or \k
report("incompleteBackreference", cNode)
return
}

if (
cNode.raw === "{" ||
cNode.raw === "}" ||
cNode.raw === "]"
) {
report(
"unescapedSourceCharacter",
cNode,
`\\${cNode.raw}`,
)
return
}
}

if (isEscapeSequence(cNode.raw)) {
// all remaining escape sequences are valid
return
}

if (cNode.raw.startsWith("\\")) {
const identity = cNode.raw.slice(1)
const syntaxChars = insideCharClass
? CHARACTER_CLASS_SYNTAX_CHARACTERS
: SYNTAX_CHARACTERS

if (
cNode.value === identity.charCodeAt(0) &&
!syntaxChars.has(identity)
) {
// e.g. \g or \;
report("uselessEscape", cNode, identity)
}
}
},
onCharacterClassEnter(ccNode) {
for (let i = 0; i < ccNode.elements.length; i++) {
const current = ccNode.elements[i]

if (current.type === "CharacterSet") {
const next: CharacterClassElement | undefined =
ccNode.elements[i + 1]
const nextNext: CharacterClassElement | undefined =
ccNode.elements[i + 2]

if (next && next.raw === "-" && nextNext) {
// e.g. [\w-a]
report("invalidRange", current)
return
}

const prev: CharacterClassElement | undefined =
ccNode.elements[i - 1]
const prevPrev: CharacterClassElement | undefined =
ccNode.elements[i - 2]
if (
prev &&
prev.raw === "-" &&
prevPrev &&
prevPrev.type !== "CharacterClassRange"
) {
// e.g. [a-\w]
report("invalidRange", current)
return
}
}
}
},
onQuantifierEnter(qNode) {
if (qNode.element.type === "Assertion") {
// e.g. \b+
report(
"quantifiedAssertion",
qNode,
`(?:${qNode.element.raw})${qNode.raw.slice(
qNode.element.end - qNode.start,
)}`,
)
}
},

onBackreferenceEnter(bNode) {
if (typeof bNode.ref === "string") {
hasNamedBackreference = true
}
},
onPatternLeave() {
if (hasNamedBackreference) {
// There is a bug in regexpp that causes it throw a
// syntax error for all non-Unicode regexes with named
// backreferences.
// TODO: Remove this workaround when the bug is fixed.
return
}

if (!reported) {
// our own logic couldn't find any problems,
// so let's use a real parser to do the job.

const message = validateRegExpPattern(
pattern,
flags.unicode,
)

if (message) {
context.report({
node,
messageId: "regexMessage",
data: {
message,
},
})
}
}
},
}
}

return defineRegexpVisitor(context, {
Expand Down
Loading

0 comments on commit 637ee67

Please sign in to comment.