From 43f2f001f199f609f53c3c0f825d3e5c08f4ef50 Mon Sep 17 00:00:00 2001 From: Sergey Igushkin Date: Tue, 20 Apr 2021 12:26:07 +0300 Subject: [PATCH] Fix non-thread safe regex matching in the JVM implementation Close #39 --- .../h0tk3y/betterParse/lexer/RegexToken.kt | 64 ++++++++++++------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/src/jvmMain/kotlin/com/github/h0tk3y/betterParse/lexer/RegexToken.kt b/src/jvmMain/kotlin/com/github/h0tk3y/betterParse/lexer/RegexToken.kt index f30c09d..9a673d4 100644 --- a/src/jvmMain/kotlin/com/github/h0tk3y/betterParse/lexer/RegexToken.kt +++ b/src/jvmMain/kotlin/com/github/h0tk3y/betterParse/lexer/RegexToken.kt @@ -3,38 +3,54 @@ package com.github.h0tk3y.betterParse.lexer import java.util.* import java.util.regex.Matcher -public actual class RegexToken : Token { - private val pattern: String +public actual class RegexToken private constructor( + name: String?, + ignored: Boolean, + private val pattern: String, private val regex: Regex - private val matcher: Matcher +) : Token(name, ignored) { - private companion object { - const val inputStartPrefix = "\\A" + private val threadLocalMatcher = object : ThreadLocal() { + override fun initialValue() = regex.toPattern().matcher("") } - private fun prependPatternWithInputStart(patternString: String, options: Set) = - if (patternString.startsWith(inputStartPrefix)) - patternString.toRegex(options) - else { - val newlineAfterComments = if (RegexOption.COMMENTS in options) "\n" else "" - val patternToEmbed = if (RegexOption.LITERAL in options) Regex.escape(patternString) else patternString - ("$inputStartPrefix(?:$patternToEmbed$newlineAfterComments)").toRegex(options - RegexOption.LITERAL) - } + private val matcher: Matcher get() = threadLocalMatcher.get() + + private companion object { + private const val inputStartPrefix = "\\A" - public actual constructor(name: String?, @Language("RegExp", "", "") patternString: String, ignored: Boolean) - : super(name, ignored) { - pattern = patternString - regex = prependPatternWithInputStart(patternString, emptySet()) + private fun prependPatternWithInputStart(patternString: String, options: Set) = + if (patternString.startsWith(Companion.inputStartPrefix)) + patternString.toRegex(options) + else { + val newlineAfterComments = if (RegexOption.COMMENTS in options) "\n" else "" + val patternToEmbed = if (RegexOption.LITERAL in options) Regex.escape(patternString) else patternString + ("${inputStartPrefix}(?:$patternToEmbed$newlineAfterComments)").toRegex(options - RegexOption.LITERAL) + } - matcher = regex.toPattern().matcher("") } - public actual constructor(name: String?, regex: Regex, ignored: Boolean) - : super(name, ignored) { - pattern = regex.pattern - this.regex = prependPatternWithInputStart(pattern, regex.options) - matcher = this.regex.toPattern().matcher("") - } + public actual constructor( + name: String?, + @Language("RegExp", "", "") patternString: String, + ignored: Boolean + ) : this( + name, + ignored, + patternString, + prependPatternWithInputStart(patternString, emptySet()) + ) + + public actual constructor( + name: String?, + regex: Regex, + ignored: Boolean + ) : this( + name, + ignored, + regex.pattern, + prependPatternWithInputStart(regex.pattern, regex.options) + ) override fun match(input: CharSequence, fromIndex: Int): Int { matcher.reset(input).region(fromIndex, input.length)