From 199f888bf3e251af62824159a36aa23472e791d2 Mon Sep 17 00:00:00 2001
From: Mike Lischke
- * The default value is {@code false} to avoid the performance and memory - * overhead of copying text for every token unless explicitly requested.
- */ - this.copyText = copyText === undefined ? false : copyText; - } - - create(source, type, text, channel, start, stop, line, column) { - const t = new CommonToken(source, type, channel, start, stop); - t.line = line; - t.column = column; - if (text !== null) { - t.text = text; - } else if (this.copyText && source[1] !== null) { - t.text = source[1].getText(start, stop); - } - return t; - } - - createThin(type, text) { - const t = new CommonToken(null, type); - t.text = text; - return t; - } -} - -/** - * The default {@link CommonTokenFactory} instance. - * - *- * This token factory does not explicitly copy token text when constructing - * tokens.
- */ -CommonTokenFactory.DEFAULT = new CommonTokenFactory(); diff --git a/src/CommonTokenFactory.ts b/src/CommonTokenFactory.ts new file mode 100644 index 0000000..3bb1463 --- /dev/null +++ b/src/CommonTokenFactory.ts @@ -0,0 +1,77 @@ +/* + * Copyright (c) The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import { CharStream } from "./CharStream.js"; +import { CommonToken } from "./CommonToken.js"; +import { TokenFactory } from "./TokenFactory.js"; +import { TokenSource } from "./TokenSource.js"; + +/** + * This default implementation of {@link TokenFactory} creates + * {@link CommonToken} objects. + */ +export class CommonTokenFactory extends TokenFactory+ * This token factory does not explicitly copy token text when constructing + * tokens.
+ */ + // eslint-disable-next-line @typescript-eslint/naming-convention + public static readonly DEFAULT = new CommonTokenFactory(); + + /** + * Indicates whether {@link CommonToken#setText} should be called after + * constructing tokens to explicitly set the text. This is useful for cases + * where the input stream might not be able to provide arbitrary substrings + * of text from the input after the lexer creates a token (e.g. the + * implementation of {@link CharStream#getText} in + * {@link UnbufferedCharStream} throws an + * {@link UnsupportedOperationException}). Explicitly setting the token text + * allows {@link Token#getText} to be called at any time regardless of the + * input stream implementation. + * + *+ * The default value is {@code false} to avoid the performance and memory + * overhead of copying text for every token unless explicitly requested.
+ */ + protected readonly copyText: boolean = false; + + public constructor(copyText?: boolean) { + super(); + /** + * Indicates whether {@link CommonToken//setText} should be called after + * constructing tokens to explicitly set the text. This is useful for cases + * where the input stream might not be able to provide arbitrary substrings + * of text from the input after the lexer creates a token (e.g. the + * implementation of {@link CharStream//getText} in + * {@link UnbufferedCharStream} throws an + * {@link UnsupportedOperationException}). Explicitly setting the token text + * allows {@link Token//getText} to be called at any time regardless of the + * input stream implementation. + * + *+ * The default value is {@code false} to avoid the performance and memory + * overhead of copying text for every token unless explicitly requested.
+ */ + this.copyText = copyText ?? false; + } + + public override create(source: [TokenSource | null, CharStream | null], type: number, text: string, channel: number, + start: number, stop: number, line: number, column: number): CommonToken { + const t = new CommonToken(source, type, channel, start, stop); + t.line = line; + t.column = column; + if (text !== null) { + t.text = text; + } else if (this.copyText && source[1] !== null) { + t.text = source[1].getText(start, stop); + } + + return t; + } +} diff --git a/src/CommonTokenStream.d.ts b/src/CommonTokenStream.d.ts deleted file mode 100644 index db8077f..0000000 --- a/src/CommonTokenStream.d.ts +++ /dev/null @@ -1,13 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { Lexer } from "./Lexer.js"; -import { BufferedTokenStream } from "./BufferedTokenStream.js"; - -export declare class CommonTokenStream extends BufferedTokenStream { - public constructor(lexer: Lexer); - public constructor(lexer: Lexer, channel: number); -} diff --git a/src/CommonTokenStream.js b/src/CommonTokenStream.ts similarity index 74% rename from src/CommonTokenStream.js rename to src/CommonTokenStream.ts index 6aba045..b18bccd 100644 --- a/src/CommonTokenStream.js +++ b/src/CommonTokenStream.ts @@ -4,8 +4,11 @@ * can be found in the LICENSE.txt file in the project root. */ -import { Token } from './Token.js'; -import { BufferedTokenStream } from './BufferedTokenStream.js'; +/* eslint-disable @typescript-eslint/naming-convention */ + +import { Token } from "./Token.js"; +import { BufferedTokenStream } from "./BufferedTokenStream.js"; +import { TokenSource } from "./index.js"; /** * This class extends {@link BufferedTokenStream} with functionality to filter @@ -32,16 +35,25 @@ import { BufferedTokenStream } from './BufferedTokenStream.js'; * channel. */ export class CommonTokenStream extends BufferedTokenStream { - constructor(lexer, channel) { + /** + * Specifies the channel to use for filtering tokens. + * + *+ * The default value is {@link Token#DEFAULT_CHANNEL}, which matches the + * default channel assigned to tokens created by the lexer.
+ */ + protected channel = Token.DEFAULT_CHANNEL; + + public constructor(lexer: TokenSource, channel?: number) { super(lexer); - this.channel = channel === undefined ? Token.DEFAULT_CHANNEL : channel; + this.channel = channel ?? Token.DEFAULT_CHANNEL; } - adjustSeekIndex(i) { + public override adjustSeekIndex(i: number): number { return this.nextTokenOnChannel(i, this.channel); } - LB(k) { + public override LB(k: number): Token | null { if (k === 0 || this.index - k < 0) { return null; } @@ -56,10 +68,11 @@ export class CommonTokenStream extends BufferedTokenStream { if (i < 0) { return null; } + return this.tokens[i]; } - LT(k) { + public override LT(k: number): Token | null { this.lazyInit(); if (k === 0) { return null; @@ -77,15 +90,15 @@ export class CommonTokenStream extends BufferedTokenStream { } n += 1; } + return this.tokens[i]; } // Count EOF just once. - getNumberOfOnChannelTokens() { + public getNumberOfOnChannelTokens(): number { let n = 0; this.fill(); - for (let i = 0; i < this.tokens.length; i++) { - const t = this.tokens[i]; + for (const t of this.tokens) { if (t.channel === this.channel) { n += 1; } @@ -93,6 +106,7 @@ export class CommonTokenStream extends BufferedTokenStream { break; } } + return n; } } diff --git a/src/ConsoleErrorListener.js b/src/ConsoleErrorListener.js deleted file mode 100644 index 31224c0..0000000 --- a/src/ConsoleErrorListener.js +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { BaseErrorListener } from "./BaseErrorListener.js"; - -/** - * {@inheritDoc} - * - *- * This implementation prints messages to {@link System//err} containing the - * values of {@code line}, {@code charPositionInLine}, and {@code msg} using - * the following format.
- * - *- * line line:charPositionInLine msg - *- * - */ -export class ConsoleErrorListener extends BaseErrorListener { - constructor() { - super(); - } - - syntaxError(recognizer, offendingSymbol, line, column, msg, e) { - console.error("line " + line + ":" + column + " " + msg); - } -} - - -/** - * Provides a default instance of {@link ConsoleErrorListener}. - */ -ConsoleErrorListener.INSTANCE = new ConsoleErrorListener(); diff --git a/src/ConsoleErrorListener.ts b/src/ConsoleErrorListener.ts new file mode 100644 index 0000000..4b517fa --- /dev/null +++ b/src/ConsoleErrorListener.ts @@ -0,0 +1,40 @@ +/* + * Copyright (c) The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import { BaseErrorListener } from "./BaseErrorListener.js"; +import { RecognitionException } from "./RecognitionException.js"; +import { Recognizer } from "./Recognizer.js"; +import { ATNSimulator } from "./atn/ATNSimulator.js"; + +/** + * {@inheritDoc} + * + *
+ * This implementation prints messages to {@link System//err} containing the + * values of {@code line}, {@code charPositionInLine}, and {@code msg} using + * the following format.
+ * + *+ * line line:charPositionInLine msg + *+ * + */ +export class ConsoleErrorListener extends BaseErrorListener { + /** + * Provides a default instance of {@link ConsoleErrorListener}. + */ + // eslint-disable-next-line @typescript-eslint/naming-convention + public static readonly INSTANCE = new ConsoleErrorListener(); + + public override syntaxError
The default implementation simply calls {@link endErrorCondition} to * ensure that the handler is not in error recovery mode.
*/ - reset(recognizer) { + public reset(recognizer: Parser): void { this.endErrorCondition(recognizer); } @@ -53,24 +72,23 @@ export class DefaultErrorStrategy { * This method is called to enter error recovery mode when a recognition * exception is reported. * - * @param recognizer the parser instance + * @param _recognizer the parser instance */ - beginErrorCondition(recognizer) { + public beginErrorCondition(_recognizer: Parser): void { this.errorRecoveryMode = true; } - inErrorRecoveryMode(recognizer) { + public inErrorRecoveryMode(_recognizer: Parser): boolean { return this.errorRecoveryMode; } /** * This method is called to leave error recovery mode after recovering from * a recognition exception. - * @param recognizer */ - endErrorCondition(recognizer) { + public endErrorCondition(_recognizer: Parser): void { this.errorRecoveryMode = false; - this.lastErrorStates = null; + this.lastErrorStates = new IntervalSet(); this.lastErrorIndex = -1; } @@ -78,7 +96,7 @@ export class DefaultErrorStrategy { * {@inheritDoc} *The default implementation simply calls {@link endErrorCondition}.
*/ - reportMatch(recognizer) { + public reportMatch(recognizer: Parser): void { this.endErrorCondition(recognizer); } @@ -101,7 +119,7 @@ export class DefaultErrorStrategy { * the exception * */ - reportError(recognizer, e) { + public reportError(recognizer: Parser, e: RecognitionException): void { // if we've already reported an error and have not matched a token // yet successfully, don't report any errors. if (this.inErrorRecoveryMode(recognizer)) { @@ -116,8 +134,7 @@ export class DefaultErrorStrategy { this.reportFailedPredicate(recognizer, e); } else { console.log("unknown recognition error type: " + e.constructor.name); - console.log(e.stack); - recognizer.notifyErrorListeners(e.offendingToken, e.getMessage(), e); + recognizer.notifyErrorListeners(e.message, e.offendingToken, e); } } @@ -130,20 +147,18 @@ export class DefaultErrorStrategy { * that can follow the current rule. * */ - recover(recognizer, e) { - if (this.lastErrorIndex === recognizer.inputStream.index && - this.lastErrorStates !== null && this.lastErrorStates.indexOf(recognizer.state) >= 0) { + public recover(recognizer: Parser, _e: RecognitionException): void { + if (this.lastErrorIndex === recognizer.inputStream.index && this.lastErrorStates.contains(recognizer.state)) { // uh oh, another error at same token index and previously-visited // state in ATN; must be a case where LT(1) is in the recovery // token set so nothing got consumed. Consume a single token // at least to prevent an infinite loop; this is a failsafe. recognizer.consume(); } - this.lastErrorIndex = recognizer._input.index; - if (this.lastErrorStates === null) { - this.lastErrorStates = []; - } - this.lastErrorStates.push(recognizer.state); + + this.lastErrorIndex = recognizer.inputStream.index; + + this.lastErrorStates.addOne(recognizer.state); const followSet = this.getErrorRecoverySet(recognizer); this.consumeUntil(recognizer, followSet); } @@ -195,26 +210,28 @@ export class DefaultErrorStrategy { * functionality by simply overriding this method as a blank { }. * */ - sync(recognizer) { + public sync(recognizer: Parser): void { // If already recovering, don't try to sync if (this.inErrorRecoveryMode(recognizer)) { return; } - const s = recognizer.interpreter.atn.states[recognizer.state]; + const s = recognizer.interpreter.atn.states[recognizer.state]!; const la = recognizer.tokenStream.LA(1); // try cheaper subset first; might get lucky. seems to shave a wee bit off const nextTokens = recognizer.atn.nextTokens(s); if (nextTokens.contains(la)) { this.nextTokensContext = null; this.nextTokenState = ATNState.INVALID_STATE_NUMBER; + return; } else if (nextTokens.contains(Token.EPSILON)) { if (this.nextTokensContext === null) { // It's possible the next token won't match information tracked // by sync is restricted for performance. - this.nextTokensContext = recognizer._ctx; - this.nextTokensState = recognizer._stateNumber; + this.nextTokensContext = recognizer.context; + this.nextTokenState = recognizer.state; } + return; } switch (s.stateType) { @@ -252,14 +269,14 @@ export class DefaultErrorStrategy { * @param recognizer the parser instance * @param e the recognition exception */ - reportNoViableAlternative(recognizer, e) { + public reportNoViableAlternative(recognizer: Parser, e: NoViableAltException): void { const tokens = recognizer.tokenStream; let input; if (tokens !== null) { if (e.startToken.type === Token.EOF) { input = "This recovery strategy is implemented by {@link - * //singleTokenDeletion}.
+ *This recovery strategy is implemented by {@link singleTokenDeletion}.
* *MISSING TOKEN (single token insertion)
* @@ -383,8 +398,7 @@ export class DefaultErrorStrategy { * "insertion" is performed by returning the created token as the successful * result of the match operation. * - *This recovery strategy is implemented by {@link - * //singleTokenInsertion}.
+ *This recovery strategy is implemented by {@link singleTokenInsertion}.
* *EXAMPLE
* @@ -409,13 +423,14 @@ export class DefaultErrorStrategy { * is in the set of tokens that can follow the {@code ')'} token reference * in rule {@code atom}. It can assume that you forgot the {@code ')'}. */ - recoverInline(recognizer) { + public recoverInline(recognizer: Parser): Token { // SINGLE TOKEN DELETION const matchedSymbol = this.singleTokenDeletion(recognizer); if (matchedSymbol !== null) { // we have deleted the extra token. // now, move past ttype token as if all were ok recognizer.consume(); + return matchedSymbol; } // SINGLE TOKEN INSERTION @@ -440,20 +455,21 @@ export class DefaultErrorStrategy { * token with the correct type to produce this behavior. * * @param recognizer the parser instance - * @return {@code true} if single-token insertion is a viable recovery + * @returns `true` if single-token insertion is a viable recovery * strategy for the current mismatched input, otherwise {@code false} */ - singleTokenInsertion(recognizer) { + public singleTokenInsertion(recognizer: Parser): boolean { const currentSymbolType = recognizer.tokenStream.LA(1); // if current token is consistent with what could come after current // ATN state, then we know we're missing a token; error recovery // is free to conjure up and insert the missing token const atn = recognizer.interpreter.atn; const currentState = atn.states[recognizer.state]; - const next = currentState.transitions[0].target; - const expectingAtLL2 = atn.nextTokens(next, recognizer._ctx); + const next = currentState!.transitions[0].target; + const expectingAtLL2 = atn.nextTokens(next, recognizer.context); if (expectingAtLL2.contains(currentSymbolType)) { this.reportMissingToken(recognizer); + return true; } else { return false; @@ -475,11 +491,11 @@ export class DefaultErrorStrategy { * match. * * @param recognizer the parser instance - * @return the successfully matched {@link Token} instance if single-token + * @returns the successfully matched {@link Token} instance if single-token * deletion successfully recovers from the mismatched input, otherwise * {@code null} */ - singleTokenDeletion(recognizer) { + public singleTokenDeletion(recognizer: Parser): Token | null { const nextTokenType = recognizer.tokenStream.LA(2); const expecting = this.getExpectedTokens(recognizer); if (expecting.contains(nextTokenType)) { @@ -492,6 +508,7 @@ export class DefaultErrorStrategy { // we want to return the token we're actually matching const matchedSymbol = recognizer.getCurrentToken(); this.reportMatch(recognizer); // we know current token is correct + return matchedSymbol; } else { return null; @@ -519,8 +536,8 @@ export class DefaultErrorStrategy { * override this method to create the appropriate tokens. * */ - getMissingSymbol(recognizer) { - const currentSymbol = recognizer.getCurrentToken(); + public getMissingSymbol(recognizer: Parser): Token { + const currentSymbol = recognizer.getCurrentToken() as CommonToken; const expecting = this.getExpectedTokens(recognizer); let expectedTokenType = Token.INVALID_TYPE; if (!expecting.isNil) { @@ -535,7 +552,7 @@ export class DefaultErrorStrategy { } let current = currentSymbol; - const lookBack = recognizer.tokenStream.LT(-1); + const lookBack = recognizer.tokenStream.LT(-1) as CommonToken; if (current.type === Token.EOF && lookBack !== null) { current = lookBack; } @@ -545,7 +562,7 @@ export class DefaultErrorStrategy { -1, -1, current.line, current.column); } - getExpectedTokens(recognizer) { + public getExpectedTokens(recognizer: Parser): IntervalSet { return recognizer.getExpectedTokens(); } @@ -558,7 +575,7 @@ export class DefaultErrorStrategy { * your token objects because you don't have to go modify your lexer * so that it creates a new Java type. */ - getTokenErrorDisplay(t) { + public getTokenErrorDisplay(t: Token | null): string { if (t === null) { return "