Skip to content

Commit

Permalink
Merge pull request #1 from FelipeSharkao/v2
Browse files Browse the repository at this point in the history
Add tokens, logging and rework recursive parsers
  • Loading branch information
FelipeSharkao authored Jan 21, 2024
2 parents fcf9419 + 9481616 commit 3f6120f
Show file tree
Hide file tree
Showing 28 changed files with 859 additions and 1,028 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ type Color = {
blue: number;
};

const hexDigit = anyIn('09', 'af', 'AF');
const hexDigit = new TokenParser("Hex Digit", /[0-9a-fA-F]/)

const hexPrimary = (length: number) => map(
raw(many(hexDigit, length, length)),
raw(many(hexDigit, length)),
(res) => parseInt(res.value, 16)
);

Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "chunky-parser",
"version": "1.2.1",
"version": "2.0.0",
"description": "Lightweight parser combinator library for TypeScript",
"author": "Felipe Nascimento <[email protected]>",
"license": "MIT",
Expand All @@ -13,7 +13,7 @@
"preinstall": "git config diff.lockb.textconv bun; git config diff.lockb.binary true",
"build": "bun build.ts",
"test": "bun test",
"lint": "bunx --bun eslint src; bunx --bun tsc --noEmit"
"lint": "bunx --bun tsc --noEmit && bunx --bun eslint src"
},
"devDependencies": {
"@trivago/prettier-plugin-sort-imports": "^4.3.0",
Expand Down
156 changes: 121 additions & 35 deletions src/ParseInput.ts
Original file line number Diff line number Diff line change
@@ -1,56 +1,82 @@
import type { ParseFailure, ParseSuccess } from "@/ParseResult"
import type { Source } from "@/Source"
import type { Token, TokenParser } from "@/tokens"

/** @internal */
export type StackMap = Record<string, string[] | undefined>
import type { Parser } from "./Parser"
import type { RecState } from "./combinators/choice"

/**
 * Mutable context object carried by a `ParseInput`. Parsers reach it through `input.context`
 * and may read or assign its fields.
 */
export interface ParseContext {
/** @internal */
stacks?: StackMap
// NOTE(review): the only writers of this field visible in this change are test helper
// parsers — confirm whether it has a purpose outside of tests
test?: string
/**
 * Specifies options for the parsers
 */
log?: {
/**
 * If true, token and named parsers will log their results to the console. Use for debugging
 * purposes
 */
enabled?: boolean
/**
 * Determines how many spaces to indent the log output
 */
indent?: number
}
}

/**
* Represents an input for a parser. It references a point in the source text being parsed without
* copying it, and contains the context of the previous parsers
*/
export class ParseInput {
private tokens: Token<string>[] = []
private srcCursor = 0
private tkCursor = 0

constructor(
readonly source: Source,
readonly offset: number,
readonly context: ParseContext
readonly path: string,
readonly content: string,
public context: ParseContext
) {}

/**
 * Creates a new `ParseInput` over the same path and content, positioned at the same source
 * and token cursors. The context is deep-copied with `structuredClone`, while the token list
 * is shared by reference with this input.
 */
clone(): ParseInput {
    const copy = new ParseInput(this.path, this.content, structuredClone(this.context))
    copy.tkCursor = this.tkCursor
    copy.srcCursor = this.srcCursor
    copy.tokens = this.tokens
    return copy
}

/**
* Returns a string containing the next `n` characters from the source text
* Returns a token of the specified type from the current input if the current input starts with
* this token type, or null otherwise
*/
take(n: number): string {
if (n <= 0) {
return ""
token<T extends string>(type: TokenParser<T>): Token<T> | null {
if (this.tkCursor < this.tokens.length) {
const tk = this.tokens[this.tkCursor]

if (tk.is(type) && tk.loc[0] === this.srcCursor) {
return tk
}

return null
}

if (n === 1) {
return this.source.content[this.offset]
const match = this.match(type.pattern)

if (match != null) {
const tk = type.token(match as T, [this.srcCursor, this.srcCursor + match.length])
this.tokens.push(tk)
return tk
}

return this.source.content.slice(this.offset, this.offset + n)
return null
}

/**
* Returns true if the source text at the offset starts with `search`
*/
startsWith(search: string): boolean {
return this.source.content.startsWith(search, this.offset)
}

/**
* Executes a regex on the source text at the offset. The regex will only search for a match at
* the offset, as if it had the `y` flag, and the `g` flag will be ignored. The original regex
* object will not be modified, and its `lastIndex` will be ignored
*/
matches(regex: RegExp): RegExpExecArray | null {
const _regex = new RegExp(regex.source, regex.flags.replace("g", "") + "y")
_regex.lastIndex = this.offset
return _regex.exec(this.source.content)
return this.content.startsWith(search, this.offset)
}

/**
Expand All @@ -60,8 +86,7 @@ export class ParseInput {
return {
success: true,
value: opts.value,
loc: [opts.start || this.offset, opts.end || this.offset + (opts.length || 0)],
next: opts.next || this.context,
loc: [opts.start ?? this.offset, opts.end ?? this.offset + (opts.length || 0)],
}
}

Expand All @@ -71,8 +96,7 @@ export class ParseInput {
failure(opts: FailureOptions): ParseFailure {
return {
success: false,
source: this.source,
offset: opts.offset || this.offset + (opts.move || 0),
offset: opts.offset ?? this.offset + (opts.move || 0),
expected: opts.expected,
}
}
Expand All @@ -81,7 +105,73 @@ export class ParseInput {
* Returns the length of the source text after the offset
*/
get length(): number {
return this.source.content.length - this.offset
return this.content.length - this.offset
}

/**
 * The current position of the input in the source text
 */
get offset(): number {
    return this.srcCursor
}

/**
 * Moves the input to a new position in the source text and brings the token cursor in step
 * with it
 *
 * @throws If the offset is out of bounds of the source text or if it points to a token that
 * hasn't been parsed yet
 */
set offset(value: number) {
    const outOfBounds = value < 0 || value > this.content.length
    if (outOfBounds) {
        throw new Error(`Offset ${value} is out of bounds`)
    }

    // End position of the last token produced so far (0 when there are no tokens yet)
    const parsedEnd = this.tokens[this.tokens.length - 1]?.loc[1] ?? 0
    if (value > parsedEnd) {
        throw new Error(`Offset ${value} points to a token that hasn't been parsed yet`)
    }

    if (value === this.srcCursor) {
        return
    }

    if (value < this.srcCursor) {
        // Moving backwards: step the token cursor down until it sits on a token that starts
        // at or before the new offset, or until it reaches the first token
        while (this.tkCursor > 0) {
            const pastEnd = this.tkCursor >= this.tokens.length
            if (!pastEnd && this.tokens[this.tkCursor].loc[0] <= value) {
                break
            }
            this.tkCursor -= 1
        }
    } else {
        // Moving forwards: skip every token that ends at or before the new offset
        for (; this.tkCursor < this.tokens.length; this.tkCursor += 1) {
            if (this.tokens[this.tkCursor].loc[1] > value) {
                break
            }
        }
    }

    this.srcCursor = value
}

/**
 * Match the source text at the offset with the specified pattern
 *
 * A string pattern matches when the source text starts with it at the offset. A regex pattern
 * is only tried at the offset itself, as if it had the `y` flag; its `g` flag and `lastIndex`
 * are ignored and the original regex object is not modified.
 *
 * @param pat The literal text or regular expression to look for
 * @returns The matched text, or null if the pattern does not match at the offset
 */
private match(pat: string | RegExp): string | null {
    if (typeof pat === "string") {
        return this.content.startsWith(pat, this.offset) ? pat : null
    }

    // Without the sticky flag `exec` ignores `lastIndex` and searches from the start of the
    // text (or, with `g`, anywhere after the offset), which would report a match that is not
    // located at the current offset. Force `y` and drop `g` on a private copy of the regex.
    const flags = pat.flags.replace(/[gy]/g, "") + "y"
    const re = new RegExp(pat.source, flags)
    re.lastIndex = this.offset

    return re.exec(this.content)?.[0] ?? null
}
}

Expand All @@ -105,10 +195,6 @@ type SuccessOptions<T> = {
* The length of the parsed value. Will be ignored if `end` is specified
*/
length?: number
/**
* The context to use for the next parser. Defaults to the current context of the input
*/
next?: ParseContext
}

/**
Expand Down
19 changes: 8 additions & 11 deletions src/ParseResult.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
import type { ParseContext } from "@/ParseInput"
import type { LocationRange, Source } from "@/Source"

export type ParseResult<T> = ParseSuccess<T> | ParseFailure

export type ParseSuccess<T> = Readonly<{
success: true
export type ParseSuccess<T> = {
readonly success: true
value: T
loc: LocationRange
next: ParseContext
}>
}

export type ParseFailure = Readonly<{
success: false
source: Source
export type ParseFailure = {
readonly success: false
offset: number
expected: string[]
}>
}

/**
 * A pair of offsets into the source text delimiting a parsed span: `start` is where the span
 * begins and `end` is `start` plus the span's length.
 */
export type LocationRange = readonly [start: number, end: number]
51 changes: 46 additions & 5 deletions src/Parser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,59 @@ import { describe, it, expect } from "bun:test"

import { ParseInput } from "@/ParseInput"

import { run, type Parser } from "./Parser"
import { run, tryRun } from "./Parser"

/** Test parser that tags the input's context and always succeeds with the value "test". */
const parser = (input: ParseInput) => {
    const { context } = input
    context.test = "test"

    const result = input.success({ value: "test" })
    return result
}

/** Test parser that tags the input's context and then always fails, expecting "test". */
const failParser = (input: ParseInput) => {
    const { context } = input
    context.test = "test"

    const result = input.failure({ expected: ["test"] })
    return result
}

describe("run", () => {
const parser: Parser<string> = (input) => input.success({ value: "test" })
const input = new ParseInput({ name: "test", path: "test", content: "test" }, 0, {})
it("should run a parser", () => {
const input = new ParseInput("test", "test", {})
expect(run(parser, input.clone())).toEqual(parser(input.clone()))
})

it("should run a lazy parser", () => {
const input = new ParseInput("test", "test", {})
const lazyParser = () => parser
expect(run(lazyParser, input.clone())).toEqual(parser(input.clone()))
})

it("should allow mutation of the input", () => {
const input = new ParseInput("test", "test", {})
run(parser, input)
expect(input).toEqual(new ParseInput("test", "test", { test: "test" }))
})
})

describe("tryRun", () => {
it("should run a parser", () => {
expect(run(parser, input)).toEqual(parser(input))
const input = new ParseInput("test", "test", {})
expect(run(parser, input.clone())).toEqual(parser(input.clone()))
})

it("should run a lazy parser", () => {
const input = new ParseInput("test", "test", {})
const lazyParser = () => parser
expect(run(lazyParser, input)).toEqual(parser(input))
expect(run(lazyParser, input.clone())).toEqual(parser(input.clone()))
})

it("should allow mutation of the input", () => {
const input = new ParseInput("test", "test", {})
tryRun(parser, input)
expect(input).toEqual(new ParseInput("test", "test", { test: "test" }))
})

it("should rollback the input on failure", () => {
const input = new ParseInput("test", "test", {})
const oldInput = input.clone()
tryRun(failParser, input)
expect(input).toEqual(oldInput)
})
})
Loading

0 comments on commit 3f6120f

Please sign in to comment.