remove lezer parser & grammar
parent e45a6d9bf7
commit 87cb01392a
package.json
@@ -5,9 +5,8 @@
   "private": true,
   "type": "module",
   "scripts": {
-    "dev": "bun generate-parser && bun --hot src/server/server.tsx",
-    "generate-parser": "lezer-generator src/parser/shrimp.grammar --typeScript -o src/parser/shrimp.ts",
-    "repl": "bun generate-parser && bun bin/repl",
+    "dev": "bun --hot src/server/server.tsx",
+    "repl": "bun bin/repl",
     "update-reef": "rm -rf ~/.bun/install/cache/ && rm bun.lock && bun update reefvm",
     "cli:install": "ln -s \"$(pwd)/bin/shrimp\" ~/.bun/bin/shrimp",
     "cli:remove": "rm ~/.bun/bin/shrimp",
src/compiler/compiler.ts
@@ -1,9 +1,6 @@
 import { CompilerError } from '#compiler/compilerError.ts'
-import { parse } from '#parser/parser2'
+import { parse, setGlobals } from '#parser/parser2'
 import { SyntaxNode, Tree } from '#parser/node'
-import { parser } from '#parser/shrimp.ts'
-import * as terms from '#parser/shrimp.terms'
-import { setGlobals } from '#parser/tokenizer'
 import { tokenizeCurlyString } from '#parser/curlyTokenizer'
 import { assert, errorMessage } from '#utils/utils'
 import { toBytecode, type Bytecode, type ProgramItem, bytecodeToString } from 'reefvm'
@@ -91,7 +88,7 @@ export class Compiler {
   }

   #compileCst(cst: Tree, input: string) {
-    const isProgram = cst.topNode.type.id === terms.Program
+    const isProgram = cst.topNode.type.is('Program')
     assert(isProgram, `Expected Program node, got ${cst.topNode.type.name}`)

     let child = cst.topNode.firstChild
@@ -107,8 +104,8 @@ export class Compiler {
     const value = input.slice(node.from, node.to)
     if (DEBUG) console.log(`🫦 ${node.name}: ${value}`)

-    switch (node.type.id) {
-      case terms.Number:
+    switch (node.type.name) {
+      case 'Number':
         // Handle sign prefix for hex, binary, and octal literals
         // Number() doesn't parse '-0xFF', '+0xFF', '-0o77', etc. correctly
         let numberValue: number
@@ -125,8 +122,8 @@ export class Compiler {

         return [[`PUSH`, numberValue]]

-      case terms.String: {
-        if (node.firstChild?.type.id === terms.CurlyString)
+      case 'String': {
+        if (node.firstChild?.type.is('CurlyString'))
           return this.#compileCurlyString(value, input)

         const { parts, hasInterpolation } = getStringParts(node, input)
@@ -143,19 +140,19 @@ export class Compiler {
         parts.forEach((part) => {
           const partValue = input.slice(part.from, part.to)

-          switch (part.type.id) {
-            case terms.StringFragment:
+          switch (part.type.name) {
+            case 'StringFragment':
               // Plain text fragment - just push as-is
               instructions.push(['PUSH', partValue])
               break

-            case terms.EscapeSeq:
+            case 'EscapeSeq':
               // Process escape sequence and push the result
               const processed = processEscapeSeq(partValue)
               instructions.push(['PUSH', processed])
               break

-            case terms.Interpolation:
+            case 'Interpolation':
               // Interpolation contains either Identifier or ParenExpr (the $ is anonymous)
               const child = part.firstChild
               if (!child) {
@@ -179,15 +176,15 @@ export class Compiler {
         return instructions
       }

-      case terms.Boolean: {
+      case 'Boolean': {
         return [[`PUSH`, value === 'true']]
       }

-      case terms.Null: {
+      case 'Null': {
         return [[`PUSH`, null]]
       }

-      case terms.Regex: {
+      case 'Regex': {
         // remove the surrounding slashes and any flags
         const [_, pattern, flags] = value.match(/^\/\/(.*)\/\/([gimsuy]*)$/) || []
         if (!pattern) {
@@ -204,15 +201,15 @@ export class Compiler {
         return [['PUSH', regex]]
       }

-      case terms.Identifier: {
+      case 'Identifier': {
         return [[`TRY_LOAD`, value]]
       }

-      case terms.Word: {
+      case 'Word': {
         return [['PUSH', value]]
       }

-      case terms.DotGet: {
+      case 'DotGet': {
         // DotGet is parsed into a nested tree because it's hard to parse it into a flat one.
         // However, we want a flat tree - so we're going to pretend like we are getting one from the parser.
         //
@@ -224,7 +221,7 @@ export class Compiler {
         instructions.push(['TRY_LOAD', objectName])

         const flattenProperty = (prop: SyntaxNode): void => {
-          if (prop.type.id === terms.DotGet) {
+          if (prop.type.is('DotGet')) {
             const nestedParts = getDotGetParts(prop, input)

             const nestedObjectValue = input.slice(nestedParts.object.from, nestedParts.object.to)
@@ -233,7 +230,7 @@ export class Compiler {

             flattenProperty(nestedParts.property)
           } else {
-            if (prop.type.id === terms.ParenExpr) {
+            if (prop.type.is('ParenExpr')) {
               instructions.push(...this.#compileNode(prop, input))
             } else {
               const propertyValue = input.slice(prop.from, prop.to)
@@ -247,7 +244,7 @@ export class Compiler {
         return instructions
       }

-      case terms.BinOp: {
+      case 'BinOp': {
         const { left, op, right } = getBinaryParts(node)
         const instructions: ProgramItem[] = []
         instructions.push(...this.#compileNode(left, input))
@@ -295,7 +292,7 @@ export class Compiler {
         return instructions
       }

-      case terms.Assign: {
+      case 'Assign': {
         const assignParts = getAssignmentParts(node)
         const instructions: ProgramItem[] = []

@@ -326,7 +323,7 @@ export class Compiler {
         return instructions
       }

-      case terms.CompoundAssign: {
+      case 'CompoundAssign': {
         const { identifier, operator, right } = getCompoundAssignmentParts(node)
         const identifierName = input.slice(identifier.from, identifier.to)
         const instructions: ProgramItem[] = []
@@ -388,14 +385,14 @@ export class Compiler {
         return instructions
       }

-      case terms.ParenExpr: {
+      case 'ParenExpr': {
         const child = node.firstChild
         if (!child) return [] // I guess it is empty parentheses?

         return this.#compileNode(child, input)
       }

-      case terms.FunctionDef: {
+      case 'FunctionDef': {
         const { paramNames, bodyNodes, catchVariable, catchBody, finallyBody } =
           getFunctionDefParts(node, input)
         const instructions: ProgramItem[] = []
@@ -441,8 +438,8 @@ export class Compiler {
         return instructions
       }

-      case terms.FunctionCallOrIdentifier: {
-        if (node.firstChild?.type.id === terms.DotGet) {
+      case 'FunctionCallOrIdentifier': {
+        if (node.firstChild?.type.is('DotGet')) {
           const instructions: ProgramItem[] = []
           const callLabel: Label = `.call_dotget_${++this.labelCount}`
           const afterLabel: Label = `.after_dotget_${++this.labelCount}`
@@ -484,8 +481,8 @@ export class Compiler {
           PUSH 1 ; Named count
           CALL
       */
-      case terms.FunctionCallWithNewlines:
-      case terms.FunctionCall: {
+
+      case 'FunctionCall': {
         const { identifierNode, namedArgs, positionalArgs } = getFunctionCallParts(node, input)
         const instructions: ProgramItem[] = []
         instructions.push(...this.#compileNode(identifierNode, input))
@@ -507,7 +504,7 @@ export class Compiler {
         return instructions
       }

-      case terms.Block: {
+      case 'Block': {
         const children = getAllChildren(node)
         const instructions: ProgramItem[] = []

@@ -522,7 +519,7 @@ export class Compiler {
         return instructions
       }

-      case terms.FunctionCallWithBlock: {
+      case 'FunctionCallWithBlock': {
         const [fn, _colon, ...block] = getAllChildren(node)
         let instructions: ProgramItem[] = []

@@ -540,13 +537,13 @@ export class Compiler {
         instructions.push(['RETURN'])
         instructions.push([`${afterLabel}:`])

-        if (fn?.type.id === terms.FunctionCallOrIdentifier) {
+        if (fn?.type.is('FunctionCallOrIdentifier')) {
           instructions.push(['LOAD', input.slice(fn!.from, fn!.to)])
           instructions.push(['MAKE_FUNCTION', [], fnLabel])
           instructions.push(['PUSH', 1])
           instructions.push(['PUSH', 0])
           instructions.push(['CALL'])
-        } else if (fn?.type.id === terms.FunctionCall) {
+        } else if (fn?.type.is('FunctionCall')) {
           let body = this.#compileNode(fn!, input)
           const namedArgCount = (body[body.length - 2]![1] as number) * 2
           const startSlice = body.length - namedArgCount - 3
@@ -569,7 +566,7 @@ export class Compiler {
         return instructions
       }

-      case terms.TryExpr: {
+      case 'TryExpr': {
         const { tryBlock, catchVariable, catchBody, finallyBody } = getTryExprParts(node, input)

         return this.#compileTryCatchFinally(
@@ -581,9 +578,9 @@ export class Compiler {
         )
       }

-      case terms.Throw:
-      case terms.Not: {
-        const keyword = node.type.id === terms.Throw ? 'Throw' : 'Not'
+      case 'Throw':
+      case 'Not': {
+        const keyword = node.type.is('Throw') ? 'Throw' : 'Not'
         const children = getAllChildren(node)
         const [_throwKeyword, expression] = children
         if (!expression) {
@@ -601,7 +598,7 @@ export class Compiler {
         return instructions
       }

-      case terms.IfExpr: {
+      case 'IfExpr': {
         const { conditionNode, thenBlock, elseIfBlocks, elseThenBlock } = getIfExprParts(
           node,
           input
@@ -644,7 +641,7 @@ export class Compiler {
       }

       // - `EQ`, `NEQ`, `LT`, `GT`, `LTE`, `GTE` - Pop 2, push boolean
-      case terms.ConditionalOp: {
+      case 'ConditionalOp': {
         const instructions: ProgramItem[] = []
         const { left, op, right } = getBinaryParts(node)
         const leftInstructions: ProgramItem[] = this.#compileNode(left, input)
@@ -719,7 +716,7 @@ export class Compiler {
         return instructions
       }

-      case terms.PipeExpr: {
+      case 'PipeExpr': {
         const { pipedFunctionCall, pipeReceivers } = getPipeExprParts(node)
         if (!pipedFunctionCall || pipeReceivers.length === 0) {
           throw new CompilerError('PipeExpr must have at least two operands', node.from, node.to)
@@ -741,11 +738,11 @@ export class Compiler {
         instructions.push(...this.#compileNode(identifierNode, input))

         const isUnderscoreInPositionalArgs = positionalArgs.some(
-          (arg) => arg.type.id === terms.Underscore
+          (arg) => arg.type.is('Underscore')
         )
         const isUnderscoreInNamedArgs = namedArgs.some((arg) => {
           const { valueNode } = getNamedArgParts(arg, input)
-          return valueNode.type.id === terms.Underscore
+          return valueNode.type.is('Underscore')
         })

         const shouldPushPositionalArg = !isUnderscoreInPositionalArgs && !isUnderscoreInNamedArgs
@@ -756,7 +753,7 @@ export class Compiler {
         }

         positionalArgs.forEach((arg) => {
-          if (arg.type.id === terms.Underscore) {
+          if (arg.type.is('Underscore')) {
             instructions.push(['LOAD', pipeValName])
           } else {
             instructions.push(...this.#compileNode(arg, input))
@@ -766,7 +763,7 @@ export class Compiler {
         namedArgs.forEach((arg) => {
           const { name, valueNode } = getNamedArgParts(arg, input)
           instructions.push(['PUSH', name])
-          if (valueNode.type.id === terms.Underscore) {
+          if (valueNode.type.is('Underscore')) {
             instructions.push(['LOAD', pipeValName])
           } else {
             instructions.push(...this.#compileNode(valueNode, input))
@@ -781,14 +778,14 @@ export class Compiler {
         return instructions
       }

-      case terms.Array: {
+      case 'Array': {
         const children = getAllChildren(node)

         // We can easily parse [=] as an empty dict, but `[ = ]` is tougher.
         // = can be a valid word, and is also valid inside words, so for now we cheat
         // and check for arrays that look like `[ = ]` to interpret them as
         // empty dicts
-        if (children.length === 1 && children[0]!.type.id === terms.Word) {
+        if (children.length === 1 && children[0]!.type.is('Word')) {
           const child = children[0]!
           if (input.slice(child.from, child.to) === '=') {
             return [['MAKE_DICT', 0]]
@@ -800,7 +797,7 @@ export class Compiler {
         return instructions
       }

-      case terms.Dict: {
+      case 'Dict': {
         const children = getAllChildren(node)
         const instructions: ProgramItem[] = []

@@ -819,7 +816,7 @@ export class Compiler {
         return instructions
       }

-      case terms.WhileExpr: {
+      case 'WhileExpr': {
         const [_while, test, _colon, block] = getAllChildren(node)
         const instructions: ProgramItem[] = []

@@ -837,11 +834,11 @@ export class Compiler {
         return instructions
       }

-      case terms.Import: {
+      case 'Import': {
         const instructions: ProgramItem[] = []
         const [_import, ...nodes] = getAllChildren(node)
-        const args = nodes.filter(node => node.type.id === terms.Identifier)
-        const namedArgs = nodes.filter(node => node.type.id === terms.NamedArg)
+        const args = nodes.filter(node => node.type.is('Identifier'))
+        const namedArgs = nodes.filter(node => node.type.is('NamedArg'))

         instructions.push(['LOAD', 'import'])

@@ -862,13 +859,13 @@ export class Compiler {
         return instructions
       }

-      case terms.Comment: {
+      case 'Comment': {
         return [] // ignore comments
       }

       default:
         throw new CompilerError(
-          `Compiler doesn't know how to handle a "${node.type.name}" (${node.type.id}) node.`,
+          `Compiler doesn't know how to handle a "${node.type.name}" node.`,
           node.from,
           node.to
         )
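The mechanical pattern above is the heart of the commit: every check against the generated numeric term IDs (`node.type.id === terms.Foo`) becomes a name-based check (`node.type.is('Foo')`). A minimal sketch of that contract, for illustration only — `NodeType` here is a trimmed stand-in for the full string union exported by src/parser/node.ts, and `NodeTypeInfo` is a hypothetical name, not the project's class:

// Sketch: name-based node-type checks replacing numeric lezer term IDs.
type NodeType = 'Program' | 'Number' | 'String' | 'DotGet'

class NodeTypeInfo {
  constructor(public name: NodeType) {}
  // true exactly when the node names match — no numeric IDs involved
  is(other: NodeType): boolean {
    return this.name === other
  }
}

// old: node.type.id === terms.Number   (tied to the generated grammar's numbering)
// new: node.type.is('Number')          (stable across parser implementations)
const t = new NodeTypeInfo('Number')
console.log(t.is('Number')) // true

The practical upside is that the compiler no longer imports anything from the generated shrimp.terms module, so the hand-written parser2 can replace the lezer parser without renumbering every node kind.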
@@ -1,5 +1,4 @@
 import { CompilerError } from '#compiler/compilerError.ts'
-import * as terms from '#parser/shrimp.terms'
 import type { SyntaxNode, Tree } from '#parser/node'

 export const checkTreeForErrors = (tree: Tree): CompilerError[] => {
@@ -24,7 +23,7 @@ export const getAllChildren = (node: SyntaxNode): SyntaxNode[] => {
     child = child.nextSibling
   }

-  return children.filter((n) => n.type.id !== terms.Comment)
+  return children.filter((n) => !n.type.is('Comment'))
 }

 export const getBinaryParts = (node: SyntaxNode) => {
@@ -51,12 +50,12 @@ export const getAssignmentParts = (node: SyntaxNode) => {
   }

   // array destructuring
-  if (left && left.type.id === terms.Array) {
-    const identifiers = getAllChildren(left).filter((child) => child.type.id === terms.Identifier)
+  if (left && left.type.is('Array')) {
+    const identifiers = getAllChildren(left).filter((child) => child.type.is('Identifier'))
     return { arrayPattern: identifiers, right }
   }

-  if (!left || left.type.id !== terms.AssignableIdentifier) {
+  if (!left || !left.type.is('AssignableIdentifier')) {
     throw new CompilerError(
       `Assign left child must be an AssignableIdentifier or Array, got ${left ? left.type.name : 'none'
       }`,
@@ -72,7 +71,7 @@ export const getCompoundAssignmentParts = (node: SyntaxNode) => {
   const children = getAllChildren(node)
   const [left, operator, right] = children

-  if (!left || left.type.id !== terms.AssignableIdentifier) {
+  if (!left || !left.type.is('AssignableIdentifier')) {
     throw new CompilerError(
       `CompoundAssign left child must be an AssignableIdentifier, got ${left ? left.type.name : 'none'
       }`,
@@ -103,7 +102,7 @@ export const getFunctionDefParts = (node: SyntaxNode, input: string) => {
   }

   const paramNames = getAllChildren(paramsNode).map((param) => {
-    if (param.type.id !== terms.Identifier && param.type.id !== terms.NamedParam) {
+    if (!param.type.is('Identifier') && !param.type.is('NamedParam')) {
       throw new CompilerError(
         `FunctionDef params must be Identifier or NamedParam, got ${param.type.name}`,
         param.from,
@@ -122,7 +121,7 @@ export const getFunctionDefParts = (node: SyntaxNode, input: string) => {
   let finallyBody: SyntaxNode | undefined

   for (const child of rest) {
-    if (child.type.id === terms.CatchExpr) {
+    if (child.type.is('CatchExpr')) {
       catchExpr = child
       const catchChildren = getAllChildren(child)
       const [_catchKeyword, identifierNode, _colon, body] = catchChildren
@@ -135,7 +134,7 @@ export const getFunctionDefParts = (node: SyntaxNode, input: string) => {
       }
       catchVariable = input.slice(identifierNode.from, identifierNode.to)
       catchBody = body
-    } else if (child.type.id === terms.FinallyExpr) {
+    } else if (child.type.is('FinallyExpr')) {
       finallyExpr = child
       const finallyChildren = getAllChildren(child)
       const [_finallyKeyword, _colon, body] = finallyChildren
@@ -164,9 +163,9 @@ export const getFunctionCallParts = (node: SyntaxNode, input: string) => {
     throw new CompilerError(`FunctionCall expected at least 1 child, got 0`, node.from, node.to)
   }

-  const namedArgs = args.filter((arg) => arg.type.id === terms.NamedArg)
+  const namedArgs = args.filter((arg) => arg.type.is('NamedArg'))
   const positionalArgs = args
-    .filter((arg) => arg.type.id === terms.PositionalArg)
+    .filter((arg) => arg.type.is('PositionalArg'))
     .map((arg) => {
       const child = arg.firstChild
       if (!child) throw new CompilerError(`PositionalArg has no child`, arg.from, arg.to)
@@ -207,13 +206,13 @@ export const getIfExprParts = (node: SyntaxNode, input: string) => {
   rest.forEach((child) => {
     const parts = getAllChildren(child)

-    if (child.type.id === terms.ElseExpr) {
+    if (child.type.is('ElseExpr')) {
       if (parts.length !== 3) {
         const message = `ElseExpr expected 1 child, got ${parts.length}`
         throw new CompilerError(message, child.from, child.to)
       }
       elseThenBlock = parts.at(-1)
-    } else if (child.type.id === terms.ElseIfExpr) {
+    } else if (child.type.is('ElseIfExpr')) {
       const [_else, _if, conditional, _colon, thenBlock] = parts
       if (!conditional || !thenBlock) {
         const names = parts.map((p) => p.type.name).join(', ')
@@ -248,10 +247,10 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
   // The text is just between the quotes
   const parts = children.filter((child) => {
     return (
-      child.type.id === terms.StringFragment ||
-      child.type.id === terms.Interpolation ||
-      child.type.id === terms.EscapeSeq ||
-      child.type.id === terms.CurlyString
+      child.type.is('StringFragment') ||
+      child.type.is('Interpolation') ||
+      child.type.is('EscapeSeq') ||
+      child.type.is('CurlyString')

     )
   })
@@ -259,10 +258,10 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
   // Validate each part is the expected type
   parts.forEach((part) => {
     if (
-      part.type.id !== terms.StringFragment &&
-      part.type.id !== terms.Interpolation &&
-      part.type.id !== terms.EscapeSeq &&
-      part.type.id !== terms.CurlyString
+      !part.type.is('StringFragment') &&
+      !part.type.is('Interpolation') &&
+      !part.type.is('EscapeSeq') &&
+      !part.type.is('CurlyString')
     ) {
       throw new CompilerError(
         `String child must be StringFragment, Interpolation, or EscapeSeq, got ${part.type.name}`,
@@ -275,7 +274,7 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
   // hasInterpolation means the string has interpolation ($var) or escape sequences (\n)
   // A simple string like 'hello' has one StringFragment but no interpolation
   const hasInterpolation = parts.some(
-    (p) => p.type.id === terms.Interpolation || p.type.id === terms.EscapeSeq
+    (p) => p.type.is('Interpolation') || p.type.is('EscapeSeq')
   )
   return { parts, hasInterpolation }
 }
@@ -292,7 +291,7 @@ export const getDotGetParts = (node: SyntaxNode, input: string) => {
     )
   }

-  if (object.type.id !== terms.IdentifierBeforeDot && object.type.id !== terms.Dollar) {
+  if (!object.type.is('IdentifierBeforeDot')) {
     throw new CompilerError(
       `DotGet object must be an IdentifierBeforeDot, got ${object.type.name}`,
       object.from,
@@ -300,7 +299,7 @@ export const getDotGetParts = (node: SyntaxNode, input: string) => {
     )
   }

-  if (![terms.Identifier, terms.Number, terms.ParenExpr, terms.DotGet].includes(property.type.id)) {
+  if (!['Identifier', 'Number', 'ParenExpr', 'DotGet'].includes(property.type.name)) {
     throw new CompilerError(
       `DotGet property must be an Identifier, Number, ParenExpr, or DotGet, got ${property.type.name}`,
       property.from,
@@ -334,7 +333,7 @@ export const getTryExprParts = (node: SyntaxNode, input: string) => {
   let finallyBody: SyntaxNode | undefined

   rest.forEach((child) => {
-    if (child.type.id === terms.CatchExpr) {
+    if (child.type.is('CatchExpr')) {
       catchExpr = child
       const catchChildren = getAllChildren(child)
       const [_catchKeyword, identifierNode, _colon, body] = catchChildren
@@ -347,7 +346,7 @@ export const getTryExprParts = (node: SyntaxNode, input: string) => {
       }
       catchVariable = input.slice(identifierNode.from, identifierNode.to)
       catchBody = body
-    } else if (child.type.id === terms.FinallyExpr) {
+    } else if (child.type.is('FinallyExpr')) {
       finallyExpr = child
       const finallyChildren = getAllChildren(child)
       const [_finallyKeyword, _colon, body] = finallyChildren
@@ -3,7 +3,7 @@ import { VM, fromValue, toValue, isValue, type Bytecode } from 'reefvm'
 import { Compiler } from '#compiler/compiler'
 import { parse } from '#parser/parser2'
 import { Tree } from '#parser/node'
-import { globals as parserGlobals, setGlobals as setParserGlobals } from '#parser/tokenizer'
+import { globals as parserGlobals, setGlobals as setParserGlobals } from '#parser/parser2'
 import { globals as prelude } from '#prelude'

 export { Compiler } from '#compiler/compiler'
src/parser/curlyTokenizer.ts
@@ -1,7 +1,6 @@
-import { parser } from '#parser/shrimp.ts'
 import { parse } from '#parser/parser2'
 import type { SyntaxNode } from '#parser/node'
-import { isIdentStart, isIdentChar } from './tokenizer'
+import { isIdentStart, isIdentChar } from './tokenizer2'

 // Turns a { curly string } into strings and nodes for interpolation
 export const tokenizeCurlyString = (value: string): (string | [string, SyntaxNode])[] => {
src/parser/node.ts
@@ -1,5 +1,4 @@
 import { type Token, TokenType } from './tokenizer2'
-import * as term from './shrimp.terms'

 export type NodeType =
   | 'Program'
@@ -140,183 +139,6 @@ export class Tree {
   }
 }

-// TODO: TEMPORARY SHIM
-class SyntaxNodeType {
-  constructor(public nodeType: NodeType, public isError: boolean) { }
-
-  is(other: string) {
-    return this.nodeType === other
-  }
-
-  get id(): number {
-    switch (this.nodeType) {
-      case 'Program':
-        return term.Program
-
-      case 'Block':
-        return term.Block
-
-      case 'FunctionCall':
-        return term.FunctionCall
-
-      case 'FunctionCallOrIdentifier':
-        return term.FunctionCallOrIdentifier
-
-      case 'FunctionCallWithBlock':
-        return term.FunctionCallWithBlock
-
-      case 'PositionalArg':
-        return term.PositionalArg
-
-      case 'NamedArg':
-        return term.NamedArg
-
-      case 'FunctionDef':
-        return term.FunctionDef
-
-      case 'Params':
-        return term.Params
-
-      case 'NamedParam':
-        return term.NamedParam
-
-      case 'Null':
-        return term.Null
-
-      case 'Boolean':
-        return term.Boolean
-
-      case 'Number':
-        return term.Number
-
-      case 'String':
-        return term.String
-
-      case 'StringFragment':
-        return term.StringFragment
-
-      case 'CurlyString':
-        return term.CurlyString
-
-      case 'DoubleQuote':
-        return term.DoubleQuote
-
-      case 'EscapeSeq':
-        return term.EscapeSeq
-
-      case 'Interpolation':
-        return term.Interpolation
-
-      case 'Regex':
-        return term.Regex
-
-      case 'Identifier':
-        return term.Identifier
-
-      case 'AssignableIdentifier':
-        return term.AssignableIdentifier
-
-      case 'IdentifierBeforeDot':
-        return term.IdentifierBeforeDot
-
-      case 'Word':
-        return term.Word
-
-      case 'Array':
-        return term.Array
-
-      case 'Dict':
-        return term.Dict
-
-      case 'Comment':
-        return term.Comment
-
-      case 'BinOp':
-        return term.BinOp
-
-      case 'ConditionalOp':
-        return term.ConditionalOp
-
-      case 'ParenExpr':
-        return term.ParenExpr
-
-      case 'Assign':
-        return term.Assign
-
-      case 'CompoundAssign':
-        return term.CompoundAssign
-
-      case 'DotGet':
-        return term.DotGet
-
-      case 'PipeExpr':
-        return term.PipeExpr
-
-      case 'IfExpr':
-        return term.IfExpr
-
-      case 'ElseIfExpr':
-        return term.ElseIfExpr
-
-      case 'ElseExpr':
-        return term.ElseExpr
-
-      case 'WhileExpr':
-        return term.WhileExpr
-
-      case 'TryExpr':
-        return term.TryExpr
-
-      case 'CatchExpr':
-        return term.CatchExpr
-
-      case 'FinallyExpr':
-        return term.FinallyExpr
-
-      case 'Throw':
-        return term.Throw
-
-      case 'Not':
-        return term.Not
-
-      case 'Eq':
-        return term.Eq
-
-      case 'Modulo':
-        return term.Modulo
-
-      case 'Plus':
-        return term.Plus
-
-      case 'Star':
-        return term.Star
-
-      case 'Slash':
-        return term.Slash
-
-      case 'Import':
-        return term.Import
-
-      case 'Do':
-        return term.Do
-
-      case 'Underscore':
-        return term.Underscore
-
-      case 'colon':
-        return term.colon
-
-      case 'keyword':
-        return term.keyword
-    }
-    return 0
-  }
-
-  get name(): string {
-    return this.nodeType
-  }
-}
-
 export class SyntaxNode {
   #type: NodeType
   #isError = false
@@ -336,8 +158,13 @@ export class SyntaxNode {
     return new SyntaxNode(TokenType[token.type] as NodeType, token.from, token.to, parent ?? null)
   }

-  get type(): SyntaxNodeType {
-    return new SyntaxNodeType(this.#type, this.#isError)
+  get type(): { type: NodeType, name: NodeType, isError: boolean, is: (other: NodeType) => boolean } {
+    return {
+      type: this.#type,
+      name: this.#type,
+      isError: this.#isError,
+      is: (other: NodeType) => other === this.#type
+    }
   }

   set type(name: NodeType) {
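The replacement getter above returns a fresh object literal on every `node.type` access, where the deleted shim returned a class instance. A hedged usage sketch — the caching local is a suggestion, not something this diff does:

// Sketch: reading the structural `type` getter shown above.
// Each access allocates a new { type, name, isError, is } object,
// so hot paths may want to read it into a local once.
declare const node: { type: { name: string; isError: boolean; is(other: string): boolean } }

const t = node.type
if (t.is('DotGet') && !t.isError) {
  console.log(`node is a ${t.name}`)
}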
src/parser/operatorTokenizer.ts
@@ -1,99 +0,0 @@
-import { ExternalTokenizer, InputStream } from '@lezer/lr'
-import * as terms from './shrimp.terms'
-
-type Operator = { str: string; tokenName: keyof typeof terms }
-const operators: Array<Operator> = [
-  { str: 'and', tokenName: 'And' },
-  { str: 'or', tokenName: 'Or' },
-  { str: 'band', tokenName: 'Band' },
-  { str: 'bor', tokenName: 'Bor' },
-  { str: 'bxor', tokenName: 'Bxor' },
-  { str: '>>>', tokenName: 'Ushr' }, // Must come before >>
-  { str: '>>', tokenName: 'Shr' },
-  { str: '<<', tokenName: 'Shl' },
-  { str: '>=', tokenName: 'Gte' },
-  { str: '<=', tokenName: 'Lte' },
-  { str: '!=', tokenName: 'Neq' },
-  { str: '==', tokenName: 'EqEq' },
-
-  // Compound assignment operators (must come before single-char operators)
-  { str: '??=', tokenName: 'NullishEq' },
-  { str: '+=', tokenName: 'PlusEq' },
-  { str: '-=', tokenName: 'MinusEq' },
-  { str: '*=', tokenName: 'StarEq' },
-  { str: '/=', tokenName: 'SlashEq' },
-  { str: '%=', tokenName: 'ModuloEq' },
-
-  // Nullish coalescing (must come before it could be mistaken for other tokens)
-  { str: '??', tokenName: 'NullishCoalesce' },
-
-  // Single-char operators
-  { str: '*', tokenName: 'Star' },
-  { str: '=', tokenName: 'Eq' },
-  { str: '/', tokenName: 'Slash' },
-  { str: '+', tokenName: 'Plus' },
-  { str: '-', tokenName: 'Minus' },
-  { str: '>', tokenName: 'Gt' },
-  { str: '<', tokenName: 'Lt' },
-  { str: '%', tokenName: 'Modulo' },
-]
-
-export const operatorTokenizer = new ExternalTokenizer((input: InputStream) => {
-  for (let operator of operators) {
-    if (!matchesString(input, 0, operator.str)) continue
-    const afterOpPos = operator.str.length
-    const charAfterOp = input.peek(afterOpPos)
-    if (!isWhitespace(charAfterOp)) continue
-
-    // Accept the operator token
-    const token = terms[operator.tokenName]
-    if (token === undefined) {
-      throw new Error(`Unknown token name: ${operator.tokenName}`)
-    }
-
-    input.advance(afterOpPos)
-    input.acceptToken(token)
-
-    return
-  }
-})
-
-const isWhitespace = (ch: number): boolean => {
-  return matchesChar(ch, [' ', '\t', '\n'])
-}
-
-const matchesChar = (ch: number, chars: (string | number)[]): boolean => {
-  for (const c of chars) {
-    if (typeof c === 'number') {
-      if (ch === c) {
-        return true
-      }
-    } else if (ch === c.charCodeAt(0)) {
-      return true
-    }
-  }
-  return false
-}
-
-const matchesString = (input: InputStream, pos: number, str: string): boolean => {
-  for (let i = 0; i < str.length; i++) {
-    if (input.peek(pos + i) !== str.charCodeAt(i)) {
-      return false
-    }
-  }
-  return true
-}
-
-const peek = (numChars: number, input: InputStream): string => {
-  let result = ''
-  for (let i = 0; i < numChars; i++) {
-    const ch = input.peek(i)
-    if (ch === -1) {
-      result += 'EOF'
-      break
-    } else {
-      result += String.fromCharCode(ch)
-    }
-  }
-  return result
-}
src/parser/parser2.ts
@@ -1,11 +1,17 @@
 import { CompilerError } from '#compiler/compilerError'
 import { Scanner, type Token, TokenType } from './tokenizer2'
 import { SyntaxNode, operators, precedence, conditionals, compounds } from './node'
-import { globals } from './tokenizer'
 import { parseString } from './stringParser'

 const $T = TokenType

+// tell the dotGet searcher about builtin globals
+export const globals: string[] = []
+export const setGlobals = (newGlobals: string[] | Record<string, any>) => {
+  globals.length = 0
+  globals.push(...(Array.isArray(newGlobals) ? newGlobals : Object.keys(newGlobals)))
+}
+
 export const parse = (input: string): SyntaxNode => {
   const parser = new Parser()
   return parser.parse(input)
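With this hunk, parser2 owns the globals list that previously lived in the lezer tokenizer module. A minimal sketch of the intended call order — the global name and the shrimp snippet are made up for illustration:

import { parse, setGlobals } from '#parser/parser2'

// Register builtin globals before parsing so the dotGet searcher
// can recognize property access on known names.
setGlobals(['print'])          // a Record also works; only its keys are taken
const root = parse('print 42') // returns the root SyntaxNode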
src/parser/parserScopeContext.ts
@@ -1,129 +0,0 @@
-import { ContextTracker, InputStream } from '@lezer/lr'
-import * as terms from './shrimp.terms'
-
-export class Scope {
-  constructor(public parent: Scope | null, public vars = new Set<string>()) { }
-
-  has(name: string): boolean {
-    return this.vars.has(name) || (this.parent?.has(name) ?? false)
-  }
-
-  hash(): number {
-    let h = 0
-    for (const name of this.vars) {
-      for (let i = 0; i < name.length; i++) {
-        h = (h << 5) - h + name.charCodeAt(i)
-        h |= 0
-      }
-    }
-    if (this.parent) {
-      h = (h << 5) - h + this.parent.hash()
-      h |= 0
-    }
-    return h
-  }
-
-  // Static methods that return new Scopes (immutable operations)
-
-  static add(scope: Scope, ...names: string[]): Scope {
-    const newVars = new Set(scope.vars)
-    names.forEach((name) => newVars.add(name))
-    return new Scope(scope.parent, newVars)
-  }
-
-  push(): Scope {
-    return new Scope(this, new Set())
-  }
-
-  pop(): Scope {
-    return this.parent ?? this
-  }
-}
-
-// Tracker context that combines Scope with temporary pending identifiers
-class TrackerContext {
-  constructor(public scope: Scope, public pendingIds: string[] = []) { }
-}
-
-// Extract identifier text from input stream
-const readIdentifierText = (input: InputStream, start: number, end: number): string => {
-  let text = ''
-  for (let i = start; i < end; i++) {
-    const offset = i - input.pos
-    const ch = input.peek(offset)
-    if (ch === -1) break
-    text += String.fromCharCode(ch)
-  }
-  return text
-}
-
-let inParams = false
-
-export const trackScope = new ContextTracker<TrackerContext>({
-  start: new TrackerContext(new Scope(null, new Set())),
-
-  shift(context, term, stack, input) {
-    if (term == terms.Do) inParams = true
-
-    if (term === terms.AssignableIdentifier) {
-      const text = readIdentifierText(input, input.pos, stack.pos)
-      return new TrackerContext(Scope.add(context.scope, text), context.pendingIds)
-    }
-
-    if (inParams && term === terms.Identifier) {
-      const text = readIdentifierText(input, input.pos, stack.pos)
-      return new TrackerContext(context.scope, [...context.pendingIds, text])
-    }
-
-    // Track identifiers in array destructuring: [ a b ] = ...
-    if (!inParams && term === terms.Identifier && isArrayDestructuring(input)) {
-      const text = readIdentifierText(input, input.pos, stack.pos)
-      return new TrackerContext(Scope.add(context.scope, text), context.pendingIds)
-    }
-
-    return context
-  },
-
-  reduce(context, term) {
-    if (term === terms.Params) {
-      inParams = false
-      let newScope = context.scope.push()
-      if (context.pendingIds.length > 0) {
-        newScope = Scope.add(newScope, ...context.pendingIds)
-      }
-      return new TrackerContext(newScope, [])
-    }
-
-    // Pop scope when exiting function
-    if (term === terms.FunctionDef) {
-      return new TrackerContext(context.scope.pop(), [])
-    }
-
-    return context
-  },
-
-  hash: (context) => context.scope.hash(),
-})
-
-// Check if we're parsing array destructuring: [ a b ] = ...
-const isArrayDestructuring = (input: InputStream): boolean => {
-  let pos = 0
-
-  // Find closing bracket
-  while (pos < 200 && input.peek(pos) !== 93 /* ] */) {
-    if (input.peek(pos) === -1) return false // EOF
-    pos++
-  }
-
-  if (input.peek(pos) !== 93 /* ] */) return false
-  pos++
-
-  // Skip whitespace
-  while (input.peek(pos) === 32 /* space */ ||
-    input.peek(pos) === 9 /* tab */ ||
-    input.peek(pos) === 10 /* \n */) {
-    pos++
-  }
-
-  return input.peek(pos) === 61 /* = */
-}
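For reference, the deleted context tracker relied on an immutable Scope chain so lezer could hash and compare parse contexts cheaply. A short sketch of that API, reconstructed from the removed code above rather than new functionality:

// Usage of the removed Scope class (see deletion above) — every
// "mutation" returns a new Scope, leaving earlier contexts intact.
let scope = new Scope(null, new Set<string>())
scope = Scope.add(scope, 'x')  // new Scope that also knows 'x'
const inner = scope.push()     // child scope, e.g. a function body
inner.has('x')                 // true: has() walks the parent chain
scope = inner.pop()            // back to the enclosing scope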
src/parser/shrimp.grammar
@@ -1,299 +0,0 @@
-@external propSource highlighting from "./highlight"
-
-@context trackScope from "./parserScopeContext"
-
-@skip { space | Comment }
-
-@top Program { item* }
-
-@external tokens operatorTokenizer from "./operatorTokenizer" { Star, Slash, Plus, Minus, And, Or, Eq, EqEq, Neq, Lt, Lte, Gt, Gte, Modulo, PlusEq, MinusEq, StarEq, SlashEq, ModuloEq, Band, Bor, Bxor, Shl, Shr, Ushr, NullishCoalesce, NullishEq }
-
-@tokens {
-  @precedence { Number Regex }
-
-  StringFragment { !['\\$]+ }
-  DoubleQuote { '"' !["]* '"' }
-  NamedArgPrefix { $[a-z] $[a-z0-9-]* "=" }
-  Number {
-    ("-" | "+")? "0x" $[0-9a-fA-F]+ |
-    ("-" | "+")? "0b" $[01]+ |
-    ("-" | "+")? "0o" $[0-7]+ |
-    ("-" | "+")? $[0-9]+ ("_"? $[0-9]+)* ('.' $[0-9]+ ("_"? $[0-9]+)*)?
-  }
-  Boolean { "true" | "false" }
-  semicolon { ";" }
-  eof { @eof }
-  space { " " | "\t" }
-  Comment { "#" ![\n]* }
-  leftParen { "(" }
-  rightParen { ")" }
-  colon[closedBy="end", @name="colon"] { ":" }
-  Underscore { "_" }
-  Dollar { "$" }
-  Regex { "//" (![/\\\n[] | "\\" ![\n] | "[" (![\n\\\]] | "\\" ![\n])* "]")+ ("//" $[gimsuy]*)? } // Stolen from the lezer JavaScript grammar
-  "|"[@name=operator]
-}
-
-newlineOrSemicolon { newline | semicolon }
-
-end { @specialize[@name=keyword]<Identifier, "end"> }
-while { @specialize[@name=keyword]<Identifier, "while"> }
-if { @specialize[@name=keyword]<Identifier, "if"> }
-else { @specialize[@name=keyword]<Identifier, "else"> }
-try { @specialize[@name=keyword]<Identifier, "try"> }
-catch { @specialize[@name=keyword]<Identifier, "catch"> }
-finally { @specialize[@name=keyword]<Identifier, "finally"> }
-throw { @specialize[@name=keyword]<Identifier, "throw"> }
-not { @specialize[@name=keyword]<Identifier, "not"> }
-import { @specialize[@name=keyword]<Identifier, "import"> }
-null { @specialize[@name=Null]<Identifier, "null"> }
-
-@external tokens tokenizer from "./tokenizer" { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, CurlyString }
-@external tokens pipeStartsLineTokenizer from "./tokenizer" { newline, pipeStartsLine }
-@external specialize {Identifier} specializeKeyword from "./tokenizer" { Do }
-
-@precedence {
-  pipe @left,
-  or @left,
-  and @left,
-  nullish @left,
-  comparison @left,
-  multiplicative @left,
-  additive @left,
-  bitwise @left,
-  call,
-  functionWithNewlines
-}
-
-item {
-  consumeToTerminator newlineOrSemicolon |
-  consumeToTerminator eof |
-  newlineOrSemicolon // allow blank lines
-}
-
-consumeToTerminator {
-  PipeExpr |
-  WhileExpr |
-  FunctionCallWithBlock |
-  ambiguousFunctionCall |
-  TryExpr |
-  Throw |
-  Not |
-  Import |
-  IfExpr |
-  FunctionDef |
-  CompoundAssign |
-  Assign |
-  BinOp |
-  ConditionalOp |
-  expressionWithoutIdentifier
-}
-
-PipeExpr {
-  pipeOperand (!pipe (pipeStartsLine? "|") newlineOrSemicolon* pipeOperand)+
-}
-
-pipeOperand {
-  consumeToTerminator
-}
-
-WhileExpr {
-  while (ConditionalOp | expression) colon Block end
-}
-
-Block {
-  consumeToTerminator | newlineOrSemicolon block
-}
-
-FunctionCallWithBlock {
-  ambiguousFunctionCall colon Block CatchExpr? FinallyExpr? end
-}
-
-FunctionCallOrIdentifier {
-  DotGet | Identifier
-}
-
-ambiguousFunctionCall {
-  FunctionCall | FunctionCallOrIdentifier
-}
-
-FunctionCall {
-  (DotGet | Identifier | ParenExpr) arg+
-}
-
-arg {
-  PositionalArg | NamedArg
-}
-
-PositionalArg {
-  expression | FunctionDef | Underscore
-}
-
-NamedArg {
-  NamedArgPrefix (expression | FunctionDef | Underscore)
-}
-
-FunctionDef {
-  Do Params colon (consumeToTerminator | newlineOrSemicolon block) CatchExpr? FinallyExpr? end
-}
-
-ifTest {
-  ConditionalOp | expression | FunctionCall
-}
-
-IfExpr {
-  if ifTest colon Block ElseIfExpr* ElseExpr? end
-}
-
-ElseIfExpr {
-  else if ifTest colon Block
-}
-
-ElseExpr {
-  else colon Block
-}
-
-TryExpr {
-  try colon Block CatchExpr? FinallyExpr? end
-}
-
-CatchExpr {
-  catch Identifier colon Block
-}
-
-FinallyExpr {
-  finally colon Block
-}
-
-Throw {
-  throw (BinOp | ConditionalOp | expression)
-}
-
-Not {
-  not (BinOp | ConditionalOp | expression)
-}
-
-// this has to be in the parse tree so the scope tracker can use it
-Import {
-  import NamedArg* Identifier+ NamedArg*
-}
-
-ConditionalOp {
-  expression !comparison EqEq expression |
-  expression !comparison Neq expression |
-  expression !comparison Lt expression |
-  expression !comparison Lte expression |
-  expression !comparison Gt expression |
-  expression !comparison Gte expression |
-  (expression | ConditionalOp) !and And (expression | ConditionalOp) |
-  (expression | ConditionalOp) !or Or (expression | ConditionalOp) |
-  (expression | ConditionalOp) !nullish NullishCoalesce (expression | ConditionalOp)
-}
-
-Params {
-  Identifier* NamedParam*
-}
-
-NamedParam {
-  NamedArgPrefix (String | Number | Boolean | null)
-}
-
-Assign {
-  (AssignableIdentifier | Array) Eq consumeToTerminator
-}
-
-CompoundAssign {
-  AssignableIdentifier (PlusEq | MinusEq | StarEq | SlashEq | ModuloEq | NullishEq) consumeToTerminator
-}
-
-BinOp {
-  expression !multiplicative Modulo expression |
-  (expression | BinOp) !multiplicative Star (expression | BinOp) |
-  (expression | BinOp) !multiplicative Slash (expression | BinOp) |
-  (expression | BinOp) !additive Plus (expression | BinOp) |
-  (expression | BinOp) !additive Minus (expression | BinOp) |
-  (expression | BinOp) !bitwise Band (expression | BinOp) |
-  (expression | BinOp) !bitwise Bor (expression | BinOp) |
-  (expression | BinOp) !bitwise Bxor (expression | BinOp) |
-  (expression | BinOp) !bitwise Shl (expression | BinOp) |
-  (expression | BinOp) !bitwise Shr (expression | BinOp) |
-  (expression | BinOp) !bitwise Ushr (expression | BinOp)
-}
-
-ParenExpr {
-  leftParen newlineOrSemicolon* (
-    FunctionCallWithNewlines |
-    IfExpr |
-    ambiguousFunctionCall |
-    BinOp newlineOrSemicolon* |
-    expressionWithoutIdentifier |
-    ConditionalOp newlineOrSemicolon* |
-    PipeExpr |
-    FunctionDef
-  )
-  rightParen
-}
-
-FunctionCallWithNewlines[@name=FunctionCall] {
-  (DotGet | Identifier | ParenExpr) newlineOrSemicolon+ arg !functionWithNewlines (newlineOrSemicolon+ arg)* newlineOrSemicolon*
-}
-
-expression {
-  expressionWithoutIdentifier | DotGet | Identifier
-}
-
-
-@local tokens {
-  dot { "." }
-}
-
-@skip {} {
-  DotGet {
-    IdentifierBeforeDot dot (DotGet | Number | Identifier | ParenExpr) |
-    Dollar dot (DotGet | Number | Identifier | ParenExpr)
-  }
-
-  String {
-    "'" stringContent* "'" | CurlyString | DoubleQuote
-  }
-}
-
-stringContent {
-  StringFragment |
-  Interpolation |
-  EscapeSeq
-}
-
-Interpolation {
-  "$" FunctionCallOrIdentifier |
-  "$" ParenExpr
-}
-
-EscapeSeq {
-  "\\" ("$" | "n" | "t" | "r" | "\\" | "'")
-}
-
-Dict {
-  "[=]" |
-  "[" newlineOrSemicolon* NamedArg (newlineOrSemicolon | NamedArg)* "]"
-}
-
-Array {
-  "[" newlineOrSemicolon* (expression (newlineOrSemicolon | expression)*)? "]"
-}
-
-// We need expressionWithoutIdentifier to avoid conflicts in consumeToTerminator.
-// Without this, when parsing "my-var" at statement level, the parser can't decide:
-// - ambiguousFunctionCall → FunctionCallOrIdentifier → Identifier
-// - expression → Identifier
-// Both want the same Identifier token! So we use expressionWithoutIdentifier
-// to remove Identifier from the second path, forcing standalone identifiers
-// to go through ambiguousFunctionCall (which is what we want semantically).
-// Yes, it is annoying and I gave up trying to use GLR to fix it.
-expressionWithoutIdentifier {
-  ParenExpr | Word | String | Number | Boolean | Regex | Dict | Array | null
-}
-
-block {
-  (consumeToTerminator? newlineOrSemicolon)*
-}
src/parser/shrimp.grammar.d.ts (vendored)
@@ -1,4 +0,0 @@
-declare module '*.grammar' {
-  const content: string
-  export default content
-}
src/parser/shrimp.terms.ts
@@ -1,82 +0,0 @@
-// This file was generated by lezer-generator. You probably shouldn't edit it.
-export const
-  Star = 1,
-  Slash = 2,
-  Plus = 3,
-  Minus = 4,
-  And = 5,
-  Or = 6,
-  Eq = 7,
-  EqEq = 8,
-  Neq = 9,
-  Lt = 10,
-  Lte = 11,
-  Gt = 12,
-  Gte = 13,
-  Modulo = 14,
-  PlusEq = 15,
-  MinusEq = 16,
-  StarEq = 17,
-  SlashEq = 18,
-  ModuloEq = 19,
-  Band = 20,
-  Bor = 21,
-  Bxor = 22,
-  Shl = 23,
-  Shr = 24,
-  Ushr = 25,
-  NullishCoalesce = 26,
-  NullishEq = 27,
-  Identifier = 28,
-  AssignableIdentifier = 29,
-  Word = 30,
-  IdentifierBeforeDot = 31,
-  CurlyString = 32,
-  newline = 103,
-  pipeStartsLine = 104,
-  Do = 33,
-  Comment = 34,
-  Program = 35,
-  PipeExpr = 36,
-  WhileExpr = 38,
-  keyword = 86,
-  ConditionalOp = 40,
-  ParenExpr = 41,
-  FunctionCallWithNewlines = 42,
-  DotGet = 43,
-  Number = 44,
-  Dollar = 45,
-  PositionalArg = 46,
-  FunctionDef = 47,
-  Params = 48,
-  NamedParam = 49,
-  NamedArgPrefix = 50,
-  String = 51,
-  StringFragment = 52,
-  Interpolation = 53,
-  FunctionCallOrIdentifier = 54,
-  EscapeSeq = 55,
-  DoubleQuote = 56,
-  Boolean = 57,
-  Null = 58,
-  colon = 59,
-  CatchExpr = 60,
-  Block = 62,
-  FinallyExpr = 63,
-  Underscore = 66,
-  NamedArg = 67,
-  IfExpr = 68,
-  FunctionCall = 70,
-  ElseIfExpr = 71,
-  ElseExpr = 73,
-  BinOp = 74,
-  Regex = 75,
-  Dict = 76,
-  Array = 77,
-  FunctionCallWithBlock = 78,
-  TryExpr = 79,
-  Throw = 81,
-  Not = 83,
-  Import = 85,
-  CompoundAssign = 87,
-  Assign = 88
src/parser/shrimp.ts
@@ -1,27 +0,0 @@
-// This file was generated by lezer-generator. You probably shouldn't edit it.
-import {LRParser, LocalTokenGroup} from "@lezer/lr"
-import {operatorTokenizer} from "./operatorTokenizer"
-import {tokenizer, pipeStartsLineTokenizer, specializeKeyword} from "./tokenizer"
-import {trackScope} from "./parserScopeContext"
-import {highlighting} from "./highlight"
-const spec_Identifier = {__proto__:null,while:78, null:116, catch:122, finally:128, end:130, if:138, else:144, try:160, throw:164, not:168, import:172}
-export const parser = LRParser.deserialize({
-  version: 14,
states: "?tQYQ!SOOOOQ!Q'#Em'#EmO!vO!bO'#DXO%nQ!TO'#DdO&XOSO'#DaOOQ!R'#Da'#DaO)VQ!TO'#EpOOQ!Q'#E}'#E}O)sQRO'#DxO+{Q!TO'#ElO,iQ!SO'#DVOOQ!R'#Dz'#DzO/^Q!SO'#D{OOQ!R'#Ep'#EpO/eQ!TO'#EpO1iQ!TO'#EoO2wQ!TO'#ElO3UQRO'#EVOOQ!Q'#El'#ElO3mQ!SO'#ElO3tQrO'#EkOOQ!Q'#Ek'#EkOOQ!Q'#EX'#EXQYQ!SOOO4VQbO'#D]O4bQbO'#DrO5`QbO'#DSO6^QQO'#D}O5`QbO'#EPO5`QbO'#ERO6cQbO'#ETO6kObO,59sOOQ!Q'#D['#D[O6|QbO'#DqOOQ!Q'#Es'#EsOOQ!Q'#Ea'#EaO7WQ!SO,5:`OOQ!R'#Eo'#EoO8WQbO'#DcO8fQWO'#DeOOOO'#Eu'#EuOOOO'#E^'#E^O8zOSO,59{OOQ!R,59{,59{O5`QbO,5:dO5`QbO,5:dO5`QbO,5:dO5`QbO,5:dO5`QbO,59pO5`QbO,59pO5`QbO,59pO5`QbO,59pOOQ!Q'#EZ'#EZO,iQ!SO,59qO9YQ!TO'#DdO9dQ!TO'#EpO9nQsO,59qO9{QQO,59qO:QQrO,59qO:]QrO,59qO:kQsO,59qO;ZQsO,59qO;bQrO'#DQO;jQ!SO,5:gO;qQrO,5:fOOQ!R,5:g,5:gO<PQ!SO,5:gO<^QbO,5:rO<^QbO,5:qOYQ!SO,5:hO=tQ!SO,59lOOQ!Q,5;V,5;VOYQ!SO'#EYO>fQQO'#EYOOQ!Q-E8V-E8VOOQ!Q'#E['#E[O>kQbO'#D^O>vQbO'#D_OOQO'#E]'#E]O>nQQO'#D^O?[QQO,59wO?aQcO'#EoO@^QRO'#E|OAZQRO'#E|OOQO'#E|'#E|OAbQQO,5:^OAgQRO,59nOAnQRO,59nOYQ!SO,5:iOA|Q!TO,5:kOCbQ!TO,5:kODUQ!TO,5:kODcQ!TO,5:mOEwQ!TO,5:mOFkQ!TO,5:mOFxQ!SO,5:oOOQ!Q'#Ee'#EeO6cQbO,5:oOOQ!R1G/_1G/_OOQ!Q,5:],5:]OOQ!Q-E8_-E8_OOOO'#Dd'#DdOOOO,59},59}OOOO,5:P,5:POOOO-E8[-E8[OOQ!R1G/g1G/gOOQ!R1G0O1G0OOH}Q!TO1G0OOIXQ!TO1G0OOJmQ!TO1G0OOJwQ!TO1G0OOKUQ!TO1G0OOOQ!R1G/[1G/[OLmQ!TO1G/[OLtQ!TO1G/[OL{Q!TO1G/[ONQQ!TO1G/[OMSQ!TO1G/[OOQ!Q-E8X-E8XONhQsO1G/]ONuQQO1G/]ONzQrO1G/]O! VQrO1G/]O! eQsO1G/]O! lQsO1G/]O! sQ!SO,59rO! }QrO1G/]OOQ!R1G/]1G/]O!!YQrO1G0QOOQ!R1G0R1G0RO!!hQ!SO1G0ROOQp'#Ec'#EcO!!YQrO1G0QOOQ!R1G0Q1G0QOOQ!Q'#Ed'#EdO!!hQ!SO1G0RO!!uQ!SO1G0^O!#gQ!SO1G0]O!$XQ!SO'#DlO!$mQ!SO'#DlO!$}QbO1G0SOOQ!Q-E8W-E8WOYQ!SO,5:tOOQ!Q,5:t,5:tOYQ!SO,5:tOOQ!Q-E8Y-E8YO!%YQQO,59xOOQO,59y,59yOOQO-E8Z-E8ZOYQ!SO1G/cOYQ!SO1G/xOYQ!SO1G/YO!%bQbO1G0TO!%mQ!SO1G0ZO!&bQ!SO1G0ZOOQ!Q-E8c-E8cO!&iQrO7+$wOOQ!R7+$w7+$wO!&tQrO1G/^O!'PQrO7+%lOOQ!R7+%l7+%lO!'_Q!SO7+%mOOQ!R7+%m7+%mOOQp-E8a-E8aOOQ!Q-E8b-E8bOOQ!Q'#E_'#E_O!'lQrO'#E_O!'zQ!SO'#E{OOQ`,5:W,5:WO!([QbO'#DjO!(aQQO'#DmOOQ!Q7+%n7+%nO!(fQbO7+%nO!(kQbO7+%nOOQ!Q1G0`1G0`OYQ!SO1G0`O!(sQ!SO7+$}O!)UQ!SO7+$}O!)cQbO7+%dO!)kQbO7+$tOOQ!Q7+%o7+%oO!)pQbO7+%oO!)uQbO7+%oO!)}Q!SO7+%uOOQ!R<<Hc<<HcO!*rQ!SO7+$xO!+PQrO7+$xOOQ!R<<IW<<IWOOQ!R<<IX<<IXOOQ!Q,5:y,5:yOOQ!Q-E8]-E8]O!+[QQO,5:UOYQ!SO,5:XOOQ!Q<<IY<<IYO!+aQbO<<IYOOQ!Q7+%z7+%zOOQ!Q<<Hi<<HiO!+fQbO<<HiO!+kQbO<<HiO!+sQbO<<HiOOQ`'#Eb'#EbO!,OQbO<<IOO!,WQbO'#DwOOQ!Q<<IO<<IOO!,`QbO<<IOOOQ!Q<<H`<<H`OOQ!Q<<IZ<<IZO!,eQbO<<IZOOQp,5:z,5:zO!,jQ!SO<<HdOOQp-E8^-E8^OYQ!SO1G/pOOQ`1G/s1G/sOOQ!QAN>tAN>tOOQ!QAN>TAN>TO!,wQbOAN>TO!,|QbOAN>TOOQ`-E8`-E8`OOQ!QAN>jAN>jO!-UQbOAN>jO4bQbO,5:aOYQ!SO,5:cOOQ!QAN>uAN>uP! sQ!SO'#EZOOQ`7+%[7+%[OOQ!QG23oG23oO!-ZQbOG23oP!,ZQbO'#DuOOQ!QG24UG24UO!-`QQO1G/{OOQ`1G/}1G/}OOQ!QLD)ZLD)ZOYQ!SO7+%gOOQ`<<IR<<IRO!-eObO,59sO!-vO!bO'#DX",
stateData: "!.O~O#^OSrOS~OlROmaOn]OoQOpTOqhOwjO|]O}QO!YTO!Z]O![]O!giO!m]O!rkO!tlO!vmO!xnO#ZPO#bPO#eYO#hSO#sZO#t[O~O#foO~OluOn]OoQOpTOqhO|]O}QO!SqO!YTO!Z]O![]O!dpO!m]O#eYO#hSO#sZO#t[OP#cXQ#cXR#cXS#cXT#cXU#cXW#cXX#cXY#cXZ#cX[#cX]#cX^#cXd#cXe#cXf#cXg#cXh#cXi#cXj#cXu!WX!]!WX#[!WX#r!WX~O#Z!WX#b!WX#v!WX!_!WX!b!WX!c!WX!j!WX~P!{O!UxO#h{O#jvO#kwO~OluOn]OoQOpTOqhO|]O}QO!SqO!YTO!Z]O![]O!dpO!m]O#eYO#hSO#sZO#t[OP#dXQ#dXR#dXS#dXT#dXU#dXW#dXX#dXY#dXZ#dX[#dX]#dX^#dXd#dXe#dXf#dXg#dXh#dXi#dXj#dXu#dX#[#dX#r#dX~O#Z#dX#b#dX#v#dX!]#dX!_#dX!b#dX!c#dX!j#dX~P&gOP}OQ}OR!OOS!OOT!ROU!SOW!QOX!QOY!QOZ!QO[!QO]!QO^|Od!POe!POf!POg!POh!POi!POj!TO~OP}OQ}OR!OOS!OOd!POe!POf!POg!POh!POi!POu#`X#[#`X~O#Z#`X#b#`X#v#`X!_#`X!b#`X!c#`X#r#`X!j#`X~P+TOl!WOmaOn]OoQOpTOqhOwjO|]O}QO!YTO!Z]O![]O!giO!m]O!rkO!tlO!vmO!xnO#ZPO#bPO#eYO#hSO#sZO#t[O~OluOn]OoQOpTO|]O}QO!SqO!YTO!Z]O![]O!m]O#ZPO#bPO#eYO#hSO#sZO#t[O~O#u!cO~P.VOV!eO#Z#dX#b#dX#v#dX!_#dX!b#dX!c#dX!j#dX~P'lOP#cXQ#cXR#cXS#cXT#cXU#cXW#cXX#cXY#cXZ#cX[#cX]#cX^#cXd#cXe#cXf#cXg#cXh#cXi#cXj#cXu#`X#[#`X~O#Z#`X#b#`X#v#`X!_#`X!b#`X!c#`X#r#`X!j#`X~P0ROu#`X#Z#`X#[#`X#b#`X#v#`X!_#`X!b#`X!c#`X#r#`X!j#`X~OT!ROU!SOj!TO~P2VOV!eO_!fO`!fOa!fOb!fOc!fOk!fO~O!]!gO~P2VOu!jO#ZPO#[!kO#bPO#v!iO~Ol!mO!S!oO!]!QP~Ol!sOn]OoQOpTO|]O}QO!YTO!Z]O![]O!m]O#eYO#hSO#sZO#t[O~OluOn]OoQOpTO|]O}QO!YTO!Z]O![]O!m]O#eYO#hSO#sZO#t[O~O!]!zO~Ol!mO!SqO~Ol#UOoQO|#UO}QO#eYO~OqhO!d#VO~P5`OqhO!SqO!dpOu!ha!]!ha#Z!ha#[!ha#b!ha#v!ha#r!ha!_!ha!b!ha!c!ha!j!ha~P5`Ol#XOo&TO}&TO#eYO~O#h#ZO#j#ZO#k#ZO#l#ZO#m#ZO#n#ZO~O!UxO#h#]O#jvO#kwO~O#ZPO#bPO~P!{O#ZPO#bPO~P&gO#ZPO#bPO#r#sO~P+TO#r#sO~O#r#sOu#`X#[#`X~O!]!gO#r#sOu#`X#[#`X~O#r#sO~P0ROT!ROU!SOj!TO#ZPO#bPOu#`X#[#`X~O#r#sO~P:rOu!jO#[!kO~O#u#uO~P.VO!SqO#ZPO#bPO#u#yO~O#ZPO#bPO#u#uO~P5`OlROmaOn]OoQOpTOqhOwjO|]O}QO!YTO!Z]O![]O!giO!m]O!rkO!tlO!vmO!xnO#eYO#hSO#sZO#t[O~Ou!jO#[!kO#Zta#bta#vta#rta!_ta!bta!cta!jta~Ou$UO~Ol!mO!S!oO!]!QX~OpTO|$XO!YTO!Z$XO![$XO#hSO~O!]$ZO~OqhO!SqO!dpOT#cXU#cXW#cXX#cXY#cXZ#cX[#cX]#cXj#cX!]#cX~P5`OT!ROU!SOj!TO!]#pX~OT!ROU!SOW!QOX!QOY!QOZ!QO[!QO]!QOj!TO~O!]#pX~P@lO!]$[O~O!]$]O~P@lOT!ROU!SOj!TO!]$]O~Ou!sa#Z!sa#[!sa#b!sa#v!sa!_!sa!b!sa!c!sa#r!sa!j!sa~P)sOu!sa#Z!sa#[!sa#b!sa#v!sa!_!sa!b!sa!c!sa#r!sa!j!sa~OP}OQ}OR!OOS!OOd!POe!POf!POg!POh!POi!PO~PBpOT!ROU!SOj!TO~PBpOu!ua#Z!ua#[!ua#b!ua#v!ua!_!ua!b!ua!c!ua#r!ua!j!ua~P)sOu!ua#Z!ua#[!ua#b!ua#v!ua!_!ua!b!ua!c!ua#r!ua!j!ua~OP}OQ}OR!OOS!OOd!POe!POf!POg!POh!POi!PO~PEVOT!ROU!SOj!TO~PEVOl!mO!SqOu!wa#Z!wa#[!wa#b!wa#v!wa!_!wa!b!wa!c!wa#r!wa!j!wa~O^|OR!liS!lid!lie!lif!lig!lih!lii!liu!li#Z!li#[!li#b!li#v!li#r!li!_!li!b!li!c!li!j!li~OP!liQ!li~PGpOP}OQ}O~PGpOP}OQ}Od!lie!lif!lig!lih!lii!liu!li#Z!li#[!li#b!li#v!li#r!li!_!li!b!li!c!li!j!li~OR!liS!li~PIcOR!OOS!OO^|O~PIcOR!OOS!OO~PIcOW!QOX!QOY!QOZ!QO[!QO]!QOTxijxiuxi#Zxi#[xi#bxi#vxi#rxi!]xi!_xi!bxi!cxi!jxi~OU!SO~PK`OU!SO~PKrOUxi~PK`OT!ROU!SOjxiuxi#Zxi#[xi#bxi#vxi#rxi!]xi!_xi!bxi!cxi!jxi~OW!QOX!QOY!QOZ!QO[!QO]!QO~PMSO#ZPO#bPO#r$cO~P+TO#r$cO~O#r$cOu#`X#[#`X~O!]!gO#r$cOu#`X#[#`X~O#r$cO~P0RO#r$cO~P:rOqhO!dpO~P.VO#ZPO#bPO#r$cO~O!SqO#ZPO#bPO#u$fO~O#ZPO#bPO#u$hO~P5`Ou!jO#[!kO#Z!zi#b!zi#v!zi!_!zi!b!zi!c!zi#r!zi!j!zi~Ou!jO#[!kO#Z!yi#b!yi#v!yi!_!yi!b!yi!c!yi#r!yi!j!yi~Ou!jO#[!kO!_!`X!b!`X!c!`X!j!`X~O!_#oP!b#oP!c#oP!j#oP~PYO!_$oO!b$pO!c$qO~O!S!oO!]!Qa~O!_$oO!b$pO!c$zO~O!SqOu!wi#Z!wi#[!wi#b!wi#v!wi!_!wi!b!wi!c!wi#r!wi!j!wi~Ol!mO~P!%mO#ZPO#bPO#r%OO~O#ZPO#bPO#rzi~O!SqO#ZPO#bPO#u%RO~O#ZPO#bPO#u%SO~P5`Ou!jO#ZPO#[!kO#bPO~O!_#oX!b#oX!c#oX!j#oX~PYOl%VO~O!]%WO~O!c%XO~O!b$pO!c%XO~Ou!jO!_$oO!b$pO!c%[O#[!kO~O!_#oP!b#oP!c#oP~PYO!c%cO!j%bO~O!c%eO~O!c%fO~O!b$pO!c%fO~O!SqOu!wq#Z!wq#[!wq#b!wq#v!wq!_!wq!b!wq!c!wq#r!wq!j!wq~OqhO!dpO#rzq~P.V
O#ZPO#bPO#rzq~O!]%kO~O!c%mO~O!c%nO~O!b$pO!c%nO~O!_$oO!b$pO!c%nO~O!c%rO!j%bO~O!]%uO!g%tO~O!c%rO~O!c%vO~OqhO!dpO#rzy~P.VO!c%yO~O!b$pO!c%yO~O!c%|O~O!c&PO~O!]&QO~Ol#UOo&TO|#UO}&TO#eYO~O#f&SO~O|!m~",
goto: "<m#rPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP#sP$aP$y%}'b'hPP(})Z*W*ZP*aP+l+p+lPPPP,]P,i-RPPP-i#sP.Z.wP.{/RP0O1W$a$aP$aP$aP$aP$a$a2b2h2t3p4O4Y4`4g4m4w4}5X5cPPPPP5q5u6qP8T:PPP;_P;oPPPPP;s;y<PxbOg!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QQ![YR#m!V}bOYg!V!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&Qx`Og!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QQ!_YS!ti%tQ!yjQ!}lQ#QmQ#d!SQ#f!RQ#i!TR#p!V|UOgi!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%t%u&Q!Y]RU[jlmqt|}!O!P!Q!R!S!T!W!X!a!d!s#q#v#{$g%P%i%wS!XY!VS#Uo&SR#YvQ!ZYR#l!VxROg!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&Q!YuRU[jlmqt|}!O!P!Q!R!S!T!W!X!a!d!s#q#v#{$g%P%i%wS!WY!VS!si%tS#Uo&SR#XverRUt!W!X!s#q%P%i%wxbOg!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QdpRUt!W!X!s#q%P%i%wQ![YQ#VqR#m!VR!rhX!ph!n!q$W#[]ORUY[gijlmqt|}!O!P!Q!R!S!T!V!W!X!a!d!e!f!g!j!s!z#q#v#{$P$S$U$Z$[$]$g$m$u$w%P%W%i%k%t%u%w&QR$X!oTxSz|VOYg!V!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QR#YvQ$s$QQ$|$^Q%^$vR%p%_Q$Q!gQ$^!zQ$x$[Q$y$]Q%l%WQ%x%kQ&O%uR&R&QQ$r$QQ${$^Q%Y$sQ%]$vQ%g$|S%o%^%_R%z%pdrRUt!W!X!s#q%P%i%wQ!b[[#Sn#R#T$_$`$}Q#t!aX#w!b#t#x$e|VOYg!V!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QT!vi%tT%`$x%aQ%d$xR%s%axXOg!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QQ!YYQ!|lQ#PmQ#^}Q#a!OQ#c!PR#k!V#]]ORUY[gijlmqt|}!O!P!Q!R!S!T!V!W!X!a!d!e!f!g!j!s!z#q#v#{$P$S$U$Z$[$]$g$m$u$w%P%W%i%k%t%u%w&Q!^]RU[ijlmqt|}!O!P!Q!R!S!T!W!X!a!d!s#q#v#{$g%P%i%t%w}^OYg!V!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QQgOR!lg^!hd!`#|#}$O$l$vR$R!hQ!VYQ!a[d#j!V!a#q#r$S$b$u%P%i%wS#q!W!XS#r!Y!_Q$S!jS$b#k#pQ$u$UQ%P$dR%i%QQ!nhQ#RnU$V!n#R$`R$`#TQ!qhQ$W!nT$Y!q$WQzSR#[zS$m$P$wR%U$mQ%Q$dR%j%QYtRU!W!X!sR#WtQ%a$xR%q%aQ#x!bQ$e#tT$i#x$eQ#{!dQ$g#vT$j#{$gQ#TnQ$_#RU$a#T$_$}R$}$`TfOgSdOgS!`Y!VQ#|!eQ#}!f`$O!g!z$[$]%W%k%u&QQ$T!jU$l$P$m$wS$t$S$UQ$v$ZR%Z$uSeOg|!UY[!V!W!X!Y!_!a!j#k#p#q#r$S$U$b$d$u%P%Q%i%wQ!idW#w!b#t#x$eW#z!d#v#{$g`$P!g!z$[$]%W%k%u&QU$k$P$m$wQ$w$ZR%T$l|WOYg!V!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QdpRUt!W!X!s#q%P%i%wQ!d[S!ui%tQ!xjQ!{lQ#OmQ#VqQ#^|Q#_}Q#`!OQ#b!PQ#d!QQ#e!RQ#g!SQ#h!TQ#v!aX#z!d#v#{$gx_Og!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&Q!^uRU[ijlmqt|}!O!P!Q!R!S!T!W!X!a!d!s#q#v#{$g%P%i%t%wQ!^YR#o!V[sRUt!W!X!sQ$d#qV%h%P%i%wTySzQ$n$PR%_$wQ!wiR%}%txcOg!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QQ!]YR#n!V",
|
||||
nodeNames: "⚠ Star Slash Plus Minus And Or Eq EqEq Neq Lt Lte Gt Gte Modulo PlusEq MinusEq StarEq SlashEq ModuloEq Band Bor Bxor Shl Shr Ushr NullishCoalesce NullishEq Identifier AssignableIdentifier Word IdentifierBeforeDot CurlyString Do Comment Program PipeExpr operator WhileExpr keyword ConditionalOp ParenExpr FunctionCall DotGet Number Dollar PositionalArg FunctionDef Params NamedParam NamedArgPrefix String StringFragment Interpolation FunctionCallOrIdentifier EscapeSeq DoubleQuote Boolean Null colon CatchExpr keyword Block FinallyExpr keyword keyword Underscore NamedArg IfExpr keyword FunctionCall ElseIfExpr keyword ElseExpr BinOp Regex Dict Array FunctionCallWithBlock TryExpr keyword Throw keyword Not keyword Import keyword CompoundAssign Assign",
|
||||
maxTerm: 130,
|
||||
context: trackScope,
|
||||
nodeProps: [
|
||||
["closedBy", 59,"end"]
|
||||
],
|
||||
propSources: [highlighting],
|
||||
skippedNodes: [0,34],
|
||||
repeatNodeCount: 13,
|
||||
tokenData: "Lp~R}OX$OXY$mYp$Opq$mqr$Ors%Wst'^tu(uuw$Owx(|xy)Ryz)lz{$O{|*V|}$O}!O*V!O!P$O!P!Q3r!Q!R*w!R![-l![!]<_!]!^<x!^!}$O!}#O=c#O#P?X#P#Q?^#Q#R$O#R#S?w#S#T$O#T#Y@b#Y#ZA|#Z#b@b#b#cGj#c#f@b#f#gHm#g#h@b#h#iIp#i#o@b#o#p$O#p#qLQ#q;'S$O;'S;=`$g<%l~$O~O$O~~LkS$TU!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OS$jP;=`<%l$O^$tU!US#^YOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU%]Z!USOr%Wrs&Ost%Wtu&iuw%Wwx&ix#O%W#O#P&i#P;'S%W;'S;=`'W<%lO%WU&VU!YQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OQ&lTOr&irs&{s;'S&i;'S;=`'Q<%lO&iQ'QO!YQQ'TP;=`<%l&iU'ZP;=`<%l%W^'eZrY!USOY'^YZ$OZt'^tu(Wuw'^wx(Wx#O'^#O#P(W#P;'S'^;'S;=`(o<%lO'^Y(]SrYOY(WZ;'S(W;'S;=`(i<%lO(WY(lP;=`<%l(W^(rP;=`<%l'^^(|O#j[}Q~)RO#h~U)YU!US#eQOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU)sU!US#rQOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU*[X!USOt$Ouw$Ox!Q$O!Q!R*w!R![-l![#O$O#P;'S$O;'S;=`$g<%lO$OU+Ob!US|QOt$Ouw$Ox!O$O!O!P,W!P!Q$O!Q![-l![#O$O#P#R$O#R#S.i#S#U$O#U#V/W#V#c$O#c#d0l#d#l$O#l#m1z#m;'S$O;'S;=`$g<%lO$OU,]W!USOt$Ouw$Ox!Q$O!Q![,u![#O$O#P;'S$O;'S;=`$g<%lO$OU,|Y!US|QOt$Ouw$Ox!Q$O!Q![,u![#O$O#P#R$O#R#S,W#S;'S$O;'S;=`$g<%lO$OU-s[!US|QOt$Ouw$Ox!O$O!O!P,W!P!Q$O!Q![-l![#O$O#P#R$O#R#S.i#S;'S$O;'S;=`$g<%lO$OU.nW!USOt$Ouw$Ox!Q$O!Q![-l![#O$O#P;'S$O;'S;=`$g<%lO$OU/]X!USOt$Ouw$Ox!Q$O!Q!R/x!R!S/x!S#O$O#P;'S$O;'S;=`$g<%lO$OU0PX!US|QOt$Ouw$Ox!Q$O!Q!R/x!R!S/x!S#O$O#P;'S$O;'S;=`$g<%lO$OU0qW!USOt$Ouw$Ox!Q$O!Q!Y1Z!Y#O$O#P;'S$O;'S;=`$g<%lO$OU1bW!US|QOt$Ouw$Ox!Q$O!Q!Y1Z!Y#O$O#P;'S$O;'S;=`$g<%lO$OU2P[!USOt$Ouw$Ox!Q$O!Q![2u![!c$O!c!i2u!i#O$O#P#T$O#T#Z2u#Z;'S$O;'S;=`$g<%lO$OU2|[!US|QOt$Ouw$Ox!Q$O!Q![2u![!c$O!c!i2u!i#O$O#P#T$O#T#Z2u#Z;'S$O;'S;=`$g<%lO$OU3wW!USOt$Ouw$Ox!P$O!P!Q4a!Q#O$O#P;'S$O;'S;=`$g<%lO$OU4f^!USOY5bYZ$OZt5btu6euw5bwx6ex!P5b!P!Q$O!Q!}5b!}#O;W#O#P8s#P;'S5b;'S;=`<X<%lO5bU5i^!US!mQOY5bYZ$OZt5btu6euw5bwx6ex!P5b!P!Q9Y!Q!}5b!}#O;W#O#P8s#P;'S5b;'S;=`<X<%lO5bQ6jX!mQOY6eZ!P6e!P!Q7V!Q!}6e!}#O7t#O#P8s#P;'S6e;'S;=`9S<%lO6eQ7YP!P!Q7]Q7bU!mQ#Z#[7]#]#^7]#a#b7]#g#h7]#i#j7]#m#n7]Q7wVOY7tZ#O7t#O#P8^#P#Q6e#Q;'S7t;'S;=`8m<%lO7tQ8aSOY7tZ;'S7t;'S;=`8m<%lO7tQ8pP;=`<%l7tQ8vSOY6eZ;'S6e;'S;=`9S<%lO6eQ9VP;=`<%l6eU9_W!USOt$Ouw$Ox!P$O!P!Q9w!Q#O$O#P;'S$O;'S;=`$g<%lO$OU:Ob!US!mQOt$Ouw$Ox#O$O#P#Z$O#Z#[9w#[#]$O#]#^9w#^#a$O#a#b9w#b#g$O#g#h9w#h#i$O#i#j9w#j#m$O#m#n9w#n;'S$O;'S;=`$g<%lO$OU;][!USOY;WYZ$OZt;Wtu7tuw;Wwx7tx#O;W#O#P8^#P#Q5b#Q;'S;W;'S;=`<R<%lO;WU<UP;=`<%l;WU<[P;=`<%l5bU<fU!US!]QOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU=PU!US#bQOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU=jW#tQ!USOt$Ouw$Ox!_$O!_!`>S!`#O$O#P;'S$O;'S;=`$g<%lO$OU>XV!USOt$Ouw$Ox#O$O#P#Q>n#Q;'S$O;'S;=`$g<%lO$OU>uU#sQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$O~?^O#k~U?eU#uQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU@OU!US!dQOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU@g^!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#o@b#o;'S$O;'S;=`$g<%lO$OUAjU!SQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OUBR_!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#UCQ#U#o@b#o;'S$O;'S;=`$g<%lO$OUCV`!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#`@b#`#aDX#a#o@b#o;'S$O;'S;=`$g<%lO$OUD^`!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#g@b#g#hE`#h#o@b#o;'S$O;'S;=`$g<%lO$OUEe`!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#X@b#X#YFg#Y#o@b#o;'S$O;'S;=`$g<%lO$OUFn^!ZQ!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#o@b#o;'S$O;'S;=`$g<%lO$O^Gq^#lW!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#o@b#o;'S$O;'S;=`$g<%lO$O^Ht^#nW!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#o@b#o;'S$O;'S;=`$g<%lO$O^Iw`#mW!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#f@b#f#gJy#g#o@b#o;'S$O;'S;=`$g<%lO$OUKO`!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#
T$O#T#i@b#i#jE`#j#o@b#o;'S$O;'S;=`$g<%lO$OULXUuQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$O~LpO#v~",
|
||||
tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, pipeStartsLineTokenizer, new LocalTokenGroup("[~RP!O!PU~ZO#f~~", 11)],
|
||||
topRules: {"Program":[0,35]},
|
||||
specialized: [{term: 28, get: (value: any, stack: any) => (specializeKeyword(value, stack) << 1), external: specializeKeyword},{term: 28, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}],
|
||||
tokenPrec: 2711
|
||||
})
|
||||
|
|
@@ -1,7 +1,5 @@
|
|||
import { expect, describe, test } from 'bun:test'
|
||||
|
||||
import '../shrimp.grammar' // Importing this so changes cause it to retest!
|
||||
|
||||
describe('null', () => {
|
||||
test('parses null', () => {
|
||||
expect('null').toMatchTree(`Null null`)
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,5 @@
|
|||
import { expect, describe, test } from 'bun:test'
|
||||
|
||||
import '../shrimp.grammar' // Importing this so changes cause it to retest!
|
||||
|
||||
describe('bitwise operators - grammar', () => {
|
||||
test('parses band (bitwise AND)', () => {
|
||||
expect('5 band 3').toMatchTree(`
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,5 @@
|
|||
import { expect, describe, test } from 'bun:test'
|
||||
|
||||
import '../shrimp.grammar' // Importing this so changes cause it to retest!
|
||||
|
||||
describe('if/else if/else', () => {
|
||||
test('parses single line if', () => {
|
||||
expect(`if y == 1: 'cool' end`).toMatchTree(`
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,5 @@
|
|||
import { expect, describe, test } from 'bun:test'
|
||||
|
||||
import '../shrimp.grammar' // Importing this so changes cause it to retest!
|
||||
|
||||
describe('Array destructuring', () => {
|
||||
test('parses array pattern with two variables', () => {
|
||||
expect('[ a b ] = [ 1 2 3 4]').toMatchTree(`
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,5 @@
|
|||
import { expect, describe, test } from 'bun:test'
|
||||
|
||||
import '../shrimp.grammar' // Importing this so changes cause it to retest!
|
||||
|
||||
describe('try/catch/finally/throw', () => {
|
||||
test('parses try with catch', () => {
|
||||
expect(`try:
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,5 @@
|
|||
import { expect, describe, test } from 'bun:test'
|
||||
|
||||
import '../shrimp.grammar' // Importing this so changes cause it to retest!
|
||||
|
||||
describe('single line function blocks', () => {
|
||||
test('work with no args', () => {
|
||||
expect(`trap: echo bye bye end`).toMatchTree(`
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,5 @@
|
|||
import { expect, describe, test } from 'bun:test'
|
||||
|
||||
import '../shrimp.grammar' // Importing this so changes cause it to retest!
|
||||
|
||||
describe('calling functions', () => {
|
||||
test('call with no args', () => {
|
||||
expect('tail').toMatchTree(`
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,5 @@
|
|||
import { expect, describe, test } from 'bun:test'
|
||||
|
||||
import '../shrimp.grammar' // Importing this so changes cause it to retest!
|
||||
|
||||
describe('import', () => {
|
||||
test('parses single import', () => {
|
||||
expect(`import str`).toMatchTree(`
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,5 @@
|
|||
import { expect, describe, test } from 'bun:test'
|
||||
|
||||
import '../shrimp.grammar' // Importing this so changes cause it to retest!
|
||||
|
||||
describe('number literals', () => {
|
||||
test('binary numbers', () => {
|
||||
expect('0b110').toMatchTree(`
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,5 @@
|
|||
import { expect, describe, test } from 'bun:test'
|
||||
|
||||
import '../shrimp.grammar' // Importing this so changes cause it to retest!
|
||||
|
||||
describe('multiline', () => {
|
||||
test('parses multiline strings', () => {
|
||||
expect(`'first'\n'second'`).toMatchTree(`
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,4 @@
|
|||
import { expect, describe, test } from 'bun:test'
|
||||
import { parser } from '../shrimp'
|
||||
|
||||
import '../shrimp.grammar' // Importing this so changes cause it to retest!
|
||||
|
||||
describe('pipe expressions', () => {
|
||||
test('simple pipe expression', () => {
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,5 @@
|
|||
import { expect, describe, test } from 'bun:test'
|
||||
|
||||
import '../shrimp.grammar' // Importing this so changes cause it to retest!
|
||||
|
||||
describe('string interpolation', () => {
|
||||
test('string with variable interpolation', () => {
|
||||
expect("'hello $name'").toMatchTree(`
|
||||
|
|
|
|||
|
|
@@ -1,389 +0,0 @@
|
|||
import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
|
||||
import { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, Do, CurlyString, DotGet, newline, pipeStartsLine } from './shrimp.terms'
|
||||
|
||||
// doobie doobie do (we need the `do` keyword to know when we're defining params)
|
||||
export function specializeKeyword(ident: string) {
|
||||
return ident === 'do' ? Do : -1
|
||||
}
|
||||
|
||||
// tell the dotGet searcher about builtin globals
|
||||
export const globals: string[] = []
|
||||
export const setGlobals = (newGlobals: string[] | Record<string, any>) => {
|
||||
globals.length = 0
|
||||
globals.push(...(Array.isArray(newGlobals) ? newGlobals : Object.keys(newGlobals)))
|
||||
}
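// Illustrative usage (a sketch, not part of the original file): callers seed
// the known globals before parsing so that dotted access on a known global is
// tokenized as property access. Either form below is assumed to be accepted:
//
//   setGlobals(['http', 'str'])          // from a list of names
//   setGlobals({ http: {}, str: {} })    // equivalent, via Object.keys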
|
||||
|
||||
// The only chars that can't be words are whitespace, newlines, closing parens/brackets, and EOF.
|
||||
|
||||
export const tokenizer = new ExternalTokenizer(
|
||||
(input: InputStream, stack: Stack) => {
|
||||
const ch = getFullCodePoint(input, 0)
|
||||
|
||||
// Handle curly strings
|
||||
if (ch === 123 /* { */) return consumeCurlyString(input, stack)
|
||||
|
||||
if (!isWordChar(ch)) return
|
||||
|
||||
// Don't consume things that start with digits - let Number token handle it
|
||||
if (isDigit(ch)) return
|
||||
|
||||
// Don't consume things that start with - or + followed by a digit (negative/positive numbers)
|
||||
if ((ch === 45 /* - */ || ch === 43 /* + */) && isDigit(input.peek(1))) return
|
||||
|
||||
const isValidStart = isIdentStart(ch)
|
||||
const canBeWord = stack.canShift(Word)
|
||||
|
||||
// Consume all word characters, tracking if it remains a valid identifier
|
||||
const { pos, isValidIdentifier, stoppedAtDot } = consumeWordToken(
|
||||
input,
|
||||
isValidStart,
|
||||
canBeWord
|
||||
)
|
||||
|
||||
// Check if we should emit IdentifierBeforeDot for property access
|
||||
if (stoppedAtDot) {
|
||||
const dotGetToken = checkForDotGet(input, stack, pos)
|
||||
|
||||
if (dotGetToken) {
|
||||
input.advance(pos)
|
||||
input.acceptToken(dotGetToken)
|
||||
} else {
|
||||
// Not in scope - continue consuming the dot as part of the word
|
||||
const afterDot = consumeRestOfWord(input, pos + 1, canBeWord)
|
||||
input.advance(afterDot)
|
||||
input.acceptToken(Word)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Advance past the token we consumed
|
||||
input.advance(pos)
|
||||
|
||||
// Choose which token to emit
|
||||
if (isValidIdentifier) {
|
||||
const token = chooseIdentifierToken(input, stack)
|
||||
input.acceptToken(token)
|
||||
} else {
|
||||
input.acceptToken(Word)
|
||||
}
|
||||
},
|
||||
{ contextual: true }
|
||||
)
|
||||
|
||||
// Build identifier text from input stream, handling surrogate pairs for emoji
|
||||
const buildIdentifierText = (input: InputStream, length: number): string => {
|
||||
let text = ''
|
||||
for (let i = 0; i < length; i++) {
|
||||
const charCode = input.peek(i)
|
||||
if (charCode === -1) break
|
||||
|
||||
// Handle surrogate pairs for emoji (UTF-16 encoding)
|
||||
if (charCode >= 0xd800 && charCode <= 0xdbff && i + 1 < length) {
|
||||
const low = input.peek(i + 1)
|
||||
if (low >= 0xdc00 && low <= 0xdfff) {
|
||||
text += String.fromCharCode(charCode, low)
|
||||
i++ // Skip the low surrogate
|
||||
continue
|
||||
}
|
||||
}
|
||||
text += String.fromCharCode(charCode)
|
||||
}
|
||||
return text
|
||||
}
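// E.g. (assumed input): for the two-code-unit emoji '🦐' followed by 'x',
// buildIdentifierText(input, 3) re-pairs 0xD83E/0xDD90 into '🦐' and returns
// '🦐x' rather than two broken half-characters.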
|
||||
|
||||
// Consume word characters, tracking if it remains a valid identifier
|
||||
// Returns the position after consuming, whether it's a valid identifier, and if we stopped at a dot
|
||||
const consumeWordToken = (
|
||||
input: InputStream,
|
||||
isValidStart: boolean,
|
||||
canBeWord: boolean
|
||||
): { pos: number; isValidIdentifier: boolean; stoppedAtDot: boolean } => {
|
||||
let pos = getCharSize(getFullCodePoint(input, 0))
|
||||
let isValidIdentifier = isValidStart
|
||||
let stoppedAtDot = false
|
||||
|
||||
while (true) {
|
||||
const ch = getFullCodePoint(input, pos)
|
||||
|
||||
// Stop at dot if we have a valid identifier (might be property access)
|
||||
if (ch === 46 /* . */ && isValidIdentifier) {
|
||||
stoppedAtDot = true
|
||||
break
|
||||
}
|
||||
|
||||
// Stop if we hit a non-word character
|
||||
if (!isWordChar(ch)) break
|
||||
|
||||
// Context-aware termination: semicolon/colon can end a word if followed by whitespace
|
||||
// This allows `hello; 2` to parse correctly while `hello;world` stays as one word
|
||||
if (canBeWord && (ch === 59 /* ; */ || ch === 58 /* : */)) {
|
||||
const nextCh = getFullCodePoint(input, pos + 1)
|
||||
if (!isWordChar(nextCh)) break
|
||||
}
|
||||
|
||||
// Track identifier validity: must be lowercase, digit, dash, or emoji/unicode
|
||||
if (!isIdentChar(ch)) {
|
||||
if (!canBeWord) break
|
||||
isValidIdentifier = false
|
||||
}
|
||||
|
||||
pos += getCharSize(ch)
|
||||
}
|
||||
|
||||
return { pos, isValidIdentifier, stoppedAtDot }
|
||||
}
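// Worked examples (hypothetical inputs, given the rules above):
//
//   "hello-world"  -> consumes all 11 chars, isValidIdentifier: true
//   "Hello"        -> uppercase 'H' is not a valid identifier char, so
//                     isValidIdentifier: false (later emitted as Word)
//   "foo.bar"      -> stops at the '.': pos 3, stoppedAtDot: true
//   "hello; 2"     -> when Word is shiftable, stops before ';' because
//                     the ';' is followed by whitespace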
|
||||
|
||||
// Consume the rest of a word after we've decided not to treat a dot as DotGet
|
||||
// Used when we have "file.txt" - we already consumed "file", now consume ".txt"
|
||||
const consumeRestOfWord = (input: InputStream, startPos: number, canBeWord: boolean): number => {
|
||||
let pos = startPos
|
||||
while (true) {
|
||||
const ch = getFullCodePoint(input, pos)
|
||||
|
||||
// Stop if we hit a non-word character
|
||||
if (!isWordChar(ch)) break
|
||||
|
||||
// Context-aware termination for semicolon/colon
|
||||
if (canBeWord && (ch === 59 /* ; */ || ch === 58 /* : */)) {
|
||||
const nextCh = getFullCodePoint(input, pos + 1)
|
||||
if (!isWordChar(nextCh)) break
|
||||
}
|
||||
|
||||
pos += getCharSize(ch)
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
||||
// Consumes { curly strings } and tracks braces so you can { have { braces { inside { braces } } } }
|
||||
const consumeCurlyString = (input: InputStream, stack: Stack) => {
|
||||
if (!stack.canShift(CurlyString)) return
|
||||
|
||||
let depth = 0
|
||||
let pos = 0
|
||||
|
||||
while (true) {
|
||||
const ch = input.peek(pos)
|
||||
if (ch < 0) return // EOF - invalid
|
||||
|
||||
if (ch === 123) depth++ // {
|
||||
else if (ch === 125) { // }
|
||||
depth--
|
||||
if (depth === 0) {
|
||||
pos++ // consume final }
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
pos++
|
||||
}
|
||||
|
||||
input.acceptToken(CurlyString, pos)
|
||||
}
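// For example (hypothetical input): given `{ outer { inner } done }`, the
// loop balances the nested braces and accepts the whole 24-char span as one
// CurlyString token; an unclosed `{ oops` runs into EOF and emits nothing.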
|
||||
|
||||
// Check if this identifier is in scope (for property access detection)
|
||||
// Returns IdentifierBeforeDot token if in scope, null otherwise
|
||||
const checkForDotGet = (input: InputStream, stack: Stack, pos: number): number | null => {
|
||||
const identifierText = buildIdentifierText(input, pos)
|
||||
const context = stack.context as { scope: { has(name: string): boolean } } | undefined
|
||||
|
||||
// Check if identifier is in scope (lexical scope or globals)
|
||||
const inScope = context?.scope.has(identifierText) || globals.includes(identifierText)
|
||||
|
||||
// property access
|
||||
if (inScope) return IdentifierBeforeDot
|
||||
|
||||
// Not in scope - check if we're inside a DotGet chain
|
||||
// Inside the @skip {} block where DotGet is defined, Word cannot be shifted
|
||||
// but Identifier can be. This tells us we're at the RHS of a DotGet.
|
||||
const canShiftIdentifier = stack.canShift(Identifier)
|
||||
const canShiftWord = stack.canShift(Word)
|
||||
const inDotGetChain = canShiftIdentifier && !canShiftWord
|
||||
|
||||
// continue if we're inside a DotGet
|
||||
return inDotGetChain ? IdentifierBeforeDot : null
|
||||
}
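// Sketch of the intended behavior (assumed examples): with `user` in scope,
// `user.name` emits IdentifierBeforeDot so the grammar can build a DotGet;
// with nothing in scope, `file.txt` returns null here and is re-consumed as
// a single Word token by the caller.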
|
||||
|
||||
// Decide between AssignableIdentifier and Identifier using grammar state + peek-ahead
|
||||
const chooseIdentifierToken = (input: InputStream, stack: Stack): number => {
|
||||
const canAssignable = stack.canShift(AssignableIdentifier)
|
||||
const canRegular = stack.canShift(Identifier)
|
||||
|
||||
// Only one option is valid - use it
|
||||
if (canAssignable && !canRegular) return AssignableIdentifier
|
||||
if (canRegular && !canAssignable) return Identifier
|
||||
|
||||
// Both possible (ambiguous context) - peek ahead for '=' to disambiguate
|
||||
// This happens at statement start where both `x = 5` (assign) and `echo x` (call) are valid
|
||||
let peekPos = 0
|
||||
while (true) {
|
||||
const ch = getFullCodePoint(input, peekPos)
|
||||
if (isWhiteSpace(ch)) {
|
||||
peekPos += getCharSize(ch)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
const nextCh = getFullCodePoint(input, peekPos)
|
||||
const nextCh2 = getFullCodePoint(input, peekPos + 1)
|
||||
const nextCh3 = getFullCodePoint(input, peekPos + 2)
|
||||
|
||||
// Check for ??= (three-character compound operator)
|
||||
if (nextCh === 63 /* ? */ && nextCh2 === 63 /* ? */ && nextCh3 === 61 /* = */) {
|
||||
const charAfterOp = getFullCodePoint(input, peekPos + 3)
|
||||
if (isWhiteSpace(charAfterOp) || charAfterOp === -1 /* EOF */) {
|
||||
return AssignableIdentifier
|
||||
}
|
||||
}
|
||||
|
||||
// Check for compound assignment operators: +=, -=, *=, /=, %=
|
||||
if (
|
||||
[43 /* + */, 45 /* - */, 42 /* * */, 47 /* / */, 37 /* % */].includes(nextCh) &&
|
||||
nextCh2 === 61 /* = */
|
||||
) {
|
||||
// Found compound operator, check if it's followed by whitespace
|
||||
const charAfterOp = getFullCodePoint(input, peekPos + 2)
|
||||
if (isWhiteSpace(charAfterOp) || charAfterOp === -1 /* EOF */) {
|
||||
return AssignableIdentifier
|
||||
}
|
||||
}
|
||||
|
||||
if (nextCh === 61 /* = */) {
|
||||
// Found '=', but check if it's followed by whitespace
|
||||
// If '=' is followed by non-whitespace (like '=cool*'), it won't be tokenized as Eq
|
||||
// In that case, this should be Identifier (for function call), not AssignableIdentifier
|
||||
const charAfterEquals = getFullCodePoint(input, peekPos + 1)
|
||||
if (isWhiteSpace(charAfterEquals) || charAfterEquals === -1 /* EOF */) {
|
||||
return AssignableIdentifier
|
||||
}
|
||||
}
|
||||
return Identifier
|
||||
}
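// Disambiguation examples (hypothetical, following the peek-ahead above):
//
//   "x = 5"    -> '=' then whitespace           -> AssignableIdentifier
//   "x += 1"   -> compound op then whitespace   -> AssignableIdentifier
//   "x ??= 2"  -> three-char op then whitespace -> AssignableIdentifier
//   "x =cool"  -> '=' not followed by space     -> Identifier (call form)
//   "echo x"   -> no assignment operator ahead  -> Identifier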
|
||||
|
||||
// Character classification helpers
|
||||
export const isIdentStart = (ch: number): boolean => {
|
||||
return isLowercaseLetter(ch) || isEmojiOrUnicode(ch)
|
||||
}
|
||||
|
||||
export const isIdentChar = (ch: number): boolean => {
|
||||
return isLowercaseLetter(ch) || isDigit(ch) || ch === 45 /* - */ || ch === 63 /* ? */ || isEmojiOrUnicode(ch)
|
||||
}
|
||||
|
||||
const isWhiteSpace = (ch: number): boolean => {
|
||||
return ch === 32 /* space */ || ch === 9 /* tab */ || ch === 13 /* \r */
|
||||
}
|
||||
|
||||
const isWordChar = (ch: number): boolean => {
|
||||
return (
|
||||
!isWhiteSpace(ch) &&
|
||||
ch !== 10 /* \n */ &&
|
||||
ch !== 41 /* ) */ &&
|
||||
ch !== 93 /* ] */ &&
|
||||
ch !== -1 /* EOF */
|
||||
)
|
||||
}
|
||||
|
||||
const isLowercaseLetter = (ch: number): boolean => {
|
||||
return ch >= 97 && ch <= 122 // a-z
|
||||
}
|
||||
|
||||
const isDigit = (ch: number): boolean => {
|
||||
return ch >= 48 && ch <= 57 // 0-9
|
||||
}
|
||||
|
||||
const getFullCodePoint = (input: InputStream, pos: number): number => {
|
||||
const ch = input.peek(pos)
|
||||
|
||||
// Check if this is a high surrogate (0xD800-0xDBFF)
|
||||
if (ch >= 0xd800 && ch <= 0xdbff) {
|
||||
const low = input.peek(pos + 1)
|
||||
// Check if next is low surrogate (0xDC00-0xDFFF)
|
||||
if (low >= 0xdc00 && low <= 0xdfff) {
|
||||
// Combine surrogate pair into full code point
|
||||
return 0x10000 + ((ch & 0x3ff) << 10) + (low & 0x3ff)
|
||||
}
|
||||
}
|
||||
|
||||
return ch
|
||||
}
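// Worked example (assumed input): '🦐' (U+1F990) is stored in UTF-16 as the
// surrogate pair 0xD83E 0xDD90, so peek() alone returns 0xD83E; combining
// gives 0x10000 + ((0xD83E & 0x3FF) << 10) + (0xDD90 & 0x3FF) = 0x1F990.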
|
||||
|
||||
const isEmojiOrUnicode = (ch: number): boolean => {
|
||||
return (
|
||||
// Basic Emoticons
|
||||
(ch >= 0x1f600 && ch <= 0x1f64f) ||
|
||||
// Miscellaneous Symbols and Pictographs
|
||||
(ch >= 0x1f300 && ch <= 0x1f5ff) ||
|
||||
// Transport and Map Symbols
|
||||
(ch >= 0x1f680 && ch <= 0x1f6ff) ||
|
||||
// Regional Indicator Symbols (flags)
|
||||
(ch >= 0x1f1e6 && ch <= 0x1f1ff) ||
|
||||
// Miscellaneous Symbols (hearts, stars, weather)
|
||||
(ch >= 0x2600 && ch <= 0x26ff) ||
|
||||
// Dingbats (scissors, pencils, etc)
|
||||
(ch >= 0x2700 && ch <= 0x27bf) ||
|
||||
// Supplemental Symbols and Pictographs (newer emojis)
|
||||
(ch >= 0x1f900 && ch <= 0x1f9ff) ||
|
||||
// Symbols and Pictographs Extended-A (newest emojis)
|
||||
(ch >= 0x1fa70 && ch <= 0x1faff) ||
|
||||
// Various Asian Characters with emoji presentation
|
||||
(ch >= 0x1f018 && ch <= 0x1f270) ||
|
||||
// Variation Selectors (for emoji presentation)
|
||||
(ch >= 0xfe00 && ch <= 0xfe0f) ||
|
||||
// Additional miscellaneous items
|
||||
(ch >= 0x238c && ch <= 0x2454) ||
|
||||
// Combining Diacritical Marks for Symbols
|
||||
(ch >= 0x20d0 && ch <= 0x20ff) ||
|
||||
// Latin-1 Supplement (includes ², ³, ¹ and other special chars)
|
||||
(ch >= 0x00a0 && ch <= 0x00ff) ||
|
||||
// Greek and Coptic (U+0370-U+03FF)
|
||||
(ch >= 0x0370 && ch <= 0x03ff) ||
|
||||
// Mathematical Alphanumeric Symbols (U+1D400-U+1D7FF)
|
||||
(ch >= 0x1d400 && ch <= 0x1d7ff) ||
|
||||
// Mathematical Operators (U+2200-U+22FF)
|
||||
(ch >= 0x2200 && ch <= 0x22ff) ||
|
||||
// Superscripts and Subscripts (U+2070-U+209F)
|
||||
(ch >= 0x2070 && ch <= 0x209f) ||
|
||||
// Arrows (U+2190-U+21FF)
|
||||
(ch >= 0x2190 && ch <= 0x21ff) ||
|
||||
// Hiragana (U+3040-U+309F)
|
||||
(ch >= 0x3040 && ch <= 0x309f) ||
|
||||
// Katakana (U+30A0-U+30FF)
|
||||
(ch >= 0x30a0 && ch <= 0x30ff) ||
|
||||
// CJK Unified Ideographs (U+4E00-U+9FFF)
|
||||
(ch >= 0x4e00 && ch <= 0x9fff)
|
||||
)
|
||||
}
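// Quick checks (hypothetical): isEmojiOrUnicode(0x1f990 /* 🦐 */) and
// isEmojiOrUnicode(0x03b1 /* α */) are true; ASCII 'a' (0x61) is not, and is
// handled by isLowercaseLetter instead.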
|
||||
|
||||
const getCharSize = (ch: number) => (ch > 0xffff ? 2 : 1) // emoji takes 2 UTF-16 code units
|
||||
|
||||
export const pipeStartsLineTokenizer = new ExternalTokenizer((input: InputStream, stack: Stack) => {
|
||||
const ch = input.peek(0)
|
||||
|
||||
if (ch !== 10 /* \n */) return
|
||||
|
||||
// ignore whitespace
|
||||
let offset = 1
|
||||
let lastNewlineOffset = 0
|
||||
|
||||
while (true) {
|
||||
const ch = input.peek(offset)
|
||||
if (ch === 10 /* \n */) {
|
||||
lastNewlineOffset = offset
|
||||
offset++
|
||||
} else if (isWhiteSpace(ch)) {
|
||||
offset++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// look for pipe after skipping empty lines
|
||||
if (input.peek(offset) === 124 /* | */) {
|
||||
input.advance(lastNewlineOffset + 1)
|
||||
input.acceptToken(pipeStartsLine)
|
||||
} else {
|
||||
input.advance(1)
|
||||
input.acceptToken(newline)
|
||||
}
|
||||
})
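// Sketch of the effect (hypothetical input): in
//
//   fetch url
//     | parse-json
//
// the tokenizer sees the newline, skips the indentation, finds '|', and
// emits pipeStartsLine so the pipe continues the previous expression; a
// newline followed by anything else emits a plain newline token instead.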
|
||||
|
|
@@ -475,12 +475,12 @@ const isStringDelim = (ch: number): boolean => {
|
|||
return ch === c`'` || ch === c`"`
|
||||
}
|
||||
|
||||
const isIdentStart = (char: number | string): boolean => {
|
||||
export const isIdentStart = (char: number | string): boolean => {
|
||||
let ch = typeof char === 'string' ? char.charCodeAt(0) : char
|
||||
return isLowercaseLetter(ch) || isEmojiOrUnicode(ch) || ch === 36 /* $ */
|
||||
}
|
||||
|
||||
const isIdentChar = (char: number | string): boolean => {
|
||||
export const isIdentChar = (char: number | string): boolean => {
|
||||
let ch = typeof char === 'string' ? char.charCodeAt(0) : char
|
||||
return isIdentStart(ch) || isDigit(ch) || ch === 45 /* - */ || ch === 63 /* ? */
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -2,36 +2,13 @@ import { expect } from 'bun:test'
|
|||
import { diffLines } from 'diff'
|
||||
import color from 'kleur'
|
||||
import { Scanner, TokenType, type Token } from '#parser/tokenizer2'
|
||||
import { parser } from '#parser/shrimp'
|
||||
import { setGlobals } from '#parser/tokenizer'
|
||||
import { parse } from '#parser/parser2'
|
||||
import { parse, setGlobals } from '#parser/parser2'
|
||||
import { Tree } from '#parser/node'
|
||||
import { globals as prelude } from '#prelude'
|
||||
import { $ } from 'bun'
|
||||
import { assert, errorMessage } from '#utils/utils'
|
||||
import { Compiler } from '#compiler/compiler'
|
||||
import { run, VM } from 'reefvm'
|
||||
import { treeToString2, treeToString, VMResultToValue } from '#utils/tree'
|
||||
|
||||
const regenerateParser = async () => {
|
||||
let generate = true
|
||||
try {
|
||||
const grammarStat = await Bun.file('./src/parser/shrimp.grammar').stat()
|
||||
const tokenizerStat = await Bun.file('./src/parser/tokenizer.ts').stat()
|
||||
const parserStat = await Bun.file('./src/parser/shrimp.ts').stat()
|
||||
|
||||
if (grammarStat.mtime <= parserStat.mtime && tokenizerStat.mtime <= parserStat.mtime) {
|
||||
generate = false
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Error checking or regenerating parser:', e)
|
||||
} finally {
|
||||
if (generate) {
|
||||
await $`bun generate-parser`
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await regenerateParser()
|
||||
import { treeToString2, VMResultToValue } from '#utils/tree'
|
||||
|
||||
// Type declaration for TypeScript
|
||||
declare module 'bun:test' {
|
||||
|
|
@@ -73,7 +50,8 @@ expect.extend({
|
|||
assert(typeof received === 'string', 'toFailParse can only be used with string values')
|
||||
|
||||
try {
|
||||
const tree = parser.parse(received)
|
||||
const node = parse(received)
|
||||
const tree = new Tree(node)
|
||||
let hasErrors = false
|
||||
tree.iterate({
|
||||
enter(n) {
|
||||
|
|
@@ -90,7 +68,7 @@ expect.extend({
|
|||
pass: true,
|
||||
}
|
||||
} else {
|
||||
const actual = treeToString(tree, received)
|
||||
const actual = treeToString2(node, received)
|
||||
return {
|
||||
message: () => `Expected input to fail parsing, but it parsed successfully:\n${actual}`,
|
||||
pass: false,
|
||||
|
|
|
|||