remove lezer parser & grammar

parent e45a6d9bf7
commit 87cb01392a
@@ -5,9 +5,8 @@
   "private": true,
   "type": "module",
   "scripts": {
-    "dev": "bun generate-parser && bun --hot src/server/server.tsx",
-    "generate-parser": "lezer-generator src/parser/shrimp.grammar --typeScript -o src/parser/shrimp.ts",
-    "repl": "bun generate-parser && bun bin/repl",
+    "dev": "bun --hot src/server/server.tsx",
+    "repl": "bun bin/repl",
     "update-reef": "rm -rf ~/.bun/install/cache/ && rm bun.lock && bun update reefvm",
     "cli:install": "ln -s \"$(pwd)/bin/shrimp\" ~/.bun/bin/shrimp",
     "cli:remove": "rm ~/.bun/bin/shrimp",
@@ -1,9 +1,6 @@
 import { CompilerError } from '#compiler/compilerError.ts'
-import { parse } from '#parser/parser2'
+import { parse, setGlobals } from '#parser/parser2'
 import { SyntaxNode, Tree } from '#parser/node'
-import { parser } from '#parser/shrimp.ts'
-import * as terms from '#parser/shrimp.terms'
-import { setGlobals } from '#parser/tokenizer'
 import { tokenizeCurlyString } from '#parser/curlyTokenizer'
 import { assert, errorMessage } from '#utils/utils'
 import { toBytecode, type Bytecode, type ProgramItem, bytecodeToString } from 'reefvm'
@@ -91,7 +88,7 @@ export class Compiler {
   }

   #compileCst(cst: Tree, input: string) {
-    const isProgram = cst.topNode.type.id === terms.Program
+    const isProgram = cst.topNode.type.is('Program')
     assert(isProgram, `Expected Program node, got ${cst.topNode.type.name}`)

     let child = cst.topNode.firstChild
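The change above is the heart of this commit: node types are now compared by string name through `type.is(...)` instead of by the numeric term ids that lezer-generator emitted into shrimp.terms. A minimal sketch of the two styles, using only names that appear in this diff:

// before: numeric ids generated by lezer-generator
// import * as terms from '#parser/shrimp.terms'
// if (node.type.id === terms.Program) { ... }

// after: compare against the node's string name
if (node.type.is('Program')) {
  // handle a Program node
}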
@@ -107,8 +104,8 @@ export class Compiler {
     const value = input.slice(node.from, node.to)
     if (DEBUG) console.log(`🫦 ${node.name}: ${value}`)

-    switch (node.type.id) {
+    switch (node.type.name) {
-      case terms.Number:
+      case 'Number':
         // Handle sign prefix for hex, binary, and octal literals
         // Number() doesn't parse '-0xFF', '+0xFF', '-0o77', etc. correctly
         let numberValue: number
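The comment above is easy to verify: JavaScript's Number() accepts unsigned radix literals but returns NaN once a sign prefix is attached, so the compiler strips the sign and re-applies it. A quick sketch in plain TypeScript (not code from this repo):

Number('0xFF')   // 255
Number('-0xFF')  // NaN - signs are not parsed for hex/binary/octal literals

const raw = '-0xFF'
const sign = raw[0] === '-' ? -1 : 1
const digits = raw[0] === '-' || raw[0] === '+' ? raw.slice(1) : raw
const parsed = sign * Number(digits) // -255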
@@ -125,8 +122,8 @@ export class Compiler {

         return [[`PUSH`, numberValue]]

-      case terms.String: {
+      case 'String': {
-        if (node.firstChild?.type.id === terms.CurlyString)
+        if (node.firstChild?.type.is('CurlyString'))
           return this.#compileCurlyString(value, input)

         const { parts, hasInterpolation } = getStringParts(node, input)

@@ -143,19 +140,19 @@ export class Compiler {
         parts.forEach((part) => {
           const partValue = input.slice(part.from, part.to)

-          switch (part.type.id) {
+          switch (part.type.name) {
-            case terms.StringFragment:
+            case 'StringFragment':
               // Plain text fragment - just push as-is
               instructions.push(['PUSH', partValue])
               break

-            case terms.EscapeSeq:
+            case 'EscapeSeq':
               // Process escape sequence and push the result
               const processed = processEscapeSeq(partValue)
               instructions.push(['PUSH', processed])
               break

-            case terms.Interpolation:
+            case 'Interpolation':
               // Interpolation contains either Identifier or ParenExpr (the $ is anonymous)
               const child = part.firstChild
               if (!child) {

@@ -179,15 +176,15 @@ export class Compiler {
         return instructions
       }

-      case terms.Boolean: {
+      case 'Boolean': {
         return [[`PUSH`, value === 'true']]
       }

-      case terms.Null: {
+      case 'Null': {
         return [[`PUSH`, null]]
       }

-      case terms.Regex: {
+      case 'Regex': {
         // remove the surrounding slashes and any flags
         const [_, pattern, flags] = value.match(/^\/\/(.*)\/\/([gimsuy]*)$/) || []
         if (!pattern) {

@@ -204,15 +201,15 @@ export class Compiler {
         return [['PUSH', regex]]
       }

-      case terms.Identifier: {
+      case 'Identifier': {
         return [[`TRY_LOAD`, value]]
       }

-      case terms.Word: {
+      case 'Word': {
         return [['PUSH', value]]
       }

-      case terms.DotGet: {
+      case 'DotGet': {
         // DotGet is parsed into a nested tree because it's hard to parse it into a flat one.
         // However, we want a flat tree - so we're going to pretend like we are getting one from the parser.
         //
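Shrimp regex literals are delimited by double slashes, which is why the extraction pattern above looks doubled. A standalone sketch of what that match produces, with values assumed from the pattern rather than from repo tests:

const value = '//ab+c//gi'
const [_, pattern, flags] = value.match(/^\/\/(.*)\/\/([gimsuy]*)$/) || []
// pattern === 'ab+c', flags === 'gi'
const regex = new RegExp(pattern, flags)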
@@ -224,7 +221,7 @@ export class Compiler {
         instructions.push(['TRY_LOAD', objectName])

         const flattenProperty = (prop: SyntaxNode): void => {
-          if (prop.type.id === terms.DotGet) {
+          if (prop.type.is('DotGet')) {
             const nestedParts = getDotGetParts(prop, input)

             const nestedObjectValue = input.slice(nestedParts.object.from, nestedParts.object.to)

@@ -233,7 +230,7 @@ export class Compiler {

             flattenProperty(nestedParts.property)
           } else {
-            if (prop.type.id === terms.ParenExpr) {
+            if (prop.type.is('ParenExpr')) {
               instructions.push(...this.#compileNode(prop, input))
             } else {
               const propertyValue = input.slice(prop.from, prop.to)
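The flattening walk above turns the parser's nested DotGet chain into a linear instruction sequence. A sketch of the shape, with the node structure inferred from this diff and the emitted accesses described loosely rather than as concrete opcodes:

// `a.b.c` parses as DotGet(a, DotGet(b, c)) - nested, not flat.
// flattenProperty recurses into each nested DotGet and emits one
// property access per level, so the compiled output reads roughly as:
//   TRY_LOAD a
//   ...access property 'b'...
//   ...access property 'c'...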
@@ -247,7 +244,7 @@ export class Compiler {
         return instructions
       }

-      case terms.BinOp: {
+      case 'BinOp': {
         const { left, op, right } = getBinaryParts(node)
         const instructions: ProgramItem[] = []
         instructions.push(...this.#compileNode(left, input))

@@ -295,7 +292,7 @@ export class Compiler {
         return instructions
       }

-      case terms.Assign: {
+      case 'Assign': {
         const assignParts = getAssignmentParts(node)
         const instructions: ProgramItem[] = []

@@ -326,7 +323,7 @@ export class Compiler {
         return instructions
       }

-      case terms.CompoundAssign: {
+      case 'CompoundAssign': {
         const { identifier, operator, right } = getCompoundAssignmentParts(node)
         const identifierName = input.slice(identifier.from, identifier.to)
         const instructions: ProgramItem[] = []

@@ -388,14 +385,14 @@ export class Compiler {
         return instructions
       }

-      case terms.ParenExpr: {
+      case 'ParenExpr': {
         const child = node.firstChild
         if (!child) return [] // I guess it is empty parentheses?

         return this.#compileNode(child, input)
       }

-      case terms.FunctionDef: {
+      case 'FunctionDef': {
         const { paramNames, bodyNodes, catchVariable, catchBody, finallyBody } =
           getFunctionDefParts(node, input)
         const instructions: ProgramItem[] = []

@@ -441,8 +438,8 @@ export class Compiler {
         return instructions
       }

-      case terms.FunctionCallOrIdentifier: {
+      case 'FunctionCallOrIdentifier': {
-        if (node.firstChild?.type.id === terms.DotGet) {
+        if (node.firstChild?.type.is('DotGet')) {
           const instructions: ProgramItem[] = []
           const callLabel: Label = `.call_dotget_${++this.labelCount}`
           const afterLabel: Label = `.after_dotget_${++this.labelCount}`

@@ -484,8 +481,8 @@ export class Compiler {
         PUSH 1 ; Named count
         CALL
       */
-      case terms.FunctionCallWithNewlines:
-      case terms.FunctionCall: {
+      case 'FunctionCall': {
         const { identifierNode, namedArgs, positionalArgs } = getFunctionCallParts(node, input)
         const instructions: ProgramItem[] = []
         instructions.push(...this.#compileNode(identifierNode, input))

@@ -507,7 +504,7 @@ export class Compiler {
         return instructions
       }

-      case terms.Block: {
+      case 'Block': {
         const children = getAllChildren(node)
         const instructions: ProgramItem[] = []

@@ -522,7 +519,7 @@ export class Compiler {
         return instructions
       }

-      case terms.FunctionCallWithBlock: {
+      case 'FunctionCallWithBlock': {
         const [fn, _colon, ...block] = getAllChildren(node)
         let instructions: ProgramItem[] = []

@@ -540,13 +537,13 @@ export class Compiler {
         instructions.push(['RETURN'])
         instructions.push([`${afterLabel}:`])

-        if (fn?.type.id === terms.FunctionCallOrIdentifier) {
+        if (fn?.type.is('FunctionCallOrIdentifier')) {
           instructions.push(['LOAD', input.slice(fn!.from, fn!.to)])
           instructions.push(['MAKE_FUNCTION', [], fnLabel])
           instructions.push(['PUSH', 1])
           instructions.push(['PUSH', 0])
           instructions.push(['CALL'])
-        } else if (fn?.type.id === terms.FunctionCall) {
+        } else if (fn?.type.is('FunctionCall')) {
           let body = this.#compileNode(fn!, input)
           const namedArgCount = (body[body.length - 2]![1] as number) * 2
           const startSlice = body.length - namedArgCount - 3

@@ -569,7 +566,7 @@ export class Compiler {
         return instructions
       }

-      case terms.TryExpr: {
+      case 'TryExpr': {
         const { tryBlock, catchVariable, catchBody, finallyBody } = getTryExprParts(node, input)

         return this.#compileTryCatchFinally(

@@ -581,9 +578,9 @@ export class Compiler {
         )
       }

-      case terms.Throw:
+      case 'Throw':
-      case terms.Not: {
+      case 'Not': {
-        const keyword = node.type.id === terms.Throw ? 'Throw' : 'Not'
+        const keyword = node.type.is('Throw') ? 'Throw' : 'Not'
         const children = getAllChildren(node)
         const [_throwKeyword, expression] = children
         if (!expression) {

@@ -601,7 +598,7 @@ export class Compiler {
         return instructions
       }

-      case terms.IfExpr: {
+      case 'IfExpr': {
         const { conditionNode, thenBlock, elseIfBlocks, elseThenBlock } = getIfExprParts(
           node,
           input

@@ -644,7 +641,7 @@ export class Compiler {
       }

       // - `EQ`, `NEQ`, `LT`, `GT`, `LTE`, `GTE` - Pop 2, push boolean
-      case terms.ConditionalOp: {
+      case 'ConditionalOp': {
         const instructions: ProgramItem[] = []
         const { left, op, right } = getBinaryParts(node)
         const leftInstructions: ProgramItem[] = this.#compileNode(left, input)

@@ -719,7 +716,7 @@ export class Compiler {
         return instructions
       }

-      case terms.PipeExpr: {
+      case 'PipeExpr': {
         const { pipedFunctionCall, pipeReceivers } = getPipeExprParts(node)
         if (!pipedFunctionCall || pipeReceivers.length === 0) {
           throw new CompilerError('PipeExpr must have at least two operands', node.from, node.to)

@@ -741,11 +738,11 @@ export class Compiler {
         instructions.push(...this.#compileNode(identifierNode, input))

         const isUnderscoreInPositionalArgs = positionalArgs.some(
-          (arg) => arg.type.id === terms.Underscore
+          (arg) => arg.type.is('Underscore')
         )
         const isUnderscoreInNamedArgs = namedArgs.some((arg) => {
           const { valueNode } = getNamedArgParts(arg, input)
-          return valueNode.type.id === terms.Underscore
+          return valueNode.type.is('Underscore')
         })

         const shouldPushPositionalArg = !isUnderscoreInPositionalArgs && !isUnderscoreInNamedArgs

@@ -756,7 +753,7 @@ export class Compiler {
         }

         positionalArgs.forEach((arg) => {
-          if (arg.type.id === terms.Underscore) {
+          if (arg.type.is('Underscore')) {
             instructions.push(['LOAD', pipeValName])
           } else {
             instructions.push(...this.#compileNode(arg, input))

@@ -766,7 +763,7 @@ export class Compiler {
         namedArgs.forEach((arg) => {
           const { name, valueNode } = getNamedArgParts(arg, input)
           instructions.push(['PUSH', name])
-          if (valueNode.type.id === terms.Underscore) {
+          if (valueNode.type.is('Underscore')) {
             instructions.push(['LOAD', pipeValName])
           } else {
             instructions.push(...this.#compileNode(valueNode, input))

@@ -781,14 +778,14 @@ export class Compiler {
         return instructions
       }

-      case terms.Array: {
+      case 'Array': {
         const children = getAllChildren(node)

         // We can easily parse [=] as an empty dict, but `[ = ]` is tougher.
         // = can be a valid word, and is also valid inside words, so for now we cheat
         // and check for arrays that look like `[ = ]` to interpret them as
         // empty dicts
-        if (children.length === 1 && children[0]!.type.id === terms.Word) {
+        if (children.length === 1 && children[0]!.type.is('Word')) {
           const child = children[0]!
           if (input.slice(child.from, child.to) === '=') {
             return [['MAKE_DICT', 0]]
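The `[ = ]` special case above reads naturally as a small predicate. A sketch of the same logic pulled out of the switch, with the helper name made up for illustration:

const looksLikeEmptyDict = (children: SyntaxNode[], input: string): boolean =>
  children.length === 1 &&
  children[0]!.type.is('Word') &&
  input.slice(children[0]!.from, children[0]!.to) === '='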
@@ -800,7 +797,7 @@ export class Compiler {
         return instructions
       }

-      case terms.Dict: {
+      case 'Dict': {
         const children = getAllChildren(node)
         const instructions: ProgramItem[] = []

@@ -819,7 +816,7 @@ export class Compiler {
         return instructions
       }

-      case terms.WhileExpr: {
+      case 'WhileExpr': {
         const [_while, test, _colon, block] = getAllChildren(node)
         const instructions: ProgramItem[] = []

@@ -837,11 +834,11 @@ export class Compiler {
         return instructions
       }

-      case terms.Import: {
+      case 'Import': {
         const instructions: ProgramItem[] = []
         const [_import, ...nodes] = getAllChildren(node)
-        const args = nodes.filter(node => node.type.id === terms.Identifier)
+        const args = nodes.filter(node => node.type.is('Identifier'))
-        const namedArgs = nodes.filter(node => node.type.id === terms.NamedArg)
+        const namedArgs = nodes.filter(node => node.type.is('NamedArg'))

         instructions.push(['LOAD', 'import'])

@@ -862,13 +859,13 @@ export class Compiler {
         return instructions
       }

-      case terms.Comment: {
+      case 'Comment': {
         return [] // ignore comments
       }

       default:
         throw new CompilerError(
-          `Compiler doesn't know how to handle a "${node.type.name}" (${node.type.id}) node.`,
+          `Compiler doesn't know how to handle a "${node.type.name}" node.`,
           node.from,
           node.to
         )
@@ -1,5 +1,4 @@
 import { CompilerError } from '#compiler/compilerError.ts'
-import * as terms from '#parser/shrimp.terms'
 import type { SyntaxNode, Tree } from '#parser/node'

 export const checkTreeForErrors = (tree: Tree): CompilerError[] => {

@@ -24,7 +23,7 @@ export const getAllChildren = (node: SyntaxNode): SyntaxNode[] => {
     child = child.nextSibling
   }

-  return children.filter((n) => n.type.id !== terms.Comment)
+  return children.filter((n) => !n.type.is('Comment'))
 }

 export const getBinaryParts = (node: SyntaxNode) => {

@@ -51,12 +50,12 @@ export const getAssignmentParts = (node: SyntaxNode) => {
   }

   // array destructuring
-  if (left && left.type.id === terms.Array) {
+  if (left && left.type.is('Array')) {
-    const identifiers = getAllChildren(left).filter((child) => child.type.id === terms.Identifier)
+    const identifiers = getAllChildren(left).filter((child) => child.type.is('Identifier'))
     return { arrayPattern: identifiers, right }
   }

-  if (!left || left.type.id !== terms.AssignableIdentifier) {
+  if (!left || !left.type.is('AssignableIdentifier')) {
     throw new CompilerError(
       `Assign left child must be an AssignableIdentifier or Array, got ${left ? left.type.name : 'none'
       }`,

@@ -72,7 +71,7 @@ export const getCompoundAssignmentParts = (node: SyntaxNode) => {
   const children = getAllChildren(node)
   const [left, operator, right] = children

-  if (!left || left.type.id !== terms.AssignableIdentifier) {
+  if (!left || !left.type.is('AssignableIdentifier')) {
     throw new CompilerError(
       `CompoundAssign left child must be an AssignableIdentifier, got ${left ? left.type.name : 'none'
       }`,

@@ -103,7 +102,7 @@ export const getFunctionDefParts = (node: SyntaxNode, input: string) => {
   }

   const paramNames = getAllChildren(paramsNode).map((param) => {
-    if (param.type.id !== terms.Identifier && param.type.id !== terms.NamedParam) {
+    if (!param.type.is('Identifier') && !param.type.is('NamedParam')) {
       throw new CompilerError(
         `FunctionDef params must be Identifier or NamedParam, got ${param.type.name}`,
         param.from,

@@ -122,7 +121,7 @@ export const getFunctionDefParts = (node: SyntaxNode, input: string) => {
   let finallyBody: SyntaxNode | undefined

   for (const child of rest) {
-    if (child.type.id === terms.CatchExpr) {
+    if (child.type.is('CatchExpr')) {
       catchExpr = child
       const catchChildren = getAllChildren(child)
       const [_catchKeyword, identifierNode, _colon, body] = catchChildren

@@ -135,7 +134,7 @@ export const getFunctionDefParts = (node: SyntaxNode, input: string) => {
       }
       catchVariable = input.slice(identifierNode.from, identifierNode.to)
       catchBody = body
-    } else if (child.type.id === terms.FinallyExpr) {
+    } else if (child.type.is('FinallyExpr')) {
       finallyExpr = child
       const finallyChildren = getAllChildren(child)
       const [_finallyKeyword, _colon, body] = finallyChildren

@@ -164,9 +163,9 @@ export const getFunctionCallParts = (node: SyntaxNode, input: string) => {
     throw new CompilerError(`FunctionCall expected at least 1 child, got 0`, node.from, node.to)
   }

-  const namedArgs = args.filter((arg) => arg.type.id === terms.NamedArg)
+  const namedArgs = args.filter((arg) => arg.type.is('NamedArg'))
   const positionalArgs = args
-    .filter((arg) => arg.type.id === terms.PositionalArg)
+    .filter((arg) => arg.type.is('PositionalArg'))
     .map((arg) => {
       const child = arg.firstChild
       if (!child) throw new CompilerError(`PositionalArg has no child`, arg.from, arg.to)

@@ -207,13 +206,13 @@ export const getIfExprParts = (node: SyntaxNode, input: string) => {
   rest.forEach((child) => {
     const parts = getAllChildren(child)

-    if (child.type.id === terms.ElseExpr) {
+    if (child.type.is('ElseExpr')) {
       if (parts.length !== 3) {
         const message = `ElseExpr expected 1 child, got ${parts.length}`
         throw new CompilerError(message, child.from, child.to)
       }
       elseThenBlock = parts.at(-1)
-    } else if (child.type.id === terms.ElseIfExpr) {
+    } else if (child.type.is('ElseIfExpr')) {
       const [_else, _if, conditional, _colon, thenBlock] = parts
       if (!conditional || !thenBlock) {
         const names = parts.map((p) => p.type.name).join(', ')

@@ -248,10 +247,10 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
   // The text is just between the quotes
   const parts = children.filter((child) => {
     return (
-      child.type.id === terms.StringFragment ||
+      child.type.is('StringFragment') ||
-      child.type.id === terms.Interpolation ||
+      child.type.is('Interpolation') ||
-      child.type.id === terms.EscapeSeq ||
+      child.type.is('EscapeSeq') ||
-      child.type.id === terms.CurlyString
+      child.type.is('CurlyString')
     )
   })
@@ -259,10 +258,10 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
   // Validate each part is the expected type
   parts.forEach((part) => {
     if (
-      part.type.id !== terms.StringFragment &&
+      !part.type.is('StringFragment') &&
-      part.type.id !== terms.Interpolation &&
+      !part.type.is('Interpolation') &&
-      part.type.id !== terms.EscapeSeq &&
+      !part.type.is('EscapeSeq') &&
-      part.type.id !== terms.CurlyString
+      !part.type.is('CurlyString')
     ) {
       throw new CompilerError(
         `String child must be StringFragment, Interpolation, or EscapeSeq, got ${part.type.name}`,
@@ -275,7 +274,7 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
   // hasInterpolation means the string has interpolation ($var) or escape sequences (\n)
   // A simple string like 'hello' has one StringFragment but no interpolation
   const hasInterpolation = parts.some(
-    (p) => p.type.id === terms.Interpolation || p.type.id === terms.EscapeSeq
+    (p) => p.type.is('Interpolation') || p.type.is('EscapeSeq')
   )
   return { parts, hasInterpolation }
 }

@@ -292,7 +291,7 @@ export const getDotGetParts = (node: SyntaxNode, input: string) => {
     )
   }

-  if (object.type.id !== terms.IdentifierBeforeDot && object.type.id !== terms.Dollar) {
+  if (!object.type.is('IdentifierBeforeDot')) {
     throw new CompilerError(
       `DotGet object must be an IdentifierBeforeDot, got ${object.type.name}`,
       object.from,

@@ -300,7 +299,7 @@ export const getDotGetParts = (node: SyntaxNode, input: string) => {
     )
   }

-  if (![terms.Identifier, terms.Number, terms.ParenExpr, terms.DotGet].includes(property.type.id)) {
+  if (!['Identifier', 'Number', 'ParenExpr', 'DotGet'].includes(property.type.name)) {
     throw new CompilerError(
       `DotGet property must be an Identifier, Number, ParenExpr, or DotGet, got ${property.type.name}`,
       property.from,

@@ -334,7 +333,7 @@ export const getTryExprParts = (node: SyntaxNode, input: string) => {
   let finallyBody: SyntaxNode | undefined

   rest.forEach((child) => {
-    if (child.type.id === terms.CatchExpr) {
+    if (child.type.is('CatchExpr')) {
       catchExpr = child
       const catchChildren = getAllChildren(child)
       const [_catchKeyword, identifierNode, _colon, body] = catchChildren

@@ -347,7 +346,7 @@ export const getTryExprParts = (node: SyntaxNode, input: string) => {
     }
     catchVariable = input.slice(identifierNode.from, identifierNode.to)
     catchBody = body
-  } else if (child.type.id === terms.FinallyExpr) {
+  } else if (child.type.is('FinallyExpr')) {
     finallyExpr = child
     const finallyChildren = getAllChildren(child)
     const [_finallyKeyword, _colon, body] = finallyChildren
@@ -3,7 +3,7 @@ import { VM, fromValue, toValue, isValue, type Bytecode } from 'reefvm'
 import { Compiler } from '#compiler/compiler'
 import { parse } from '#parser/parser2'
 import { Tree } from '#parser/node'
-import { globals as parserGlobals, setGlobals as setParserGlobals } from '#parser/tokenizer'
+import { globals as parserGlobals, setGlobals as setParserGlobals } from '#parser/parser2'
 import { globals as prelude } from '#prelude'

 export { Compiler } from '#compiler/compiler'
@@ -1,7 +1,6 @@
-import { parser } from '#parser/shrimp.ts'
 import { parse } from '#parser/parser2'
 import type { SyntaxNode } from '#parser/node'
-import { isIdentStart, isIdentChar } from './tokenizer'
+import { isIdentStart, isIdentChar } from './tokenizer2'

 // Turns a { curly string } into strings and nodes for interpolation
 export const tokenizeCurlyString = (value: string): (string | [string, SyntaxNode])[] => {
@@ -1,5 +1,4 @@
 import { type Token, TokenType } from './tokenizer2'
-import * as term from './shrimp.terms'

 export type NodeType =
   | 'Program'

@@ -140,183 +139,6 @@ export class Tree {
   }
 }

-// TODO: TEMPORARY SHIM
-class SyntaxNodeType {
-  constructor(public nodeType: NodeType, public isError: boolean) { }
-
-  is(other: string) {
-    return this.nodeType === other
-  }
-
-  get id(): number {
-    switch (this.nodeType) {
-      case 'Program':
-        return term.Program
-      case 'Block':
-        return term.Block
-      case 'FunctionCall':
-        return term.FunctionCall
-      case 'FunctionCallOrIdentifier':
-        return term.FunctionCallOrIdentifier
-      case 'FunctionCallWithBlock':
-        return term.FunctionCallWithBlock
-      case 'PositionalArg':
-        return term.PositionalArg
-      case 'NamedArg':
-        return term.NamedArg
-      case 'FunctionDef':
-        return term.FunctionDef
-      case 'Params':
-        return term.Params
-      case 'NamedParam':
-        return term.NamedParam
-      case 'Null':
-        return term.Null
-      case 'Boolean':
-        return term.Boolean
-      case 'Number':
-        return term.Number
-      case 'String':
-        return term.String
-      case 'StringFragment':
-        return term.StringFragment
-      case 'CurlyString':
-        return term.CurlyString
-      case 'DoubleQuote':
-        return term.DoubleQuote
-      case 'EscapeSeq':
-        return term.EscapeSeq
-      case 'Interpolation':
-        return term.Interpolation
-      case 'Regex':
-        return term.Regex
-      case 'Identifier':
-        return term.Identifier
-      case 'AssignableIdentifier':
-        return term.AssignableIdentifier
-      case 'IdentifierBeforeDot':
-        return term.IdentifierBeforeDot
-      case 'Word':
-        return term.Word
-      case 'Array':
-        return term.Array
-      case 'Dict':
-        return term.Dict
-      case 'Comment':
-        return term.Comment
-      case 'BinOp':
-        return term.BinOp
-      case 'ConditionalOp':
-        return term.ConditionalOp
-      case 'ParenExpr':
-        return term.ParenExpr
-      case 'Assign':
-        return term.Assign
-      case 'CompoundAssign':
-        return term.CompoundAssign
-      case 'DotGet':
-        return term.DotGet
-      case 'PipeExpr':
-        return term.PipeExpr
-      case 'IfExpr':
-        return term.IfExpr
-      case 'ElseIfExpr':
-        return term.ElseIfExpr
-      case 'ElseExpr':
-        return term.ElseExpr
-      case 'WhileExpr':
-        return term.WhileExpr
-      case 'TryExpr':
-        return term.TryExpr
-      case 'CatchExpr':
-        return term.CatchExpr
-      case 'FinallyExpr':
-        return term.FinallyExpr
-      case 'Throw':
-        return term.Throw
-      case 'Not':
-        return term.Not
-      case 'Eq':
-        return term.Eq
-      case 'Modulo':
-        return term.Modulo
-      case 'Plus':
-        return term.Plus
-      case 'Star':
-        return term.Star
-      case 'Slash':
-        return term.Slash
-      case 'Import':
-        return term.Import
-      case 'Do':
-        return term.Do
-      case 'Underscore':
-        return term.Underscore
-      case 'colon':
-        return term.colon
-      case 'keyword':
-        return term.keyword
-    }
-    return 0
-  }
-
-  get name(): string {
-    return this.nodeType
-  }
-}
-
 export class SyntaxNode {
   #type: NodeType
   #isError = false
@@ -336,8 +158,13 @@ export class SyntaxNode {
     return new SyntaxNode(TokenType[token.type] as NodeType, token.from, token.to, parent ?? null)
   }

-  get type(): SyntaxNodeType {
+  get type(): { type: NodeType, name: NodeType, isError: boolean, is: (other: NodeType) => boolean } {
-    return new SyntaxNodeType(this.#type, this.#isError)
+    return {
+      type: this.#type,
+      name: this.#type,
+      isError: this.#isError,
+      is: (other: NodeType) => other === this.#type
+    }
   }

   set type(name: NodeType) {
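With the shim gone, `type` returns a plain object whose `name` and `is()` both work off the string node type. A minimal usage sketch of the getter defined just above (the parsed input is illustrative):

import { parse } from '#parser/parser2'

const node = parse('1 + 2')
node.type.name            // e.g. 'Program'
node.type.is('Program')   // true - string comparison, no numeric ids
node.type.isError         // false for a successful parse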
@@ -1,99 +0,0 @@
-import { ExternalTokenizer, InputStream } from '@lezer/lr'
-import * as terms from './shrimp.terms'
-
-type Operator = { str: string; tokenName: keyof typeof terms }
-const operators: Array<Operator> = [
-  { str: 'and', tokenName: 'And' },
-  { str: 'or', tokenName: 'Or' },
-  { str: 'band', tokenName: 'Band' },
-  { str: 'bor', tokenName: 'Bor' },
-  { str: 'bxor', tokenName: 'Bxor' },
-  { str: '>>>', tokenName: 'Ushr' }, // Must come before >>
-  { str: '>>', tokenName: 'Shr' },
-  { str: '<<', tokenName: 'Shl' },
-  { str: '>=', tokenName: 'Gte' },
-  { str: '<=', tokenName: 'Lte' },
-  { str: '!=', tokenName: 'Neq' },
-  { str: '==', tokenName: 'EqEq' },
-
-  // Compound assignment operators (must come before single-char operators)
-  { str: '??=', tokenName: 'NullishEq' },
-  { str: '+=', tokenName: 'PlusEq' },
-  { str: '-=', tokenName: 'MinusEq' },
-  { str: '*=', tokenName: 'StarEq' },
-  { str: '/=', tokenName: 'SlashEq' },
-  { str: '%=', tokenName: 'ModuloEq' },
-
-  // Nullish coalescing (must come before it could be mistaken for other tokens)
-  { str: '??', tokenName: 'NullishCoalesce' },
-
-  // Single-char operators
-  { str: '*', tokenName: 'Star' },
-  { str: '=', tokenName: 'Eq' },
-  { str: '/', tokenName: 'Slash' },
-  { str: '+', tokenName: 'Plus' },
-  { str: '-', tokenName: 'Minus' },
-  { str: '>', tokenName: 'Gt' },
-  { str: '<', tokenName: 'Lt' },
-  { str: '%', tokenName: 'Modulo' },
-]
-
-export const operatorTokenizer = new ExternalTokenizer((input: InputStream) => {
-  for (let operator of operators) {
-    if (!matchesString(input, 0, operator.str)) continue
-    const afterOpPos = operator.str.length
-    const charAfterOp = input.peek(afterOpPos)
-    if (!isWhitespace(charAfterOp)) continue
-
-    // Accept the operator token
-    const token = terms[operator.tokenName]
-    if (token === undefined) {
-      throw new Error(`Unknown token name: ${operator.tokenName}`)
-    }
-
-    input.advance(afterOpPos)
-    input.acceptToken(token)
-
-    return
-  }
-})
-
-const isWhitespace = (ch: number): boolean => {
-  return matchesChar(ch, [' ', '\t', '\n'])
-}
-
-const matchesChar = (ch: number, chars: (string | number)[]): boolean => {
-  for (const c of chars) {
-    if (typeof c === 'number') {
-      if (ch === c) {
-        return true
-      }
-    } else if (ch === c.charCodeAt(0)) {
-      return true
-    }
-  }
-  return false
-}
-
-const matchesString = (input: InputStream, pos: number, str: string): boolean => {
-  for (let i = 0; i < str.length; i++) {
-    if (input.peek(pos + i) !== str.charCodeAt(i)) {
-      return false
-    }
-  }
-  return true
-}
-
-const peek = (numChars: number, input: InputStream): string => {
-  let result = ''
-  for (let i = 0; i < numChars; i++) {
-    const ch = input.peek(i)
-    if (ch === -1) {
-      result += 'EOF'
-      break
-    } else {
-      result += String.fromCharCode(ch)
-    }
-  }
-  return result
-}
@@ -1,11 +1,17 @@
 import { CompilerError } from '#compiler/compilerError'
 import { Scanner, type Token, TokenType } from './tokenizer2'
 import { SyntaxNode, operators, precedence, conditionals, compounds } from './node'
-import { globals } from './tokenizer'
 import { parseString } from './stringParser'

 const $T = TokenType

+// tell the dotGet searcher about builtin globals
+export const globals: string[] = []
+export const setGlobals = (newGlobals: string[] | Record<string, any>) => {
+  globals.length = 0
+  globals.push(...(Array.isArray(newGlobals) ? newGlobals : Object.keys(newGlobals)))
+}
+
 export const parse = (input: string): SyntaxNode => {
   const parser = new Parser()
   return parser.parse(input)
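setGlobals now lives next to the hand-written parser instead of the old lezer tokenizer, and accepts either an array of names or any object whose keys become global names. A short usage sketch, with the global names made up for illustration:

import { setGlobals, parse } from '#parser/parser2'

setGlobals(['print', 'math'])              // array form
setGlobals({ print: () => {}, math: {} })  // record form: keys become globals
const tree = parse('math.floor 1.5')       // dotGet now knows `math` is a global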
@@ -1,129 +0,0 @@
-import { ContextTracker, InputStream } from '@lezer/lr'
-import * as terms from './shrimp.terms'
-
-export class Scope {
-  constructor(public parent: Scope | null, public vars = new Set<string>()) { }
-
-  has(name: string): boolean {
-    return this.vars.has(name) || (this.parent?.has(name) ?? false)
-  }
-
-  hash(): number {
-    let h = 0
-    for (const name of this.vars) {
-      for (let i = 0; i < name.length; i++) {
-        h = (h << 5) - h + name.charCodeAt(i)
-        h |= 0
-      }
-    }
-    if (this.parent) {
-      h = (h << 5) - h + this.parent.hash()
-      h |= 0
-    }
-    return h
-  }
-
-  // Static methods that return new Scopes (immutable operations)
-  static add(scope: Scope, ...names: string[]): Scope {
-    const newVars = new Set(scope.vars)
-    names.forEach((name) => newVars.add(name))
-    return new Scope(scope.parent, newVars)
-  }
-
-  push(): Scope {
-    return new Scope(this, new Set())
-  }
-
-  pop(): Scope {
-    return this.parent ?? this
-  }
-}
-
-// Tracker context that combines Scope with temporary pending identifiers
-class TrackerContext {
-  constructor(public scope: Scope, public pendingIds: string[] = []) { }
-}
-
-// Extract identifier text from input stream
-const readIdentifierText = (input: InputStream, start: number, end: number): string => {
-  let text = ''
-  for (let i = start; i < end; i++) {
-    const offset = i - input.pos
-    const ch = input.peek(offset)
-    if (ch === -1) break
-    text += String.fromCharCode(ch)
-  }
-  return text
-}
-
-let inParams = false
-
-export const trackScope = new ContextTracker<TrackerContext>({
-  start: new TrackerContext(new Scope(null, new Set())),
-
-  shift(context, term, stack, input) {
-    if (term == terms.Do) inParams = true
-
-    if (term === terms.AssignableIdentifier) {
-      const text = readIdentifierText(input, input.pos, stack.pos)
-      return new TrackerContext(Scope.add(context.scope, text), context.pendingIds)
-    }
-
-    if (inParams && term === terms.Identifier) {
-      const text = readIdentifierText(input, input.pos, stack.pos)
-      return new TrackerContext(context.scope, [...context.pendingIds, text])
-    }
-
-    // Track identifiers in array destructuring: [ a b ] = ...
-    if (!inParams && term === terms.Identifier && isArrayDestructuring(input)) {
-      const text = readIdentifierText(input, input.pos, stack.pos)
-      return new TrackerContext(Scope.add(context.scope, text), context.pendingIds)
-    }
-
-    return context
-  },
-
-  reduce(context, term) {
-    if (term === terms.Params) {
-      inParams = false
-      let newScope = context.scope.push()
-      if (context.pendingIds.length > 0) {
-        newScope = Scope.add(newScope, ...context.pendingIds)
-      }
-      return new TrackerContext(newScope, [])
-    }
-
-    // Pop scope when exiting function
-    if (term === terms.FunctionDef) {
-      return new TrackerContext(context.scope.pop(), [])
-    }
-
-    return context
-  },
-
-  hash: (context) => context.scope.hash(),
-})
-
-// Check if we're parsing array destructuring: [ a b ] = ...
-const isArrayDestructuring = (input: InputStream): boolean => {
-  let pos = 0
-
-  // Find closing bracket
-  while (pos < 200 && input.peek(pos) !== 93 /* ] */) {
-    if (input.peek(pos) === -1) return false // EOF
-    pos++
-  }
-
-  if (input.peek(pos) !== 93 /* ] */) return false
-  pos++
-
-  // Skip whitespace
-  while (input.peek(pos) === 32 /* space */ ||
-    input.peek(pos) === 9 /* tab */ ||
-    input.peek(pos) === 10 /* \n */) {
-    pos++
-  }
-
-  return input.peek(pos) === 61 /* = */
-}
@ -1,299 +0,0 @@
|
||||||
@external propSource highlighting from "./highlight"
|
|
||||||
|
|
||||||
@context trackScope from "./parserScopeContext"
|
|
||||||
|
|
||||||
@skip { space | Comment }
|
|
||||||
|
|
||||||
@top Program { item* }
|
|
||||||
|
|
||||||
@external tokens operatorTokenizer from "./operatorTokenizer" { Star, Slash, Plus, Minus, And, Or, Eq, EqEq, Neq, Lt, Lte, Gt, Gte, Modulo, PlusEq, MinusEq, StarEq, SlashEq, ModuloEq, Band, Bor, Bxor, Shl, Shr, Ushr, NullishCoalesce, NullishEq }
|
|
||||||
|
|
||||||
@tokens {
|
|
||||||
@precedence { Number Regex }
|
|
||||||
|
|
||||||
StringFragment { !['\\$]+ }
|
|
||||||
DoubleQuote { '"' !["]* '"' }
|
|
||||||
NamedArgPrefix { $[a-z] $[a-z0-9-]* "=" }
|
|
||||||
Number {
|
|
||||||
("-" | "+")? "0x" $[0-9a-fA-F]+ |
|
|
||||||
("-" | "+")? "0b" $[01]+ |
|
|
||||||
("-" | "+")? "0o" $[0-7]+ |
|
|
||||||
("-" | "+")? $[0-9]+ ("_"? $[0-9]+)* ('.' $[0-9]+ ("_"? $[0-9]+)*)?
|
|
||||||
}
|
|
||||||
Boolean { "true" | "false" }
|
|
||||||
semicolon { ";" }
|
|
||||||
eof { @eof }
|
|
||||||
space { " " | "\t" }
|
|
||||||
Comment { "#" ![\n]* }
|
|
||||||
leftParen { "(" }
|
|
||||||
rightParen { ")" }
|
|
||||||
colon[closedBy="end", @name="colon"] { ":" }
|
|
||||||
Underscore { "_" }
|
|
||||||
Dollar { "$" }
|
|
||||||
Regex { "//" (![/\\\n[] | "\\" ![\n] | "[" (![\n\\\]] | "\\" ![\n])* "]")+ ("//" $[gimsuy]*)? } // Stolen from the lezer JavaScript grammar
|
|
||||||
"|"[@name=operator]
|
|
||||||
}
|
|
||||||
|
|
||||||
newlineOrSemicolon { newline | semicolon }
|
|
||||||
|
|
||||||
end { @specialize[@name=keyword]<Identifier, "end"> }
|
|
||||||
while { @specialize[@name=keyword]<Identifier, "while"> }
|
|
||||||
if { @specialize[@name=keyword]<Identifier, "if"> }
|
|
||||||
else { @specialize[@name=keyword]<Identifier, "else"> }
|
|
||||||
try { @specialize[@name=keyword]<Identifier, "try"> }
|
|
||||||
catch { @specialize[@name=keyword]<Identifier, "catch"> }
|
|
||||||
finally { @specialize[@name=keyword]<Identifier, "finally"> }
|
|
||||||
throw { @specialize[@name=keyword]<Identifier, "throw"> }
|
|
||||||
not { @specialize[@name=keyword]<Identifier, "not"> }
|
|
||||||
import { @specialize[@name=keyword]<Identifier, "import"> }
|
|
||||||
null { @specialize[@name=Null]<Identifier, "null"> }
|
|
||||||
|
|
||||||
@external tokens tokenizer from "./tokenizer" { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, CurlyString }
|
|
||||||
@external tokens pipeStartsLineTokenizer from "./tokenizer" { newline, pipeStartsLine }
|
|
||||||
@external specialize {Identifier} specializeKeyword from "./tokenizer" { Do }
|
|
||||||
|
|
||||||
@precedence {
|
|
||||||
pipe @left,
|
|
||||||
or @left,
|
|
||||||
and @left,
|
|
||||||
nullish @left,
|
|
||||||
comparison @left,
|
|
||||||
multiplicative @left,
|
|
||||||
additive @left,
|
|
||||||
bitwise @left,
|
|
||||||
call,
|
|
||||||
functionWithNewlines
|
|
||||||
}
|
|
||||||
|
|
||||||
item {
|
|
||||||
consumeToTerminator newlineOrSemicolon |
|
|
||||||
consumeToTerminator eof |
|
|
||||||
newlineOrSemicolon // allow blank lines
|
|
||||||
}
|
|
||||||
|
|
||||||
consumeToTerminator {
|
|
||||||
PipeExpr |
|
|
||||||
WhileExpr |
|
|
||||||
FunctionCallWithBlock |
|
|
||||||
ambiguousFunctionCall |
|
|
||||||
TryExpr |
|
|
||||||
Throw |
|
|
||||||
Not |
|
|
||||||
Import |
|
|
||||||
IfExpr |
|
|
||||||
FunctionDef |
|
|
||||||
CompoundAssign |
|
|
||||||
Assign |
|
|
||||||
BinOp |
|
|
||||||
ConditionalOp |
|
|
||||||
expressionWithoutIdentifier
|
|
||||||
}
|
|
||||||
|
|
||||||
PipeExpr {
|
|
||||||
pipeOperand (!pipe (pipeStartsLine? "|") newlineOrSemicolon* pipeOperand)+
|
|
||||||
}
|
|
||||||
|
|
||||||
pipeOperand {
|
|
||||||
consumeToTerminator
|
|
||||||
}
|
|
||||||
|
|
||||||
WhileExpr {
|
|
||||||
while (ConditionalOp | expression) colon Block end
|
|
||||||
}
|
|
||||||
|
|
||||||
Block {
|
|
||||||
consumeToTerminator | newlineOrSemicolon block
|
|
||||||
}
|
|
||||||
|
|
||||||
FunctionCallWithBlock {
|
|
||||||
ambiguousFunctionCall colon Block CatchExpr? FinallyExpr? end
|
|
||||||
}
|
|
||||||
|
|
||||||
FunctionCallOrIdentifier {
|
|
||||||
DotGet | Identifier
|
|
||||||
}
|
|
||||||
|
|
||||||
ambiguousFunctionCall {
|
|
||||||
FunctionCall | FunctionCallOrIdentifier
|
|
||||||
}
|
|
||||||
|
|
||||||
FunctionCall {
|
|
||||||
(DotGet | Identifier | ParenExpr) arg+
|
|
||||||
}
|
|
||||||
|
|
||||||
arg {
|
|
||||||
PositionalArg | NamedArg
|
|
||||||
}
|
|
||||||
|
|
||||||
PositionalArg {
|
|
||||||
expression | FunctionDef | Underscore
|
|
||||||
}
|
|
||||||
|
|
||||||
NamedArg {
|
|
||||||
NamedArgPrefix (expression | FunctionDef | Underscore)
|
|
||||||
}
|
|
||||||
|
|
||||||
FunctionDef {
|
|
||||||
Do Params colon (consumeToTerminator | newlineOrSemicolon block) CatchExpr? FinallyExpr? end
|
|
||||||
}
|
|
||||||
|
|
||||||
ifTest {
|
|
||||||
ConditionalOp | expression | FunctionCall
|
|
||||||
}
|
|
||||||
|
|
||||||
IfExpr {
|
|
||||||
if ifTest colon Block ElseIfExpr* ElseExpr? end
|
|
||||||
}
|
|
||||||
|
|
||||||
ElseIfExpr {
|
|
||||||
else if ifTest colon Block
|
|
||||||
}
|
|
||||||
|
|
||||||
ElseExpr {
|
|
||||||
else colon Block
|
|
||||||
}
|
|
||||||
|
|
||||||
TryExpr {
|
|
||||||
try colon Block CatchExpr? FinallyExpr? end
|
|
||||||
}
|
|
||||||
|
|
||||||
CatchExpr {
|
|
||||||
catch Identifier colon Block
|
|
||||||
}
|
|
||||||
|
|
||||||
FinallyExpr {
|
|
||||||
finally colon Block
|
|
||||||
}
|
|
||||||
|
|
||||||
Throw {
|
|
||||||
throw (BinOp | ConditionalOp | expression)
|
|
||||||
}
|
|
||||||
|
|
||||||
Not {
|
|
||||||
not (BinOp | ConditionalOp | expression)
|
|
||||||
}
|
|
||||||
|
|
||||||
// this has to be in the parse tree so the scope tracker can use it
|
|
||||||
Import {
|
|
||||||
import NamedArg* Identifier+ NamedArg*
|
|
||||||
}
|
|
||||||
|
|
||||||
ConditionalOp {
|
|
||||||
expression !comparison EqEq expression |
|
|
||||||
expression !comparison Neq expression |
|
|
||||||
expression !comparison Lt expression |
|
|
||||||
expression !comparison Lte expression |
|
|
||||||
expression !comparison Gt expression |
|
|
||||||
expression !comparison Gte expression |
|
|
||||||
(expression | ConditionalOp) !and And (expression | ConditionalOp) |
|
|
||||||
(expression | ConditionalOp) !or Or (expression | ConditionalOp) |
|
|
||||||
(expression | ConditionalOp) !nullish NullishCoalesce (expression | ConditionalOp)
|
|
||||||
}
|
|
||||||
|
|
||||||
Params {
  Identifier* NamedParam*
}

NamedParam {
  NamedArgPrefix (String | Number | Boolean | null)
}

Assign {
  (AssignableIdentifier | Array) Eq consumeToTerminator
}

CompoundAssign {
  AssignableIdentifier (PlusEq | MinusEq | StarEq | SlashEq | ModuloEq | NullishEq) consumeToTerminator
}

BinOp {
  expression !multiplicative Modulo expression |
  (expression | BinOp) !multiplicative Star (expression | BinOp) |
  (expression | BinOp) !multiplicative Slash (expression | BinOp) |
  (expression | BinOp) !additive Plus (expression | BinOp) |
  (expression | BinOp) !additive Minus (expression | BinOp) |
  (expression | BinOp) !bitwise Band (expression | BinOp) |
  (expression | BinOp) !bitwise Bor (expression | BinOp) |
  (expression | BinOp) !bitwise Bxor (expression | BinOp) |
  (expression | BinOp) !bitwise Shl (expression | BinOp) |
  (expression | BinOp) !bitwise Shr (expression | BinOp) |
  (expression | BinOp) !bitwise Ushr (expression | BinOp)
}

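// Editor note: the !multiplicative / !additive / !bitwise markers refer to
// precedence names declared in the grammar's @precedence block (not shown in
// this hunk); lezer uses them to resolve the nested BinOp ambiguity, so that,
// assuming the usual ordering, `1 + 2 * 3` groups as BinOp(1 Plus BinOp(2 Star 3)).
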
ParenExpr {
  leftParen newlineOrSemicolon* (
    FunctionCallWithNewlines |
    IfExpr |
    ambiguousFunctionCall |
    BinOp newlineOrSemicolon* |
    expressionWithoutIdentifier |
    ConditionalOp newlineOrSemicolon* |
    PipeExpr |
    FunctionDef
  )
  rightParen
}

FunctionCallWithNewlines[@name=FunctionCall] {
  (DotGet | Identifier | ParenExpr) newlineOrSemicolon+ arg !functionWithNewlines (newlineOrSemicolon+ arg)* newlineOrSemicolon*
}

expression {
  expressionWithoutIdentifier | DotGet | Identifier
}

@local tokens {
  dot { "." }
}

@skip {} {
  DotGet {
    IdentifierBeforeDot dot (DotGet | Number | Identifier | ParenExpr) |
    Dollar dot (DotGet | Number | Identifier | ParenExpr)
  }

  String {
    "'" stringContent* "'" | CurlyString | DoubleQuote
  }
}

stringContent {
  StringFragment |
  Interpolation |
  EscapeSeq
}

Interpolation {
  "$" FunctionCallOrIdentifier |
  "$" ParenExpr
}

EscapeSeq {
  "\\" ("$" | "n" | "t" | "r" | "\\" | "'")
}

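// Example from this repo's test suite: "'hello $name'" becomes a String whose
// stringContent is a StringFragment for `hello ` followed by an Interpolation
// for `$name`; a literal dollar sign would go through EscapeSeq as `\$`.
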
Dict {
  "[=]" |
  "[" newlineOrSemicolon* NamedArg (newlineOrSemicolon | NamedArg)* "]"
}

Array {
  "[" newlineOrSemicolon* (expression (newlineOrSemicolon | expression)*)? "]"
}

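// Examples grounded in these rules and the test suite: `[=]` is the empty Dict,
// and `[ 1 2 3 4]` is an Array; elements are separated by whitespace or
// newlines rather than commas.
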
// We need expressionWithoutIdentifier to avoid conflicts in consumeToTerminator.
// Without this, when parsing "my-var" at statement level, the parser can't decide:
//   - ambiguousFunctionCall → FunctionCallOrIdentifier → Identifier
//   - expression → Identifier
// Both want the same Identifier token! So we use expressionWithoutIdentifier
// to remove Identifier from the second path, forcing standalone identifiers
// to go through ambiguousFunctionCall (which is what we want semantically).
// Yes, it is annoying and I gave up trying to use GLR to fix it.
expressionWithoutIdentifier {
  ParenExpr | Word | String | Number | Boolean | Regex | Dict | Array | null
}

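// Editor illustration: with this split, a bare `my-var` statement has exactly
// one derivation (ambiguousFunctionCall → FunctionCallOrIdentifier →
// Identifier), while `my-var` in operand position, e.g. inside a BinOp, still
// reaches Identifier through the expression rule.
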
block {
  (consumeToTerminator? newlineOrSemicolon)*
}

src/parser/shrimp.grammar.d.ts (vendored)
@@ -1,4 +0,0 @@
declare module '*.grammar' {
  const content: string
  export default content
}

@@ -1,82 +0,0 @@
// This file was generated by lezer-generator. You probably shouldn't edit it.
export const
  Star = 1,
  Slash = 2,
  Plus = 3,
  Minus = 4,
  And = 5,
  Or = 6,
  Eq = 7,
  EqEq = 8,
  Neq = 9,
  Lt = 10,
  Lte = 11,
  Gt = 12,
  Gte = 13,
  Modulo = 14,
  PlusEq = 15,
  MinusEq = 16,
  StarEq = 17,
  SlashEq = 18,
  ModuloEq = 19,
  Band = 20,
  Bor = 21,
  Bxor = 22,
  Shl = 23,
  Shr = 24,
  Ushr = 25,
  NullishCoalesce = 26,
  NullishEq = 27,
  Identifier = 28,
  AssignableIdentifier = 29,
  Word = 30,
  IdentifierBeforeDot = 31,
  CurlyString = 32,
  newline = 103,
  pipeStartsLine = 104,
  Do = 33,
  Comment = 34,
  Program = 35,
  PipeExpr = 36,
  WhileExpr = 38,
  keyword = 86,
  ConditionalOp = 40,
  ParenExpr = 41,
  FunctionCallWithNewlines = 42,
  DotGet = 43,
  Number = 44,
  Dollar = 45,
  PositionalArg = 46,
  FunctionDef = 47,
  Params = 48,
  NamedParam = 49,
  NamedArgPrefix = 50,
  String = 51,
  StringFragment = 52,
  Interpolation = 53,
  FunctionCallOrIdentifier = 54,
  EscapeSeq = 55,
  DoubleQuote = 56,
  Boolean = 57,
  Null = 58,
  colon = 59,
  CatchExpr = 60,
  Block = 62,
  FinallyExpr = 63,
  Underscore = 66,
  NamedArg = 67,
  IfExpr = 68,
  FunctionCall = 70,
  ElseIfExpr = 71,
  ElseExpr = 73,
  BinOp = 74,
  Regex = 75,
  Dict = 76,
  Array = 77,
  FunctionCallWithBlock = 78,
  TryExpr = 79,
  Throw = 81,
  Not = 83,
  Import = 85,
  CompoundAssign = 87,
  Assign = 88

@@ -1,27 +0,0 @@
// This file was generated by lezer-generator. You probably shouldn't edit it.
import {LRParser, LocalTokenGroup} from "@lezer/lr"
import {operatorTokenizer} from "./operatorTokenizer"
import {tokenizer, pipeStartsLineTokenizer, specializeKeyword} from "./tokenizer"
import {trackScope} from "./parserScopeContext"
import {highlighting} from "./highlight"

const spec_Identifier = {__proto__:null,while:78, null:116, catch:122, finally:128, end:130, if:138, else:144, try:160, throw:164, not:168, import:172}

export const parser = LRParser.deserialize({
version: 14,
states: "?tQYQ!SOOOOQ!Q'#Em'#EmO!vO!bO'#DXO%nQ!TO'#DdO&XOSO'#DaOOQ!R'#Da'#DaO)VQ!TO'#EpOOQ!Q'#E}'#E}O)sQRO'#DxO+{Q!TO'#ElO,iQ!SO'#DVOOQ!R'#Dz'#DzO/^Q!SO'#D{OOQ!R'#Ep'#EpO/eQ!TO'#EpO1iQ!TO'#EoO2wQ!TO'#ElO3UQRO'#EVOOQ!Q'#El'#ElO3mQ!SO'#ElO3tQrO'#EkOOQ!Q'#Ek'#EkOOQ!Q'#EX'#EXQYQ!SOOO4VQbO'#D]O4bQbO'#DrO5`QbO'#DSO6^QQO'#D}O5`QbO'#EPO5`QbO'#ERO6cQbO'#ETO6kObO,59sOOQ!Q'#D['#D[O6|QbO'#DqOOQ!Q'#Es'#EsOOQ!Q'#Ea'#EaO7WQ!SO,5:`OOQ!R'#Eo'#EoO8WQbO'#DcO8fQWO'#DeOOOO'#Eu'#EuOOOO'#E^'#E^O8zOSO,59{OOQ!R,59{,59{O5`QbO,5:dO5`QbO,5:dO5`QbO,5:dO5`QbO,5:dO5`QbO,59pO5`QbO,59pO5`QbO,59pO5`QbO,59pOOQ!Q'#EZ'#EZO,iQ!SO,59qO9YQ!TO'#DdO9dQ!TO'#EpO9nQsO,59qO9{QQO,59qO:QQrO,59qO:]QrO,59qO:kQsO,59qO;ZQsO,59qO;bQrO'#DQO;jQ!SO,5:gO;qQrO,5:fOOQ!R,5:g,5:gO<PQ!SO,5:gO<^QbO,5:rO<^QbO,5:qOYQ!SO,5:hO=tQ!SO,59lOOQ!Q,5;V,5;VOYQ!SO'#EYO>fQQO'#EYOOQ!Q-E8V-E8VOOQ!Q'#E['#E[O>kQbO'#D^O>vQbO'#D_OOQO'#E]'#E]O>nQQO'#D^O?[QQO,59wO?aQcO'#EoO@^QRO'#E|OAZQRO'#E|OOQO'#E|'#E|OAbQQO,5:^OAgQRO,59nOAnQRO,59nOYQ!SO,5:iOA|Q!TO,5:kOCbQ!TO,5:kODUQ!TO,5:kODcQ!TO,5:mOEwQ!TO,5:mOFkQ!TO,5:mOFxQ!SO,5:oOOQ!Q'#Ee'#EeO6cQbO,5:oOOQ!R1G/_1G/_OOQ!Q,5:],5:]OOQ!Q-E8_-E8_OOOO'#Dd'#DdOOOO,59},59}OOOO,5:P,5:POOOO-E8[-E8[OOQ!R1G/g1G/gOOQ!R1G0O1G0OOH}Q!TO1G0OOIXQ!TO1G0OOJmQ!TO1G0OOJwQ!TO1G0OOKUQ!TO1G0OOOQ!R1G/[1G/[OLmQ!TO1G/[OLtQ!TO1G/[OL{Q!TO1G/[ONQQ!TO1G/[OMSQ!TO1G/[OOQ!Q-E8X-E8XONhQsO1G/]ONuQQO1G/]ONzQrO1G/]O! VQrO1G/]O! eQsO1G/]O! lQsO1G/]O! sQ!SO,59rO! }QrO1G/]OOQ!R1G/]1G/]O!!YQrO1G0QOOQ!R1G0R1G0RO!!hQ!SO1G0ROOQp'#Ec'#EcO!!YQrO1G0QOOQ!R1G0Q1G0QOOQ!Q'#Ed'#EdO!!hQ!SO1G0RO!!uQ!SO1G0^O!#gQ!SO1G0]O!$XQ!SO'#DlO!$mQ!SO'#DlO!$}QbO1G0SOOQ!Q-E8W-E8WOYQ!SO,5:tOOQ!Q,5:t,5:tOYQ!SO,5:tOOQ!Q-E8Y-E8YO!%YQQO,59xOOQO,59y,59yOOQO-E8Z-E8ZOYQ!SO1G/cOYQ!SO1G/xOYQ!SO1G/YO!%bQbO1G0TO!%mQ!SO1G0ZO!&bQ!SO1G0ZOOQ!Q-E8c-E8cO!&iQrO7+$wOOQ!R7+$w7+$wO!&tQrO1G/^O!'PQrO7+%lOOQ!R7+%l7+%lO!'_Q!SO7+%mOOQ!R7+%m7+%mOOQp-E8a-E8aOOQ!Q-E8b-E8bOOQ!Q'#E_'#E_O!'lQrO'#E_O!'zQ!SO'#E{OOQ`,5:W,5:WO!([QbO'#DjO!(aQQO'#DmOOQ!Q7+%n7+%nO!(fQbO7+%nO!(kQbO7+%nOOQ!Q1G0`1G0`OYQ!SO1G0`O!(sQ!SO7+$}O!)UQ!SO7+$}O!)cQbO7+%dO!)kQbO7+$tOOQ!Q7+%o7+%oO!)pQbO7+%oO!)uQbO7+%oO!)}Q!SO7+%uOOQ!R<<Hc<<HcO!*rQ!SO7+$xO!+PQrO7+$xOOQ!R<<IW<<IWOOQ!R<<IX<<IXOOQ!Q,5:y,5:yOOQ!Q-E8]-E8]O!+[QQO,5:UOYQ!SO,5:XOOQ!Q<<IY<<IYO!+aQbO<<IYOOQ!Q7+%z7+%zOOQ!Q<<Hi<<HiO!+fQbO<<HiO!+kQbO<<HiO!+sQbO<<HiOOQ`'#Eb'#EbO!,OQbO<<IOO!,WQbO'#DwOOQ!Q<<IO<<IOO!,`QbO<<IOOOQ!Q<<H`<<H`OOQ!Q<<IZ<<IZO!,eQbO<<IZOOQp,5:z,5:zO!,jQ!SO<<HdOOQp-E8^-E8^OYQ!SO1G/pOOQ`1G/s1G/sOOQ!QAN>tAN>tOOQ!QAN>TAN>TO!,wQbOAN>TO!,|QbOAN>TOOQ`-E8`-E8`OOQ!QAN>jAN>jO!-UQbOAN>jO4bQbO,5:aOYQ!SO,5:cOOQ!QAN>uAN>uP! sQ!SO'#EZOOQ`7+%[7+%[OOQ!QG23oG23oO!-ZQbOG23oP!,ZQbO'#DuOOQ!QG24UG24UO!-`QQO1G/{OOQ`1G/}1G/}OOQ!QLD)ZLD)ZOYQ!SO7+%gOOQ`<<IR<<IRO!-eObO,59sO!-vO!bO'#DX",
stateData: "!.O~O#^OSrOS~OlROmaOn]OoQOpTOqhOwjO|]O}QO!YTO!Z]O![]O!giO!m]O!rkO!tlO!vmO!xnO#ZPO#bPO#eYO#hSO#sZO#t[O~O#foO~OluOn]OoQOpTOqhO|]O}QO!SqO!YTO!Z]O![]O!dpO!m]O#eYO#hSO#sZO#t[OP#cXQ#cXR#cXS#cXT#cXU#cXW#cXX#cXY#cXZ#cX[#cX]#cX^#cXd#cXe#cXf#cXg#cXh#cXi#cXj#cXu!WX!]!WX#[!WX#r!WX~O#Z!WX#b!WX#v!WX!_!WX!b!WX!c!WX!j!WX~P!{O!UxO#h{O#jvO#kwO~OluOn]OoQOpTOqhO|]O}QO!SqO!YTO!Z]O![]O!dpO!m]O#eYO#hSO#sZO#t[OP#dXQ#dXR#dXS#dXT#dXU#dXW#dXX#dXY#dXZ#dX[#dX]#dX^#dXd#dXe#dXf#dXg#dXh#dXi#dXj#dXu#dX#[#dX#r#dX~O#Z#dX#b#dX#v#dX!]#dX!_#dX!b#dX!c#dX!j#dX~P&gOP}OQ}OR!OOS!OOT!ROU!SOW!QOX!QOY!QOZ!QO[!QO]!QO^|Od!POe!POf!POg!POh!POi!POj!TO~OP}OQ}OR!OOS!OOd!POe!POf!POg!POh!POi!POu#`X#[#`X~O#Z#`X#b#`X#v#`X!_#`X!b#`X!c#`X#r#`X!j#`X~P+TOl!WOmaOn]OoQOpTOqhOwjO|]O}QO!YTO!Z]O![]O!giO!m]O!rkO!tlO!vmO!xnO#ZPO#bPO#eYO#hSO#sZO#t[O~OluOn]OoQOpTO|]O}QO!SqO!YTO!Z]O![]O!m]O#ZPO#bPO#eYO#hSO#sZO#t[O~O#u!cO~P.VOV!eO#Z#dX#b#dX#v#dX!_#dX!b#dX!c#dX!j#dX~P'lOP#cXQ#cXR#cXS#cXT#cXU#cXW#cXX#cXY#cXZ#cX[#cX]#cX^#cXd#cXe#cXf#cXg#cXh#cXi#cXj#cXu#`X#[#`X~O#Z#`X#b#`X#v#`X!_#`X!b#`X!c#`X#r#`X!j#`X~P0ROu#`X#Z#`X#[#`X#b#`X#v#`X!_#`X!b#`X!c#`X#r#`X!j#`X~OT!ROU!SOj!TO~P2VOV!eO_!fO`!fOa!fOb!fOc!fOk!fO~O!]!gO~P2VOu!jO#ZPO#[!kO#bPO#v!iO~Ol!mO!S!oO!]!QP~Ol!sOn]OoQOpTO|]O}QO!YTO!Z]O![]O!m]O#eYO#hSO#sZO#t[O~OluOn]OoQOpTO|]O}QO!YTO!Z]O![]O!m]O#eYO#hSO#sZO#t[O~O!]!zO~Ol!mO!SqO~Ol#UOoQO|#UO}QO#eYO~OqhO!d#VO~P5`OqhO!SqO!dpOu!ha!]!ha#Z!ha#[!ha#b!ha#v!ha#r!ha!_!ha!b!ha!c!ha!j!ha~P5`Ol#XOo&TO}&TO#eYO~O#h#ZO#j#ZO#k#ZO#l#ZO#m#ZO#n#ZO~O!UxO#h#]O#jvO#kwO~O#ZPO#bPO~P!{O#ZPO#bPO~P&gO#ZPO#bPO#r#sO~P+TO#r#sO~O#r#sOu#`X#[#`X~O!]!gO#r#sOu#`X#[#`X~O#r#sO~P0ROT!ROU!SOj!TO#ZPO#bPOu#`X#[#`X~O#r#sO~P:rOu!jO#[!kO~O#u#uO~P.VO!SqO#ZPO#bPO#u#yO~O#ZPO#bPO#u#uO~P5`OlROmaOn]OoQOpTOqhOwjO|]O}QO!YTO!Z]O![]O!giO!m]O!rkO!tlO!vmO!xnO#eYO#hSO#sZO#t[O~Ou!jO#[!kO#Zta#bta#vta#rta!_ta!bta!cta!jta~Ou$UO~Ol!mO!S!oO!]!QX~OpTO|$XO!YTO!Z$XO![$XO#hSO~O!]$ZO~OqhO!SqO!dpOT#cXU#cXW#cXX#cXY#cXZ#cX[#cX]#cXj#cX!]#cX~P5`OT!ROU!SOj!TO!]#pX~OT!ROU!SOW!QOX!QOY!QOZ!QO[!QO]!QOj!TO~O!]#pX~P@lO!]$[O~O!]$]O~P@lOT!ROU!SOj!TO!]$]O~Ou!sa#Z!sa#[!sa#b!sa#v!sa!_!sa!b!sa!c!sa#r!sa!j!sa~P)sOu!sa#Z!sa#[!sa#b!sa#v!sa!_!sa!b!sa!c!sa#r!sa!j!sa~OP}OQ}OR!OOS!OOd!POe!POf!POg!POh!POi!PO~PBpOT!ROU!SOj!TO~PBpOu!ua#Z!ua#[!ua#b!ua#v!ua!_!ua!b!ua!c!ua#r!ua!j!ua~P)sOu!ua#Z!ua#[!ua#b!ua#v!ua!_!ua!b!ua!c!ua#r!ua!j!ua~OP}OQ}OR!OOS!OOd!POe!POf!POg!POh!POi!PO~PEVOT!ROU!SOj!TO~PEVOl!mO!SqOu!wa#Z!wa#[!wa#b!wa#v!wa!_!wa!b!wa!c!wa#r!wa!j!wa~O^|OR!liS!lid!lie!lif!lig!lih!lii!liu!li#Z!li#[!li#b!li#v!li#r!li!_!li!b!li!c!li!j!li~OP!liQ!li~PGpOP}OQ}O~PGpOP}OQ}Od!lie!lif!lig!lih!lii!liu!li#Z!li#[!li#b!li#v!li#r!li!_!li!b!li!c!li!j!li~OR!liS!li~PIcOR!OOS!OO^|O~PIcOR!OOS!OO~PIcOW!QOX!QOY!QOZ!QO[!QO]!QOTxijxiuxi#Zxi#[xi#bxi#vxi#rxi!]xi!_xi!bxi!cxi!jxi~OU!SO~PK`OU!SO~PKrOUxi~PK`OT!ROU!SOjxiuxi#Zxi#[xi#bxi#vxi#rxi!]xi!_xi!bxi!cxi!jxi~OW!QOX!QOY!QOZ!QO[!QO]!QO~PMSO#ZPO#bPO#r$cO~P+TO#r$cO~O#r$cOu#`X#[#`X~O!]!gO#r$cOu#`X#[#`X~O#r$cO~P0RO#r$cO~P:rOqhO!dpO~P.VO#ZPO#bPO#r$cO~O!SqO#ZPO#bPO#u$fO~O#ZPO#bPO#u$hO~P5`Ou!jO#[!kO#Z!zi#b!zi#v!zi!_!zi!b!zi!c!zi#r!zi!j!zi~Ou!jO#[!kO#Z!yi#b!yi#v!yi!_!yi!b!yi!c!yi#r!yi!j!yi~Ou!jO#[!kO!_!`X!b!`X!c!`X!j!`X~O!_#oP!b#oP!c#oP!j#oP~PYO!_$oO!b$pO!c$qO~O!S!oO!]!Qa~O!_$oO!b$pO!c$zO~O!SqOu!wi#Z!wi#[!wi#b!wi#v!wi!_!wi!b!wi!c!wi#r!wi!j!wi~Ol!mO~P!%mO#ZPO#bPO#r%OO~O#ZPO#bPO#rzi~O!SqO#ZPO#bPO#u%RO~O#ZPO#bPO#u%SO~P5`Ou!jO#ZPO#[!kO#bPO~O!_#oX!b#oX!c#oX!j#oX~PYOl%VO~O!]%WO~O!c%XO~O!b$pO!c%XO~Ou!jO!_$oO!b$pO!c%[O#[!kO~O!_#oP!b#oP!c#oP~PYO!c%cO!j%bO~O!c%eO~O!c%fO~O!b$pO!c%fO~O!SqOu!wq#Z!wq#[!wq#b!wq#v!wq!_!wq!b!wq!c!wq#r!wq!j!wq~OqhO!dpO#rzq~P.V
O#ZPO#bPO#rzq~O!]%kO~O!c%mO~O!c%nO~O!b$pO!c%nO~O!_$oO!b$pO!c%nO~O!c%rO!j%bO~O!]%uO!g%tO~O!c%rO~O!c%vO~OqhO!dpO#rzy~P.VO!c%yO~O!b$pO!c%yO~O!c%|O~O!c&PO~O!]&QO~Ol#UOo&TO|#UO}&TO#eYO~O#f&SO~O|!m~",
goto: "<m#rPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP#sP$aP$y%}'b'hPP(})Z*W*ZP*aP+l+p+lPPPP,]P,i-RPPP-i#sP.Z.wP.{/RP0O1W$a$aP$aP$aP$aP$a$a2b2h2t3p4O4Y4`4g4m4w4}5X5cPPPPP5q5u6qP8T:PPP;_P;oPPPPP;s;y<PxbOg!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QQ![YR#m!V}bOYg!V!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&Qx`Og!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QQ!_YS!ti%tQ!yjQ!}lQ#QmQ#d!SQ#f!RQ#i!TR#p!V|UOgi!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%t%u&Q!Y]RU[jlmqt|}!O!P!Q!R!S!T!W!X!a!d!s#q#v#{$g%P%i%wS!XY!VS#Uo&SR#YvQ!ZYR#l!VxROg!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&Q!YuRU[jlmqt|}!O!P!Q!R!S!T!W!X!a!d!s#q#v#{$g%P%i%wS!WY!VS!si%tS#Uo&SR#XverRUt!W!X!s#q%P%i%wxbOg!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QdpRUt!W!X!s#q%P%i%wQ![YQ#VqR#m!VR!rhX!ph!n!q$W#[]ORUY[gijlmqt|}!O!P!Q!R!S!T!V!W!X!a!d!e!f!g!j!s!z#q#v#{$P$S$U$Z$[$]$g$m$u$w%P%W%i%k%t%u%w&QR$X!oTxSz|VOYg!V!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QR#YvQ$s$QQ$|$^Q%^$vR%p%_Q$Q!gQ$^!zQ$x$[Q$y$]Q%l%WQ%x%kQ&O%uR&R&QQ$r$QQ${$^Q%Y$sQ%]$vQ%g$|S%o%^%_R%z%pdrRUt!W!X!s#q%P%i%wQ!b[[#Sn#R#T$_$`$}Q#t!aX#w!b#t#x$e|VOYg!V!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QT!vi%tT%`$x%aQ%d$xR%s%axXOg!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QQ!YYQ!|lQ#PmQ#^}Q#a!OQ#c!PR#k!V#]]ORUY[gijlmqt|}!O!P!Q!R!S!T!V!W!X!a!d!e!f!g!j!s!z#q#v#{$P$S$U$Z$[$]$g$m$u$w%P%W%i%k%t%u%w&Q!^]RU[ijlmqt|}!O!P!Q!R!S!T!W!X!a!d!s#q#v#{$g%P%i%t%w}^OYg!V!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QQgOR!lg^!hd!`#|#}$O$l$vR$R!hQ!VYQ!a[d#j!V!a#q#r$S$b$u%P%i%wS#q!W!XS#r!Y!_Q$S!jS$b#k#pQ$u$UQ%P$dR%i%QQ!nhQ#RnU$V!n#R$`R$`#TQ!qhQ$W!nT$Y!q$WQzSR#[zS$m$P$wR%U$mQ%Q$dR%j%QYtRU!W!X!sR#WtQ%a$xR%q%aQ#x!bQ$e#tT$i#x$eQ#{!dQ$g#vT$j#{$gQ#TnQ$_#RU$a#T$_$}R$}$`TfOgSdOgS!`Y!VQ#|!eQ#}!f`$O!g!z$[$]%W%k%u&QQ$T!jU$l$P$m$wS$t$S$UQ$v$ZR%Z$uSeOg|!UY[!V!W!X!Y!_!a!j#k#p#q#r$S$U$b$d$u%P%Q%i%wQ!idW#w!b#t#x$eW#z!d#v#{$g`$P!g!z$[$]%W%k%u&QU$k$P$m$wQ$w$ZR%T$l|WOYg!V!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QdpRUt!W!X!s#q%P%i%wQ!d[S!ui%tQ!xjQ!{lQ#OmQ#VqQ#^|Q#_}Q#`!OQ#b!PQ#d!QQ#e!RQ#g!SQ#h!TQ#v!aX#z!d#v#{$gx_Og!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&Q!^uRU[ijlmqt|}!O!P!Q!R!S!T!W!X!a!d!s#q#v#{$g%P%i%t%wQ!^YR#o!V[sRUt!W!X!sQ$d#qV%h%P%i%wTySzQ$n$PR%_$wQ!wiR%}%txcOg!e!f!g!j!z$P$S$U$Z$[$]$m$u$w%W%k%u&QQ!]YR#n!V",
nodeNames: "⚠ Star Slash Plus Minus And Or Eq EqEq Neq Lt Lte Gt Gte Modulo PlusEq MinusEq StarEq SlashEq ModuloEq Band Bor Bxor Shl Shr Ushr NullishCoalesce NullishEq Identifier AssignableIdentifier Word IdentifierBeforeDot CurlyString Do Comment Program PipeExpr operator WhileExpr keyword ConditionalOp ParenExpr FunctionCall DotGet Number Dollar PositionalArg FunctionDef Params NamedParam NamedArgPrefix String StringFragment Interpolation FunctionCallOrIdentifier EscapeSeq DoubleQuote Boolean Null colon CatchExpr keyword Block FinallyExpr keyword keyword Underscore NamedArg IfExpr keyword FunctionCall ElseIfExpr keyword ElseExpr BinOp Regex Dict Array FunctionCallWithBlock TryExpr keyword Throw keyword Not keyword Import keyword CompoundAssign Assign",
maxTerm: 130,
context: trackScope,
nodeProps: [
  ["closedBy", 59,"end"]
],
propSources: [highlighting],
skippedNodes: [0,34],
repeatNodeCount: 13,
tokenData: "Lp~R}OX$OXY$mYp$Opq$mqr$Ors%Wst'^tu(uuw$Owx(|xy)Ryz)lz{$O{|*V|}$O}!O*V!O!P$O!P!Q3r!Q!R*w!R![-l![!]<_!]!^<x!^!}$O!}#O=c#O#P?X#P#Q?^#Q#R$O#R#S?w#S#T$O#T#Y@b#Y#ZA|#Z#b@b#b#cGj#c#f@b#f#gHm#g#h@b#h#iIp#i#o@b#o#p$O#p#qLQ#q;'S$O;'S;=`$g<%l~$O~O$O~~LkS$TU!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OS$jP;=`<%l$O^$tU!US#^YOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU%]Z!USOr%Wrs&Ost%Wtu&iuw%Wwx&ix#O%W#O#P&i#P;'S%W;'S;=`'W<%lO%WU&VU!YQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OQ&lTOr&irs&{s;'S&i;'S;=`'Q<%lO&iQ'QO!YQQ'TP;=`<%l&iU'ZP;=`<%l%W^'eZrY!USOY'^YZ$OZt'^tu(Wuw'^wx(Wx#O'^#O#P(W#P;'S'^;'S;=`(o<%lO'^Y(]SrYOY(WZ;'S(W;'S;=`(i<%lO(WY(lP;=`<%l(W^(rP;=`<%l'^^(|O#j[}Q~)RO#h~U)YU!US#eQOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU)sU!US#rQOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU*[X!USOt$Ouw$Ox!Q$O!Q!R*w!R![-l![#O$O#P;'S$O;'S;=`$g<%lO$OU+Ob!US|QOt$Ouw$Ox!O$O!O!P,W!P!Q$O!Q![-l![#O$O#P#R$O#R#S.i#S#U$O#U#V/W#V#c$O#c#d0l#d#l$O#l#m1z#m;'S$O;'S;=`$g<%lO$OU,]W!USOt$Ouw$Ox!Q$O!Q![,u![#O$O#P;'S$O;'S;=`$g<%lO$OU,|Y!US|QOt$Ouw$Ox!Q$O!Q![,u![#O$O#P#R$O#R#S,W#S;'S$O;'S;=`$g<%lO$OU-s[!US|QOt$Ouw$Ox!O$O!O!P,W!P!Q$O!Q![-l![#O$O#P#R$O#R#S.i#S;'S$O;'S;=`$g<%lO$OU.nW!USOt$Ouw$Ox!Q$O!Q![-l![#O$O#P;'S$O;'S;=`$g<%lO$OU/]X!USOt$Ouw$Ox!Q$O!Q!R/x!R!S/x!S#O$O#P;'S$O;'S;=`$g<%lO$OU0PX!US|QOt$Ouw$Ox!Q$O!Q!R/x!R!S/x!S#O$O#P;'S$O;'S;=`$g<%lO$OU0qW!USOt$Ouw$Ox!Q$O!Q!Y1Z!Y#O$O#P;'S$O;'S;=`$g<%lO$OU1bW!US|QOt$Ouw$Ox!Q$O!Q!Y1Z!Y#O$O#P;'S$O;'S;=`$g<%lO$OU2P[!USOt$Ouw$Ox!Q$O!Q![2u![!c$O!c!i2u!i#O$O#P#T$O#T#Z2u#Z;'S$O;'S;=`$g<%lO$OU2|[!US|QOt$Ouw$Ox!Q$O!Q![2u![!c$O!c!i2u!i#O$O#P#T$O#T#Z2u#Z;'S$O;'S;=`$g<%lO$OU3wW!USOt$Ouw$Ox!P$O!P!Q4a!Q#O$O#P;'S$O;'S;=`$g<%lO$OU4f^!USOY5bYZ$OZt5btu6euw5bwx6ex!P5b!P!Q$O!Q!}5b!}#O;W#O#P8s#P;'S5b;'S;=`<X<%lO5bU5i^!US!mQOY5bYZ$OZt5btu6euw5bwx6ex!P5b!P!Q9Y!Q!}5b!}#O;W#O#P8s#P;'S5b;'S;=`<X<%lO5bQ6jX!mQOY6eZ!P6e!P!Q7V!Q!}6e!}#O7t#O#P8s#P;'S6e;'S;=`9S<%lO6eQ7YP!P!Q7]Q7bU!mQ#Z#[7]#]#^7]#a#b7]#g#h7]#i#j7]#m#n7]Q7wVOY7tZ#O7t#O#P8^#P#Q6e#Q;'S7t;'S;=`8m<%lO7tQ8aSOY7tZ;'S7t;'S;=`8m<%lO7tQ8pP;=`<%l7tQ8vSOY6eZ;'S6e;'S;=`9S<%lO6eQ9VP;=`<%l6eU9_W!USOt$Ouw$Ox!P$O!P!Q9w!Q#O$O#P;'S$O;'S;=`$g<%lO$OU:Ob!US!mQOt$Ouw$Ox#O$O#P#Z$O#Z#[9w#[#]$O#]#^9w#^#a$O#a#b9w#b#g$O#g#h9w#h#i$O#i#j9w#j#m$O#m#n9w#n;'S$O;'S;=`$g<%lO$OU;][!USOY;WYZ$OZt;Wtu7tuw;Wwx7tx#O;W#O#P8^#P#Q5b#Q;'S;W;'S;=`<R<%lO;WU<UP;=`<%l;WU<[P;=`<%l5bU<fU!US!]QOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU=PU!US#bQOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU=jW#tQ!USOt$Ouw$Ox!_$O!_!`>S!`#O$O#P;'S$O;'S;=`$g<%lO$OU>XV!USOt$Ouw$Ox#O$O#P#Q>n#Q;'S$O;'S;=`$g<%lO$OU>uU#sQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$O~?^O#k~U?eU#uQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU@OU!US!dQOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU@g^!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#o@b#o;'S$O;'S;=`$g<%lO$OUAjU!SQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OUBR_!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#UCQ#U#o@b#o;'S$O;'S;=`$g<%lO$OUCV`!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#`@b#`#aDX#a#o@b#o;'S$O;'S;=`$g<%lO$OUD^`!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#g@b#g#hE`#h#o@b#o;'S$O;'S;=`$g<%lO$OUEe`!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#X@b#X#YFg#Y#o@b#o;'S$O;'S;=`$g<%lO$OUFn^!ZQ!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#o@b#o;'S$O;'S;=`$g<%lO$O^Gq^#lW!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#o@b#o;'S$O;'S;=`$g<%lO$O^Ht^#nW!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#o@b#o;'S$O;'S;=`$g<%lO$O^Iw`#mW!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#T$O#T#f@b#f#gJy#g#o@b#o;'S$O;'S;=`$g<%lO$OUKO`!USOt$Ouw$Ox}$O}!O@b!O!Q$O!Q![@b![!_$O!_!`Ac!`#O$O#P#
T$O#T#i@b#i#jE`#j#o@b#o;'S$O;'S;=`$g<%lO$OULXUuQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$O~LpO#v~",
tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, pipeStartsLineTokenizer, new LocalTokenGroup("[~RP!O!PU~ZO#f~~", 11)],
topRules: {"Program":[0,35]},
specialized: [{term: 28, get: (value: any, stack: any) => (specializeKeyword(value, stack) << 1), external: specializeKeyword},{term: 28, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}],
tokenPrec: 2711
})

@@ -1,7 +1,5 @@
import { expect, describe, test } from 'bun:test'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('null', () => {
  test('parses null', () => {
    expect('null').toMatchTree(`Null null`)

@@ -1,7 +1,5 @@
import { expect, describe, test } from 'bun:test'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('bitwise operators - grammar', () => {
  test('parses band (bitwise AND)', () => {
    expect('5 band 3').toMatchTree(`

@@ -1,7 +1,5 @@
import { expect, describe, test } from 'bun:test'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('if/else if/else', () => {
  test('parses single line if', () => {
    expect(`if y == 1: 'cool' end`).toMatchTree(`

@@ -1,7 +1,5 @@
import { expect, describe, test } from 'bun:test'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('Array destructuring', () => {
  test('parses array pattern with two variables', () => {
    expect('[ a b ] = [ 1 2 3 4]').toMatchTree(`

@@ -1,7 +1,5 @@
import { expect, describe, test } from 'bun:test'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('try/catch/finally/throw', () => {
  test('parses try with catch', () => {
    expect(`try:

@@ -1,7 +1,5 @@
import { expect, describe, test } from 'bun:test'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('single line function blocks', () => {
  test('work with no args', () => {
    expect(`trap: echo bye bye end`).toMatchTree(`

@@ -1,7 +1,5 @@
import { expect, describe, test } from 'bun:test'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('calling functions', () => {
  test('call with no args', () => {
    expect('tail').toMatchTree(`

@@ -1,7 +1,5 @@
import { expect, describe, test } from 'bun:test'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('import', () => {
  test('parses single import', () => {
    expect(`import str`).toMatchTree(`

@@ -1,7 +1,5 @@
import { expect, describe, test } from 'bun:test'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('number literals', () => {
  test('binary numbers', () => {
    expect('0b110').toMatchTree(`

@@ -1,7 +1,5 @@
import { expect, describe, test } from 'bun:test'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('multiline', () => {
  test('parses multiline strings', () => {
    expect(`'first'\n'second'`).toMatchTree(`

@@ -1,7 +1,4 @@
import { expect, describe, test } from 'bun:test'
import { parser } from '../shrimp'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('pipe expressions', () => {
  test('simple pipe expression', () => {

@@ -1,7 +1,5 @@
import { expect, describe, test } from 'bun:test'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('string interpolation', () => {
  test('string with variable interpolation', () => {
    expect("'hello $name'").toMatchTree(`

@@ -1,389 +0,0 @@
import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
import { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, Do, CurlyString, DotGet, newline, pipeStartsLine } from './shrimp.terms'

// doobie doobie do (we need the `do` keyword to know when we're defining params)
export function specializeKeyword(ident: string) {
  return ident === 'do' ? Do : -1
}

// tell the dotGet searcher about builtin globals
export const globals: string[] = []
export const setGlobals = (newGlobals: string[] | Record<string, any>) => {
  globals.length = 0
  globals.push(...(Array.isArray(newGlobals) ? newGlobals : Object.keys(newGlobals)))
}

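// Usage sketch (editor addition; the names are hypothetical): register builtin
// globals so `str.trim` can tokenize as property access on a known name rather
// than as a bare Word:
//
//   setGlobals(['str', 'math'])         // array form
//   setGlobals({ str: {}, math: {} })   // record form: only the keys are used
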
// The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF.

export const tokenizer = new ExternalTokenizer(
  (input: InputStream, stack: Stack) => {
    const ch = getFullCodePoint(input, 0)

    // Handle curly strings
    if (ch === 123 /* { */) return consumeCurlyString(input, stack)

    if (!isWordChar(ch)) return

    // Don't consume things that start with digits - let Number token handle it
    if (isDigit(ch)) return

    // Don't consume things that start with - or + followed by a digit (negative/positive numbers)
    if ((ch === 45 /* - */ || ch === 43) /* + */ && isDigit(input.peek(1))) return

    const isValidStart = isIdentStart(ch)
    const canBeWord = stack.canShift(Word)

    // Consume all word characters, tracking if it remains a valid identifier
    const { pos, isValidIdentifier, stoppedAtDot } = consumeWordToken(
      input,
      isValidStart,
      canBeWord
    )

    // Check if we should emit IdentifierBeforeDot for property access
    if (stoppedAtDot) {
      const dotGetToken = checkForDotGet(input, stack, pos)

      if (dotGetToken) {
        input.advance(pos)
        input.acceptToken(dotGetToken)
      } else {
        // Not in scope - continue consuming the dot as part of the word
        const afterDot = consumeRestOfWord(input, pos + 1, canBeWord)
        input.advance(afterDot)
        input.acceptToken(Word)
      }

      return
    }

    // Advance past the token we consumed
    input.advance(pos)

    // Choose which token to emit
    if (isValidIdentifier) {
      const token = chooseIdentifierToken(input, stack)
      input.acceptToken(token)
    } else {
      input.acceptToken(Word)
    }
  },
  { contextual: true }
)

// Build identifier text from input stream, handling surrogate pairs for emoji
const buildIdentifierText = (input: InputStream, length: number): string => {
  let text = ''
  for (let i = 0; i < length; i++) {
    const charCode = input.peek(i)
    if (charCode === -1) break

    // Handle surrogate pairs for emoji (UTF-16 encoding)
    if (charCode >= 0xd800 && charCode <= 0xdbff && i + 1 < length) {
      const low = input.peek(i + 1)
      if (low >= 0xdc00 && low <= 0xdfff) {
        text += String.fromCharCode(charCode, low)
        i++ // Skip the low surrogate
        continue
      }
    }
    text += String.fromCharCode(charCode)
  }
  return text
}

// Consume word characters, tracking if it remains a valid identifier
// Returns the position after consuming, whether it's a valid identifier, and if we stopped at a dot
const consumeWordToken = (
  input: InputStream,
  isValidStart: boolean,
  canBeWord: boolean
): { pos: number; isValidIdentifier: boolean; stoppedAtDot: boolean } => {
  let pos = getCharSize(getFullCodePoint(input, 0))
  let isValidIdentifier = isValidStart
  let stoppedAtDot = false

  while (true) {
    const ch = getFullCodePoint(input, pos)

    // Stop at dot if we have a valid identifier (might be property access)
    if (ch === 46 /* . */ && isValidIdentifier) {
      stoppedAtDot = true
      break
    }

    // Stop if we hit a non-word character
    if (!isWordChar(ch)) break

    // Context-aware termination: semicolon/colon can end a word if followed by whitespace
    // This allows `hello; 2` to parse correctly while `hello;world` stays as one word
    if (canBeWord && (ch === 59 /* ; */ || ch === 58) /* : */) {
      const nextCh = getFullCodePoint(input, pos + 1)
      if (!isWordChar(nextCh)) break
    }

    // Track identifier validity: must be lowercase, digit, dash, or emoji/unicode
    if (!isIdentChar(ch)) {
      if (!canBeWord) break
      isValidIdentifier = false
    }

    pos += getCharSize(ch)
  }

  return { pos, isValidIdentifier, stoppedAtDot }
}

// Consume the rest of a word after we've decided not to treat a dot as DotGet
// Used when we have "file.txt" - we already consumed "file", now consume ".txt"
const consumeRestOfWord = (input: InputStream, startPos: number, canBeWord: boolean): number => {
  let pos = startPos
  while (true) {
    const ch = getFullCodePoint(input, pos)

    // Stop if we hit a non-word character
    if (!isWordChar(ch)) break

    // Context-aware termination for semicolon/colon
    if (canBeWord && (ch === 59 /* ; */ || ch === 58) /* : */) {
      const nextCh = getFullCodePoint(input, pos + 1)
      if (!isWordChar(nextCh)) break
    }

    pos += getCharSize(ch)
  }
  return pos
}

// Consumes { curly strings } and tracks braces so you can { have { braces { inside { braces } } }
const consumeCurlyString = (input: InputStream, stack: Stack) => {
  if (!stack.canShift(CurlyString)) return

  let depth = 0
  let pos = 0

  while (true) {
    const ch = input.peek(pos)
    if (ch < 0) return // EOF - invalid

    if (ch === 123) depth++ // {
    else if (ch === 125) { // }
      depth--
      if (depth === 0) {
        pos++ // consume final }
        break
      }
    }

    pos++
  }

  input.acceptToken(CurlyString, pos)
}

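// Worked example (editor addition): for input `{a {b} c} rest`, depth steps
// 1 → 2 → 1 → 0 and pos stops at 9, just past the final `}`, so `{a {b} c}` is
// accepted as one CurlyString token and ` rest` is left for later tokens.
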
// Check if this identifier is in scope (for property access detection)
// Returns IdentifierBeforeDot token if in scope, null otherwise
const checkForDotGet = (input: InputStream, stack: Stack, pos: number): number | null => {
  const identifierText = buildIdentifierText(input, pos)
  const context = stack.context as { scope: { has(name: string): boolean } } | undefined

  // Check if identifier is in scope (lexical scope or globals)
  const inScope = context?.scope.has(identifierText) || globals.includes(identifierText)

  // property access
  if (inScope) return IdentifierBeforeDot

  // Not in scope - check if we're inside a DotGet chain
  // Inside the @skip {} block where DotGet is defined, Word cannot be shifted
  // but Identifier can be. This tells us we're at the RHS of a DotGet.
  const canShiftIdentifier = stack.canShift(Identifier)
  const canShiftWord = stack.canShift(Word)
  const inDotGetChain = canShiftIdentifier && !canShiftWord

  // continue if we're inside a DotGet
  return inDotGetChain ? IdentifierBeforeDot : null
}

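// Example (editor addition): after setGlobals(['str']), the input `str.trim`
// emits IdentifierBeforeDot for `str` because it is in scope, so the grammar
// can build a DotGet; with `file.txt`, `file` is in no scope, the dot gets
// consumed by the caller as part of the word, and `file.txt` stays one Word.
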
// Decide between AssignableIdentifier and Identifier using grammar state + peek-ahead
const chooseIdentifierToken = (input: InputStream, stack: Stack): number => {
  const canAssignable = stack.canShift(AssignableIdentifier)
  const canRegular = stack.canShift(Identifier)

  // Only one option is valid - use it
  if (canAssignable && !canRegular) return AssignableIdentifier
  if (canRegular && !canAssignable) return Identifier

  // Both possible (ambiguous context) - peek ahead for '=' to disambiguate
  // This happens at statement start where both `x = 5` (assign) and `echo x` (call) are valid
  let peekPos = 0
  while (true) {
    const ch = getFullCodePoint(input, peekPos)
    if (isWhiteSpace(ch)) {
      peekPos += getCharSize(ch)
    } else {
      break
    }
  }

  const nextCh = getFullCodePoint(input, peekPos)
  const nextCh2 = getFullCodePoint(input, peekPos + 1)
  const nextCh3 = getFullCodePoint(input, peekPos + 2)

  // Check for ??= (three-character compound operator)
  if (nextCh === 63 /* ? */ && nextCh2 === 63 /* ? */ && nextCh3 === 61 /* = */) {
    const charAfterOp = getFullCodePoint(input, peekPos + 3)
    if (isWhiteSpace(charAfterOp) || charAfterOp === -1 /* EOF */) {
      return AssignableIdentifier
    }
  }

  // Check for compound assignment operators: +=, -=, *=, /=, %=
  if (
    [43 /* + */, 45 /* - */, 42 /* * */, 47 /* / */, 37 /* % */].includes(nextCh) &&
    nextCh2 === 61 /* = */
  ) {
    // Found compound operator, check if it's followed by whitespace
    const charAfterOp = getFullCodePoint(input, peekPos + 2)
    if (isWhiteSpace(charAfterOp) || charAfterOp === -1 /* EOF */) {
      return AssignableIdentifier
    }
  }

  if (nextCh === 61 /* = */) {
    // Found '=', but check if it's followed by whitespace
    // If '=' is followed by non-whitespace (like '=cool*'), it won't be tokenized as Eq
    // In that case, this should be Identifier (for function call), not AssignableIdentifier
    const charAfterEquals = getFullCodePoint(input, peekPos + 1)
    if (isWhiteSpace(charAfterEquals) || charAfterEquals === -1 /* EOF */) {
      return AssignableIdentifier
    }
  }
  return Identifier
}

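// Example (editor addition): at statement start, `x = 5` peeks past the space,
// finds `=` followed by whitespace, and returns AssignableIdentifier; in
// `x =cool` the `=` is not followed by whitespace, so `x` stays an Identifier
// and parses as a function call head rather than an assignment target.
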
// Character classification helpers
export const isIdentStart = (ch: number): boolean => {
  return isLowercaseLetter(ch) || isEmojiOrUnicode(ch)
}

export const isIdentChar = (ch: number): boolean => {
  return isLowercaseLetter(ch) || isDigit(ch) || ch === 45 /* - */ || ch === 63 /* ? */ || isEmojiOrUnicode(ch)
}

const isWhiteSpace = (ch: number): boolean => {
  return ch === 32 /* space */ || ch === 9 /* tab */ || ch === 13 /* \r */
}

const isWordChar = (ch: number): boolean => {
  return (
    !isWhiteSpace(ch) &&
    ch !== 10 /* \n */ &&
    ch !== 41 /* ) */ &&
    ch !== 93 /* ] */ &&
    ch !== -1 /* EOF */
  )
}

const isLowercaseLetter = (ch: number): boolean => {
  return ch >= 97 && ch <= 122 // a-z
}

const isDigit = (ch: number): boolean => {
  return ch >= 48 && ch <= 57 // 0-9
}

const getFullCodePoint = (input: InputStream, pos: number): number => {
  const ch = input.peek(pos)

  // Check if this is a high surrogate (0xD800-0xDBFF)
  if (ch >= 0xd800 && ch <= 0xdbff) {
    const low = input.peek(pos + 1)
    // Check if next is low surrogate (0xDC00-0xDFFF)
    if (low >= 0xdc00 && low <= 0xdfff) {
      // Combine surrogate pair into full code point
      return 0x10000 + ((ch & 0x3ff) << 10) + (low & 0x3ff)
    }
  }

  return ch
}

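// Worked example (editor addition): '🦐' is U+1F990, stored in UTF-16 as the
// surrogate pair 0xD83E 0xDD90, so the math above recovers:
//   0x10000 + ((0xD83E & 0x3ff) << 10) + (0xDD90 & 0x3ff)
//     = 0x10000 + (0x3e << 10) + 0x190
//     = 0x1F990
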
const isEmojiOrUnicode = (ch: number): boolean => {
  return (
    // Basic Emoticons
    (ch >= 0x1f600 && ch <= 0x1f64f) ||
    // Miscellaneous Symbols and Pictographs
    (ch >= 0x1f300 && ch <= 0x1f5ff) ||
    // Transport and Map Symbols
    (ch >= 0x1f680 && ch <= 0x1f6ff) ||
    // Regional Indicator Symbols (flags)
    (ch >= 0x1f1e6 && ch <= 0x1f1ff) ||
    // Miscellaneous Symbols (hearts, stars, weather)
    (ch >= 0x2600 && ch <= 0x26ff) ||
    // Dingbats (scissors, pencils, etc)
    (ch >= 0x2700 && ch <= 0x27bf) ||
    // Supplemental Symbols and Pictographs (newer emojis)
    (ch >= 0x1f900 && ch <= 0x1f9ff) ||
    // Symbols and Pictographs Extended-A (newest emojis)
    (ch >= 0x1fa70 && ch <= 0x1faff) ||
    // Various Asian Characters with emoji presentation
    (ch >= 0x1f018 && ch <= 0x1f270) ||
    // Variation Selectors (for emoji presentation)
    (ch >= 0xfe00 && ch <= 0xfe0f) ||
    // Additional miscellaneous items
    (ch >= 0x238c && ch <= 0x2454) ||
    // Combining Diacritical Marks for Symbols
    (ch >= 0x20d0 && ch <= 0x20ff) ||
    // Latin-1 Supplement (includes ², ³, ¹ and other special chars)
    (ch >= 0x00a0 && ch <= 0x00ff) ||
    // Greek and Coptic (U+0370-U+03FF)
    (ch >= 0x0370 && ch <= 0x03ff) ||
    // Mathematical Alphanumeric Symbols (U+1D400-U+1D7FF)
    (ch >= 0x1d400 && ch <= 0x1d7ff) ||
    // Mathematical Operators (U+2200-U+22FF)
    (ch >= 0x2200 && ch <= 0x22ff) ||
    // Superscripts and Subscripts (U+2070-U+209F)
    (ch >= 0x2070 && ch <= 0x209f) ||
    // Arrows (U+2190-U+21FF)
    (ch >= 0x2190 && ch <= 0x21ff) ||
    // Hiragana (U+3040-U+309F)
    (ch >= 0x3040 && ch <= 0x309f) ||
    // Katakana (U+30A0-U+30FF)
    (ch >= 0x30a0 && ch <= 0x30ff) ||
    // CJK Unified Ideographs (U+4E00-U+9FFF)
    (ch >= 0x4e00 && ch <= 0x9fff)
  )
}

const getCharSize = (ch: number) => (ch > 0xffff ? 2 : 1) // emoji takes 2 UTF-16 code units

export const pipeStartsLineTokenizer = new ExternalTokenizer((input: InputStream, stack: Stack) => {
  const ch = input.peek(0)

  if (ch !== 10 /* \n */) return

  // ignore whitespace
  let offset = 1
  let lastNewlineOffset = 0

  while (true) {
    const ch = input.peek(offset)
    if (ch === 10 /* \n */) {
      lastNewlineOffset = offset
      offset++
    } else if (isWhiteSpace(ch)) {
      offset++
    } else {
      break
    }
  }

  // look for pipe after skipping empty lines
  if (input.peek(offset) === 124 /* | */) {
    input.advance(lastNewlineOffset + 1)
    input.acceptToken(pipeStartsLine)
  } else {
    input.advance(1)
    input.acceptToken(newline)
  }
})

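// Example (editor addition): for input "a\n  | b" this tokenizer sees the
// newline, skips the two spaces, finds `|`, advances just past the last
// newline, and emits pipeStartsLine, which is what lets PipeExpr continue on
// the next line; any other character after the newline(s) yields a plain
// newline token instead.
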
@@ -475,12 +475,12 @@ const isStringDelim = (ch: number): boolean => {
  return ch === c`'` || ch === c`"`
}

const isIdentStart = (char: number | string): boolean => {
export const isIdentStart = (char: number | string): boolean => {
  let ch = typeof char === 'string' ? char.charCodeAt(0) : char
  return isLowercaseLetter(ch) || isEmojiOrUnicode(ch) || ch === 36 /* $ */
}

const isIdentChar = (char: number | string): boolean => {
export const isIdentChar = (char: number | string): boolean => {
  let ch = typeof char === 'string' ? char.charCodeAt(0) : char
  return isIdentStart(ch) || isDigit(ch) || ch === 45 /* - */ || ch === 63 /* ? */
}

@@ -2,36 +2,13 @@ import { expect } from 'bun:test'
import { diffLines } from 'diff'
import color from 'kleur'
import { Scanner, TokenType, type Token } from '#parser/tokenizer2'
import { parser } from '#parser/shrimp'
import { parse, setGlobals } from '#parser/parser2'
import { setGlobals } from '#parser/tokenizer'
import { Tree } from '#parser/node'
import { parse } from '#parser/parser2'
import { globals as prelude } from '#prelude'
import { $ } from 'bun'
import { assert, errorMessage } from '#utils/utils'
import { Compiler } from '#compiler/compiler'
import { run, VM } from 'reefvm'
import { treeToString2, treeToString, VMResultToValue } from '#utils/tree'
import { treeToString2, VMResultToValue } from '#utils/tree'

const regenerateParser = async () => {
  let generate = true
  try {
    const grammarStat = await Bun.file('./src/parser/shrimp.grammar').stat()
    const tokenizerStat = await Bun.file('./src/parser/tokenizer.ts').stat()
    const parserStat = await Bun.file('./src/parser/shrimp.ts').stat()

    if (grammarStat.mtime <= parserStat.mtime && tokenizerStat.mtime <= parserStat.mtime) {
      generate = false
    }
  } catch (e) {
    console.error('Error checking or regenerating parser:', e)
  } finally {
    if (generate) {
      await $`bun generate-parser`
    }
  }
}

await regenerateParser()

// Type declaration for TypeScript
declare module 'bun:test' {

@@ -73,7 +50,8 @@ expect.extend({
    assert(typeof received === 'string', 'toFailParse can only be used with string values')

    try {
      const tree = parser.parse(received)
      const node = parse(received)
      const tree = new Tree(node)
      let hasErrors = false
      tree.iterate({
        enter(n) {

@@ -90,7 +68,7 @@ expect.extend({
          pass: true,
        }
      } else {
        const actual = treeToString(tree, received)
        const actual = treeToString2(node, received)
        return {
          message: () => `Expected input to fail parsing, but it parsed successfully:\n${actual}`,
          pass: false,