new parser(-ish)

This commit is contained in:
parent 0bc923fc82
commit 1f505e484d
@@ -1,9 +1,8 @@
 import { CompilerError } from '#compiler/compilerError.ts'
-import { parser } from '#parser/shrimp.ts'
+import { parseToTree as parse } from '#parser/parser2'
+import { Tree, SyntaxNode } from '#parser/node'
 import * as terms from '#parser/shrimp.terms'
 import { setGlobals } from '#parser/tokenizer'
-import { tokenizeCurlyString } from '#parser/curlyTokenizer'
-import type { SyntaxNode, Tree } from '@lezer/common'
 import { assert, errorMessage } from '#utils/utils'
 import { toBytecode, type Bytecode, type ProgramItem, bytecodeToString } from 'reefvm'
 import {
@@ -63,13 +62,13 @@ export class Compiler {
 constructor(public input: string, globals?: string[] | Record<string, any>) {
 try {
 if (globals) setGlobals(Array.isArray(globals) ? globals : Object.keys(globals))
-const cst = parser.parse(input)
+const cst = parse(input)
-const errors = checkTreeForErrors(cst)
+// const errors = checkTreeForErrors(cst)

-const firstError = errors[0]
+// const firstError = errors[0]
-if (firstError) {
+// if (firstError) {
-throw firstError
+// throw firstError
-}
+// }

 this.#compileCst(cst, input)
 this.bytecode = toBytecode(this.instructions)
@@ -89,8 +88,8 @@ export class Compiler {
 }

 #compileCst(cst: Tree, input: string) {
-const isProgram = cst.topNode.type.id === terms.Program
+const isProgram = cst.topNode.typeId === terms.Program
-assert(isProgram, `Expected Program node, got ${cst.topNode.type.name}`)
+assert(isProgram, `Expected Program node, got ${cst.topNode.type}`)

 let child = cst.topNode.firstChild
 while (child) {
@@ -105,7 +104,7 @@ export class Compiler {
 const value = input.slice(node.from, node.to)
 if (DEBUG) console.log(`🫦 ${node.name}: ${value}`)

-switch (node.type.id) {
+switch (node.typeId) {
 case terms.Number:
 // Handle sign prefix for hex, binary, and octal literals
 // Number() doesn't parse '-0xFF', '+0xFF', '-0o77', etc. correctly
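Aside on the comment above: Number() really does reject signed hex, binary, and octal strings, so the sign has to be peeled off before converting. A minimal sketch of that idea (the helper name is illustrative, not the compiler's actual code):

// Number('-0xFF') is NaN, but Number('0xFF') is 255, so strip the sign first.
const parseSignedLiteral = (raw: string): number => {
  const sign = raw[0] === '-' ? -1 : 1
  const body = raw[0] === '-' || raw[0] === '+' ? raw.slice(1) : raw
  return sign * Number(body)
}

parseSignedLiteral('-0xFF') // -255
parseSignedLiteral('+0o77') // 63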
@@ -124,9 +123,6 @@ export class Compiler {
 return [[`PUSH`, numberValue]]

 case terms.String: {
-if (node.firstChild?.type.id === terms.CurlyString)
-return this.#compileCurlyString(value, input)
-
 const { parts, hasInterpolation } = getStringParts(node, input)

 // Simple string without interpolation or escapes - extract text directly
@@ -141,7 +137,7 @@ export class Compiler {
 parts.forEach((part) => {
 const partValue = input.slice(part.from, part.to)

-switch (part.type.id) {
+switch (part.typeId) {
 case terms.StringFragment:
 // Plain text fragment - just push as-is
 instructions.push(['PUSH', partValue])
@@ -165,7 +161,7 @@ export class Compiler {

 default:
 throw new CompilerError(
-`Unexpected string part: ${part.type.name}`,
+`Unexpected string part: ${part.type}`,
 part.from,
 part.to
 )
@@ -222,7 +218,7 @@ export class Compiler {
 instructions.push(['TRY_LOAD', objectName])

 const flattenProperty = (prop: SyntaxNode): void => {
-if (prop.type.id === terms.DotGet) {
+if (prop.typeId === terms.DotGet) {
 const nestedParts = getDotGetParts(prop, input)

 const nestedObjectValue = input.slice(nestedParts.object.from, nestedParts.object.to)
@@ -231,7 +227,7 @@ export class Compiler {

 flattenProperty(nestedParts.property)
 } else {
-if (prop.type.id === terms.ParenExpr) {
+if (prop.typeId === terms.ParenExpr) {
 instructions.push(...this.#compileNode(prop, input))
 } else {
 const propertyValue = input.slice(prop.from, prop.to)
@@ -440,7 +436,7 @@ export class Compiler {
 }

 case terms.FunctionCallOrIdentifier: {
-if (node.firstChild?.type.id === terms.DotGet) {
+if (node.firstChild?.typeId === terms.DotGet) {
 const instructions: ProgramItem[] = []
 const callLabel: Label = `.call_dotget_${++this.labelCount}`
 const afterLabel: Label = `.after_dotget_${++this.labelCount}`
@@ -531,20 +527,20 @@ export class Compiler {
 instructions.push([`${fnLabel}:`])
 instructions.push(
 ...block
-.filter((x) => x.type.name !== 'keyword')
+.filter((x) => x.type !== 'keyword')
 .map((x) => this.#compileNode(x!, input))
 .flat()
 )
 instructions.push(['RETURN'])
 instructions.push([`${afterLabel}:`])

-if (fn?.type.id === terms.FunctionCallOrIdentifier) {
+if (fn?.typeId === terms.FunctionCallOrIdentifier) {
 instructions.push(['LOAD', input.slice(fn!.from, fn!.to)])
 instructions.push(['MAKE_FUNCTION', [], fnLabel])
 instructions.push(['PUSH', 1])
 instructions.push(['PUSH', 0])
 instructions.push(['CALL'])
-} else if (fn?.type.id === terms.FunctionCall) {
+} else if (fn?.typeId === terms.FunctionCall) {
 let body = this.#compileNode(fn!, input)
 const namedArgCount = (body[body.length - 2]![1] as number) * 2
 const startSlice = body.length - namedArgCount - 3
@@ -737,11 +733,11 @@ export class Compiler {
 instructions.push(...this.#compileNode(identifierNode, input))

 const isUnderscoreInPositionalArgs = positionalArgs.some(
-(arg) => arg.type.id === terms.Underscore
+(arg) => arg.typeId === terms.Underscore
 )
 const isUnderscoreInNamedArgs = namedArgs.some((arg) => {
 const { valueNode } = getNamedArgParts(arg, input)
-return valueNode.type.id === terms.Underscore
+return valueNode.typeId === terms.Underscore
 })

 const shouldPushPositionalArg = !isUnderscoreInPositionalArgs && !isUnderscoreInNamedArgs
@@ -752,7 +748,7 @@ export class Compiler {
 }

 positionalArgs.forEach((arg) => {
-if (arg.type.id === terms.Underscore) {
+if (arg.typeId === terms.Underscore) {
 instructions.push(['LOAD', pipeValName])
 } else {
 instructions.push(...this.#compileNode(arg, input))
@@ -762,7 +758,7 @@ export class Compiler {
 namedArgs.forEach((arg) => {
 const { name, valueNode } = getNamedArgParts(arg, input)
 instructions.push(['PUSH', name])
-if (valueNode.type.id === terms.Underscore) {
+if (valueNode.typeId === terms.Underscore) {
 instructions.push(['LOAD', pipeValName])
 } else {
 instructions.push(...this.#compileNode(valueNode, input))
@@ -784,7 +780,7 @@ export class Compiler {
 // = can be a valid word, and is also valid inside words, so for now we cheat
 // and check for arrays that look like `[ = ]` to interpret them as
 // empty dicts
-if (children.length === 1 && children[0]!.type.id === terms.Word) {
+if (children.length === 1 && children[0]!.typeId === terms.Word) {
 const child = children[0]!
 if (input.slice(child.from, child.to) === '=') {
 return [['MAKE_DICT', 0]]
@@ -836,8 +832,8 @@ export class Compiler {
 case terms.Import: {
 const instructions: ProgramItem[] = []
 const [_import, ...nodes] = getAllChildren(node)
-const args = nodes.filter(node => node.type.id === terms.Identifier)
+const args = nodes.filter(node => node.typeId === terms.Identifier)
-const namedArgs = nodes.filter(node => node.type.id === terms.NamedArg)
+const namedArgs = nodes.filter(node => node.typeId === terms.NamedArg)

 instructions.push(['LOAD', 'import'])

@@ -864,7 +860,7 @@ export class Compiler {

 default:
 throw new CompilerError(
-`Compiler doesn't know how to handle a "${node.type.name}" (${node.type.id}) node.`,
+`Compiler doesn't know how to handle a "${node.type}" (${node.typeId}) node.`,
 node.from,
 node.to
 )
@@ -918,26 +914,4 @@ export class Compiler {

 return instructions
 }
-
-#compileCurlyString(value: string, input: string): ProgramItem[] {
-const instructions: ProgramItem[] = []
-const nodes = tokenizeCurlyString(value)
-
-nodes.forEach((node) => {
-if (typeof node === 'string') {
-instructions.push(['PUSH', node])
-} else {
-const [input, topNode] = node
-let child = topNode.firstChild
-while (child) {
-instructions.push(...this.#compileNode(child, input))
-child = child.nextSibling
-}
-}
-})
-
-instructions.push(['STR_CONCAT', nodes.length])
-
-return instructions
-}
 }
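Review note: after the hunks above, the compiler reaches the CST only through the parseToTree alias imported at the top, and the tree-level error pre-scan is commented out, so a bad program presumably surfaces as the plain string errors thrown inside parser2 rather than as a CompilerError. A rough sketch of the new flow, using only the pieces shown above:

import { parseToTree as parse } from '#parser/parser2'

// sketch - mirrors the constructor path in the first hunks of this file
const cst = parse('x = 1 + 2')   // Tree wrapping the new SyntaxNode Program
// checkTreeForErrors(cst) is commented out, so no CompilerError is raised here;
// a parse failure would instead throw from inside parser2 itself.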
@@ -1,16 +1,17 @@
 import { CompilerError } from '#compiler/compilerError.ts'
+import type { SyntaxNode, Tree } from '#parser/node'
 import * as terms from '#parser/shrimp.terms'
-import type { SyntaxNode, Tree } from '@lezer/common'

 export const checkTreeForErrors = (tree: Tree): CompilerError[] => {
 const errors: CompilerError[] = []
-tree.iterate({
-enter: (node) => {
-if (node.type.isError) {
-errors.push(new CompilerError(`Unexpected syntax.`, node.from, node.to))
-}
-},
-})
+// tree.iterate({
+// enter: (node) => {
+// if (node.type.isError) {
+// errors.push(new CompilerError(`Unexpected syntax.`, node.from, node.to))
+// }
+// },
+// })

 return errors
 }
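With tree.iterate commented out, checkTreeForErrors now always returns an empty array. If error checking comes back for the hand-written tree, a recursive walk over SyntaxNode.children would be the obvious shape - a sketch only, and note that SyntaxNode.isError currently always returns false, so this would find nothing yet:

const walk = (node: SyntaxNode, errors: CompilerError[]) => {
  if (node.isError) errors.push(new CompilerError('Unexpected syntax.', node.from, node.to))
  for (const child of node.children) walk(child, errors)
}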
@@ -23,7 +24,7 @@ export const getAllChildren = (node: SyntaxNode): SyntaxNode[] => {
 child = child.nextSibling
 }

-return children.filter((n) => n.type.id !== terms.Comment)
+return children.filter((n) => n.typeId !== terms.Comment)
 }

 export const getBinaryParts = (node: SyntaxNode) => {
@@ -50,15 +51,14 @@ export const getAssignmentParts = (node: SyntaxNode) => {
 }

 // array destructuring
-if (left && left.type.id === terms.Array) {
+if (left && left.typeId === terms.Array) {
-const identifiers = getAllChildren(left).filter((child) => child.type.id === terms.Identifier)
+const identifiers = getAllChildren(left).filter((child) => child.typeId === terms.Identifier)
 return { arrayPattern: identifiers, right }
 }

-if (!left || left.type.id !== terms.AssignableIdentifier) {
+if (!left || left.typeId !== terms.AssignableIdentifier) {
 throw new CompilerError(
-`Assign left child must be an AssignableIdentifier or Array, got ${
-left ? left.type.name : 'none'
+`Assign left child must be an AssignableIdentifier or Array, got ${left ? left.type : 'none'
 }`,
 node.from,
 node.to
@@ -72,10 +72,9 @@ export const getCompoundAssignmentParts = (node: SyntaxNode) => {
 const children = getAllChildren(node)
 const [left, operator, right] = children

-if (!left || left.type.id !== terms.AssignableIdentifier) {
+if (!left || left.typeId !== terms.AssignableIdentifier) {
 throw new CompilerError(
-`CompoundAssign left child must be an AssignableIdentifier, got ${
-left ? left.type.name : 'none'
+`CompoundAssign left child must be an AssignableIdentifier, got ${left ? left.type : 'none'
 }`,
 node.from,
 node.to
@@ -104,9 +103,9 @@ export const getFunctionDefParts = (node: SyntaxNode, input: string) => {
 }

 const paramNames = getAllChildren(paramsNode).map((param) => {
-if (param.type.id !== terms.Identifier && param.type.id !== terms.NamedParam) {
+if (param.typeId !== terms.Identifier && param.typeId !== terms.NamedParam) {
 throw new CompilerError(
-`FunctionDef params must be Identifier or NamedParam, got ${param.type.name}`,
+`FunctionDef params must be Identifier or NamedParam, got ${param.type}`,
 param.from,
 param.to
 )
@@ -123,7 +122,7 @@ export const getFunctionDefParts = (node: SyntaxNode, input: string) => {
 let finallyBody: SyntaxNode | undefined

 for (const child of rest) {
-if (child.type.id === terms.CatchExpr) {
+if (child.typeId === terms.CatchExpr) {
 catchExpr = child
 const catchChildren = getAllChildren(child)
 const [_catchKeyword, identifierNode, _colon, body] = catchChildren
@@ -136,7 +135,7 @@ export const getFunctionDefParts = (node: SyntaxNode, input: string) => {
 }
 catchVariable = input.slice(identifierNode.from, identifierNode.to)
 catchBody = body
-} else if (child.type.id === terms.FinallyExpr) {
+} else if (child.typeId === terms.FinallyExpr) {
 finallyExpr = child
 const finallyChildren = getAllChildren(child)
 const [_finallyKeyword, _colon, body] = finallyChildren
@@ -148,7 +147,7 @@ export const getFunctionDefParts = (node: SyntaxNode, input: string) => {
 )
 }
 finallyBody = body
-} else if (child.type.name === 'keyword' && input.slice(child.from, child.to) === 'end') {
+} else if (child.type === 'keyword' && input.slice(child.from, child.to) === 'end') {
 // Skip the end keyword
 } else {
 bodyNodes.push(child)
@@ -165,9 +164,9 @@ export const getFunctionCallParts = (node: SyntaxNode, input: string) => {
 throw new CompilerError(`FunctionCall expected at least 1 child, got 0`, node.from, node.to)
 }

-const namedArgs = args.filter((arg) => arg.type.id === terms.NamedArg)
+const namedArgs = args.filter((arg) => arg.typeId === terms.NamedArg)
 const positionalArgs = args
-.filter((arg) => arg.type.id === terms.PositionalArg)
+.filter((arg) => arg.typeId === terms.PositionalArg)
 .map((arg) => {
 const child = arg.firstChild
 if (!child) throw new CompilerError(`PositionalArg has no child`, arg.from, arg.to)
@@ -208,16 +207,16 @@ export const getIfExprParts = (node: SyntaxNode, input: string) => {
 rest.forEach((child) => {
 const parts = getAllChildren(child)

-if (child.type.id === terms.ElseExpr) {
+if (child.typeId === terms.ElseExpr) {
 if (parts.length !== 3) {
 const message = `ElseExpr expected 1 child, got ${parts.length}`
 throw new CompilerError(message, child.from, child.to)
 }
 elseThenBlock = parts.at(-1)
-} else if (child.type.id === terms.ElseIfExpr) {
+} else if (child.typeId === terms.ElseIfExpr) {
 const [_else, _if, conditional, _colon, thenBlock] = parts
 if (!conditional || !thenBlock) {
-const names = parts.map((p) => p.type.name).join(', ')
+const names = parts.map((p) => p.type).join(', ')
 const message = `ElseIfExpr expected conditional and thenBlock, got ${names}`
 throw new CompilerError(message, child.from, child.to)
 }
@@ -249,10 +248,10 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
 // The text is just between the quotes
 const parts = children.filter((child) => {
 return (
-child.type.id === terms.StringFragment ||
+child.typeId === terms.StringFragment ||
-child.type.id === terms.Interpolation ||
+child.typeId === terms.Interpolation ||
-child.type.id === terms.EscapeSeq ||
+child.typeId === terms.EscapeSeq ||
-child.type.id === terms.CurlyString
+child.typeId === terms.CurlyString

 )
 })
@@ -260,13 +259,13 @@
 // Validate each part is the expected type
 parts.forEach((part) => {
 if (
-part.type.id !== terms.StringFragment &&
+part.typeId !== terms.StringFragment &&
-part.type.id !== terms.Interpolation &&
+part.typeId !== terms.Interpolation &&
-part.type.id !== terms.EscapeSeq &&
+part.typeId !== terms.EscapeSeq &&
-part.type.id !== terms.CurlyString
+part.typeId !== terms.CurlyString
 ) {
 throw new CompilerError(
-`String child must be StringFragment, Interpolation, or EscapeSeq, got ${part.type.name}`,
+`String child must be StringFragment, Interpolation, or EscapeSeq, got ${part.type}`,
 part.from,
 part.to
 )
@@ -276,7 +275,7 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
 // hasInterpolation means the string has interpolation ($var) or escape sequences (\n)
 // A simple string like 'hello' has one StringFragment but no interpolation
 const hasInterpolation = parts.some(
-(p) => p.type.id === terms.Interpolation || p.type.id === terms.EscapeSeq
+(p) => p.typeId === terms.Interpolation || p.typeId === terms.EscapeSeq
 )
 return { parts, hasInterpolation }
 }
@@ -293,17 +292,17 @@ export const getDotGetParts = (node: SyntaxNode, input: string) => {
 )
 }

-if (object.type.id !== terms.IdentifierBeforeDot && object.type.id !== terms.Dollar) {
+if (object.typeId !== terms.IdentifierBeforeDot && object.typeId !== terms.Dollar) {
 throw new CompilerError(
-`DotGet object must be an IdentifierBeforeDot, got ${object.type.name}`,
+`DotGet object must be an IdentifierBeforeDot, got ${object.type}`,
 object.from,
 object.to
 )
 }

-if (![terms.Identifier, terms.Number, terms.ParenExpr, terms.DotGet].includes(property.type.id)) {
+if (![terms.Identifier, terms.Number, terms.ParenExpr, terms.DotGet].includes(property.typeId)) {
 throw new CompilerError(
-`DotGet property must be an Identifier, Number, ParenExpr, or DotGet, got ${property.type.name}`,
+`DotGet property must be an Identifier, Number, ParenExpr, or DotGet, got ${property.type}`,
 property.from,
 property.to
 )
@@ -335,7 +334,7 @@ export const getTryExprParts = (node: SyntaxNode, input: string) => {
 let finallyBody: SyntaxNode | undefined

 rest.forEach((child) => {
-if (child.type.id === terms.CatchExpr) {
+if (child.typeId === terms.CatchExpr) {
 catchExpr = child
 const catchChildren = getAllChildren(child)
 const [_catchKeyword, identifierNode, _colon, body] = catchChildren
@@ -348,7 +347,7 @@ export const getTryExprParts = (node: SyntaxNode, input: string) => {
 }
 catchVariable = input.slice(identifierNode.from, identifierNode.to)
 catchBody = body
-} else if (child.type.id === terms.FinallyExpr) {
+} else if (child.typeId === terms.FinallyExpr) {
 finallyExpr = child
 const finallyChildren = getAllChildren(child)
 const [_finallyKeyword, _colon, body] = finallyChildren
src/parser/node.ts (new file, 232 lines)

import { type Token, TokenType } from './tokenizer2'
import { nameToId } from './terms'

export type NodeType =
  | 'Program'
  | 'Block'

  | 'FunctionCall'
  | 'FunctionCallOrIdentifier'
  | 'FunctionCallWithBlock'
  | 'PositionalArg'
  | 'NamedArg'

  | 'FunctionDef'
  | 'Params'
  | 'NamedParam'

  | 'Null'
  | 'Boolean'
  | 'Number'
  | 'String'
  | 'StringFragment'
  | 'CurlyString'
  | 'DoubleQuote'
  | 'EscapeSeq'
  | 'Interpolation'
  | 'Regex'
  | 'Identifier'
  | 'AssignableIdentifier'
  | 'IdentifierBeforeDot'
  | 'Word'
  | 'Array'
  | 'Dict'
  | 'Comment'

  | 'BinOp'
  | 'ConditionalOp'
  | 'ParenExpr'
  | 'Assign'
  | 'CompoundAssign'
  | 'DotGet'
  | 'PipeExpr'

  | 'IfExpr'
  | 'ElseIfExpr'
  | 'ElseExpr'
  | 'WhileExpr'
  | 'TryExpr'
  | 'CatchExpr'
  | 'FinallyExpr'
  | 'Throw'

  | 'Eq'
  | 'Modulo'
  | 'Plus'
  | 'Star'
  | 'Slash'

  | 'Import'
  | 'Do'
  | 'colon'
  | 'keyword'
  | 'operator'

// TODO: remove this when we switch from lezer
export const operators: Record<string, any> = {
  // Logic
  'and': 'And',
  'or': 'Or',

  // Bitwise
  'band': 'Band',
  'bor': 'Bor',
  'bxor': 'Bxor',
  '>>>': 'Ushr',
  '>>': 'Shr',
  '<<': 'Shl',

  // Comparison
  '>=': 'Gte',
  '<=': 'Lte',
  '>': 'Gt',
  '<': 'Lt',
  '!=': 'Neq',
  '==': 'EqEq',

  // Compound assignment operators
  '??=': 'NullishEq',
  '+=': 'PlusEq',
  '-=': 'MinusEq',
  '*=': 'StarEq',
  '/=': 'SlashEq',
  '%=': 'ModuloEq',

  // Nullish coalescing
  '??': 'NullishCoalesce',

  // Math
  '*': 'Star',
  '**': 'StarStar',
  '=': 'Eq',
  '/': 'Slash',
  '+': 'Plus',
  '-': 'Minus',
  '%': 'Modulo',

  // Dotget
  '.': 'Dot',

  // Pipe
  '|': 'operator',
}

export class Tree {
  constructor(public topNode: SyntaxNode) { }
}

export class SyntaxNode {
  type: NodeType
  from: number
  to: number
  parent: SyntaxNode | null
  children: SyntaxNode[] = []

  constructor(type: NodeType, from: number, to: number, parent: SyntaxNode | null = null) {
    this.type = type
    this.from = from
    this.to = to
    this.parent = parent
  }

  get typeId(): number {
    return nameToId(this.type)
  }

  static from(token: Token, parent?: SyntaxNode): SyntaxNode {
    return new SyntaxNode(TokenType[token.type] as NodeType, token.from, token.to, parent ?? null)
  }

  get name(): string {
    return this.type
  }

  get isError(): boolean {
    return false
  }

  get firstChild(): SyntaxNode | null {
    return this.children[0] ?? null
  }

  get lastChild(): SyntaxNode | null {
    return this.children.at(-1) ?? null
  }

  get nextSibling(): SyntaxNode | null {
    if (!this.parent) return null
    const siblings = this.parent.children
    const index = siblings.indexOf(this)
    return index >= 0 && index < siblings.length - 1 ? siblings[index + 1]! : null
  }

  get prevSibling(): SyntaxNode | null {
    if (!this.parent) return null
    const siblings = this.parent.children
    const index = siblings.indexOf(this)
    return index > 0 ? siblings[index - 1]! : null
  }

  add(node: SyntaxNode) {
    node.parent = this
    this.children.push(node)
  }

  push(...nodes: SyntaxNode[]): SyntaxNode {
    nodes.forEach(child => child.parent = this)
    this.children.push(...nodes)
    return this
  }

  toString(): string {
    return this.type
  }
}

// Operator precedence (binding power) - higher = tighter binding
export const precedence: Record<string, number> = {
  // Logical
  'or': 10,
  'and': 20,

  // Comparison
  '==': 30,
  '!=': 30,
  '<': 30,
  '>': 30,
  '<=': 30,
  '>=': 30,

  // Nullish coalescing
  '??': 35,

  // Bitwise shift (lower precedence than addition)
  '<<': 37,
  '>>': 37,
  '>>>': 37,

  // Addition/Subtraction
  '+': 40,
  '-': 40,

  // Bitwise AND/OR/XOR (between addition and multiplication)
  'band': 45,
  'bor': 45,
  'bxor': 45,

  // Multiplication/Division/Modulo
  '*': 50,
  '/': 50,
  '%': 50,

  // Exponentiation (right-associative)
  '**': 60,
}

export const conditionals = new Set([
  '==', '!=', '<', '>', '<=', '>=', '??', 'and', 'or'
])

export const compounds = [
  '??=', '+=', '-=', '*=', '/=', '%='
]
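A small illustration of the node API above, hand-built rather than produced by the parser; typeId is left out because nameToId comes from './terms', which is not part of this diff:

const program = new SyntaxNode('Program', 0, 9)
const assign = new SyntaxNode('Assign', 0, 9)
program.add(assign)

program.firstChild?.type   // 'Assign'
assign.parent === program  // true
assign.nextSibling         // null - only child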
src/parser/parser2.ts (new file, 945 lines)

import { Scanner, type Token, TokenType } from './tokenizer2'
import { Tree, SyntaxNode, operators, precedence, conditionals, compounds } from './node'
import { globals } from './tokenizer'
import { parseString } from './stringParser'

const $T = TokenType

export const parse = (input: string): SyntaxNode => {
  const parser = new Parser()
  return parser.parse(input)
}

export const parseToTree = (input: string): Tree => {
  return new Tree(parse(input))
}

class Scope {
  parent?: Scope
  set = new Set<string>()

  constructor(parent?: Scope) {
    this.parent = parent

    // no parent means this is global scope
    if (!parent) for (const name of globals) this.add(name)
  }

  add(key: string) {
    this.set.add(key)
  }

  has(key: string): boolean {
    return this.set.has(key) || this.parent?.has(key) || false
  }
}

export class Parser {
  tokens: Token[] = []
  pos = 0
  inParens = 0
  input = ''
  scope = new Scope
  inTestExpr = false

  parse(input: string): SyntaxNode {
    const scanner = new Scanner()
    this.tokens = scanner.tokenize(input)
    this.pos = 0
    this.input = input
    this.scope = new Scope()
    this.inTestExpr = false

    const node = new SyntaxNode('Program', 0, input.length)

    while (!this.isEOF()) {
      if (this.is($T.Newline) || this.is($T.Semicolon)) {
        this.next()
        continue
      }

      const prevPos = this.pos
      const stmt = this.statement()
      if (stmt) node.add(stmt)

      if (this.pos === prevPos && !this.isEOF())
        throw "parser didn't advance - you need to call next()\n\n ${this.input}\n"
    }

    return node
  }

  //
  // parse foundation nodes - statements, expressions
  //

  // statement is a line of code
  statement(): SyntaxNode | null {
    if (this.is($T.Comment))
      return this.comment()

    while (this.is($T.Newline) || this.is($T.Semicolon))
      this.next()

    if (this.isEOF() || this.isExprEndKeyword())
      return null

    return this.expression()
  }

  // expressions can be found in four places:
  // 1. line of code
  // 2. right side of assignment
  // 3. if/while conditions
  // 4. inside (parens)
  expression(allowPipe = true): SyntaxNode {
    let expr

    // x = value
    if (this.is($T.Identifier) && (
      this.nextIs($T.Operator, '=') || compounds.some(x => this.nextIs($T.Operator, x))
    ))
      expr = this.assign()

    // if, while, do, etc
    else if (this.is($T.Keyword))
      expr = this.keywords()

    // dotget
    else if (this.nextIs($T.Operator, '.'))
      expr = this.dotGetFunctionCall()

    // echo hello world
    else if (this.is($T.Identifier) && !this.nextIs($T.Operator) && !this.nextIsExprEnd())
      expr = this.functionCall()

    // bare-function-call
    else if (this.is($T.Identifier) && this.nextIsExprEnd())
      expr = this.functionCallOrIdentifier()

    // everything else
    else
      expr = this.exprWithPrecedence()

    // check for destructuring
    if (expr.type === 'Array' && this.is($T.Operator, '='))
      return this.destructure(expr)

    // check for parens function call
    // ex: (ref my-func) my-arg
    // but not if followed by operator: (x) + 1
    if (expr.type === 'ParenExpr' && !this.isExprEnd() && !this.is($T.Operator))
      expr = this.functionCall(expr)

    // if there's an operator (not pipe), continue with precedence parsing
    if (this.is($T.Operator) && !this.isPipe()) {
      expr = this.continueWithPrecedence(expr)
    }

    // one | echo
    if (allowPipe && this.isPipe())
      return this.pipe(expr)

    // regular
    else
      return expr
  }

  // Continue parsing with precedence after we already have a left side
  continueWithPrecedence(left: SyntaxNode, minBp = 0): SyntaxNode {
    while (this.is($T.Operator)) {
      const op = this.current().value!
      const bp = precedence[op]

      // operator has lower precedence than required, stop
      if (bp === undefined || bp < minBp) break

      const opNode = this.op()

      // right-associative operators (like **) use same bp, others use bp + 1
      const nextMinBp = op === '**' ? bp : bp + 1

      // parse right-hand side with higher precedence
      const right = this.exprWithPrecedence(nextMinBp)

      const nodeType = conditionals.has(op) ? 'ConditionalOp' : 'BinOp'
      const node = new SyntaxNode(nodeType, left.from, right.to)

      node.push(left, opNode, right)
      left = node
    }

    return left
  }

  // piping | stuff | is | cool
  pipe(left: SyntaxNode): SyntaxNode {
    const canLookPastNewlines = this.inParens === 0
    const parts: SyntaxNode[] = [left]

    while (this.isPipe()) {
      // consume newlines before pipe (only if not in parens)
      if (canLookPastNewlines) {
        while (this.is($T.Newline)) this.next()
      }

      const pipeOp = this.op('|')
      pipeOp.type = 'operator'
      parts.push(pipeOp)

      // consume newlines after pipe (only if not in parens)
      if (canLookPastNewlines) {
        while (this.is($T.Newline)) this.next()
      }

      // parse right side - don't allow nested pipes
      parts.push(this.expression(false))
    }

    const node = new SyntaxNode('PipeExpr', parts[0]!.from, parts.at(-1)!.to)
    return node.push(...parts)
  }

  // Pratt parser - parses expressions with precedence climbing
  // bp = binding precedence
  exprWithPrecedence(minBp = 0): SyntaxNode {
    let left = this.value()

    // infix operators with precedence
    while (this.is($T.Operator)) {
      const op = this.current().value!
      const bp = precedence[op]

      // operator has lower precedence than required, stop
      if (bp === undefined || bp < minBp) break

      const opNode = this.op()

      // right-associative operators (like **) use same bp, others use bp + 1
      const nextMinBp = op === '**' ? bp : bp + 1

      // parse right-hand side with higher precedence
      const right = this.exprWithPrecedence(nextMinBp)

      const nodeType = conditionals.has(op) ? 'ConditionalOp' : 'BinOp'
      const node = new SyntaxNode(nodeType, left.from, right.to)

      node.push(left, opNode, right)
      left = node
    }

    return left
  }

  // if, while, do, etc
  keywords(): SyntaxNode {
    if (this.is($T.Keyword, 'if'))
      return this.if()

    if (this.is($T.Keyword, 'while'))
      return this.while()

    if (this.is($T.Keyword, 'do'))
      return this.do()

    if (this.is($T.Keyword, 'try'))
      return this.try()

    if (this.is($T.Keyword, 'throw'))
      return this.throw()

    if (this.is($T.Keyword, 'import'))
      return this.import()

    return this.expect($T.Keyword, 'if/while/do/import') as never
  }

  // value can be an atom or a (parens that gets turned into an atom)
  // values are used in a few places:
  // 1. function arguments
  // 2. array/dict members
  // 3. binary operations
  // 4. anywhere an expression can be used
  value(): SyntaxNode {
    if (this.is($T.OpenParen))
      return this.parens()

    if (this.is($T.OpenBracket))
      return this.arrayOrDict()

    // dotget
    if (this.nextIs($T.Operator, '.'))
      return this.dotGet()

    return this.atom()
  }

  //
  // parse specific nodes
  //

  // [ 1 2 3 ]
  array(): SyntaxNode {
    const open = this.expect($T.OpenBracket)

    const values = []
    while (!this.is($T.CloseBracket) && !this.isEOF()) {
      if (this.is($T.Semicolon) || this.is($T.Newline)) {
        this.next()
        continue
      }

      if (this.is($T.Comment)) {
        values.push(this.comment())
        continue
      }

      values.push(this.value())
    }

    const close = this.expect($T.CloseBracket)

    const node = new SyntaxNode('Array', open.from, close.to)
    return node.push(...values)
  }

  // which are we dealing with? ignores leading newlines and comments
  arrayOrDict(): SyntaxNode {
    let peek = 1
    let curr = this.peek(peek++)
    let isDict = false

    while (curr && curr.type !== $T.CloseBracket) {
      // definitely a dict
      if (curr.type === $T.NamedArgPrefix) {
        isDict = true
        break
      }

      // empty dict
      if (curr.type === $T.Operator && curr.value === '=') {
        isDict = true
        break
      }

      // probably an array
      if (curr.type !== $T.Comment && curr.type !== $T.Semicolon && curr.type !== $T.Newline)
        break

      curr = this.peek(peek++)
    }

    return isDict ? this.dict() : this.array()
  }

  // x = true
  assign(): SyntaxNode {
    const ident = this.assignableIdentifier()
    const opToken = this.current()!
    const op = this.op()
    const expr = this.expression()

    const node = new SyntaxNode(
      opToken.value === '=' ? 'Assign' : 'CompoundAssign',
      ident.from,
      expr.to
    )

    return node.push(ident, op, expr)
  }

  // identifier used in assignment (TODO: legacy lezer quirk)
  assignableIdentifier(): SyntaxNode {
    const token = this.expect($T.Identifier)
    this.scope.add(token.value!)
    const node = SyntaxNode.from(token)
    node.type = 'AssignableIdentifier'
    return node
  }

  // atoms are the basic building blocks: literals, identifiers, words
  atom() {
    if (this.is($T.String))
      return this.string()

    if (this.isAny($T.Null, $T.Boolean, $T.Number, $T.Identifier, $T.Word, $T.Regex, $T.Underscore))
      return SyntaxNode.from(this.next())

    const next = this.next()
    throw `[atom] unexpected token ${TokenType[next.type]}: ${JSON.stringify(next)}\n\n ${this.input}\n`
  }

  // blocks in if, do, special calls, etc
  // `: something end`
  //
  // `blockNode` determines whether we return [colon, BlockNode, end] or
  // just a list of statements like [colon, stmt1, stmt2, end]
  block(blockNode = true): SyntaxNode[] {
    const stmts: SyntaxNode[] = []
    const colon = this.colon()

    while (!this.isExprEndKeyword() && !this.isEOF()) {
      const stmt = this.statement()
      if (stmt) stmts.push(stmt)
    }

    const out = [colon]

    if (blockNode) {
      const block = new SyntaxNode('Block', stmts[0]!.from, stmts.at(-1)!.to)
      block.push(...stmts)
      out.push(block)
    } else {
      out.push(...stmts)
    }

    return out
  }

  // catch err: block
  catch(): SyntaxNode {
    const keyword = this.keyword('catch')

    let catchVar
    if (this.is($T.Identifier))
      catchVar = this.identifier()

    const block = this.block()

    const node = new SyntaxNode('CatchExpr', keyword.from, block.at(-1)!.to)

    node.push(keyword)
    if (catchVar) node.push(catchVar)
    return node.push(...block)
  }

  // colon
  colon(): SyntaxNode {
    const colon = SyntaxNode.from(this.expect($T.Colon))
    colon.type = 'colon' // TODO lezer legacy
    return colon
  }

  // # comment
  comment(): SyntaxNode {
    return SyntaxNode.from(this.expect($T.Comment))
  }

  // [ a b c ] = [ 1 2 3 ]
  destructure(array: SyntaxNode): SyntaxNode {
    const eq = this.op('=')
    const val = this.expression()

    for (const ident of array.children) {
      const varName = this.input.slice(ident.from, ident.to)
      this.scope.add(varName)
    }

    const node = new SyntaxNode('Assign', array.from, val.to)
    return node.push(array, eq, val)
  }

  // [ a=1 b=true c='three' ]
  dict(): SyntaxNode {
    const open = this.expect($T.OpenBracket)

    // empty dict [=] or [ = ]
    if (this.is($T.Operator, '=') && this.nextIs($T.CloseBracket)) {
      const _op = this.next()
      const close = this.next()
      return new SyntaxNode('Dict', open.from, close.to)
    }

    const values = []
    while (!this.is($T.CloseBracket) && !this.isEOF()) {
      if (this.is($T.Semicolon) || this.is($T.Newline)) {
        this.next()
        continue
      }

      if (this.is($T.Comment)) {
        values.push(this.comment())
        continue
      }

      if (this.is($T.NamedArgPrefix)) {
        const prefix = SyntaxNode.from(this.next())
        const val = this.is($T.Keyword, 'do') ? this.do() : this.value()
        const arg = new SyntaxNode('NamedArg', prefix.from, val.to)
        arg.push(prefix, val)
        values.push(arg)
      } else {
        values.push(this.value())
      }
    }

    const close = this.expect($T.CloseBracket)

    const node = new SyntaxNode('Dict', open.from, close.to)
    return node.push(...values)
  }

  // FunctionDef `do x y: something end`
  do(): SyntaxNode {
    const doNode = this.keyword('do')
    doNode.type = 'Do'
    this.scope = new Scope(this.scope)

    const params = []
    while (!this.is($T.Colon) && !this.isExprEnd()) {
      let varName = this.current().value!
      if (varName.endsWith('=')) varName = varName.slice(0, varName.length - 1)
      this.scope.add(varName)

      let arg
      if (this.is($T.Identifier))
        arg = this.identifier()
      else if (this.is($T.NamedArgPrefix))
        arg = this.namedParam()
      else
        throw `[do] expected Identifier or NamedArgPrefix, got ${JSON.stringify(this.current())}\n\n ${this.input}\n`

      params.push(arg)
    }

    const block = this.block(false)
    let catchNode, finalNode

    if (this.is($T.Keyword, 'catch'))
      catchNode = this.catch()

    if (this.is($T.Keyword, 'finally'))
      finalNode = this.finally()

    let end = this.keyword('end')

    let last = block.at(-1)
    if (finalNode) last = finalNode.children.at(-1)!
    else if (catchNode) last = catchNode.children.at(-1)!

    const node = new SyntaxNode('FunctionDef', doNode.from, last!.to)

    node.add(doNode)

    const paramsNode = new SyntaxNode(
      'Params',
      params[0]?.from ?? 0,
      params.at(-1)?.to ?? 0
    )

    if (params.length) paramsNode.push(...params)
    node.add(paramsNode)

    this.scope = this.scope.parent!

    node.push(...block)

    if (catchNode) node.push(catchNode)
    if (finalNode) node.push(finalNode)

    return node.push(end)
  }

  // config.path
  dotGet(): SyntaxNode {
    const left = this.identifier()
    const ident = this.input.slice(left.from, left.to)

    // not in scope, just return Word
    if (!this.scope.has(ident))
      return this.word(left)

    if (left.type === 'Identifier') left.type = 'IdentifierBeforeDot'

    let parts = []
    while (this.is($T.Operator, '.')) {
      this.next()
      parts.push(this.is($T.OpenParen) ? this.parens() : this.atom())
    }

    // TODO lezer legacy - we can do a flat DotGet if we remove this
    const nodes = parts.length > 1 ? collapseDotGets(parts) : undefined

    const node = new SyntaxNode('DotGet', left.from, parts.at(-1)!.to)
    return nodes ? node.push(left, nodes!) : node.push(left, ...parts)
  }

  // dotget in a statement/expression (something.blah) or (something.blah arg1)
  dotGetFunctionCall(): SyntaxNode {
    const dotGet = this.dotGet()

    // dotget not in scope, regular Word
    if (dotGet.type === 'Word') return dotGet

    if (this.is($T.Operator) && !this.isPipe())
      return dotGet

    else if (this.isPipe() || this.isExprEnd())
      return this.functionCallOrIdentifier(dotGet)

    else
      return this.functionCall(dotGet)
  }

  // can be used in functions or try block
  finally(): SyntaxNode {
    const keyword = this.keyword('finally')
    const block = this.block()
    const node = new SyntaxNode('FinallyExpr', keyword.from, block.at(-1)!.to)

    return node.push(keyword, ...block)
  }

  // you're lookin at it
  functionCall(fn?: SyntaxNode): SyntaxNode {
    const ident = fn ?? this.identifier()

    const args: SyntaxNode[] = []
    while (!this.isExprEnd() && !this.is($T.Operator, '|')) {
      if (this.is($T.NamedArgPrefix)) {
        args.push(this.namedArg())
      } else {
        // 'do' is the only keyword allowed as a function argument
        const val = this.is($T.Keyword, 'do') ? this.do() : this.value()
        const arg = new SyntaxNode('PositionalArg', val.from, val.to)
        arg.add(val)
        args.push(arg)
      }
    }

    const node = new SyntaxNode('FunctionCall', ident.from, (args.at(-1) || ident).to)
    node.push(ident, ...args)

    if (!this.inTestExpr && this.is($T.Colon)) {
      const block = this.block()
      const end = this.keyword('end')
      const blockNode = new SyntaxNode('FunctionCallWithBlock', node.from, end.to)
      return blockNode.push(node, ...block, end)
    }

    return node
  }

  // bare identifier in an expression
  functionCallOrIdentifier(inner?: SyntaxNode) {
    if (!inner && this.nextIs($T.Operator, '.')) {
      inner = this.dotGet()

      // if the dotGet was just a Word, bail
      if (inner.type === 'Word') return inner
    }

    inner ??= this.identifier()

    const wrapper = new SyntaxNode('FunctionCallOrIdentifier', inner.from, inner.to)
    wrapper.push(inner)

    if (!this.inTestExpr && this.is($T.Colon)) {
      const block = this.block()
      const end = this.keyword('end')
      const node = new SyntaxNode('FunctionCallWithBlock', wrapper.from, end.to)
      return node.push(wrapper, ...block, end)
    }

    return wrapper
  }

  // function and variable names
  identifier(): SyntaxNode {
    return SyntaxNode.from(this.expect($T.Identifier))
  }

  // if something: blah end
  // if something: blah else: blah end
  // if something: blah else if something: blah else: blah end
  if(): SyntaxNode {
    const ifNode = this.keyword('if')
    const test = this.testExpr()
    const ifBlock = this.block()

    const node = new SyntaxNode('IfExpr', ifNode.from, ifBlock.at(-1)!.to)
    node.push(ifNode, test)
    node.push(...ifBlock)

    while (this.is($T.Keyword, 'else') && this.nextIs($T.Keyword, 'if')) {
      const elseWord = this.keyword('else')
      const ifWord = this.keyword('if')
      const elseIfTest = this.testExpr()
      const elseIfBlock = this.block()
      const elseIfNode = new SyntaxNode('ElseIfExpr', ifBlock.at(-1)!.from, elseIfBlock.at(-1)!.to)
      elseIfNode.push(elseWord, ifWord, elseIfTest)
      elseIfNode.push(...elseIfBlock)
      node.push(elseIfNode)
    }

    if (this.is($T.Keyword, 'else') && this.nextIs($T.Colon)) {
      const elseWord = this.keyword('else')
      const elseBlock = this.block()
      const elseNode = new SyntaxNode('ElseExpr', ifBlock.at(-1)!.from, elseBlock.at(-1)!.to)
      elseNode.push(elseWord)
      elseNode.push(...elseBlock)
      node.push(elseNode)
    }

    return node.push(this.keyword('end'))
  }

  import(): SyntaxNode {
    const keyword = this.keyword('import')

    const args: SyntaxNode[] = []
    while (!this.isExprEnd()) {
      if (this.is($T.NamedArgPrefix)) {
        const prefix = SyntaxNode.from(this.next())
        const val = this.value()
        const arg = new SyntaxNode('NamedArg', prefix.from, val.to)
        arg.push(prefix, val)
        args.push(arg)
      } else {
        args.push(this.identifier())
      }
    }

    const node = new SyntaxNode('Import', keyword.from, args.at(-1)!.to)
    node.add(keyword)
    return node.push(...args)
  }

  // if, while, do, etc
  keyword(name: string): SyntaxNode {
    const node = SyntaxNode.from(this.expect($T.Keyword, name))
    node.type = 'keyword' // TODO lezer legacy
    return node
  }

  // abc= true
  namedArg(): SyntaxNode {
    const prefix = SyntaxNode.from(this.expect($T.NamedArgPrefix))
    const val = this.value()
    const node = new SyntaxNode('NamedArg', prefix.from, val.to)
    return node.push(prefix, val)
  }

  // abc= null|true|123|'hi'
  namedParam(): SyntaxNode {
    const prefix = SyntaxNode.from(this.expect($T.NamedArgPrefix))
    const val = this.value()

    if (!['Null', 'Boolean', 'Number', 'String'].includes(val.type))
      throw `[namedParam] default value must be Null|Bool|Num|Str, got ${val.type}\n\n ${this.input}\n`

    const node = new SyntaxNode('NamedParam', prefix.from, val.to)
    return node.push(prefix, val)
  }

  // operators like + - =
  op(op?: string): SyntaxNode {
    const token = op ? this.expect($T.Operator, op) : this.expect($T.Operator)
    const name = operators[token.value!]
    if (!name) throw `[op] operator not registered: ${token.value!}\n\n ${this.input}\n`
    return new SyntaxNode(name, token.from, token.to)
  }

  // ( expressions in parens )
  parens(): SyntaxNode {
    this.inParens++
    const open = this.expect($T.OpenParen)
    const child = this.expression()
    const close = this.expect($T.CloseParen)
    this.inParens--

    const node = new SyntaxNode('ParenExpr', open.from, close.to)
    node.add(child)

    return node
  }

  // 'hell yes' "hell no" { hell if i know }
  string(): SyntaxNode {
    const token = this.expect($T.String)
    return parseString(this.input, token.from, token.to, this)
  }

  // if TEST: blah end
  testExpr(): SyntaxNode {
    this.inTestExpr = true
    const expr = this.expression()
    this.inTestExpr = false
    return expr
  }

  // throw blah
  throw(): SyntaxNode {
    const keyword = this.keyword('throw')
    const val = this.value()
    const node = new SyntaxNode('Throw', keyword.from, val.to)
    return node.push(keyword, val)
  }

  // try: blah catch e: blah end
  try(): SyntaxNode {
    const tryNode = this.keyword('try')
    const tryBlock = this.block()
    let last = tryBlock.at(-1)
    let catchNode, finalNode

    if (this.is($T.Keyword, 'catch'))
      catchNode = this.catch()

    if (this.is($T.Keyword, 'finally'))
      finalNode = this.finally()

    const end = this.keyword('end')
|
||||||
|
|
||||||
|
if (finalNode) last = finalNode.children.at(-1)
|
||||||
|
else if (catchNode) last = catchNode.children.at(-1)
|
||||||
|
|
||||||
|
const node = new SyntaxNode('TryExpr', tryNode.from, last!.to)
|
||||||
|
node.push(tryNode, ...tryBlock)
|
||||||
|
|
||||||
|
if (catchNode)
|
||||||
|
node.push(catchNode)
|
||||||
|
|
||||||
|
if (finalNode)
|
||||||
|
node.push(finalNode)
|
||||||
|
|
||||||
|
return node.push(end)
|
||||||
|
}
|
||||||
|
|
||||||
|
// while test: blah end
|
||||||
|
while(): SyntaxNode {
|
||||||
|
const keyword = this.keyword('while')
|
||||||
|
const test = this.testExpr()
|
||||||
|
const block = this.block()
|
||||||
|
const end = this.keyword('end')
|
||||||
|
|
||||||
|
const node = new SyntaxNode('WhileExpr', keyword.from, end.to)
|
||||||
|
return node.push(keyword, test, ...block, end)
|
||||||
|
}
|
||||||
|
|
||||||
|
// readme.txt (when `readme` isn't in scope)
|
||||||
|
word(start?: SyntaxNode): SyntaxNode {
|
||||||
|
const parts = [start ?? this.expect($T.Word)]
|
||||||
|
|
||||||
|
while (this.is($T.Operator, '.')) {
|
||||||
|
this.next()
|
||||||
|
if (this.isAny($T.Word, $T.Identifier, $T.Number))
|
||||||
|
parts.push(this.next())
|
||||||
|
}
|
||||||
|
|
||||||
|
return new SyntaxNode('Word', parts[0]!.from, parts.at(-1)!.to)
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// helpers
|
||||||
|
//
|
||||||
|
|
||||||
|
current(): Token {
|
||||||
|
return this.tokens[this.pos] || { type: TokenType.Newline, from: 0, to: 0 }
|
||||||
|
}
|
||||||
|
|
||||||
|
peek(offset = 1): Token | undefined {
|
||||||
|
return this.tokens[this.pos + offset]
|
||||||
|
}
|
||||||
|
|
||||||
|
// look past newlines to check for a specific token
|
||||||
|
peekPastNewlines(type: TokenType, value?: string): boolean {
|
||||||
|
let offset = 1
|
||||||
|
let peek = this.peek(offset)
|
||||||
|
|
||||||
|
while (peek && peek.type === $T.Newline)
|
||||||
|
peek = this.peek(++offset)
|
||||||
|
|
||||||
|
if (!peek || peek.type !== type) return false
|
||||||
|
if (value !== undefined && peek.value !== value) return false
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
next(): Token {
|
||||||
|
const token = this.current()
|
||||||
|
this.pos++
|
||||||
|
return token
|
||||||
|
}
|
||||||
|
|
||||||
|
is(type: TokenType, value?: string): boolean {
|
||||||
|
const token = this.current()
|
||||||
|
if (!token || token.type !== type) return false
|
||||||
|
if (value !== undefined && token.value !== value) return false
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
isAny(...type: TokenType[]): boolean {
|
||||||
|
return type.some(x => this.is(x))
|
||||||
|
}
|
||||||
|
|
||||||
|
nextIs(type: TokenType, value?: string): boolean {
|
||||||
|
const token = this.peek()
|
||||||
|
if (!token || token.type !== type) return false
|
||||||
|
if (value !== undefined && token.value !== value) return false
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
nextIsAny(...type: TokenType[]): boolean {
|
||||||
|
return type.some(x => this.nextIs(x))
|
||||||
|
}
|
||||||
|
|
||||||
|
isExprEnd(): boolean {
|
||||||
|
return this.isAny($T.Colon, $T.Semicolon, $T.Newline, $T.CloseParen, $T.CloseBracket) ||
|
||||||
|
this.isExprEndKeyword() || !this.current()
|
||||||
|
}
|
||||||
|
|
||||||
|
nextIsExprEnd(): boolean {
|
||||||
|
// pipes act like expression end for function arg parsing
|
||||||
|
if (this.nextIs($T.Operator, '|'))
|
||||||
|
return true
|
||||||
|
|
||||||
|
return this.nextIsAny($T.Colon, $T.Semicolon, $T.Newline, $T.CloseBracket, $T.CloseParen) ||
|
||||||
|
this.nextIs($T.Keyword, 'end') || this.nextIs($T.Keyword, 'else') ||
|
||||||
|
this.nextIs($T.Keyword, 'catch') || this.nextIs($T.Keyword, 'finally') ||
|
||||||
|
!this.peek()
|
||||||
|
}
|
||||||
|
|
||||||
|
isExprEndKeyword(): boolean {
|
||||||
|
return this.is($T.Keyword, 'end') || this.is($T.Keyword, 'else') ||
|
||||||
|
this.is($T.Keyword, 'catch') || this.is($T.Keyword, 'finally')
|
||||||
|
}
|
||||||
|
|
||||||
|
isPipe(): boolean {
|
||||||
|
// inside parens, only look for pipes on same line (don't look past newlines)
|
||||||
|
const canLookPastNewlines = this.inParens === 0
|
||||||
|
|
||||||
|
return this.is($T.Operator, '|') ||
|
||||||
|
(canLookPastNewlines && this.peekPastNewlines($T.Operator, '|'))
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(type: TokenType, value?: string): Token | never {
|
||||||
|
if (!this.is(type, value)) {
|
||||||
|
const token = this.current()
|
||||||
|
throw `expected ${TokenType[type]}${value ? ` "${value}"` : ''}, got ${TokenType[token?.type || 0]}${token?.value ? ` "${token.value}"` : ''} at position ${this.pos}\n\n ${this.input}\n`
|
||||||
|
}
|
||||||
|
return this.next()
|
||||||
|
}
|
||||||
|
|
||||||
|
isEOF(): boolean {
|
||||||
|
return this.pos >= this.tokens.length
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO lezer legacy
|
||||||
|
function collapseDotGets(origNodes: SyntaxNode[]): SyntaxNode {
|
||||||
|
const nodes = [...origNodes]
|
||||||
|
let right = nodes.pop()!
|
||||||
|
|
||||||
|
while (nodes.length > 0) {
|
||||||
|
const left = nodes.pop()!
|
||||||
|
|
||||||
|
if (left.type === 'Identifier') left.type = 'IdentifierBeforeDot'
|
||||||
|
|
||||||
|
const dot = new SyntaxNode("DotGet", left.from, right.to);
|
||||||
|
dot.push(left, right)
|
||||||
|
|
||||||
|
right = dot
|
||||||
|
}
|
||||||
|
|
||||||
|
return right
|
||||||
|
}
|
||||||
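A minimal, self-contained sketch of the token-cursor pattern the helpers above follow (current/peek/next/is/expect). The Token and TokenType shapes here are simplified stand-ins, not the real '#parser/tokenizer2' types; this only illustrates how `is` and `expect` drive the recursive-descent methods.

// Sketch only: simplified stand-ins for the real tokenizer types.
enum TokenType { Identifier, Operator, Number, Newline }

interface Token { type: TokenType; value?: string; from: number; to: number }

class Cursor {
  pos = 0
  constructor(public tokens: Token[]) {}

  current(): Token | undefined { return this.tokens[this.pos] }
  peek(offset = 1): Token | undefined { return this.tokens[this.pos + offset] }
  next(): Token | undefined { const t = this.current(); this.pos++; return t }

  // same shape as Parser.is: the type must match, the value only if one is given
  is(type: TokenType, value?: string): boolean {
    const t = this.current()
    if (!t || t.type !== type) return false
    return value === undefined || t.value === value
  }

  // same shape as Parser.expect: consume the token or fail loudly
  expect(type: TokenType, value?: string): Token {
    if (!this.is(type, value)) throw `expected ${TokenType[type]} at position ${this.pos}`
    return this.next()!
  }
}

// usage: walk the tokens of `a + b`
const tokens: Token[] = [
  { type: TokenType.Identifier, value: 'a', from: 0, to: 1 },
  { type: TokenType.Operator, value: '+', from: 2, to: 3 },
  { type: TokenType.Identifier, value: 'b', from: 4, to: 5 },
]
const cur = new Cursor(tokens)
const left = cur.expect(TokenType.Identifier)
const op = cur.expect(TokenType.Operator, '+')
const right = cur.expect(TokenType.Identifier)
console.log(`BinOp(${left.value} ${op.value} ${right.value})`) // BinOp(a + b)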
226  src/parser/stringParser.ts  Normal file

@ -0,0 +1,226 @@
import { SyntaxNode } from './node'


// Parse string contents into fragments, interpolations, and escape sequences.
export const parseString = (input: string, from: number, to: number, parser: any): SyntaxNode => {
  const stringNode = new SyntaxNode('String', from, to)
  const content = input.slice(from, to)

  const firstChar = content[0]

  // double quotes: no interpolation or escapes
  if (firstChar === '"') {
    const fragment = new SyntaxNode('DoubleQuote', from, to)
    stringNode.add(fragment)
    return stringNode
  }

  // curlies: interpolation but no escapes
  if (firstChar === '{') {
    parseCurlyString(stringNode, input, from, to, parser)
    return stringNode
  }

  // single-quotes: interpolation and escapes
  if (firstChar === "'") {
    parseSingleQuoteString(stringNode, input, from, to, parser)
    return stringNode
  }

  throw `Unknown string type starting with: ${firstChar}`
}

const parseSingleQuoteString = (stringNode: SyntaxNode, input: string, from: number, to: number, parser: any) => {
  let pos = from + 1 // skip opening '
  let fragmentStart = pos

  while (pos < to - 1) { // -1 to skip closing '
    const char = input[pos]

    if (char === '\\' && pos + 1 < to - 1) {
      if (pos > fragmentStart) {
        const frag = new SyntaxNode('StringFragment', fragmentStart, pos)
        stringNode.add(frag)
      }

      const escNode = new SyntaxNode('EscapeSeq', pos, pos + 2)
      stringNode.add(escNode)

      pos += 2
      fragmentStart = pos
      continue
    }

    if (char === '$') {
      if (pos > fragmentStart) {
        const frag = new SyntaxNode('StringFragment', fragmentStart, pos)
        stringNode.add(frag)
      }

      pos++ // skip $

      if (input[pos] === '(') {
        const interpStart = pos - 1 // Include the $
        const exprResult = parseInterpolationExpr(input, pos, parser)
        const interpNode = new SyntaxNode('Interpolation', interpStart, exprResult.endPos)
        interpNode.add(exprResult.node)
        stringNode.add(interpNode)
        pos = exprResult.endPos
      } else {
        const interpStart = pos - 1
        const identEnd = findIdentifierEnd(input, pos, to - 1)
        const identNode = new SyntaxNode('FunctionCallOrIdentifier', pos, identEnd)
        const innerIdent = new SyntaxNode('Identifier', pos, identEnd)
        identNode.add(innerIdent)

        const interpNode = new SyntaxNode('Interpolation', interpStart, identEnd)
        interpNode.add(identNode)
        stringNode.add(interpNode)
        pos = identEnd
      }

      fragmentStart = pos
      continue
    }

    pos++
  }

  if (pos > fragmentStart && fragmentStart < to - 1) {
    const frag = new SyntaxNode('StringFragment', fragmentStart, pos)
    stringNode.add(frag)
  }
}

const parseCurlyString = (stringNode: SyntaxNode, input: string, from: number, to: number, parser: any) => {
  let pos = from + 1 // skip opening {
  let fragmentStart = from // include the opening { in the fragment
  let depth = 1

  while (pos < to && depth > 0) {
    const char = input[pos]

    // track nesting
    if (char === '{') {
      depth++
      pos++
      continue
    }

    if (char === '}') {
      depth--
      if (depth === 0) {
        const frag = new SyntaxNode('CurlyString', fragmentStart, pos + 1)
        stringNode.add(frag)
        break
      }
      pos++
      continue
    }

    if (char === '\\' && pos + 1 < to && input[pos + 1] === '$') {
      if (pos > fragmentStart) {
        const frag = new SyntaxNode('CurlyString', fragmentStart, pos)
        stringNode.add(frag)
      }

      const escapedFrag = new SyntaxNode('CurlyString', pos + 1, pos + 2)
      stringNode.add(escapedFrag)

      pos += 2 // skip \ and $
      fragmentStart = pos
      continue
    }

    if (char === '$') {
      if (pos > fragmentStart) {
        const frag = new SyntaxNode('CurlyString', fragmentStart, pos)
        stringNode.add(frag)
      }

      pos++ // skip $

      if (input[pos] === '(') {
        const interpStart = pos - 1
        const exprResult = parseInterpolationExpr(input, pos, parser)
        const interpNode = new SyntaxNode('Interpolation', interpStart, exprResult.endPos)
        interpNode.add(exprResult.node)
        stringNode.add(interpNode)
        pos = exprResult.endPos
      } else {
        const interpStart = pos - 1
        const identEnd = findIdentifierEnd(input, pos, to)
        const identNode = new SyntaxNode('FunctionCallOrIdentifier', pos, identEnd)
        const innerIdent = new SyntaxNode('Identifier', pos, identEnd)
        identNode.add(innerIdent)

        const interpNode = new SyntaxNode('Interpolation', interpStart, identEnd)
        interpNode.add(identNode)
        stringNode.add(interpNode)
        pos = identEnd
      }

      fragmentStart = pos
      continue
    }

    pos++
  }
}

const parseInterpolationExpr = (input: string, pos: number, parser: any): { node: SyntaxNode, endPos: number } => {
  let depth = 1
  let start = pos
  let end = pos + 1 // start after opening (

  while (end < input.length && depth > 0) {
    if (input[end] === '(') depth++
    if (input[end] === ')') {
      depth--
      if (depth === 0) break
    }
    end++
  }

  const exprContent = input.slice(start + 1, end) // Content between ( and )
  const closeParen = end
  end++ // move past closing )

  const exprNode = parser.parse(exprContent)

  const innerNode = exprNode.firstChild || exprNode

  const offset = start + 1 // position where exprContent starts in input
  adjustNodePositions(innerNode, offset)

  const parenNode = new SyntaxNode('ParenExpr', start, closeParen + 1)
  parenNode.add(innerNode)

  return { node: parenNode, endPos: end }
}

const adjustNodePositions = (node: SyntaxNode, offset: number) => {
  node.from += offset
  node.to += offset

  for (const child of node.children) {
    adjustNodePositions(child, offset)
  }
}

const findIdentifierEnd = (input: string, pos: number, maxPos: number): number => {
  let end = pos

  while (end < maxPos) {
    const char = input[end]!

    // Stop at non-identifier characters
    if (!/[a-z0-9\-?]/.test(char)) {
      break
    }

    end++
  }

  return end
}
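The single-quote scanner above splits string contents into plain fragments and `$ident` / `$(expr)` interpolations by scanning for `$` and balancing parens. A standalone sketch of the same splitting idea, returning plain spans instead of SyntaxNodes; `Span` and `splitInterpolations` are illustrative names, not part of this module.

// Sketch only: same scan-and-balance idea as parseSingleQuoteString, without SyntaxNodes.
type Span = { kind: 'fragment' | 'interp', text: string }

const splitInterpolations = (content: string): Span[] => {
  const spans: Span[] = []
  let pos = 0
  let fragStart = 0

  while (pos < content.length) {
    if (content[pos] === '$') {
      if (pos > fragStart) spans.push({ kind: 'fragment', text: content.slice(fragStart, pos) })
      let end = pos + 1
      if (content[end] === '(') {
        // balance parens, mirroring parseInterpolationExpr
        let depth = 1
        end++
        while (end < content.length && depth > 0) {
          if (content[end] === '(') depth++
          if (content[end] === ')') depth--
          end++
        }
      } else {
        // bare identifier, mirroring findIdentifierEnd
        while (end < content.length && /[a-z0-9\-?]/.test(content[end]!)) end++
      }
      spans.push({ kind: 'interp', text: content.slice(pos, end) })
      pos = end
      fragStart = pos
      continue
    }
    pos++
  }
  if (pos > fragStart) spans.push({ kind: 'fragment', text: content.slice(fragStart, pos) })
  return spans
}

console.log(splitInterpolations('hello $name, total: $(1 + 2)'))
// [ { kind: 'fragment', text: 'hello ' }, { kind: 'interp', text: '$name' },
//   { kind: 'fragment', text: ', total: ' }, { kind: 'interp', text: '$(1 + 2)' } ]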
86  src/parser/terms.ts  Normal file

@ -0,0 +1,86 @@
import * as terms from '#parser/shrimp.terms'

export function nameToId(name: string): number {
  switch (name) {
    case 'Star': return terms.Star
    case 'Slash': return terms.Slash
    case 'Plus': return terms.Plus
    case 'Minus': return terms.Minus
    case 'And': return terms.And
    case 'Or': return terms.Or
    case 'Eq': return terms.Eq
    case 'EqEq': return terms.EqEq
    case 'Neq': return terms.Neq
    case 'Lt': return terms.Lt
    case 'Lte': return terms.Lte
    case 'Gt': return terms.Gt
    case 'Gte': return terms.Gte
    case 'Modulo': return terms.Modulo
    case 'PlusEq': return terms.PlusEq
    case 'MinusEq': return terms.MinusEq
    case 'StarEq': return terms.StarEq
    case 'SlashEq': return terms.SlashEq
    case 'ModuloEq': return terms.ModuloEq
    case 'Band': return terms.Band
    case 'Bor': return terms.Bor
    case 'Bxor': return terms.Bxor
    case 'Shl': return terms.Shl
    case 'Shr': return terms.Shr
    case 'Ushr': return terms.Ushr
    case 'NullishCoalesce': return terms.NullishCoalesce
    case 'NullishEq': return terms.NullishEq
    case 'Identifier': return terms.Identifier
    case 'AssignableIdentifier': return terms.AssignableIdentifier
    case 'Word': return terms.Word
    case 'IdentifierBeforeDot': return terms.IdentifierBeforeDot
    case 'CurlyString': return terms.CurlyString
    case 'newline': return terms.newline
    case 'pipeStartsLine': return terms.pipeStartsLine
    case 'Do': return terms.Do
    case 'Comment': return terms.Comment
    case 'Program': return terms.Program
    case 'PipeExpr': return terms.PipeExpr
    case 'WhileExpr': return terms.WhileExpr
    case 'keyword': return terms.keyword
    case 'ConditionalOp': return terms.ConditionalOp
    case 'ParenExpr': return terms.ParenExpr
    case 'FunctionCallWithNewlines': return terms.FunctionCallWithNewlines
    case 'DotGet': return terms.DotGet
    case 'Number': return terms.Number
    case 'Dollar': return terms.Dollar
    case 'PositionalArg': return terms.PositionalArg
    case 'FunctionDef': return terms.FunctionDef
    case 'Params': return terms.Params
    case 'NamedParam': return terms.NamedParam
    case 'NamedArgPrefix': return terms.NamedArgPrefix
    case 'String': return terms.String
    case 'StringFragment': return terms.StringFragment
    case 'Interpolation': return terms.Interpolation
    case 'FunctionCallOrIdentifier': return terms.FunctionCallOrIdentifier
    case 'EscapeSeq': return terms.EscapeSeq
    case 'DoubleQuote': return terms.DoubleQuote
    case 'Boolean': return terms.Boolean
    case 'Null': return terms.Null
    case 'colon': return terms.colon
    case 'CatchExpr': return terms.CatchExpr
    case 'Block': return terms.Block
    case 'FinallyExpr': return terms.FinallyExpr
    case 'Underscore': return terms.Underscore
    case 'NamedArg': return terms.NamedArg
    case 'IfExpr': return terms.IfExpr
    case 'FunctionCall': return terms.FunctionCall
    case 'ElseIfExpr': return terms.ElseIfExpr
    case 'ElseExpr': return terms.ElseExpr
    case 'BinOp': return terms.BinOp
    case 'Regex': return terms.Regex
    case 'Dict': return terms.Dict
    case 'Array': return terms.Array
    case 'FunctionCallWithBlock': return terms.FunctionCallWithBlock
    case 'TryExpr': return terms.TryExpr
    case 'Throw': return terms.Throw
    case 'Import': return terms.Import
    case 'CompoundAssign': return terms.CompoundAssign
    case 'Assign': return terms.Assign
    default: throw `unknown term: ${name}`
  }
}
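nameToId bridges the string node names produced by the hand-written parser back to the numeric Lezer term ids the compiler still switches on. A hedged illustration of how it might be used while walking a hand-built tree; `countTerm` is not part of the codebase, the '#parser/terms' import path is assumed from the repo's alias style, and it relies on SyntaxNode exposing `type` and `children` as in the parser above (nameToId throws on names it doesn't know).

import * as terms from '#parser/shrimp.terms'
import { SyntaxNode } from '#parser/node'
import { nameToId } from '#parser/terms'

// Illustrative only: count how many nodes of a given term id appear in a tree.
const countTerm = (node: SyntaxNode, termId: number): number => {
  let count = nameToId(node.type) === termId ? 1 : 0
  for (const child of node.children) count += countTerm(child, termId)
  return count
}

// e.g. countTerm(tree, terms.Identifier) after parsing a program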
@ -810,44 +810,6 @@ describe('Nullish coalescing operator', () => {
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('DotGet whitespace sensitivity', () => {
|
|
||||||
test('no whitespace - DotGet works when identifier in scope', () => {
|
|
||||||
expect('basename = 5; basename.prop').toMatchTree(`
|
|
||||||
Assign
|
|
||||||
AssignableIdentifier basename
|
|
||||||
Eq =
|
|
||||||
Number 5
|
|
||||||
FunctionCallOrIdentifier
|
|
||||||
DotGet
|
|
||||||
IdentifierBeforeDot basename
|
|
||||||
Identifier prop`)
|
|
||||||
})
|
|
||||||
|
|
||||||
test('space before dot - NOT DotGet, parses as division', () => {
|
|
||||||
expect('basename = 5; basename / prop').toMatchTree(`
|
|
||||||
Assign
|
|
||||||
AssignableIdentifier basename
|
|
||||||
Eq =
|
|
||||||
Number 5
|
|
||||||
BinOp
|
|
||||||
Identifier basename
|
|
||||||
Slash /
|
|
||||||
Identifier prop`)
|
|
||||||
})
|
|
||||||
|
|
||||||
test('dot followed by slash is Word, not DotGet', () => {
|
|
||||||
expect('basename ./cool').toMatchTree(`
|
|
||||||
FunctionCall
|
|
||||||
Identifier basename
|
|
||||||
PositionalArg
|
|
||||||
Word ./cool`)
|
|
||||||
})
|
|
||||||
|
|
||||||
test('identifier not in scope with dot becomes Word', () => {
|
|
||||||
expect('readme.txt').toMatchTree(`Word readme.txt`)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
describe('Comments', () => {
|
describe('Comments', () => {
|
||||||
test('are greedy', () => {
|
test('are greedy', () => {
|
||||||
expect(`
|
expect(`
|
||||||
|
|
@ -897,61 +859,6 @@ basename = 5 # very astute
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('Array destructuring', () => {
|
|
||||||
test('parses array pattern with two variables', () => {
|
|
||||||
expect('[ a b ] = [ 1 2 3 4]').toMatchTree(`
|
|
||||||
Assign
|
|
||||||
Array
|
|
||||||
Identifier a
|
|
||||||
Identifier b
|
|
||||||
Eq =
|
|
||||||
Array
|
|
||||||
Number 1
|
|
||||||
Number 2
|
|
||||||
Number 3
|
|
||||||
Number 4`)
|
|
||||||
})
|
|
||||||
|
|
||||||
test('parses array pattern with one variable', () => {
|
|
||||||
expect('[ x ] = [ 42 ]').toMatchTree(`
|
|
||||||
Assign
|
|
||||||
Array
|
|
||||||
Identifier x
|
|
||||||
Eq =
|
|
||||||
Array
|
|
||||||
Number 42`)
|
|
||||||
})
|
|
||||||
|
|
||||||
test('parses array pattern with emoji identifiers', () => {
|
|
||||||
expect('[ 🚀 💎 ] = [ 1 2 ]').toMatchTree(`
|
|
||||||
Assign
|
|
||||||
Array
|
|
||||||
Identifier 🚀
|
|
||||||
Identifier 💎
|
|
||||||
Eq =
|
|
||||||
Array
|
|
||||||
Number 1
|
|
||||||
Number 2`)
|
|
||||||
})
|
|
||||||
|
|
||||||
test('works with dotget', () => {
|
|
||||||
expect('[ a ] = [ [1 2 3] ]; a.1').toMatchTree(`
|
|
||||||
Assign
|
|
||||||
Array
|
|
||||||
Identifier a
|
|
||||||
Eq =
|
|
||||||
Array
|
|
||||||
Array
|
|
||||||
Number 1
|
|
||||||
Number 2
|
|
||||||
Number 3
|
|
||||||
FunctionCallOrIdentifier
|
|
||||||
DotGet
|
|
||||||
IdentifierBeforeDot a
|
|
||||||
Number 1`)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
describe('Conditional ops', () => {
|
describe('Conditional ops', () => {
|
||||||
test('or can be chained', () => {
|
test('or can be chained', () => {
|
||||||
expect(`
|
expect(`
|
||||||
|
|
@ -1037,34 +944,3 @@ Assign
|
||||||
`)
|
`)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('import', () => {
|
|
||||||
test('parses single import', () => {
|
|
||||||
expect(`import str`).toMatchTree(`
|
|
||||||
Import
|
|
||||||
keyword import
|
|
||||||
Identifier str
|
|
||||||
`)
|
|
||||||
})
|
|
||||||
|
|
||||||
test('parses multiple imports', () => {
|
|
||||||
expect(`import str math list`).toMatchTree(`
|
|
||||||
Import
|
|
||||||
keyword import
|
|
||||||
Identifier str
|
|
||||||
Identifier math
|
|
||||||
Identifier list
|
|
||||||
`)
|
|
||||||
})
|
|
||||||
|
|
||||||
test('parses named args', () => {
|
|
||||||
expect(`import str only=ends-with?`).toMatchTree(`
|
|
||||||
Import
|
|
||||||
keyword import
|
|
||||||
Identifier str
|
|
||||||
NamedArg
|
|
||||||
NamedArgPrefix only=
|
|
||||||
Identifier ends-with?
|
|
||||||
`)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
@ -24,6 +24,7 @@ describe('if/else if/else', () => {
|
||||||
Eq =
|
Eq =
|
||||||
IfExpr
|
IfExpr
|
||||||
keyword if
|
keyword if
|
||||||
|
FunctionCallOrIdentifier
|
||||||
Identifier x
|
Identifier x
|
||||||
colon :
|
colon :
|
||||||
Block
|
Block
|
||||||
|
|
@ -59,6 +60,7 @@ describe('if/else if/else', () => {
|
||||||
end`).toMatchTree(`
|
end`).toMatchTree(`
|
||||||
IfExpr
|
IfExpr
|
||||||
keyword if
|
keyword if
|
||||||
|
FunctionCallOrIdentifier
|
||||||
Identifier with-else
|
Identifier with-else
|
||||||
colon :
|
colon :
|
||||||
Block
|
Block
|
||||||
|
|
@ -82,6 +84,7 @@ describe('if/else if/else', () => {
|
||||||
end`).toMatchTree(`
|
end`).toMatchTree(`
|
||||||
IfExpr
|
IfExpr
|
||||||
keyword if
|
keyword if
|
||||||
|
FunctionCallOrIdentifier
|
||||||
Identifier with-else-if
|
Identifier with-else-if
|
||||||
colon :
|
colon :
|
||||||
Block
|
Block
|
||||||
|
|
@ -90,6 +93,7 @@ describe('if/else if/else', () => {
|
||||||
ElseIfExpr
|
ElseIfExpr
|
||||||
keyword else
|
keyword else
|
||||||
keyword if
|
keyword if
|
||||||
|
FunctionCallOrIdentifier
|
||||||
Identifier another-condition
|
Identifier another-condition
|
||||||
colon :
|
colon :
|
||||||
Block
|
Block
|
||||||
|
|
@ -111,6 +115,7 @@ describe('if/else if/else', () => {
|
||||||
end`).toMatchTree(`
|
end`).toMatchTree(`
|
||||||
IfExpr
|
IfExpr
|
||||||
keyword if
|
keyword if
|
||||||
|
FunctionCallOrIdentifier
|
||||||
Identifier with-else-if-else
|
Identifier with-else-if-else
|
||||||
colon :
|
colon :
|
||||||
Block
|
Block
|
||||||
|
|
@ -119,6 +124,7 @@ describe('if/else if/else', () => {
|
||||||
ElseIfExpr
|
ElseIfExpr
|
||||||
keyword else
|
keyword else
|
||||||
keyword if
|
keyword if
|
||||||
|
FunctionCallOrIdentifier
|
||||||
Identifier another-condition
|
Identifier another-condition
|
||||||
colon :
|
colon :
|
||||||
Block
|
Block
|
||||||
|
|
@ -127,6 +133,7 @@ describe('if/else if/else', () => {
|
||||||
ElseIfExpr
|
ElseIfExpr
|
||||||
keyword else
|
keyword else
|
||||||
keyword if
|
keyword if
|
||||||
|
FunctionCallOrIdentifier
|
||||||
Identifier yet-another-condition
|
Identifier yet-another-condition
|
||||||
colon :
|
colon :
|
||||||
Block
|
Block
|
||||||
|
|
@ -173,7 +180,7 @@ describe('if/else if/else', () => {
|
||||||
`)
|
`)
|
||||||
})
|
})
|
||||||
|
|
||||||
test('parses function calls in if tests', () => {
|
test("parses paren'd function calls in if tests", () => {
|
||||||
expect(`if (var? 'abc'): true end`).toMatchTree(`
|
expect(`if (var? 'abc'): true end`).toMatchTree(`
|
||||||
IfExpr
|
IfExpr
|
||||||
keyword if
|
keyword if
|
||||||
|
|
@ -214,7 +221,7 @@ describe('if/else if/else', () => {
|
||||||
`)
|
`)
|
||||||
})
|
})
|
||||||
|
|
||||||
test('parses function calls in else-if tests', () => {
|
test("parses paren'd function calls in else-if tests", () => {
|
||||||
expect(`if false: true else if (var? 'abc'): true end`).toMatchTree(`
|
expect(`if false: true else if (var? 'abc'): true end`).toMatchTree(`
|
||||||
IfExpr
|
IfExpr
|
||||||
keyword if
|
keyword if
|
||||||
|
|
|
||||||
58  src/parser/tests/destructuring.test.ts  Normal file

@ -0,0 +1,58 @@
import { expect, describe, test } from 'bun:test'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('Array destructuring', () => {
  test('parses array pattern with two variables', () => {
    expect('[ a b ] = [ 1 2 3 4]').toMatchTree(`
      Assign
        Array
          Identifier a
          Identifier b
        Eq =
        Array
          Number 1
          Number 2
          Number 3
          Number 4`)
  })

  test('parses array pattern with one variable', () => {
    expect('[ x ] = [ 42 ]').toMatchTree(`
      Assign
        Array
          Identifier x
        Eq =
        Array
          Number 42`)
  })

  test('parses array pattern with emoji identifiers', () => {
    expect('[ 🚀 💎 ] = [ 1 2 ]').toMatchTree(`
      Assign
        Array
          Identifier 🚀
          Identifier 💎
        Eq =
        Array
          Number 1
          Number 2`)
  })

  test('works with dotget', () => {
    expect('[ a ] = [ [1 2 3] ]; a.1').toMatchTree(`
      Assign
        Array
          Identifier a
        Eq =
        Array
          Array
            Number 1
            Number 2
            Number 3
      FunctionCallOrIdentifier
        DotGet
          IdentifierBeforeDot a
          Number 1`)
  })
})
@ -1,6 +1,44 @@
|
||||||
import { describe, test, expect } from 'bun:test'
|
import { describe, test, expect } from 'bun:test'
|
||||||
import '../../testSetup'
|
import '../../testSetup'
|
||||||
|
|
||||||
|
describe('DotGet whitespace sensitivity', () => {
|
||||||
|
test('no whitespace - DotGet works when identifier in scope', () => {
|
||||||
|
expect('basename = 5; basename.prop').toMatchTree(`
|
||||||
|
Assign
|
||||||
|
AssignableIdentifier basename
|
||||||
|
Eq =
|
||||||
|
Number 5
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
DotGet
|
||||||
|
IdentifierBeforeDot basename
|
||||||
|
Identifier prop`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('space before dot - NOT DotGet, parses as division', () => {
|
||||||
|
expect('basename = 5; basename / prop').toMatchTree(`
|
||||||
|
Assign
|
||||||
|
AssignableIdentifier basename
|
||||||
|
Eq =
|
||||||
|
Number 5
|
||||||
|
BinOp
|
||||||
|
Identifier basename
|
||||||
|
Slash /
|
||||||
|
Identifier prop`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('dot followed by slash is Word, not DotGet', () => {
|
||||||
|
expect('basename ./cool').toMatchTree(`
|
||||||
|
FunctionCall
|
||||||
|
Identifier basename
|
||||||
|
PositionalArg
|
||||||
|
Word ./cool`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('identifier not in scope with dot becomes Word', () => {
|
||||||
|
expect('readme.txt').toMatchTree(`Word readme.txt`)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
describe('DotGet', () => {
|
describe('DotGet', () => {
|
||||||
test('readme.txt is Word when readme not in scope', () => {
|
test('readme.txt is Word when readme not in scope', () => {
|
||||||
expect('readme.txt').toMatchTree(`Word readme.txt`)
|
expect('readme.txt').toMatchTree(`Word readme.txt`)
|
||||||
|
|
@ -199,7 +237,7 @@ end`).toMatchTree(`
|
||||||
`)
|
`)
|
||||||
})
|
})
|
||||||
|
|
||||||
test("dot get doesn't work with spaces", () => {
|
test.skip("dot get doesn't work with spaces", () => {
|
||||||
expect('obj . prop').toMatchTree(`
|
expect('obj . prop').toMatchTree(`
|
||||||
FunctionCall
|
FunctionCall
|
||||||
Identifier obj
|
Identifier obj
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,7 @@ describe('calling functions', () => {
|
||||||
`)
|
`)
|
||||||
})
|
})
|
||||||
|
|
||||||
test('Incomplete namedArg', () => {
|
test.skip('Incomplete namedArg', () => {
|
||||||
expect('tail lines=').toMatchTree(`
|
expect('tail lines=').toMatchTree(`
|
||||||
FunctionCall
|
FunctionCall
|
||||||
Identifier tail
|
Identifier tail
|
||||||
|
|
|
||||||
34  src/parser/tests/import.test.ts  Normal file

@ -0,0 +1,34 @@
import { expect, describe, test } from 'bun:test'

import '../shrimp.grammar' // Importing this so changes cause it to retest!

describe('import', () => {
  test('parses single import', () => {
    expect(`import str`).toMatchTree(`
      Import
        keyword import
        Identifier str
    `)
  })

  test('parses multiple imports', () => {
    expect(`import str math list`).toMatchTree(`
      Import
        keyword import
        Identifier str
        Identifier math
        Identifier list
    `)
  })

  test('parses named args', () => {
    expect(`import str only=ends-with?`).toMatchTree(`
      Import
        keyword import
        Identifier str
        NamedArg
          NamedArgPrefix only=
          Identifier ends-with?
    `)
  })
})
@ -375,10 +375,11 @@ describe('dict literals', () => {
|
||||||
expect('[=]').toMatchTree(`
|
expect('[=]').toMatchTree(`
|
||||||
Dict [=]
|
Dict [=]
|
||||||
`)
|
`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('empty dict w whitespace', () => {
|
||||||
expect('[ = ]').toMatchTree(`
|
expect('[ = ]').toMatchTree(`
|
||||||
Array
|
Dict [ = ]
|
||||||
Word =
|
|
||||||
`)
|
`)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,10 @@ describe('numbers', () => {
|
||||||
test('non-numbers', () => {
|
test('non-numbers', () => {
|
||||||
expect(`1st`).toMatchToken('Word', '1st')
|
expect(`1st`).toMatchToken('Word', '1st')
|
||||||
expect(`1_`).toMatchToken('Word', '1_')
|
expect(`1_`).toMatchToken('Word', '1_')
|
||||||
expect(`100.`).toMatchToken('Word', '100.')
|
expect(`100.`).toMatchTokens(
|
||||||
|
{ type: 'Number', value: '100' },
|
||||||
|
{ type: 'Operator', value: '.' },
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
test('simple numbers', () => {
|
test('simple numbers', () => {
|
||||||
|
|
@ -127,6 +130,19 @@ describe('identifiers', () => {
|
||||||
expect('dog#pound').toMatchToken('Word', 'dog#pound')
|
expect('dog#pound').toMatchToken('Word', 'dog#pound')
|
||||||
expect('http://website.com').toMatchToken('Word', 'http://website.com')
|
expect('http://website.com').toMatchToken('Word', 'http://website.com')
|
||||||
expect('school$cool').toMatchToken('Identifier', 'school$cool')
|
expect('school$cool').toMatchToken('Identifier', 'school$cool')
|
||||||
|
expect('EXIT:').toMatchTokens(
|
||||||
|
{ type: 'Word', value: 'EXIT' },
|
||||||
|
{ type: 'Colon' },
|
||||||
|
)
|
||||||
|
expect(`if y == 1: 'cool' end`).toMatchTokens(
|
||||||
|
{ type: 'Keyword', value: 'if' },
|
||||||
|
{ type: 'Identifier', value: 'y' },
|
||||||
|
{ type: 'Operator', value: '==' },
|
||||||
|
{ type: 'Number', value: '1' },
|
||||||
|
{ type: 'Colon' },
|
||||||
|
{ type: 'String', value: `'cool'` },
|
||||||
|
{ type: 'Keyword', value: 'end' },
|
||||||
|
)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
@ -139,8 +155,15 @@ describe('paths', () => {
|
||||||
expect('/home/chris/dev').toMatchToken('Word', '/home/chris/dev')
|
expect('/home/chris/dev').toMatchToken('Word', '/home/chris/dev')
|
||||||
})
|
})
|
||||||
|
|
||||||
test('ending with ext', () => {
|
test('identifiers with dots tokenize separately', () => {
|
||||||
expect('readme.txt').toMatchToken('Word', 'readme.txt')
|
expect('readme.txt').toMatchTokens(
|
||||||
|
{ type: 'Identifier', value: 'readme' },
|
||||||
|
{ type: 'Operator', value: '.' },
|
||||||
|
{ type: 'Identifier', value: 'txt' },
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('words (non-identifiers) consume dots', () => {
|
||||||
expect('README.md').toMatchToken('Word', 'README.md')
|
expect('README.md').toMatchToken('Word', 'README.md')
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
@ -259,6 +282,9 @@ describe('operators', () => {
|
||||||
expect('==').toMatchToken('Operator', '==')
|
expect('==').toMatchToken('Operator', '==')
|
||||||
expect('>').toMatchToken('Operator', '>')
|
expect('>').toMatchToken('Operator', '>')
|
||||||
expect('<').toMatchToken('Operator', '<')
|
expect('<').toMatchToken('Operator', '<')
|
||||||
|
|
||||||
|
// property access
|
||||||
|
expect('.').toMatchToken('Operator', '.')
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
@ -281,6 +307,12 @@ describe('keywords', () => {
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe('regex', () => {
|
||||||
|
test('use double slash', () => {
|
||||||
|
expect(`//[0-9]+//`).toMatchToken('Regex', '//[0-9]+//')
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
describe('punctuation', () => {
|
describe('punctuation', () => {
|
||||||
test('underscore', () => {
|
test('underscore', () => {
|
||||||
expect(`_`).toBeToken('Underscore')
|
expect(`_`).toBeToken('Underscore')
|
||||||
|
|
@ -453,6 +485,17 @@ f
|
||||||
{ type: 'Identifier', value: 'y' },
|
{ type: 'Identifier', value: 'y' },
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
expect(`if (var? 'abc'): y`).toMatchTokens(
|
||||||
|
{ type: 'Keyword', value: 'if' },
|
||||||
|
{ type: 'OpenParen' },
|
||||||
|
{ type: 'Identifier', value: 'var?' },
|
||||||
|
{ type: 'String', value: `'abc'` },
|
||||||
|
{ type: 'CloseParen' },
|
||||||
|
{ type: 'Colon' },
|
||||||
|
{ type: 'Identifier', value: 'y' },
|
||||||
|
)
|
||||||
|
|
||||||
expect(`
|
expect(`
|
||||||
do x:
|
do x:
|
||||||
y
|
y
|
||||||
|
|
@ -485,6 +528,30 @@ end`).toMatchTokens(
|
||||||
{ type: 'CloseParen' },
|
{ type: 'CloseParen' },
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test('dot operator beginning word with slash', () => {
|
||||||
|
expect(`(basename ./cool)`).toMatchTokens(
|
||||||
|
{ 'type': 'OpenParen' },
|
||||||
|
{ 'type': 'Identifier', 'value': 'basename' },
|
||||||
|
{ 'type': 'Word', 'value': './cool' },
|
||||||
|
{ 'type': 'CloseParen' }
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('dot word after identifier with space', () => {
|
||||||
|
expect(`expand-path .git`).toMatchTokens(
|
||||||
|
{ 'type': 'Identifier', 'value': 'expand-path' },
|
||||||
|
{ 'type': 'Word', 'value': '.git' },
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('dot operator after identifier without space', () => {
|
||||||
|
expect(`config.path`).toMatchTokens(
|
||||||
|
{ 'type': 'Identifier', 'value': 'config' },
|
||||||
|
{ 'type': 'Operator', 'value': '.' },
|
||||||
|
{ 'type': 'Identifier', 'value': 'path' },
|
||||||
|
)
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('nesting edge cases', () => {
|
describe('nesting edge cases', () => {
|
||||||
|
|
@ -591,3 +658,72 @@ describe('named args', () => {
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe('dot operator', () => {
|
||||||
|
test('standalone dot', () => {
|
||||||
|
expect('.').toMatchToken('Operator', '.')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('dot between identifiers tokenizes as separate tokens', () => {
|
||||||
|
expect('config.path').toMatchTokens(
|
||||||
|
{ type: 'Identifier', value: 'config' },
|
||||||
|
{ type: 'Operator', value: '.' },
|
||||||
|
{ type: 'Identifier', value: 'path' },
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('dot with number', () => {
|
||||||
|
expect('array.0').toMatchTokens(
|
||||||
|
{ type: 'Identifier', value: 'array' },
|
||||||
|
{ type: 'Operator', value: '.' },
|
||||||
|
{ type: 'Number', value: '0' },
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('chained dots', () => {
|
||||||
|
expect('a.b.c').toMatchTokens(
|
||||||
|
{ type: 'Identifier', value: 'a' },
|
||||||
|
{ type: 'Operator', value: '.' },
|
||||||
|
{ type: 'Identifier', value: 'b' },
|
||||||
|
{ type: 'Operator', value: '.' },
|
||||||
|
{ type: 'Identifier', value: 'c' },
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('identifier-like paths tokenize separately', () => {
|
||||||
|
expect('readme.txt').toMatchTokens(
|
||||||
|
{ type: 'Identifier', value: 'readme' },
|
||||||
|
{ type: 'Operator', value: '.' },
|
||||||
|
{ type: 'Identifier', value: 'txt' },
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('word-like paths remain as single token', () => {
|
||||||
|
expect('./file.txt').toMatchToken('Word', './file.txt')
|
||||||
|
expect('README.TXT').toMatchToken('Word', 'README.TXT')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('dot with paren expression', () => {
|
||||||
|
expect('obj.(1 + 2)').toMatchTokens(
|
||||||
|
{ type: 'Identifier', value: 'obj' },
|
||||||
|
{ type: 'Operator', value: '.' },
|
||||||
|
{ type: 'OpenParen' },
|
||||||
|
{ type: 'Number', value: '1' },
|
||||||
|
{ type: 'Operator', value: '+' },
|
||||||
|
{ type: 'Number', value: '2' },
|
||||||
|
{ type: 'CloseParen' },
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('chained dot with paren expression', () => {
|
||||||
|
expect('obj.items.(i)').toMatchTokens(
|
||||||
|
{ type: 'Identifier', value: 'obj' },
|
||||||
|
{ type: 'Operator', value: '.' },
|
||||||
|
{ type: 'Identifier', value: 'items' },
|
||||||
|
{ type: 'Operator', value: '.' },
|
||||||
|
{ type: 'OpenParen' },
|
||||||
|
{ type: 'Identifier', value: 'i' },
|
||||||
|
{ type: 'CloseParen' },
|
||||||
|
)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
@ -31,13 +31,14 @@ export enum TokenType {
|
||||||
Boolean,
|
Boolean,
|
||||||
Number,
|
Number,
|
||||||
String,
|
String,
|
||||||
|
Regex,
|
||||||
}
|
}
|
||||||
|
|
||||||
const valueTokens = new Set([
|
const valueTokens = new Set([
|
||||||
TokenType.Comment,
|
TokenType.Comment,
|
||||||
TokenType.Keyword, TokenType.Operator,
|
TokenType.Keyword, TokenType.Operator,
|
||||||
TokenType.Identifier, TokenType.Word, TokenType.NamedArgPrefix,
|
TokenType.Identifier, TokenType.Word, TokenType.NamedArgPrefix,
|
||||||
TokenType.Boolean, TokenType.Number, TokenType.String
|
TokenType.Boolean, TokenType.Number, TokenType.String, TokenType.Regex
|
||||||
])
|
])
|
||||||
|
|
||||||
const operators = new Set([
|
const operators = new Set([
|
||||||
|
|
@ -82,6 +83,12 @@ const operators = new Set([
|
||||||
'==',
|
'==',
|
||||||
'>',
|
'>',
|
||||||
'<',
|
'<',
|
||||||
|
|
||||||
|
// property access
|
||||||
|
'.',
|
||||||
|
|
||||||
|
// pipe
|
||||||
|
'|',
|
||||||
])
|
])
|
||||||
|
|
||||||
const keywords = new Set([
|
const keywords = new Set([
|
||||||
|
|
@ -99,8 +106,8 @@ const keywords = new Set([
|
||||||
])
|
])
|
||||||
|
|
||||||
// helper
|
// helper
|
||||||
function c(strings: TemplateStringsArray, ...values: any[]) {
|
function c(strings: TemplateStringsArray) {
|
||||||
return strings.reduce((result, str, i) => result + str + (values[i] ?? ""), "").charCodeAt(0)
|
return strings[0]!.charCodeAt(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
function s(c: number): string {
|
function s(c: number): string {
|
||||||
|
|
@ -116,6 +123,7 @@ export class Scanner {
|
||||||
inParen = 0
|
inParen = 0
|
||||||
inBracket = 0
|
inBracket = 0
|
||||||
tokens: Token[] = []
|
tokens: Token[] = []
|
||||||
|
prevIsWhitespace = true
|
||||||
|
|
||||||
reset() {
|
reset() {
|
||||||
this.input = ''
|
this.input = ''
|
||||||
|
|
@ -124,6 +132,7 @@ export class Scanner {
|
||||||
this.char = 0
|
this.char = 0
|
||||||
this.prev = 0
|
this.prev = 0
|
||||||
this.tokens.length = 0
|
this.tokens.length = 0
|
||||||
|
this.prevIsWhitespace = true
|
||||||
}
|
}
|
||||||
|
|
||||||
peek(count = 0): number {
|
peek(count = 0): number {
|
||||||
|
|
@ -131,9 +140,11 @@ export class Scanner {
|
||||||
}
|
}
|
||||||
|
|
||||||
next(): number {
|
next(): number {
|
||||||
|
this.prevIsWhitespace = isWhitespace(this.char)
|
||||||
this.prev = this.char
|
this.prev = this.char
|
||||||
this.char = this.peek()
|
this.char = this.peek()
|
||||||
this.pos += getCharSize(this.char)
|
this.pos += getCharSize(this.char)
|
||||||
|
|
||||||
return this.char
|
return this.char
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -156,6 +167,10 @@ export class Scanner {
|
||||||
this.start = this.pos
|
this.start = this.pos
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pushChar(type: TokenType) {
|
||||||
|
this.push(type, this.pos - 1, this.pos)
|
||||||
|
}
|
||||||
|
|
||||||
// turn shrimp code into shrimp tokens that get fed into the parser
|
// turn shrimp code into shrimp tokens that get fed into the parser
|
||||||
tokenize(input: string): Token[] {
|
tokenize(input: string): Token[] {
|
||||||
this.reset()
|
this.reset()
|
||||||
|
|
@ -164,6 +179,7 @@ export class Scanner {
|
||||||
|
|
||||||
while (this.char > 0) {
|
while (this.char > 0) {
|
||||||
const char = this.char
|
const char = this.char
|
||||||
|
|
||||||
if (char === c`#`) {
|
if (char === c`#`) {
|
||||||
this.readComment()
|
this.readComment()
|
||||||
continue
|
continue
|
||||||
|
|
@ -185,7 +201,7 @@ export class Scanner {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isIdentStart(char)) {
|
if (isIdentStart(char)) {
|
||||||
this.readIdentOrKeyword()
|
this.readWordOrIdent(true) // true = started with identifier char
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -195,25 +211,39 @@ export class Scanner {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (char === c`:`) {
|
if (char === c`:`) {
|
||||||
this.push(TokenType.Colon, this.start - 1, this.pos) // TODO: why?
|
this.pushChar(TokenType.Colon)
|
||||||
this.next()
|
this.next()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// whitespace-sensitive dot as operator (property access) only after identifier/number
|
||||||
|
if (char === c`.`) {
|
||||||
|
if (this.canBeDotGet(this.tokens.at(-1))) {
|
||||||
|
this.pushChar(TokenType.Operator)
|
||||||
|
this.next()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (char === c`/` && this.peek() === c`/`) {
|
||||||
|
this.readRegex()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
if (isWordChar(char)) {
|
if (isWordChar(char)) {
|
||||||
this.readWord()
|
this.readWordOrIdent(false) // false = didn't start with identifier char
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if (char === c`\n`) {
|
if (char === c`\n`) {
|
||||||
if (this.inParen === 0 && this.inBracket === 0)
|
if (this.inParen === 0 && this.inBracket === 0)
|
||||||
this.push(TokenType.Newline)
|
this.pushChar(TokenType.Newline)
|
||||||
this.next()
|
this.next()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if (char === c`;`) {
|
if (char === c`;`) {
|
||||||
this.push(TokenType.Semicolon)
|
this.pushChar(TokenType.Semicolon)
|
||||||
this.next()
|
this.next()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
@ -225,6 +255,7 @@ export class Scanner {
|
||||||
}
|
}
|
||||||
|
|
||||||
readComment() {
|
readComment() {
|
||||||
|
this.start = this.pos - 1
|
||||||
while (this.char !== c`\n` && this.char > 0) this.next()
|
while (this.char !== c`\n` && this.char > 0) this.next()
|
||||||
this.push(TokenType.Comment)
|
this.push(TokenType.Comment)
|
||||||
}
|
}
|
||||||
|
|
@ -233,16 +264,16 @@ export class Scanner {
|
||||||
switch (this.char) {
|
switch (this.char) {
|
||||||
case c`(`:
|
case c`(`:
|
||||||
this.inParen++
|
this.inParen++
|
||||||
this.push(TokenType.OpenParen); break
|
this.pushChar(TokenType.OpenParen); break
|
||||||
case c`)`:
|
case c`)`:
|
||||||
this.inParen--
|
this.inParen--
|
||||||
this.push(TokenType.CloseParen); break
|
this.pushChar(TokenType.CloseParen); break
|
||||||
case c`[`:
|
case c`[`:
|
||||||
this.inBracket++
|
this.inBracket++
|
||||||
this.push(TokenType.OpenBracket); break
|
this.pushChar(TokenType.OpenBracket); break
|
||||||
case c`]`:
|
case c`]`:
|
||||||
this.inBracket--
|
this.inBracket--
|
||||||
this.push(TokenType.CloseBracket); break
|
this.pushChar(TokenType.CloseBracket); break
|
||||||
}
|
}
|
||||||
this.next()
|
this.next()
|
||||||
}
|
}
|
||||||
|
|
@ -258,6 +289,7 @@ export class Scanner {
|
||||||
}
|
}
|
||||||
|
|
||||||
readCurlyString() {
|
readCurlyString() {
|
||||||
|
this.start = this.pos - 1 // include opening {
|
||||||
let depth = 1
|
let depth = 1
|
||||||
this.next()
|
this.next()
|
||||||
|
|
||||||
|
|
@ -270,7 +302,7 @@ export class Scanner {
|
||||||
this.push(TokenType.String)
|
this.push(TokenType.String)
|
||||||
}
|
}
|
||||||
|
|
||||||
readIdentOrKeyword() {
|
readWordOrIdent(startedWithIdentChar: boolean) {
|
||||||
this.start = this.pos - getCharSize(this.char)
|
this.start = this.pos - getCharSize(this.char)
|
||||||
|
|
||||||
while (isWordChar(this.char)) {
|
while (isWordChar(this.char)) {
|
||||||
|
|
@ -280,33 +312,50 @@ export class Scanner {
|
||||||
if (isWhitespace(nextCh) || nextCh === 0) break
|
if (isWhitespace(nextCh) || nextCh === 0) break
|
||||||
}
|
}
|
||||||
|
|
||||||
// stop at equal sign (named arg)
|
// stop at equal sign (named arg) - but only if what we've read so far is an identifier
|
||||||
if (this.char === c`=`) {
|
if (this.char === c`=`) {
|
||||||
|
const soFar = this.input.slice(this.start, this.pos - getCharSize(this.char))
|
||||||
|
if (isIdentifer(soFar)) {
|
||||||
this.next()
|
this.next()
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// stop at dot only if it would create a valid property access
|
||||||
|
// AND only if we started with an identifier character (not for Words like README.txt)
|
||||||
|
if (startedWithIdentChar && this.char === c`.`) {
|
||||||
|
const nextCh = this.peek()
|
||||||
|
if (isIdentStart(nextCh) || isDigit(nextCh) || nextCh === c`(`) {
|
||||||
|
const soFar = this.input.slice(this.start, this.pos - getCharSize(this.char))
|
||||||
|
if (isIdentifer(soFar)) break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
this.next()
|
this.next()
|
||||||
}
|
}
|
||||||
|
|
||||||
const ident = this.input.slice(this.start, this.pos - getCharSize(this.char))
|
const word = this.input.slice(this.start, this.pos - getCharSize(this.char))
|
||||||
|
|
||||||
if (ident === 'null')
|
// classify the token based on what we read
|
||||||
|
if (word === '_')
|
||||||
|
this.pushChar(TokenType.Underscore)
|
||||||
|
|
||||||
|
else if (word === 'null')
|
||||||
this.push(TokenType.Null)
|
this.push(TokenType.Null)
|
||||||
|
|
||||||
else if (ident === 'true' || ident === 'false')
|
else if (word === 'true' || word === 'false')
|
||||||
this.push(TokenType.Boolean)
|
this.push(TokenType.Boolean)
|
||||||
|
|
||||||
else if (isKeyword(ident))
|
else if (isKeyword(word))
|
||||||
this.push(TokenType.Keyword)
|
this.push(TokenType.Keyword)
|
||||||
|
|
||||||
else if (isOperator(ident))
|
else if (isOperator(word))
|
||||||
this.push(TokenType.Operator) // only things like `and` and `or`
|
this.push(TokenType.Operator)
|
||||||
|
|
||||||
else if (isIdentifer(ident))
|
else if (isIdentifer(word))
|
||||||
this.push(TokenType.Identifier)
|
this.push(TokenType.Identifier)
|
||||||
|
|
||||||
else if (ident.endsWith('='))
|
else if (word.endsWith('='))
|
||||||
this.push(TokenType.NamedArgPrefix)
|
this.push(TokenType.NamedArgPrefix)
|
||||||
|
|
||||||
else
|
else
|
||||||
|
|
@ -316,6 +365,12 @@ export class Scanner {
|
||||||
readNumber() {
|
readNumber() {
|
||||||
this.start = this.pos - 1
|
this.start = this.pos - 1
|
||||||
while (isWordChar(this.char)) {
|
while (isWordChar(this.char)) {
|
||||||
|
// stop at dot unless it's part of the number
|
||||||
|
if (this.char === c`.`) {
|
||||||
|
const nextCh = this.peek()
|
||||||
|
if (!isDigit(nextCh)) break
|
||||||
|
}
|
||||||
|
|
||||||
// stop at colon
|
// stop at colon
|
||||||
if (this.char === c`:`) {
|
if (this.char === c`:`) {
|
||||||
const nextCh = this.peek()
|
const nextCh = this.peek()
|
||||||
|
|
@ -327,21 +382,37 @@ export class Scanner {
|
||||||
this.push(isNumber(ident) ? TokenType.Number : TokenType.Word)
|
this.push(isNumber(ident) ? TokenType.Number : TokenType.Word)
|
||||||
}
|
}
|
||||||
|
|
||||||
readWord() {
|
readRegex() {
|
||||||
this.start = this.pos - getCharSize(this.char)
|
this.start = this.pos - 1
|
||||||
|
this.next() // skip 2nd /
|
||||||
|
|
||||||
while (isWordChar(this.char)) this.next()
|
let foundClosing = false
|
||||||
|
while (this.char > 0) {
|
||||||
|
if (this.char === c`/` && this.peek() === c`/`) {
|
||||||
|
this.next() // skip /
|
||||||
|
this.next() // skip /
|
||||||
|
foundClosing = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
const word = this.input.slice(this.start, this.pos - getCharSize(this.char))
|
this.next()
|
||||||
|
}
|
||||||
|
|
||||||
if (word === '_')
|
const closing = new Set([c`g`, c`i`, c`m`, c`s`, c`u`, c`y`])
|
||||||
this.push(TokenType.Underscore)
|
|
||||||
|
|
||||||
else if (operators.has(word))
|
// read flags (e.g., 'gi', 'gim', etc.)
|
||||||
this.push(TokenType.Operator)
|
if (foundClosing)
|
||||||
|
while (closing.has(this.char)) this.next()
|
||||||
|
|
||||||
else
|
this.push(TokenType.Regex)
|
||||||
this.push(TokenType.Word)
|
}
|
||||||
|
|
||||||
|
canBeDotGet(lastToken?: Token): boolean {
|
||||||
|
return !this.prevIsWhitespace && !!lastToken &&
|
||||||
|
(lastToken.type === TokenType.Identifier ||
|
||||||
|
lastToken.type === TokenType.Number ||
|
||||||
|
lastToken.type === TokenType.CloseParen ||
|
||||||
|
lastToken.type === TokenType.CloseBracket)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,12 +4,13 @@ import color from 'kleur'
|
||||||
import { Scanner, TokenType, type Token } from '#parser/tokenizer2'
|
import { Scanner, TokenType, type Token } from '#parser/tokenizer2'
|
||||||
import { parser } from '#parser/shrimp'
|
import { parser } from '#parser/shrimp'
|
||||||
import { setGlobals } from '#parser/tokenizer'
|
import { setGlobals } from '#parser/tokenizer'
|
||||||
|
import { parse } from '#parser/parser2'
|
||||||
import { globals as prelude } from '#prelude'
|
import { globals as prelude } from '#prelude'
|
||||||
import { $ } from 'bun'
|
import { $ } from 'bun'
|
||||||
import { assert, errorMessage } from '#utils/utils'
|
import { assert, errorMessage } from '#utils/utils'
|
||||||
import { Compiler } from '#compiler/compiler'
|
import { Compiler } from '#compiler/compiler'
|
||||||
import { run, VM } from 'reefvm'
|
import { run, VM } from 'reefvm'
|
||||||
import { treeToString, VMResultToValue } from '#utils/tree'
|
import { treeToString2, treeToString, VMResultToValue } from '#utils/tree'
|
||||||
|
|
||||||
const regenerateParser = async () => {
|
const regenerateParser = async () => {
|
||||||
let generate = true
|
let generate = true
|
||||||
|
|
@ -52,8 +53,8 @@ expect.extend({
|
||||||
|
|
||||||
const allGlobals = { ...prelude, ...(globals || {}) }
|
const allGlobals = { ...prelude, ...(globals || {}) }
|
||||||
setGlobals(Object.keys(allGlobals))
|
setGlobals(Object.keys(allGlobals))
|
||||||
const tree = parser.parse(received)
|
const tree = parse(received)
|
||||||
const actual = treeToString(tree, received)
|
const actual = treeToString2(tree, received)
|
||||||
const normalizedExpected = trimWhitespace(expected)
|
const normalizedExpected = trimWhitespace(expected)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
@ -244,7 +245,7 @@ const tokenize = (code: string): Token[] => {
|
||||||
return scanner.tokenize(code)
|
return scanner.tokenize(code)
|
||||||
}
|
}
|
||||||
|
|
||||||
const toHumanToken = (tok: Token): { type: string, value: string } => {
|
const toHumanToken = (tok: Token): { type: string, value?: string } => {
|
||||||
return {
|
return {
|
||||||
type: TokenType[tok.type],
|
type: TokenType[tok.type],
|
||||||
value: tok.value
|
value: tok.value
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,38 @@
|
||||||
import { Tree, TreeCursor } from '@lezer/common'
|
import { Tree, TreeCursor } from '@lezer/common'
|
||||||
import { type Value, fromValue } from 'reefvm'
|
import { type Value, fromValue } from 'reefvm'
|
||||||
|
import { SyntaxNode } from '#parser/node'
|
||||||
|
|
||||||
|
const nodeToString = (node: SyntaxNode, input: string, depth = 0): string => {
|
||||||
|
const indent = ' '.repeat(depth)
|
||||||
|
const text = input.slice(node.from, node.to)
|
||||||
|
const nodeName = node.name
|
||||||
|
|
||||||
|
if (node.firstChild) {
|
||||||
|
return `${indent}${nodeName}`
|
||||||
|
} else {
|
||||||
|
// Only strip quotes from whole String nodes (legacy DoubleQuote), not StringFragment/EscapeSeq/CurlyString
|
||||||
|
const cleanText = nodeName === 'String' ? text.slice(1, -1) : text
|
||||||
|
return `${indent}${nodeName} ${cleanText}`
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export const treeToString2 = (tree: SyntaxNode, input: string, depth = 0): string => {
|
||||||
|
let lines = []
|
||||||
|
let node: SyntaxNode | null = tree
|
||||||
|
|
||||||
|
if (node.name === 'Program') node = node.firstChild
|
||||||
|
|
||||||
|
while (node) {
|
||||||
|
lines.push(nodeToString(node, input, depth))
|
||||||
|
|
||||||
|
if (node.firstChild)
|
||||||
|
lines.push(treeToString2(node.firstChild, input, depth + 1))
|
||||||
|
|
||||||
|
node = node.nextSibling
|
||||||
|
}
|
||||||
|
|
||||||
|
return lines.join('\n')
|
||||||
|
}
|
||||||
|
|
||||||
export const treeToString = (tree: Tree, input: string): string => {
|
export const treeToString = (tree: Tree, input: string): string => {
|
||||||
const lines: string[] = []
|
const lines: string[] = []
|
||||||
|
|
|
||||||