import { CompilerError } from '#compiler/compilerError.ts'
import { parser } from '#parser/shrimp.ts'
import * as terms from '#parser/shrimp.terms'
import { setGlobals } from '#parser/tokenizer'
import type { SyntaxNode, Tree } from '@lezer/common'
import { assert, errorMessage } from '#utils/utils'
import { toBytecode, type Bytecode, type ProgramItem, bytecodeToString } from 'reefvm'
import {
  checkTreeForErrors,
  getAllChildren,
  getAssignmentParts,
  getBinaryParts,
  getDotGetParts,
  getFunctionCallParts,
  getFunctionDefParts,
  getIfExprParts,
  getNamedArgParts,
  getPipeExprParts,
  getStringParts,
  getTryExprParts,
} from '#compiler/utils'

// Flip to `true` to log every visited node and the final bytecode.
const DEBUG = false
// const DEBUG = true

/** A jump-target name as used in emitted instructions; always starts with a dot. */
type Label = `.${string}`

/**
 * Translate a single escape sequence found in a string literal.
 *
 * @param escapeSeq The raw sequence including the backslash, e.g. `\n`, `\$`, `\\`.
 * @returns The character the sequence stands for. Input that is not exactly two
 *   characters long, or whose second character is not a known escape, is
 *   returned unchanged.
 */
function processEscapeSeq(escapeSeq: string): string {
  if (escapeSeq.length !== 2) return escapeSeq

  switch (escapeSeq[1]) {
    case 'n':
      return '\n'
    case 't':
      return '\t'
    case 'r':
      return '\r'
    case '\\':
      return '\\'
    case "'":
      return "'"
    case '$':
      return '$'
    default:
      return escapeSeq // Unknown escape, keep as-is
  }
}

/**
 * Compiles source text into bytecode.
 *
 * All work happens in the constructor: the input is parsed, the resulting tree
 * is checked for errors, every child of the top-level node is compiled into
 * `instructions`, and the instruction list is converted with `toBytecode` and
 * stored in `bytecode`.
 */
export class Compiler {
  /** Flat list of emitted program items (instructions and label markers). */
  instructions: ProgramItem[] = []
  /** Counter used to build unique `.func_N` labels. */
  fnLabelCount = 0
  /** Counter used to build unique `.end_N` labels for if-expressions. */
  ifLabelCount = 0
  /** Counter used to build unique `.catch_N` / `.finally_N` / `.end_try_N` labels. */
  tryLabelCount = 0
  /** Result of `toBytecode(this.instructions)`; set once compilation succeeds. */
  bytecode: Bytecode
  /** Counter used to build unique `_pipe_value_N` variable names. */
  pipeCounter = 0

  /**
   * @param input Source text to compile.
   * @param globals Optional names forwarded to `setGlobals` before parsing.
   * @throws Error Always a plain `Error`: a `CompilerError` is rendered with
   *   `toReadableString(input)`; anything else is wrapped in an
   *   "Unknown error during compilation" message.
   */
  constructor(public input: string, globals?: string[]) {
    try {
      if (globals) setGlobals(globals)
      const cst = parser.parse(input)
      const errors = checkTreeForErrors(cst)

      // Only the first reported error is surfaced.
      const firstError = errors[0]
      if (firstError) {
        throw firstError
      }

      this.#compileCst(cst, input)
      this.bytecode = toBytecode(this.instructions)

      if (DEBUG) {
        const bytecodeString = bytecodeToString(this.bytecode)
        console.log(`\n🤖 bytecode:\n----------------\n${bytecodeString}\n\n`)
      }
    } catch (error) {
      if (error instanceof CompilerError) {
        throw new Error(error.toReadableString(input))
      } else {
        throw new Error(`Unknown error during compilation:\n${errorMessage(error)}`)
      }
    }
  }

  /**
   * Compile every direct child of the tree's top node, in order, appending the
   * results to `this.instructions`, then append a final `HALT`.
   *
   * Asserts that the top node is a `Program`.
   */
  #compileCst(cst: Tree, input: string) {
    const isProgram = cst.topNode.type.id === terms.Program
    assert(isProgram, `Expected Program node, got ${cst.topNode.type.name}`)

    let child = cst.topNode.firstChild
    while (child) {
      this.instructions.push(...this.#compileNode(child, input))
      child = child.nextSibling
    }

    this.instructions.push(['HALT'])
  }

  /**
   * Compile one syntax node (recursively) into a list of program items.
   *
   * @param node The node to compile.
   * @param input The full source text; node text is taken with `input.slice(from, to)`.
   * @returns The program items for this node.
   * @throws CompilerError For malformed nodes, unsupported operators, invalid
   *   literals, and node types this compiler has no case for.
   */
  #compileNode(node: SyntaxNode, input: string): ProgramItem[] {
    const value = input.slice(node.from, node.to)
    if (DEBUG) console.log(`🫦 ${node.name}: ${value}`)

    switch (node.type.id) {
      case terms.Number: {
        const number = Number(value)
        if (Number.isNaN(number))
          throw new CompilerError(`Invalid number literal: ${value}`, node.from, node.to)

        return [['PUSH', number]]
      }

      case terms.String: {
        const { parts, hasInterpolation } = getStringParts(node, input)

        // Simple string without interpolation or escapes - extract text directly
        if (!hasInterpolation) {
          // Remove surrounding quotes and return as-is
          const strValue = value.slice(1, -1)
          return [['PUSH', strValue]]
        }

        // String with interpolation or escapes - compile each part and concatenate
        const instructions: ProgramItem[] = []
        parts.forEach((part) => {
          const partValue = input.slice(part.from, part.to)

          switch (part.type.id) {
            case terms.StringFragment:
              // Plain text fragment - just push as-is
              instructions.push(['PUSH', partValue])
              break

            case terms.EscapeSeq: {
              // Process escape sequence and push the result
              const processed = processEscapeSeq(partValue)
              instructions.push(['PUSH', processed])
              break
            }

            case terms.Interpolation: {
              // Interpolation contains either Identifier or ParenExpr (the $ is anonymous)
              const child = part.firstChild
              if (!child) {
                throw new CompilerError('Interpolation has no child', part.from, part.to)
              }
              // Compile the Identifier or ParenExpr
              instructions.push(...this.#compileNode(child, input))
              break
            }

            default:
              throw new CompilerError(
                `Unexpected string part: ${part.type.name}`,
                part.from,
                part.to
              )
          }
        })

        // Use STR_CONCAT to join all parts
        instructions.push(['STR_CONCAT', parts.length])
        return instructions
      }

      case terms.Boolean: {
        return [['PUSH', value === 'true']]
      }

      case terms.Null: {
        return [['PUSH', null]]
      }

      case terms.Regex: {
        // remove the surrounding slashes and any flags
        const match = value.match(/^\/\/(.*)\/\/([gimsuy]*)$/)
        const pattern = match?.[1]
        const flags = match?.[2]
        // An empty pattern is rejected as well as a non-matching literal.
        if (!pattern) {
          throw new CompilerError(`Invalid regex literal: ${value}`, node.from, node.to)
        }

        let regex: RegExp
        try {
          regex = new RegExp(pattern, flags)
        } catch {
          throw new CompilerError(`Invalid regex literal: ${value}`, node.from, node.to)
        }

        return [['PUSH', regex]]
      }

      case terms.Identifier: {
        return [['TRY_LOAD', value]]
      }

      case terms.Word: {
        return [['PUSH', value]]
      }

      case terms.DotGet: {
        const { objectName, property } = getDotGetParts(node, input)
        const instructions: ProgramItem[] = []
        instructions.push(['TRY_LOAD', objectName])

        if (property.type.id === terms.ParenExpr) {
          // Computed property: compile the parenthesised expression
          instructions.push(...this.#compileNode(property, input))
        } else {
          // Anything else is pushed as its literal source text
          const propertyValue = input.slice(property.from, property.to)
          instructions.push(['PUSH', propertyValue])
        }

        instructions.push(['DOT_GET'])
        return instructions
      }

      case terms.BinOp: {
        const { left, op, right } = getBinaryParts(node)
        const instructions: ProgramItem[] = []
        instructions.push(...this.#compileNode(left, input))
        instructions.push(...this.#compileNode(right, input))

        const opValue = input.slice(op.from, op.to)
        switch (opValue) {
          case '+':
            instructions.push(['ADD'])
            break
          case '-':
            instructions.push(['SUB'])
            break
          case '*':
            instructions.push(['MUL'])
            break
          case '/':
            instructions.push(['DIV'])
            break
          case '%':
            instructions.push(['MOD'])
            break
          default:
            throw new CompilerError(`Unsupported binary operator: ${opValue}`, op.from, op.to)
        }

        return instructions
      }

      case terms.Assign: {
        const { identifier, right } = getAssignmentParts(node)
        const instructions: ProgramItem[] = []
        instructions.push(...this.#compileNode(right, input))
        instructions.push(['DUP']) // Keep a copy on the stack after storing

        const identifierName = input.slice(identifier.from, identifier.to)
        instructions.push(['STORE', identifierName])

        return instructions
      }

      case terms.ParenExpr: {
        const child = node.firstChild
        if (!child) return [] // I guess it is empty parentheses?

        return this.#compileNode(child, input)
      }

      case terms.FunctionDef: {
        const { paramNames, bodyNodes, catchVariable, catchBody, finallyBody } =
          getFunctionDefParts(node, input)
        const instructions: ProgramItem[] = []
        const functionLabel: Label = `.func_${this.fnLabelCount++}`
        const afterLabel: Label = `.after_${functionLabel}`

        // Jump over the body so it only runs when the function is called
        instructions.push(['JUMP', afterLabel])
        instructions.push([`${functionLabel}:`])

        const compileFunctionBody = () => {
          const bodyInstructions: ProgramItem[] = []
          bodyNodes.forEach((bodyNode, index) => {
            bodyInstructions.push(...this.#compileNode(bodyNode, input))
            // keep only the last expression's value
            if (index < bodyNodes.length - 1) {
              bodyInstructions.push(['POP'])
            }
          })
          return bodyInstructions
        }

        if (catchVariable || finallyBody) {
          // If function has catch or finally, wrap body in try/catch/finally
          instructions.push(
            ...this.#compileTryCatchFinally(
              compileFunctionBody,
              catchVariable,
              catchBody,
              finallyBody,
              input
            )
          )
        } else {
          instructions.push(...compileFunctionBody())
        }

        instructions.push(['RETURN'])
        instructions.push([`${afterLabel}:`])
        instructions.push(['MAKE_FUNCTION', paramNames, functionLabel])

        return instructions
      }

      case terms.FunctionCallOrIdentifier: {
        if (node.firstChild?.type.id === terms.DotGet) {
          return this.#compileNode(node.firstChild, input)
        }

        return [['TRY_CALL', value]]
      }

      /*
      ### Function Calls
      Stack order (bottom to top):

      LOAD fn
      PUSH arg1         ; Positional args
      PUSH arg2
      PUSH "name"       ; Named arg key
      PUSH "value"      ; Named arg value
      PUSH 2            ; Positional count
      PUSH 1            ; Named count
      CALL
      */
      case terms.FunctionCall: {
        const { identifierNode, namedArgs, positionalArgs } = getFunctionCallParts(node, input)
        const instructions: ProgramItem[] = []
        instructions.push(...this.#compileNode(identifierNode, input))

        positionalArgs.forEach((arg) => {
          instructions.push(...this.#compileNode(arg, input))
        })

        namedArgs.forEach((arg) => {
          const { name, valueNode } = getNamedArgParts(arg, input)
          instructions.push(['PUSH', name])
          instructions.push(...this.#compileNode(valueNode, input))
        })

        instructions.push(['PUSH', positionalArgs.length])
        instructions.push(['PUSH', namedArgs.length])
        instructions.push(['CALL'])
        return instructions
      }

      case terms.ThenBlock:
      case terms.SingleLineThenBlock:
      case terms.TryBlock: {
        const children = getAllChildren(node)
        const instructions: ProgramItem[] = []

        children.forEach((child, index) => {
          instructions.push(...this.#compileNode(child, input))
          // keep only the last expression's value
          if (index < children.length - 1) {
            instructions.push(['POP'])
          }
        })

        return instructions
      }

      case terms.TryExpr: {
        const { tryBlock, catchVariable, catchBody, finallyBody } = getTryExprParts(node, input)

        return this.#compileTryCatchFinally(
          () => this.#compileNode(tryBlock, input),
          catchVariable,
          catchBody,
          finallyBody,
          input
        )
      }

      case terms.Throw: {
        const children = getAllChildren(node)
        // children[0] is the `throw` keyword; the thrown expression follows it
        const expression = children[1]
        if (!expression) {
          throw new CompilerError(
            `Throw expected expression, got ${children.length} children`,
            node.from,
            node.to
          )
        }

        const instructions: ProgramItem[] = []
        instructions.push(...this.#compileNode(expression, input))
        instructions.push(['THROW'])

        return instructions
      }

      case terms.IfExpr: {
        const { conditionNode, thenBlock, elseIfBlocks, elseThenBlock } = getIfExprParts(
          node,
          input
        )
        const instructions: ProgramItem[] = []
        instructions.push(...this.#compileNode(conditionNode, input))
        // Reserve the label before compiling branches so nested ifs get their own numbers
        this.ifLabelCount++
        const endLabel: Label = `.end_${this.ifLabelCount}`

        // NOTE(review): the relative offsets below count ProgramItems, which can include
        // label items emitted by nested if/try/function constructs. Confirm that reefvm
        // counts label items toward relative jump offsets.
        const thenBlockInstructions = this.#compileNode(thenBlock, input)
        // +1 skips the `JUMP endLabel` that follows the then-block
        instructions.push(['JUMP_IF_FALSE', thenBlockInstructions.length + 1])
        instructions.push(...thenBlockInstructions)
        instructions.push(['JUMP', endLabel])

        // Else if
        elseIfBlocks.forEach(({ conditional, thenBlock }) => {
          instructions.push(...this.#compileNode(conditional, input))
          const elseIfInstructions = this.#compileNode(thenBlock, input)
          instructions.push(['JUMP_IF_FALSE', elseIfInstructions.length + 1])
          instructions.push(...elseIfInstructions)
          instructions.push(['JUMP', endLabel])
        })

        // Else
        if (elseThenBlock) {
          const elseThenInstructions = this.#compileNode(elseThenBlock, input)
          instructions.push(...elseThenInstructions)
        } else {
          // No else branch: the expression evaluates to null
          instructions.push(['PUSH', null])
        }

        instructions.push([`${endLabel}:`])

        return instructions
      }

      // - `EQ`, `NEQ`, `LT`, `GT`, `LTE`, `GTE` - Pop 2, push boolean
      case terms.ConditionalOp: {
        const instructions: ProgramItem[] = []
        const { left, op, right } = getBinaryParts(node)
        const leftInstructions: ProgramItem[] = this.#compileNode(left, input)
        const rightInstructions: ProgramItem[] = this.#compileNode(right, input)

        const opValue = input.slice(op.from, op.to)
        switch (opValue) {
          case '==':
            instructions.push(...leftInstructions, ...rightInstructions, ['EQ'])
            break

          case '!=':
            instructions.push(...leftInstructions, ...rightInstructions, ['NEQ'])
            break

          case '<':
            instructions.push(...leftInstructions, ...rightInstructions, ['LT'])
            break

          case '>':
            instructions.push(...leftInstructions, ...rightInstructions, ['GT'])
            break

          case '<=':
            instructions.push(...leftInstructions, ...rightInstructions, ['LTE'])
            break

          case '>=':
            instructions.push(...leftInstructions, ...rightInstructions, ['GTE'])
            break

          case 'and':
            // Short-circuit: keep a copy of the left value and skip the right side
            // (POP + right instructions) when the left value is falsy.
            instructions.push(...leftInstructions)
            instructions.push(['DUP'])
            instructions.push(['JUMP_IF_FALSE', rightInstructions.length + 1])
            instructions.push(['POP'])
            instructions.push(...rightInstructions)
            break

          case 'or':
            // Short-circuit: same shape as `and`, skipping the right side when truthy.
            instructions.push(...leftInstructions)
            instructions.push(['DUP'])
            instructions.push(['JUMP_IF_TRUE', rightInstructions.length + 1])
            instructions.push(['POP'])
            instructions.push(...rightInstructions)
            break

          default:
            throw new CompilerError(`Unsupported conditional operator: ${opValue}`, op.from, op.to)
        }

        return instructions
      }

      case terms.PipeExpr: {
        const { pipedFunctionCall, pipeReceivers } = getPipeExprParts(node)
        if (!pipedFunctionCall || pipeReceivers.length === 0) {
          throw new CompilerError('PipeExpr must have at least two operands', node.from, node.to)
        }

        const instructions: ProgramItem[] = []
        instructions.push(...this.#compileNode(pipedFunctionCall, input))

        // One temp variable per pipe expression; every stage stores into and reads from it
        this.pipeCounter++
        const pipeValName = `_pipe_value_${this.pipeCounter}`

        pipeReceivers.forEach((pipeReceiver) => {
          instructions.push(['STORE', pipeValName])

          const { identifierNode, namedArgs, positionalArgs } = getFunctionCallParts(
            pipeReceiver,
            input
          )

          instructions.push(...this.#compileNode(identifierNode, input))

          const isUnderscoreInPositionalArgs = positionalArgs.some(
            (arg) => arg.type.id === terms.Underscore
          )
          const isUnderscoreInNamedArgs = namedArgs.some((arg) => {
            const { valueNode } = getNamedArgParts(arg, input)
            return valueNode.type.id === terms.Underscore
          })

          const shouldPushPositionalArg = !isUnderscoreInPositionalArgs && !isUnderscoreInNamedArgs

          // If no underscore is explicitly used, add the piped value as the first positional arg
          if (shouldPushPositionalArg) {
            instructions.push(['LOAD', pipeValName])
          }

          positionalArgs.forEach((arg) => {
            if (arg.type.id === terms.Underscore) {
              instructions.push(['LOAD', pipeValName])
            } else {
              instructions.push(...this.#compileNode(arg, input))
            }
          })

          namedArgs.forEach((arg) => {
            const { name, valueNode } = getNamedArgParts(arg, input)
            instructions.push(['PUSH', name])
            if (valueNode.type.id === terms.Underscore) {
              instructions.push(['LOAD', pipeValName])
            } else {
              instructions.push(...this.#compileNode(valueNode, input))
            }
          })

          instructions.push(['PUSH', positionalArgs.length + (shouldPushPositionalArg ? 1 : 0)])
          instructions.push(['PUSH', namedArgs.length])
          instructions.push(['CALL'])
        })

        return instructions
      }

      case terms.Array: {
        const children = getAllChildren(node)

        // We can easily parse [=] as an empty dict, but `[ = ]` is tougher.
        // = can be a valid word, and is also valid inside words, so for now we cheat
        // and check for arrays that look like `[ = ]` to interpret them as
        // empty dicts
        const onlyChild = children.length === 1 ? children[0] : undefined
        if (
          onlyChild &&
          onlyChild.name === 'Word' &&
          input.slice(onlyChild.from, onlyChild.to) === '='
        ) {
          return [['MAKE_DICT', 0]]
        }

        const instructions: ProgramItem[] = children.flatMap((x) => this.#compileNode(x, input))
        instructions.push(['MAKE_ARRAY', children.length])
        return instructions
      }

      case terms.Dict: {
        const children = getAllChildren(node)
        const instructions: ProgramItem[] = []

        children.forEach((entry) => {
          const keyNode = entry.firstChild
          const valueNode = keyNode?.nextSibling
          // Report malformed entries with a source position instead of crashing on null
          if (!keyNode || !valueNode) {
            throw new CompilerError('Dict entry must have a key and a value', entry.from, entry.to)
          }

          // name= -> name
          const key = input.slice(keyNode.from, keyNode.to).slice(0, -1)
          instructions.push(['PUSH', key])

          instructions.push(...this.#compileNode(valueNode, input))
        })

        instructions.push(['MAKE_DICT', children.length])
        return instructions
      }

      default:
        throw new CompilerError(
          `Compiler doesn't know how to handle a "${node.type.name}" node.`,
          node.from,
          node.to
        )
    }
  }

  /**
   * Emit the instruction sequence for a try / catch / finally construct.
   *
   * Layout: `PUSH_TRY catch`, the try body, `POP_TRY`, a jump to the exit point,
   * the catch label and its handler, then (if present) the finally label and
   * body followed by `POP`, and lastly the end label. The "exit point" is the
   * finally label when a finally body exists and the end label otherwise.
   *
   * @param compileTryBody Produces the instructions for the protected body.
   * @param catchVariable Name the caught value is stored under. The catch body
   *   is only emitted when both this and `catchBody` are provided.
   * @param catchBody Node compiled as the catch handler.
   * @param finallyBody Node compiled as the finally block, if any.
   * @param input The full source text.
   * @returns The program items for the whole construct.
   */
  #compileTryCatchFinally(
    compileTryBody: () => ProgramItem[],
    catchVariable: string | undefined,
    catchBody: SyntaxNode | undefined,
    finallyBody: SyntaxNode | undefined,
    input: string
  ): ProgramItem[] {
    const instructions: ProgramItem[] = []

    // Fix all label names up front so nested try constructs cannot affect them
    this.tryLabelCount++
    const labelId = this.tryLabelCount
    const catchLabel: Label = `.catch_${labelId}`
    const finallyLabel: Label = `.finally_${labelId}`
    const endLabel: Label = `.end_try_${labelId}`
    // Where control goes after the try body or the catch handler completes
    const exitLabel: Label = finallyBody ? finallyLabel : endLabel

    instructions.push(['PUSH_TRY', catchLabel])
    instructions.push(...compileTryBody())
    instructions.push(['POP_TRY'])
    instructions.push(['JUMP', exitLabel])

    // catch block
    instructions.push([`${catchLabel}:`])
    if (catchBody && catchVariable) {
      instructions.push(['STORE', catchVariable])
      const catchInstructions = this.#compileNode(catchBody, input)
      instructions.push(...catchInstructions)
      instructions.push(['JUMP', exitLabel])
    } else if (finallyBody) {
      // no catch block, but there is a finally to run
      instructions.push(['JUMP', exitLabel])
    } else {
      // no catch block and no finally: re-throw
      instructions.push(['THROW'])
    }

    // finally block
    if (finallyBody) {
      instructions.push([`${finallyLabel}:`])
      const finallyInstructions = this.#compileNode(finallyBody, input)
      instructions.push(...finallyInstructions)
      // finally doesn't return a value
      instructions.push(['POP'])
    }

    instructions.push([`${endLabel}:`])

    return instructions
  }
}