import { type Value, type FunctionDef, toValue } from "./value"
import { OpCode } from "./opcode"

/** An assembled program: the instruction list, its constant pool, and optional debug labels. */
export type Bytecode = {
  instructions: Instruction[]
  constants: Constant[]
  labels?: Map<number, string> // Maps instruction index to label name
}

/** A single instruction: an opcode plus an optional numeric or string operand. */
export type Instruction = {
  op: OpCode
  operand?: number | string
}

/** An entry of the constant pool: either a plain value or a function definition. */
export type Constant =
  | Value
  | FunctionDef

/** Literal values accepted as the operand of PUSH in the array format. */
type Atom = number | string | boolean | null

/** One instruction of the array (programmatic) input format: opcode name followed by its operands. */
type InstructionTuple =
  // Stack
  | ["PUSH", Atom]
  | ["POP"]
  | ["DUP"]

  // Variables
  | ["LOAD", string]
  | ["STORE", string]
  | ["TRY_LOAD", string]

  // Arithmetic
  | ["ADD"]
  | ["SUB"]
  | ["MUL"]
  | ["DIV"]
  | ["MOD"]

  // Comparison
  | ["EQ"]
  | ["NEQ"]
  | ["LT"]
  | ["GT"]
  | ["LTE"]
  | ["GTE"]

  // Logical
  | ["NOT"]

  // Control flow
  | ["JUMP", string | number]
  | ["JUMP_IF_FALSE", string | number]
  | ["JUMP_IF_TRUE", string | number]
  | ["BREAK"]

  // Exception handling
  | ["PUSH_TRY", string | number]
  | ["PUSH_FINALLY", string | number]
  | ["POP_TRY"]
  | ["THROW"]

  // Functions
  | ["MAKE_FUNCTION", string[], string | number]
  | ["CALL"]
  | ["TAIL_CALL"]
  | ["RETURN"]
  | ["TRY_CALL", string]

  // Arrays
  | ["MAKE_ARRAY", number]
  | ["ARRAY_GET"]
  | ["ARRAY_SET"]
  | ["ARRAY_PUSH"]
  | ["ARRAY_LEN"]

  // Dicts
  | ["MAKE_DICT", number]
  | ["DICT_GET"]
  | ["DICT_SET"]
  | ["DICT_HAS"]

  // Native
  | ["CALL_NATIVE", string]

  // Special
  | ["HALT"]

type LabelDefinition = [string] // Just ".label_name:"

/** One element of the array input format: an instruction tuple or a label definition. */
export type ProgramItem = InstructionTuple | LabelDefinition

//
// Parse bytecode from human-readable string format.
// Operand types are determined by prefix/literal:
//   #42      -> immediate number (e.g., JUMP #5, MAKE_ARRAY #3)
//   .label   -> label reference (e.g., JUMP .loop_start, MAKE_FUNCTION (x y) .body)
//   name     -> variable/function name (e.g., LOAD x, CALL_NATIVE add)
//   42       -> number constant (e.g., PUSH 42)
//   "str"    -> string constant (e.g., PUSH "hello")
//   'str'    -> string constant (e.g., PUSH 'hello')
//   true     -> boolean constant (e.g., PUSH true)
//   false    -> boolean constant (e.g., PUSH false)
//   null     -> null constant (e.g., PUSH null)
//
// Labels:
//   .label_name:  -> label definition (marks current instruction position)
//
// Comments:
//   ; text   -> ignored up to end of line (a ';' inside a quoted string is kept)
//
// Function definitions:
//   MAKE_FUNCTION (x y) #7        -> basic function (numeric offset)
//   MAKE_FUNCTION (x y) .body     -> basic function (label reference)
//   MAKE_FUNCTION (x y=42) #7     -> with defaults
//   MAKE_FUNCTION (x ...rest) #7  -> variadic
//   MAKE_FUNCTION (x @named) #7   -> named
//

// Hoisted patterns shared by the string and array front ends.
const NUMBER_LITERAL = /^-?\d+(\.\d+)?$/
const STRING_LITERAL = /^['"].*['"]$/
const LABEL_DEFINITION = /^\.[a-zA-Z_][a-zA-Z0-9_]*:$/
const MAKE_FUNCTION_OPERAND = /^(\(.*?\))\s+(#-?\d+|\.[a-zA-Z_][a-zA-Z0-9_]*)$/

/** Result of parsing a MAKE_FUNCTION parameter list. */
type ParsedParams = {
  params: string[]
  /** Parameter name -> index of its default value in the constant pool. */
  defaults: Record<string, number>
  variadic: boolean
  named: boolean
}

/**
 * Convert literal source text (number, quoted string, true/false, null) into a constant.
 *
 * @returns the constant, or `undefined` when `text` is not a literal.
 */
function parseLiteralConstant(text: string): Constant | undefined {
  if (NUMBER_LITERAL.test(text)) return toValue(parseFloat(text))
  // Quotes are stripped positionally; no escape sequences are interpreted.
  if (STRING_LITERAL.test(text)) return toValue(text.slice(1, -1))
  if (text === "true" || text === "false") return toValue(text === "true")
  if (text === "null") return toValue(null)
  return undefined
}

/**
 * Parse individual parameter tokens (`x`, `y=42`, `...rest`, `@named`).
 *
 * Default values are appended to `constants` as they are encountered, and the
 * index of each one is recorded in `defaults` right away, so a default can
 * never be paired with the wrong parameter.
 *
 * @param parts one token per parameter
 * @param constants constant pool; mutated (default values are pushed onto it)
 * @throws Error when a default value is not a number, string, boolean or null literal
 */
function parseParamList(parts: readonly string[], constants: Constant[]): ParsedParams {
  const params: string[] = []
  const defaults: Record<string, number> = {}
  let variadic = false
  let named = false

  for (const part of parts) {
    if (part.startsWith("@")) {
      // Named args (@name)
      named = true
      params.push(part.slice(1))
      continue
    }

    if (part.startsWith("...")) {
      // Variadic (...name)
      variadic = true
      params.push(part.slice(3))
      continue
    }

    const equalsAt = part.indexOf("=")
    if (equalsAt === -1) {
      params.push(part)
      continue
    }

    // Default value (name=value). Split on the first '=' only so that a quoted
    // default may itself contain '='.
    const name = part.slice(0, equalsAt).trim()
    const defaultValue = part.slice(equalsAt + 1).trim()
    params.push(name)

    const constant = parseLiteralConstant(defaultValue)
    if (constant === undefined) {
      throw new Error(`Invalid default value: ${defaultValue}`)
    }
    constants.push(constant)
    defaults[name] = constants.length - 1
  }

  return { params, defaults, variadic, named }
}

/**
 * Parse a parenthesised, whitespace-separated parameter list such as `(x y=42 ...rest)`.
 *
 * @param paramStr parameter list including the surrounding parentheses
 * @param constants constant pool; default values are pushed onto it
 * @throws Error when a default value is not a valid literal
 */
function parseFunctionParams(paramStr: string, constants: Constant[]): ParsedParams {
  // Remove parens and split by whitespace
  const paramList = paramStr.slice(1, -1).trim()
  return parseParamList(paramList ? paramList.split(/\s+/) : [], constants)
}

/** True when `item` is a one-element tuple of the form `[".label_name:"]`. */
function isLabelDefinition(item: ProgramItem): item is LabelDefinition {
  return item.length === 1 && typeof item[0] === "string" && item[0].startsWith(".") && item[0].endsWith(":")
}

/** True when `value` is a string starting with `.`, i.e. a label reference. */
function isLabelReference(value: string | number): value is string {
  return typeof value === "string" && value.startsWith(".")
}

/**
 * Look up the instruction index recorded for a label.
 *
 * @throws Error when the label was never defined
 */
function resolveLabel(labels: ReadonlyMap<string, number>, labelName: string): number {
  const labelPos = labels.get(labelName)
  if (labelPos === undefined) {
    throw new Error(`Undefined label: ${labelName}`)
  }
  return labelPos
}

/** Invert name->index into index->name. If two labels share an index the later one wins. */
function invertLabels(labels: ReadonlyMap<string, number>): Map<number, string> {
  const labelsByIndex = new Map<number, string>()
  for (const [name, index] of labels.entries()) {
    labelsByIndex.set(index, name)
  }
  return labelsByIndex
}

/**
 * Assemble the array (programmatic) format.
 *
 * Pass 1 removes label definitions and records the index of the instruction
 * that follows each one; pass 2 emits instructions and resolves references.
 *
 * @throws Error on unknown opcodes, undefined labels, a MAKE_FUNCTION without
 *   a body address, invalid default values, or an operand given to an opcode
 *   that takes none
 */
function toBytecodeFromArray(program: ProgramItem[]): Bytecode /* throws */ {
  const constants: Constant[] = []
  const instructions: Instruction[] = []
  const labels = new Map<string, number>()

  // First pass: collect labels
  const filteredProgram: InstructionTuple[] = []
  for (const item of program) {
    if (isLabelDefinition(item)) {
      const labelName = item[0].slice(1, -1) // Remove . prefix and : suffix
      labels.set(labelName, filteredProgram.length)
    } else {
      filteredProgram.push(item as InstructionTuple)
    }
  }

  // Second pass: build instructions
  for (let i = 0; i < filteredProgram.length; i++) {
    const item = filteredProgram[i]!
    const op = item[0] as string
    const opCode = OpCode[op as keyof typeof OpCode]

    if (opCode === undefined) {
      throw new Error(`Unknown opcode: ${op}`)
    }

    let operandValue: number | string | undefined = undefined

    if (item.length > 1) {
      const operand = item[1]

      switch (op) {
        case "PUSH":
          constants.push(toValue(operand as Atom))
          operandValue = constants.length - 1
          break

        case "MAKE_FUNCTION": {
          const body = item[2]
          if (body === undefined) {
            throw new Error("MAKE_FUNCTION requires body address")
          }

          // Default values land in the pool before the function definition itself.
          const { params, defaults, variadic, named } = parseParamList(operand as string[], constants)
          const bodyAddress = isLabelReference(body) ? resolveLabel(labels, body.slice(1)) : (body as number)

          constants.push({
            type: "function_def",
            params,
            defaults,
            body: bodyAddress,
            variadic,
            named
          })
          operandValue = constants.length - 1
          break
        }

        case "JUMP":
        case "JUMP_IF_FALSE":
        case "JUMP_IF_TRUE": {
          // Label targets become offsets relative to the next instruction (i + 1).
          const target = operand as string | number
          operandValue = isLabelReference(target) ? resolveLabel(labels, target.slice(1)) - (i + 1) : target
          break
        }

        case "PUSH_TRY":
        case "PUSH_FINALLY": {
          // Handler targets are absolute instruction indices.
          const target = operand as string | number
          operandValue = isLabelReference(target) ? resolveLabel(labels, target.slice(1)) : target
          break
        }

        case "LOAD":
        case "STORE":
        case "TRY_LOAD":
        case "TRY_CALL":
        case "CALL_NATIVE":
          operandValue = operand as string
          break

        case "MAKE_ARRAY":
        case "MAKE_DICT":
          operandValue = operand as number
          break

        default:
          throw new Error(`Unexpected operand for ${op}`)
      }
    }

    instructions.push({ op: opCode, operand: operandValue })
  }

  const labelsByIndex = invertLabels(labels)

  return {
    instructions,
    constants,
    labels: labelsByIndex.size > 0 ? labelsByIndex : undefined
  }
}

/**
 * Remove a trailing `; comment` from one source line.
 *
 * A `;` between matching quotes is part of a string constant and is kept.
 * When a quote is never closed, the line is cut at its first `;` instead.
 */
function stripComment(line: string): string {
  let openQuote: string | undefined = undefined

  for (let i = 0; i < line.length; i++) {
    const ch = line[i]
    if (openQuote !== undefined) {
      if (ch === openQuote) openQuote = undefined
    } else if (ch === '"' || ch === "'") {
      openQuote = ch
    } else if (ch === ";") {
      return line.slice(0, i)
    }
  }

  if (openQuote !== undefined) {
    const commentIndex = line.indexOf(";")
    if (commentIndex !== -1) return line.slice(0, commentIndex)
  }
  return line
}

/**
 * Split a trimmed instruction line into its opcode and operand text.
 *
 * A quoted string operand is returned verbatim so the whitespace inside it
 * survives; any other operand has runs of whitespace collapsed to one space.
 */
function splitInstruction(line: string): { op: string; operand: string | undefined } {
  const gap = line.search(/\s/)
  if (gap === -1) {
    return { op: line, operand: undefined }
  }

  const raw = line.slice(gap).trim()
  const operand = STRING_LITERAL.test(raw) ? raw : raw.split(/\s+/).join(" ")
  return { op: line.slice(0, gap), operand }
}

/**
 * Assemble the human-readable string format (see the syntax summary above).
 *
 * @throws Error on unknown opcodes, undefined labels, malformed MAKE_FUNCTION
 *   operands, invalid default values, or a `#` immediate that is not a number
 */
function toBytecodeFromString(str: string): Bytecode /* throws */ {
  // First pass: collect labels and their positions
  const labels = new Map<string, number>()
  const cleanLines: string[] = []

  for (const line of str.trim().split("\n")) {
    const trimmed = stripComment(line).trim()
    if (!trimmed) continue

    // Label definition (.label_name:) marks the index of the next instruction
    if (LABEL_DEFINITION.test(trimmed)) {
      labels.set(trimmed.slice(1, -1), cleanLines.length)
      continue
    }

    cleanLines.push(trimmed)
  }

  // Second pass: parse instructions and resolve label references
  const bytecode: Bytecode = {
    instructions: [],
    constants: []
  }

  for (let i = 0; i < cleanLines.length; i++) {
    const { op, operand } = splitInstruction(cleanLines[i]!)
    const opCode = OpCode[op as keyof typeof OpCode]

    if (opCode === undefined) {
      throw new Error(`Unknown opcode: ${op}`)
    }

    let operandValue: number | string | undefined = undefined

    if (operand !== undefined) {
      if (opCode === OpCode.MAKE_FUNCTION && operand.startsWith("(")) {
        // Parse: MAKE_FUNCTION (params) #body or MAKE_FUNCTION (params) .label
        const match = operand.match(MAKE_FUNCTION_OPERAND)
        if (!match) {
          throw new Error(`Invalid MAKE_FUNCTION syntax: ${operand}`)
        }

        const paramStr = match[1]!
        const bodyStr = match[2]!

        // The pattern guarantees a numeric body is "#" followed by decimal digits.
        const body = bodyStr.startsWith(".")
          ? resolveLabel(labels, bodyStr.slice(1))
          : parseInt(bodyStr.slice(1), 10)

        const { params, defaults, variadic, named } = parseFunctionParams(paramStr, bytecode.constants)

        // Add function definition to constants
        bytecode.constants.push({
          type: "function_def",
          params,
          defaults,
          body,
          variadic,
          named
        })
        operandValue = bytecode.constants.length - 1
      } else if (operand.startsWith(".")) {
        const labelPos = resolveLabel(labels, operand.slice(1))

        // For PUSH_TRY and PUSH_FINALLY, use absolute position
        // For other instructions, use relative offset from next instruction (i + 1)
        if (opCode === OpCode.PUSH_TRY || opCode === OpCode.PUSH_FINALLY) {
          operandValue = labelPos
        } else {
          operandValue = labelPos - (i + 1)
        }
      } else if (operand.startsWith("#")) {
        // Immediate number. No radix is passed so a 0x prefix keeps parsing as hex.
        const immediate = parseInt(operand.slice(1))
        if (Number.isNaN(immediate)) {
          throw new Error(`Invalid immediate operand: ${operand}`)
        }
        operandValue = immediate
      } else {
        const literal = parseLiteralConstant(operand)
        if (literal !== undefined) {
          bytecode.constants.push(literal)
          operandValue = bytecode.constants.length - 1
        } else {
          // Anything that is not a literal is taken as a variable/function name.
          // This allows emoji, Unicode, and other creative identifiers.
          operandValue = operand
        }
      }
    }

    bytecode.instructions.push({
      op: opCode,
      operand: operandValue
    })
  }

  // Invert labels map: name->index becomes index->name for debugger display
  const labelsByIndex = invertLabels(labels)
  if (labelsByIndex.size > 0) bytecode.labels = labelsByIndex

  return bytecode
}

/**
 * Compile bytecode from either a string or programmatic array format.
 *
 * String format:
 * ```
 * PUSH 42
 * STORE x
 * LOAD x
 * HALT
 * ```
 *
 * Array format:
 * ```
 * [
 *   ["PUSH", 42],
 *   ["STORE", "x"],
 *   ["LOAD", "x"],
 *   ["HALT"]
 * ]
 * ```
 *
 * @param input program text, or an array of instruction tuples and label definitions
 * @returns the instructions, the constant pool, and (when labels were defined) an index->name label map
 * @throws Error on unknown opcodes, undefined labels, or malformed operands
 */
export function toBytecode(input: string | ProgramItem[]): Bytecode {
  if (typeof input === "string") {
    return toBytecodeFromString(input)
  } else {
    return toBytecodeFromArray(input)
  }
}