ReefVM/src/bytecode.ts

562 lines
16 KiB
TypeScript

import { type Value, type FunctionDef, toValue } from "./value"
import { OpCode } from "./opcode"
/** A compiled program: an instruction list plus the constant pool its operands index into. */
export type Bytecode = {
  /** Instructions in program order. */
  instructions: Instruction[]
  /** Constant pool; holds plain values and function definitions. */
  constants: Constant[]
  /** Optional map from instruction index to the name of the label defined there. */
  labels?: Map<number, string>
}

/**
 * A single VM instruction.
 *
 * Depending on the opcode, `operand` is filled by the assembler with a constant-pool index,
 * a jump offset or address, an element count, or a variable name; it is left undefined
 * for opcodes written without an operand.
 */
export type Instruction = {
  op: OpCode
  operand?: number | string
}

/** Anything that may be stored in the constant pool. */
export type Constant = Value | FunctionDef

/** Literal accepted as the operand of PUSH in the array program format. */
type Atom = number | string | boolean | null
/**
 * One instruction in the array program format: the opcode name followed by its
 * operand(s), if any. Jump-like operands accept either a ".label" reference (string)
 * or a number.
 */
type InstructionTuple =
  // Stack
  | ["PUSH", Atom]
  | ["POP"]
  | ["DUP"]

  // Variables
  | ["LOAD", string]
  | ["STORE", string]
  | ["TRY_LOAD", string]

  // Arithmetic
  | ["ADD"]
  | ["SUB"]
  | ["MUL"]
  | ["DIV"]
  | ["MOD"]

  // Comparison
  | ["EQ"]
  | ["NEQ"]
  | ["LT"]
  | ["GT"]
  | ["LTE"]
  | ["GTE"]

  // Logical
  | ["NOT"]

  // Control flow
  | ["JUMP", string | number]
  | ["JUMP_IF_FALSE", string | number]
  | ["JUMP_IF_TRUE", string | number]
  | ["BREAK"]

  // Exception handling
  | ["PUSH_TRY", string | number]
  | ["PUSH_FINALLY", string | number]
  | ["POP_TRY"]
  | ["THROW"]

  // Functions: parameter specs, then the body address or label
  | ["MAKE_FUNCTION", string[], string | number]
  | ["CALL"]
  | ["TAIL_CALL"]
  | ["RETURN"]
  | ["TRY_CALL", string]

  // Arrays
  | ["MAKE_ARRAY", number]
  | ["ARRAY_GET"]
  | ["ARRAY_SET"]
  | ["ARRAY_PUSH"]
  | ["ARRAY_LEN"]

  // Dicts
  | ["MAKE_DICT", number]
  | ["DICT_GET"]
  | ["DICT_SET"]
  | ["DICT_HAS"]

  // Strings
  | ["STR_CONCAT", number]

  // Arrays and dicts
  | ["DOT_GET"]

  // Special
  | ["HALT"]

/** A label definition in the array format: a one-element tuple such as [".label_name:"]. */
type LabelDefinition = [string]

/** An entry of an array-format program: either an instruction or a label definition. */
export type ProgramItem = InstructionTuple | LabelDefinition
//
// Parse bytecode from human-readable string format.
// Operand types are determined by prefix/literal:
// #42 -> immediate number (e.g., JUMP #5, MAKE_ARRAY #3)
// .label -> label reference (e.g., JUMP .loop_start, MAKE_FUNCTION (x y) .body)
// name -> variable/function name (e.g., LOAD x, LOAD_NATIVE add)
// 42 -> number constant (e.g., PUSH 42)
// "str" -> string constant (e.g., PUSH "hello")
// 'str' -> string constant (e.g., PUSH 'hello')
// true -> boolean constant (e.g., PUSH true)
// false -> boolean constant (e.g., PUSH false)
// null -> null constant (e.g., PUSH null)
// /re/flags -> regex constant (e.g., PUSH /ab+c/i)
//
// Comments:
// ; text -> comment, stripped before parsing (e.g., PUSH 1 ; push one)
//
// Labels:
// .label_name: -> label definition (marks current instruction position)
//
// Function definitions:
// MAKE_FUNCTION (x y) #7 -> basic function (numeric offset)
// MAKE_FUNCTION (x y) .body -> basic function (label reference)
// MAKE_FUNCTION (x y=42) #7 -> with defaults
// MAKE_FUNCTION (x ...rest) #7 -> variadic
// MAKE_FUNCTION (x @named) #7 -> named
//
/**
 * Parse the parenthesised parameter list of a textual MAKE_FUNCTION operand.
 *
 * Entries are separated by whitespace and interpreted as:
 *   name        plain parameter
 *   name=value  parameter with a default; `value` must be a number, a quoted string,
 *               true, false or null. It is converted with toValue and appended to `constants`.
 *   ...name     sets `variadic`; the name is recorded without the dots
 *   @name       sets `named`; the name is recorded without the `@`
 *
 * Because entries are split on whitespace, a quoted default cannot contain whitespace.
 *
 * @param paramStr  Parameter list including its surrounding parentheses, e.g. "(x y=42 ...rest)".
 * @param constants Constant pool; mutated in place (one entry appended per default value).
 * @returns Parameter names in declaration order, a map from parameter name to the index of
 *          its default in `constants`, and the variadic/named flags.
 * @throws Error when a default value is not one of the supported literal forms.
 */
function parseFunctionParams(paramStr: string, constants: Constant[]): {
  params: string[]
  defaults: Record<string, number>
  variadic: boolean
  named: boolean
} {
  const params: string[] = []
  const defaults: Record<string, number> = {}
  let variadic = false
  let named = false

  // Drop the surrounding parens; an empty list declares no parameters.
  const paramList = paramStr.slice(1, -1).trim()
  if (!paramList) {
    return { params, defaults, variadic, named }
  }

  for (const part of paramList.split(/\s+/)) {
    if (part.startsWith('@')) {
      // Named-args parameter (@name)
      named = true
      params.push(part.slice(1))
    } else if (part.startsWith('...')) {
      // Variadic parameter (...name)
      variadic = true
      params.push(part.slice(3))
    } else if (part.includes('=')) {
      // Default value (name=value). Split on the FIRST '=' only, so a quoted default may
      // itself contain '=' and a malformed value like `1=2` is rejected rather than
      // silently truncated to `1`.
      const eq = part.indexOf('=')
      const name = part.slice(0, eq).trim()
      const defaultValue = part.slice(eq + 1).trim()
      params.push(name)

      // Convert the literal and append it to the constant pool.
      if (/^-?\d+(\.\d+)?$/.test(defaultValue)) {
        constants.push(toValue(parseFloat(defaultValue)))
      } else if (/^['"].*['"]$/.test(defaultValue)) {
        constants.push(toValue(defaultValue.slice(1, -1)))
      } else if (defaultValue === 'true') {
        constants.push(toValue(true))
      } else if (defaultValue === 'false') {
        constants.push(toValue(false))
      } else if (defaultValue === 'null') {
        constants.push(toValue(null))
      } else {
        throw new Error(`Invalid default value: ${defaultValue}`)
      }
      // The default is the entry that was just appended.
      defaults[name] = constants.length - 1
    } else {
      params.push(part)
    }
  }

  return { params, defaults, variadic, named }
}
/**
 * Type guard: true when `item` is a label definition, i.e. a one-element tuple whose
 * only entry is a string that starts with "." and ends with ":" (for example [".loop:"]).
 */
function isLabelDefinition(item: ProgramItem): item is LabelDefinition {
  if (item.length !== 1) return false

  const head = item[0]
  if (typeof head !== "string") return false

  return head.startsWith(".") && head.endsWith(":")
}
/** Type guard: true when `value` is a string beginning with "." (a label reference such as ".loop"). */
function isLabelReference(value: string | number): value is string {
  if (typeof value !== "string") return false
  return value.startsWith(".")
}
/**
 * Parse the parameter specs of an array-format MAKE_FUNCTION instruction.
 *
 * Each entry is interpreted as:
 *   "name"        plain parameter
 *   "name=value"  parameter with a default; `value` must be a number, true, false, null
 *                 or a quoted string
 *   "...name"     sets `variadic`; the name is recorded without the dots
 *   "@name"       sets `named`; the name is recorded without the `@`
 *
 * This function does not touch the constant pool. Every parameter with a default gets the
 * placeholder index -1 in `defaults`, and its converted value is returned in
 * `defaultConstants`. `defaultConstants[i]` always belongs to `Object.keys(defaults)[i]`,
 * which is the order in which the caller walks `defaults` to assign real indices.
 *
 * @param params Parameter specs in declaration order.
 * @returns Parameter names, placeholder default map, flags, and the default constants.
 * @throws Error when a default value is not one of the supported literal forms.
 */
function parseFunctionParamsFromArray(params: string[]): {
  params: string[]
  defaults: Record<string, number>
  variadic: boolean
  named: boolean
  defaultConstants: Constant[]
} {
  const resultParams: string[] = []
  const defaults: Record<string, number> = {}
  const constantByName = new Map<string, Constant>()
  let variadic = false
  let named = false

  for (const param of params) {
    if (param.startsWith("@")) {
      named = true
      resultParams.push(param.slice(1))
    } else if (param.startsWith("...")) {
      variadic = true
      resultParams.push(param.slice(3))
    } else if (param.includes("=")) {
      // Split on the FIRST "=" only, so a quoted default may itself contain "=" and a
      // malformed value like `1=2` is rejected rather than silently truncated to `1`.
      const eq = param.indexOf("=")
      const name = param.slice(0, eq).trim()
      const defaultValue = param.slice(eq + 1).trim()
      resultParams.push(name)

      let constant: Constant
      if (/^-?\d+(\.\d+)?$/.test(defaultValue)) {
        constant = toValue(parseFloat(defaultValue))
      } else if (defaultValue === "true") {
        constant = toValue(true)
      } else if (defaultValue === "false") {
        constant = toValue(false)
      } else if (defaultValue === "null") {
        constant = toValue(null)
      } else if (/^['"].*['"]$/.test(defaultValue)) {
        constant = toValue(defaultValue.slice(1, -1))
      } else {
        throw new Error(`Invalid default value: ${defaultValue}`)
      }

      // If a name is repeated, the last default wins.
      constantByName.set(name, constant)
      // Placeholder; the caller replaces it with the real constant-pool index.
      defaults[name] = -1
    } else {
      resultParams.push(param)
    }
  }

  // Emit the constants in the key order of `defaults`. Object key order is not always
  // declaration order (integer-like keys are enumerated first, repeated names collapse),
  // so a plain declaration-order list could be paired with the wrong parameter.
  const defaultConstants: Constant[] = Object.keys(defaults).map(name => constantByName.get(name)!)

  return { params: resultParams, defaults, variadic, named, defaultConstants }
}
/**
 * Assemble an array-format program into bytecode.
 *
 * Works in two passes: the first records the instruction index of every label definition
 * and drops the definitions; the second turns each remaining tuple into an instruction.
 *
 * Operand handling per opcode:
 *   PUSH                         value converted with toValue, stored in the constant pool;
 *                                operand is the pool index
 *   MAKE_FUNCTION                default values and a function_def are stored in the pool;
 *                                operand is the pool index of the function_def, whose body
 *                                is the given number or the label's instruction index
 *   JUMP / JUMP_IF_*             label -> offset relative to the next instruction;
 *                                number -> used as given
 *   PUSH_TRY / PUSH_FINALLY      label -> the label's instruction index; number -> used as given
 *   LOAD / STORE / TRY_LOAD / TRY_CALL, MAKE_ARRAY / MAKE_DICT / STR_CONCAT
 *                                operand copied through unchanged
 *
 * @param program Instructions and label definitions in program order.
 * @returns The assembled bytecode; `labels` is undefined when the program defines no labels.
 * @throws Error on an unknown opcode, an undefined label, a MAKE_FUNCTION without a body,
 *         an invalid parameter default, or an operand on an opcode not listed above.
 */
function toBytecodeFromArray(program: ProgramItem[]): Bytecode /* throws */ {
  const constants: Constant[] = []
  // Typed via Bytecode so every pushed object is checked against the instruction shape.
  const instructions: Bytecode["instructions"] = []
  const labels = new Map<string, number>()

  // Look up the instruction index of a ".label" reference.
  const resolveLabel = (reference: string): number => {
    const labelName = reference.slice(1) // Remove . prefix
    const labelPos = labels.get(labelName)
    if (labelPos === undefined) {
      throw new Error(`Undefined label: ${labelName}`)
    }
    return labelPos
  }

  // First pass: collect labels. A label points at the next real instruction, i.e. the
  // number of instructions kept so far.
  const filteredProgram: InstructionTuple[] = []
  for (const item of program) {
    if (isLabelDefinition(item)) {
      const labelName = item[0].slice(1, -1) // Remove . prefix and : suffix
      labels.set(labelName, filteredProgram.length)
    } else {
      filteredProgram.push(item as InstructionTuple)
    }
  }

  // Second pass: build instructions
  for (let i = 0; i < filteredProgram.length; i++) {
    const item = filteredProgram[i]!
    const op = item[0] as string
    const opCode = OpCode[op as keyof typeof OpCode]
    if (opCode === undefined) {
      throw new Error(`Unknown opcode: ${op}`)
    }

    let operandValue: number | string | undefined = undefined
    if (item.length > 1) {
      const operand = item[1]
      switch (op) {
        case "PUSH":
          constants.push(toValue(operand as Atom))
          operandValue = constants.length - 1
          break

        case "MAKE_FUNCTION": {
          const body = item[2]
          if (body === undefined) {
            throw new Error("MAKE_FUNCTION requires body address")
          }

          const { params: resultParams, defaults, variadic, named, defaultConstants } =
            parseFunctionParamsFromArray(operand as string[])

          // Move each default into the pool and record its real index, consuming the
          // constants in the key order of `defaults`.
          const defaultIndices: Record<string, number> = {}
          for (const paramName of Object.keys(defaults)) {
            constants.push(defaultConstants.shift()!)
            defaultIndices[paramName] = constants.length - 1
          }

          const bodyAddress = isLabelReference(body) ? resolveLabel(body) : (body as number)

          constants.push({
            type: "function_def",
            params: resultParams,
            defaults: defaultIndices,
            body: bodyAddress,
            variadic,
            named
          })
          operandValue = constants.length - 1
          break
        }

        case "JUMP":
        case "JUMP_IF_FALSE":
        case "JUMP_IF_TRUE": {
          // Labels become offsets relative to the instruction after this one.
          const target = operand as string | number
          operandValue = isLabelReference(target) ? resolveLabel(target) - (i + 1) : target
          break
        }

        case "PUSH_TRY":
        case "PUSH_FINALLY": {
          // Labels become the label's instruction index (not a relative offset).
          const target = operand as string | number
          operandValue = isLabelReference(target) ? resolveLabel(target) : target
          break
        }

        case "LOAD":
        case "STORE":
        case "TRY_LOAD":
        case "TRY_CALL":
          operandValue = operand as string
          break

        case "MAKE_ARRAY":
        case "MAKE_DICT":
        case "STR_CONCAT":
          operandValue = operand as number
          break

        default:
          throw new Error(`Unexpected operand for ${op}`)
      }
    }

    instructions.push({
      op: opCode,
      operand: operandValue
    })
  }

  // Invert name -> index into index -> name for the returned bytecode.
  const labelsByIndex = new Map<number, string>()
  for (const [name, index] of labels.entries()) {
    labelsByIndex.set(index, name)
  }

  return {
    instructions,
    constants,
    labels: labelsByIndex.size > 0 ? labelsByIndex : undefined
  }
}
/**
 * Assemble the textual bytecode format into bytecode.
 *
 * Works in two passes over the lines of `str`: the first strips `;` comments and blank
 * lines and records the instruction index of every `.label:` definition; the second parses
 * each remaining line as `OPCODE [operand]`.
 *
 * Operand forms (checked in this order):
 *   (params) #n | (params) .label   MAKE_FUNCTION only: a function_def is added to the
 *                                   constant pool with body `n` or the label's index
 *   .label      label reference: the label's index for PUSH_TRY / PUSH_FINALLY, otherwise
 *               an offset relative to the next instruction
 *   #n          immediate number (parsed with parseInt)
 *   "s" / 's'   string constant; the text between the quotes is kept verbatim
 *   42, -1.5    number constant
 *   true/false  boolean constant
 *   null        null constant
 *   /re/flags   regex constant; the pattern is kept verbatim
 *   other       used as-is as a variable name
 *
 * A `;` inside a quoted string token is part of the string, not the start of a comment.
 *
 * @param str Program text, one instruction or label definition per line.
 * @returns The assembled bytecode; `labels` is only set when at least one label is defined.
 * @throws Error on an unknown opcode, an undefined label, malformed MAKE_FUNCTION syntax,
 *         an invalid parameter default, or an invalid regex literal.
 */
function toBytecodeFromString(str: string): Bytecode /* throws */ {
  // Characters that may precede an opening quote / follow a closing quote of a string token.
  const tokenStart = /[\s(=]/
  const tokenEnd = /[\s);]/

  // Index of the quote that closes the string token opened at `open`, or -1 if none does.
  const closingQuoteIndex = (line: string, open: number): number => {
    const quote = line[open]!
    for (let j = line.indexOf(quote, open + 1); j !== -1; j = line.indexOf(quote, j + 1)) {
      const next = line[j + 1]
      if (next === undefined || tokenEnd.test(next)) return j
    }
    return -1
  }

  // Remove a trailing `; comment`. A quote only opens a string when it starts a token and
  // is properly closed; otherwise it is an ordinary character (e.g. the name `don't`),
  // and the first `;` still starts the comment.
  const stripComment = (line: string): string => {
    for (let i = 0; i < line.length; i++) {
      const ch = line[i]!
      if (ch === ';') return line.slice(0, i)
      if ((ch === '"' || ch === "'") && (i === 0 || tokenStart.test(line[i - 1]!))) {
        const close = closingQuoteIndex(line, i)
        if (close !== -1) i = close // skip over the whole quoted token
      }
    }
    return line
  }

  // First pass: collect labels and their positions
  const labels = new Map<string, number>()
  const cleanLines: string[] = []
  for (const line of str.trim().split("\n")) {
    const trimmed = stripComment(line).trim()
    if (!trimmed) continue

    // Label definition (.label_name:) marks the position of the next instruction
    if (/^\.[a-zA-Z_][a-zA-Z0-9_]*:$/.test(trimmed)) {
      labels.set(trimmed.slice(1, -1), cleanLines.length)
      continue
    }
    cleanLines.push(trimmed)
  }

  // Look up the instruction index of a label by name.
  const resolveLabel = (labelName: string): number => {
    const labelPos = labels.get(labelName)
    if (labelPos === undefined) {
      throw new Error(`Undefined label: ${labelName}`)
    }
    return labelPos
  }

  // Second pass: parse instructions and resolve label references
  const bytecode: Bytecode = {
    instructions: [],
    constants: []
  }

  for (let i = 0; i < cleanLines.length; i++) {
    const trimmed = cleanLines[i]!

    // Opcode is the first whitespace-delimited word; the rest of the line is the operand.
    const opEnd = trimmed.search(/\s/)
    const op = opEnd === -1 ? trimmed : trimmed.slice(0, opEnd)
    // `rawOperand` keeps the original spacing (needed for string and regex literals);
    // `operand` has whitespace runs collapsed to one space and drives classification.
    const rawOperand = opEnd === -1 ? '' : trimmed.slice(opEnd).trimStart()
    const operand = rawOperand.split(/\s+/).join(' ')

    const opCode = OpCode[op as keyof typeof OpCode]
    if (opCode === undefined) {
      throw new Error(`Unknown opcode: ${op}`)
    }

    let operandValue: number | string | undefined = undefined
    if (rawOperand) {
      if (opCode === OpCode.MAKE_FUNCTION && operand.startsWith('(')) {
        // Parse: MAKE_FUNCTION (params) #body or MAKE_FUNCTION (params) .label
        const match = operand.match(/^(\(.*?\))\s+(#-?\d+|\.[a-zA-Z_][a-zA-Z0-9_]*)$/)
        if (!match) {
          throw new Error(`Invalid MAKE_FUNCTION syntax: ${operand}`)
        }
        const paramStr = match[1]!
        const bodyStr = match[2]!

        // Body is the label's instruction index, or the number after '#'.
        const body = bodyStr.startsWith('.')
          ? resolveLabel(bodyStr.slice(1))
          : parseInt(bodyStr.slice(1))

        // Defaults are appended to the constant pool before the function definition.
        const { params, defaults, variadic, named } = parseFunctionParams(paramStr, bytecode.constants)
        bytecode.constants.push({
          type: 'function_def',
          params,
          defaults,
          body,
          variadic,
          named
        })
        operandValue = bytecode.constants.length - 1
      } else if (operand.startsWith('.')) {
        const labelPos = resolveLabel(operand.slice(1))
        // For PUSH_TRY and PUSH_FINALLY, use absolute position
        // For other jump instructions, use relative offset from next instruction (i + 1)
        if (opCode === OpCode.PUSH_TRY || opCode === OpCode.PUSH_FINALLY) {
          operandValue = labelPos
        } else {
          operandValue = labelPos - (i + 1)
        }
      } else if (operand.startsWith('#')) {
        // immediate number
        operandValue = parseInt(operand.slice(1))
      } else if (/^['"].*['"]$/.test(operand)) {
        // string: take the text from the raw operand so inner spacing is preserved
        bytecode.constants.push(toValue(rawOperand.slice(1, -1)))
        operandValue = bytecode.constants.length - 1
      } else if (/^-?\d+(\.\d+)?$/.test(operand)) {
        // number
        bytecode.constants.push(toValue(parseFloat(operand)))
        operandValue = bytecode.constants.length - 1
      } else if (operand === 'true' || operand === 'false') {
        // boolean
        bytecode.constants.push(toValue(operand === 'true'))
        operandValue = bytecode.constants.length - 1
      } else if (operand === 'null') {
        // null
        bytecode.constants.push(toValue(null))
        operandValue = bytecode.constants.length - 1
      } else if (/^\/.*\/[a-z]*$/.test(operand)) {
        // regex literal (/pattern/flags); pattern taken from the raw operand
        const lastSlash = rawOperand.lastIndexOf('/')
        const pattern = rawOperand.slice(1, lastSlash)
        const flags = rawOperand.slice(lastSlash + 1)
        try {
          bytecode.constants.push(toValue(new RegExp(pattern, flags)))
          operandValue = bytecode.constants.length - 1
        } catch {
          throw new Error(`Invalid regex literal: ${operand}`)
        }
      } else {
        // Anything else is a variable name. This allows emoji, Unicode, and other
        // creative identifiers.
        operandValue = operand
      }
    }

    bytecode.instructions.push({
      op: opCode,
      operand: operandValue
    })
  }

  // Invert labels map: name->index becomes index->name for debugger display
  const labelsByIndex = new Map<number, string>()
  for (const [name, index] of labels.entries()) {
    labelsByIndex.set(index, name)
  }
  if (labelsByIndex.size > 0) {
    bytecode.labels = labelsByIndex
  }

  return bytecode
}
/**
 * Build bytecode from a program given either as text or as an array of tuples.
 *
 * Text form, one instruction per line:
 * ```
 * PUSH 42
 * STORE x
 * LOAD x
 * HALT
 * ```
 *
 * Array form, one tuple per instruction:
 * ```
 * [
 *   ["PUSH", 42],
 *   ["STORE", "x"],
 *   ["LOAD", "x"],
 *   ["HALT"]
 * ]
 * ```
 *
 * @param input Program source: a string is handed to the text assembler, anything else
 *              to the array assembler.
 * @returns The assembled bytecode.
 */
export function toBytecode(input: string | ProgramItem[]): Bytecode {
  return typeof input === "string"
    ? toBytecodeFromString(input)
    : toBytecodeFromArray(input)
}