forked from defunkt/ReefVM
547 lines
15 KiB
TypeScript
547 lines
15 KiB
TypeScript
import { type Value, type FunctionDef, toValue } from "./value"
|
|
import { OpCode } from "./opcode"
|
|
|
|
/**
 * A compiled program as produced by `toBytecode`.
 */
export type Bytecode = {
  /** Instructions in program order; label definitions are not included. */
  instructions: Instruction[]

  /**
   * Constant pool. PUSH and MAKE_FUNCTION operands emitted by this file are
   * indices into this array.
   */
  constants: Constant[]

  /**
   * Maps instruction index to label name.
   * Left unset when the program defines no labels.
   */
  labels?: Map<number, string>
}
|
|
|
|
/**
 * One VM instruction: an opcode and, for opcodes that take one, an operand.
 *
 * The compilers in this file store either a number (constant-pool index,
 * jump offset, handler position or other immediate) or a string (a name,
 * e.g. for LOAD / STORE / CALL_NATIVE) in `operand`.
 */
export type Instruction = {
  op: OpCode
  operand?: string | number
}
|
|
|
|
/**
 * An entry of `Bytecode.constants`: either a plain value (built with
 * `toValue`) or the function definition pushed for a MAKE_FUNCTION.
 */
export type Constant = Value | FunctionDef
|
|
|
|
/** Literal that may appear as the operand of a programmatic `["PUSH", …]`. */
type Atom = null | boolean | number | string
|
|
|
|
/**
 * Programmatic (array) spelling of a single instruction: the opcode name
 * followed by its operand(s), if it takes any.
 *
 * Where an operand is typed `number | string`, a string starting with "."
 * is treated by the compiler as a label reference; a number is used as-is.
 */
type InstructionTuple =
  // ---- Stack ----
  | ["PUSH", Atom]
  | ["POP"]
  | ["DUP"]

  // ---- Variables ----
  | ["LOAD", string]
  | ["STORE", string]
  | ["TRY_LOAD", string]

  // ---- Arithmetic ----
  | ["ADD"]
  | ["SUB"]
  | ["MUL"]
  | ["DIV"]
  | ["MOD"]

  // ---- Comparison ----
  | ["EQ"]
  | ["NEQ"]
  | ["LT"]
  | ["GT"]
  | ["LTE"]
  | ["GTE"]

  // ---- Logical ----
  | ["NOT"]

  // ---- Control flow ----
  | ["JUMP", number | string]
  | ["JUMP_IF_FALSE", number | string]
  | ["JUMP_IF_TRUE", number | string]
  | ["BREAK"]

  // ---- Exception handling ----
  | ["PUSH_TRY", number | string]
  | ["PUSH_FINALLY", number | string]
  | ["POP_TRY"]
  | ["THROW"]

  // ---- Functions ----
  | ["MAKE_FUNCTION", string[], number | string]
  | ["CALL"]
  | ["TAIL_CALL"]
  | ["RETURN"]
  | ["TRY_CALL", string]

  // ---- Arrays ----
  | ["MAKE_ARRAY", number]
  | ["ARRAY_GET"]
  | ["ARRAY_SET"]
  | ["ARRAY_PUSH"]
  | ["ARRAY_LEN"]

  // ---- Dicts ----
  | ["MAKE_DICT", number]
  | ["DICT_GET"]
  | ["DICT_SET"]
  | ["DICT_HAS"]

  // ---- Native ----
  | ["CALL_NATIVE", string]

  // ---- Special ----
  | ["HALT"]
|
|
|
|
/**
 * A label marker inside a programmatic program: a one-element tuple whose
 * only entry is the text ".label_name:" (leading dot, trailing colon).
 */
type LabelDefinition = [label: string]
|
|
|
|
/** One element of a programmatic program: a label marker or an instruction. */
export type ProgramItem = LabelDefinition | InstructionTuple
|
|
|
|
//
// Parse bytecode from human-readable string format.
//
// One instruction per line. Anything after a `;` is a comment.
//
// Operand types are determined by prefix/literal:
//   #42    -> immediate number   (e.g., JUMP #5, MAKE_ARRAY #3)
//   .label -> label reference    (e.g., JUMP .loop_start, MAKE_FUNCTION (x y) .body)
//   name   -> variable/function name (e.g., LOAD x, CALL_NATIVE add)
//   42     -> number constant    (e.g., PUSH 42)
//   "str"  -> string constant    (e.g., PUSH "hello")
//   'str'  -> string constant    (e.g., PUSH 'hello')
//   true   -> boolean constant   (e.g., PUSH true)
//   false  -> boolean constant   (e.g., PUSH false)
//   null   -> null constant      (e.g., PUSH null)
//
// Labels:
//   .label_name:  -> label definition (marks current instruction position)
//
// Function definitions:
//   MAKE_FUNCTION (x y) #7        -> basic function (numeric offset)
//   MAKE_FUNCTION (x y) .body     -> basic function (label reference)
//   MAKE_FUNCTION (x y=42) #7     -> with defaults
//   MAKE_FUNCTION (x ...rest) #7  -> variadic
//   MAKE_FUNCTION (x @named) #7   -> named
//
|
|
|
|
/**
 * Parse the parenthesised parameter list of a string-format MAKE_FUNCTION,
 * e.g. `(x y=42 ...rest @named)`.
 *
 * Entries are separated by whitespace and classified in this order:
 *   - `@name`      -> sets `named`, records `name`
 *   - `...name`    -> sets `variadic`, records `name`
 *   - `name=value` -> records `name`; the literal is appended to `constants`
 *                     and its index is stored in `defaults[name]`
 *   - `name`       -> plain parameter
 *
 * Because entries are split on whitespace, a quoted default cannot contain
 * whitespace in this format.
 *
 * @param paramStr  The parameter list including its surrounding parentheses.
 * @param constants Constant pool; mutated by appending one entry per default.
 * @returns Parameter names in source order, the constant index of each
 *          default, and the `variadic` / `named` flags.
 * @throws Error when a default is not a number, a quoted string, `true`,
 *         `false` or `null`.
 */
function parseFunctionParams(paramStr: string, constants: Constant[]): {
  params: string[]
  defaults: Record<string, number>
  variadic: boolean
  named: boolean
} {
  const params: string[] = []
  const defaults: Record<string, number> = {}
  let variadic = false
  let named = false

  // Drop the surrounding parens; an empty list declares no parameters.
  const paramList = paramStr.slice(1, -1).trim()
  if (!paramList) {
    return { params, defaults, variadic, named }
  }

  for (const part of paramList.split(/\s+/)) {
    if (part.startsWith('@')) {
      // Named args (@name)
      named = true
      params.push(part.slice(1))
    } else if (part.startsWith('...')) {
      // Variadic (...name)
      variadic = true
      params.push(part.slice(3))
    } else if (part.includes('=')) {
      // Default value (name=value). Split on the FIRST '=' only, so the
      // default itself may contain '=' (e.g. sep="=").
      const eq = part.indexOf('=')
      const name = part.slice(0, eq).trim()
      const defaultValue = part.slice(eq + 1).trim()
      params.push(name)

      // Turn the literal into a constant and append it to the pool.
      if (/^-?\d+(\.\d+)?$/.test(defaultValue)) {
        constants.push(toValue(parseFloat(defaultValue)))
      } else if (/^['\"].*['\"]$/.test(defaultValue)) {
        // Quoted string: strip the quotes; no escape processing is done.
        constants.push(toValue(defaultValue.slice(1, -1)))
      } else if (defaultValue === 'true') {
        constants.push(toValue(true))
      } else if (defaultValue === 'false') {
        constants.push(toValue(false))
      } else if (defaultValue === 'null') {
        constants.push(toValue(null))
      } else {
        throw new Error(`Invalid default value: ${defaultValue}`)
      }

      // The constant just pushed is the last entry of the pool.
      defaults[name] = constants.length - 1
    } else {
      params.push(part)
    }
  }

  return { params, defaults, variadic, named }
}
|
|
|
|
/**
 * Type guard for label markers in a programmatic program.
 *
 * An item counts as a label definition when it has exactly one element and
 * that element is a string that begins with "." and ends with ":".
 */
function isLabelDefinition(item: ProgramItem): item is LabelDefinition {
  if (item.length !== 1) return false

  const head: unknown = item[0]
  if (typeof head !== "string") return false

  return head.startsWith(".") && head.endsWith(":")
}
|
|
|
|
/**
 * Type guard for label references: true only for strings whose first
 * character is "." (e.g. ".loop_start"); numbers are never references.
 */
function isLabelReference(value: string | number): value is string {
  if (typeof value !== "string") return false
  return value.startsWith(".")
}
|
|
|
|
/**
 * Parse the parameter list of a programmatic MAKE_FUNCTION, where each
 * array entry is one parameter spec:
 *   - `@name`      -> sets `named`, records `name`
 *   - `...name`    -> sets `variadic`, records `name`
 *   - `name=value` -> records `name` and appends the parsed literal to
 *                     `defaultConstants`
 *   - `name`       -> plain parameter
 *
 * Unlike the string-format parser this function does not touch a constant
 * pool. `defaults[name]` is only a `-1` placeholder marking that `name` has
 * a default; the caller is expected to store the entries of
 * `defaultConstants` (which are in source order) and record the real indices.
 *
 * @param params Parameter specs as written in the program array.
 * @returns Parameter names in source order, placeholder `defaults`, the
 *          `variadic` / `named` flags, and the default constants in order.
 * @throws Error when a default is not a number, `true`, `false`, `null`
 *         or a quoted string.
 */
function parseFunctionParamsFromArray(params: string[]): {
  params: string[]
  defaults: Record<string, number>
  variadic: boolean
  named: boolean
  defaultConstants: Constant[]
} {
  const resultParams: string[] = []
  const defaults: Record<string, number> = {}
  const defaultConstants: Constant[] = []
  let variadic = false
  let named = false

  for (const param of params) {
    if (param.startsWith("@")) {
      named = true
      resultParams.push(param.slice(1))
    } else if (param.startsWith("...")) {
      variadic = true
      resultParams.push(param.slice(3))
    } else if (param.includes("=")) {
      // Split on the FIRST "=" only, so the default itself may contain "="
      // (e.g. 'sep="a = b"').
      const eq = param.indexOf("=")
      const name = param.slice(0, eq).trim()
      const defaultValue = param.slice(eq + 1).trim()
      resultParams.push(name)

      if (/^-?\d+(\.\d+)?$/.test(defaultValue)) {
        defaultConstants.push(toValue(parseFloat(defaultValue)))
      } else if (defaultValue === "true") {
        defaultConstants.push(toValue(true))
      } else if (defaultValue === "false") {
        defaultConstants.push(toValue(false))
      } else if (defaultValue === "null") {
        defaultConstants.push(toValue(null))
      } else if (/^['"].*['"]$/.test(defaultValue)) {
        // Quoted string: strip the quotes; no escape processing is done.
        defaultConstants.push(toValue(defaultValue.slice(1, -1)))
      } else {
        throw new Error(`Invalid default value: ${defaultValue}`)
      }

      // Placeholder only; the real constant index is assigned by the caller.
      defaults[name] = -1
    } else {
      resultParams.push(param)
    }
  }

  return { params: resultParams, defaults, variadic, named, defaultConstants }
}
|
|
|
|
/**
 * Compile a programmatic program (array of instruction tuples and label
 * markers) into Bytecode.
 *
 * Pass 1 removes label markers and records, for each label, the index of
 * the instruction that follows it. Pass 2 translates each tuple:
 *   - PUSH            -> literal appended to the constant pool; operand is
 *                        its index
 *   - MAKE_FUNCTION   -> default constants, then a `function_def`, are
 *                        appended to the pool; operand is the index of the
 *                        `function_def`; a label body becomes the label's
 *                        instruction index
 *   - JUMP*           -> a label becomes an offset relative to the NEXT
 *                        instruction; a number is used as-is
 *   - PUSH_TRY / PUSH_FINALLY -> a label becomes its absolute instruction
 *                        index; a number is used as-is
 *   - LOAD / STORE / TRY_LOAD / TRY_CALL / CALL_NATIVE -> operand is the name
 *   - MAKE_ARRAY / MAKE_DICT -> operand is the given number
 *
 * @throws Error for an unknown opcode, an undefined label, a MAKE_FUNCTION
 *         without a body, or an operand on an opcode that takes none.
 */
function toBytecodeFromArray(program: ProgramItem[]): Bytecode /* throws */ {
  const constants: Constant[] = []
  const instructions: Instruction[] = []
  const labels = new Map<string, number>()

  // First pass: collect labels and keep only real instructions.
  const filteredProgram: InstructionTuple[] = []
  for (const item of program) {
    if (isLabelDefinition(item)) {
      // Remove the "." prefix and ":" suffix; the label points at the
      // instruction that will be pushed next.
      labels.set(item[0].slice(1, -1), filteredProgram.length)
    } else {
      filteredProgram.push(item as InstructionTuple)
    }
  }

  // Resolve a ".name" reference to the instruction index recorded above.
  const resolveLabel = (reference: string): number => {
    const labelName = reference.slice(1)
    const labelPos = labels.get(labelName)
    if (labelPos === undefined) {
      throw new Error(`Undefined label: ${labelName}`)
    }
    return labelPos
  }

  // Second pass: build instructions.
  for (let i = 0; i < filteredProgram.length; i++) {
    const item = filteredProgram[i]!
    const op = item[0] as string
    const opCode = OpCode[op as keyof typeof OpCode]

    if (opCode === undefined) {
      throw new Error(`Unknown opcode: ${op}`)
    }

    let operandValue: number | string | undefined = undefined

    if (item.length > 1) {
      const operand = item[1]

      switch (op) {
        case "PUSH":
          constants.push(toValue(operand as Atom))
          operandValue = constants.length - 1
          break

        case "MAKE_FUNCTION": {
          const params = operand as string[]
          const body = item[2]

          if (body === undefined) {
            throw new Error("MAKE_FUNCTION requires body address")
          }

          const { params: resultParams, defaults, variadic, named, defaultConstants } = parseFunctionParamsFromArray(params)

          // Pair each defaulted parameter with its constant by walking the
          // parameters in SOURCE order. Object.entries(defaults) would list
          // integer-like names first and could mismatch names and values.
          const defaultIndices: Record<string, number> = {}
          for (const paramName of resultParams) {
            const hasDefault = Object.prototype.hasOwnProperty.call(defaults, paramName)
            const alreadyAssigned = Object.prototype.hasOwnProperty.call(defaultIndices, paramName)
            if (!hasDefault || alreadyAssigned) continue

            const defaultConst = defaultConstants.shift()
            if (defaultConst === undefined) break
            constants.push(defaultConst)
            defaultIndices[paramName] = constants.length - 1
          }

          // A label body is stored as an absolute instruction index.
          const bodyAddress = isLabelReference(body) ? resolveLabel(body) : (body as number)

          constants.push({
            type: "function_def",
            params: resultParams,
            defaults: defaultIndices,
            body: bodyAddress,
            variadic,
            named
          })

          operandValue = constants.length - 1
          break
        }

        case "JUMP":
        case "JUMP_IF_FALSE":
        case "JUMP_IF_TRUE": {
          const target = operand as string | number
          // Labels become offsets relative to the instruction after this one.
          operandValue = isLabelReference(target) ? resolveLabel(target) - (i + 1) : target
          break
        }

        case "PUSH_TRY":
        case "PUSH_FINALLY": {
          const target = operand as string | number
          // Labels become absolute instruction indices.
          operandValue = isLabelReference(target) ? resolveLabel(target) : target
          break
        }

        case "LOAD":
        case "STORE":
        case "TRY_LOAD":
        case "TRY_CALL":
        case "CALL_NATIVE":
          operandValue = operand as string
          break

        case "MAKE_ARRAY":
        case "MAKE_DICT":
          operandValue = operand as number
          break

        default:
          throw new Error(`Unexpected operand for ${op}`)
      }
    }

    instructions.push({
      op: opCode,
      operand: operandValue
    })
  }

  // Invert name->index into index->name for the returned bytecode.
  const labelsByIndex = new Map<number, string>()
  for (const [name, index] of labels.entries()) {
    labelsByIndex.set(index, name)
  }

  return {
    instructions,
    constants,
    labels: labelsByIndex.size > 0 ? labelsByIndex : undefined
  }
}
|
|
|
|
/**
 * Compile the human-readable text format into Bytecode.
 *
 * Pass 1 strips `;` comments and blank lines and records each
 * `.label_name:` line as pointing at the next instruction. Pass 2 splits
 * every remaining line into an opcode and an operand and classifies the
 * operand by its prefix/literal form:
 *   - `(params) #n|.label` on MAKE_FUNCTION -> `function_def` constant
 *   - `.label` -> absolute index for PUSH_TRY / PUSH_FINALLY, otherwise an
 *                 offset relative to the NEXT instruction
 *   - `#n`     -> immediate number
 *   - quoted   -> string constant (quotes stripped, no escape processing)
 *   - number / true / false / null -> constant
 *   - anything else -> used verbatim as a name
 *
 * A `;` inside a quoted string is not a comment, and whitespace inside an
 * operand is preserved exactly as written.
 *
 * @throws Error for an unknown opcode, an undefined label, malformed
 *         MAKE_FUNCTION syntax, or a `#` immediate that is not a number.
 */
function toBytecodeFromString(str: string): Bytecode /* throws */ {
  // Cut a trailing `; comment` off a line. A quote only opens a string when
  // it starts a token (start of line, after whitespace or after '=') AND has
  // a matching closing quote on the same line; semicolons inside such a
  // string are kept. Any other ';' starts the comment.
  const stripComment = (line: string): string => {
    for (let pos = 0; pos < line.length; pos++) {
      const ch = line.charAt(pos)
      if (ch === ';') return line.slice(0, pos)
      if (ch !== '"' && ch !== "'") continue

      const opensToken = pos === 0 || /[\s=]/.test(line.charAt(pos - 1))
      if (!opensToken) continue

      const closing = line.indexOf(ch, pos + 1)
      // Jump to the closing quote; the loop increment steps past it.
      if (closing !== -1) pos = closing
    }
    return line
  }

  // First pass: collect labels and their positions.
  const labels = new Map<string, number>()
  const cleanLines: string[] = []

  for (const line of str.trim().split("\n")) {
    const trimmed = stripComment(line).trim()
    if (!trimmed) continue

    // Label definition (.label_name:) marks the next instruction's index.
    if (/^\.[a-zA-Z_][a-zA-Z0-9_]*:$/.test(trimmed)) {
      labels.set(trimmed.slice(1, -1), cleanLines.length)
      continue
    }

    cleanLines.push(trimmed)
  }

  // Second pass: parse instructions and resolve label references.
  const bytecode: Bytecode = {
    instructions: [],
    constants: []
  }

  for (let i = 0; i < cleanLines.length; i++) {
    const trimmed = cleanLines[i]!

    // Opcode is the first token; the operand is the remainder verbatim, so
    // runs of whitespace inside string literals are not collapsed.
    const pieces = /^(\S+)\s*([\s\S]*)$/.exec(trimmed)
    const op = pieces?.[1] ?? trimmed
    const operand = pieces?.[2] ?? ''
    const opCode = OpCode[op as keyof typeof OpCode]

    if (opCode === undefined) {
      throw new Error(`Unknown opcode: ${op}`)
    }

    let operandValue: number | string | undefined = undefined

    if (operand !== '') {
      if (opCode === OpCode.MAKE_FUNCTION && operand.startsWith('(')) {
        // MAKE_FUNCTION (params) #body  or  MAKE_FUNCTION (params) .label
        const match = operand.match(/^(\(.*?\))\s+(#-?\d+|\.[a-zA-Z_][a-zA-Z0-9_]*)$/)
        if (!match) {
          throw new Error(`Invalid MAKE_FUNCTION syntax: ${operand}`)
        }

        const paramStr = match[1]!
        const bodyStr = match[2]!

        let body: number
        if (bodyStr.startsWith('.')) {
          // Label reference: the body is the label's instruction index.
          const labelName = bodyStr.slice(1)
          const labelPos = labels.get(labelName)
          if (labelPos === undefined) {
            throw new Error(`Undefined label: ${labelName}`)
          }
          body = labelPos
        } else {
          // Numeric offset (digits already validated by the regex above).
          body = parseInt(bodyStr.slice(1))
        }

        // Appends any default-value constants to the pool as a side effect.
        const { params, defaults, variadic, named } = parseFunctionParams(paramStr, bytecode.constants)

        bytecode.constants.push({
          type: 'function_def',
          params,
          defaults,
          body,
          variadic,
          named
        })

        operandValue = bytecode.constants.length - 1
      } else if (operand.startsWith('.')) {
        // Label reference
        const labelName = operand.slice(1)
        const labelPos = labels.get(labelName)
        if (labelPos === undefined) {
          throw new Error(`Undefined label: ${labelName}`)
        }
        // PUSH_TRY and PUSH_FINALLY take the absolute position; every other
        // instruction takes an offset relative to the next instruction (i + 1).
        if (opCode === OpCode.PUSH_TRY || opCode === OpCode.PUSH_FINALLY) {
          operandValue = labelPos
        } else {
          operandValue = labelPos - (i + 1)
        }
      } else if (operand.startsWith('#')) {
        // Immediate number; reject garbage instead of emitting NaN.
        const immediate = parseInt(operand.slice(1))
        if (Number.isNaN(immediate)) {
          throw new Error(`Invalid immediate operand: ${operand}`)
        }
        operandValue = immediate
      } else if (/^['"].*['"]$/.test(operand)) {
        // String constant: strip the surrounding quotes.
        bytecode.constants.push(toValue(operand.slice(1, operand.length - 1)))
        operandValue = bytecode.constants.length - 1
      } else if (/^-?\d+(\.\d+)?$/.test(operand)) {
        // Number constant
        bytecode.constants.push(toValue(parseFloat(operand)))
        operandValue = bytecode.constants.length - 1
      } else if (operand === 'true' || operand === 'false') {
        // Boolean constant
        bytecode.constants.push(toValue(operand === 'true'))
        operandValue = bytecode.constants.length - 1
      } else if (operand === 'null') {
        // Null constant
        bytecode.constants.push(toValue(null))
        operandValue = bytecode.constants.length - 1
      } else {
        // Anything else is taken as a variable/function name. This allows
        // emoji, Unicode, and other creative identifiers.
        operandValue = operand
      }
    }

    bytecode.instructions.push({
      op: opCode,
      operand: operandValue
    })
  }

  // Invert labels map: name->index becomes index->name for debugger display.
  const labelsByIndex = new Map<number, string>()
  for (const [name, index] of labels.entries()) {
    labelsByIndex.set(index, name)
  }
  if (labelsByIndex.size > 0) {
    bytecode.labels = labelsByIndex
  }

  return bytecode
}
|
|
|
|
/**
 * Build Bytecode from a program given either as source text or as an
 * array of program items; the input's runtime type selects the compiler.
 *
 * Text form (one instruction per line):
 * ```
 * PUSH 42
 * STORE x
 * LOAD x
 * HALT
 * ```
 *
 * Array form (one tuple per instruction):
 * ```
 * [
 *   ["PUSH", 42],
 *   ["STORE", "x"],
 *   ["LOAD", "x"],
 *   ["HALT"]
 * ]
 * ```
 */
export function toBytecode(input: string | ProgramItem[]): Bytecode {
  return typeof input === "string"
    ? toBytecodeFromString(input)
    : toBytecodeFromArray(input)
}
|