update bytecode text language

This commit is contained in:
Chris Wanstrath 2025-10-05 18:37:06 -07:00
parent e16a8104c7
commit 4608ec7b9e
7 changed files with 94 additions and 82 deletions

View File

@ -70,7 +70,7 @@ It's where Shrimp live.
- [x] DICT_HAS
### TypeScript Interop
- [x] CALL_TYPESCRIPT
- [x] CALL_NATIVE
### Special
- [x] HALT
@ -88,7 +88,7 @@ It's where Shrimp live.
- All dictionary operations (MAKE_DICT, DICT_GET, DICT_SET, DICT_HAS)
- Basic function operations (MAKE_FUNCTION, CALL, RETURN) with parameter binding
- Exception handling (PUSH_TRY, PUSH_FINALLY, POP_TRY, THROW) with nested try/finally blocks and call stack unwinding
- TypeScript interop (CALL_TYPESCRIPT) with sync and async functions
- TypeScript interop (CALL_NATIVE) with sync and async functions
- HALT instruction
## Design Decisions

10
SPEC.md
View File

@ -16,7 +16,7 @@ The ReefVM is a stack-based bytecode virtual machine designed for the Shrimp pro
- **Scope Chain**: Linked scopes for lexical variable resolution
- **Program Counter (PC)**: Current instruction index
- **Constants Pool**: Immutable values and function metadata
- **TypeScript Function Registry**: External functions callable from Shrimp
- **Native Function Registry**: External functions callable from Shrimp
### Execution Model
@ -448,7 +448,7 @@ Key is coerced to string.
### TypeScript Interop
#### CALL_TYPESCRIPT
#### CALL_NATIVE
**Operand**: Function name (string)
**Effect**: Call registered native function
**Stack**: [...args] → [returnValue]
@ -456,7 +456,7 @@ Key is coerced to string.
**Behavior**:
1. Look up function by name in registry
2. Mark current frame (if exists) as break target
3. Await function call (TypeScript function receives arguments and returns a Value)
3. Await function call (native function receives arguments and returns a Value)
4. Push return value onto stack
**Notes**:
@ -576,7 +576,7 @@ All of these should throw errors:
6. **Break Outside Loop**: BREAK with no break target
7. **Continue Outside Loop**: CONTINUE with no continue target
8. **Return Outside Function**: RETURN with no call frame
9. **Unknown Function**: CALL_TYPESCRIPT with unregistered function
9. **Unknown Function**: CALL_NATIVE with unregistered function
10. **Mismatched Handler**: POP_TRY with no handler
11. **Invalid Constant**: PUSH with invalid constant index
12. **Invalid Function Definition**: MAKE_FUNCTION with non-function_def constant
@ -670,7 +670,7 @@ const result = await vm.execute()
- PC increment happens after each instruction execution
- Jump instructions use relative offsets (added to current PC after increment)
- All async operations (TypeScript functions) must be awaited
- All async operations (native functions) must be awaited
- Arrays and dicts are mutable (pass by reference)
- Functions are immutable values
- The VM is single-threaded (no concurrency primitives)

View File

@ -15,10 +15,14 @@ export type Constant =
| Value
| FunctionDef
const opsWithVarNames = new Set([OpCode.LOAD, OpCode.STORE, OpCode.CALL_TYPESCRIPT])
const opsWithAddresses = new Set([OpCode.JUMP, OpCode.JUMP_IF_FALSE, OpCode.JUMP_IF_TRUE, OpCode.PUSH_TRY])
const opsWithNumbers = new Set([OpCode.MAKE_ARRAY, OpCode.MAKE_DICT])
//
// Parse bytecode from human-readable string format.
// Operand types are determined by prefix:
// #42 -> immediate number (e.g., JUMP #5, MAKE_ARRAY #3)
// name -> variable/function name (e.g., LOAD x, CALL_NATIVE add)
// 42 -> number constant (e.g., PUSH 42)
// "str" -> string constant (e.g., PUSH "hello")
// 'str' -> string constant (e.g., PUSH 'hello')
export function toBytecode(str: string): Bytecode /* throws */ {
const lines = str.trim().split("\n")
@ -28,34 +32,42 @@ export function toBytecode(str: string): Bytecode /* throws */ {
}
for (let line of lines) {
let [op, operand] = line.trim().split(" ")
const trimmed = line.trim()
if (!trimmed) continue
const [op, ...rest] = trimmed.split(/\s+/)
const opCode = OpCode[op as keyof typeof OpCode]
if (opCode === undefined) {
throw new Error(`Unknown opcode: ${op}`)
}
let operandValue: number | string | undefined = undefined
if (operand) {
// Variable names for LOAD, STORE, CALL_TYPESCRIPT
if (opsWithVarNames.has(opCode)) {
operandValue = operand
}
// Direct addresses for JUMP operations
else if (opsWithAddresses.has(opCode)) {
operandValue = parseInt(operand)
}
// Direct numbers for MAKE_ARRAY, MAKE_DICT
else if (opsWithNumbers.has(opCode)) {
operandValue = parseInt(operand)
}
// Constants (numbers, strings) for PUSH
else {
if (/^\d+/.test(operand)) {
bytecode.constants.push(toValue(parseFloat(operand)))
} else if (/^['"]/.test(operand)) {
bytecode.constants.push(toValue(operand.slice(1, operand.length - 1)))
} else {
throw `Unknown operand: ${operand}`
}
if (rest.length > 0) {
const operand = rest.join(' ')
if (operand.startsWith('#')) {
// immediate number
operandValue = parseInt(operand.slice(1))
} else if (/^['"].*['"]$/.test(operand)) {
// string
const stringValue = operand.slice(1, operand.length - 1)
bytecode.constants.push(toValue(stringValue))
operandValue = bytecode.constants.length - 1
} else if (/^-?\d+(\.\d+)?$/.test(operand)) {
// number
bytecode.constants.push(toValue(parseFloat(operand)))
operandValue = bytecode.constants.length - 1
} else if (/^[a-zA-Z_].*$/.test(operand)) {
// variable
operandValue = operand
} else {
throw new Error(`Invalid operand: ${operand}`)
}
}

View File

@ -59,7 +59,7 @@ export enum OpCode {
DICT_HAS,
// native interop
CALL_TYPESCRIPT,
CALL_NATIVE,
// special
HALT

View File

@ -5,7 +5,7 @@ import { OpCode } from "./opcode"
import { Scope } from "./scope"
import { type Value, toValue, toNumber, isTrue, isEqual, toString } from "./value"
type TypeScriptFunction = (...args: Value[]) => Promise<Value> | Value
type NativeFunction = (...args: Value[]) => Promise<Value> | Value
export class VM {
pc = 0
@ -16,7 +16,7 @@ export class VM {
scope: Scope
constants: Constant[] = []
instructions: Instruction[] = []
typescriptFunctions: Map<string, TypeScriptFunction> = new Map()
nativeFunctions: Map<string, NativeFunction> = new Map()
constructor(bytecode: Bytecode) {
this.instructions = bytecode.instructions
@ -24,8 +24,8 @@ export class VM {
this.scope = new Scope()
}
registerFunction(name: string, fn: TypeScriptFunction) {
this.typescriptFunctions.set(name, fn)
registerFunction(name: string, fn: NativeFunction) {
this.nativeFunctions.set(name, fn)
}
async run(): Promise<Value> {
@ -412,12 +412,12 @@ export class VM {
this.stack.push(returnValue)
break
case OpCode.CALL_TYPESCRIPT:
case OpCode.CALL_NATIVE:
const functionName = instruction.operand as string
const tsFunction = this.typescriptFunctions.get(functionName)
const tsFunction = this.nativeFunctions.get(functionName)
if (!tsFunction)
throw new Error(`CALL_TYPESCRIPT: function not found: ${functionName}`)
throw new Error(`CALL_NATIVE: function not found: ${functionName}`)
// Mark current frame as break target (like CALL does)
if (this.callStack.length > 0)

View File

@ -188,7 +188,7 @@ test("AND pattern - short circuits when false", async () => {
PUSH 0
EQ
DUP
JUMP_IF_FALSE 2
JUMP_IF_FALSE #2
POP
PUSH 999
`
@ -203,7 +203,7 @@ test("AND pattern - evaluates both when true", async () => {
const str = `
PUSH 1
DUP
JUMP_IF_FALSE 2
JUMP_IF_FALSE #2
POP
PUSH 2
`
@ -214,7 +214,7 @@ test("OR pattern - short circuits when true", async () => {
const str = `
PUSH 1
DUP
JUMP_IF_TRUE 2
JUMP_IF_TRUE #2
POP
PUSH 2
`
@ -227,7 +227,7 @@ test("OR pattern - evaluates second when false", async () => {
PUSH 0
EQ
DUP
JUMP_IF_TRUE 2
JUMP_IF_TRUE #2
POP
PUSH 2
`
@ -263,7 +263,7 @@ test("isTruthy - only null and false are falsy", async () => {
// 0 is truthy (unlike JS)
const str1 = `
PUSH 0
JUMP_IF_FALSE 1
JUMP_IF_FALSE #1
PUSH 1
`
expect(await run(toBytecode(str1))).toEqual({ type: 'number', value: 1 })
@ -271,7 +271,7 @@ test("isTruthy - only null and false are falsy", async () => {
// empty string is truthy (unlike JS)
const str2 = `
PUSH ''
JUMP_IF_FALSE 1
JUMP_IF_FALSE #1
PUSH 1
`
expect(await run(toBytecode(str2))).toEqual({ type: 'number', value: 1 })
@ -281,7 +281,7 @@ test("isTruthy - only null and false are falsy", async () => {
PUSH 0
PUSH 0
EQ
JUMP_IF_FALSE 1
JUMP_IF_FALSE #1
PUSH 999
`
expect(await run(toBytecode(str3))).toEqual({ type: 'number', value: 999 })
@ -323,7 +323,7 @@ test("STORE and LOAD - multiple variables", async () => {
test("JUMP - relative jump forward", async () => {
const str = `
PUSH 1
JUMP 1
JUMP #1
PUSH 100
PUSH 2
`
@ -334,7 +334,7 @@ test("JUMP - backward offset demonstrates relative jumps", async () => {
// Use forward jump to skip, demonstrating relative addressing
const str = `
PUSH 100
JUMP 2
JUMP #2
PUSH 200
PUSH 300
PUSH 400
@ -347,7 +347,7 @@ test("JUMP_IF_FALSE - conditional jump when false", async () => {
PUSH 1
PUSH 0
EQ
JUMP_IF_FALSE 1
JUMP_IF_FALSE #1
PUSH 100
PUSH 42
`
@ -357,7 +357,7 @@ test("JUMP_IF_FALSE - conditional jump when false", async () => {
test("JUMP_IF_FALSE - no jump when true", async () => {
const str = `
PUSH 1
JUMP_IF_FALSE 1
JUMP_IF_FALSE #1
PUSH 100
`
expect(await run(toBytecode(str))).toEqual({ type: 'number', value: 100 })
@ -366,7 +366,7 @@ test("JUMP_IF_FALSE - no jump when true", async () => {
test("JUMP_IF_TRUE - conditional jump when true", async () => {
const str = `
PUSH 1
JUMP_IF_TRUE 1
JUMP_IF_TRUE #1
PUSH 100
PUSH 42
`
@ -378,7 +378,7 @@ test("MAKE_ARRAY - creates array", async () => {
PUSH 10
PUSH 20
PUSH 30
MAKE_ARRAY 3
MAKE_ARRAY #3
`
const result = await run(toBytecode(str))
expect(result.type).toBe('array')
@ -395,7 +395,7 @@ test("ARRAY_GET - gets element", async () => {
PUSH 10
PUSH 20
PUSH 30
MAKE_ARRAY 3
MAKE_ARRAY #3
PUSH 1
ARRAY_GET
`
@ -407,7 +407,7 @@ test("ARRAY_SET - sets element", async () => {
PUSH 10
PUSH 20
PUSH 30
MAKE_ARRAY 3
MAKE_ARRAY #3
DUP
PUSH 1
PUSH 99
@ -422,7 +422,7 @@ test("ARRAY_PUSH - appends to array", async () => {
const str = `
PUSH 10
PUSH 20
MAKE_ARRAY 2
MAKE_ARRAY #2
DUP
PUSH 30
ARRAY_PUSH
@ -435,7 +435,7 @@ test("ARRAY_PUSH - mutates original array", async () => {
const str = `
PUSH 10
PUSH 20
MAKE_ARRAY 2
MAKE_ARRAY #2
DUP
PUSH 30
ARRAY_PUSH
@ -450,7 +450,7 @@ test("ARRAY_LEN - gets length", async () => {
PUSH 10
PUSH 20
PUSH 30
MAKE_ARRAY 3
MAKE_ARRAY #3
ARRAY_LEN
`
expect(await run(toBytecode(str))).toEqual({ type: 'number', value: 3 })
@ -462,7 +462,7 @@ test("MAKE_DICT - creates dict", async () => {
PUSH 'Alice'
PUSH 'age'
PUSH 30
MAKE_DICT 2
MAKE_DICT #2
`
const result = await run(toBytecode(str))
expect(result.type).toBe('dict')
@ -477,7 +477,7 @@ test("DICT_GET - gets value", async () => {
const str = `
PUSH 'name'
PUSH 'Bob'
MAKE_DICT 1
MAKE_DICT #1
PUSH 'name'
DICT_GET
`
@ -486,7 +486,7 @@ test("DICT_GET - gets value", async () => {
test("DICT_SET - sets value", async () => {
const str = `
MAKE_DICT 0
MAKE_DICT #0
DUP
PUSH 'key'
PUSH 'value'
@ -501,7 +501,7 @@ test("DICT_HAS - checks key exists", async () => {
const str = `
PUSH 'key'
PUSH 'value'
MAKE_DICT 1
MAKE_DICT #1
PUSH 'key'
DICT_HAS
`
@ -510,7 +510,7 @@ test("DICT_HAS - checks key exists", async () => {
test("DICT_HAS - checks key missing", async () => {
const str = `
MAKE_DICT 0
MAKE_DICT #0
PUSH 'missing'
DICT_HAS
`

View File

@ -3,12 +3,12 @@ import { VM } from "#vm"
import { OpCode } from "#opcode"
import { toValue, toNumber } from "#value"
test("CALL_TYPESCRIPT - basic function call", async () => {
test("CALL_NATIVE - basic function call", async () => {
const vm = new VM({
instructions: [
{ op: OpCode.PUSH, operand: 0 }, // push 5
{ op: OpCode.PUSH, operand: 1 }, // push 10
{ op: OpCode.CALL_TYPESCRIPT, operand: 'add' }, // call TypeScript 'add'
{ op: OpCode.CALL_NATIVE, operand: 'add' }, // call TypeScript 'add'
{ op: OpCode.HALT }
],
constants: [
@ -26,12 +26,12 @@ test("CALL_TYPESCRIPT - basic function call", async () => {
expect(result).toEqual({ type: 'number', value: 15 })
})
test("CALL_TYPESCRIPT - function with string manipulation", async () => {
test("CALL_NATIVE - function with string manipulation", async () => {
const vm = new VM({
instructions: [
{ op: OpCode.PUSH, operand: 0 }, // push "hello"
{ op: OpCode.PUSH, operand: 1 }, // push "world"
{ op: OpCode.CALL_TYPESCRIPT, operand: 'concat' }, // call TypeScript 'concat'
{ op: OpCode.CALL_NATIVE, operand: 'concat' }, // call TypeScript 'concat'
{ op: OpCode.HALT }
],
constants: [
@ -50,11 +50,11 @@ test("CALL_TYPESCRIPT - function with string manipulation", async () => {
expect(result).toEqual({ type: 'string', value: 'hello world' })
})
test("CALL_TYPESCRIPT - async function", async () => {
test("CALL_NATIVE - async function", async () => {
const vm = new VM({
instructions: [
{ op: OpCode.PUSH, operand: 0 }, // push 42
{ op: OpCode.CALL_TYPESCRIPT, operand: 'asyncDouble' }, // call async TypeScript function
{ op: OpCode.CALL_NATIVE, operand: 'asyncDouble' }, // call async TypeScript function
{ op: OpCode.HALT }
],
constants: [
@ -72,10 +72,10 @@ test("CALL_TYPESCRIPT - async function", async () => {
expect(result).toEqual({ type: 'number', value: 84 })
})
test("CALL_TYPESCRIPT - function with no arguments", async () => {
test("CALL_NATIVE - function with no arguments", async () => {
const vm = new VM({
instructions: [
{ op: OpCode.CALL_TYPESCRIPT, operand: 'getAnswer' }, // call with empty stack
{ op: OpCode.CALL_NATIVE, operand: 'getAnswer' }, // call with empty stack
{ op: OpCode.HALT }
],
constants: []
@ -89,13 +89,13 @@ test("CALL_TYPESCRIPT - function with no arguments", async () => {
expect(result).toEqual({ type: 'number', value: 42 })
})
test("CALL_TYPESCRIPT - function with multiple arguments", async () => {
test("CALL_NATIVE - function with multiple arguments", async () => {
const vm = new VM({
instructions: [
{ op: OpCode.PUSH, operand: 0 }, // push 2
{ op: OpCode.PUSH, operand: 1 }, // push 3
{ op: OpCode.PUSH, operand: 2 }, // push 4
{ op: OpCode.CALL_TYPESCRIPT, operand: 'sum' }, // call TypeScript 'sum'
{ op: OpCode.CALL_NATIVE, operand: 'sum' }, // call TypeScript 'sum'
{ op: OpCode.HALT }
],
constants: [
@ -114,11 +114,11 @@ test("CALL_TYPESCRIPT - function with multiple arguments", async () => {
expect(result).toEqual({ type: 'number', value: 9 })
})
test("CALL_TYPESCRIPT - function returns array", async () => {
test("CALL_NATIVE - function returns array", async () => {
const vm = new VM({
instructions: [
{ op: OpCode.PUSH, operand: 0 }, // push 3
{ op: OpCode.CALL_TYPESCRIPT, operand: 'makeRange' }, // call TypeScript 'makeRange'
{ op: OpCode.CALL_NATIVE, operand: 'makeRange' }, // call TypeScript 'makeRange'
{ op: OpCode.HALT }
],
constants: [
@ -147,22 +147,22 @@ test("CALL_TYPESCRIPT - function returns array", async () => {
}
})
test("CALL_TYPESCRIPT - function not found", async () => {
test("CALL_NATIVE - function not found", async () => {
const vm = new VM({
instructions: [
{ op: OpCode.CALL_TYPESCRIPT, operand: 'nonexistent' }
{ op: OpCode.CALL_NATIVE, operand: 'nonexistent' }
],
constants: []
})
await expect(vm.run()).rejects.toThrow('CALL_TYPESCRIPT: function not found: nonexistent')
await expect(vm.run()).rejects.toThrow('CALL_NATIVE: function not found: nonexistent')
})
test("CALL_TYPESCRIPT - using result in subsequent operations", async () => {
test("CALL_NATIVE - using result in subsequent operations", async () => {
const vm = new VM({
instructions: [
{ op: OpCode.PUSH, operand: 0 }, // push 5
{ op: OpCode.CALL_TYPESCRIPT, operand: 'triple' }, // call TypeScript 'triple' -> 15
{ op: OpCode.CALL_NATIVE, operand: 'triple' }, // call TypeScript 'triple' -> 15
{ op: OpCode.PUSH, operand: 1 }, // push 10
{ op: OpCode.ADD }, // 15 + 10 = 25
{ op: OpCode.HALT }