From b2a6021fb8a3fcdd69de36a01438a55bc26de672 Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Sun, 9 Nov 2025 22:18:10 -0800 Subject: [PATCH] require labels for JUMP opcodes to avoid compiler bugs --- CLAUDE.md | 8 ++++---- SPEC.md | 43 ++++++++++++++++++++--------------------- src/bytecode.ts | 8 ++++---- src/validator.ts | 24 ++++++++++++++++++----- tests/validator.test.ts | 17 ++++++++-------- 5 files changed, 57 insertions(+), 43 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 0098761..c42c738 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -55,7 +55,7 @@ No build step required - Bun runs TypeScript directly. ### Critical Design Decisions -**Relative jumps**: All JUMP instructions use PC-relative offsets (not absolute addresses), making bytecode position-independent. PUSH_TRY/PUSH_FINALLY use absolute addresses. +**Label-based jumps**: All JUMP instructions (`JUMP`, `JUMP_IF_FALSE`, `JUMP_IF_TRUE`) require label operands (`.label`), not numeric offsets. Labels are resolved to PC-relative offsets during compilation, making bytecode position-independent. PUSH_TRY/PUSH_FINALLY use absolute addresses and can accept either labels or numeric offsets. **Truthiness semantics**: Only `null` and `false` are falsy. Unlike JavaScript, `0`, `""`, empty arrays, and empty dicts are truthy. @@ -229,8 +229,8 @@ await vm.call('log', 'Hello!') - Automatically converts arguments to ReefVM Values - Converts result back to JavaScript types -### Label Usage (Preferred) -Use labels instead of numeric offsets for readability: +### Label Usage (Required for JUMP instructions) +All JUMP instructions must use labels: ``` JUMP .skip PUSH 42 @@ -486,7 +486,7 @@ Run `bun test` to verify all tests pass before committing. ## Common Gotchas -**Jump offsets**: JUMP/JUMP_IF_FALSE/JUMP_IF_TRUE use relative offsets from the next instruction (PC + 1). PUSH_TRY/PUSH_FINALLY use absolute instruction indices. 
+**Label requirements**: JUMP/JUMP_IF_FALSE/JUMP_IF_TRUE require label operands (`.label`), not numeric offsets. The bytecode compiler resolves labels to PC-relative offsets internally. PUSH_TRY/PUSH_FINALLY can use either labels or absolute instruction indices (`#N`). **Stack operations**: Most binary operations pop in reverse order (second operand is popped first, then first operand). diff --git a/SPEC.md b/SPEC.md index 222d5e1..cb5fc9d 100644 --- a/SPEC.md +++ b/SPEC.md @@ -327,39 +327,45 @@ All comparison operations pop two values, compare, push boolean result. ``` DUP -JUMP_IF_FALSE #2 # skip POP and +JUMP_IF_FALSE .end POP -end: +.end: ``` **OR pattern** (short-circuits if left side is true): ``` DUP -JUMP_IF_TRUE #2 # skip POP and +JUMP_IF_TRUE .end POP -end: +.end: ``` ### Control Flow #### JUMP -**Operand**: Offset (number) -**Effect**: Add offset to PC (relative jump) +**Operand**: Label (string) +**Effect**: Jump to the specified label **Stack**: No change +**Note**: JUMP only accepts label operands (`.label`), not numeric offsets. The bytecode compiler resolves labels to PC-relative offsets internally. + #### JUMP_IF_FALSE -**Operand**: Offset (number) -**Effect**: If top of stack is falsy, add offset to PC (relative jump) +**Operand**: Label (string) +**Effect**: If top of stack is falsy, jump to the specified label **Stack**: [condition] → [] +**Note**: JUMP_IF_FALSE only accepts label operands (`.label`), not numeric offsets. + #### JUMP_IF_TRUE -**Operand**: Offset (number) -**Effect**: If top of stack is truthy, add offset to PC (relative jump) +**Operand**: Label (string) +**Effect**: If top of stack is truthy, jump to the specified label **Stack**: [condition] → [] +**Note**: JUMP_IF_TRUE only accepts label operands (`.label`), not numeric offsets. + #### BREAK **Operand**: None **Effect**: Unwind call stack until frame with `isBreakTarget = true`, resume there @@ -814,14 +820,16 @@ CALL ; → "Hi, Bob!" 
## Label Syntax -The bytecode format supports labels for improved readability: +The bytecode format requires labels for control flow jumps: **Label Definition**: `.label_name:` marks an instruction position **Label Reference**: `.label_name` in operands (e.g., `JUMP .loop_start`) -Labels are resolved to numeric offsets during parsing. The original numeric offset syntax (`#N`) is still supported for backwards compatibility. +Labels are resolved to relative PC offsets during bytecode compilation. All JUMP instructions (`JUMP`, `JUMP_IF_FALSE`, `JUMP_IF_TRUE`) require label operands. -Example with labels: +**Note**: Exception handling instructions (`PUSH_TRY`, `PUSH_FINALLY`) can use either labels or absolute instruction indices (`#N`); function definitions (`MAKE_FUNCTION`) require a label for the body address. + +Example: ``` JUMP .skip .middle: @@ -832,15 +840,6 @@ JUMP .skip HALT ``` -Equivalent with numeric offsets: -``` -JUMP #2 -PUSH 999 -HALT -PUSH 42 -HALT -``` - ## Common Bytecode Patterns ### If-Else Statement diff --git a/src/bytecode.ts b/src/bytecode.ts index ef760be..6de7b16 100644 --- a/src/bytecode.ts +++ b/src/bytecode.ts @@ -44,9 +44,9 @@ type InstructionTuple = | ["NOT"] // Control flow - | ["JUMP", string | number] - | ["JUMP_IF_FALSE", string | number] - | ["JUMP_IF_TRUE", string | number] + | ["JUMP", string] + | ["JUMP_IF_FALSE", string] + | ["JUMP_IF_TRUE", string] | ["BREAK"] // Exception handling @@ -56,7 +56,7 @@ type InstructionTuple = | ["THROW"] // Functions - | ["MAKE_FUNCTION", string[], string | number] + | ["MAKE_FUNCTION", string[], string] | ["CALL"] | ["TAIL_CALL"] | ["RETURN"] diff --git a/src/validator.ts b/src/validator.ts index 7d44816..c5cd6b3 100644 --- a/src/validator.ts +++ b/src/validator.ts @@ -87,11 +87,15 @@ const OPCODES_WITHOUT_OPERANDS = new Set([ OpCode.DOT_GET, ]) -// immediate = immediate number, eg #5 -const OPCODES_REQUIRING_IMMEDIATE_OR_LABEL = new Set([ +// JUMP* instructions require labels only (no numeric immediates) +const 
OPCODES_REQUIRING_LABEL = new Set([ OpCode.JUMP, OpCode.JUMP_IF_FALSE, OpCode.JUMP_IF_TRUE, +]) + +// PUSH_TRY/PUSH_FINALLY still allow immediate or label +const OPCODES_REQUIRING_IMMEDIATE_OR_LABEL = new Set([ OpCode.PUSH_TRY, OpCode.PUSH_FINALLY, ]) @@ -197,6 +201,16 @@ export function validateBytecode(source: string): ValidationResult { // Validate specific operand formats if (operand) { + if (OPCODES_REQUIRING_LABEL.has(opCode)) { + if (!operand.startsWith('.')) { + errors.push({ + line: lineNum, + message: `${opName} requires label (.label), got: ${operand}`, + }) + continue + } + } + if (OPCODES_REQUIRING_IMMEDIATE_OR_LABEL.has(opCode)) { if (!operand.startsWith('#') && !operand.startsWith('.')) { errors.push({ @@ -310,11 +324,11 @@ export function validateBytecode(source: string): ValidationResult { } } - // Validate body address - if (!bodyAddr!.startsWith('.') && !bodyAddr!.startsWith('#')) { + // Validate body address (must be a label) + if (!bodyAddr!.startsWith('.')) { errors.push({ line: lineNum, - message: `Invalid body address: expected .label or #offset`, + message: `Invalid body address: expected .label, got: ${bodyAddr}`, }) } diff --git a/tests/validator.test.ts b/tests/validator.test.ts index 0afd65b..451a881 100644 --- a/tests/validator.test.ts +++ b/tests/validator.test.ts @@ -201,17 +201,17 @@ test("formatValidationErrors produces readable output", () => { expect(formatted).toContain("UNKNOWN") }) -test("detects JUMP without # or .label", () => { +test("detects JUMP without .label", () => { const source = ` JUMP 5 HALT ` const result = validateBytecode(source) expect(result.valid).toBe(false) - expect(result.errors[0]!.message).toContain("JUMP requires immediate (#number) or label (.label)") + expect(result.errors[0]!.message).toContain("JUMP requires label (.label)") }) -test("detects JUMP_IF_TRUE without # or .label", () => { +test("detects JUMP_IF_TRUE without .label", () => { const source = ` PUSH true JUMP_IF_TRUE 2 @@ -219,10 +219,10 @@ 
test("detects JUMP_IF_TRUE without # or .label", () => { ` const result = validateBytecode(source) expect(result.valid).toBe(false) - expect(result.errors[0]!.message).toContain("JUMP_IF_TRUE requires immediate (#number) or label (.label)") + expect(result.errors[0]!.message).toContain("JUMP_IF_TRUE requires label (.label)") }) -test("detects JUMP_IF_FALSE without # or .label", () => { +test("detects JUMP_IF_FALSE without .label", () => { const source = ` PUSH false JUMP_IF_FALSE 2 @@ -230,17 +230,18 @@ test("detects JUMP_IF_FALSE without # or .label", () => { ` const result = validateBytecode(source) expect(result.valid).toBe(false) - expect(result.errors[0]!.message).toContain("JUMP_IF_FALSE requires immediate (#number) or label (.label)") + expect(result.errors[0]!.message).toContain("JUMP_IF_FALSE requires label (.label)") }) -test("allows JUMP with immediate number", () => { +test("rejects JUMP with immediate number", () => { const source = ` JUMP #2 PUSH 999 HALT ` const result = validateBytecode(source) - expect(result.valid).toBe(true) + expect(result.valid).toBe(false) + expect(result.errors[0]!.message).toContain("JUMP requires label (.label)") }) test("detects MAKE_ARRAY without #", () => {