diff --git a/bin/validate b/bin/validate
new file mode 100755
--- /dev/null
+++ b/bin/validate
@@ -0,0 +1,27 @@
+#!/usr/bin/env bun
+// CLI entry point: validates one bytecode source file and prints the result.
+import { validateBytecode, formatValidationErrors } from "../src/validator"
+
+const [filePath] = process.argv.slice(2)
+
+if (filePath === undefined) {
+  console.error("Usage: validate <file>")
+  process.exit(1)
+}
+
+const file = Bun.file(filePath)
+
+// Report a missing file as a plain message rather than an unhandled rejection.
+if (!(await file.exists())) {
+  console.error(`File not found: ${filePath}`)
+  process.exit(1)
+}
+
+const result = validateBytecode(await file.text())
+
+console.log(formatValidationErrors(result))
+
+// A non-zero exit status lets scripts detect invalid bytecode.
+if (!result.valid) {
+  process.exit(1)
+}
diff --git a/src/validator.ts b/src/validator.ts
new file mode 100644
--- /dev/null
+++ b/src/validator.ts
@@ -0,0 +1,354 @@
+import { OpCode } from "./opcode"
+
+/** A single problem found in a bytecode source. */
+export type ValidationError = {
+  /** 1-based line number in the source text. */
+  line: number
+  /** Human-readable description of the problem. */
+  message: string
+}
+
+/** Outcome of validating a bytecode source. */
+export type ValidationResult = {
+  /** True when `errors` is empty. */
+  valid: boolean
+  /** Every problem found, sorted by ascending line number. */
+  errors: ValidationError[]
+}
+
+// Opcodes that require operands
+const OPCODES_WITH_OPERANDS = new Set([
+  OpCode.PUSH,
+  OpCode.LOAD,
+  OpCode.STORE,
+  OpCode.JUMP,
+  OpCode.JUMP_IF_FALSE,
+  OpCode.JUMP_IF_TRUE,
+  OpCode.PUSH_TRY,
+  OpCode.PUSH_FINALLY,
+  OpCode.MAKE_ARRAY,
+  OpCode.MAKE_DICT,
+  OpCode.MAKE_FUNCTION,
+  OpCode.CALL_NATIVE,
+])
+
+// Opcodes that should NOT have operands
+const OPCODES_WITHOUT_OPERANDS = new Set([
+  OpCode.POP,
+  OpCode.DUP,
+  OpCode.ADD,
+  OpCode.SUB,
+  OpCode.MUL,
+  OpCode.DIV,
+  OpCode.MOD,
+  OpCode.EQ,
+  OpCode.NEQ,
+  OpCode.LT,
+  OpCode.GT,
+  OpCode.LTE,
+  OpCode.GTE,
+  OpCode.NOT,
+  OpCode.HALT,
+  OpCode.BREAK,
+  OpCode.POP_TRY,
+  OpCode.THROW,
+  OpCode.CALL,
+  OpCode.TAIL_CALL,
+  OpCode.RETURN,
+  OpCode.ARRAY_GET,
+  OpCode.ARRAY_SET,
+  OpCode.ARRAY_PUSH,
+  OpCode.ARRAY_LEN,
+  OpCode.DICT_GET,
+  OpCode.DICT_SET,
+  OpCode.DICT_HAS,
+])
+
+// A label definition line, e.g. `.loop:`
+const LABEL_DEFINITION = /^\.[a-zA-Z_][a-zA-Z0-9_]*:$/
+
+// Identifier syntax shared by variable and parameter names
+const IDENTIFIER = /^[a-zA-Z_][a-zA-Z0-9_]*$/
+
+// Signed integer that follows the `#` of an immediate operand
+const INTEGER = /^-?\d+$/
+
+// Splits a MAKE_FUNCTION operand into `(params)` and the body address
+const MAKE_FUNCTION_OPERAND = /^(\(.*?\))\s+(.+)$/
+
+/**
+ * Returns the text of a source line before the first `;`, trimmed.
+ *
+ * NOTE(review): a `;` inside a string operand also starts a comment here;
+ * confirm the assembler agrees before making this quote-aware.
+ */
+function stripComment(line: string): string {
+  const commentIndex = line.indexOf(";")
+  return (commentIndex === -1 ? line : line.slice(0, commentIndex)).trim()
+}
+
+/**
+ * Resolves an opcode name, returning undefined unless it is a key declared
+ * on OpCode itself.
+ */
+function lookupOpCode(opName: string) {
+  // Inherited keys such as `toString` or `constructor` are not opcodes, and
+  // a digit-leading key can only come from a numeric enum's reverse mapping
+  // (value -> name), if OpCode is declared as one.
+  if (/^-?\d/.test(opName) || !Object.prototype.hasOwnProperty.call(OpCode, opName)) {
+    return undefined
+  }
+  return OpCode[opName as keyof typeof OpCode]
+}
+
+/** Records that `labelName` is referenced on line `lineNum`. */
+function recordLabelReference(
+  labelReferences: Map<string, number[]>,
+  labelName: string,
+  lineNum: number,
+): void {
+  const refLines = labelReferences.get(labelName)
+  if (refLines) {
+    refLines.push(lineNum)
+  } else {
+    labelReferences.set(labelName, [lineNum])
+  }
+}
+
+/**
+ * Validates the whitespace-separated parameter list of a MAKE_FUNCTION
+ * (the text between the parentheses), appending any problems to `errors`.
+ */
+function validateParameters(paramList: string, lineNum: number, errors: ValidationError[]): void {
+  if (!paramList) return
+
+  let seenVariadic = false
+  let seenNamed = false
+
+  for (const param of paramList.split(/\s+/)) {
+    // Check for invalid order
+    if (seenVariadic && !param.startsWith("@")) {
+      errors.push({
+        line: lineNum,
+        message: `Invalid parameter order: variadic parameter (...) must come before named parameter (@)`,
+      })
+    }
+
+    if (seenNamed) {
+      errors.push({
+        line: lineNum,
+        message: `Invalid parameter order: named parameter (@) must be last`,
+      })
+    }
+
+    // Check parameter format
+    if (param.startsWith("...")) {
+      seenVariadic = true
+      if (!IDENTIFIER.test(param.slice(3))) {
+        errors.push({ line: lineNum, message: `Invalid variadic parameter name: ${param}` })
+      }
+    } else if (param.startsWith("@")) {
+      seenNamed = true
+      if (!IDENTIFIER.test(param.slice(1))) {
+        errors.push({ line: lineNum, message: `Invalid named parameter name: ${param}` })
+      }
+    } else if (param.includes("=")) {
+      // Default parameter: only the name before the `=` is checked
+      const [name = ""] = param.split("=")
+      if (!IDENTIFIER.test(name.trim())) {
+        errors.push({ line: lineNum, message: `Invalid parameter name: ${name}` })
+      }
+    } else if (!IDENTIFIER.test(param)) {
+      // Regular parameter
+      errors.push({ line: lineNum, message: `Invalid parameter name: ${param}` })
+    }
+  }
+}
+
+/**
+ * Validates the operand of a MAKE_FUNCTION instruction, which must have the
+ * form `(params) address` where address is a `.label` or a `#offset`.
+ */
+function validateMakeFunction(
+  operand: string,
+  lineNum: number,
+  errors: ValidationError[],
+  labelReferences: Map<string, number[]>,
+): void {
+  if (!operand.startsWith("(")) {
+    errors.push({
+      line: lineNum,
+      message: `MAKE_FUNCTION requires parameter list: MAKE_FUNCTION (params) address`,
+    })
+    return
+  }
+
+  const match = MAKE_FUNCTION_OPERAND.exec(operand)
+  if (!match) {
+    errors.push({
+      line: lineNum,
+      message: `Invalid MAKE_FUNCTION syntax: expected (params) address`,
+    })
+    return
+  }
+
+  const [, paramStr = "", bodyAddr = ""] = match
+
+  // Validate parameter syntax (text between the parentheses)
+  validateParameters(paramStr.slice(1, -1).trim(), lineNum, errors)
+
+  // Validate body address
+  if (bodyAddr.startsWith(".")) {
+    // A label body is resolved together with every other label reference
+    recordLabelReference(labelReferences, bodyAddr.slice(1), lineNum)
+  } else if (bodyAddr.startsWith("#")) {
+    // An offset body must be numeric, like any other immediate
+    if (!INTEGER.test(bodyAddr.slice(1))) {
+      errors.push({ line: lineNum, message: `Invalid immediate number: ${bodyAddr}` })
+    }
+  } else {
+    errors.push({
+      line: lineNum,
+      message: `Invalid body address: expected .label or #offset`,
+    })
+  }
+}
+
+/**
+ * Validates one instruction (a non-blank, non-label line with its comment
+ * already removed), appending problems to `errors` and recording label
+ * references for the caller to resolve once all labels are known.
+ */
+function validateInstruction(
+  text: string,
+  lineNum: number,
+  errors: ValidationError[],
+  labelReferences: Map<string, number[]>,
+): void {
+  const [opName = "", ...operandParts] = text.split(/\s+/)
+  const operand = operandParts.join(" ")
+
+  // Check if opcode exists
+  const opCode = lookupOpCode(opName)
+  if (opCode === undefined) {
+    errors.push({ line: lineNum, message: `Unknown opcode: ${opName}` })
+    return
+  }
+
+  // Check operand requirements
+  if (OPCODES_WITH_OPERANDS.has(opCode) && !operand) {
+    errors.push({ line: lineNum, message: `${opName} requires an operand` })
+    return
+  }
+
+  if (OPCODES_WITHOUT_OPERANDS.has(opCode) && operand) {
+    errors.push({ line: lineNum, message: `${opName} does not take an operand` })
+    return
+  }
+
+  if (!operand) return
+
+  // Check for label references
+  if (operand.startsWith(".") && !operand.includes("(")) {
+    recordLabelReference(labelReferences, operand.slice(1), lineNum)
+  }
+
+  // MAKE_FUNCTION has its own operand grammar; no other check applies to it
+  if (opCode === OpCode.MAKE_FUNCTION) {
+    validateMakeFunction(operand, lineNum, errors, labelReferences)
+    return
+  }
+
+  // Validate immediate numbers
+  if (operand.startsWith("#") && !INTEGER.test(operand.slice(1))) {
+    errors.push({ line: lineNum, message: `Invalid immediate number: ${operand}` })
+  }
+
+  // Validate variable names for LOAD/STORE
+  if ((opCode === OpCode.LOAD || opCode === OpCode.STORE) && !IDENTIFIER.test(operand)) {
+    errors.push({ line: lineNum, message: `Invalid variable name: ${operand}` })
+  }
+
+  // Validate string constants: the closing quote must match the opening one,
+  // so `"abc'` and a lone `"` are both unterminated
+  const quote = operand[0]
+  if ((quote === '"' || quote === "'") && (operand.length < 2 || !operand.endsWith(quote))) {
+    errors.push({ line: lineNum, message: `Unterminated string: ${operand}` })
+  }
+}
+
+/**
+ * Statically checks bytecode assembly text without executing it.
+ *
+ * Reports unknown opcodes, missing or unexpected operands, malformed
+ * MAKE_FUNCTION operands, invalid immediates, variable names and strings,
+ * duplicate label definitions, and references to undefined labels.
+ *
+ * @param source - Bytecode text, one instruction or `.label:` per line;
+ *   everything after a `;` on a line is ignored.
+ * @returns The errors found, sorted by line number; `valid` is true when
+ *   there are none.
+ */
+export function validateBytecode(source: string): ValidationResult {
+  const errors: ValidationError[] = []
+  const labels = new Map<string, number>()
+  const labelReferences = new Map<string, number[]>()
+
+  // Strip comments once so both passes see the same cleaned text
+  const lines = source.split("\n").map(stripComment)
+
+  // First pass: collect labels and check for duplicates
+  lines.forEach((text, index) => {
+    if (!LABEL_DEFINITION.test(text)) return
+
+    const lineNum = index + 1
+    const labelName = text.slice(1, -1)
+    const firstLine = labels.get(labelName)
+    if (firstLine === undefined) {
+      labels.set(labelName, lineNum)
+    } else {
+      errors.push({
+        line: lineNum,
+        message: `Duplicate label: .${labelName} (first defined at line ${firstLine})`,
+      })
+    }
+  })
+
+  // Second pass: validate instructions, skipping blanks and label definitions
+  lines.forEach((text, index) => {
+    if (text && !LABEL_DEFINITION.test(text)) {
+      validateInstruction(text, index + 1, errors, labelReferences)
+    }
+  })
+
+  // Check for undefined label references
+  for (const [labelName, refLines] of labelReferences) {
+    if (labels.has(labelName)) continue
+    for (const refLine of refLines) {
+      errors.push({ line: refLine, message: `Undefined label: .${labelName}` })
+    }
+  }
+
+  // Sort errors by line number
+  errors.sort((a, b) => a.line - b.line)
+
+  return { valid: errors.length === 0, errors }
+}
+
+/**
+ * Renders a validation result as text for terminal output.
+ *
+ * @returns A one-line success message, or a header with the error count
+ *   followed by one line per error.
+ */
+export function formatValidationErrors(result: ValidationResult): string {
+  if (result.valid) {
+    return "✓ Bytecode is valid"
+  }
+
+  const count = result.errors.length
+  const header = `✗ Found ${count} error${count === 1 ? "" : "s"}:`
+  const details = result.errors.map((error) => ` Line ${error.line}: ${error.message}`)
+
+  return [header, "", ...details].join("\n")
+}
diff --git a/tests/validator.test.ts b/tests/validator.test.ts
new file mode 100644
--- /dev/null
+++ b/tests/validator.test.ts
@@ -0,0 +1,232 @@
+import { test, expect } from "bun:test"
+import { validateBytecode, formatValidationErrors } from "#validator"
+
+test("valid bytecode passes validation", () => {
+  const source = `
+    PUSH 1
+    PUSH 2
+    ADD
+    HALT
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(true)
+  expect(result.errors).toHaveLength(0)
+})
+
+test("valid bytecode with labels passes validation", () => {
+  const source = `
+    JUMP .end
+    PUSH 999
+    .end:
+    PUSH 42
+    HALT
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(true)
+  expect(result.errors).toHaveLength(0)
+})
+
+test("detects unknown opcode", () => {
+  const source = `
+    PUSH 1
+    INVALID_OP
+    HALT
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors).toHaveLength(1)
+  expect(result.errors[0]!.message).toContain("Unknown opcode: INVALID_OP")
+})
+
+test("detects undefined label", () => {
+  const source = `
+    JUMP .nowhere
+    HALT
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors).toHaveLength(1)
+  expect(result.errors[0]!.message).toContain("Undefined label: .nowhere")
+})
+
+test("detects duplicate labels", () => {
+  const source = `
+    .loop:
+    PUSH 1
+    .loop:
+    PUSH 2
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors).toHaveLength(1)
+  expect(result.errors[0]!.message).toContain("Duplicate label: .loop")
+})
+
+test("detects missing operand", () => {
+  const source = `
+    PUSH
+    HALT
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors).toHaveLength(1)
+  expect(result.errors[0]!.message).toContain("PUSH requires an operand")
+})
+
+test("detects unexpected operand", () => {
+  const source = `
+    ADD 42
+    HALT
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors).toHaveLength(1)
+  expect(result.errors[0]!.message).toContain("ADD does not take an operand")
+})
+
+test("detects invalid MAKE_FUNCTION syntax", () => {
+  const source = `
+    MAKE_FUNCTION x y .body
+    HALT
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors[0]!.message).toContain("MAKE_FUNCTION requires parameter list")
+})
+
+test("detects invalid parameter order", () => {
+  const source = `
+    MAKE_FUNCTION (x ...rest y) .body
+    HALT
+    .body:
+    RETURN
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors[0]!.message).toContain("variadic parameter")
+})
+
+test("detects invalid parameter name", () => {
+  const source = `
+    MAKE_FUNCTION (123invalid) .body
+    HALT
+    .body:
+    RETURN
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors[0]!.message).toContain("Invalid parameter name")
+})
+
+test("detects invalid variable name", () => {
+  const source = `
+    LOAD 123invalid
+    HALT
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors[0]!.message).toContain("Invalid variable name")
+})
+
+test("detects unterminated string", () => {
+  const source = `
+    PUSH "unterminated
+    HALT
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors[0]!.message).toContain("Unterminated string")
+})
+
+test("detects invalid immediate number", () => {
+  const source = `
+    MAKE_ARRAY #abc
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors[0]!.message).toContain("Invalid immediate number")
+})
+
+test("handles comments correctly", () => {
+  const source = `
+    PUSH 1 ; this is a comment
+    ; this entire line is a comment
+    PUSH 2
+    ADD ; another comment
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(true)
+})
+
+test("validates function with label reference", () => {
+  const source = `
+    MAKE_FUNCTION (x y) .body
+    JUMP .skip
+    .body:
+    LOAD x
+    LOAD y
+    ADD
+    RETURN
+    .skip:
+    HALT
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(true)
+})
+
+test("detects multiple errors and sorts by line", () => {
+  const source = `
+    UNKNOWN_OP
+    PUSH
+    JUMP .undefined
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors.length).toBeGreaterThanOrEqual(2)
+  // Check that errors are sorted by line number
+  for (let i = 1; i < result.errors.length; i++) {
+    expect(result.errors[i]!.line).toBeGreaterThanOrEqual(result.errors[i-1]!.line)
+  }
+})
+
+test("formatValidationErrors produces readable output", () => {
+  const source = `
+    PUSH 1
+    UNKNOWN
+  `
+  const result = validateBytecode(source)
+  const formatted = formatValidationErrors(result)
+  expect(formatted).toContain("error")
+  expect(formatted).toContain("Line")
+  expect(formatted).toContain("UNKNOWN")
+})
+
+test("detects string closed with a different quote", () => {
+  const source = `
+    PUSH "mismatched'
+    HALT
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors[0]!.message).toContain("Unterminated string")
+})
+
+test("rejects inherited property names as opcodes", () => {
+  const source = `
+    toString
+    HALT
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors[0]!.message).toContain("Unknown opcode: toString")
+})
+
+test("detects invalid MAKE_FUNCTION body offset", () => {
+  const source = `
+    MAKE_FUNCTION (x) #abc
+    HALT
+  `
+  const result = validateBytecode(source)
+  expect(result.valid).toBe(false)
+  expect(result.errors[0]!.message).toContain("Invalid immediate number")
+})