import { OpCode } from "./opcode"

/** A single problem found in the bytecode source, tied to its 1-based source line. */
export type ValidationError = {
  line: number
  message: string
}

/** Outcome of `validateBytecode`; `valid` is true exactly when `errors` is empty. */
export type ValidationResult = {
  valid: boolean
  errors: ValidationError[]
}

// Value type of an opcode, derived from the imported `OpCode` object so it
// works whether `OpCode` is declared as an enum or as a const object.
type OpCodeValue = (typeof OpCode)[keyof typeof OpCode]

// Callback used by the helpers below to record an error for a source line.
type Reporter = (line: number, message: string) => void

// Valid identifier: no whitespace, no special syntax characters
// Supports Unicode/emoji: 💎, 変数, etc.
function isValidIdentifier(name: string): boolean {
  // Must not be empty
  if (!name) return false

  // Must not start with special syntax prefixes ('...' is covered by '.')
  if (name.startsWith('.') || name.startsWith('#') || name.startsWith('@')) return false

  // Must not start with a digit
  if (/^\d/.test(name)) return false

  // Must not contain whitespace or special syntax characters
  return !/[\s;()[\]{}='"#@.]/.test(name)
}

// Opcodes that must be followed by an operand.
const OPCODES_WITH_OPERANDS = new Set([
  OpCode.PUSH,
  OpCode.LOAD,
  OpCode.STORE,
  OpCode.TRY_LOAD,
  OpCode.TRY_CALL,
  OpCode.JUMP,
  OpCode.JUMP_IF_FALSE,
  OpCode.JUMP_IF_TRUE,
  OpCode.PUSH_TRY,
  OpCode.PUSH_FINALLY,
  OpCode.MAKE_ARRAY,
  OpCode.MAKE_DICT,
  OpCode.STR_CONCAT,
  OpCode.MAKE_FUNCTION,
])

// Opcodes that must NOT be followed by an operand.
const OPCODES_WITHOUT_OPERANDS = new Set([
  OpCode.POP,
  OpCode.DUP,
  OpCode.SWAP,
  OpCode.TYPE,
  OpCode.ADD,
  OpCode.SUB,
  OpCode.MUL,
  OpCode.DIV,
  OpCode.MOD,
  OpCode.BIT_AND,
  OpCode.BIT_OR,
  OpCode.BIT_XOR,
  OpCode.BIT_SHL,
  OpCode.BIT_SHR,
  OpCode.BIT_USHR,
  OpCode.EQ,
  OpCode.NEQ,
  OpCode.LT,
  OpCode.GT,
  OpCode.LTE,
  OpCode.GTE,
  OpCode.NOT,
  OpCode.HALT,
  OpCode.BREAK,
  OpCode.POP_TRY,
  OpCode.THROW,
  OpCode.CALL,
  OpCode.TAIL_CALL,
  OpCode.RETURN,
  OpCode.ARRAY_GET,
  OpCode.ARRAY_SET,
  OpCode.ARRAY_PUSH,
  OpCode.ARRAY_LEN,
  OpCode.DICT_GET,
  OpCode.DICT_SET,
  OpCode.DICT_HAS,
  OpCode.DOT_GET,
])

// immediate = immediate number, eg #5
const OPCODES_REQUIRING_IMMEDIATE_OR_LABEL = new Set([
  OpCode.JUMP,
  OpCode.JUMP_IF_FALSE,
  OpCode.JUMP_IF_TRUE,
  OpCode.PUSH_TRY,
  OpCode.PUSH_FINALLY,
])

// immediate = immediate number, eg #5
const OPCODES_REQUIRING_IMMEDIATE = new Set([
  OpCode.MAKE_ARRAY,
  OpCode.MAKE_DICT,
  OpCode.STR_CONCAT,
])

// Opcodes whose operand is a variable name and must be a valid identifier.
const OPCODES_TAKING_VARIABLE_NAME = new Set([
  OpCode.LOAD,
  OpCode.STORE,
  OpCode.TRY_LOAD,
  OpCode.TRY_CALL,
])

// A label definition line: ".name:" with an ASCII identifier as the name.
const LABEL_DEFINITION = /^\.[a-zA-Z_][a-zA-Z0-9_]*:$/

// The digits that follow '#' in an immediate operand (optionally negative).
const IMMEDIATE_NUMBER = /^-?\d+$/

// Drops everything from the first ';' onwards and trims the remainder.
// NOTE: the first ';' wins even when it sits inside a quoted string operand.
function stripComment(line: string): string {
  const commentIndex = line.indexOf(';')
  const code = commentIndex === -1 ? line : line.slice(0, commentIndex)
  return code.trim()
}

// Resolves a mnemonic to its opcode, or undefined when it is not one.
// A bare `OpCode[opName]` lookup also succeeds for inherited keys such as
// "toString" and, if OpCode is a numeric enum, for reverse-mapping keys such
// as "0"; neither is a mnemonic, so both are rejected here.
function lookupOpCode(opName: string): OpCodeValue | undefined {
  if (/^-?\d/.test(opName)) return undefined
  if (!Object.prototype.hasOwnProperty.call(OpCode, opName)) return undefined
  return OpCode[opName as keyof typeof OpCode]
}

// True when the operand opens with a quote but is not closed by the same
// quote character (a lone quote counts as unterminated).
function isUnterminatedString(operand: string): boolean {
  const opening = operand.charAt(0)
  if (opening !== '"' && opening !== "'") return false
  return operand.length < 2 || !operand.endsWith(opening)
}

// Validates the whitespace-separated parameters found between the parentheses
// of a MAKE_FUNCTION operand: ordering first, then each parameter's name.
function validateParameterList(paramList: string, lineNum: number, report: Reporter): void {
  let seenVariadic = false
  let seenNamed = false

  for (const param of paramList.split(/\s+/)) {
    // Ordering is judged against the parameters seen BEFORE this one.
    if (seenVariadic && !param.startsWith('@')) {
      report(lineNum, `Invalid parameter order: variadic parameter (...) must come before named parameter (@)`)
    }
    if (seenNamed) {
      report(lineNum, `Invalid parameter order: named parameter (@) must be last`)
    }

    if (param.startsWith('...')) {
      seenVariadic = true
      if (!isValidIdentifier(param.slice(3))) {
        report(lineNum, `Invalid variadic parameter name: ${param}`)
      }
    } else if (param.startsWith('@')) {
      seenNamed = true
      if (!isValidIdentifier(param.slice(1))) {
        report(lineNum, `Invalid named parameter name: ${param}`)
      }
    } else if (param.includes('=')) {
      // Default parameter: only the name before '=' is checked.
      const [name = ''] = param.split('=')
      if (!isValidIdentifier(name.trim())) {
        report(lineNum, `Invalid parameter name: ${name}`)
      }
    } else if (!isValidIdentifier(param)) {
      // Regular parameter
      report(lineNum, `Invalid parameter name: ${param}`)
    }
  }
}

// Validates a MAKE_FUNCTION operand of the form "(params) address", where
// address is a .label (recorded as a label reference) or a #offset.
function validateMakeFunction(
  operand: string,
  lineNum: number,
  report: Reporter,
  recordLabelReference: (labelName: string, lineNum: number) => void,
): void {
  if (!operand.startsWith('(')) {
    report(lineNum, `MAKE_FUNCTION requires parameter list: MAKE_FUNCTION (params) address`)
    return
  }

  const match = operand.match(/^(\(.*?\))\s+(.+)$/)
  if (!match) {
    report(lineNum, `Invalid MAKE_FUNCTION syntax: expected (params) address`)
    return
  }
  const paramStr = match[1] ?? ''
  const bodyAddr = match[2] ?? ''

  const paramList = paramStr.slice(1, -1).trim()
  if (paramList) {
    validateParameterList(paramList, lineNum, report)
  }

  if (bodyAddr.startsWith('.')) {
    recordLabelReference(bodyAddr.slice(1), lineNum)
  } else if (bodyAddr.startsWith('#')) {
    // The generic immediate check only sees operands that START with '#',
    // so the offset form of the body address has to be checked here.
    if (!IMMEDIATE_NUMBER.test(bodyAddr.slice(1))) {
      report(lineNum, `Invalid immediate number: ${bodyAddr}`)
    }
  } else {
    report(lineNum, `Invalid body address: expected .label or #offset`)
  }
}

/**
 * Statically checks textual bytecode, one instruction or label per line.
 *
 * Comments (from the first `;` to end of line) and blank lines are ignored.
 * The first pass collects `.label:` definitions and reports duplicates; the
 * second pass checks each instruction's opcode, operand presence and operand
 * format, and records label references. References to labels that were never
 * defined are reported last.
 *
 * @param source - The bytecode text to validate.
 * @returns The collected errors sorted by line number, and `valid: true` only
 *   when there are none.
 */
export function validateBytecode(source: string): ValidationResult {
  const errors: ValidationError[] = []
  const lines = source.split("\n").map(stripComment)
  const labels = new Map<string, number>()
  const labelReferences = new Map<string, number[]>()

  const report: Reporter = (line, message) => {
    errors.push({ line, message })
  }

  const recordLabelReference = (labelName: string, lineNum: number): void => {
    const refs = labelReferences.get(labelName)
    if (refs) {
      refs.push(lineNum)
    } else {
      labelReferences.set(labelName, [lineNum])
    }
  }

  // First pass: collect labels and check for duplicates
  lines.forEach((text, index) => {
    if (!LABEL_DEFINITION.test(text)) return

    const lineNum = index + 1
    const labelName = text.slice(1, -1)
    const firstDefinedAt = labels.get(labelName)
    if (firstDefinedAt === undefined) {
      labels.set(labelName, lineNum)
    } else {
      report(lineNum, `Duplicate label: .${labelName} (first defined at line ${firstDefinedAt})`)
    }
  })

  // Second pass: validate instructions
  for (const [index, text] of lines.entries()) {
    // Skip blank lines and label definitions
    if (!text || LABEL_DEFINITION.test(text)) continue

    const lineNum = index + 1
    const [opName = '', ...operandParts] = text.split(/\s+/)
    const operand = operandParts.join(' ')

    // Check if opcode exists
    const opCode = lookupOpCode(opName)
    if (opCode === undefined) {
      report(lineNum, `Unknown opcode: ${opName}`)
      continue
    }

    // Check operand requirements
    if (OPCODES_WITH_OPERANDS.has(opCode) && !operand) {
      report(lineNum, `${opName} requires an operand`)
      continue
    }
    if (OPCODES_WITHOUT_OPERANDS.has(opCode) && operand) {
      report(lineNum, `${opName} does not take an operand`)
      continue
    }

    // Everything below inspects the operand's format.
    if (!operand) continue

    if (
      OPCODES_REQUIRING_IMMEDIATE_OR_LABEL.has(opCode) &&
      !operand.startsWith('#') &&
      !operand.startsWith('.')
    ) {
      report(lineNum, `${opName} requires immediate (#number) or label (.label), got: ${operand}`)
      continue
    }

    if (OPCODES_REQUIRING_IMMEDIATE.has(opCode) && !operand.startsWith('#')) {
      report(lineNum, `${opName} requires immediate number (#count), got: ${operand}`)
      continue
    }

    // Check for label references
    if (operand.startsWith('.') && !operand.includes('(')) {
      recordLabelReference(operand.slice(1), lineNum)
    }

    // Validate MAKE_FUNCTION syntax; none of the generic operand checks
    // below apply to it, so the line is finished afterwards.
    if (opCode === OpCode.MAKE_FUNCTION) {
      validateMakeFunction(operand, lineNum, report, recordLabelReference)
      continue
    }

    // Validate immediate numbers
    if (operand.startsWith('#') && !IMMEDIATE_NUMBER.test(operand.slice(1))) {
      report(lineNum, `Invalid immediate number: ${operand}`)
    }

    // Validate variable names for LOAD/STORE/TRY_LOAD/TRY_CALL
    if (OPCODES_TAKING_VARIABLE_NAME.has(opCode) && !isValidIdentifier(operand)) {
      report(lineNum, `Invalid variable name: ${operand}`)
    }

    // Validate string constants
    if (isUnterminatedString(operand)) {
      report(lineNum, `Unterminated string: ${operand}`)
    }
  }

  // Check for undefined label references
  for (const [labelName, refLines] of labelReferences) {
    if (labels.has(labelName)) continue
    for (const refLine of refLines) {
      report(refLine, `Undefined label: .${labelName}`)
    }
  }

  // Sort errors by line number
  errors.sort((a, b) => a.line - b.line)

  return {
    valid: errors.length === 0,
    errors,
  }
}

/**
 * Renders a validation result as human-readable text.
 *
 * @param result - The result produced by `validateBytecode`.
 * @returns A one-line success message when `result.valid` is true; otherwise
 *   a header with the error count, a blank line, and one line per error.
 */
export function formatValidationErrors(result: ValidationResult): string {
  if (result.valid) {
    return "✓ Bytecode is valid"
  }

  const count = result.errors.length
  const header = `✗ Found ${count} error${count === 1 ? '' : 's'}:`
  const details = result.errors.map((error) => ` Line ${error.line}: ${error.message}`)

  return [header, '', ...details].join('\n')
}