ReefVM/src/validator.ts
2025-11-08 00:01:21 -08:00

400 lines
10 KiB
TypeScript

import { OpCode } from "./opcode"
// A single problem found while validating bytecode source text.
export type ValidationError = {
// 1-based number of the source line the problem was found on
line: number
// Human-readable description of the problem
message: string
}
// Outcome of a validation run, as returned by validateBytecode and consumed by
// formatValidationErrors.
export type ValidationResult = {
// validateBytecode sets this to true exactly when `errors` is empty
valid: boolean
// Every problem found; validateBytecode returns them sorted by ascending line number
errors: ValidationError[]
}
// Characters that may not appear anywhere in an identifier: whitespace and the
// syntax characters ; ( ) [ ] { } = ' " # @ .
const IDENTIFIER_FORBIDDEN_CHARS = /[\s;()[\]{}='"#@.]/

/**
 * Decides whether `name` can be used as an identifier.
 *
 * An identifier is non-empty, does not begin with an ASCII digit, and contains
 * no whitespace or special syntax characters. Everything else is allowed,
 * including Unicode and emoji (💎, 変数, ...).
 *
 * The special prefixes `.`, `#`, `@` and `...` need no separate check: each of
 * them starts with a character that is forbidden anywhere in the name.
 *
 * @param name Candidate identifier.
 * @returns true when `name` is acceptable, false otherwise.
 */
function isValidIdentifier(name: string): boolean {
  if (name.length === 0) return false

  const first = name.charAt(0)
  const startsWithDigit = first >= '0' && first <= '9'
  if (startsWithDigit) return false

  return !IDENTIFIER_FORBIDDEN_CHARS.test(name)
}
// Opcodes that must be followed by an operand. validateBytecode reports
// "<op> requires an operand" when one of these appears alone on a line.
const OPCODES_WITH_OPERANDS = new Set([
OpCode.PUSH,
OpCode.LOAD,
OpCode.STORE,
OpCode.TRY_LOAD,
OpCode.TRY_CALL,
OpCode.JUMP,
OpCode.JUMP_IF_FALSE,
OpCode.JUMP_IF_TRUE,
OpCode.PUSH_TRY,
OpCode.PUSH_FINALLY,
OpCode.MAKE_ARRAY,
OpCode.MAKE_DICT,
OpCode.STR_CONCAT,
OpCode.MAKE_FUNCTION,
])
// Opcodes that must appear alone on their line. validateBytecode reports
// "<op> does not take an operand" when anything follows one of these.
// An opcode listed in neither this set nor OPCODES_WITH_OPERANDS is accepted
// with or without an operand.
const OPCODES_WITHOUT_OPERANDS = new Set([
OpCode.POP,
OpCode.DUP,
OpCode.SWAP,
OpCode.TYPE,
OpCode.ADD,
OpCode.SUB,
OpCode.MUL,
OpCode.DIV,
OpCode.MOD,
OpCode.BIT_AND,
OpCode.BIT_OR,
OpCode.BIT_XOR,
OpCode.BIT_SHL,
OpCode.BIT_SHR,
OpCode.BIT_USHR,
OpCode.EQ,
OpCode.NEQ,
OpCode.LT,
OpCode.GT,
OpCode.LTE,
OpCode.GTE,
OpCode.NOT,
OpCode.HALT,
OpCode.BREAK,
OpCode.POP_TRY,
OpCode.THROW,
OpCode.CALL,
OpCode.TAIL_CALL,
OpCode.RETURN,
OpCode.ARRAY_GET,
OpCode.ARRAY_SET,
OpCode.ARRAY_PUSH,
OpCode.ARRAY_LEN,
OpCode.DICT_GET,
OpCode.DICT_SET,
OpCode.DICT_HAS,
OpCode.DOT_GET,
])
// Opcodes whose operand must start with `#` (an immediate) or `.` (a label);
// validateBytecode rejects any other operand for them.
// immediate = immediate number, eg #5
const OPCODES_REQUIRING_IMMEDIATE_OR_LABEL = new Set([
OpCode.JUMP,
OpCode.JUMP_IF_FALSE,
OpCode.JUMP_IF_TRUE,
OpCode.PUSH_TRY,
OpCode.PUSH_FINALLY,
])
// Opcodes whose operand must start with `#`; a label is not accepted here.
// validateBytecode's error message for these calls the operand a "#count".
// immediate = immediate number, eg #5
const OPCODES_REQUIRING_IMMEDIATE = new Set([
OpCode.MAKE_ARRAY,
OpCode.MAKE_DICT,
OpCode.STR_CONCAT,
])
// A label definition line: a dot, an ASCII identifier, and a trailing colon (e.g. `.loop:`).
const LABEL_DEFINITION_PATTERN = /^\.[a-zA-Z_][a-zA-Z0-9_]*:$/

// The text after the `#` of an immediate: an optionally negative integer.
const IMMEDIATE_DIGITS_PATTERN = /^-?\d+$/

// Drops everything from the first `;` onward, then trims surrounding whitespace.
// The scan is not quote-aware: a `;` inside a string operand also starts a comment.
function stripCommentAndTrim(line: string): string {
  const commentIndex = line.indexOf(';')
  const code = commentIndex === -1 ? line : line.slice(0, commentIndex)
  return code.trim()
}

// Resolves an opcode mnemonic to its OpCode value, or undefined when no such opcode exists.
function resolveOpCode(opName: string): (typeof OpCode)[keyof typeof OpCode] | undefined {
  // Only own keys count: a plain index lookup would also find inherited
  // members such as `toString` or `constructor` and treat them as opcodes.
  if (!Object.prototype.hasOwnProperty.call(OpCode, opName)) return undefined
  // If OpCode is a numeric enum it also carries value -> name reverse-mapping
  // keys ("0", "1", ...). A mnemonic never starts with a digit, so reject those.
  if (/^-?\d/.test(opName)) return undefined
  return OpCode[opName as keyof typeof OpCode]
}

// Remembers that `labelName` is referenced on `lineNum`, for the undefined-label check.
function recordLabelReference(
  references: Map<string, number[]>,
  labelName: string,
  lineNum: number,
): void {
  const seenOn = references.get(labelName)
  if (seenOn) {
    seenOn.push(lineNum)
  } else {
    references.set(labelName, [lineNum])
  }
}

// Checks the whitespace-separated parameters found between the parentheses of MAKE_FUNCTION.
function validateParameterList(paramList: string, lineNum: number, errors: ValidationError[]): void {
  let seenVariadic = false
  let seenNamed = false

  for (const param of paramList.split(/\s+/)) {
    // Ordering is judged against the parameters seen BEFORE this one.
    if (seenVariadic && !param.startsWith('@')) {
      errors.push({
        line: lineNum,
        message: `Invalid parameter order: variadic parameter (...) must come before named parameter (@)`,
      })
    }
    if (seenNamed) {
      errors.push({
        line: lineNum,
        message: `Invalid parameter order: named parameter (@) must be last`,
      })
    }

    if (param.startsWith('...')) {
      seenVariadic = true
      if (!isValidIdentifier(param.slice(3))) {
        errors.push({ line: lineNum, message: `Invalid variadic parameter name: ${param}` })
      }
    } else if (param.startsWith('@')) {
      seenNamed = true
      if (!isValidIdentifier(param.slice(1))) {
        errors.push({ line: lineNum, message: `Invalid named parameter name: ${param}` })
      }
    } else if (param.includes('=')) {
      // Default parameter: only the name before the first `=` is checked.
      const [name = ''] = param.split('=')
      if (!isValidIdentifier(name.trim())) {
        errors.push({ line: lineNum, message: `Invalid parameter name: ${name}` })
      }
    } else if (!isValidIdentifier(param)) {
      errors.push({ line: lineNum, message: `Invalid parameter name: ${param}` })
    }
  }
}

// Checks a MAKE_FUNCTION operand of the form `(params) address` and records a label body address.
function validateMakeFunctionOperand(
  operand: string,
  lineNum: number,
  errors: ValidationError[],
  labelReferences: Map<string, number[]>,
): void {
  if (!operand.startsWith('(')) {
    errors.push({
      line: lineNum,
      message: `MAKE_FUNCTION requires parameter list: MAKE_FUNCTION (params) address`,
    })
    return
  }

  const match = operand.match(/^(\(.*?\))\s+(.+)$/)
  if (!match) {
    errors.push({
      line: lineNum,
      message: `Invalid MAKE_FUNCTION syntax: expected (params) address`,
    })
    return
  }
  const [, paramStr = '', bodyAddr = ''] = match

  const paramList = paramStr.slice(1, -1).trim()
  if (paramList) {
    validateParameterList(paramList, lineNum, errors)
  }

  if (bodyAddr.startsWith('.')) {
    recordLabelReference(labelReferences, bodyAddr.slice(1), lineNum)
  } else if (bodyAddr.startsWith('#')) {
    // A numeric body offset must be a well-formed integer, like any other immediate.
    if (!IMMEDIATE_DIGITS_PATTERN.test(bodyAddr.slice(1))) {
      errors.push({ line: lineNum, message: `Invalid immediate number: ${bodyAddr}` })
    }
  } else {
    errors.push({
      line: lineNum,
      message: `Invalid body address: expected .label or #offset`,
    })
  }
}

/**
 * Validates textual bytecode without executing it.
 *
 * The source is read line by line; `;` starts a comment and blank lines are
 * ignored. The first pass collects label definitions (`.name:`) and reports
 * duplicates. The second pass checks every instruction: the opcode must exist,
 * an operand must be present or absent as the opcode requires, and operands
 * must be well formed (immediates, labels, MAKE_FUNCTION syntax, variable
 * names, string quoting). Finally, every referenced label must be defined.
 *
 * @param source Bytecode source text, one instruction or label per line.
 * @returns `valid` is true when no problems were found; `errors` lists every
 *   problem, sorted by ascending line number (1-based).
 */
export function validateBytecode(source: string): ValidationResult {
  const errors: ValidationError[] = []
  const lines = source.split("\n")
  const labels = new Map<string, number>()
  const labelReferences = new Map<string, number[]>()

  // First pass: collect labels and check for duplicates
  for (const [index, rawLine] of lines.entries()) {
    const trimmed = stripCommentAndTrim(rawLine)
    if (!LABEL_DEFINITION_PATTERN.test(trimmed)) continue

    const labelName = trimmed.slice(1, -1)
    const firstDefinedAt = labels.get(labelName)
    if (firstDefinedAt !== undefined) {
      errors.push({
        line: index + 1,
        message: `Duplicate label: .${labelName} (first defined at line ${firstDefinedAt})`,
      })
    } else {
      labels.set(labelName, index + 1)
    }
  }

  // Second pass: validate instructions
  for (const [index, rawLine] of lines.entries()) {
    const lineNum = index + 1
    const trimmed = stripCommentAndTrim(rawLine)
    if (!trimmed || LABEL_DEFINITION_PATTERN.test(trimmed)) continue

    const [opName = '', ...operandParts] = trimmed.split(/\s+/)
    const operand = operandParts.join(' ')

    const opCode = resolveOpCode(opName)
    if (opCode === undefined) {
      errors.push({ line: lineNum, message: `Unknown opcode: ${opName}` })
      continue
    }

    // Without an operand the only possible problem is that one was required.
    if (!operand) {
      if (OPCODES_WITH_OPERANDS.has(opCode)) {
        errors.push({ line: lineNum, message: `${opName} requires an operand` })
      }
      continue
    }

    if (OPCODES_WITHOUT_OPERANDS.has(opCode)) {
      errors.push({ line: lineNum, message: `${opName} does not take an operand` })
      continue
    }

    if (
      OPCODES_REQUIRING_IMMEDIATE_OR_LABEL.has(opCode) &&
      !operand.startsWith('#') &&
      !operand.startsWith('.')
    ) {
      errors.push({
        line: lineNum,
        message: `${opName} requires immediate (#number) or label (.label), got: ${operand}`,
      })
      continue
    }

    if (OPCODES_REQUIRING_IMMEDIATE.has(opCode) && !operand.startsWith('#')) {
      errors.push({
        line: lineNum,
        message: `${opName} requires immediate number (#count), got: ${operand}`,
      })
      continue
    }

    // Any operand that starts with `.` and has no `(` is treated as a label reference.
    if (operand.startsWith('.') && !operand.includes('(')) {
      recordLabelReference(labelReferences, operand.slice(1), lineNum)
    }

    // MAKE_FUNCTION has its own operand grammar; none of the generic checks
    // below apply to it.
    if (opCode === OpCode.MAKE_FUNCTION) {
      validateMakeFunctionOperand(operand, lineNum, errors, labelReferences)
      continue
    }

    // Validate immediate numbers
    if (operand.startsWith('#') && !IMMEDIATE_DIGITS_PATTERN.test(operand.slice(1))) {
      errors.push({ line: lineNum, message: `Invalid immediate number: ${operand}` })
    }

    // Validate variable names for LOAD/STORE/TRY_LOAD/TRY_CALL
    const takesVariableName =
      opCode === OpCode.LOAD ||
      opCode === OpCode.STORE ||
      opCode === OpCode.TRY_LOAD ||
      opCode === OpCode.TRY_CALL
    if (takesVariableName && !isValidIdentifier(operand)) {
      errors.push({ line: lineNum, message: `Invalid variable name: ${operand}` })
    }

    // Validate string constants: a string must end with the same quote that
    // opened it, and a lone quote character does not count as its own closer.
    const opensWithQuote = operand.startsWith('"') || operand.startsWith("'")
    if (opensWithQuote && (operand.length < 2 || !operand.endsWith(operand.charAt(0)))) {
      errors.push({ line: lineNum, message: `Unterminated string: ${operand}` })
    }
  }

  // Check for undefined label references
  for (const [labelName, refLines] of labelReferences) {
    if (labels.has(labelName)) continue
    for (const refLine of refLines) {
      errors.push({ line: refLine, message: `Undefined label: .${labelName}` })
    }
  }

  // Sort errors by line number
  errors.sort((a, b) => a.line - b.line)

  return {
    valid: errors.length === 0,
    errors,
  }
}
/**
 * Renders a validation result as human-readable text.
 *
 * A valid result yields a single success line. Otherwise the output is a
 * header with the error count (pluralised), a blank line, and then one
 * `Line N: message` entry per error in the order given.
 *
 * @param result Result to render.
 * @returns The formatted, newline-separated report.
 */
export function formatValidationErrors(result: ValidationResult): string {
  if (result.valid) return "✓ Bytecode is valid"

  const count = result.errors.length
  const pluralSuffix = count === 1 ? '' : 's'
  const header = `✗ Found ${count} error${pluralSuffix}:`
  const entries = result.errors.map(({ line, message }) => ` Line ${line}: ${message}`)

  return [header, '', ...entries].join('\n')
}