diff --git a/CLAUDE.md b/CLAUDE.md index 99f54aa..81e09d4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -344,3 +344,5 @@ Run `bun test` to verify all tests pass before committing. **Finally blocks**: The compiler must generate explicit JUMPs to finally blocks for successful try/catch completion. The VM only auto-jumps to finally on THROW. **Variable scoping**: STORE updates existing variables in parent scopes or creates in current scope. It does NOT shadow by default. + +**Identifiers**: Variable and parameter names support Unicode and emoji! Valid: `💎`, `🌟`, `変数`, `counter`. Invalid: cannot start with digits or special prefixes (`.`, `#`, `@`, `...`), cannot contain whitespace or syntax characters. diff --git a/GUIDE.md b/GUIDE.md index 6541c29..b8c5c43 100644 --- a/GUIDE.md +++ b/GUIDE.md @@ -30,9 +30,11 @@ OPCODE operand ; comment - `JUMP .loop` - jump to label - `MAKE_FUNCTION (x) .body` - function body at label -**Variable names**: Plain identifiers +**Variable names**: Plain identifiers (supports Unicode and emoji!) - `LOAD counter` - load variable - `STORE result` - store variable +- `LOAD 💎` - load emoji variable +- `STORE 変数` - store Unicode variable **Constants**: Literals added to constants pool - Numbers: `PUSH 42`, `PUSH 3.14` @@ -437,6 +439,12 @@ Only `null` and `false` are falsy. Everything else (including `0`, `""`, empty a - STORE updates existing variable or creates in current scope - Functions capture scope at definition time +### Identifiers +Variable and function parameter names support Unicode and emoji: +- Valid: `💎`, `🌟`, `変数`, `counter`, `_private` +- Invalid: Cannot start with digits, `.`, `#`, `@`, or `...` +- Invalid: Cannot contain whitespace or special chars: `;`, `()`, `[]`, `{}`, `=`, `'`, `"` + ### Break Semantics - CALL marks current frame as break target - BREAK unwinds call stack to that target diff --git a/README.md b/README.md index a5cf579..9dca0fb 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ Run the simple debugger to see what the instructions are doing: - Arithmetic operations (ADD, SUB, MUL, DIV, MOD) - Comparison operations (EQ, NEQ, LT, GT, LTE, GTE) - Logical operations (NOT, AND/OR patterns with short-circuiting) -- Variable operations (LOAD, STORE) +- Variable operations (LOAD, STORE) with **emoji and Unicode support** (💎 = 5) - Control flow with **relative jumps** (JUMP, JUMP_IF_FALSE, JUMP_IF_TRUE, BREAK, CONTINUE) - Array operations (MAKE_ARRAY, ARRAY_GET, ARRAY_SET, ARRAY_PUSH, ARRAY_LEN) - Dictionary operations (MAKE_DICT, DICT_GET, DICT_SET, DICT_HAS) diff --git a/examples/emoji.ts b/examples/emoji.ts new file mode 100644 index 0000000..c05e463 --- /dev/null +++ b/examples/emoji.ts @@ -0,0 +1,97 @@ +import { toBytecode, run } from "#reef" + +// Example 1: Simple emoji variables +console.log("=== Emoji Variables ===") +const gems = toBytecode([ + ["PUSH", 5], + ["STORE", "💎"], + ["PUSH", 3], + ["STORE", "🌟"], + ["LOAD", "💎"], + ["LOAD", "🌟"], + ["ADD"], + ["HALT"] +]) + +console.log("💎 (5) + 🌟 (3) =", (await run(gems)).value) + +// Example 2: Money calculator +console.log("\n=== Money Calculator ===") +const money = toBytecode(` + PUSH 100 + STORE 💰 + PUSH 50 + STORE 💵 + PUSH 25 + STORE 🪙 + + LOAD 💰 + LOAD 💵 + ADD + LOAD 🪙 + ADD + HALT +`) + +console.log("💰 (100) + 💵 (50) + 🪙 (25) =", (await run(money)).value) + +// Example 3: Function with emoji parameters +console.log("\n=== Emoji Function ===") +const emojiFunc = toBytecode(` + MAKE_FUNCTION (🎯 🎨=50) .paint + STORE paint + JUMP .after + + .paint: + LOAD 🎯 + LOAD 🎨 + MUL + RETURN + + .after: + LOAD paint + PUSH 10 + PUSH 1 + PUSH 0 + CALL + HALT +`) + +console.log("paint(🎯=10, 🎨=50 default) =", (await run(emojiFunc)).value) + +// Example 4: Unicode variables (Japanese) +console.log("\n=== Unicode Variables ===") +const japanese = toBytecode([ + ["PUSH", 42], + ["STORE", "数字"], + ["PUSH", 8], + ["STORE", "ラッキー"], + ["LOAD", "数字"], + ["LOAD", "ラッキー"], + ["ADD"], + ["HALT"] +]) + +console.log("数字 (42) + ラッキー (8) =", (await run(japanese)).value) + +// Example 5: Loop with emoji +console.log("\n=== Emoji Loop ===") +const loop = toBytecode([ + ["PUSH", 0], + ["STORE", "🔢"], + [".🔁:"], + ["LOAD", "🔢"], + ["PUSH", 5], + ["LT"], + ["JUMP_IF_FALSE", ".🛑"], + ["LOAD", "🔢"], + ["PUSH", 1], + ["ADD"], + ["STORE", "🔢"], + ["JUMP", ".🔁"], + [".🛑:"], + ["LOAD", "🔢"], + ["HALT"] +]) + +console.log("Count from 0 to 5 with 🔢:", (await run(loop)).value) diff --git a/src/bytecode.ts b/src/bytecode.ts index 5acab21..dcaac4c 100644 --- a/src/bytecode.ts +++ b/src/bytecode.ts @@ -487,12 +487,11 @@ function toBytecodeFromString(str: string): Bytecode /* throws */ { bytecode.constants.push(toValue(null)) operandValue = bytecode.constants.length - 1 - } else if (/^[a-zA-Z_].*$/.test(operand)) { - // variable - operandValue = operand - } else { - throw new Error(`Invalid operand: ${operand}`) + // Assume it's a variable name if it doesn't match any other pattern + // This allows emoji, Unicode, and other creative identifiers + // (already checked that it doesn't start with . # or match other patterns) + operandValue = operand } } diff --git a/src/validator.ts b/src/validator.ts index c00bfdb..7c55e83 100644 --- a/src/validator.ts +++ b/src/validator.ts @@ -10,6 +10,26 @@ export type ValidationResult = { errors: ValidationError[] } +// Valid identifier: no whitespace, no special syntax characters +// Supports Unicode/emoji: 💎, 変数, etc. +function isValidIdentifier(name: string): boolean { + // Must not be empty + if (!name) return false + + // Must not start with special syntax prefixes + if (name.startsWith('.') || name.startsWith('#') || name.startsWith('@') || name.startsWith('...')) { + return false + } + + // Must not start with a digit + if (/^\d/.test(name)) { + return false + } + + // Must not contain whitespace or special syntax characters + return !/[\s;()[\]{}='"#@.]/.test(name) +} + // Opcodes that require operands const OPCODES_WITH_OPERANDS = new Set([ OpCode.PUSH, @@ -209,7 +229,7 @@ export function validateBytecode(source: string): ValidationResult { if (param.startsWith('...')) { seenVariadic = true const name = param.slice(3) - if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) { + if (!isValidIdentifier(name)) { errors.push({ line: lineNum, message: `Invalid variadic parameter name: ${param}`, @@ -218,7 +238,7 @@ export function validateBytecode(source: string): ValidationResult { } else if (param.startsWith('@')) { seenNamed = true const name = param.slice(1) - if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) { + if (!isValidIdentifier(name)) { errors.push({ line: lineNum, message: `Invalid named parameter name: ${param}`, @@ -227,7 +247,7 @@ export function validateBytecode(source: string): ValidationResult { } else if (param.includes('=')) { // Default parameter const [name, defaultValue] = param.split('=') - if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name!.trim())) { + if (!isValidIdentifier(name!.trim())) { errors.push({ line: lineNum, message: `Invalid parameter name: ${name}`, @@ -235,7 +255,7 @@ export function validateBytecode(source: string): ValidationResult { } } else { // Regular parameter - if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(param)) { + if (!isValidIdentifier(param)) { errors.push({ line: lineNum, message: `Invalid parameter name: ${param}`, @@ -276,7 +296,7 @@ export function validateBytecode(source: string): ValidationResult { // Validate variable names for LOAD/STORE if ((opCode === OpCode.LOAD || opCode === OpCode.STORE) && - !/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(operand)) { + !isValidIdentifier(operand)) { errors.push({ line: lineNum, message: `Invalid variable name: ${operand}`, diff --git a/tests/basic.test.ts b/tests/basic.test.ts index 5b9ea3e..595057d 100644 --- a/tests/basic.test.ts +++ b/tests/basic.test.ts @@ -600,3 +600,117 @@ test("JUMP backward - simple loop", async () => { expect(result).toEqual({ type: 'number', value: 3 }) }) +test("emoji variable names - string format", async () => { + const bytecode = toBytecode(` + PUSH 5 + STORE 💎 + LOAD 💎 + HALT + `) + + const result = await run(bytecode) + expect(result).toEqual({ type: 'number', value: 5 }) +}) + +test("emoji variable names - array format", async () => { + const bytecode = toBytecode([ + ["PUSH", 100], + ["STORE", "💰"], + ["LOAD", "💰"], + ["PUSH", 50], + ["ADD"], + ["HALT"] + ]) + + const result = await run(bytecode) + expect(result).toEqual({ type: 'number', value: 150 }) +}) + +test("unicode variable names - Japanese", async () => { + const bytecode = toBytecode(` + PUSH 42 + STORE 変数 + LOAD 変数 + HALT + `) + + const result = await run(bytecode) + expect(result).toEqual({ type: 'number', value: 42 }) +}) + +test("unicode variable names - Chinese", async () => { + const bytecode = toBytecode([ + ["PUSH", 888], + ["STORE", "数字"], + ["LOAD", "数字"], + ["HALT"] + ]) + + const result = await run(bytecode) + expect(result).toEqual({ type: 'number', value: 888 }) +}) + +test("emoji in function parameters", async () => { + const bytecode = toBytecode(` + MAKE_FUNCTION (💎 🌟) .add + STORE add + JUMP .after + .add: + LOAD 💎 + LOAD 🌟 + ADD + RETURN + .after: + LOAD add + PUSH 10 + PUSH 20 + PUSH 2 + PUSH 0 + CALL + HALT + `) + + const result = await run(bytecode) + expect(result).toEqual({ type: 'number', value: 30 }) +}) + +test("emoji with defaults and variadic", async () => { + const bytecode = toBytecode([ + ["MAKE_FUNCTION", ["🎯=100", "...🎨"], ".fn"], + ["STORE", "fn"], + ["JUMP", ".after"], + [".fn:"], + ["LOAD", "🎯"], + ["RETURN"], + [".after:"], + ["LOAD", "fn"], + ["PUSH", 0], + ["PUSH", 0], + ["CALL"], + ["HALT"] + ]) + + const result = await run(bytecode) + expect(result).toEqual({ type: 'number', value: 100 }) +}) + +test("mixed emoji and regular names", async () => { + const bytecode = toBytecode([ + ["PUSH", 10], + ["STORE", "💎"], + ["PUSH", 20], + ["STORE", "value"], + ["PUSH", 30], + ["STORE", "🌟"], + ["LOAD", "💎"], + ["LOAD", "value"], + ["ADD"], + ["LOAD", "🌟"], + ["ADD"], + ["HALT"] + ]) + + const result = await run(bytecode) + expect(result).toEqual({ type: 'number', value: 60 }) +}) +