add emoji support

This commit is contained in:
Chris Wanstrath 2025-10-07 16:18:44 -07:00
parent 4e2869ebd9
commit 27857bfae8
7 changed files with 252 additions and 12 deletions

View File

@ -344,3 +344,5 @@ Run `bun test` to verify all tests pass before committing.
**Finally blocks**: The compiler must generate explicit JUMPs to finally blocks for successful try/catch completion. The VM only auto-jumps to finally on THROW. **Finally blocks**: The compiler must generate explicit JUMPs to finally blocks for successful try/catch completion. The VM only auto-jumps to finally on THROW.
**Variable scoping**: STORE updates existing variables in parent scopes or creates in current scope. It does NOT shadow by default. **Variable scoping**: STORE updates existing variables in parent scopes or creates in current scope. It does NOT shadow by default.
**Identifiers**: Variable and parameter names support Unicode and emoji! Valid: `💎`, `🌟`, `変数`, `counter`. Invalid: cannot start with digits or special prefixes (`.`, `#`, `@`, `...`), cannot contain whitespace or syntax characters.

View File

@ -30,9 +30,11 @@ OPCODE operand ; comment
- `JUMP .loop` - jump to label - `JUMP .loop` - jump to label
- `MAKE_FUNCTION (x) .body` - function body at label - `MAKE_FUNCTION (x) .body` - function body at label
**Variable names**: Plain identifiers **Variable names**: Plain identifiers (supports Unicode and emoji!)
- `LOAD counter` - load variable - `LOAD counter` - load variable
- `STORE result` - store variable - `STORE result` - store variable
- `LOAD 💎` - load emoji variable
- `STORE 変数` - store Unicode variable
**Constants**: Literals added to constants pool **Constants**: Literals added to constants pool
- Numbers: `PUSH 42`, `PUSH 3.14` - Numbers: `PUSH 42`, `PUSH 3.14`
@ -437,6 +439,12 @@ Only `null` and `false` are falsy. Everything else (including `0`, `""`, empty a
- STORE updates existing variable or creates in current scope - STORE updates existing variable or creates in current scope
- Functions capture scope at definition time - Functions capture scope at definition time
### Identifiers
Variable and function parameter names support Unicode and emoji:
- Valid: `💎`, `🌟`, `変数`, `counter`, `_private`
- Invalid: Cannot start with digits, `.`, `#`, `@`, or `...`
- Invalid: Cannot contain whitespace or special chars anywhere in the name: `;`, `()`, `[]`, `{}`, `=`, `'`, `"`, `.`, `#`, `@`
### Break Semantics ### Break Semantics
- CALL marks current frame as break target - CALL marks current frame as break target
- BREAK unwinds call stack to that target - BREAK unwinds call stack to that target

View File

@ -28,7 +28,7 @@ Run the simple debugger to see what the instructions are doing:
- Arithmetic operations (ADD, SUB, MUL, DIV, MOD) - Arithmetic operations (ADD, SUB, MUL, DIV, MOD)
- Comparison operations (EQ, NEQ, LT, GT, LTE, GTE) - Comparison operations (EQ, NEQ, LT, GT, LTE, GTE)
- Logical operations (NOT, AND/OR patterns with short-circuiting) - Logical operations (NOT, AND/OR patterns with short-circuiting)
- Variable operations (LOAD, STORE) - Variable operations (LOAD, STORE) with **emoji and Unicode support** (💎 = 5)
- Control flow with **relative jumps** (JUMP, JUMP_IF_FALSE, JUMP_IF_TRUE, BREAK, CONTINUE) - Control flow with **relative jumps** (JUMP, JUMP_IF_FALSE, JUMP_IF_TRUE, BREAK, CONTINUE)
- Array operations (MAKE_ARRAY, ARRAY_GET, ARRAY_SET, ARRAY_PUSH, ARRAY_LEN) - Array operations (MAKE_ARRAY, ARRAY_GET, ARRAY_SET, ARRAY_PUSH, ARRAY_LEN)
- Dictionary operations (MAKE_DICT, DICT_GET, DICT_SET, DICT_HAS) - Dictionary operations (MAKE_DICT, DICT_GET, DICT_SET, DICT_HAS)

97
examples/emoji.ts Normal file
View File

@ -0,0 +1,97 @@
import { toBytecode, run } from "#reef"
// Example 1: two emoji-named variables are stored, loaded back, and summed.
// Uses the array (tuple) instruction format.
console.log("=== Emoji Variables ===")
const gemProgram = toBytecode([
  // 💎 = 5
  ["PUSH", 5],
  ["STORE", "💎"],
  // 🌟 = 3
  ["PUSH", 3],
  ["STORE", "🌟"],
  // 💎 + 🌟
  ["LOAD", "💎"],
  ["LOAD", "🌟"],
  ["ADD"],
  ["HALT"]
])
const gemOutcome = await run(gemProgram)
console.log("💎 (5) + 🌟 (3) =", gemOutcome.value)
// Example 2: three emoji-named amounts are stored and then added together.
// Uses the textual assembly format; the source text is kept verbatim.
console.log("\n=== Money Calculator ===")
const walletSource = `
PUSH 100
STORE 💰
PUSH 50
STORE 💵
PUSH 25
STORE 🪙
LOAD 💰
LOAD 💵
ADD
LOAD 🪙
ADD
HALT
`
const walletProgram = toBytecode(walletSource)
const walletOutcome = await run(walletProgram)
console.log("💰 (100) + 💵 (50) + 🪙 (25) =", walletOutcome.value)
// Example 3: a function whose parameters are emoji; 🎨 carries a default of 50.
// The call site pushes one argument (10), so 🎨 is expected to take its default.
// NOTE(review): the two PUSHes right before CALL look like the positional and
// named argument counts (1 and 0 here) — confirm against the VM documentation.
console.log("\n=== Emoji Function ===")
const paintSource = `
MAKE_FUNCTION (🎯 🎨=50) .paint
STORE paint
JUMP .after
.paint:
LOAD 🎯
LOAD 🎨
MUL
RETURN
.after:
LOAD paint
PUSH 10
PUSH 1
PUSH 0
CALL
HALT
`
const paintProgram = toBytecode(paintSource)
const paintOutcome = await run(paintProgram)
console.log("paint(🎯=10, 🎨=50 default) =", paintOutcome.value)
// Example 4: non-emoji Unicode identifiers (Japanese) used as variable names.
console.log("\n=== Unicode Variables ===")
const kanjiProgram = toBytecode([
  // 数字 = 42
  ["PUSH", 42],
  ["STORE", "数字"],
  // ラッキー = 8
  ["PUSH", 8],
  ["STORE", "ラッキー"],
  // 数字 + ラッキー
  ["LOAD", "数字"],
  ["LOAD", "ラッキー"],
  ["ADD"],
  ["HALT"]
])
const kanjiOutcome = await run(kanjiProgram)
console.log("数字 (42) + ラッキー (8) =", kanjiOutcome.value)
// Example 5: a counting loop in which the counter variable and both labels
// are emoji. 🔢 starts at 0 and is incremented while it is less than 5.
console.log("\n=== Emoji Loop ===")
const counterProgram = toBytecode([
  // 🔢 = 0
  ["PUSH", 0],
  ["STORE", "🔢"],

  // loop head: leave the loop once 🔢 < 5 is false
  [".🔁:"],
  ["LOAD", "🔢"],
  ["PUSH", 5],
  ["LT"],
  ["JUMP_IF_FALSE", ".🛑"],

  // 🔢 = 🔢 + 1, then go around again
  ["LOAD", "🔢"],
  ["PUSH", 1],
  ["ADD"],
  ["STORE", "🔢"],
  ["JUMP", ".🔁"],

  // loop exit: leave the final counter value on the stack
  [".🛑:"],
  ["LOAD", "🔢"],
  ["HALT"]
])
const counterOutcome = await run(counterProgram)
console.log("Count from 0 to 5 with 🔢:", counterOutcome.value)

View File

@ -487,12 +487,11 @@ function toBytecodeFromString(str: string): Bytecode /* throws */ {
bytecode.constants.push(toValue(null)) bytecode.constants.push(toValue(null))
operandValue = bytecode.constants.length - 1 operandValue = bytecode.constants.length - 1
} else if (/^[a-zA-Z_].*$/.test(operand)) {
// variable
operandValue = operand
} else { } else {
throw new Error(`Invalid operand: ${operand}`) // Assume it's a variable name if it doesn't match any other pattern
// This allows emoji, Unicode, and other creative identifiers
// (already checked that it doesn't start with . # or match other patterns)
operandValue = operand
} }
} }

View File

@ -10,6 +10,26 @@ export type ValidationResult = {
errors: ValidationError[] errors: ValidationError[]
} }
/**
 * Reports whether `name` is acceptable as a variable or parameter identifier.
 *
 * Characters from any script are allowed (emoji, CJK, ASCII, ...). A name is
 * rejected when it
 *  - is empty,
 *  - begins with one of the syntax prefixes `.`, `#`, `@` or `...`,
 *  - begins with an ASCII digit, or
 *  - contains whitespace or any of ; ( ) [ ] { } = ' " # @ . at any position.
 *
 * @param name - candidate identifier text
 * @returns `true` when none of the rejection rules above apply
 */
function isValidIdentifier(name: string): boolean {
  if (!name) return false

  // Prefixes that introduce labels, constants, named and variadic parameters.
  const syntaxPrefixes = ['.', '#', '@', '...']
  const hasSyntaxPrefix = syntaxPrefixes.some(prefix => name.startsWith(prefix))

  // Only ASCII 0-9 count as a leading digit.
  const lead = name.charAt(0)
  const startsWithDigit = lead >= '0' && lead <= '9'

  // Whitespace and syntax characters are disallowed anywhere in the name.
  const hasForbiddenChar = /[\s;()[\]{}='"#@.]/.test(name)

  return !(hasSyntaxPrefix || startsWithDigit || hasForbiddenChar)
}
// Opcodes that require operands // Opcodes that require operands
const OPCODES_WITH_OPERANDS = new Set([ const OPCODES_WITH_OPERANDS = new Set([
OpCode.PUSH, OpCode.PUSH,
@ -209,7 +229,7 @@ export function validateBytecode(source: string): ValidationResult {
if (param.startsWith('...')) { if (param.startsWith('...')) {
seenVariadic = true seenVariadic = true
const name = param.slice(3) const name = param.slice(3)
if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) { if (!isValidIdentifier(name)) {
errors.push({ errors.push({
line: lineNum, line: lineNum,
message: `Invalid variadic parameter name: ${param}`, message: `Invalid variadic parameter name: ${param}`,
@ -218,7 +238,7 @@ export function validateBytecode(source: string): ValidationResult {
} else if (param.startsWith('@')) { } else if (param.startsWith('@')) {
seenNamed = true seenNamed = true
const name = param.slice(1) const name = param.slice(1)
if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) { if (!isValidIdentifier(name)) {
errors.push({ errors.push({
line: lineNum, line: lineNum,
message: `Invalid named parameter name: ${param}`, message: `Invalid named parameter name: ${param}`,
@ -227,7 +247,7 @@ export function validateBytecode(source: string): ValidationResult {
} else if (param.includes('=')) { } else if (param.includes('=')) {
// Default parameter // Default parameter
const [name, defaultValue] = param.split('=') const [name, defaultValue] = param.split('=')
if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name!.trim())) { if (!isValidIdentifier(name!.trim())) {
errors.push({ errors.push({
line: lineNum, line: lineNum,
message: `Invalid parameter name: ${name}`, message: `Invalid parameter name: ${name}`,
@ -235,7 +255,7 @@ export function validateBytecode(source: string): ValidationResult {
} }
} else { } else {
// Regular parameter // Regular parameter
if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(param)) { if (!isValidIdentifier(param)) {
errors.push({ errors.push({
line: lineNum, line: lineNum,
message: `Invalid parameter name: ${param}`, message: `Invalid parameter name: ${param}`,
@ -276,7 +296,7 @@ export function validateBytecode(source: string): ValidationResult {
// Validate variable names for LOAD/STORE // Validate variable names for LOAD/STORE
if ((opCode === OpCode.LOAD || opCode === OpCode.STORE) && if ((opCode === OpCode.LOAD || opCode === OpCode.STORE) &&
!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(operand)) { !isValidIdentifier(operand)) {
errors.push({ errors.push({
line: lineNum, line: lineNum,
message: `Invalid variable name: ${operand}`, message: `Invalid variable name: ${operand}`,

View File

@ -600,3 +600,117 @@ test("JUMP backward - simple loop", async () => {
expect(result).toEqual({ type: 'number', value: 3 }) expect(result).toEqual({ type: 'number', value: 3 })
}) })
// An emoji identifier must round-trip through STORE/LOAD when the program
// is written in the textual assembly format.
test("emoji variable names - string format", async () => {
  const program = toBytecode(`
PUSH 5
STORE 💎
LOAD 💎
HALT
`)
  expect(await run(program)).toEqual({ type: 'number', value: 5 })
})
// Same round-trip as the string-format test, but using the array (tuple)
// instruction format, followed by an addition on the loaded value.
test("emoji variable names - array format", async () => {
  const program = toBytecode([
    // 💰 = 100
    ["PUSH", 100],
    ["STORE", "💰"],
    // 💰 + 50
    ["LOAD", "💰"],
    ["PUSH", 50],
    ["ADD"],
    ["HALT"]
  ])
  expect(await run(program)).toEqual({ type: 'number', value: 150 })
})
// A Japanese identifier must round-trip through STORE/LOAD in the textual
// assembly format. STORE and LOAD each need the variable name as operand;
// without it the program never exercises a Unicode identifier at all.
test("unicode variable names - Japanese", async () => {
  const bytecode = toBytecode(`
PUSH 42
STORE 変数
LOAD 変数
HALT
`)
  const result = await run(bytecode)
  expect(result).toEqual({ type: 'number', value: 42 })
})
// A CJK identifier must round-trip through STORE/LOAD in the array format.
test("unicode variable names - Chinese", async () => {
  const program = toBytecode([
    // 数字 = 888
    ["PUSH", 888],
    ["STORE", "数字"],
    // read it back
    ["LOAD", "数字"],
    ["HALT"]
  ])
  expect(await run(program)).toEqual({ type: 'number', value: 888 })
})
// Emoji must be usable as positional parameter names: the function adds its
// two parameters and is called with 10 and 20.
// NOTE(review): the two PUSHes right before CALL look like the positional and
// named argument counts (2 and 0 here) — confirm against the VM documentation.
test("emoji in function parameters", async () => {
  const program = toBytecode(`
MAKE_FUNCTION (💎 🌟) .add
STORE add
JUMP .after
.add:
LOAD 💎
LOAD 🌟
ADD
RETURN
.after:
LOAD add
PUSH 10
PUSH 20
PUSH 2
PUSH 0
CALL
HALT
`)
  expect(await run(program)).toEqual({ type: 'number', value: 30 })
})
// Emoji must be usable for a defaulted parameter (🎯=100) and a variadic
// parameter (...🎨). The function returns 🎯, and the expected result of 100
// shows the default being applied at this call site.
test("emoji with defaults and variadic", async () => {
  const program = toBytecode([
    ["MAKE_FUNCTION", ["🎯=100", "...🎨"], ".fn"],
    ["STORE", "fn"],
    ["JUMP", ".after"],

    // function body: return 🎯
    [".fn:"],
    ["LOAD", "🎯"],
    ["RETURN"],

    // call site: no arguments are pushed ahead of the two zeroes
    [".after:"],
    ["LOAD", "fn"],
    ["PUSH", 0],
    ["PUSH", 0],
    ["CALL"],
    ["HALT"]
  ])
  expect(await run(program)).toEqual({ type: 'number', value: 100 })
})
// Emoji and plain ASCII identifiers must coexist in one program:
// 💎 (10) + value (20) + 🌟 (30) is expected to be 60.
test("mixed emoji and regular names", async () => {
  const program = toBytecode([
    // 💎 = 10
    ["PUSH", 10],
    ["STORE", "💎"],
    // value = 20
    ["PUSH", 20],
    ["STORE", "value"],
    // 🌟 = 30
    ["PUSH", 30],
    ["STORE", "🌟"],
    // 💎 + value + 🌟
    ["LOAD", "💎"],
    ["LOAD", "value"],
    ["ADD"],
    ["LOAD", "🌟"],
    ["ADD"],
    ["HALT"]
  ])
  expect(await run(program)).toEqual({ type: 'number', value: 60 })
})