From df9af925d33b08a32e69961c212db166e9d97146 Mon Sep 17 00:00:00 2001 From: Chris Wanstrath <2+defunkt@users.noreply.github.com> Date: Tue, 14 Oct 2025 12:14:27 -0700 Subject: [PATCH] STR_CONCAT #n --- GUIDE.md | 43 +++++++++++ SPEC.md | 50 ++++++++++++ src/bytecode.ts | 4 + src/opcode.ts | 3 + src/validator.ts | 2 + src/vm.ts | 10 +++ tests/basic.test.ts | 180 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 292 insertions(+) diff --git a/GUIDE.md b/GUIDE.md index c2587eb..850102a 100644 --- a/GUIDE.md +++ b/GUIDE.md @@ -235,6 +235,9 @@ CALL - `DICT_SET` - Pop value, key, dict; mutate dict - `DICT_HAS` - Pop key and dict, push boolean +### Strings +- `STR_CONCAT #N` - Pop N values, convert to strings, concatenate, push result + ### Exceptions - `PUSH_TRY .catch` - Register exception handler - `PUSH_FINALLY .finally` - Add finally to current handler @@ -431,6 +434,46 @@ TRY_CALL unknown ; Pushes "unknown" as string - Shell-like languages where unknown identifiers become strings - Templating systems with optional transformers +### String Concatenation +Build strings from multiple values: +``` +; Simple concatenation +PUSH "Hello" +PUSH " " +PUSH "World" +STR_CONCAT #3 ; → "Hello World" + +; With variables +PUSH "Name: " +LOAD userName +STR_CONCAT #2 ; → "Name: Alice" + +; With expressions and type coercion +PUSH "Result: " +PUSH 10 +PUSH 5 +ADD +STR_CONCAT #2 ; → "Result: 15" + +; Template-like interpolation +PUSH "User " +LOAD userId +PUSH " has " +LOAD count +PUSH " items" +STR_CONCAT #5 ; → "User 42 has 3 items" +``` + +**Composability**: Results can be concatenated again +``` +PUSH "Hello" +PUSH " " +PUSH "World" +STR_CONCAT #3 +PUSH "!" +STR_CONCAT #2 ; → "Hello World!" +``` + ## Key Concepts ### Truthiness diff --git a/SPEC.md b/SPEC.md index 9c64f85..a0c0efa 100644 --- a/SPEC.md +++ b/SPEC.md @@ -509,6 +509,56 @@ Key is coerced to string. Key is coerced to string. 
**Errors**: Throws if not dict +### String Operations + +#### STR_CONCAT +**Operand**: Number of values to concatenate (number) +**Effect**: Concatenate N values from stack into a single string +**Stack**: [val1, val2, ..., valN] → [string] + +**Behavior**: +1. Pop N values from stack (in reverse order) +2. Convert each value to string using `toString()` +3. Concatenate all strings in order (val1 + val2 + ... + valN) +4. Push resulting string onto stack + +**Type Coercion**: +- Numbers → string representation (e.g., `42` → `"42"`) +- Booleans → `"true"` or `"false"` +- Null → `"null"` +- Strings → identity +- Arrays → `"[item, item]"` format +- Dicts → `"{key: value, ...}"` format +- Functions → `""` + +**Use Cases**: +- Building dynamic strings from multiple parts +- Template string interpolation +- String formatting with mixed types + +**Composability**: +- Results can be concatenated again with additional STR_CONCAT operations +- Can leave values on stack (only consumes specified count) + +**Example**: +``` +PUSH "Hello" +PUSH " " +PUSH "World" +STR_CONCAT #3 ; → "Hello World" + +PUSH "Count: " +PUSH 42 +PUSH ", Active: " +PUSH true +STR_CONCAT #4 ; → "Count: 42, Active: true" +``` + +**Edge Cases**: +- `STR_CONCAT #0` produces empty string `""` +- `STR_CONCAT #1` converts single value to string +- If the stack has fewer values than count, behavior is implementation-defined (an implementation may treat the missing values as empty strings or throw); the VM in `src/vm.ts` concatenates only the values that are present and does not throw + ### TypeScript Interop #### CALL_NATIVE diff --git a/src/bytecode.ts b/src/bytecode.ts index c836eb2..66b21ea 100644 --- a/src/bytecode.ts +++ b/src/bytecode.ts @@ -70,6 +70,9 @@ type InstructionTuple = | ["DICT_SET"] | ["DICT_HAS"] + // Strings + | ["STR_CONCAT", number] + // Native | ["CALL_NATIVE", string] @@ -339,6 +342,7 @@ function toBytecodeFromArray(program: ProgramItem[]): Bytecode /* throws */ { case "MAKE_ARRAY": case "MAKE_DICT": + case "STR_CONCAT": operandValue = operand as number break diff --git a/src/opcode.ts b/src/opcode.ts index 
d5d7dcb..22c97d2 100644 --- a/src/opcode.ts +++ b/src/opcode.ts @@ -59,6 +59,9 @@ export enum OpCode { DICT_SET, // operand: none | stack: [dict, key, value] → [] | mutates dict DICT_HAS, // operand: none | stack: [dict, key] → [boolean] + // strings + STR_CONCAT, // operand: value count (number) | stack: [val1, ..., valN] → [string] | concatenate N values + // typescript interop CALL_NATIVE, // operand: function name (identifier) | stack: [...args] → [result] | consumes entire stack diff --git a/src/validator.ts b/src/validator.ts index 6e63071..9766178 100644 --- a/src/validator.ts +++ b/src/validator.ts @@ -43,6 +43,7 @@ const OPCODES_WITH_OPERANDS = new Set([ OpCode.PUSH_FINALLY, OpCode.MAKE_ARRAY, OpCode.MAKE_DICT, + OpCode.STR_CONCAT, OpCode.MAKE_FUNCTION, OpCode.CALL_NATIVE, ]) @@ -91,6 +92,7 @@ const OPCODES_REQUIRING_IMMEDIATE_OR_LABEL = new Set([ const OPCODES_REQUIRING_IMMEDIATE = new Set([ OpCode.MAKE_ARRAY, OpCode.MAKE_DICT, + OpCode.STR_CONCAT, ]) export function validateBytecode(source: string): ValidationResult { diff --git a/src/vm.ts b/src/vm.ts index b4e804b..1958809 100644 --- a/src/vm.ts +++ b/src/vm.ts @@ -334,6 +334,16 @@ export class VM { this.stack.push({ type: 'boolean', value: hasDict.value.has(toString(hasKey)) }) break + case OpCode.STR_CONCAT: + let count = instruction.operand as number + let parts = [] + + while (count-- > 0 && this.stack.length) + parts.unshift(toString(this.stack.pop()!)) + + this.stack.push(toValue(parts.join(''))) + break + case OpCode.MAKE_FUNCTION: const fnDefIdx = instruction.operand as number const fnDef = this.constants[fnDefIdx] diff --git a/tests/basic.test.ts b/tests/basic.test.ts index 5e14837..4d7d22b 100644 --- a/tests/basic.test.ts +++ b/tests/basic.test.ts @@ -674,6 +674,186 @@ test("DICT_HAS - checks key missing", async () => { expect(await run(toBytecode(str))).toEqual({ type: 'boolean', value: false }) }) +test("STR_CONCAT - concats together strings", async () => { + const str = ` + PUSH "Hi " + 
PUSH "friend" + PUSH "!" + STR_CONCAT #3 + ` + + expect(await run(toBytecode(str))).toEqual({ type: 'string', value: "Hi friend!" }) + + const str2 = ` + PUSH "Holy smokes!" + PUSH "It's " + PUSH "alive!" + STR_CONCAT #2 + ` + + expect(await run(toBytecode(str2))).toEqual({ type: 'string', value: "It's alive!" }) + + + const str3 = ` + PUSH 1 + PUSH " + " + PUSH 1 + PUSH " = " + PUSH 1 + PUSH 1 + ADD + STR_CONCAT #5 + ` + + expect(await run(toBytecode(str3))).toEqual({ type: 'string', value: "1 + 1 = 2" }) +}) + +test("STR_CONCAT - empty concat (count=0)", async () => { + const str = ` + PUSH "leftover" + STR_CONCAT #0 + ` + expect(await run(toBytecode(str))).toEqual({ type: 'string', value: "" }) +}) + +test("STR_CONCAT - single string", async () => { + const str = ` + PUSH "hello" + STR_CONCAT #1 + ` + expect(await run(toBytecode(str))).toEqual({ type: 'string', value: "hello" }) +}) + +test("STR_CONCAT - converts numbers to strings", async () => { + const str = ` + PUSH 42 + PUSH 100 + PUSH 7 + STR_CONCAT #3 + ` + expect(await run(toBytecode(str))).toEqual({ type: 'string', value: "421007" }) +}) + +test("STR_CONCAT - converts booleans to strings", async () => { + const str = ` + PUSH "Result: " + PUSH true + STR_CONCAT #2 + ` + expect(await run(toBytecode(str))).toEqual({ type: 'string', value: "Result: true" }) + + const str2 = ` + PUSH false + PUSH " is false" + STR_CONCAT #2 + ` + expect(await run(toBytecode(str2))).toEqual({ type: 'string', value: "false is false" }) +}) + +test("STR_CONCAT - converts null to strings", async () => { + const str = ` + PUSH "Value: " + PUSH null + STR_CONCAT #2 + ` + expect(await run(toBytecode(str))).toEqual({ type: 'string', value: "Value: null" }) +}) + +test("STR_CONCAT - mixed types", async () => { + const str = ` + PUSH "Count: " + PUSH 42 + PUSH ", Active: " + PUSH true + PUSH ", Total: " + PUSH null + STR_CONCAT #6 + ` + expect(await run(toBytecode(str))).toEqual({ type: 'string', value: "Count: 42, Active: true, 
Total: null" }) +}) + +test("STR_CONCAT - array format", async () => { + const bytecode = toBytecode([ + ["PUSH", "Hello"], + ["PUSH", " "], + ["PUSH", "World"], + ["STR_CONCAT", 3], + ["HALT"] + ]) + + const result = await run(bytecode) + expect(result).toEqual({ type: 'string', value: "Hello World" }) +}) + +test("STR_CONCAT - with variables", async () => { + const str = ` + PUSH "Alice" + STORE name + PUSH "Hello, " + LOAD name + PUSH "!" + STR_CONCAT #3 + ` + expect(await run(toBytecode(str))).toEqual({ type: 'string', value: "Hello, Alice!" }) +}) + +test("STR_CONCAT - composable (multiple concatenations)", async () => { + const str = ` + PUSH "Hello" + PUSH " " + PUSH "World" + STR_CONCAT #3 + PUSH "!" + STR_CONCAT #2 + ` + expect(await run(toBytecode(str))).toEqual({ type: 'string', value: "Hello World!" }) +}) + +test("STR_CONCAT - with emoji and unicode", async () => { + const str = ` + PUSH "Hello " + PUSH "🌍" + PUSH "!" + STR_CONCAT #3 + ` + expect(await run(toBytecode(str))).toEqual({ type: 'string', value: "Hello 🌍!" }) + + const str2 = ` + PUSH "こんにちは" + PUSH "世界" + STR_CONCAT #2 + ` + expect(await run(toBytecode(str2))).toEqual({ type: 'string', value: "こんにちは世界" }) +}) + +test("STR_CONCAT - with expressions", async () => { + const str = ` + PUSH "Result: " + PUSH 10 + PUSH 5 + ADD + STR_CONCAT #2 + ` + expect(await run(toBytecode(str))).toEqual({ type: 'string', value: "Result: 15" }) +}) + +test("STR_CONCAT - large concat", async () => { + const str = ` + PUSH "a" + PUSH "b" + PUSH "c" + PUSH "d" + PUSH "e" + PUSH "f" + PUSH "g" + PUSH "h" + PUSH "i" + PUSH "j" + STR_CONCAT #10 + ` + expect(await run(toBytecode(str))).toEqual({ type: 'string', value: "abcdefghij" }) +}) + test("BREAK - throws error when no break target", async () => { // BREAK requires a break target frame on the call stack // A single function call has no previous frame to mark as break target