From e915868b7c60bacf1e2f5ee797a93dd57ee8a164 Mon Sep 17 00:00:00 2001
From: Chris Wanstrath <chris@ozmm.org>
Date: Thu, 6 Nov 2025 21:04:23 -0800
Subject: [PATCH] interpolation in { curly strings }

---
 src/compiler/compiler.ts            | 26 ++++++++++++
 src/compiler/tests/literals.test.ts | 17 +++++++-
 src/compiler/utils.ts               |  7 +++-
 src/parser/curlyTokenizer.ts        | 62 +++++++++++++++++++++++++++++
 src/parser/tokenizer.ts             | 17 +++++---
 5 files changed, 120 insertions(+), 9 deletions(-)
 create mode 100644 src/parser/curlyTokenizer.ts

diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts
index 429a94b..5f482d1 100644
--- a/src/compiler/compiler.ts
+++ b/src/compiler/compiler.ts
@@ -2,6 +2,7 @@ import { CompilerError } from '#compiler/compilerError.ts'
 import { parser } from '#parser/shrimp.ts'
 import * as terms from '#parser/shrimp.terms'
 import { setGlobals } from '#parser/tokenizer'
+import { tokenizeCurlyString } from '#parser/curlyTokenizer'
 import type { SyntaxNode, Tree } from '@lezer/common'
 import { assert, errorMessage } from '#utils/utils'
 import { toBytecode, type Bytecode, type ProgramItem, bytecodeToString } from 'reefvm'
@@ -112,6 +113,9 @@ export class Compiler {
         return [[`PUSH`, number]]
 
       case terms.String: {
+        if (node.firstChild?.type.id === terms.CurlyString)
+          return this.#compileCurlyString(value, input)
+
         const { parts, hasInterpolation } = getStringParts(node, input)
 
         // Simple string without interpolation or escapes - extract text directly
@@ -772,4 +776,26 @@ export class Compiler {
 
     return instructions
   }
+
+  // Compiles a { curly string }: PUSHes each literal chunk, compiles each
+  // interpolated snippet against its own source text, then STR_CONCATs the parts.
+  #compileCurlyString(value: string, input: string): ProgramItem[] {
+    const parts = tokenizeCurlyString(value)
+    const program: ProgramItem[] = []
+
+    for (const part of parts) {
+      if (typeof part === 'string') {
+        program.push(['PUSH', part])
+        continue
+      }
+
+      // `tree` was parsed from `source`, so its nodes are compiled against that text
+      const [source, tree] = part
+      for (let n = tree.topNode.firstChild; n; n = n.nextSibling)
+        program.push(...this.#compileNode(n, source))
+    }
+
+    program.push(['STR_CONCAT', parts.length])
+    return program
+  }
 }
diff --git a/src/compiler/tests/literals.test.ts b/src/compiler/tests/literals.test.ts
index 15d77e1..c3481f2 100644
--- a/src/compiler/tests/literals.test.ts
+++ b/src/compiler/tests/literals.test.ts
@@ -177,7 +177,20 @@ describe('curly strings', () => {
     }`).toEvaluateTo("\n      { one }\n      two\n      { three }\n    ")
   })
 
-  test("don't interpolate", () => {
-    expect(`{ sum is $(a + b)! }`).toEvaluateTo(` sum is $(a + b)! `)
+  test('interpolates variables', () => {
+    expect(`name = Bob; { Hello $name! }`).toEvaluateTo(` Hello Bob! `) // the name stops at '!'
+  })
+
+  test("doesn't interpolate escaped variables ", () => {
+    expect(`name = Bob; { Hello \\$name }`).toEvaluateTo(` Hello $name `) // \\$ in this template literal is \$ in the program
+    expect(`a = 1; b = 2; { sum is \\$(a + b)! }`).toEvaluateTo(` sum is $(a + b)! `)
+  })
+
+  test('interpolates expressions', () => {
+    expect(`a = 1; b = 2; { sum is $(a + b)! }`).toEvaluateTo(` sum is 3! `)
+    expect(`a = 1; b = 2; { sum is { $(a + b) }! }`).toEvaluateTo(` sum is { 3 }! `) // braces around $( ) stay as text
+    expect(`a = 1; b = 2; { sum is $(a + (b * b))! }`).toEvaluateTo(` sum is 5! `) // nested parens inside $( )
+    expect(`{ This is $({twisted}). }`).toEvaluateTo(` This is twisted. `) // the expression is itself a curly string
+    expect(`{ This is $({{twisted}}). }`).toEvaluateTo(` This is {twisted}. `) // inner braces of the nested string are kept
+  })
 })
\ No newline at end of file
diff --git a/src/compiler/utils.ts b/src/compiler/utils.ts
index 20afa96..c424be2 100644
--- a/src/compiler/utils.ts
+++ b/src/compiler/utils.ts
@@ -251,7 +251,9 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
     return (
       child.type.id === terms.StringFragment ||
       child.type.id === terms.Interpolation ||
-      child.type.id === terms.EscapeSeq
+      child.type.id === terms.EscapeSeq ||
+      child.type.id === terms.CurlyString
+
     )
   })
 
@@ -260,7 +262,8 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
     if (
       part.type.id !== terms.StringFragment &&
       part.type.id !== terms.Interpolation &&
-      part.type.id !== terms.EscapeSeq
+      part.type.id !== terms.EscapeSeq &&
+      part.type.id !== terms.CurlyString
     ) {
       throw new CompilerError(
         `String child must be StringFragment, Interpolation, or EscapeSeq, got ${part.type.name}`,
diff --git a/src/parser/curlyTokenizer.ts b/src/parser/curlyTokenizer.ts
new file mode 100644
index 0000000..6a6de66
--- /dev/null
+++ b/src/parser/curlyTokenizer.ts
@@ -0,0 +1,62 @@
+import { parser } from '#parser/shrimp.ts'
+import type { Tree } from '@lezer/common'
+import { isIdentStart, isIdentChar } from './tokenizer'
+
+// Splits the raw text of a { curly string } into parts for interpolation.
+// `value` is expected to include the outer braces. Literal text is returned as
+// strings; each `$name` or `$(expr)` becomes a [source, parsed tree] pair. A `\$`
+// yields a literal `$`, and a `$` that starts no interpolation stays as text.
+export const tokenizeCurlyString = (value: string): (string | [string, Tree])[] => {
+  const tokens: (string | [string, Tree])[] = []
+  const end = value.length - 1 // index of the closing '}'
+  let start = 1 // where the pending literal chunk begins, just past '{'
+  let pos = 1
+
+  while (pos < end) {
+    if (value[pos] !== '$') {
+      pos++
+      continue
+    }
+
+    // escaped \$ - drop the backslash and keep the '$' as literal text
+    if (value[pos - 1] === '\\' && value[pos - 2] !== '\\') {
+      tokens.push(value.slice(start, pos - 1))
+      start = pos++
+      continue
+    }
+
+    let from = pos + 1 // start of the interpolated source, just past '$'
+    let to = from // one past the end of the interpolated source
+    let resume = -1 // where scanning continues; -1 means no interpolation here
+
+    if (value[from] === '(') {
+      // $(expr) - find the ')' that balances the opening '('
+      to = ++from
+      for (let depth = 0; to < end; to++) {
+        if (value[to] === '(') depth++
+        else if (value[to] === ')' && --depth < 0) break
+      }
+      if (to < end) resume = to + 1 // skip ')'
+    } else if (from < end && isIdentStart(value.charCodeAt(from))) {
+      // $name - consume the identifier
+      while (to < end && isIdentChar(value.charCodeAt(to))) to++
+      resume = to
+    }
+
+    // A lone '$' or an unterminated '$(' is ordinary text, not a reason to stop
+    if (resume < 0) {
+      pos++
+      continue
+    }
+
+    tokens.push(value.slice(start, pos))
+    const input = value.slice(from, to)
+    tokens.push([input, parser.parse(input)])
+
+    // Resume ON the next character so back-to-back `$a$b` are both seen
+    start = pos = resume
+  }
+
+  tokens.push(value.slice(start, end))
+  return tokens
+}
\ No newline at end of file
diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts
index 89a2e14..78970c8 100644
--- a/src/parser/tokenizer.ts
+++ b/src/parser/tokenizer.ts
@@ -20,9 +20,7 @@ export const tokenizer = new ExternalTokenizer(
     const ch = getFullCodePoint(input, 0)
 
     // Handle curly strings
-    if (ch === 123) { // {
-      return consumeCurlyString(input, stack)
-    }
+    if (ch === 123 /* { */) return consumeCurlyString(input, stack)
 
     if (!isWordChar(ch)) return
 
@@ -32,7 +30,7 @@ export const tokenizer = new ExternalTokenizer(
     // Don't consume things that start with - or + followed by a digit (negative/positive numbers)
     if ((ch === 45 /* - */ || ch === 43) /* + */ && isDigit(input.peek(1))) return
 
-    const isValidStart = isLowercaseLetter(ch) || isEmojiOrUnicode(ch)
+    const isValidStart = isIdentStart(ch)
     const canBeWord = stack.canShift(Word)
 
     // Consume all word characters, tracking if it remains a valid identifier
@@ -125,7 +123,7 @@ const consumeWordToken = (
     }
 
     // Track identifier validity: must be lowercase, digit, dash, or emoji/unicode
-    if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 /* - */ && ch !== 63 /* ? */ && !isEmojiOrUnicode(ch)) {
+    if (!isIdentChar(ch)) {
       if (!canBeWord) break
       isValidIdentifier = false
     }
@@ -157,6 +155,7 @@ const consumeRestOfWord = (input: InputStream, startPos: number, canBeWord: bool
   return pos
 }
 
+// Consumes { curly strings } and tracks braces so you can { have { braces { inside { braces } } }
 const consumeCurlyString = (input: InputStream, stack: Stack) => {
   if (!stack.canShift(CurlyString)) return
 
@@ -239,6 +238,14 @@ const chooseIdentifierToken = (input: InputStream, stack: Stack): number => {
 }
 
 // Character classification helpers
+// An identifier may begin with whatever isLowercaseLetter or isEmojiOrUnicode accepts
+export const isIdentStart = (ch: number): boolean =>
+  isLowercaseLetter(ch) || isEmojiOrUnicode(ch)
+
+// An identifier may continue with any start character, a digit, '-' or '?'
+export const isIdentChar = (ch: number): boolean =>
+  isIdentStart(ch) || isDigit(ch) || ch === 45 /* - */ || ch === 63 /* ? */
+
 const isWhiteSpace = (ch: number): boolean => {
   return ch === 32 /* space */ || ch === 9 /* tab */ || ch === 13 /* \r */
 }