/**
 * Compiles a `{ curly string }` literal into bytecode.
 *
 * The raw literal is split by `tokenizeCurlyString` into literal text
 * fragments and interpolations. Text fragments become `PUSH` instructions;
 * each interpolation is compiled from its own separately parsed tree. A final
 * `STR_CONCAT` is emitted with the number of tokens.
 *
 * NOTE(review): the `STR_CONCAT` operand is the token count, which presumes
 * every interpolation leaves exactly one value on the stack — confirm for
 * snippets that parse to zero or several top-level nodes.
 *
 * @param value - Source text of the curly string, passed to `tokenizeCurlyString`.
 * @param input - Source of the enclosing program. Not read here: every
 *   interpolation is compiled against its own snippet text.
 * @returns The instructions for the whole literal.
 */
#compileCurlyString(value: string, input: string): ProgramItem[] {
  const tokens = tokenizeCurlyString(value)
  const program: ProgramItem[] = []

  for (const token of tokens) {
    if (typeof token === 'string') {
      program.push(['PUSH', token])
      continue
    }

    // Interpolation token: [snippet source, parsed tree]. Node offsets in the
    // tree are relative to the snippet, so compile against `snippet`.
    const [snippet, tree] = token
    for (let node = tree.topNode.firstChild; node; node = node.nextSibling) {
      program.push(...this.#compileNode(node, snippet))
    }
  }

  program.push(['STR_CONCAT', tokens.length])
  return program
}
/**
 * Splits a `{ curly string }` into literal text and interpolation tokens.
 *
 * `value` is the whole literal including its surrounding braces: the first
 * and last characters are skipped. Two interpolation forms are recognised:
 *
 * - `$name`   — `$` followed by an identifier (`isIdentStart` / `isIdentChar`)
 * - `$(expr)` — `$` followed by a parenthesised expression; nested parentheses
 *               are balanced
 *
 * `\$` yields a literal `$` (the backslash is dropped) unless the backslash is
 * itself preceded by a backslash. A `$` that starts neither form — for example
 * `$5`, `$ `, or a `$(` with no matching `)` — is kept as literal text.
 *
 * @param value - Source text of the curly string, braces included.
 * @returns Tokens in source order. A `string` is literal text (possibly empty);
 *   a `[source, tree]` pair is an interpolation, where `tree` is the result of
 *   `parser.parse(source)`. The last token is always the trailing literal text.
 */
export const tokenizeCurlyString = (value: string): (string | [string, Tree])[] => {
  const tokens: (string | [string, Tree])[] = []
  // Index of the closing '}' — the content occupies [1, end).
  const end = value.length - 1
  let start = 1 // start of the literal run not yet pushed
  let pos = 1

  while (pos < end) {
    if (value[pos] !== '$') {
      pos++
      continue
    }

    // Escaped `\$`: flush the text before the backslash and let '$' begin the
    // next literal run.
    if (value[pos - 1] === '\\' && value[pos - 2] !== '\\') {
      tokens.push(value.slice(start, pos - 1))
      start = pos
      pos++
      continue
    }

    // Locate the interpolation source as [srcStart, srcEnd); `next` is where
    // scanning resumes. srcEnd stays -1 when the '$' is not an interpolation.
    let srcStart = pos + 1
    let srcEnd = -1
    let next = pos + 1

    if (value[pos + 1] === '(') {
      srcStart = pos + 2
      let depth = 1
      let close = srcStart
      while (close < end) {
        if (value[close] === '(') depth++
        else if (value[close] === ')' && --depth === 0) break
        close++
      }
      // Only accept a balanced `$( ... )`; otherwise the closing '}' would be
      // swallowed into the expression source.
      if (close < end) {
        srcEnd = close
        next = close + 1 // skip ')'
      }
    } else {
      // The classification helpers take full code points (the Lezer tokenizer
      // feeds them getFullCodePoint), so read code points rather than UTF-16
      // units and step over surrogate pairs.
      const first = value.codePointAt(srcStart)
      if (srcStart < end && first !== undefined && isIdentStart(first)) {
        let identEnd = srcStart
        while (identEnd < end) {
          const cp = value.codePointAt(identEnd)
          if (cp === undefined || !isIdentChar(cp)) break
          identEnd += cp > 0xffff ? 2 : 1
        }
        srcEnd = identEnd
        next = identEnd
      }
    }

    if (srcEnd < 0) {
      // Not an interpolation: leave '$' inside the current literal run instead
      // of abandoning the rest of the string.
      pos++
      continue
    }

    const source = value.slice(srcStart, srcEnd)
    tokens.push(value.slice(start, pos), [source, parser.parse(source)])

    // Resume exactly at the character after the interpolation so an adjacent
    // '$' (as in `$a$b` or `$(a)$(b)`) is examined rather than skipped.
    start = next
    pos = next
  }

  tokens.push(value.slice(start, end))

  return tokens
}
/**
 * Whether code point `ch` may begin an identifier: a lowercase letter or an
 * emoji/unicode character, as classified by the sibling helpers.
 */
export const isIdentStart = (ch: number): boolean => isLowercaseLetter(ch) || isEmojiOrUnicode(ch)

/**
 * Whether code point `ch` may appear inside an identifier: a lowercase letter,
 * a digit, `-`, `?`, or an emoji/unicode character.
 */
export const isIdentChar = (ch: number): boolean => {
  if (isLowercaseLetter(ch) || isDigit(ch)) return true
  if (ch === 45 /* - */ || ch === 63 /* ? */) return true
  return isEmojiOrUnicode(ch)
}