"Double quoted strings" #35
|
|
@ -2,6 +2,7 @@ import { CompilerError } from '#compiler/compilerError.ts'
|
|||
import { parser } from '#parser/shrimp.ts'
|
||||
import * as terms from '#parser/shrimp.terms'
|
||||
import { setGlobals } from '#parser/tokenizer'
|
||||
import { tokenizeCurlyString } from '#parser/curlyTokenizer'
|
||||
import type { SyntaxNode, Tree } from '@lezer/common'
|
||||
import { assert, errorMessage } from '#utils/utils'
|
||||
import { toBytecode, type Bytecode, type ProgramItem, bytecodeToString } from 'reefvm'
|
||||
|
|
@ -123,6 +124,9 @@ export class Compiler {
|
|||
return [[`PUSH`, numberValue]]
|
||||
|
||||
case terms.String: {
|
||||
if (node.firstChild?.type.id === terms.CurlyString)
|
||||
return this.#compileCurlyString(value, input)
|
||||
|
||||
const { parts, hasInterpolation } = getStringParts(node, input)
|
||||
|
||||
// Simple string without interpolation or escapes - extract text directly
|
||||
|
|
@ -853,4 +857,26 @@ export class Compiler {
|
|||
|
||||
return instructions
|
||||
}
|
||||
|
||||
/**
 * Compiles a `{ curly string }` into a flat instruction list.
 *
 * The raw text is split by `tokenizeCurlyString` into literal pieces and
 * interpolated fragments. A literal piece becomes a single `PUSH`. An
 * interpolated fragment arrives as `[source, tree]`, and every direct child
 * of the tree's top node is compiled against that fragment's own source.
 * A final `STR_CONCAT` carries the number of tokens produced by the tokenizer
 * (presumably the number of stack values to join; confirm against reefvm).
 *
 * @param value raw curly-string text handed to `tokenizeCurlyString`
 * @param input source of the enclosing program; not read by this method,
 *              because each fragment is compiled against its own source
 * @returns the instructions for all pieces followed by `STR_CONCAT`
 */
#compileCurlyString(value: string, input: string): ProgramItem[] {
  const parts = tokenizeCurlyString(value)
  const program: ProgramItem[] = []

  for (const part of parts) {
    if (typeof part === 'string') {
      program.push(['PUSH', part])
      continue
    }

    // Interpolated fragment: compile each top-level child of its parse tree.
    const [fragmentSource, fragmentTree] = part
    for (
      let current = fragmentTree.topNode.firstChild;
      current;
      current = current.nextSibling
    ) {
      program.push(...this.#compileNode(current, fragmentSource))
    }
  }

  program.push(['STR_CONCAT', parts.length])

  return program
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -215,7 +215,20 @@ describe('curly strings', () => {
|
|||
}`).toEvaluateTo("\n { one }\n two\n { three }\n ")
|
||||
})
|
||||
|
||||
test("don't interpolate", () => {
|
||||
expect(`{ sum is $(a + b)! }`).toEvaluateTo(` sum is $(a + b)! `)
|
||||
test('interpolates variables', () => {
|
||||
expect(`name = Bob; { Hello $name! }`).toEvaluateTo(` Hello Bob! `)
|
||||
})
|
||||
|
||||
test("doesn't interpolate escaped variables ", () => {
|
||||
expect(`name = Bob; { Hello \\$name }`).toEvaluateTo(` Hello $name `)
|
||||
expect(`a = 1; b = 2; { sum is \\$(a + b)! }`).toEvaluateTo(` sum is $(a + b)! `)
|
||||
})
|
||||
|
||||
test('interpolates expressions', () => {
|
||||
expect(`a = 1; b = 2; { sum is $(a + b)! }`).toEvaluateTo(` sum is 3! `)
|
||||
expect(`a = 1; b = 2; { sum is { $(a + b) }! }`).toEvaluateTo(` sum is { 3 }! `)
|
||||
expect(`a = 1; b = 2; { sum is $(a + (b * b))! }`).toEvaluateTo(` sum is 5! `)
|
||||
expect(`{ This is $({twisted}). }`).toEvaluateTo(` This is twisted. `)
|
||||
expect(`{ This is $({{twisted}}). }`).toEvaluateTo(` This is {twisted}. `)
|
||||
})
|
||||
})
|
||||
|
|
@ -251,7 +251,9 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
|
|||
return (
|
||||
child.type.id === terms.StringFragment ||
|
||||
child.type.id === terms.Interpolation ||
|
||||
child.type.id === terms.EscapeSeq
|
||||
child.type.id === terms.EscapeSeq ||
|
||||
child.type.id === terms.CurlyString
|
||||
|
||||
)
|
||||
})
|
||||
|
||||
|
|
@ -260,7 +262,8 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
|
|||
if (
|
||||
part.type.id !== terms.StringFragment &&
|
||||
part.type.id !== terms.Interpolation &&
|
||||
part.type.id !== terms.EscapeSeq
|
||||
part.type.id !== terms.EscapeSeq &&
|
||||
part.type.id !== terms.CurlyString
|
||||
) {
|
||||
throw new CompilerError(
|
||||
`String child must be StringFragment, Interpolation, or EscapeSeq, got ${part.type.name}`,
|
||||
|
|
|
|||
62
src/parser/curlyTokenizer.ts
Normal file
62
src/parser/curlyTokenizer.ts
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
import { parser } from '#parser/shrimp.ts'
|
||||
import type { Tree } from '@lezer/common'
|
||||
import { isIdentStart, isIdentChar } from './tokenizer'
|
||||
|
||||
/**
 * Turns a `{ curly string }` into separate tokens for interpolation.
 *
 * `value` is the full curly string including its opening `{` and closing `}`;
 * both braces are left out of the result. The returned list alternates between
 * literal text (plain strings, possibly empty) and interpolations, each given
 * as `[source, parser.parse(source)]`.
 *
 * Recognised forms:
 * - `$name`   : interpolates an identifier (`isIdentStart` / `isIdentChar`).
 * - `$(expr)` : interpolates an expression; nested parentheses are balanced.
 * - `\$`      : a literal `$`; the backslash is dropped. A backslash that is
 *               itself preceded by a backslash does not escape the `$`.
 *
 * A `$` that is followed by neither an identifier start nor `(` is kept as
 * literal text. Scanning resumes exactly where an interpolation ends, so
 * back-to-back interpolations such as `$a$b` or `$(a)$(b)` are all recognised.
 *
 * @param value the curly string, braces included
 * @returns literal strings and `[source, Tree]` pairs in source order
 */
export const tokenizeCurlyString = (value: string): (string | [string, Tree])[] => {
  const tokens: (string | [string, Tree])[] = []
  let pos = 1 // skip opening '{'
  let start = 1 // start of the literal run currently being collected

  while (pos < value.length) {
    if (value[pos] !== '$') {
      pos++
      continue
    }

    // escaped \$ : flush the literal without the backslash, keep the '$'
    if (value[pos - 1] === '\\' && value[pos - 2] !== '\\') {
      tokens.push(value.slice(start, pos - 1))
      start = pos
      pos++
      continue
    }

    const next = value[pos + 1]

    if (next === '(' && pos + 2 < value.length) {
      tokens.push(value.slice(start, pos))

      // Find the ')' that closes this '$(' while balancing nested parens.
      const exprStart = pos + 2 // skip '$('
      let end = exprStart
      let depth = 0
      while (end < value.length) {
        const c = value[end]
        if (c === '(') depth++
        else if (c === ')' && --depth < 0) break
        end++
      }

      const input = value.slice(exprStart, end)
      tokens.push([input, parser.parse(input)])

      // Resume right after ')', so the very next character is still examined.
      pos = start = end + 1
      continue
    }

    if (next !== undefined && isIdentStart(next.charCodeAt(0))) {
      tokens.push(value.slice(start, pos))

      let end = pos + 1 // skip '$'
      while (end < value.length && isIdentChar(value.charCodeAt(end))) end++

      const input = value.slice(pos + 1, end)
      tokens.push([input, parser.parse(input)])

      // Resume at the first non-identifier character (it may be another '$').
      pos = start = end
      continue
    }

    // Lone '$' (e.g. "$5"): not an interpolation, keep it in the literal run.
    pos++
  }

  // Trailing literal, without the closing '}'.
  tokens.push(value.slice(start, value.length - 1))

  return tokens
}
|
||||
|
|
@ -20,9 +20,7 @@ export const tokenizer = new ExternalTokenizer(
|
|||
const ch = getFullCodePoint(input, 0)
|
||||
|
||||
// Handle curly strings
|
||||
if (ch === 123) { // {
|
||||
return consumeCurlyString(input, stack)
|
||||
}
|
||||
if (ch === 123 /* { */) return consumeCurlyString(input, stack)
|
||||
|
||||
if (!isWordChar(ch)) return
|
||||
|
||||
|
|
@ -32,7 +30,7 @@ export const tokenizer = new ExternalTokenizer(
|
|||
// Don't consume things that start with - or + followed by a digit (negative/positive numbers)
|
||||
if ((ch === 45 /* - */ || ch === 43) /* + */ && isDigit(input.peek(1))) return
|
||||
|
||||
const isValidStart = isLowercaseLetter(ch) || isEmojiOrUnicode(ch)
|
||||
const isValidStart = isIdentStart(ch)
|
||||
const canBeWord = stack.canShift(Word)
|
||||
|
||||
// Consume all word characters, tracking if it remains a valid identifier
|
||||
|
|
@ -125,13 +123,7 @@ const consumeWordToken = (
|
|||
}
|
||||
|
||||
// Track identifier validity: must be lowercase, digit, dash, question mark, or emoji/unicode
|
||||
if (
|
||||
!isLowercaseLetter(ch) &&
|
||||
!isDigit(ch) &&
|
||||
ch !== 45 /* - */ &&
|
||||
ch !== 63 /* ? */ &&
|
||||
!isEmojiOrUnicode(ch)
|
||||
) {
|
||||
if (!isIdentChar(ch)) {
|
||||
if (!canBeWord) break
|
||||
isValidIdentifier = false
|
||||
}
|
||||
|
|
@ -163,6 +155,7 @@ const consumeRestOfWord = (input: InputStream, startPos: number, canBeWord: bool
|
|||
return pos
|
||||
}
|
||||
|
||||
// Consumes { curly strings } and tracks braces so you can { have { braces { inside { braces } } }
|
||||
const consumeCurlyString = (input: InputStream, stack: Stack) => {
|
||||
if (!stack.canShift(CurlyString)) return
|
||||
|
||||
|
|
@ -259,6 +252,14 @@ const chooseIdentifierToken = (input: InputStream, stack: Stack): number => {
|
|||
}
|
||||
|
||||
// Character classification helpers
|
||||
/**
 * Reports whether the code point `ch` may begin an identifier: a lowercase
 * letter (per `isLowercaseLetter`) or an emoji/unicode character (per
 * `isEmojiOrUnicode`).
 */
export const isIdentStart = (ch: number): boolean => {
  if (isLowercaseLetter(ch)) return true
  return isEmojiOrUnicode(ch)
}
|
||||
|
||||
/**
 * Reports whether the code point `ch` may appear inside an identifier:
 * a lowercase letter, a digit, `-`, `?`, or an emoji/unicode character.
 */
export const isIdentChar = (ch: number): boolean => {
  const isDashOrQuestion = ch === 45 /* - */ || ch === 63 /* ? */
  if (isDashOrQuestion) return true

  return isLowercaseLetter(ch) || isDigit(ch) || isEmojiOrUnicode(ch)
}
|
||||
|
||||
/**
 * Reports whether the code point `ch` is a space, a tab, or a carriage return.
 * A line feed (10) is not treated as whitespace by this helper.
 */
const isWhiteSpace = (ch: number): boolean => {
  switch (ch) {
    case 32: // space
    case 9: // tab
    case 13: // \r
      return true
    default:
      return false
  }
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user