shrimp/src/parser/curlyTokenizer.ts
2025-11-08 16:13:40 -08:00

62 lines
1.7 KiB
TypeScript

import { parser } from '#parser/shrimp.ts'
import type { SyntaxNode } from '@lezer/common'
import { isIdentStart, isIdentChar } from './tokenizer'
/**
 * Turns a `{ curly string }` into literal strings and parsed nodes for interpolation.
 *
 * `value` is taken with its delimiters: the first and the last character are
 * dropped from the output and everything in between is scanned.
 *
 * Recognised forms:
 * - `$(expr)` - the text up to the matching `)` (nested parentheses are
 *   balanced) is handed to `parser.parse`.
 * - `$name`   - `$` followed by a character accepted by `isIdentStart`; the
 *   name extends over every following character accepted by `isIdentChar`.
 * - `\$`      - escaped dollar: the backslash is removed and the `$` is kept as
 *   literal text. The literal is split at that point, so the text before and
 *   after the escape arrive as two separate string tokens. `\\$` is not an
 *   escape (the `$` still starts an interpolation).
 * - any other `$` is kept as ordinary literal text.
 *
 * The literal preceding every interpolation and the trailing literal are
 * always emitted, even when empty.
 *
 * @param value The curly string, including its opening and closing delimiter.
 * @returns Tokens in source order: a `string` for literal text, or a
 *   `[source, node]` tuple holding the interpolated source text and the
 *   `topNode` of its parse tree.
 */
export const tokenizeCurlyString = (value: string): (string | [string, SyntaxNode])[] => {
  const tokens: (string | [string, SyntaxNode])[] = []
  // Index of the closing delimiter; the trailing literal is clipped here.
  const end = Math.max(value.length - 1, 1)
  let start = 1 // where the pending literal begins (just past the opening delimiter)
  let pos = 1

  while (pos < end) {
    if (value[pos] !== '$') {
      pos++
      continue
    }

    // escaped \$ (but not \\$, where the backslash itself is escaped)
    if (value[pos - 1] === '\\' && value[pos - 2] !== '\\') {
      tokens.push(value.slice(start, pos - 1)) // literal up to, not including, the backslash
      start = pos // the '$' becomes the first character of the next literal
      pos++
      continue
    }

    if (value[pos + 1] === '(' && pos + 2 < value.length) {
      // $(expr): find the ')' that balances the opening '('
      let close = pos + 2
      let depth = 0
      while (close < value.length) {
        const ch = value[close]
        if (ch === '(') {
          depth++
        } else if (ch === ')') {
          if (depth === 0) break
          depth--
        }
        close++
      }
      // When no matching ')' exists, `close` is value.length and the rest of
      // the input is passed to the parser unchanged.
      const input = value.slice(pos + 2, close) // skip '$('
      tokens.push(value.slice(start, pos))
      tokens.push([input, parser.parse(input).topNode])
      // Resume directly after ')' so an interpolation that follows immediately
      // (e.g. `$(a)$b`) is still seen.
      pos = close + 1
      start = pos
      continue
    }

    if (isIdentStart(value.charCodeAt(pos + 1))) {
      // $name: consume identifier characters
      let stop = pos + 1
      while (stop < value.length && isIdentChar(value.charCodeAt(stop))) stop++
      const input = value.slice(pos + 1, stop) // skip '$'
      tokens.push(value.slice(start, pos))
      tokens.push([input, parser.parse(input).topNode])
      pos = stop
      start = pos
      continue
    }

    // A '$' that starts neither form is plain text: keep scanning instead of
    // abandoning the remainder of the string.
    pos++
  }

  tokens.push(value.slice(start, end))
  return tokens
}