259 lines
7.4 KiB
TypeScript
259 lines
7.4 KiB
TypeScript
import { SyntaxNode } from './node'
|
|
|
|
/**
 * Parse a string literal into a `String` node whose children describe its parts.
 *
 * The first character of the `[from, to)` range selects how the literal is read:
 * - `"` double-quoted: no interpolation or escapes; the whole range (quotes
 *   included) becomes a single `DoubleQuote` child.
 * - `{` curly: interpolation but no escapes (handled by `parseCurlyString`).
 * - `'` single-quoted: interpolation and escapes (handled by `parseSingleQuoteString`).
 *
 * @param input  Full source text; `from`/`to` are offsets into it.
 * @param from   Offset of the opening delimiter.
 * @param to     Offset just past the end of the literal.
 * @param parser Forwarded unchanged to the curly / single-quote helpers.
 * @returns The `String` node spanning `[from, to)`.
 * @throws {Error} If the range is empty or starts with any other character.
 */
export const parseString = (input: string, from: number, to: number, parser: any): SyntaxNode => {
  const stringNode = new SyntaxNode('String', from, to)

  // Only the opening delimiter is needed, so read it directly instead of
  // copying the whole literal. An empty range has no delimiter at all.
  const firstChar = from < to ? input[from] : undefined

  switch (firstChar) {
    case '"':
      stringNode.add(new SyntaxNode('DoubleQuote', from, to))
      return stringNode

    case '{':
      parseCurlyString(stringNode, input, from, to, parser)
      return stringNode

    case "'":
      parseSingleQuoteString(stringNode, input, from, to, parser)
      return stringNode

    default:
      // Throw a real Error (not a bare string) so callers get a stack trace
      // and `instanceof Error` checks work.
      throw new Error(`Unknown string type starting with: ${firstChar}`)
  }
}
|
|
|
|
/**
 * Parse the body of a single-quoted string such as 'hello $name\n' and append
 * the resulting nodes to `stringNode`.
 *
 * The character at `from` is skipped as the opening quote and the character at
 * `to - 1` is treated as the closing quote. Children are added in source order:
 * - `StringFragment` for runs of literal text,
 * - `EscapeSeq` for a backslash plus the character after it (the escaped
 *   character is not validated; a backslash that is the last content character
 *   stays literal text),
 * - `Interpolation` for `$(expr)` (delegated to `parseInterpolationExpr`) and
 *   for `$name` (wrapping `FunctionCallOrIdentifier` > `Identifier`).
 *
 * A `$` that is followed by neither `(` nor an identifier character is kept as
 * literal text instead of producing an interpolation around an empty identifier.
 *
 * @param stringNode Node that receives the children.
 * @param input      Full source text.
 * @param from       Offset of the opening quote.
 * @param to         Offset just past the closing quote.
 * @param parser     Passed through to `parseInterpolationExpr`.
 */
const parseSingleQuoteString = (stringNode: SyntaxNode, input: string, from: number, to: number, parser: any) => {
  const contentEnd = to - 1 // Offset of the closing '
  let pos = from + 1 // Skip opening '
  let fragmentStart = pos

  // Emit the literal text collected since the last emitted node, if any
  const flushFragment = (end: number) => {
    if (end > fragmentStart) {
      stringNode.add(new SyntaxNode('StringFragment', fragmentStart, end))
    }
  }

  while (pos < contentEnd) {
    const char = input[pos]

    // Escape sequence: needs one more content character after the backslash
    if (char === '\\' && pos + 1 < contentEnd) {
      flushFragment(pos)
      stringNode.add(new SyntaxNode('EscapeSeq', pos, pos + 2))

      pos += 2
      fragmentStart = pos
      continue
    }

    if (char === '$') {
      const dollarPos = pos
      const afterDollar = pos + 1

      // Expression interpolation: $(expr)
      if (input[afterDollar] === '(') {
        flushFragment(dollarPos)

        const exprResult = parseInterpolationExpr(input, afterDollar, parser)
        const interpNode = new SyntaxNode('Interpolation', dollarPos, exprResult.endPos)
        interpNode.add(exprResult.node)
        stringNode.add(interpNode)

        pos = exprResult.endPos
        fragmentStart = pos
        continue
      }

      // Variable interpolation: $name
      const identEnd = findIdentifierEnd(input, afterDollar, contentEnd)
      if (identEnd > afterDollar) {
        flushFragment(dollarPos)

        const identNode = new SyntaxNode('FunctionCallOrIdentifier', afterDollar, identEnd)
        identNode.add(new SyntaxNode('Identifier', afterDollar, identEnd))

        const interpNode = new SyntaxNode('Interpolation', dollarPos, identEnd)
        interpNode.add(identNode)
        stringNode.add(interpNode)

        pos = identEnd
        fragmentStart = pos
        continue
      }

      // Lone `$`: nothing to interpolate, so fall through and let it become
      // part of the surrounding literal fragment.
    }

    pos++
  }

  // Push final fragment (skipped when the last node already reached the closing quote)
  if (pos > fragmentStart && fragmentStart < contentEnd) {
    stringNode.add(new SyntaxNode('StringFragment', fragmentStart, pos))
  }
}
|
|
|
|
/**
 * Parse a curly string such as { hello $name } and append the resulting nodes
 * to `stringNode`.
 *
 * Literal text is emitted as `CurlyString` nodes; the first one includes the
 * opening `{` and the last one includes the matching closing `}`. Nested
 * `{ ... }` pairs are tracked by depth and stay part of the literal text.
 * `$(expr)` and `$name` become `Interpolation` nodes. Backslashes get no
 * special treatment (no escape sequences).
 *
 * A `$` that is followed by neither `(` nor an identifier character is kept as
 * literal text instead of producing an interpolation around an empty identifier.
 * If the range ends before the opening brace is matched, the remaining text is
 * still emitted as a final `CurlyString` rather than being dropped.
 *
 * @param stringNode Node that receives the children.
 * @param input      Full source text.
 * @param from       Offset of the opening `{`.
 * @param to         Offset just past the end of the literal.
 * @param parser     Passed through to `parseInterpolationExpr`.
 */
const parseCurlyString = (stringNode: SyntaxNode, input: string, from: number, to: number, parser: any) => {
  let pos = from + 1 // Skip opening {
  let fragmentStart = from // Include the opening { in the fragment
  let depth = 1

  // Emit the literal text collected since the last emitted node, if any
  const flushFragment = (end: number) => {
    if (end > fragmentStart) {
      stringNode.add(new SyntaxNode('CurlyString', fragmentStart, end))
    }
  }

  while (pos < to && depth > 0) {
    const char = input[pos]

    // Track brace nesting
    if (char === '{') {
      depth++
      pos++
      continue
    }

    if (char === '}') {
      depth--
      if (depth === 0) {
        // Matching close brace: final fragment includes the closing }
        stringNode.add(new SyntaxNode('CurlyString', fragmentStart, pos + 1))
        return
      }
      pos++
      continue
    }

    if (char === '$') {
      const dollarPos = pos
      const afterDollar = pos + 1

      // Expression interpolation: $(expr)
      if (input[afterDollar] === '(') {
        flushFragment(dollarPos)

        const exprResult = parseInterpolationExpr(input, afterDollar, parser)
        const interpNode = new SyntaxNode('Interpolation', dollarPos, exprResult.endPos)
        interpNode.add(exprResult.node)
        stringNode.add(interpNode)

        pos = exprResult.endPos
        fragmentStart = pos
        continue
      }

      // Variable interpolation: $name
      const identEnd = findIdentifierEnd(input, afterDollar, to)
      if (identEnd > afterDollar) {
        flushFragment(dollarPos)

        const identNode = new SyntaxNode('FunctionCallOrIdentifier', afterDollar, identEnd)
        identNode.add(new SyntaxNode('Identifier', afterDollar, identEnd))

        const interpNode = new SyntaxNode('Interpolation', dollarPos, identEnd)
        interpNode.add(identNode)
        stringNode.add(interpNode)

        pos = identEnd
        fragmentStart = pos
        continue
      }

      // Lone `$`: nothing to interpolate, so fall through and let it become
      // part of the surrounding literal fragment.
    }

    pos++
  }

  // Only reached when the range ended before the opening brace was matched.
  // Keep whatever literal text is still pending instead of losing it.
  if (fragmentStart < to) {
    stringNode.add(new SyntaxNode('CurlyString', fragmentStart, to))
  }
}
|
|
|
|
/**
 * Parse a parenthesized expression interpolation: $(a + b)
 *
 * Finds the `)` matching the `(` at `pos` by counting parentheses, hands the
 * text between them to `parser.parse`, takes the result's `firstChild` (or the
 * result itself when it has none), shifts that subtree's positions so they
 * refer to `input`, and wraps it in a `ParenExpr` node.
 *
 * The matching is purely character based: parentheses are counted wherever
 * they appear, and the scan runs to the end of `input`. If no matching `)`
 * exists, everything up to the end of `input` is treated as the expression and
 * the returned positions stop at `input.length` (never beyond it).
 *
 * @param input  Full source text.
 * @param pos    Offset of the opening `(` in `input`.
 * @param parser Object whose `parse(text)` returns a node for the expression text.
 * @returns `node`: the `ParenExpr` spanning the parentheses; `endPos`: the
 *          offset just past the closing `)`, or `input.length` if unterminated.
 */
const parseInterpolationExpr = (input: string, pos: number, parser: any): { node: SyntaxNode, endPos: number } => {
  const openParen = pos
  const contentStart = openParen + 1 // First character after (

  // Find matching closing paren
  let depth = 1
  let scan = contentStart
  while (scan < input.length) {
    const char = input[scan]
    if (char === '(') {
      depth++
    } else if (char === ')') {
      depth--
      if (depth === 0) break
    }
    scan++
  }

  // `scan` now sits on the matching ) or, if there is none, at input.length.
  // Only step past a paren that actually exists so positions stay in range.
  const terminated = depth === 0
  const endPos = terminated ? scan + 1 : scan

  // Use the main parser to parse the expression text between ( and )
  const exprNode = parser.parse(input.slice(contentStart, scan))

  // Get the first real node (skip the wrapper node returned by the parser)
  const innerNode = exprNode.firstChild || exprNode

  // Parsed positions are relative to the expression text; shift them so they
  // are relative to the full input
  adjustNodePositions(innerNode, contentStart)

  // Wrap in ParenExpr - positions in the full string, parens included
  const parenNode = new SyntaxNode('ParenExpr', openParen, endPos)
  parenNode.add(innerNode)

  return { node: parenNode, endPos }
}
|
|
|
|
/**
 * Shift `from` and `to` of a node and of every node below it by `offset`.
 *
 * The nodes are modified in place; nothing is returned.
 *
 * @param node   Root of the subtree to shift.
 * @param offset Amount added to each `from` and `to`.
 */
const adjustNodePositions = (node: SyntaxNode, offset: number) => {
  // Explicit worklist instead of recursion; the array grows as children are discovered
  const pending: SyntaxNode[] = [node]

  for (let index = 0; index < pending.length; index++) {
    const current = pending[index]
    current.from += offset
    current.to += offset

    for (const child of current.children) {
      pending.push(child)
    }
  }
}
|
|
|
|
/**
 * Find the end position of an identifier starting at `pos`.
 *
 * Scans forward while each character is an ASCII lowercase letter, a digit,
 * `-` or `?`, and stops at the first other character, at `maxPos`, or at the
 * end of `input`, whichever comes first. Only that ASCII set is recognised:
 * uppercase letters, emoji and other non-ASCII characters end the scan, and
 * the first character gets no special treatment.
 *
 * @param input  Text to scan.
 * @param pos    Offset at which the identifier would start.
 * @param maxPos Exclusive upper bound for the scan.
 * @returns Offset just past the last identifier character, or `pos` itself
 *          when no identifier character is found there.
 */
const findIdentifierEnd = (input: string, pos: number, maxPos: number): number => {
  // Built once per call rather than once per scanned character
  const identifierChar = /[a-z0-9\-?]/

  let end = pos

  // charAt() yields '' outside the string. Indexing would yield undefined,
  // which RegExp.test() coerces to the text "undefined" and then matches as
  // if it were identifier characters, running the scan past the end of input.
  while (end < maxPos && identifierChar.test(input.charAt(end))) {
    end++
  }

  return end
}
|