This commit is contained in:
Corey Johnson 2025-10-15 16:18:18 -07:00
parent fe7abb8b21
commit d9bc5a64a4
9 changed files with 269 additions and 50 deletions

View File

@ -295,6 +295,7 @@ These discoveries came from implementing string interpolation with external toke
**The most surprising discovery**: Rule names determine whether nodes appear in the parse tree.
**Lowercase rules get inlined** (no tree nodes):
```lezer
statement { assign | expr } // ❌ No "statement" node
assign { x "=" y } // ❌ No "assign" node
@ -302,6 +303,7 @@ expr { x | y } // ❌ No "expr" node
```
**Capitalized rules create tree nodes**:
```lezer
Statement { Assign | Expr } // ✅ Creates Statement node
Assign { x "=" y } // ✅ Creates Assign node
@ -339,6 +341,7 @@ Example: `x = 42` was parsing as `Program(Identifier,"=",Number)` instead of `Pr
**Reality**: External tokenizers work perfectly inside `@skip {}` blocks! The tokenizer gets called even when skip is disabled.
**Working pattern**:
```lezer
@external tokens tokenizer from "./tokenizer" { Identifier, Word }
@ -357,6 +360,7 @@ Interpolation {
### 4. Single-Character Tokens Can Be Literals
**Initial approach**: Define every single character as a token:
```lezer
@tokens {
dollar[@name="$"] { "$" }
@ -365,13 +369,14 @@ Interpolation {
```
**Simpler approach**: Just use literals in the grammar rules:
```lezer
Interpolation {
"$" Identifier | // Literal "$"
"$" "(" expr ")"
}
StringEscape {
EscapeSeq {
"\\" ("$" | "n" | ...) // Literal "\\"
}
```

View File

@ -23,7 +23,7 @@ const DEBUG = false
type Label = `.${string}`
// Process escape sequences in strings
function processEscapeSequence(escapeSeq: string): string {
function processEscapeSeq(escapeSeq: string): string {
// escapeSeq includes the backslash, e.g., "\n", "\$", "\\"
if (escapeSeq.length !== 2) return escapeSeq
@ -130,9 +130,9 @@ export class Compiler {
instructions.push(['PUSH', partValue])
break
case terms.StringEscape:
case terms.EscapeSeq:
// Process escape sequence and push the result
const processed = processEscapeSequence(partValue)
const processed = processEscapeSeq(partValue)
instructions.push(['PUSH', processed])
break

View File

@ -177,7 +177,7 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
return (
child.type.id === terms.StringFragment ||
child.type.id === terms.Interpolation ||
child.type.id === terms.StringEscape
child.type.id === terms.EscapeSeq
)
})
@ -186,10 +186,10 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
if (
part.type.id !== terms.StringFragment &&
part.type.id !== terms.Interpolation &&
part.type.id !== terms.StringEscape
part.type.id !== terms.EscapeSeq
) {
throw new CompilerError(
`String child must be StringFragment, Interpolation, or StringEscape, got ${part.type.name}`,
`String child must be StringFragment, Interpolation, or EscapeSeq, got ${part.type.name}`,
part.from,
part.to
)

View File

@ -39,7 +39,7 @@
}
@external tokens tokenizer from "./tokenizer" { Identifier, Word }
@external tokens tokenizer from "./tokenizer" { Identifier, WordFragment }
@precedence {
pipe @left,
@ -170,12 +170,13 @@ expression {
@skip {} {
String { "'" stringContent* "'" }
}
stringContent {
StringFragment |
Interpolation |
StringEscape
EscapeSeq
}
Interpolation {
@ -183,10 +184,18 @@ Interpolation {
"$" ParenExpr
}
StringEscape {
EscapeSeq {
"\\" ("$" | "n" | "t" | "r" | "\\" | "'")
}
Word { wordContent+ }
wordContent {
WordFragment | Interpolation | EscapeSeq
}
// We need expressionWithoutIdentifier to avoid conflicts in consumeToTerminator.
// Without this, when parsing "my-var" at statement level, the parser can't decide:
// - ambiguousFunctionCall → FunctionCallOrIdentifier → Identifier

View File

@ -1,7 +1,7 @@
// This file was generated by lezer-generator. You probably shouldn't edit it.
export const
Identifier = 1,
Word = 2,
WordFragment = 2,
Program = 3,
PipeExpr = 4,
FunctionCall = 5,
@ -10,21 +10,22 @@ export const
FunctionCallOrIdentifier = 8,
BinOp = 9,
ConditionalOp = 14,
String = 23,
StringFragment = 24,
Interpolation = 25,
StringEscape = 26,
Number = 27,
Boolean = 28,
FunctionDef = 29,
Params = 31,
colon = 32,
end = 33,
Underscore = 34,
NamedArg = 35,
NamedArgPrefix = 36,
IfExpr = 38,
ThenBlock = 41,
ElsifExpr = 42,
ElseExpr = 44,
Assign = 46
Word = 23,
Interpolation = 24,
EscapeSeq = 25,
String = 26,
StringFragment = 27,
Number = 28,
Boolean = 29,
FunctionDef = 30,
Params = 32,
colon = 33,
end = 34,
Underscore = 35,
NamedArg = 36,
NamedArgPrefix = 37,
IfExpr = 39,
ThenBlock = 42,
ElsifExpr = 43,
ElseExpr = 45,
Assign = 47

View File

@ -4,20 +4,20 @@ import {tokenizer} from "./tokenizer"
import {highlighting} from "./highlight"
export const parser = LRParser.deserialize({
version: 14,
states: ".WQVQaOOO!rQbO'#CdO#SQPO'#CeO#bQPO'#DhO$[QaO'#CcO$cOSO'#CsOOQ`'#Dl'#DlO$qQPO'#DkO%YQaO'#DvOOQ`'#Cy'#CyOOQO'#Di'#DiO%bQPO'#DhO%pQaO'#DzOOQO'#DS'#DSOOQO'#Dh'#DhO%wQPO'#DgOOQ`'#Dg'#DgOOQ`'#D]'#D]QVQaOOOOQ`'#Dk'#DkOOQ`'#Cb'#CbO&PQaO'#DPOOQ`'#Dj'#DjOOQ`'#D^'#D^O&^QbO,58{O&}QaO,59vO%pQaO,59PO%pQaO,59PO'[QbO'#CdO(gQPO'#CeO(wQPO,58}O)YQPO,58}O)TQPO,58}O*TQPO,58}O*]QaO'#CuO*eQWO'#CvOOOO'#Dp'#DpOOOO'#D_'#D_O*yOSO,59_OOQ`,59_,59_OOQ`'#D`'#D`O+XQaO'#C{O+aQPO,5:bO+fQaO'#DbO+kQPO,58zO+|QPO,5:fO,TQPO,5:fOOQ`,5:R,5:ROOQ`-E7Z-E7ZOOQ`,59k,59kOOQ`-E7[-E7[OOQO1G/b1G/bOOQO1G.k1G.kO,YQPO1G.kO%pQaO,59UO%pQaO,59UOOQ`1G.i1G.iOOOO,59a,59aOOOO,59b,59bOOOO-E7]-E7]OOQ`1G.y1G.yOOQ`-E7^-E7^O,tQaO1G/|O-UQbO'#CdOOQO,59|,59|OOQO-E7`-E7`O-uQaO1G0QOOQO1G.p1G.pO.VQPO1G.pO.aQPO7+%hO.fQaO7+%iOOQO'#DU'#DUOOQO7+%l7+%lO.vQaO7+%mOOQ`<<IS<<ISO/^QPO'#DaO/cQaO'#DyO/yQPO<<ITOOQO'#DV'#DVO0OQPO<<IXOOQ`,59{,59{OOQ`-E7_-E7_OOQ`AN>oAN>oO%pQaO'#DWOOQO'#Dc'#DcO0ZQPOAN>sO0fQPO'#DYOOQOAN>sAN>sO0kQPOAN>sO0pQPO,59rO0wQPO,59rOOQO-E7a-E7aOOQOG24_G24_O0|QPOG24_O1RQPO,59tO1WQPO1G/^OOQOLD)yLD)yO.fQaO1G/`O.vQaO7+$xOOQO7+$z7+$zOOQO<<Hd<<Hd",
stateData: "1`~O!YOS~OPPOQUOkUOlUOnWOw[O!aSO!cTO!l`O~OPcOQUOkUOlUOnWOrdOteO!aSO!cTOY!_XZ!_X[!_X]!_XuWX~O_iO!lWX!pWXqWX~PtOYjOZjO[kO]kO~OYjOZjO[kO]kO!l![X!p![Xq![X~OQUOkUOlUO!aSO!cTO~OPlO~P#yOhtO!cwO!erO!fsO~OY!_XZ!_X[!_X]!_X!l![X!p![Xq![X~OPxOpoP~Ou{O!l![X!p![Xq![X~OPcO~P#yO!l!PO!p!PO~OPcOnWOr!RO~P#yOPcOnWOrdOteOuTa!lTa!pTa!bTaqTa~P#yOPPOnWOw[O~P#yO_!_X`!_Xa!_Xb!_Xc!_Xd!_Xe!_Xf!_X!bWX~PtO_!WO`!WOa!WOb!WOc!WOd!WOe!XOf!XO~OYjOZjO[kO]kO~P'{OYjOZjO[kO]kO!b!YO~O!b!YOY!_XZ!_X[!_X]!_X_!_X`!_Xa!_Xb!_Xc!_Xd!_Xe!_Xf!_X~Ou{O!b!YO~OP!ZO!aSO~O!c![O!e![O!f![O!g![O!h![O!i![O~OhtO!c!^O!erO!fsO~OPxOpoX~Op!`O~OP!aO~Ou{O!lSa!pSa!bSaqSa~Op!dO~P'{Op!dO~OYjOZjO[Xi]Xi!lXi!pXi!bXiqXi~OPPOnWOw[O!l!hO~P#yOPcOnWOrdOteOuWX!lWX!pWX!bWXqWX~P#yOPPOnWOw[O!l!kO~P#yO!b^ip^i~P'{Oq!lO~OPPOnWOw[Oq!mP~P#yOPPOnWOw[Oq!mP{!mP}!mP~P#yO!l!rO~OPPOnWOw[Oq!mX{!mX}!mX~P#yOq!tO~Oq!yO{!uO}!xO~Oq#OO{!uO}!xO~Op#QO~Oq#OO~Op#RO~P'{Op#RO~Oq#SO~O!l#TO~O!l#UO~Ok]~",
goto: "+V!pPPPP!q#Q#`#f#Q$RPPPP$hPPPPPPPP$tP%^%^PP%bP%wPPP#`PP%zP&W&Z&dP&hP%z&n&t&|'S'Y'c'jPPP'p't(Y(l(r)nPPP*[PPPPP*`*`P*q*y*yd^Obi!`!d!h!k!n#T#URpSiYOSbi{!`!d!h!k!n#T#UXfPhl!a|UOPS[behijkl!W!X!`!a!d!h!k!n!u#T#UR!ZrdRObi!`!d!h!k!n#T#UQnSQ!UjR!VkQpSQ!O[Q!e!XR!|!u}UOPS[behijkl!W!X!`!a!d!h!k!n!u#T#UTtTvd^Obi!`!d!h!k!n#T#UWdPhl!aR!ReRzWe^Obi!`!d!h!k!n#T#UR!j!dQ!q!kQ#V#TR#W#UT!v!q!wQ!z!qR#P!wQbOR!QbUhPl!aR!ShQvTR!]vQyWR!_yW!n!h!k#T#UR!s!nS|ZqR!c|Q!w!qR!}!wTaObS_ObQ!TiQ!g!`Q!i!dZ!m!h!k!n#T#UdZObi!`!d!h!k!n#T#UQqSR!b{XgPhl!adQObi!`!d!h!k!n#T#UWdPhl!aQmSQ}[Q!ReQ!UjQ!VkQ!e!WQ!f!XR!{!udVObi!`!d!h!k!n#T#UfcP[ehjkl!W!X!a!uRoSTuTvoXOPbehil!`!a!d!h!k!n#T#UQ!o!hV!p!k#T#Ue]Obi!`!d!h!k!n#T#U",
nodeNames: "⚠ Identifier Word Program PipeExpr FunctionCall PositionalArg ParenExpr FunctionCallOrIdentifier BinOp operator operator operator operator ConditionalOp operator operator operator operator operator operator operator operator String StringFragment Interpolation StringEscape Number Boolean FunctionDef keyword Params colon end Underscore NamedArg NamedArgPrefix operator IfExpr keyword ThenBlock ThenBlock ElsifExpr keyword ElseExpr keyword Assign",
maxTerm: 78,
states: ".vQVQaOOO#OQbO'#CdO#`QPO'#CeO#nQPO'#DjO$nQaO'#CcO$uQaO'#CtO$}QSO'#CuOOQ`'#Dq'#DqOOQ`'#D`'#D`O%cQaO'#CsO&sOWO'#CvOOQ`'#Dn'#DnO'RQPO'#DmO'jQaO'#DyOOQ`'#Cz'#CzOOQO'#Dk'#DkO'rQPO'#DjO(QQaO'#D}OOQO'#DT'#DTOOQO'#Dj'#DjO(XQPO'#DiOOQ`'#Di'#DiOOQ`'#D^'#D^QVQaOOOOQ`'#Cs'#CsOOQ`'#Dm'#DmOOQ`'#Cb'#CbO(aQaO'#DQOOQ`'#Dl'#DlOOQ`'#D_'#D_O(nQbO,58{O)_QaO,59wO(QQaO,59PO(QQaO,59PO)lQbO'#CdO*wQPO'#CeO+XQPO,58}O+jQPO,58}O+eQPO,58}O,eQPO,58}OOQ`,59`,59`OOQ`,59a,59aOOQ`-E7^-E7^OOOO'#Dx'#DxOOOO'#Da'#DaO,mOWO,59bOOQ`,59b,59bOOQ`'#Db'#DbO,{QaO'#C|O-TQPO,5:eO-YQaO'#DdO-_QPO,58zO-pQPO,5:iO-wQPO,5:iOOQ`,5:T,5:TOOQ`-E7[-E7[OOQ`,59l,59lOOQ`-E7]-E7]OOQO1G/c1G/cOOQO1G.k1G.kO-|QPO1G.kO(QQaO,59UO(QQaO,59UOOQ`1G.i1G.iOOOO-E7_-E7_OOQ`1G.|1G.|OOQ`-E7`-E7`O.hQaO1G0PO.xQbO'#CdOOQO,5:O,5:OOOQO-E7b-E7bO/iQaO1G0TOOQO1G.p1G.pO/yQPO1G.pO0TQPO7+%kO0YQaO7+%lOOQO'#DV'#DVOOQO7+%o7+%oO0jQaO7+%pOOQ`<<IV<<IVO1QQPO'#DcO1VQaO'#D|O1mQPO<<IWOOQO'#DW'#DWO1rQPO<<I[OOQ`,59},59}OOQ`-E7a-E7aOOQ`AN>rAN>rO(QQaO'#DXOOQO'#De'#DeO1}QPOAN>vO2YQPO'#DZOOQOAN>vAN>vO2_QPOAN>vO2dQPO,59sO2kQPO,59sOOQO-E7c-E7cOOQOG24bG24bO2pQPOG24bO2uQPO,59uO2zQPO1G/_OOQOLD)|LD)|O0YQaO1G/aO0jQaO7+$yOOQO7+${7+${OOQO<<He<<He",
stateData: "3S~O![OS~OPPOQVOlZOmZOo]OxaO!cSO!fTO!gUO!kYO!oeO~OPiOQVOlZOmZOo]OsjOukO!cSO!fTO!gUO!kYOY!aXZ!aX[!aX]!aXvWX~O_oO!oWX!sWXrWX~PzOYpOZpO[qO]qO~OYpOZpO[qO]qO!o!^X!s!^Xr!^X~OQVOlZOmZO!cSO!fTO!gUO!kYO~OPrO~P$VOPxO!cSO~O!fyO!gyO!hyO!iyO!jyO!kyO~OQVO!fTO!gUOYgXZgX[gX]gX!ogX!sgX_gX`gXagXbgXcgXdgXegXfgX!dgXqgXrgX~Ok{O!fTO!gUO!k!OO~OY!aXZ!aX[!aX]!aX!o!^X!s!^Xr!^X~OP!POqpP~Ov!SO!o!^X!s!^Xr!^X~OPiO~P$VO!o!WO!s!WO~OPiOo]Os!YO~P$VOPiOo]OsjOukOvTa!oTa!sTa!dTarTa~P$VOPPOo]OxaO~P$VO_!aX`!aXa!aXb!aXc!aXd!aXe!aXf!aX!dWX~PzO_!_O`!_Oa!_Ob!_Oc!_Od!_Oe!`Of!`O~OYpOZpO[qO]qO~P*]OYpOZpO[qO]qO!d!aO~O!d!aOY!aXZ!aX[!aX]!aX_!aX`!aXa!aXb!aXc!aXd!aXe!aXf!aX~Ov!SO!d!aO~Ok{O!fTO!gUO!k!cO~OP!POqpX~Oq!eO~OP!fO~Ov!SO!oSa!sSa!dSarSa~Oq!iO~P*]Oq!iO~OYpOZpO[Xi]Xi!oXi!sXi!dXirXi~OPPOo]OxaO!o!mO~P$VOPiOo]OsjOukOvWX!oWX!sWX!dWXrWX~P$VOPPOo]OxaO!o!pO~P$VO!d^iq^i~P*]Or!qO~OPPOo]OxaOr!pP~P$VOPPOo]OxaOr!pP|!pP!O!pP~P$VO!o!wO~OPPOo]OxaOr!pX|!pX!O!pX~P$VOr!yO~Or#OO|!zO!O!}O~Or#TO|!zO!O!}O~Oq#VO~Or#TO~Oq#WO~P*]Oq#WO~Or#XO~O!o#YO~O!o#ZO~Ol]~",
goto: ",{!sPPPP!t#T#c#i#T$UPPPP$kPPPPPPPP$w%a%a$wPPP&OP&ePPP#cPP&hP&t&w'QP'UP&h'['b'j(X(_(e(n(uPPP({)P)e)w)}*yPP+gPPPPPP,Q,U,UP,g,o,odcOgo!e!i!m!p!s#Y#ZRvSi_OSgo!S!e!i!m!p!s#Y#ZXlPnr!f|ZOPSagknopqr!_!`!e!f!i!m!p!s!z#Y#ZRxTdROgo!e!i!m!p!s#Y#ZQtSQ!]pR!^qQvSQ!VaQ!j!`R#R!z}ZOPSagknopqr!_!`!e!f!i!m!p!s!z#Y#Z!OVOPSXagknopqr!_!`!e!f!i!m!p!s!z#Y#ZT{Y}dcOgo!e!i!m!p!s#Y#ZWjPnr!fR!YkR!R]ecOgo!e!i!m!p!s#Y#ZR!o!iQ!v!pQ#[#YR#]#ZT!{!v!|Q#P!vR#U!|QgOR!XgUnPr!fR!ZnrXOSagopq!_!`!e!i!m!p!s!z#Y#ZYhPknr!fRzXQ}YR!b}Q!Q]R!d!QW!s!m!p#Y#ZR!x!sS!T`wR!h!TQ!|!vR#S!|TfOgSdOgQ![oQ!l!eQ!n!iZ!r!m!p!s#Y#Zd`Ogo!e!i!m!p!s#Y#ZQwSR!g!SXmPnr!fdQOgo!e!i!m!p!s#Y#ZWjPnr!fQsSQ!UaQ!YkQ!]pQ!^qQ!j!_Q!k!`R#Q!zd[Ogo!e!i!m!p!s#Y#ZfiPaknpqr!_!`!f!zRuS!PWOPSXagknopqr!_!`!e!f!i!m!p!s!z#Y#ZT|Y}o^OPgknor!e!f!i!m!p!s#Y#ZQ!t!mV!u!p#Y#ZebOgo!e!i!m!p!s#Y#Z",
nodeNames: "⚠ Identifier WordFragment Program PipeExpr FunctionCall PositionalArg ParenExpr FunctionCallOrIdentifier BinOp operator operator operator operator ConditionalOp operator operator operator operator operator operator operator operator Word Interpolation EscapeSeq String StringFragment Number Boolean FunctionDef keyword Params colon end Underscore NamedArg NamedArgPrefix operator IfExpr keyword ThenBlock ThenBlock ElsifExpr keyword ElseExpr keyword Assign",
maxTerm: 81,
nodeProps: [
["closedBy", 32,"end"],
["openedBy", 33,"colon"]
["closedBy", 33,"end"],
["openedBy", 34,"colon"]
],
propSources: [highlighting],
skippedNodes: [0],
repeatNodeCount: 7,
tokenData: "Hw~R!SOX$_XY$|YZ%gZp$_pq$|qr&Qrt$_tu'Yuw$_wx'_xy'dyz'}z{(h{|)R|}$_}!O)l!O!P$_!P!Q,b!Q![*]![!],{!]!^%g!^!_-f!_!`.p!`!a/Z!a#O$_#O#P0e#P#R$_#R#S0j#S#T$_#T#U1T#U#X2i#X#Y5O#Y#Z<U#Z#]2i#]#^Aa#^#b2i#b#cCR#c#dCx#d#f2i#f#gEj#g#h2i#h#iFa#i#o2i#o#p$_#p#qHX#q;'S$_;'S;=`$v<%l~$_~O$_~~HrS$dUhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_S$yP;=`<%l$__%TUhS!YZOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V%nUhS!lROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V&VWhSOt$_uw$_x!_$_!_!`&o!`#O$_#P;'S$_;'S;=`$v<%lO$_V&vU`RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~'_O!e~~'dO!c~V'kUhS!aROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(UUhS!bROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(oUYRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)YU[RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)sWhS]ROt$_uw$_x!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V*dYhSkROt$_uw$_x!O$_!O!P+S!P!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V+XWhSOt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V+xWhSkROt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V,iUZRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_T-SUhSpPOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V-mWaRhSOt$_uw$_x!_$_!_!`.V!`#O$_#P;'S$_;'S;=`$v<%lO$_V.^UbRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V.wU_RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V/bWcRhSOt$_uw$_x!_$_!_!`/z!`#O$_#P;'S$_;'S;=`$v<%lO$_V0RUdRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~0jO!f~V0qUhSrROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V1Y[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#b2i#b#c3^#c#o2i#o;'S$_;'S;=`$v<%lO$_U2VUtQhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_U2nYhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V3c[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#W2i#W#X4X#X#o2i#o;'S$_;'S;=`$v<%lO$_V4`YeRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V5T^hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#`2i#`#a6P#a#b2i#b#c:d#c#o2i#o;'S$_;'S;=`$v<%lO$_V6U[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#g2i#g#h6z#h#o2i#o;'S$_;'S;=`$v<%lO$_V7P^hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#X2i#X#Y7{#Y#]2i#]#^8r#^#o2i#o;'S$_;'S;=`$v<%lO$_V8SY}PhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V8w[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#Y2i#Y#Z9m#Z#o2i#o;'S$_;
'S;=`$v<%lO$_V9tY{PhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V:i[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#W2i#W#X;_#X#o2i#o;'S$_;'S;=`$v<%lO$_V;fYhSqROt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V<Z]hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#U=S#U#b2i#b#c@j#c#o2i#o;'S$_;'S;=`$v<%lO$_V=X[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#`2i#`#a=}#a#o2i#o;'S$_;'S;=`$v<%lO$_V>S[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#g2i#g#h>x#h#o2i#o;'S$_;'S;=`$v<%lO$_V>}[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#X2i#X#Y?s#Y#o2i#o;'S$_;'S;=`$v<%lO$_V?zYlRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V@qYnRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_VAf[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#Y2i#Y#ZB[#Z#o2i#o;'S$_;'S;=`$v<%lO$_VBcYwPhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^CYY!gWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_VC}[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gDs#g#o2i#o;'S$_;'S;=`$v<%lO$_VDzYfRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^EqY!iWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$__Fh[!hWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gG^#g#o2i#o;'S$_;'S;=`$v<%lO$_VGc[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#i2i#i#j>x#j#o2i#o;'S$_;'S;=`$v<%lO$_VH`UuRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~HwO!p~",
repeatNodeCount: 8,
tokenData: "Hw~R!SOX$_XY$|YZ%gZp$_pq$|qr&Qrt$_tu'Yuw$_wx'_xy'dyz'}z{(h{|)R|}$_}!O)l!O!P$_!P!Q,b!Q![*]![!],{!]!^%g!^!_-f!_!`.p!`!a/Z!a#O$_#O#P0e#P#R$_#R#S0j#S#T$_#T#U1T#U#X2i#X#Y5O#Y#Z<U#Z#]2i#]#^Aa#^#b2i#b#cCR#c#dCx#d#f2i#f#gEj#g#h2i#h#iFa#i#o2i#o#p$_#p#qHX#q;'S$_;'S;=`$v<%l~$_~O$_~~HrW$dUkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_W$yP;=`<%l$__%TUkW![VOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z%nUkW!oROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z&VWkWOt$_uw$_x!_$_!_!`&o!`#O$_#P;'S$_;'S;=`$v<%lO$_Z&vU`RkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~'_O!f~~'dO!k~Z'kUkW!cROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z(UUkW!dROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z(oUYRkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z)YU[RkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z)sWkW]ROt$_uw$_x!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_Z*dYkWlROt$_uw$_x!O$_!O!P+S!P!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_Z+XWkWOt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_Z+xWkWlROt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_Z,iUZRkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_X-SUkWqPOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z-mWaRkWOt$_uw$_x!_$_!_!`.V!`#O$_#P;'S$_;'S;=`$v<%lO$_Z.^UbRkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z.wU_RkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z/bWcRkWOt$_uw$_x!_$_!_!`/z!`#O$_#P;'S$_;'S;=`$v<%lO$_Z0RUdRkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~0jO!g~Z0qUkWsROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z1Y[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#b2i#b#c3^#c#o2i#o;'S$_;'S;=`$v<%lO$_Y2VUuQkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Y2nYkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_Z3c[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#W2i#W#X4X#X#o2i#o;'S$_;'S;=`$v<%lO$_Z4`YeRkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_Z5T^kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#`2i#`#a6P#a#b2i#b#c:d#c#o2i#o;'S$_;'S;=`$v<%lO$_Z6U[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#g2i#g#h6z#h#o2i#o;'S$_;'S;=`$v<%lO$_Z7P^kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#X2i#X#Y7{#Y#]2i#]#^8r#^#o2i#o;'S$_;'S;=`$v<%lO$_Z8SY!OPkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_Z8w[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#Y2i#Y#Z9m#Z#o2i#o;'S$_
;'S;=`$v<%lO$_Z9tY|PkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_Z:i[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#W2i#W#X;_#X#o2i#o;'S$_;'S;=`$v<%lO$_Z;fYkWrROt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_Z<Z]kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#U=S#U#b2i#b#c@j#c#o2i#o;'S$_;'S;=`$v<%lO$_Z=X[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#`2i#`#a=}#a#o2i#o;'S$_;'S;=`$v<%lO$_Z>S[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#g2i#g#h>x#h#o2i#o;'S$_;'S;=`$v<%lO$_Z>}[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#X2i#X#Y?s#Y#o2i#o;'S$_;'S;=`$v<%lO$_Z?zYmRkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_Z@qYoRkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_ZAf[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#Y2i#Y#ZB[#Z#o2i#o;'S$_;'S;=`$v<%lO$_ZBcYxPkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^CYY!hSkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_ZC}[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gDs#g#o2i#o;'S$_;'S;=`$v<%lO$_ZDzYfRkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^EqY!jSkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$__Fh[!iSkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gG^#g#o2i#o;'S$_;'S;=`$v<%lO$_ZGc[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#i2i#i#j>x#j#o2i#o;'S$_;'S;=`$v<%lO$_ZH`UvRkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~HwO!s~",
tokenizers: [0, 1, 2, 3, tokenizer],
topRules: {"Program":[0,3]},
tokenPrec: 749
tokenPrec: 829
})

View File

@ -55,7 +55,7 @@ describe('string escape sequences', () => {
expect("'price is \\$10'").toMatchTree(`
String
StringFragment ${'price is '}
StringEscape \\$
EscapeSeq \\$
StringFragment 10
`)
})
@ -64,7 +64,7 @@ describe('string escape sequences', () => {
expect("'it\\'s working'").toMatchTree(`
String
StringFragment ${'it'}
StringEscape \\'
EscapeSeq \\'
StringFragment ${'s working'}
`)
})
@ -73,7 +73,7 @@ describe('string escape sequences', () => {
expect("'path\\\\file'").toMatchTree(`
String
StringFragment path
StringEscape \\\\
EscapeSeq \\\\
StringFragment file
`)
})
@ -82,7 +82,7 @@ describe('string escape sequences', () => {
expect("'line1\\nline2'").toMatchTree(`
String
StringFragment line1
StringEscape \\n
EscapeSeq \\n
StringFragment line2
`)
})
@ -91,7 +91,7 @@ describe('string escape sequences', () => {
expect("'col1\\tcol2'").toMatchTree(`
String
StringFragment col1
StringEscape \\t
EscapeSeq \\t
StringFragment col2
`)
})
@ -100,7 +100,7 @@ describe('string escape sequences', () => {
expect("'text\\rmore'").toMatchTree(`
String
StringFragment text
StringEscape \\r
EscapeSeq \\r
StringFragment more
`)
})
@ -108,11 +108,11 @@ describe('string escape sequences', () => {
test('multiple escape sequences', () => {
expect("'\\$10\\nTotal: \\$20'").toMatchTree(`
String
StringEscape \\$
EscapeSeq \\$
StringFragment 10
StringEscape \\n
EscapeSeq \\n
StringFragment ${'Total: '}
StringEscape \\$
EscapeSeq \\$
StringFragment 20
`)
})

View File

@ -0,0 +1,195 @@
import { describe, expect, test } from 'bun:test'
import '../shrimp.grammar' // Importing this so changes cause it to retest!
// Parse-tree tests for interpolation inside bare words (e.g. `path/$file`).
// Each case feeds a source snippet to the parser and compares the resulting
// tree against the expected node listing passed to `toMatchTree`.
describe('word interpolation', () => {
// Was `test.only`: a committed focus marker keeps the remaining cases in this
// file from running normally, so every case is now a plain `test`.
test('word with variable interpolation', () => {
expect('path/$file').toMatchTree(`
Word
WordFragment path/
Interpolation
Identifier file
`)
})

// `$(` opens an expression interpolation in the middle of a word.
test('word with expression interpolation', () => {
expect('prefix-$(123)').toMatchTree(`
Word
WordFragment prefix-
Interpolation
leftParen
Number 123
rightParen
`)
})

test('multiple interpolations in word', () => {
expect('$user/$file').toMatchTree(`
Word
Interpolation
Identifier user
WordFragment /
Interpolation
Identifier file
`)
})

// A `$` followed by a digit is not an interpolation start, so it stays literal.
test('dollar not followed by identifier stays in word', () => {
expect('price$10').toMatchTree(`
Word
WordFragment price$10
`)
})

test('escaped dollar in word', () => {
expect('price\\$10').toMatchTree(`
Word
WordFragment price
EscapeSeq
WordFragment 10
`)
})

// NOTE(review): the tokenizer only stops a word at `$` when the next character
// is a lowercase letter, an emoji, or `(` — confirm that an uppercase name
// such as `$HOME` is really expected to interpolate.
test('interpolation at start of word', () => {
expect('$HOME/documents').toMatchTree(`
Word
Interpolation
Identifier HOME
WordFragment /documents
`)
})

test('interpolation at end of word', () => {
expect('./path/$filename').toMatchTree(`
Word
WordFragment ./path/
Interpolation
Identifier filename
`)
})

// The interpolated expression may be a full function call with arguments.
test('complex expression interpolation', () => {
expect('output-$(add 1 2).txt').toMatchTree(`
Word
WordFragment output-
Interpolation
leftParen
FunctionCall
Identifier add
PositionalArg
Number 1
PositionalArg
Number 2
rightParen
WordFragment .txt
`)
})

test('emoji in interpolated identifier', () => {
expect('hello/$😎file').toMatchTree(`
Word
WordFragment hello/
Interpolation
Identifier 😎file
`)
})

// NOTE(review): the grammar's EscapeSeq rule lists `$`, `n`, `t`, `r`, `\` and
// `'` as escapable characters; a space is not among them — confirm this
// expectation matches the grammar.
test('escaped space in word', () => {
expect('my\\ file.txt').toMatchTree(`
Word
WordFragment my
EscapeSeq
WordFragment file.txt
`)
})

test('multiple escapes and interpolations', () => {
expect('pre\\$fix-$var-\\$end').toMatchTree(`
Word
WordFragment pre
EscapeSeq
WordFragment fix-
Interpolation
Identifier var
WordFragment -
EscapeSeq
WordFragment end
`)
})

// Regression guard: words with no `$` at all must still parse as before.
test('plain word without interpolation still works', () => {
expect('./file.txt').toMatchTree(`
Word
WordFragment ./file.txt
`)
})

test('word with URL-like content', () => {
expect('https://example.com/$path').toMatchTree(`
Word
WordFragment https://example.com/
Interpolation
Identifier path
`)
})

// Parenthesised sub-expressions may nest inside the interpolated expression.
test('nested expression in interpolation', () => {
expect('file-$(multiply (add 1 2) 3).txt').toMatchTree(`
Word
WordFragment file-
Interpolation
leftParen
FunctionCall
Identifier multiply
PositionalArg
ParenExpr
leftParen
FunctionCall
Identifier add
PositionalArg
Number 1
PositionalArg
Number 2
rightParen
PositionalArg
Number 3
rightParen
WordFragment .txt
`)
})
})
// Parse-tree tests for interpolated words used as function-call arguments.
// Each case checks that the word appears as a PositionalArg whose Word node
// is split into WordFragment and Interpolation children.
describe('word interpolation in function calls', () => {
// A single argument containing one interpolation between two fragments.
test('function call with interpolated word argument', () => {
expect('cat /home/$user/file.txt').toMatchTree(`
FunctionCall
Identifier cat
PositionalArg
Word
WordFragment /home/
Interpolation
Identifier user
WordFragment /file.txt
`)
})
// Two whitespace-separated arguments, each starting with an interpolation.
test('multiple interpolated word arguments', () => {
expect('cp $src/$file $dest/$file').toMatchTree(`
FunctionCall
Identifier cp
PositionalArg
Word
Interpolation
Identifier src
WordFragment /
Interpolation
Identifier file
PositionalArg
Word
Interpolation
Identifier dest
WordFragment /
Interpolation
Identifier file
`)
})
})

View File

@ -1,5 +1,5 @@
import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
import { Identifier, Word } from './shrimp.terms'
import { Identifier, Word, WordFragment } from './shrimp.terms'
// The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF.
@ -16,6 +16,15 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack
if (!isWordChar(ch)) break
// Stop at $ if it's followed by identifier start or (
// This allows word interpolation like path/$file or result-$(expr)
if (ch === 36 /* $ */) {
const nextCh = getFullCodePoint(input, pos + 1)
if (isLowercaseLetter(nextCh) || isEmoji(nextCh) || nextCh === 40 /* ( */) {
break
}
}
// Certain characters might end a word or identifier if they are followed by whitespace.
// This allows things like `a = hello; 2` or `if x: y` to parse correctly.
@ -34,7 +43,7 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack
}
input.advance(pos)
input.acceptToken(isValidIdentifier ? Identifier : Word)
input.acceptToken(isValidIdentifier ? Identifier : WordFragment)
})
const isWhiteSpace = (ch: number): boolean => {