This commit is contained in:
Corey Johnson 2025-10-15 16:18:18 -07:00
parent fe7abb8b21
commit d9bc5a64a4
9 changed files with 269 additions and 50 deletions

View File

@ -295,6 +295,7 @@ These discoveries came from implementing string interpolation with external toke
**The most surprising discovery**: Rule names determine whether nodes appear in the parse tree. **The most surprising discovery**: Rule names determine whether nodes appear in the parse tree.
**Lowercase rules get inlined** (no tree nodes): **Lowercase rules get inlined** (no tree nodes):
```lezer ```lezer
statement { assign | expr } // ❌ No "statement" node statement { assign | expr } // ❌ No "statement" node
assign { x "=" y } // ❌ No "assign" node assign { x "=" y } // ❌ No "assign" node
@ -302,6 +303,7 @@ expr { x | y } // ❌ No "expr" node
``` ```
**Capitalized rules create tree nodes**: **Capitalized rules create tree nodes**:
```lezer ```lezer
Statement { Assign | Expr } // ✅ Creates Statement node Statement { Assign | Expr } // ✅ Creates Statement node
Assign { x "=" y } // ✅ Creates Assign node Assign { x "=" y } // ✅ Creates Assign node
@ -339,6 +341,7 @@ Example: `x = 42` was parsing as `Program(Identifier,"=",Number)` instead of `Pr
**Reality**: External tokenizers work perfectly inside `@skip {}` blocks! The tokenizer gets called even when skip is disabled. **Reality**: External tokenizers work perfectly inside `@skip {}` blocks! The tokenizer gets called even when skip is disabled.
**Working pattern**: **Working pattern**:
```lezer ```lezer
@external tokens tokenizer from "./tokenizer" { Identifier, Word } @external tokens tokenizer from "./tokenizer" { Identifier, Word }
@ -357,6 +360,7 @@ Interpolation {
### 4. Single-Character Tokens Can Be Literals ### 4. Single-Character Tokens Can Be Literals
**Initial approach**: Define every single character as a token: **Initial approach**: Define every single character as a token:
```lezer ```lezer
@tokens { @tokens {
dollar[@name="$"] { "$" } dollar[@name="$"] { "$" }
@ -365,13 +369,14 @@ Interpolation {
``` ```
**Simpler approach**: Just use literals in the grammar rules: **Simpler approach**: Just use literals in the grammar rules:
```lezer ```lezer
Interpolation { Interpolation {
"$" Identifier | // Literal "$" "$" Identifier | // Literal "$"
"$" "(" expr ")" "$" "(" expr ")"
} }
StringEscape { EscapeSeq {
"\\" ("$" | "n" | ...) // Literal "\\" "\\" ("$" | "n" | ...) // Literal "\\"
} }
``` ```

View File

@ -23,7 +23,7 @@ const DEBUG = false
type Label = `.${string}` type Label = `.${string}`
// Process escape sequences in strings // Process escape sequences in strings
function processEscapeSequence(escapeSeq: string): string { function processEscapeSeq(escapeSeq: string): string {
// escapeSeq includes the backslash, e.g., "\n", "\$", "\\" // escapeSeq includes the backslash, e.g., "\n", "\$", "\\"
if (escapeSeq.length !== 2) return escapeSeq if (escapeSeq.length !== 2) return escapeSeq
@ -130,9 +130,9 @@ export class Compiler {
instructions.push(['PUSH', partValue]) instructions.push(['PUSH', partValue])
break break
case terms.StringEscape: case terms.EscapeSeq:
// Process escape sequence and push the result // Process escape sequence and push the result
const processed = processEscapeSequence(partValue) const processed = processEscapeSeq(partValue)
instructions.push(['PUSH', processed]) instructions.push(['PUSH', processed])
break break

View File

@ -177,7 +177,7 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
return ( return (
child.type.id === terms.StringFragment || child.type.id === terms.StringFragment ||
child.type.id === terms.Interpolation || child.type.id === terms.Interpolation ||
child.type.id === terms.StringEscape child.type.id === terms.EscapeSeq
) )
}) })
@ -186,10 +186,10 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
if ( if (
part.type.id !== terms.StringFragment && part.type.id !== terms.StringFragment &&
part.type.id !== terms.Interpolation && part.type.id !== terms.Interpolation &&
part.type.id !== terms.StringEscape part.type.id !== terms.EscapeSeq
) { ) {
throw new CompilerError( throw new CompilerError(
`String child must be StringFragment, Interpolation, or StringEscape, got ${part.type.name}`, `String child must be StringFragment, Interpolation, or EscapeSeq, got ${part.type.name}`,
part.from, part.from,
part.to part.to
) )

View File

@ -39,7 +39,7 @@
} }
@external tokens tokenizer from "./tokenizer" { Identifier, Word } @external tokens tokenizer from "./tokenizer" { Identifier, WordFragment }
@precedence { @precedence {
pipe @left, pipe @left,
@ -170,12 +170,13 @@ expression {
@skip {} { @skip {} {
String { "'" stringContent* "'" } String { "'" stringContent* "'" }
} }
stringContent { stringContent {
StringFragment | StringFragment |
Interpolation | Interpolation |
StringEscape EscapeSeq
} }
Interpolation { Interpolation {
@ -183,10 +184,18 @@ Interpolation {
"$" ParenExpr "$" ParenExpr
} }
StringEscape { EscapeSeq {
"\\" ("$" | "n" | "t" | "r" | "\\" | "'") "\\" ("$" | "n" | "t" | "r" | "\\" | "'")
} }
// A Word is one or more adjacent pieces: literal text fragments, `$` interpolations
// and backslash escape sequences.
// NOTE(review): String is declared inside `@skip {} { ... }`, but Word is not visibly
// wrapped the same way in this hunk — confirm skipped tokens cannot appear between pieces.
Word { wordContent+ }
// Lowercase rule name: per the project's grammar notes such rules are inlined and
// produce no tree node, so these alternatives appear directly under Word.
wordContent {
WordFragment | Interpolation | EscapeSeq
}
// We need expressionWithoutIdentifier to avoid conflicts in consumeToTerminator. // We need expressionWithoutIdentifier to avoid conflicts in consumeToTerminator.
// Without this, when parsing "my-var" at statement level, the parser can't decide: // Without this, when parsing "my-var" at statement level, the parser can't decide:
// - ambiguousFunctionCall → FunctionCallOrIdentifier → Identifier // - ambiguousFunctionCall → FunctionCallOrIdentifier → Identifier

View File

@ -1,7 +1,7 @@
// This file was generated by lezer-generator. You probably shouldn't edit it. // This file was generated by lezer-generator. You probably shouldn't edit it.
export const export const
Identifier = 1, Identifier = 1,
Word = 2, WordFragment = 2,
Program = 3, Program = 3,
PipeExpr = 4, PipeExpr = 4,
FunctionCall = 5, FunctionCall = 5,
@ -10,21 +10,22 @@ export const
FunctionCallOrIdentifier = 8, FunctionCallOrIdentifier = 8,
BinOp = 9, BinOp = 9,
ConditionalOp = 14, ConditionalOp = 14,
String = 23, Word = 23,
StringFragment = 24, Interpolation = 24,
Interpolation = 25, EscapeSeq = 25,
StringEscape = 26, String = 26,
Number = 27, StringFragment = 27,
Boolean = 28, Number = 28,
FunctionDef = 29, Boolean = 29,
Params = 31, FunctionDef = 30,
colon = 32, Params = 32,
end = 33, colon = 33,
Underscore = 34, end = 34,
NamedArg = 35, Underscore = 35,
NamedArgPrefix = 36, NamedArg = 36,
IfExpr = 38, NamedArgPrefix = 37,
ThenBlock = 41, IfExpr = 39,
ElsifExpr = 42, ThenBlock = 42,
ElseExpr = 44, ElsifExpr = 43,
Assign = 46 ElseExpr = 45,
Assign = 47

View File

@ -4,20 +4,20 @@ import {tokenizer} from "./tokenizer"
import {highlighting} from "./highlight" import {highlighting} from "./highlight"
export const parser = LRParser.deserialize({ export const parser = LRParser.deserialize({
version: 14, version: 14,
states: ".WQVQaOOO!rQbO'#CdO#SQPO'#CeO#bQPO'#DhO$[QaO'#CcO$cOSO'#CsOOQ`'#Dl'#DlO$qQPO'#DkO%YQaO'#DvOOQ`'#Cy'#CyOOQO'#Di'#DiO%bQPO'#DhO%pQaO'#DzOOQO'#DS'#DSOOQO'#Dh'#DhO%wQPO'#DgOOQ`'#Dg'#DgOOQ`'#D]'#D]QVQaOOOOQ`'#Dk'#DkOOQ`'#Cb'#CbO&PQaO'#DPOOQ`'#Dj'#DjOOQ`'#D^'#D^O&^QbO,58{O&}QaO,59vO%pQaO,59PO%pQaO,59PO'[QbO'#CdO(gQPO'#CeO(wQPO,58}O)YQPO,58}O)TQPO,58}O*TQPO,58}O*]QaO'#CuO*eQWO'#CvOOOO'#Dp'#DpOOOO'#D_'#D_O*yOSO,59_OOQ`,59_,59_OOQ`'#D`'#D`O+XQaO'#C{O+aQPO,5:bO+fQaO'#DbO+kQPO,58zO+|QPO,5:fO,TQPO,5:fOOQ`,5:R,5:ROOQ`-E7Z-E7ZOOQ`,59k,59kOOQ`-E7[-E7[OOQO1G/b1G/bOOQO1G.k1G.kO,YQPO1G.kO%pQaO,59UO%pQaO,59UOOQ`1G.i1G.iOOOO,59a,59aOOOO,59b,59bOOOO-E7]-E7]OOQ`1G.y1G.yOOQ`-E7^-E7^O,tQaO1G/|O-UQbO'#CdOOQO,59|,59|OOQO-E7`-E7`O-uQaO1G0QOOQO1G.p1G.pO.VQPO1G.pO.aQPO7+%hO.fQaO7+%iOOQO'#DU'#DUOOQO7+%l7+%lO.vQaO7+%mOOQ`<<IS<<ISO/^QPO'#DaO/cQaO'#DyO/yQPO<<ITOOQO'#DV'#DVO0OQPO<<IXOOQ`,59{,59{OOQ`-E7_-E7_OOQ`AN>oAN>oO%pQaO'#DWOOQO'#Dc'#DcO0ZQPOAN>sO0fQPO'#DYOOQOAN>sAN>sO0kQPOAN>sO0pQPO,59rO0wQPO,59rOOQO-E7a-E7aOOQOG24_G24_O0|QPOG24_O1RQPO,59tO1WQPO1G/^OOQOLD)yLD)yO.fQaO1G/`O.vQaO7+$xOOQO7+$z7+$zOOQO<<Hd<<Hd", states: 
".vQVQaOOO#OQbO'#CdO#`QPO'#CeO#nQPO'#DjO$nQaO'#CcO$uQaO'#CtO$}QSO'#CuOOQ`'#Dq'#DqOOQ`'#D`'#D`O%cQaO'#CsO&sOWO'#CvOOQ`'#Dn'#DnO'RQPO'#DmO'jQaO'#DyOOQ`'#Cz'#CzOOQO'#Dk'#DkO'rQPO'#DjO(QQaO'#D}OOQO'#DT'#DTOOQO'#Dj'#DjO(XQPO'#DiOOQ`'#Di'#DiOOQ`'#D^'#D^QVQaOOOOQ`'#Cs'#CsOOQ`'#Dm'#DmOOQ`'#Cb'#CbO(aQaO'#DQOOQ`'#Dl'#DlOOQ`'#D_'#D_O(nQbO,58{O)_QaO,59wO(QQaO,59PO(QQaO,59PO)lQbO'#CdO*wQPO'#CeO+XQPO,58}O+jQPO,58}O+eQPO,58}O,eQPO,58}OOQ`,59`,59`OOQ`,59a,59aOOQ`-E7^-E7^OOOO'#Dx'#DxOOOO'#Da'#DaO,mOWO,59bOOQ`,59b,59bOOQ`'#Db'#DbO,{QaO'#C|O-TQPO,5:eO-YQaO'#DdO-_QPO,58zO-pQPO,5:iO-wQPO,5:iOOQ`,5:T,5:TOOQ`-E7[-E7[OOQ`,59l,59lOOQ`-E7]-E7]OOQO1G/c1G/cOOQO1G.k1G.kO-|QPO1G.kO(QQaO,59UO(QQaO,59UOOQ`1G.i1G.iOOOO-E7_-E7_OOQ`1G.|1G.|OOQ`-E7`-E7`O.hQaO1G0PO.xQbO'#CdOOQO,5:O,5:OOOQO-E7b-E7bO/iQaO1G0TOOQO1G.p1G.pO/yQPO1G.pO0TQPO7+%kO0YQaO7+%lOOQO'#DV'#DVOOQO7+%o7+%oO0jQaO7+%pOOQ`<<IV<<IVO1QQPO'#DcO1VQaO'#D|O1mQPO<<IWOOQO'#DW'#DWO1rQPO<<I[OOQ`,59},59}OOQ`-E7a-E7aOOQ`AN>rAN>rO(QQaO'#DXOOQO'#De'#DeO1}QPOAN>vO2YQPO'#DZOOQOAN>vAN>vO2_QPOAN>vO2dQPO,59sO2kQPO,59sOOQO-E7c-E7cOOQOG24bG24bO2pQPOG24bO2uQPO,59uO2zQPO1G/_OOQOLD)|LD)|O0YQaO1G/aO0jQaO7+$yOOQO7+${7+${OOQO<<He<<He",
stateData: "1`~O!YOS~OPPOQUOkUOlUOnWOw[O!aSO!cTO!l`O~OPcOQUOkUOlUOnWOrdOteO!aSO!cTOY!_XZ!_X[!_X]!_XuWX~O_iO!lWX!pWXqWX~PtOYjOZjO[kO]kO~OYjOZjO[kO]kO!l![X!p![Xq![X~OQUOkUOlUO!aSO!cTO~OPlO~P#yOhtO!cwO!erO!fsO~OY!_XZ!_X[!_X]!_X!l![X!p![Xq![X~OPxOpoP~Ou{O!l![X!p![Xq![X~OPcO~P#yO!l!PO!p!PO~OPcOnWOr!RO~P#yOPcOnWOrdOteOuTa!lTa!pTa!bTaqTa~P#yOPPOnWOw[O~P#yO_!_X`!_Xa!_Xb!_Xc!_Xd!_Xe!_Xf!_X!bWX~PtO_!WO`!WOa!WOb!WOc!WOd!WOe!XOf!XO~OYjOZjO[kO]kO~P'{OYjOZjO[kO]kO!b!YO~O!b!YOY!_XZ!_X[!_X]!_X_!_X`!_Xa!_Xb!_Xc!_Xd!_Xe!_Xf!_X~Ou{O!b!YO~OP!ZO!aSO~O!c![O!e![O!f![O!g![O!h![O!i![O~OhtO!c!^O!erO!fsO~OPxOpoX~Op!`O~OP!aO~Ou{O!lSa!pSa!bSaqSa~Op!dO~P'{Op!dO~OYjOZjO[Xi]Xi!lXi!pXi!bXiqXi~OPPOnWOw[O!l!hO~P#yOPcOnWOrdOteOuWX!lWX!pWX!bWXqWX~P#yOPPOnWOw[O!l!kO~P#yO!b^ip^i~P'{Oq!lO~OPPOnWOw[Oq!mP~P#yOPPOnWOw[Oq!mP{!mP}!mP~P#yO!l!rO~OPPOnWOw[Oq!mX{!mX}!mX~P#yOq!tO~Oq!yO{!uO}!xO~Oq#OO{!uO}!xO~Op#QO~Oq#OO~Op#RO~P'{Op#RO~Oq#SO~O!l#TO~O!l#UO~Ok]~", stateData: "3S~O![OS~OPPOQVOlZOmZOo]OxaO!cSO!fTO!gUO!kYO!oeO~OPiOQVOlZOmZOo]OsjOukO!cSO!fTO!gUO!kYOY!aXZ!aX[!aX]!aXvWX~O_oO!oWX!sWXrWX~PzOYpOZpO[qO]qO~OYpOZpO[qO]qO!o!^X!s!^Xr!^X~OQVOlZOmZO!cSO!fTO!gUO!kYO~OPrO~P$VOPxO!cSO~O!fyO!gyO!hyO!iyO!jyO!kyO~OQVO!fTO!gUOYgXZgX[gX]gX!ogX!sgX_gX`gXagXbgXcgXdgXegXfgX!dgXqgXrgX~Ok{O!fTO!gUO!k!OO~OY!aXZ!aX[!aX]!aX!o!^X!s!^Xr!^X~OP!POqpP~Ov!SO!o!^X!s!^Xr!^X~OPiO~P$VO!o!WO!s!WO~OPiOo]Os!YO~P$VOPiOo]OsjOukOvTa!oTa!sTa!dTarTa~P$VOPPOo]OxaO~P$VO_!aX`!aXa!aXb!aXc!aXd!aXe!aXf!aX!dWX~PzO_!_O`!_Oa!_Ob!_Oc!_Od!_Oe!`Of!`O~OYpOZpO[qO]qO~P*]OYpOZpO[qO]qO!d!aO~O!d!aOY!aXZ!aX[!aX]!aX_!aX`!aXa!aXb!aXc!aXd!aXe!aXf!aX~Ov!SO!d!aO~Ok{O!fTO!gUO!k!cO~OP!POqpX~Oq!eO~OP!fO~Ov!SO!oSa!sSa!dSarSa~Oq!iO~P*]Oq!iO~OYpOZpO[Xi]Xi!oXi!sXi!dXirXi~OPPOo]OxaO!o!mO~P$VOPiOo]OsjOukOvWX!oWX!sWX!dWXrWX~P$VOPPOo]OxaO!o!pO~P$VO!d^iq^i~P*]Or!qO~OPPOo]OxaOr!pP~P$VOPPOo]OxaOr!pP|!pP!O!pP~P$VO!o!wO~OPPOo]OxaOr!pX|!pX!O!pX~P$VOr!yO~Or#OO|!zO!O!}O~Or#TO|!zO!O!}O~Oq#VO~Or#TO~Oq#WO~P*]Oq#WO~Or#XO~O!o#YO~O!o#ZO~Ol]~",
goto: "+V!pPPPP!q#Q#`#f#Q$RPPPP$hPPPPPPPP$tP%^%^PP%bP%wPPP#`PP%zP&W&Z&dP&hP%z&n&t&|'S'Y'c'jPPP'p't(Y(l(r)nPPP*[PPPPP*`*`P*q*y*yd^Obi!`!d!h!k!n#T#URpSiYOSbi{!`!d!h!k!n#T#UXfPhl!a|UOPS[behijkl!W!X!`!a!d!h!k!n!u#T#UR!ZrdRObi!`!d!h!k!n#T#UQnSQ!UjR!VkQpSQ!O[Q!e!XR!|!u}UOPS[behijkl!W!X!`!a!d!h!k!n!u#T#UTtTvd^Obi!`!d!h!k!n#T#UWdPhl!aR!ReRzWe^Obi!`!d!h!k!n#T#UR!j!dQ!q!kQ#V#TR#W#UT!v!q!wQ!z!qR#P!wQbOR!QbUhPl!aR!ShQvTR!]vQyWR!_yW!n!h!k#T#UR!s!nS|ZqR!c|Q!w!qR!}!wTaObS_ObQ!TiQ!g!`Q!i!dZ!m!h!k!n#T#UdZObi!`!d!h!k!n#T#UQqSR!b{XgPhl!adQObi!`!d!h!k!n#T#UWdPhl!aQmSQ}[Q!ReQ!UjQ!VkQ!e!WQ!f!XR!{!udVObi!`!d!h!k!n#T#UfcP[ehjkl!W!X!a!uRoSTuTvoXOPbehil!`!a!d!h!k!n#T#UQ!o!hV!p!k#T#Ue]Obi!`!d!h!k!n#T#U", goto: ",{!sPPPP!t#T#c#i#T$UPPPP$kPPPPPPPP$w%a%a$wPPP&OP&ePPP#cPP&hP&t&w'QP'UP&h'['b'j(X(_(e(n(uPPP({)P)e)w)}*yPP+gPPPPPP,Q,U,UP,g,o,odcOgo!e!i!m!p!s#Y#ZRvSi_OSgo!S!e!i!m!p!s#Y#ZXlPnr!f|ZOPSagknopqr!_!`!e!f!i!m!p!s!z#Y#ZRxTdROgo!e!i!m!p!s#Y#ZQtSQ!]pR!^qQvSQ!VaQ!j!`R#R!z}ZOPSagknopqr!_!`!e!f!i!m!p!s!z#Y#Z!OVOPSXagknopqr!_!`!e!f!i!m!p!s!z#Y#ZT{Y}dcOgo!e!i!m!p!s#Y#ZWjPnr!fR!YkR!R]ecOgo!e!i!m!p!s#Y#ZR!o!iQ!v!pQ#[#YR#]#ZT!{!v!|Q#P!vR#U!|QgOR!XgUnPr!fR!ZnrXOSagopq!_!`!e!i!m!p!s!z#Y#ZYhPknr!fRzXQ}YR!b}Q!Q]R!d!QW!s!m!p#Y#ZR!x!sS!T`wR!h!TQ!|!vR#S!|TfOgSdOgQ![oQ!l!eQ!n!iZ!r!m!p!s#Y#Zd`Ogo!e!i!m!p!s#Y#ZQwSR!g!SXmPnr!fdQOgo!e!i!m!p!s#Y#ZWjPnr!fQsSQ!UaQ!YkQ!]pQ!^qQ!j!_Q!k!`R#Q!zd[Ogo!e!i!m!p!s#Y#ZfiPaknpqr!_!`!f!zRuS!PWOPSXagknopqr!_!`!e!f!i!m!p!s!z#Y#ZT|Y}o^OPgknor!e!f!i!m!p!s#Y#ZQ!t!mV!u!p#Y#ZebOgo!e!i!m!p!s#Y#Z",
nodeNames: "⚠ Identifier Word Program PipeExpr FunctionCall PositionalArg ParenExpr FunctionCallOrIdentifier BinOp operator operator operator operator ConditionalOp operator operator operator operator operator operator operator operator String StringFragment Interpolation StringEscape Number Boolean FunctionDef keyword Params colon end Underscore NamedArg NamedArgPrefix operator IfExpr keyword ThenBlock ThenBlock ElsifExpr keyword ElseExpr keyword Assign", nodeNames: "⚠ Identifier WordFragment Program PipeExpr FunctionCall PositionalArg ParenExpr FunctionCallOrIdentifier BinOp operator operator operator operator ConditionalOp operator operator operator operator operator operator operator operator Word Interpolation EscapeSeq String StringFragment Number Boolean FunctionDef keyword Params colon end Underscore NamedArg NamedArgPrefix operator IfExpr keyword ThenBlock ThenBlock ElsifExpr keyword ElseExpr keyword Assign",
maxTerm: 78, maxTerm: 81,
nodeProps: [ nodeProps: [
["closedBy", 32,"end"], ["closedBy", 33,"end"],
["openedBy", 33,"colon"] ["openedBy", 34,"colon"]
], ],
propSources: [highlighting], propSources: [highlighting],
skippedNodes: [0], skippedNodes: [0],
repeatNodeCount: 7, repeatNodeCount: 8,
tokenData: "Hw~R!SOX$_XY$|YZ%gZp$_pq$|qr&Qrt$_tu'Yuw$_wx'_xy'dyz'}z{(h{|)R|}$_}!O)l!O!P$_!P!Q,b!Q![*]![!],{!]!^%g!^!_-f!_!`.p!`!a/Z!a#O$_#O#P0e#P#R$_#R#S0j#S#T$_#T#U1T#U#X2i#X#Y5O#Y#Z<U#Z#]2i#]#^Aa#^#b2i#b#cCR#c#dCx#d#f2i#f#gEj#g#h2i#h#iFa#i#o2i#o#p$_#p#qHX#q;'S$_;'S;=`$v<%l~$_~O$_~~HrS$dUhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_S$yP;=`<%l$__%TUhS!YZOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V%nUhS!lROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V&VWhSOt$_uw$_x!_$_!_!`&o!`#O$_#P;'S$_;'S;=`$v<%lO$_V&vU`RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~'_O!e~~'dO!c~V'kUhS!aROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(UUhS!bROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(oUYRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)YU[RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)sWhS]ROt$_uw$_x!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V*dYhSkROt$_uw$_x!O$_!O!P+S!P!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V+XWhSOt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V+xWhSkROt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V,iUZRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_T-SUhSpPOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V-mWaRhSOt$_uw$_x!_$_!_!`.V!`#O$_#P;'S$_;'S;=`$v<%lO$_V.^UbRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V.wU_RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V/bWcRhSOt$_uw$_x!_$_!_!`/z!`#O$_#P;'S$_;'S;=`$v<%lO$_V0RUdRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~0jO!f~V0qUhSrROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V1Y[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#b2i#b#c3^#c#o2i#o;'S$_;'S;=`$v<%lO$_U2VUtQhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_U2nYhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V3c[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#W2i#W#X4X#X#o2i#o;'S$_;'S;=`$v<%lO$_V4`YeRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V5T^hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#`2i#`#a6P#a#b2i#b#c:d#c#o2i#o;'S$_;'S;=`$v<%lO$_V6U[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#g2i#g#h6z#h#o2i#o;'S$_;'S;=`$v<%lO$_V7P^hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#X2i#X#Y7{#Y#]2i#]#^8r#^#o2i#o;'S$_;'S;=`$v<%lO$_V8SY}PhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V8w[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#Y2i#Y#Z9m#Z#o2i#o;'S$_;
'S;=`$v<%lO$_V9tY{PhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V:i[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#W2i#W#X;_#X#o2i#o;'S$_;'S;=`$v<%lO$_V;fYhSqROt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V<Z]hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#U=S#U#b2i#b#c@j#c#o2i#o;'S$_;'S;=`$v<%lO$_V=X[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#`2i#`#a=}#a#o2i#o;'S$_;'S;=`$v<%lO$_V>S[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#g2i#g#h>x#h#o2i#o;'S$_;'S;=`$v<%lO$_V>}[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#X2i#X#Y?s#Y#o2i#o;'S$_;'S;=`$v<%lO$_V?zYlRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V@qYnRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_VAf[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#Y2i#Y#ZB[#Z#o2i#o;'S$_;'S;=`$v<%lO$_VBcYwPhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^CYY!gWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_VC}[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gDs#g#o2i#o;'S$_;'S;=`$v<%lO$_VDzYfRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^EqY!iWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$__Fh[!hWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gG^#g#o2i#o;'S$_;'S;=`$v<%lO$_VGc[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#i2i#i#j>x#j#o2i#o;'S$_;'S;=`$v<%lO$_VH`UuRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~HwO!p~", tokenData: 
"Hw~R!SOX$_XY$|YZ%gZp$_pq$|qr&Qrt$_tu'Yuw$_wx'_xy'dyz'}z{(h{|)R|}$_}!O)l!O!P$_!P!Q,b!Q![*]![!],{!]!^%g!^!_-f!_!`.p!`!a/Z!a#O$_#O#P0e#P#R$_#R#S0j#S#T$_#T#U1T#U#X2i#X#Y5O#Y#Z<U#Z#]2i#]#^Aa#^#b2i#b#cCR#c#dCx#d#f2i#f#gEj#g#h2i#h#iFa#i#o2i#o#p$_#p#qHX#q;'S$_;'S;=`$v<%l~$_~O$_~~HrW$dUkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_W$yP;=`<%l$__%TUkW![VOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z%nUkW!oROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z&VWkWOt$_uw$_x!_$_!_!`&o!`#O$_#P;'S$_;'S;=`$v<%lO$_Z&vU`RkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~'_O!f~~'dO!k~Z'kUkW!cROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z(UUkW!dROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z(oUYRkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z)YU[RkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z)sWkW]ROt$_uw$_x!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_Z*dYkWlROt$_uw$_x!O$_!O!P+S!P!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_Z+XWkWOt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_Z+xWkWlROt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_Z,iUZRkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_X-SUkWqPOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z-mWaRkWOt$_uw$_x!_$_!_!`.V!`#O$_#P;'S$_;'S;=`$v<%lO$_Z.^UbRkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z.wU_RkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z/bWcRkWOt$_uw$_x!_$_!_!`/z!`#O$_#P;'S$_;'S;=`$v<%lO$_Z0RUdRkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~0jO!g~Z0qUkWsROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Z1Y[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#b2i#b#c3^#c#o2i#o;'S$_;'S;=`$v<%lO$_Y2VUuQkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_Y2nYkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_Z3c[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#W2i#W#X4X#X#o2i#o;'S$_;'S;=`$v<%lO$_Z4`YeRkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_Z5T^kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#`2i#`#a6P#a#b2i#b#c:d#c#o2i#o;'S$_;'S;=`$v<%lO$_Z6U[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#g2i#g#h6z#h#o2i#o;'S$_;'S;=`$v<%lO$_Z7P^kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#X2i#X#Y7{#Y#]2i#]#^8r#^#o2i#o;'S$_;'S;=`$v<%lO$_Z8SY!OPkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_Z8w[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#Y2i#Y#Z9m#Z#o2i#o;'S$_;'S;=`$v<%l
O$_Z9tY|PkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_Z:i[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#W2i#W#X;_#X#o2i#o;'S$_;'S;=`$v<%lO$_Z;fYkWrROt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_Z<Z]kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#U=S#U#b2i#b#c@j#c#o2i#o;'S$_;'S;=`$v<%lO$_Z=X[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#`2i#`#a=}#a#o2i#o;'S$_;'S;=`$v<%lO$_Z>S[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#g2i#g#h>x#h#o2i#o;'S$_;'S;=`$v<%lO$_Z>}[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#X2i#X#Y?s#Y#o2i#o;'S$_;'S;=`$v<%lO$_Z?zYmRkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_Z@qYoRkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_ZAf[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#Y2i#Y#ZB[#Z#o2i#o;'S$_;'S;=`$v<%lO$_ZBcYxPkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^CYY!hSkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_ZC}[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gDs#g#o2i#o;'S$_;'S;=`$v<%lO$_ZDzYfRkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^EqY!jSkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$__Fh[!iSkWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gG^#g#o2i#o;'S$_;'S;=`$v<%lO$_ZGc[kWOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#i2i#i#j>x#j#o2i#o;'S$_;'S;=`$v<%lO$_ZH`UvRkWOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~HwO!s~",
tokenizers: [0, 1, 2, 3, tokenizer], tokenizers: [0, 1, 2, 3, tokenizer],
topRules: {"Program":[0,3]}, topRules: {"Program":[0,3]},
tokenPrec: 749 tokenPrec: 829
}) })

View File

@ -55,7 +55,7 @@ describe('string escape sequences', () => {
expect("'price is \\$10'").toMatchTree(` expect("'price is \\$10'").toMatchTree(`
String String
StringFragment ${'price is '} StringFragment ${'price is '}
StringEscape \\$ EscapeSeq \\$
StringFragment 10 StringFragment 10
`) `)
}) })
@ -64,7 +64,7 @@ describe('string escape sequences', () => {
expect("'it\\'s working'").toMatchTree(` expect("'it\\'s working'").toMatchTree(`
String String
StringFragment ${'it'} StringFragment ${'it'}
StringEscape \\' EscapeSeq \\'
StringFragment ${'s working'} StringFragment ${'s working'}
`) `)
}) })
@ -73,7 +73,7 @@ describe('string escape sequences', () => {
expect("'path\\\\file'").toMatchTree(` expect("'path\\\\file'").toMatchTree(`
String String
StringFragment path StringFragment path
StringEscape \\\\ EscapeSeq \\\\
StringFragment file StringFragment file
`) `)
}) })
@ -82,7 +82,7 @@ describe('string escape sequences', () => {
expect("'line1\\nline2'").toMatchTree(` expect("'line1\\nline2'").toMatchTree(`
String String
StringFragment line1 StringFragment line1
StringEscape \\n EscapeSeq \\n
StringFragment line2 StringFragment line2
`) `)
}) })
@ -91,7 +91,7 @@ describe('string escape sequences', () => {
expect("'col1\\tcol2'").toMatchTree(` expect("'col1\\tcol2'").toMatchTree(`
String String
StringFragment col1 StringFragment col1
StringEscape \\t EscapeSeq \\t
StringFragment col2 StringFragment col2
`) `)
}) })
@ -100,7 +100,7 @@ describe('string escape sequences', () => {
expect("'text\\rmore'").toMatchTree(` expect("'text\\rmore'").toMatchTree(`
String String
StringFragment text StringFragment text
StringEscape \\r EscapeSeq \\r
StringFragment more StringFragment more
`) `)
}) })
@ -108,11 +108,11 @@ describe('string escape sequences', () => {
test('multiple escape sequences', () => { test('multiple escape sequences', () => {
expect("'\\$10\\nTotal: \\$20'").toMatchTree(` expect("'\\$10\\nTotal: \\$20'").toMatchTree(`
String String
StringEscape \\$ EscapeSeq \\$
StringFragment 10 StringFragment 10
StringEscape \\n EscapeSeq \\n
StringFragment ${'Total: '} StringFragment ${'Total: '}
StringEscape \\$ EscapeSeq \\$
StringFragment 20 StringFragment 20
`) `)
}) })

View File

@ -0,0 +1,195 @@
import { describe, expect, test } from 'bun:test'
import '../shrimp.grammar' // Importing this so changes cause it to retest!
// Parse-tree expectations for bare words that mix literal fragments, `$` interpolations
// and backslash escapes. Each case feeds a source snippet to `toMatchTree` and compares
// the result against the expected node listing.
// NOTE(review): the first case used to be `test.only`, so the rest of this file may never
// have run as part of the normal suite — confirm every case below passes now that the
// focus marker is gone.
describe('word interpolation', () => {
// Focus marker removed: a committed `.only` can keep the sibling cases from running.
test('word with variable interpolation', () => {
expect('path/$file').toMatchTree(`
Word
WordFragment path/
Interpolation
Identifier file
`)
})
// `$(` starts an expression interpolation.
test('word with expression interpolation', () => {
expect('prefix-$(123)').toMatchTree(`
Word
WordFragment prefix-
Interpolation
leftParen
Number 123
rightParen
`)
})
test('multiple interpolations in word', () => {
expect('$user/$file').toMatchTree(`
Word
Interpolation
Identifier user
WordFragment /
Interpolation
Identifier file
`)
})
// `$` followed by a digit is expected to stay part of the literal fragment.
test('dollar not followed by identifier stays in word', () => {
expect('price$10').toMatchTree(`
Word
WordFragment price$10
`)
})
// The JS string 'price\\$10' is the source text price\$10.
test('escaped dollar in word', () => {
expect('price\\$10').toMatchTree(`
Word
WordFragment price
EscapeSeq
WordFragment 10
`)
})
// NOTE(review): the tokenizer change in this commit stops a fragment at `$` only when the
// next char is a lowercase letter, an emoji or `(` — confirm uppercase `$HOME` really
// yields an Interpolation.
test('interpolation at start of word', () => {
expect('$HOME/documents').toMatchTree(`
Word
Interpolation
Identifier HOME
WordFragment /documents
`)
})
test('interpolation at end of word', () => {
expect('./path/$filename').toMatchTree(`
Word
WordFragment ./path/
Interpolation
Identifier filename
`)
})
test('complex expression interpolation', () => {
expect('output-$(add 1 2).txt').toMatchTree(`
Word
WordFragment output-
Interpolation
leftParen
FunctionCall
Identifier add
PositionalArg
Number 1
PositionalArg
Number 2
rightParen
WordFragment .txt
`)
})
test('emoji in interpolated identifier', () => {
expect('hello/$😎file').toMatchTree(`
Word
WordFragment hello/
Interpolation
Identifier 😎file
`)
})
// NOTE(review): the EscapeSeq rule shown in the grammar lists only $ n t r \ ' after the
// backslash — confirm an escaped space is meant to parse as EscapeSeq.
test('escaped space in word', () => {
expect('my\\ file.txt').toMatchTree(`
Word
WordFragment my
EscapeSeq
WordFragment file.txt
`)
})
test('multiple escapes and interpolations', () => {
expect('pre\\$fix-$var-\\$end').toMatchTree(`
Word
WordFragment pre
EscapeSeq
WordFragment fix-
Interpolation
Identifier var
WordFragment -
EscapeSeq
WordFragment end
`)
})
// Regression guard: a word with no `$` or backslash is a single fragment.
test('plain word without interpolation still works', () => {
expect('./file.txt').toMatchTree(`
Word
WordFragment ./file.txt
`)
})
test('word with URL-like content', () => {
expect('https://example.com/$path').toMatchTree(`
Word
WordFragment https://example.com/
Interpolation
Identifier path
`)
})
// Parentheses inside the interpolated expression must nest without ending the interpolation early.
test('nested expression in interpolation', () => {
expect('file-$(multiply (add 1 2) 3).txt').toMatchTree(`
Word
WordFragment file-
Interpolation
leftParen
FunctionCall
Identifier multiply
PositionalArg
ParenExpr
leftParen
FunctionCall
Identifier add
PositionalArg
Number 1
PositionalArg
Number 2
rightParen
PositionalArg
Number 3
rightParen
WordFragment .txt
`)
})
})
// Checks that interpolated words are accepted as positional arguments of a function call:
// each whitespace-separated argument is expected to become its own PositionalArg > Word.
describe('word interpolation in function calls', () => {
// A single argument with an interpolation in the middle of a path.
test('function call with interpolated word argument', () => {
expect('cat /home/$user/file.txt').toMatchTree(`
FunctionCall
Identifier cat
PositionalArg
Word
WordFragment /home/
Interpolation
Identifier user
WordFragment /file.txt
`)
})
// Two arguments that each begin with an interpolation; the space between them
// must separate the Words rather than join them.
test('multiple interpolated word arguments', () => {
expect('cp $src/$file $dest/$file').toMatchTree(`
FunctionCall
Identifier cp
PositionalArg
Word
Interpolation
Identifier src
WordFragment /
Interpolation
Identifier file
PositionalArg
Word
Interpolation
Identifier dest
WordFragment /
Interpolation
Identifier file
`)
})
})

View File

@ -1,5 +1,5 @@
import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr' import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
import { Identifier, Word } from './shrimp.terms' import { Identifier, Word, WordFragment } from './shrimp.terms'
// The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF. // The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF.
@ -16,6 +16,15 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack
if (!isWordChar(ch)) break if (!isWordChar(ch)) break
// Stop at $ if it's followed by identifier start or (
// This allows word interpolation like path/$file or result-$(expr)
if (ch === 36 /* $ */) {
const nextCh = getFullCodePoint(input, pos + 1)
if (isLowercaseLetter(nextCh) || isEmoji(nextCh) || nextCh === 40 /* ( */) {
break
}
}
// Certain characters might end a word or identifier if they are followed by whitespace. // Certain characters might end a word or identifier if they are followed by whitespace.
// This allows things like `a = hello; 2` or if `x: y` to parse correctly // This allows things like `a = hello; 2` or if `x: y` to parse correctly
// and work as expected. // and work as expected.
@ -34,7 +43,7 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack
} }
input.advance(pos) input.advance(pos)
input.acceptToken(isValidIdentifier ? Identifier : Word) input.acceptToken(isValidIdentifier ? Identifier : WordFragment)
}) })
const isWhiteSpace = (ch: number): boolean => { const isWhiteSpace = (ch: number): boolean => {