fix(parser): make DotGet whitespace-sensitive

- Add IdentifierBeforeDot token emitted when identifier immediately precedes '.'
- Move DotGet into @skip {} block using IdentifierBeforeDot
- Prevents 'basename . prop' from parsing as DotGet
- Allows 'basename.prop' to work as expected when identifier is in scope
- Fixes test: 'a word can be contained in parens'

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Corey Johnson 2025-10-17 10:40:28 -07:00
parent d894713744
commit 8a29090364
6 changed files with 166 additions and 97 deletions

View File

@ -23,7 +23,7 @@
Underscore { "_" } Underscore { "_" }
Null { "null" } Null { "null" }
Regex { "//" (![/\\\n[] | "\\" ![\n] | "[" (![\n\\\]] | "\\" ![\n])* "]")+ ("//" $[gimsuy]*)? } // Stolen from the lezer JavaScript grammar Regex { "//" (![/\\\n[] | "\\" ![\n] | "[" (![\n\\\]] | "\\" ![\n])* "]")+ ("//" $[gimsuy]*)? } // Stolen from the lezer JavaScript grammar
"fn" [@name=keyword] Fn[@name=keyword] { "fn" }
"if" [@name=keyword] "if" [@name=keyword]
"elsif" [@name=keyword] "elsif" [@name=keyword]
"else" [@name=keyword] "else" [@name=keyword]
@ -43,7 +43,7 @@
} }
@external tokens tokenizer from "./tokenizer" { Identifier, Word } @external tokens tokenizer from "./tokenizer" { Identifier, Word, IdentifierBeforeDot }
@precedence { @precedence {
pipe @left, pipe @left,
@ -108,11 +108,11 @@ FunctionDef {
} }
singleLineFunctionDef { singleLineFunctionDef {
"fn" Params colon consumeToTerminator end Fn Params colon consumeToTerminator end
} }
multilineFunctionDef { multilineFunctionDef {
"fn" Params colon newlineOrSemicolon block end Fn Params colon newlineOrSemicolon block end
} }
IfExpr { IfExpr {
@ -158,10 +158,6 @@ Assign {
Identifier "=" consumeToTerminator Identifier "=" consumeToTerminator
} }
DotGet {
Identifier "." Identifier
}
BinOp { BinOp {
(expression | BinOp) !multiplicative "*" (expression | BinOp) | (expression | BinOp) !multiplicative "*" (expression | BinOp) |
(expression | BinOp) !multiplicative "/" (expression | BinOp) | (expression | BinOp) !multiplicative "/" (expression | BinOp) |
@ -178,8 +174,12 @@ expression {
} }
@skip {} { @skip {} {
DotGet {
IdentifierBeforeDot "." Identifier
}
String { "'" stringContent* "'" } String { "'" stringContent* "'" }
} }
stringContent { stringContent {

View File

@ -2,32 +2,34 @@
export const export const
Identifier = 1, Identifier = 1,
Word = 2, Word = 2,
Program = 3, IdentifierBeforeDot = 3,
PipeExpr = 4, Program = 4,
FunctionCall = 5, PipeExpr = 5,
PositionalArg = 6, FunctionCall = 6,
ParenExpr = 7, PositionalArg = 7,
FunctionCallOrIdentifier = 8, ParenExpr = 8,
BinOp = 9, FunctionCallOrIdentifier = 9,
ConditionalOp = 14, BinOp = 10,
String = 23, ConditionalOp = 15,
StringFragment = 24, String = 24,
Interpolation = 25, StringFragment = 25,
EscapeSeq = 26, Interpolation = 26,
Number = 27, EscapeSeq = 27,
Boolean = 28, Number = 28,
Regex = 29, Boolean = 29,
Null = 30, Regex = 30,
DotGet = 31, Null = 31,
FunctionDef = 32, DotGet = 32,
Params = 34, FunctionDef = 33,
colon = 35, Fn = 34,
end = 36, Params = 35,
Underscore = 37, colon = 36,
NamedArg = 38, end = 37,
NamedArgPrefix = 39, Underscore = 38,
IfExpr = 41, NamedArg = 39,
ThenBlock = 44, NamedArgPrefix = 40,
ElsifExpr = 45, IfExpr = 42,
ElseExpr = 47, ThenBlock = 45,
Assign = 49 ElsifExpr = 46,
ElseExpr = 48,
Assign = 50

View File

@ -5,21 +5,21 @@ import {trackScope} from "./scopeTracker"
import {highlighting} from "./highlight" import {highlighting} from "./highlight"
export const parser = LRParser.deserialize({ export const parser = LRParser.deserialize({
version: 14, version: 14,
states: ".pQVQaOOO#RQbO'#CdO#cQPO'#CeO#qQPO'#DkO$qQaO'#CcO$xOSO'#CsOOQ`'#Do'#DoO%WQPO'#DnO%oQaO'#DzOOQ`'#C|'#C|OOQO'#Dl'#DlO%wQPO'#DkO&VQaO'#EOOOQO'#DV'#DVOOQO'#Dk'#DkO&^QPO'#DjOOQ`'#Dj'#DjOOQ`'#D`'#D`QVQaOOO&wQbO'#DnO'qQaO,59gOOQ`'#Dn'#DnOOQ`'#Cb'#CbO'vQaO'#DSOOQ`'#Dm'#DmOOQ`'#Da'#DaO(TQbO,58{O(tQaO,59yO&VQaO,59PO&VQaO,59PO)RQbO'#CdO*^QPO'#CeO*nQPO,58}O+wQPO,58}O*zQPO,58}O,OQPO,58}O,WQaO'#CuO,`QWO'#CvOOOO'#Ds'#DsOOOO'#Db'#DbO,tOSO,59_OOQ`,59_,59_OOQ`'#Dc'#DcO-SQaO'#DOO-[QPO,5:fO-aQaO'#DeO-fQPO,58zO-wQPO,5:jO.OQPO'#DnO.fQPO,5:jOOQ`,5:U,5:UOOQ`-E7^-E7^OOQ`1G/R1G/ROOQ`,59n,59nOOQ`-E7_-E7_OOQO1G/e1G/eOOQO1G.k1G.kO.kQPO1G.kO&VQaO,59UO&VQaO,59UOOQ`1G.i1G.iOOOO,59a,59aOOOO,59b,59bOOOO-E7`-E7`OOQ`1G.y1G.yOOQ`-E7a-E7aO/VQaO1G0QO/gQbO'#CdOOQO,5:P,5:POOQO-E7c-E7cO0WQaO1G0UOOQO1G.p1G.pO0hQPO1G.pO0rQPO7+%lO0wQaO7+%mOOQO'#DX'#DXOOQO7+%p7+%pO1XQaO7+%qOOQ`<<IW<<IWO1oQPO'#DdO1tQaO'#D}O2[QPO<<IXOOQO'#DY'#DYO2aQPO<<I]OOQ`,5:O,5:OOOQ`-E7b-E7bOOQ`AN>sAN>sO&VQaO'#DZOOQO'#Df'#DfO2lQPOAN>wO2wQPO'#D]OOQOAN>wAN>wO2|QPOAN>wO3RQPO,59uO3YQPO,59uOOQO-E7d-E7dOOQOG24cG24cO3_QPOG24cO3dQPO,59wO3iQPO1G/aOOQOLD)}LD)}O0wQaO1G/cO1XQaO7+${OOQO7+$}7+$}OOQO<<Hg<<Hg", states: 
".jQVQaOOO#UQbO'#CeO#fQPO'#CfO#tQPO'#DlO$wQaO'#CdO%OOSO'#CtOOQ`'#Dp'#DpO%^OPO'#C|O%cQPO'#DoO%zQaO'#D{OOQ`'#C}'#C}OOQO'#Dm'#DmO&SQPO'#DlO&bQaO'#EPOOQO'#DW'#DWOOQO'#Dl'#DlO&iQPO'#DkOOQ`'#Dk'#DkOOQ`'#Da'#DaQVQaOOOOQ`'#Do'#DoOOQ`'#Cc'#CcO&qQaO'#DTOOQ`'#Dn'#DnOOQ`'#Db'#DbO'OQbO,58|O'oQaO,59zO&bQaO,59QO&bQaO,59QO'|QbO'#CeO)XQPO'#CfO)iQPO,59OO)zQPO,59OO)uQPO,59OO*uQPO,59OO*}QaO'#CvO+VQWO'#CwOOOO'#Dt'#DtOOOO'#Dc'#DcO+kOSO,59`OOQ`,59`,59`O+yO`O,59hOOQ`'#Dd'#DdO,OQaO'#DPO,WQPO,5:gO,]QaO'#DfO,bQPO,58{O,sQPO,5:kO,zQPO,5:kOOQ`,5:V,5:VOOQ`-E7_-E7_OOQ`,59o,59oOOQ`-E7`-E7`OOQO1G/f1G/fOOQO1G.l1G.lO-PQPO1G.lO&bQaO,59VO&bQaO,59VOOQ`1G.j1G.jOOOO,59b,59bOOOO,59c,59cOOOO-E7a-E7aOOQ`1G.z1G.zOOQ`1G/S1G/SOOQ`-E7b-E7bO-kQaO1G0RO-{QbO'#CeOOQO,5:Q,5:QOOQO-E7d-E7dO.lQaO1G0VOOQO1G.q1G.qO.|QPO1G.qO/WQPO7+%mO/]QaO7+%nOOQO'#DY'#DYOOQO7+%q7+%qO/mQaO7+%rOOQ`<<IX<<IXO0TQPO'#DeO0YQaO'#EOO0pQPO<<IYOOQO'#DZ'#DZO0uQPO<<I^OOQ`,5:P,5:POOQ`-E7c-E7cOOQ`AN>tAN>tO&bQaO'#D[OOQO'#Dg'#DgO1QQPOAN>xO1]QPO'#D^OOQOAN>xAN>xO1bQPOAN>xO1gQPO,59vO1nQPO,59vOOQO-E7e-E7eOOQOG24dG24dO1sQPOG24dO1xQPO,59xO1}QPO1G/bOOQOLD*OLD*OO/]QaO1G/dO/mQaO7+$|OOQO7+%O7+%OOOQO<<Hh<<Hh",
stateData: "3t~O!]OS~OPPOQUOkUOlUOmUOnUOqWOz[O!dSO!fTO!p`O~OPcOQUOkUOlUOmUOnUOqWOufOwgO!dSO!fTO!mdOY!bXZ!bX[!bX]!bXxWX~O_kO!pWX!tWXtWX~PzOYlOZlO[mO]mO~OYlOZlO[mO]mO!p!_X!t!_Xt!_X~OQUOkUOlUOmUOnUO!dSO!fTO~OPnO~P$YOhvO!fyO!htO!iuO~OY!bXZ!bX[!bX]!bX!p!_X!t!_Xt!_X~OPzOsrP~Ox}O!p!_X!t!_Xt!_X~OP!QO~P$YO!p!SO!t!SO~O!mdO!p!bX!t!bX!e!bXt!bX~OP!bXQ!bXk!bXl!bXm!bXn!bXq!bXu!bXw!bXx!bX!d!bX!f!bX~P&fOP!UO~OPcOqWOu!VO~P$YOPcOqWOufOwgOxTa!pTa!tTa!eTatTa~P$YOPPOqWOz[O~P$YO_!bX`!bXa!bXb!bXc!bXd!bXe!bXf!bX!eWX~PzO_![O`![Oa![Ob![Oc![Od![Oe!]Of!]O~OYlOZlO[mO]mO~P)rOYlOZlO[mO]mO!e!^O~OY!bXZ!bX[!bX]!bX_!bX`!bXa!bXb!bXc!bXd!bXe!bXf!bX~O!e!^O~P+POx}O!e!^O~OP!_O!dSO~O!f!`O!h!`O!i!`O!j!`O!k!`O!l!`O~OhvO!f!bO!htO!iuO~OPzOsrX~Os!dO~OP!eO~Ox}O!pSa!tSa!eSatSa~Os!hO~P)rO!mdOs!bX!p!bX!t!bX!e!bXt!bX~P+POs!hO~OYlOZlO[Xi]Xi!pXi!tXi!eXitXi~OPPOqWOz[O!p!lO~P$YOPcOqWOufOwgOxWX!pWX!tWX!eWXtWX~P$YOPPOqWOz[O!p!oO~P$YO!e^is^i~P)rOt!pO~OPPOqWOz[Ot!qP~P$YOPPOqWOz[Ot!qP!O!qP!Q!qP~P$YO!p!vO~OPPOqWOz[Ot!qX!O!qX!Q!qX~P$YOt!xO~Ot!}O!O!yO!Q!|O~Ot#SO!O!yO!Q!|O~Os#UO~Ot#SO~Os#VO~P)rOs#VO~Ot#WO~O!p#XO~O!p#YO~Ok]mZm~", stateData: 
"2Y~O!^OS~OPPOQUORVOlUOmUOnUOoUOrXO{]O!eSO!gTO!qaO~OPdOQUORVOlUOmUOnUOoUOrXOveOxfO!eSO!gTOZ!cX[!cX]!cX^!cXyXX~O`jO!qXX!uXXuXX~P}OZkO[kO]lO^lO~OZkO[kO]lO^lO!q!`X!u!`Xu!`X~OQUORVOlUOmUOnUOoUO!eSO!gTO~OPmO~P$]OiuO!gxO!isO!jtO~O!nyO~OZ!cX[!cX]!cX^!cX!q!`X!u!`Xu!`X~OPzOtsP~Oy}O!q!`X!u!`Xu!`X~OPdO~P$]O!q!RO!u!RO~OPdOrXOv!TO~P$]OPdOrXOveOxfOyUa!qUa!uUa!fUauUa~P$]OPPOrXO{]O~P$]O`!cXa!cXb!cXc!cXd!cXe!cXf!cXg!cX!fXX~P}O`!YOa!YOb!YOc!YOd!YOe!YOf!ZOg!ZO~OZkO[kO]lO^lO~P(mOZkO[kO]lO^lO!f![O~O!f![OZ!cX[!cX]!cX^!cX`!cXa!cXb!cXc!cXd!cXe!cXf!cXg!cX~Oy}O!f![O~OP!]O!eSO~O!g!^O!i!^O!j!^O!k!^O!l!^O!m!^O~OiuO!g!`O!isO!jtO~OP!aO~OPzOtsX~Ot!cO~OP!dO~Oy}O!qTa!uTa!fTauTa~Ot!gO~P(mOt!gO~OZkO[kO]Yi^Yi!qYi!uYi!fYiuYi~OPPOrXO{]O!q!kO~P$]OPdOrXOveOxfOyXX!qXX!uXX!fXXuXX~P$]OPPOrXO{]O!q!nO~P$]O!f_it_i~P(mOu!oO~OPPOrXO{]Ou!rP~P$]OPPOrXO{]Ou!rP!P!rP!R!rP~P$]O!q!uO~OPPOrXO{]Ou!rX!P!rX!R!rX~P$]Ou!wO~Ou!|O!P!xO!R!{O~Ou#RO!P!xO!R!{O~Ot#TO~Ou#RO~Ot#UO~P(mOt#UO~Ou#VO~O!q#WO~O!q#XO~Ol^n[n~",
goto: "+u!tPPPP!u#U#d#j#U$VPPPP$lPPPPPPPP$xP%b%bPPPP%f&QP&gPPP#dPP&jP&v&y'SP'WP&j'^'d'l'r'x(R(YPPP(`(d(x)[)b*^PPP*zPPPPPP+O+OP+a+i+id^Obk!d!h!l!o!r#X#YRrSiYOSbk}!d!h!l!o!r#X#YXhPjn!e|UOPS[bgjklmn![!]!d!e!h!l!o!r!y#X#YR!_tdRObk!d!h!l!o!r#X#YQpSQ!YlR!ZmQrSQ!R[Q!i!]R#Q!y}UOPS[bgjklmn![!]!d!e!h!l!o!r!y#X#YTvTxdVObk!d!h!l!o!r#X#YiePS[gjlmn![!]!e!yd^Obk!d!h!l!o!r#X#YWfPjn!eR!VgR|We^Obk!d!h!l!o!r#X#YR!n!hQ!u!oQ#Z#XR#[#YT!z!u!{Q#O!uR#T!{QbOR!TbUjPn!eR!WjQxTR!axQ{WR!c{W!r!l!o#X#YR!w!rS!OZsR!g!OQ!{!uR#R!{TaObS_ObQ!XkQ!k!dQ!m!hZ!q!l!o!r#X#YdZObk!d!h!l!o!r#X#YQsSR!f}XiPjn!edQObk!d!h!l!o!r#X#YWfPjn!eQoSQ!P[Q!VgQ!YlQ!ZmQ!i![Q!j!]R#P!ydVObk!d!h!l!o!r#X#YfeP[gjlmn![!]!e!yRqSTwTxoXOPbgjkn!d!e!h!l!o!r#X#YQ!s!lV!t!o#X#Ye]Obk!d!h!l!o!r#X#Y", goto: "+v!uPPPPP!v#V#e#k#V$WPPPP$mPPPPPPPP$yP%c%cPPPP%g&RP&hPPP#ePP&kP&w&z'TP'XP&k'_'e'm's'y(S(ZPPP(a(e(y)])c*_PPP*{PPPPPP+P+PP+b+j+jd_Ocj!c!g!k!n!q#W#XRqSiZOScj}!c!g!k!n!q#W#XXgPim!d|UOPS]cfijklm!Y!Z!c!d!g!k!n!q!x#W#XR!]sdROcj!c!g!k!n!q#W#XQoSQ!WkR!XlQqSQ!Q]Q!h!ZR#P!x}UOPS]cfijklm!Y!Z!c!d!g!k!n!q!x#W#XTuTwdWOcj!c!g!k!n!q#W#XidPS]fiklm!Y!Z!d!xd_Ocj!c!g!k!n!q#W#XWePim!dR!TfR|Xe_Ocj!c!g!k!n!q#W#XR!m!gQ!t!nQ#Y#WR#Z#XT!y!t!zQ!}!tR#S!zQcOR!ScUiPm!dR!UiQwTR!_wQ{XR!b{W!q!k!n#W#XR!v!qS!O[rR!f!OQ!z!tR#Q!zTbOcS`OcQ!VjQ!j!cQ!l!gZ!p!k!n!q#W#Xd[Ocj!c!g!k!n!q#W#XQrSR!e}XhPim!ddQOcj!c!g!k!n!q#W#XWePim!dQnSQ!P]Q!TfQ!WkQ!XlQ!h!YQ!i!ZR#O!xdWOcj!c!g!k!n!q#W#XfdP]fiklm!Y!Z!d!xRpSTvTwoYOPcfijm!c!d!g!k!n!q#W#XQ!r!kV!s!n#W#Xe^Ocj!c!g!k!n!q#W#X",
nodeNames: "⚠ Identifier Word Program PipeExpr FunctionCall PositionalArg ParenExpr FunctionCallOrIdentifier BinOp operator operator operator operator ConditionalOp operator operator operator operator operator operator operator operator String StringFragment Interpolation EscapeSeq Number Boolean Regex Null DotGet FunctionDef keyword Params colon end Underscore NamedArg NamedArgPrefix operator IfExpr keyword ThenBlock ThenBlock ElsifExpr keyword ElseExpr keyword Assign", nodeNames: "⚠ Identifier Word IdentifierBeforeDot Program PipeExpr FunctionCall PositionalArg ParenExpr FunctionCallOrIdentifier BinOp operator operator operator operator ConditionalOp operator operator operator operator operator operator operator operator String StringFragment Interpolation EscapeSeq Number Boolean Regex Null DotGet FunctionDef keyword Params colon end Underscore NamedArg NamedArgPrefix operator IfExpr keyword ThenBlock ThenBlock ElsifExpr keyword ElseExpr keyword Assign",
maxTerm: 82, maxTerm: 83,
context: trackScope, context: trackScope,
nodeProps: [ nodeProps: [
["closedBy", 35,"end"], ["closedBy", 36,"end"],
["openedBy", 36,"colon"] ["openedBy", 37,"colon"]
], ],
propSources: [highlighting], propSources: [highlighting],
skippedNodes: [0], skippedNodes: [0],
repeatNodeCount: 7, repeatNodeCount: 7,
tokenData: "!&X~R!SOX$_XY$|YZ%gZp$_pq$|qr&Qrt$_tu'Yuw$_wx'_xy'dyz'}z{(h{|)R|}$_}!O)l!O!P,b!P!Q,{!Q![*]![!]5j!]!^%g!^!_6T!_!`7_!`!a7x!a#O$_#O#P9S#P#R$_#R#S9X#S#T$_#T#U9r#U#X;W#X#Y=m#Y#ZDs#Z#];W#]#^JO#^#b;W#b#cKp#c#d! Y#d#f;W#f#g!!z#g#h;W#h#i!#q#i#o;W#o#p$_#p#q!%i#q;'S$_;'S;=`$v<%l~$_~O$_~~!&SS$dUhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_S$yP;=`<%l$__%TUhS!]ZOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V%nUhS!pROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V&VWhSOt$_uw$_x!_$_!_!`&o!`#O$_#P;'S$_;'S;=`$v<%lO$_V&vU`RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~'_O!h~~'dO!f~V'kUhS!dROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(UUhS!eROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(oUYRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)YU[RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)sWhS]ROt$_uw$_x!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V*dYhSkROt$_uw$_x!O$_!O!P+S!P!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V+XWhSOt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V+xWhSkROt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V,iU!mRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V-SWhSZROt$_uw$_x!P$_!P!Q-l!Q#O$_#P;'S$_;'S;=`$v<%lO$_V-q^hSOY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q$_!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mV.t^hSmROY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q2e!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mR/uXmROY/pZ!P/p!P!Q0b!Q!}/p!}#O1P#O#P2O#P;'S/p;'S;=`2_<%lO/pR0eP!P!Q0hR0mUmR#Z#[0h#]#^0h#a#b0h#g#h0h#i#j0h#m#n0hR1SVOY1PZ#O1P#O#P1i#P#Q/p#Q;'S1P;'S;=`1x<%lO1PR1lSOY1PZ;'S1P;'S;=`1x<%lO1PR1{P;=`<%l1PR2RSOY/pZ;'S/p;'S;=`2_<%lO/pR2bP;=`<%l/pV2jWhSOt$_uw$_x!P$_!P!Q3S!Q#O$_#P;'S$_;'S;=`$v<%lO$_V3ZbhSmROt$_uw$_x#O$_#P#Z$_#Z#[3S#[#]$_#]#^3S#^#a$_#a#b3S#b#g$_#g#h3S#h#i$_#i#j3S#j#m$_#m#n3S#n;'S$_;'S;=`$v<%lO$_V4h[hSOY4cYZ$_Zt4ctu1Puw4cwx1Px#O4c#O#P1i#P#Q.m#Q;'S4c;'S;=`5^<%lO4cV5aP;=`<%l4cV5gP;=`<%l.mT5qUhSsPOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V6[WaRhSOt$_uw$_x!_$_!_!`6t!`#O$_#P;'S$_;'S;=`$v<%lO$_V6{UbRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V7fU_RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V8PWcRhSOt$_uw$_x!_$_!_!`8i!`#O$_#P;'S$_;'S;=`$v<%lO$_V8pUdRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~9XO!i~V9`UhSuROt$_uw$_x#O$_#P;'S$_
;'S;=`$v<%lO$_V9w[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#b;W#b#c;{#c#o;W#o;'S$_;'S;=`$v<%lO$_U:tUwQhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_U;]YhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V<Q[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#X<v#X#o;W#o;'S$_;'S;=`$v<%lO$_V<}YeRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V=r^hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#a>n#a#b;W#b#cCR#c#o;W#o;'S$_;'S;=`$v<%lO$_V>s[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#h?i#h#o;W#o;'S$_;'S;=`$v<%lO$_V?n^hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#Y@j#Y#];W#]#^Aa#^#o;W#o;'S$_;'S;=`$v<%lO$_V@qY!QPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VAf[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZB[#Z#o;W#o;'S$_;'S;=`$v<%lO$_VBcY!OPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VCW[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#XC|#X#o;W#o;'S$_;'S;=`$v<%lO$_VDTYhStROt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VDx]hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#UEq#U#b;W#b#cIX#c#o;W#o;'S$_;'S;=`$v<%lO$_VEv[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aFl#a#o;W#o;'S$_;'S;=`$v<%lO$_VFq[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#hGg#h#o;W#o;'S$_;'S;=`$v<%lO$_VGl[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#YHb#Y#o;W#o;'S$_;'S;=`$v<%lO$_VHiYlRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VI`YqRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VJT[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZJy#Z#o;W#o;'S$_;'S;=`$v<%lO$_VKQYzPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__Kw[!jWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jLm#j#o;W#o;'S$_;'S;=`$v<%lO$_VLr[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aMh#a#o;W#o;'S$_;'S;=`$v<%lO$_VMm[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aNc#a#o;W#o;'S$_;'S;=`$v<%lO$_VNjYnRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V! 
_[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!!T#g#o;W#o;'S$_;'S;=`$v<%lO$_V!![YfRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_^!#RY!lWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__!#x[!kWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!$n#g#o;W#o;'S$_;'S;=`$v<%lO$_V!$s[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jGg#j#o;W#o;'S$_;'S;=`$v<%lO$_V!%pUxRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~!&XO!t~", tokenData: "!&X~R!SOX$_XY$|YZ%gZp$_pq$|qr&Qrt$_tu'Yuw$_wx'_xy'dyz'}z{(h{|)R|}$_}!O)l!O!P,b!P!Q,{!Q![*]![!]5j!]!^%g!^!_6T!_!`7_!`!a7x!a#O$_#O#P9S#P#R$_#R#S9X#S#T$_#T#U9r#U#X;W#X#Y=m#Y#ZDs#Z#];W#]#^JO#^#b;W#b#cKp#c#d! Y#d#f;W#f#g!!z#g#h;W#h#i!#q#i#o;W#o#p$_#p#q!%i#q;'S$_;'S;=`$v<%l~$_~O$_~~!&SS$dUiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_S$yP;=`<%l$__%TUiS!^ZOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V%nUiS!qROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V&VWiSOt$_uw$_x!_$_!_!`&o!`#O$_#P;'S$_;'S;=`$v<%lO$_V&vUaRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~'_O!i~~'dO!g~V'kUiS!eROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(UUiS!fROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(oUZRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)YU]RiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)sWiS^ROt$_uw$_x!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V*dYiSlROt$_uw$_x!O$_!O!P+S!P!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V+XWiSOt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V+xWiSlROt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_T,iU!nPiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V-SWiS[ROt$_uw$_x!P$_!P!Q-l!Q#O$_#P;'S$_;'S;=`$v<%lO$_V-q^iSOY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q$_!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mV.t^iSnROY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q2e!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mR/uXnROY/pZ!P/p!P!Q0b!Q!}/p!}#O1P#O#P2O#P;'S/p;'S;=`2_<%lO/pR0eP!P!Q0hR0mUnR#Z#[0h#]#^0h#a#b0h#g#h0h#i#j0h#m#n0hR1SVOY1PZ#O1P#O#P1i#P#Q/p#Q;'S1P;'S;=`1x<%lO1PR1lSOY1PZ;'S1P;'S;=`1x<%lO1PR1{P;=`<%l1PR2RSOY/pZ;'S/p;'S;=`2_<%lO/pR2bP;=`<%l/pV2jWiSOt$_uw$_x!P$_!P!Q3S!Q#O$_#P;'S$_;'S;=`$v<%lO$_V3ZbiSnROt$_uw$_x#O$_#P#Z$_#Z#[3S#[#]$_#]#^3S#^#a$_#a#b3S#b#g$_#g#h3S#h#i$_#i#j3S#j#m$_#m#n3S#n
;'S$_;'S;=`$v<%lO$_V4h[iSOY4cYZ$_Zt4ctu1Puw4cwx1Px#O4c#O#P1i#P#Q.m#Q;'S4c;'S;=`5^<%lO4cV5aP;=`<%l4cV5gP;=`<%l.mT5qUiStPOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V6[WbRiSOt$_uw$_x!_$_!_!`6t!`#O$_#P;'S$_;'S;=`$v<%lO$_V6{UcRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V7fU`RiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V8PWdRiSOt$_uw$_x!_$_!_!`8i!`#O$_#P;'S$_;'S;=`$v<%lO$_V8pUeRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~9XO!j~V9`UiSvROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V9w[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#b;W#b#c;{#c#o;W#o;'S$_;'S;=`$v<%lO$_U:tUxQiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_U;]YiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V<Q[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#X<v#X#o;W#o;'S$_;'S;=`$v<%lO$_V<}YfRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V=r^iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#a>n#a#b;W#b#cCR#c#o;W#o;'S$_;'S;=`$v<%lO$_V>s[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#h?i#h#o;W#o;'S$_;'S;=`$v<%lO$_V?n^iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#Y@j#Y#];W#]#^Aa#^#o;W#o;'S$_;'S;=`$v<%lO$_V@qY!RPiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VAf[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZB[#Z#o;W#o;'S$_;'S;=`$v<%lO$_VBcY!PPiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VCW[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#XC|#X#o;W#o;'S$_;'S;=`$v<%lO$_VDTYiSuROt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VDx]iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#UEq#U#b;W#b#cIX#c#o;W#o;'S$_;'S;=`$v<%lO$_VEv[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aFl#a#o;W#o;'S$_;'S;=`$v<%lO$_VFq[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#hGg#h#o;W#o;'S$_;'S;=`$v<%lO$_VGl[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#YHb#Y#o;W#o;'S$_;'S;=`$v<%lO$_VHiYmRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VI`YrRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VJT[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZJy#Z#o;W#o;'S$_;'S;=`$v<%lO$_VKQY{PiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__Kw[!kWiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jLm
#j#o;W#o;'S$_;'S;=`$v<%lO$_VLr[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aMh#a#o;W#o;'S$_;'S;=`$v<%lO$_VMm[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aNc#a#o;W#o;'S$_;'S;=`$v<%lO$_VNjYoRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V! _[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!!T#g#o;W#o;'S$_;'S;=`$v<%lO$_V!![YgRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_^!#RY!mWiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__!#x[!lWiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!$n#g#o;W#o;'S$_;'S;=`$v<%lO$_V!$s[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jGg#j#o;W#o;'S$_;'S;=`$v<%lO$_V!%pUyRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~!&XO!u~",
tokenizers: [0, 1, 2, 3, tokenizer], tokenizers: [0, 1, 2, 3, tokenizer],
topRules: {"Program":[0,3]}, topRules: {"Program":[0,4]},
tokenPrec: 858 tokenPrec: 786
}) })

View File

@ -282,3 +282,40 @@ describe('Assign', () => {
end end`) end end`)
}) })
}) })
// Checks that DotGet is only produced when an in-scope identifier is written
// directly against the dot, and that nearby look-alike inputs parse as something else.
describe('DotGet whitespace sensitivity', () => {
  // `basename` is assigned first, so `basename.prop` is expected to yield
  // IdentifierBeforeDot + Identifier wrapped in a DotGet node.
  test('no whitespace - DotGet works when identifier in scope', () => {
    expect('basename = 5; basename.prop').toMatchTree(`
Assign
Identifier basename
operator =
Number 5
DotGet
IdentifierBeforeDot basename
Identifier prop`)
  })

  // The input contains ` / ` (no dot at all), so the title names the slash;
  // the expected tree is a plain BinOp over two Identifiers.
  test('spaced slash - NOT DotGet, parses as division', () => {
    expect('basename = 5; basename / prop').toMatchTree(`
Assign
Identifier basename
operator =
Number 5
BinOp
Identifier basename
operator /
Identifier prop`)
  })

  // `./cool` after a space is expected to be a single Word argument.
  test('dot followed by slash is Word, not DotGet', () => {
    expect('basename ./cool').toMatchTree(`
FunctionCall
Identifier basename
PositionalArg
Word ./cool`)
  })

  // `readme` was never assigned, so the whole `readme.txt` is expected to be one Word.
  test('identifier not in scope with dot becomes Word', () => {
    expect('readme.txt').toMatchTree(`Word readme.txt`)
  })
})

View File

@ -13,7 +13,7 @@ describe('DotGet', () => {
operator = operator =
Number 5 Number 5
DotGet DotGet
Identifier obj IdentifierBeforeDot obj
Identifier prop Identifier prop
`) `)
}) })
@ -26,7 +26,7 @@ describe('DotGet', () => {
Identifier config Identifier config
colon : colon :
DotGet DotGet
Identifier config IdentifierBeforeDot config
Identifier path Identifier path
end end end end
`) `)
@ -40,7 +40,7 @@ describe('DotGet', () => {
Identifier x Identifier x
colon : colon :
DotGet DotGet
Identifier x IdentifierBeforeDot x
Identifier prop Identifier prop
end end end end
Word x.prop Word x.prop
@ -59,10 +59,10 @@ end`).toMatchTree(`
Identifier y Identifier y
colon : colon :
DotGet DotGet
Identifier x IdentifierBeforeDot x
Identifier foo Identifier foo
DotGet DotGet
Identifier y IdentifierBeforeDot y
Identifier bar Identifier bar
end end end end
`) `)
@ -79,7 +79,7 @@ end`).toMatchTree(`
Identifier x Identifier x
colon : colon :
DotGet DotGet
Identifier x IdentifierBeforeDot x
Identifier outer Identifier outer
FunctionDef FunctionDef
keyword fn keyword fn
@ -87,7 +87,7 @@ end`).toMatchTree(`
Identifier y Identifier y
colon : colon :
DotGet DotGet
Identifier y IdentifierBeforeDot y
Identifier inner Identifier inner
end end end end
end end end end
@ -104,7 +104,7 @@ end`).toMatchTree(`
Identifier echo Identifier echo
PositionalArg PositionalArg
DotGet DotGet
Identifier config IdentifierBeforeDot config
Identifier path Identifier path
`) `)
}) })
@ -123,8 +123,18 @@ end`).toMatchTree(`
Identifier echo Identifier echo
PositionalArg PositionalArg
DotGet DotGet
Identifier config IdentifierBeforeDot config
Identifier path Identifier path
`) `)
}) })
// With whitespace on both sides of the dot, no DotGet node is expected:
// the tree is a FunctionCall on `obj` whose positional args are `.` (a Word)
// and `prop` (an Identifier).
test("dot get doesn't work with spaces", () => {
expect('obj . prop').toMatchTree(`
FunctionCall
Identifier obj
PositionalArg
Word .
PositionalArg
Identifier prop`)
})
}) })

View File

@ -1,55 +1,75 @@
import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr' import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
import { Identifier, Word } from './shrimp.terms' import { Identifier, Word, IdentifierBeforeDot } from './shrimp.terms'
import type { Scope } from './scopeTracker' import type { Scope } from './scopeTracker'
// The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF. // The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF.
export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack) => { export const tokenizer = new ExternalTokenizer(
let ch = getFullCodePoint(input, 0) (input: InputStream, stack: Stack) => {
if (!isWordChar(ch)) return let ch = getFullCodePoint(input, 0)
console.log(`🌭 checking char ${String.fromCodePoint(ch)}`)
if (!isWordChar(ch)) return
let pos = getCharSize(ch) let pos = getCharSize(ch)
let isValidIdentifier = isLowercaseLetter(ch) || isEmoji(ch) let isValidIdentifier = isLowercaseLetter(ch) || isEmoji(ch)
const canBeWord = stack.canShift(Word) const canBeWord = stack.canShift(Word)
while (true) { while (true) {
ch = getFullCodePoint(input, pos) ch = getFullCodePoint(input, pos)
// Check for dot and scope - property access detection // Check for dot and scope - property access detection
if (ch === 46 /* . */ && isValidIdentifier) { if (ch === 46 /* . */ && isValidIdentifier) {
const identifierText = input.read(input.pos, input.pos + pos) // Build identifier text by peeking character by character
const scope = stack.context as Scope | undefined let identifierText = ''
for (let i = 0; i < pos; i++) {
const charCode = input.peek(i)
if (charCode === -1) break
// Handle surrogate pairs for emoji
if (charCode >= 0xd800 && charCode <= 0xdbff && i + 1 < pos) {
const low = input.peek(i + 1)
if (low >= 0xdc00 && low <= 0xdfff) {
identifierText += String.fromCharCode(charCode, low)
i++ // Skip the low surrogate
continue
}
}
identifierText += String.fromCharCode(charCode)
}
if (scope?.has(identifierText)) { const scope = stack.context as Scope | undefined
// In scope - stop here, let grammar parse property access
input.advance(pos) if (scope?.has(identifierText)) {
input.acceptToken(Identifier) // In scope - stop here, let grammar parse property access
return input.advance(pos)
input.acceptToken(IdentifierBeforeDot)
return
}
// Not in scope - continue consuming as Word (fall through)
} }
// Not in scope - continue consuming as Word (fall through)
if (!isWordChar(ch)) break
// Certain characters might end a word or identifier if they are followed by whitespace.
// This allows things like `a = hello; 2` or `if x: y` to parse correctly.
if (canBeWord && (ch === 59 /* ; */ || ch === 58) /* : */) {
const nextCh = getFullCodePoint(input, pos + 1)
if (!isWordChar(nextCh)) break
}
// Track identifier validity
if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 && !isEmoji(ch)) {
if (!canBeWord) break
isValidIdentifier = false
}
pos += getCharSize(ch)
} }
if (!isWordChar(ch)) break input.advance(pos)
input.acceptToken(isValidIdentifier ? Identifier : Word)
// Certain characters might end a word or identifier if they are followed by whitespace. },
// This allows things like `a = hello; 2` or `if x: y` to parse correctly. { contextual: true }
if (canBeWord && (ch === 59 /* ; */ || ch === 58) /* : */) { )
const nextCh = getFullCodePoint(input, pos + 1)
if (!isWordChar(nextCh)) break
}
// Track identifier validity
if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 && !isEmoji(ch)) {
if (!canBeWord) break
isValidIdentifier = false
}
pos += getCharSize(ch)
}
input.advance(pos)
input.acceptToken(isValidIdentifier ? Identifier : Word)
}, { contextual: true })
const isWhiteSpace = (ch: number): boolean => { const isWhiteSpace = (ch: number): boolean => {
return ch === 32 /* space */ || ch === 10 /* \n */ || ch === 9 /* tab */ || ch === 13 /* \r */ return ch === 32 /* space */ || ch === 10 /* \n */ || ch === 9 /* tab */ || ch === 13 /* \r */