fix(parser): make DotGet whitespace-sensitive

- Add IdentifierBeforeDot token emitted when identifier immediately precedes '.'
- Move DotGet into @skip {} block using IdentifierBeforeDot
- Prevents 'basename . prop' from parsing as DotGet
- Allows 'basename.prop' to work as expected when identifier is in scope
- Fixes test: 'a word can be contained in parens'

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Corey Johnson 2025-10-17 10:40:28 -07:00
parent d894713744
commit 8a29090364
6 changed files with 166 additions and 97 deletions

View File

@ -23,7 +23,7 @@
Underscore { "_" }
Null { "null" }
Regex { "//" (![/\\\n[] | "\\" ![\n] | "[" (![\n\\\]] | "\\" ![\n])* "]")+ ("//" $[gimsuy]*)? } // Stolen from the lezer JavaScript grammar
"fn" [@name=keyword]
Fn[@name=keyword] { "fn" }
"if" [@name=keyword]
"elsif" [@name=keyword]
"else" [@name=keyword]
@ -43,7 +43,7 @@
}
@external tokens tokenizer from "./tokenizer" { Identifier, Word }
@external tokens tokenizer from "./tokenizer" { Identifier, Word, IdentifierBeforeDot }
@precedence {
pipe @left,
@ -108,11 +108,11 @@ FunctionDef {
}
singleLineFunctionDef {
"fn" Params colon consumeToTerminator end
Fn Params colon consumeToTerminator end
}
multilineFunctionDef {
"fn" Params colon newlineOrSemicolon block end
Fn Params colon newlineOrSemicolon block end
}
IfExpr {
@ -158,10 +158,6 @@ Assign {
Identifier "=" consumeToTerminator
}
DotGet {
Identifier "." Identifier
}
BinOp {
(expression | BinOp) !multiplicative "*" (expression | BinOp) |
(expression | BinOp) !multiplicative "/" (expression | BinOp) |
@ -178,6 +174,10 @@ expression {
}
@skip {} {
DotGet {
IdentifierBeforeDot "." Identifier
}
String { "'" stringContent* "'" }
}

View File

@ -2,32 +2,34 @@
export const
Identifier = 1,
Word = 2,
Program = 3,
PipeExpr = 4,
FunctionCall = 5,
PositionalArg = 6,
ParenExpr = 7,
FunctionCallOrIdentifier = 8,
BinOp = 9,
ConditionalOp = 14,
String = 23,
StringFragment = 24,
Interpolation = 25,
EscapeSeq = 26,
Number = 27,
Boolean = 28,
Regex = 29,
Null = 30,
DotGet = 31,
FunctionDef = 32,
Params = 34,
colon = 35,
end = 36,
Underscore = 37,
NamedArg = 38,
NamedArgPrefix = 39,
IfExpr = 41,
ThenBlock = 44,
ElsifExpr = 45,
ElseExpr = 47,
Assign = 49
IdentifierBeforeDot = 3,
Program = 4,
PipeExpr = 5,
FunctionCall = 6,
PositionalArg = 7,
ParenExpr = 8,
FunctionCallOrIdentifier = 9,
BinOp = 10,
ConditionalOp = 15,
String = 24,
StringFragment = 25,
Interpolation = 26,
EscapeSeq = 27,
Number = 28,
Boolean = 29,
Regex = 30,
Null = 31,
DotGet = 32,
FunctionDef = 33,
Fn = 34,
Params = 35,
colon = 36,
end = 37,
Underscore = 38,
NamedArg = 39,
NamedArgPrefix = 40,
IfExpr = 42,
ThenBlock = 45,
ElsifExpr = 46,
ElseExpr = 48,
Assign = 50

View File

@ -5,21 +5,21 @@ import {trackScope} from "./scopeTracker"
import {highlighting} from "./highlight"
export const parser = LRParser.deserialize({
version: 14,
states: ".pQVQaOOO#RQbO'#CdO#cQPO'#CeO#qQPO'#DkO$qQaO'#CcO$xOSO'#CsOOQ`'#Do'#DoO%WQPO'#DnO%oQaO'#DzOOQ`'#C|'#C|OOQO'#Dl'#DlO%wQPO'#DkO&VQaO'#EOOOQO'#DV'#DVOOQO'#Dk'#DkO&^QPO'#DjOOQ`'#Dj'#DjOOQ`'#D`'#D`QVQaOOO&wQbO'#DnO'qQaO,59gOOQ`'#Dn'#DnOOQ`'#Cb'#CbO'vQaO'#DSOOQ`'#Dm'#DmOOQ`'#Da'#DaO(TQbO,58{O(tQaO,59yO&VQaO,59PO&VQaO,59PO)RQbO'#CdO*^QPO'#CeO*nQPO,58}O+wQPO,58}O*zQPO,58}O,OQPO,58}O,WQaO'#CuO,`QWO'#CvOOOO'#Ds'#DsOOOO'#Db'#DbO,tOSO,59_OOQ`,59_,59_OOQ`'#Dc'#DcO-SQaO'#DOO-[QPO,5:fO-aQaO'#DeO-fQPO,58zO-wQPO,5:jO.OQPO'#DnO.fQPO,5:jOOQ`,5:U,5:UOOQ`-E7^-E7^OOQ`1G/R1G/ROOQ`,59n,59nOOQ`-E7_-E7_OOQO1G/e1G/eOOQO1G.k1G.kO.kQPO1G.kO&VQaO,59UO&VQaO,59UOOQ`1G.i1G.iOOOO,59a,59aOOOO,59b,59bOOOO-E7`-E7`OOQ`1G.y1G.yOOQ`-E7a-E7aO/VQaO1G0QO/gQbO'#CdOOQO,5:P,5:POOQO-E7c-E7cO0WQaO1G0UOOQO1G.p1G.pO0hQPO1G.pO0rQPO7+%lO0wQaO7+%mOOQO'#DX'#DXOOQO7+%p7+%pO1XQaO7+%qOOQ`<<IW<<IWO1oQPO'#DdO1tQaO'#D}O2[QPO<<IXOOQO'#DY'#DYO2aQPO<<I]OOQ`,5:O,5:OOOQ`-E7b-E7bOOQ`AN>sAN>sO&VQaO'#DZOOQO'#Df'#DfO2lQPOAN>wO2wQPO'#D]OOQOAN>wAN>wO2|QPOAN>wO3RQPO,59uO3YQPO,59uOOQO-E7d-E7dOOQOG24cG24cO3_QPOG24cO3dQPO,59wO3iQPO1G/aOOQOLD)}LD)}O0wQaO1G/cO1XQaO7+${OOQO7+$}7+$}OOQO<<Hg<<Hg",
stateData: "3t~O!]OS~OPPOQUOkUOlUOmUOnUOqWOz[O!dSO!fTO!p`O~OPcOQUOkUOlUOmUOnUOqWOufOwgO!dSO!fTO!mdOY!bXZ!bX[!bX]!bXxWX~O_kO!pWX!tWXtWX~PzOYlOZlO[mO]mO~OYlOZlO[mO]mO!p!_X!t!_Xt!_X~OQUOkUOlUOmUOnUO!dSO!fTO~OPnO~P$YOhvO!fyO!htO!iuO~OY!bXZ!bX[!bX]!bX!p!_X!t!_Xt!_X~OPzOsrP~Ox}O!p!_X!t!_Xt!_X~OP!QO~P$YO!p!SO!t!SO~O!mdO!p!bX!t!bX!e!bXt!bX~OP!bXQ!bXk!bXl!bXm!bXn!bXq!bXu!bXw!bXx!bX!d!bX!f!bX~P&fOP!UO~OPcOqWOu!VO~P$YOPcOqWOufOwgOxTa!pTa!tTa!eTatTa~P$YOPPOqWOz[O~P$YO_!bX`!bXa!bXb!bXc!bXd!bXe!bXf!bX!eWX~PzO_![O`![Oa![Ob![Oc![Od![Oe!]Of!]O~OYlOZlO[mO]mO~P)rOYlOZlO[mO]mO!e!^O~OY!bXZ!bX[!bX]!bX_!bX`!bXa!bXb!bXc!bXd!bXe!bXf!bX~O!e!^O~P+POx}O!e!^O~OP!_O!dSO~O!f!`O!h!`O!i!`O!j!`O!k!`O!l!`O~OhvO!f!bO!htO!iuO~OPzOsrX~Os!dO~OP!eO~Ox}O!pSa!tSa!eSatSa~Os!hO~P)rO!mdOs!bX!p!bX!t!bX!e!bXt!bX~P+POs!hO~OYlOZlO[Xi]Xi!pXi!tXi!eXitXi~OPPOqWOz[O!p!lO~P$YOPcOqWOufOwgOxWX!pWX!tWX!eWXtWX~P$YOPPOqWOz[O!p!oO~P$YO!e^is^i~P)rOt!pO~OPPOqWOz[Ot!qP~P$YOPPOqWOz[Ot!qP!O!qP!Q!qP~P$YO!p!vO~OPPOqWOz[Ot!qX!O!qX!Q!qX~P$YOt!xO~Ot!}O!O!yO!Q!|O~Ot#SO!O!yO!Q!|O~Os#UO~Ot#SO~Os#VO~P)rOs#VO~Ot#WO~O!p#XO~O!p#YO~Ok]mZm~",
goto: "+u!tPPPP!u#U#d#j#U$VPPPP$lPPPPPPPP$xP%b%bPPPP%f&QP&gPPP#dPP&jP&v&y'SP'WP&j'^'d'l'r'x(R(YPPP(`(d(x)[)b*^PPP*zPPPPPP+O+OP+a+i+id^Obk!d!h!l!o!r#X#YRrSiYOSbk}!d!h!l!o!r#X#YXhPjn!e|UOPS[bgjklmn![!]!d!e!h!l!o!r!y#X#YR!_tdRObk!d!h!l!o!r#X#YQpSQ!YlR!ZmQrSQ!R[Q!i!]R#Q!y}UOPS[bgjklmn![!]!d!e!h!l!o!r!y#X#YTvTxdVObk!d!h!l!o!r#X#YiePS[gjlmn![!]!e!yd^Obk!d!h!l!o!r#X#YWfPjn!eR!VgR|We^Obk!d!h!l!o!r#X#YR!n!hQ!u!oQ#Z#XR#[#YT!z!u!{Q#O!uR#T!{QbOR!TbUjPn!eR!WjQxTR!axQ{WR!c{W!r!l!o#X#YR!w!rS!OZsR!g!OQ!{!uR#R!{TaObS_ObQ!XkQ!k!dQ!m!hZ!q!l!o!r#X#YdZObk!d!h!l!o!r#X#YQsSR!f}XiPjn!edQObk!d!h!l!o!r#X#YWfPjn!eQoSQ!P[Q!VgQ!YlQ!ZmQ!i![Q!j!]R#P!ydVObk!d!h!l!o!r#X#YfeP[gjlmn![!]!e!yRqSTwTxoXOPbgjkn!d!e!h!l!o!r#X#YQ!s!lV!t!o#X#Ye]Obk!d!h!l!o!r#X#Y",
nodeNames: "⚠ Identifier Word Program PipeExpr FunctionCall PositionalArg ParenExpr FunctionCallOrIdentifier BinOp operator operator operator operator ConditionalOp operator operator operator operator operator operator operator operator String StringFragment Interpolation EscapeSeq Number Boolean Regex Null DotGet FunctionDef keyword Params colon end Underscore NamedArg NamedArgPrefix operator IfExpr keyword ThenBlock ThenBlock ElsifExpr keyword ElseExpr keyword Assign",
maxTerm: 82,
states: ".jQVQaOOO#UQbO'#CeO#fQPO'#CfO#tQPO'#DlO$wQaO'#CdO%OOSO'#CtOOQ`'#Dp'#DpO%^OPO'#C|O%cQPO'#DoO%zQaO'#D{OOQ`'#C}'#C}OOQO'#Dm'#DmO&SQPO'#DlO&bQaO'#EPOOQO'#DW'#DWOOQO'#Dl'#DlO&iQPO'#DkOOQ`'#Dk'#DkOOQ`'#Da'#DaQVQaOOOOQ`'#Do'#DoOOQ`'#Cc'#CcO&qQaO'#DTOOQ`'#Dn'#DnOOQ`'#Db'#DbO'OQbO,58|O'oQaO,59zO&bQaO,59QO&bQaO,59QO'|QbO'#CeO)XQPO'#CfO)iQPO,59OO)zQPO,59OO)uQPO,59OO*uQPO,59OO*}QaO'#CvO+VQWO'#CwOOOO'#Dt'#DtOOOO'#Dc'#DcO+kOSO,59`OOQ`,59`,59`O+yO`O,59hOOQ`'#Dd'#DdO,OQaO'#DPO,WQPO,5:gO,]QaO'#DfO,bQPO,58{O,sQPO,5:kO,zQPO,5:kOOQ`,5:V,5:VOOQ`-E7_-E7_OOQ`,59o,59oOOQ`-E7`-E7`OOQO1G/f1G/fOOQO1G.l1G.lO-PQPO1G.lO&bQaO,59VO&bQaO,59VOOQ`1G.j1G.jOOOO,59b,59bOOOO,59c,59cOOOO-E7a-E7aOOQ`1G.z1G.zOOQ`1G/S1G/SOOQ`-E7b-E7bO-kQaO1G0RO-{QbO'#CeOOQO,5:Q,5:QOOQO-E7d-E7dO.lQaO1G0VOOQO1G.q1G.qO.|QPO1G.qO/WQPO7+%mO/]QaO7+%nOOQO'#DY'#DYOOQO7+%q7+%qO/mQaO7+%rOOQ`<<IX<<IXO0TQPO'#DeO0YQaO'#EOO0pQPO<<IYOOQO'#DZ'#DZO0uQPO<<I^OOQ`,5:P,5:POOQ`-E7c-E7cOOQ`AN>tAN>tO&bQaO'#D[OOQO'#Dg'#DgO1QQPOAN>xO1]QPO'#D^OOQOAN>xAN>xO1bQPOAN>xO1gQPO,59vO1nQPO,59vOOQO-E7e-E7eOOQOG24dG24dO1sQPOG24dO1xQPO,59xO1}QPO1G/bOOQOLD*OLD*OO/]QaO1G/dO/mQaO7+$|OOQO7+%O7+%OOOQO<<Hh<<Hh",
stateData: "2Y~O!^OS~OPPOQUORVOlUOmUOnUOoUOrXO{]O!eSO!gTO!qaO~OPdOQUORVOlUOmUOnUOoUOrXOveOxfO!eSO!gTOZ!cX[!cX]!cX^!cXyXX~O`jO!qXX!uXXuXX~P}OZkO[kO]lO^lO~OZkO[kO]lO^lO!q!`X!u!`Xu!`X~OQUORVOlUOmUOnUOoUO!eSO!gTO~OPmO~P$]OiuO!gxO!isO!jtO~O!nyO~OZ!cX[!cX]!cX^!cX!q!`X!u!`Xu!`X~OPzOtsP~Oy}O!q!`X!u!`Xu!`X~OPdO~P$]O!q!RO!u!RO~OPdOrXOv!TO~P$]OPdOrXOveOxfOyUa!qUa!uUa!fUauUa~P$]OPPOrXO{]O~P$]O`!cXa!cXb!cXc!cXd!cXe!cXf!cXg!cX!fXX~P}O`!YOa!YOb!YOc!YOd!YOe!YOf!ZOg!ZO~OZkO[kO]lO^lO~P(mOZkO[kO]lO^lO!f![O~O!f![OZ!cX[!cX]!cX^!cX`!cXa!cXb!cXc!cXd!cXe!cXf!cXg!cX~Oy}O!f![O~OP!]O!eSO~O!g!^O!i!^O!j!^O!k!^O!l!^O!m!^O~OiuO!g!`O!isO!jtO~OP!aO~OPzOtsX~Ot!cO~OP!dO~Oy}O!qTa!uTa!fTauTa~Ot!gO~P(mOt!gO~OZkO[kO]Yi^Yi!qYi!uYi!fYiuYi~OPPOrXO{]O!q!kO~P$]OPdOrXOveOxfOyXX!qXX!uXX!fXXuXX~P$]OPPOrXO{]O!q!nO~P$]O!f_it_i~P(mOu!oO~OPPOrXO{]Ou!rP~P$]OPPOrXO{]Ou!rP!P!rP!R!rP~P$]O!q!uO~OPPOrXO{]Ou!rX!P!rX!R!rX~P$]Ou!wO~Ou!|O!P!xO!R!{O~Ou#RO!P!xO!R!{O~Ot#TO~Ou#RO~Ot#UO~P(mOt#UO~Ou#VO~O!q#WO~O!q#XO~Ol^n[n~",
goto: "+v!uPPPPP!v#V#e#k#V$WPPPP$mPPPPPPPP$yP%c%cPPPP%g&RP&hPPP#ePP&kP&w&z'TP'XP&k'_'e'm's'y(S(ZPPP(a(e(y)])c*_PPP*{PPPPPP+P+PP+b+j+jd_Ocj!c!g!k!n!q#W#XRqSiZOScj}!c!g!k!n!q#W#XXgPim!d|UOPS]cfijklm!Y!Z!c!d!g!k!n!q!x#W#XR!]sdROcj!c!g!k!n!q#W#XQoSQ!WkR!XlQqSQ!Q]Q!h!ZR#P!x}UOPS]cfijklm!Y!Z!c!d!g!k!n!q!x#W#XTuTwdWOcj!c!g!k!n!q#W#XidPS]fiklm!Y!Z!d!xd_Ocj!c!g!k!n!q#W#XWePim!dR!TfR|Xe_Ocj!c!g!k!n!q#W#XR!m!gQ!t!nQ#Y#WR#Z#XT!y!t!zQ!}!tR#S!zQcOR!ScUiPm!dR!UiQwTR!_wQ{XR!b{W!q!k!n#W#XR!v!qS!O[rR!f!OQ!z!tR#Q!zTbOcS`OcQ!VjQ!j!cQ!l!gZ!p!k!n!q#W#Xd[Ocj!c!g!k!n!q#W#XQrSR!e}XhPim!ddQOcj!c!g!k!n!q#W#XWePim!dQnSQ!P]Q!TfQ!WkQ!XlQ!h!YQ!i!ZR#O!xdWOcj!c!g!k!n!q#W#XfdP]fiklm!Y!Z!d!xRpSTvTwoYOPcfijm!c!d!g!k!n!q#W#XQ!r!kV!s!n#W#Xe^Ocj!c!g!k!n!q#W#X",
nodeNames: "⚠ Identifier Word IdentifierBeforeDot Program PipeExpr FunctionCall PositionalArg ParenExpr FunctionCallOrIdentifier BinOp operator operator operator operator ConditionalOp operator operator operator operator operator operator operator operator String StringFragment Interpolation EscapeSeq Number Boolean Regex Null DotGet FunctionDef keyword Params colon end Underscore NamedArg NamedArgPrefix operator IfExpr keyword ThenBlock ThenBlock ElsifExpr keyword ElseExpr keyword Assign",
maxTerm: 83,
context: trackScope,
nodeProps: [
["closedBy", 35,"end"],
["openedBy", 36,"colon"]
["closedBy", 36,"end"],
["openedBy", 37,"colon"]
],
propSources: [highlighting],
skippedNodes: [0],
repeatNodeCount: 7,
tokenData: "!&X~R!SOX$_XY$|YZ%gZp$_pq$|qr&Qrt$_tu'Yuw$_wx'_xy'dyz'}z{(h{|)R|}$_}!O)l!O!P,b!P!Q,{!Q![*]![!]5j!]!^%g!^!_6T!_!`7_!`!a7x!a#O$_#O#P9S#P#R$_#R#S9X#S#T$_#T#U9r#U#X;W#X#Y=m#Y#ZDs#Z#];W#]#^JO#^#b;W#b#cKp#c#d! Y#d#f;W#f#g!!z#g#h;W#h#i!#q#i#o;W#o#p$_#p#q!%i#q;'S$_;'S;=`$v<%l~$_~O$_~~!&SS$dUhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_S$yP;=`<%l$__%TUhS!]ZOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V%nUhS!pROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V&VWhSOt$_uw$_x!_$_!_!`&o!`#O$_#P;'S$_;'S;=`$v<%lO$_V&vU`RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~'_O!h~~'dO!f~V'kUhS!dROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(UUhS!eROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(oUYRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)YU[RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)sWhS]ROt$_uw$_x!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V*dYhSkROt$_uw$_x!O$_!O!P+S!P!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V+XWhSOt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V+xWhSkROt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V,iU!mRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V-SWhSZROt$_uw$_x!P$_!P!Q-l!Q#O$_#P;'S$_;'S;=`$v<%lO$_V-q^hSOY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q$_!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mV.t^hSmROY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q2e!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mR/uXmROY/pZ!P/p!P!Q0b!Q!}/p!}#O1P#O#P2O#P;'S/p;'S;=`2_<%lO/pR0eP!P!Q0hR0mUmR#Z#[0h#]#^0h#a#b0h#g#h0h#i#j0h#m#n0hR1SVOY1PZ#O1P#O#P1i#P#Q/p#Q;'S1P;'S;=`1x<%lO1PR1lSOY1PZ;'S1P;'S;=`1x<%lO1PR1{P;=`<%l1PR2RSOY/pZ;'S/p;'S;=`2_<%lO/pR2bP;=`<%l/pV2jWhSOt$_uw$_x!P$_!P!Q3S!Q#O$_#P;'S$_;'S;=`$v<%lO$_V3ZbhSmROt$_uw$_x#O$_#P#Z$_#Z#[3S#[#]$_#]#^3S#^#a$_#a#b3S#b#g$_#g#h3S#h#i$_#i#j3S#j#m$_#m#n3S#n;'S$_;'S;=`$v<%lO$_V4h[hSOY4cYZ$_Zt4ctu1Puw4cwx1Px#O4c#O#P1i#P#Q.m#Q;'S4c;'S;=`5^<%lO4cV5aP;=`<%l4cV5gP;=`<%l.mT5qUhSsPOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V6[WaRhSOt$_uw$_x!_$_!_!`6t!`#O$_#P;'S$_;'S;=`$v<%lO$_V6{UbRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V7fU_RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V8PWcRhSOt$_uw$_x!_$_!_!`8i!`#O$_#P;'S$_;'S;=`$v<%lO$_V8pUdRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~9XO!i~V9`UhSuROt$_uw$_x#O$_#P;'S$_
;'S;=`$v<%lO$_V9w[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#b;W#b#c;{#c#o;W#o;'S$_;'S;=`$v<%lO$_U:tUwQhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_U;]YhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V<Q[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#X<v#X#o;W#o;'S$_;'S;=`$v<%lO$_V<}YeRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V=r^hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#a>n#a#b;W#b#cCR#c#o;W#o;'S$_;'S;=`$v<%lO$_V>s[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#h?i#h#o;W#o;'S$_;'S;=`$v<%lO$_V?n^hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#Y@j#Y#];W#]#^Aa#^#o;W#o;'S$_;'S;=`$v<%lO$_V@qY!QPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VAf[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZB[#Z#o;W#o;'S$_;'S;=`$v<%lO$_VBcY!OPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VCW[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#XC|#X#o;W#o;'S$_;'S;=`$v<%lO$_VDTYhStROt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VDx]hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#UEq#U#b;W#b#cIX#c#o;W#o;'S$_;'S;=`$v<%lO$_VEv[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aFl#a#o;W#o;'S$_;'S;=`$v<%lO$_VFq[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#hGg#h#o;W#o;'S$_;'S;=`$v<%lO$_VGl[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#YHb#Y#o;W#o;'S$_;'S;=`$v<%lO$_VHiYlRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VI`YqRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VJT[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZJy#Z#o;W#o;'S$_;'S;=`$v<%lO$_VKQYzPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__Kw[!jWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jLm#j#o;W#o;'S$_;'S;=`$v<%lO$_VLr[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aMh#a#o;W#o;'S$_;'S;=`$v<%lO$_VMm[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aNc#a#o;W#o;'S$_;'S;=`$v<%lO$_VNjYnRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V! 
_[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!!T#g#o;W#o;'S$_;'S;=`$v<%lO$_V!![YfRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_^!#RY!lWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__!#x[!kWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!$n#g#o;W#o;'S$_;'S;=`$v<%lO$_V!$s[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jGg#j#o;W#o;'S$_;'S;=`$v<%lO$_V!%pUxRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~!&XO!t~",
tokenData: "!&X~R!SOX$_XY$|YZ%gZp$_pq$|qr&Qrt$_tu'Yuw$_wx'_xy'dyz'}z{(h{|)R|}$_}!O)l!O!P,b!P!Q,{!Q![*]![!]5j!]!^%g!^!_6T!_!`7_!`!a7x!a#O$_#O#P9S#P#R$_#R#S9X#S#T$_#T#U9r#U#X;W#X#Y=m#Y#ZDs#Z#];W#]#^JO#^#b;W#b#cKp#c#d! Y#d#f;W#f#g!!z#g#h;W#h#i!#q#i#o;W#o#p$_#p#q!%i#q;'S$_;'S;=`$v<%l~$_~O$_~~!&SS$dUiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_S$yP;=`<%l$__%TUiS!^ZOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V%nUiS!qROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V&VWiSOt$_uw$_x!_$_!_!`&o!`#O$_#P;'S$_;'S;=`$v<%lO$_V&vUaRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~'_O!i~~'dO!g~V'kUiS!eROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(UUiS!fROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(oUZRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)YU]RiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)sWiS^ROt$_uw$_x!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V*dYiSlROt$_uw$_x!O$_!O!P+S!P!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V+XWiSOt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V+xWiSlROt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_T,iU!nPiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V-SWiS[ROt$_uw$_x!P$_!P!Q-l!Q#O$_#P;'S$_;'S;=`$v<%lO$_V-q^iSOY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q$_!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mV.t^iSnROY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q2e!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mR/uXnROY/pZ!P/p!P!Q0b!Q!}/p!}#O1P#O#P2O#P;'S/p;'S;=`2_<%lO/pR0eP!P!Q0hR0mUnR#Z#[0h#]#^0h#a#b0h#g#h0h#i#j0h#m#n0hR1SVOY1PZ#O1P#O#P1i#P#Q/p#Q;'S1P;'S;=`1x<%lO1PR1lSOY1PZ;'S1P;'S;=`1x<%lO1PR1{P;=`<%l1PR2RSOY/pZ;'S/p;'S;=`2_<%lO/pR2bP;=`<%l/pV2jWiSOt$_uw$_x!P$_!P!Q3S!Q#O$_#P;'S$_;'S;=`$v<%lO$_V3ZbiSnROt$_uw$_x#O$_#P#Z$_#Z#[3S#[#]$_#]#^3S#^#a$_#a#b3S#b#g$_#g#h3S#h#i$_#i#j3S#j#m$_#m#n3S#n;'S$_;'S;=`$v<%lO$_V4h[iSOY4cYZ$_Zt4ctu1Puw4cwx1Px#O4c#O#P1i#P#Q.m#Q;'S4c;'S;=`5^<%lO4cV5aP;=`<%l4cV5gP;=`<%l.mT5qUiStPOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V6[WbRiSOt$_uw$_x!_$_!_!`6t!`#O$_#P;'S$_;'S;=`$v<%lO$_V6{UcRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V7fU`RiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V8PWdRiSOt$_uw$_x!_$_!_!`8i!`#O$_#P;'S$_;'S;=`$v<%lO$_V8pUeRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~9XO!j~V9`UiSvROt$_uw$_x#O$_#P;'S$_
;'S;=`$v<%lO$_V9w[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#b;W#b#c;{#c#o;W#o;'S$_;'S;=`$v<%lO$_U:tUxQiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_U;]YiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V<Q[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#X<v#X#o;W#o;'S$_;'S;=`$v<%lO$_V<}YfRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V=r^iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#a>n#a#b;W#b#cCR#c#o;W#o;'S$_;'S;=`$v<%lO$_V>s[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#h?i#h#o;W#o;'S$_;'S;=`$v<%lO$_V?n^iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#Y@j#Y#];W#]#^Aa#^#o;W#o;'S$_;'S;=`$v<%lO$_V@qY!RPiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VAf[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZB[#Z#o;W#o;'S$_;'S;=`$v<%lO$_VBcY!PPiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VCW[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#XC|#X#o;W#o;'S$_;'S;=`$v<%lO$_VDTYiSuROt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VDx]iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#UEq#U#b;W#b#cIX#c#o;W#o;'S$_;'S;=`$v<%lO$_VEv[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aFl#a#o;W#o;'S$_;'S;=`$v<%lO$_VFq[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#hGg#h#o;W#o;'S$_;'S;=`$v<%lO$_VGl[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#YHb#Y#o;W#o;'S$_;'S;=`$v<%lO$_VHiYmRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VI`YrRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VJT[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZJy#Z#o;W#o;'S$_;'S;=`$v<%lO$_VKQY{PiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__Kw[!kWiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jLm#j#o;W#o;'S$_;'S;=`$v<%lO$_VLr[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aMh#a#o;W#o;'S$_;'S;=`$v<%lO$_VMm[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aNc#a#o;W#o;'S$_;'S;=`$v<%lO$_VNjYoRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V! 
_[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!!T#g#o;W#o;'S$_;'S;=`$v<%lO$_V!![YgRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_^!#RY!mWiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__!#x[!lWiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!$n#g#o;W#o;'S$_;'S;=`$v<%lO$_V!$s[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jGg#j#o;W#o;'S$_;'S;=`$v<%lO$_V!%pUyRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~!&XO!u~",
tokenizers: [0, 1, 2, 3, tokenizer],
topRules: {"Program":[0,3]},
tokenPrec: 858
topRules: {"Program":[0,4]},
tokenPrec: 786
})

View File

@ -282,3 +282,40 @@ describe('Assign', () => {
end end`)
})
})
// Covers how the tokenizer decides between DotGet, BinOp and Word around an
// identifier that may or may not be in scope.
describe('DotGet whitespace sensitivity', () => {
test('no whitespace - DotGet works when identifier in scope', () => {
expect('basename = 5; basename.prop').toMatchTree(`
Assign
Identifier basename
operator =
Number 5
DotGet
IdentifierBeforeDot basename
Identifier prop`)
})
// The input here uses `/`, not `.`, so the name describes division rather than a dot.
test('slash with spaces - NOT DotGet, parses as division', () => {
expect('basename = 5; basename / prop').toMatchTree(`
Assign
Identifier basename
operator =
Number 5
BinOp
Identifier basename
operator /
Identifier prop`)
})
test('dot followed by slash is Word, not DotGet', () => {
expect('basename ./cool').toMatchTree(`
FunctionCall
Identifier basename
PositionalArg
Word ./cool`)
})
test('identifier not in scope with dot becomes Word', () => {
expect('readme.txt').toMatchTree(`Word readme.txt`)
})
})

View File

@ -13,7 +13,7 @@ describe('DotGet', () => {
operator =
Number 5
DotGet
Identifier obj
IdentifierBeforeDot obj
Identifier prop
`)
})
@ -26,7 +26,7 @@ describe('DotGet', () => {
Identifier config
colon :
DotGet
Identifier config
IdentifierBeforeDot config
Identifier path
end end
`)
@ -40,7 +40,7 @@ describe('DotGet', () => {
Identifier x
colon :
DotGet
Identifier x
IdentifierBeforeDot x
Identifier prop
end end
Word x.prop
@ -59,10 +59,10 @@ end`).toMatchTree(`
Identifier y
colon :
DotGet
Identifier x
IdentifierBeforeDot x
Identifier foo
DotGet
Identifier y
IdentifierBeforeDot y
Identifier bar
end end
`)
@ -79,7 +79,7 @@ end`).toMatchTree(`
Identifier x
colon :
DotGet
Identifier x
IdentifierBeforeDot x
Identifier outer
FunctionDef
keyword fn
@ -87,7 +87,7 @@ end`).toMatchTree(`
Identifier y
colon :
DotGet
Identifier y
IdentifierBeforeDot y
Identifier inner
end end
end end
@ -104,7 +104,7 @@ end`).toMatchTree(`
Identifier echo
PositionalArg
DotGet
Identifier config
IdentifierBeforeDot config
Identifier path
`)
})
@ -123,8 +123,18 @@ end`).toMatchTree(`
Identifier echo
PositionalArg
DotGet
Identifier config
IdentifierBeforeDot config
Identifier path
`)
})
// With whitespace around the dot this is not property access: the expected tree is a
// FunctionCall of `obj` with two positional args, the Word `.` and the Identifier `prop`.
test("dot get doesn't work with spaces", () => {
expect('obj . prop').toMatchTree(`
FunctionCall
Identifier obj
PositionalArg
Word .
PositionalArg
Identifier prop`)
})
})

View File

@ -1,55 +1,75 @@
import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
import { Identifier, Word } from './shrimp.terms'
import { Identifier, Word, IdentifierBeforeDot } from './shrimp.terms'
import type { Scope } from './scopeTracker'
// The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF.
/**
 * External tokenizer that emits `Identifier`, `Word`, or `IdentifierBeforeDot`.
 *
 * Starting at the current input position it consumes word characters and tracks
 * whether the run is still a valid identifier (lowercase letters, digits, `-`,
 * emoji). When a `.` directly follows a valid identifier that the scope context
 * knows about, it stops before the dot and emits `IdentifierBeforeDot` so the
 * grammar can parse a DotGet; otherwise the dot is consumed as part of a Word.
 *
 * Emits nothing when the first character is not a word character.
 */
export const tokenizer = new ExternalTokenizer(
  (input: InputStream, stack: Stack) => {
    let ch = getFullCodePoint(input, 0)
    // No logging here: this runs for every token attempt, and formatting `ch`
    // with String.fromCodePoint would throw on a negative end-of-input value.
    if (!isWordChar(ch)) return

    let pos = getCharSize(ch)
    let isValidIdentifier = isLowercaseLetter(ch) || isEmoji(ch)
    const canBeWord = stack.canShift(Word)

    while (true) {
      ch = getFullCodePoint(input, pos)

      // Check for dot and scope - property access detection
      if (ch === 46 /* . */ && isValidIdentifier) {
        // Rebuild the text consumed so far from the peeked UTF-16 code units.
        // Appending each unit in order reproduces surrogate pairs (emoji) as-is,
        // so no special pairing logic is needed.
        let identifierText = ''
        for (let i = 0; i < pos; i++) {
          const codeUnit = input.peek(i)
          if (codeUnit === -1) break // ran off the end of the input
          identifierText += String.fromCharCode(codeUnit)
        }

        const scope = stack.context as Scope | undefined
        if (scope?.has(identifierText)) {
          // In scope - stop before the dot, let the grammar parse property access
          input.advance(pos)
          input.acceptToken(IdentifierBeforeDot)
          return
        }
        // Not in scope - continue consuming as Word (fall through)
      }

      if (!isWordChar(ch)) break

      // Certain characters might end a word or identifier if they are followed by whitespace.
      // This allows things like `a = hello; 2` or `if x: y` to parse correctly.
      if (canBeWord && (ch === 59 /* ; */ || ch === 58 /* : */)) {
        const nextCh = getFullCodePoint(input, pos + 1)
        if (!isWordChar(nextCh)) break
      }

      // Track identifier validity
      if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 /* - */ && !isEmoji(ch)) {
        // A non-identifier char ends the token when a Word cannot be shifted here.
        if (!canBeWord) break
        isValidIdentifier = false
      }

      pos += getCharSize(ch)
    }

    input.advance(pos)
    input.acceptToken(isValidIdentifier ? Identifier : Word)
  },
  { contextual: true }
)
const isWhiteSpace = (ch: number): boolean => {
return ch === 32 /* space */ || ch === 10 /* \n */ || ch === 9 /* tab */ || ch === 13 /* \r */