fix(parser): make DotGet whitespace-sensitive
- Add IdentifierBeforeDot token emitted when identifier immediately precedes '.'
- Move DotGet into @skip {} block using IdentifierBeforeDot
- Prevents 'basename . prop' from parsing as DotGet
- Allows 'basename.prop' to work as expected when identifier is in scope
- Fixes test: 'a word can be contained in parens'
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
d894713744
commit
8a29090364
|
|
@ -23,7 +23,7 @@
|
||||||
Underscore { "_" }
|
Underscore { "_" }
|
||||||
Null { "null" }
|
Null { "null" }
|
||||||
Regex { "//" (![/\\\n[] | "\\" ![\n] | "[" (![\n\\\]] | "\\" ![\n])* "]")+ ("//" $[gimsuy]*)? } // Stolen from the lezer JavaScript grammar
|
Regex { "//" (![/\\\n[] | "\\" ![\n] | "[" (![\n\\\]] | "\\" ![\n])* "]")+ ("//" $[gimsuy]*)? } // Stolen from the lezer JavaScript grammar
|
||||||
"fn" [@name=keyword]
|
Fn[@name=keyword] { "fn" }
|
||||||
"if" [@name=keyword]
|
"if" [@name=keyword]
|
||||||
"elsif" [@name=keyword]
|
"elsif" [@name=keyword]
|
||||||
"else" [@name=keyword]
|
"else" [@name=keyword]
|
||||||
|
|
@ -43,7 +43,7 @@
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@external tokens tokenizer from "./tokenizer" { Identifier, Word }
|
@external tokens tokenizer from "./tokenizer" { Identifier, Word, IdentifierBeforeDot }
|
||||||
|
|
||||||
@precedence {
|
@precedence {
|
||||||
pipe @left,
|
pipe @left,
|
||||||
|
|
@ -108,11 +108,11 @@ FunctionDef {
|
||||||
}
|
}
|
||||||
|
|
||||||
singleLineFunctionDef {
|
singleLineFunctionDef {
|
||||||
"fn" Params colon consumeToTerminator end
|
Fn Params colon consumeToTerminator end
|
||||||
}
|
}
|
||||||
|
|
||||||
multilineFunctionDef {
|
multilineFunctionDef {
|
||||||
"fn" Params colon newlineOrSemicolon block end
|
Fn Params colon newlineOrSemicolon block end
|
||||||
}
|
}
|
||||||
|
|
||||||
IfExpr {
|
IfExpr {
|
||||||
|
|
@ -158,10 +158,6 @@ Assign {
|
||||||
Identifier "=" consumeToTerminator
|
Identifier "=" consumeToTerminator
|
||||||
}
|
}
|
||||||
|
|
||||||
DotGet {
|
|
||||||
Identifier "." Identifier
|
|
||||||
}
|
|
||||||
|
|
||||||
BinOp {
|
BinOp {
|
||||||
(expression | BinOp) !multiplicative "*" (expression | BinOp) |
|
(expression | BinOp) !multiplicative "*" (expression | BinOp) |
|
||||||
(expression | BinOp) !multiplicative "/" (expression | BinOp) |
|
(expression | BinOp) !multiplicative "/" (expression | BinOp) |
|
||||||
|
|
@ -178,8 +174,12 @@ expression {
|
||||||
}
|
}
|
||||||
|
|
||||||
@skip {} {
|
@skip {} {
|
||||||
|
DotGet {
|
||||||
|
IdentifierBeforeDot "." Identifier
|
||||||
|
}
|
||||||
|
|
||||||
String { "'" stringContent* "'" }
|
String { "'" stringContent* "'" }
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
stringContent {
|
stringContent {
|
||||||
|
|
|
||||||
|
|
@ -2,32 +2,34 @@
|
||||||
export const
|
export const
|
||||||
Identifier = 1,
|
Identifier = 1,
|
||||||
Word = 2,
|
Word = 2,
|
||||||
Program = 3,
|
IdentifierBeforeDot = 3,
|
||||||
PipeExpr = 4,
|
Program = 4,
|
||||||
FunctionCall = 5,
|
PipeExpr = 5,
|
||||||
PositionalArg = 6,
|
FunctionCall = 6,
|
||||||
ParenExpr = 7,
|
PositionalArg = 7,
|
||||||
FunctionCallOrIdentifier = 8,
|
ParenExpr = 8,
|
||||||
BinOp = 9,
|
FunctionCallOrIdentifier = 9,
|
||||||
ConditionalOp = 14,
|
BinOp = 10,
|
||||||
String = 23,
|
ConditionalOp = 15,
|
||||||
StringFragment = 24,
|
String = 24,
|
||||||
Interpolation = 25,
|
StringFragment = 25,
|
||||||
EscapeSeq = 26,
|
Interpolation = 26,
|
||||||
Number = 27,
|
EscapeSeq = 27,
|
||||||
Boolean = 28,
|
Number = 28,
|
||||||
Regex = 29,
|
Boolean = 29,
|
||||||
Null = 30,
|
Regex = 30,
|
||||||
DotGet = 31,
|
Null = 31,
|
||||||
FunctionDef = 32,
|
DotGet = 32,
|
||||||
Params = 34,
|
FunctionDef = 33,
|
||||||
colon = 35,
|
Fn = 34,
|
||||||
end = 36,
|
Params = 35,
|
||||||
Underscore = 37,
|
colon = 36,
|
||||||
NamedArg = 38,
|
end = 37,
|
||||||
NamedArgPrefix = 39,
|
Underscore = 38,
|
||||||
IfExpr = 41,
|
NamedArg = 39,
|
||||||
ThenBlock = 44,
|
NamedArgPrefix = 40,
|
||||||
ElsifExpr = 45,
|
IfExpr = 42,
|
||||||
ElseExpr = 47,
|
ThenBlock = 45,
|
||||||
Assign = 49
|
ElsifExpr = 46,
|
||||||
|
ElseExpr = 48,
|
||||||
|
Assign = 50
|
||||||
|
|
|
||||||
|
|
@ -5,21 +5,21 @@ import {trackScope} from "./scopeTracker"
|
||||||
import {highlighting} from "./highlight"
|
import {highlighting} from "./highlight"
|
||||||
export const parser = LRParser.deserialize({
|
export const parser = LRParser.deserialize({
|
||||||
version: 14,
|
version: 14,
|
||||||
states: ".pQVQaOOO#RQbO'#CdO#cQPO'#CeO#qQPO'#DkO$qQaO'#CcO$xOSO'#CsOOQ`'#Do'#DoO%WQPO'#DnO%oQaO'#DzOOQ`'#C|'#C|OOQO'#Dl'#DlO%wQPO'#DkO&VQaO'#EOOOQO'#DV'#DVOOQO'#Dk'#DkO&^QPO'#DjOOQ`'#Dj'#DjOOQ`'#D`'#D`QVQaOOO&wQbO'#DnO'qQaO,59gOOQ`'#Dn'#DnOOQ`'#Cb'#CbO'vQaO'#DSOOQ`'#Dm'#DmOOQ`'#Da'#DaO(TQbO,58{O(tQaO,59yO&VQaO,59PO&VQaO,59PO)RQbO'#CdO*^QPO'#CeO*nQPO,58}O+wQPO,58}O*zQPO,58}O,OQPO,58}O,WQaO'#CuO,`QWO'#CvOOOO'#Ds'#DsOOOO'#Db'#DbO,tOSO,59_OOQ`,59_,59_OOQ`'#Dc'#DcO-SQaO'#DOO-[QPO,5:fO-aQaO'#DeO-fQPO,58zO-wQPO,5:jO.OQPO'#DnO.fQPO,5:jOOQ`,5:U,5:UOOQ`-E7^-E7^OOQ`1G/R1G/ROOQ`,59n,59nOOQ`-E7_-E7_OOQO1G/e1G/eOOQO1G.k1G.kO.kQPO1G.kO&VQaO,59UO&VQaO,59UOOQ`1G.i1G.iOOOO,59a,59aOOOO,59b,59bOOOO-E7`-E7`OOQ`1G.y1G.yOOQ`-E7a-E7aO/VQaO1G0QO/gQbO'#CdOOQO,5:P,5:POOQO-E7c-E7cO0WQaO1G0UOOQO1G.p1G.pO0hQPO1G.pO0rQPO7+%lO0wQaO7+%mOOQO'#DX'#DXOOQO7+%p7+%pO1XQaO7+%qOOQ`<<IW<<IWO1oQPO'#DdO1tQaO'#D}O2[QPO<<IXOOQO'#DY'#DYO2aQPO<<I]OOQ`,5:O,5:OOOQ`-E7b-E7bOOQ`AN>sAN>sO&VQaO'#DZOOQO'#Df'#DfO2lQPOAN>wO2wQPO'#D]OOQOAN>wAN>wO2|QPOAN>wO3RQPO,59uO3YQPO,59uOOQO-E7d-E7dOOQOG24cG24cO3_QPOG24cO3dQPO,59wO3iQPO1G/aOOQOLD)}LD)}O0wQaO1G/cO1XQaO7+${OOQO7+$}7+$}OOQO<<Hg<<Hg",
|
states: ".jQVQaOOO#UQbO'#CeO#fQPO'#CfO#tQPO'#DlO$wQaO'#CdO%OOSO'#CtOOQ`'#Dp'#DpO%^OPO'#C|O%cQPO'#DoO%zQaO'#D{OOQ`'#C}'#C}OOQO'#Dm'#DmO&SQPO'#DlO&bQaO'#EPOOQO'#DW'#DWOOQO'#Dl'#DlO&iQPO'#DkOOQ`'#Dk'#DkOOQ`'#Da'#DaQVQaOOOOQ`'#Do'#DoOOQ`'#Cc'#CcO&qQaO'#DTOOQ`'#Dn'#DnOOQ`'#Db'#DbO'OQbO,58|O'oQaO,59zO&bQaO,59QO&bQaO,59QO'|QbO'#CeO)XQPO'#CfO)iQPO,59OO)zQPO,59OO)uQPO,59OO*uQPO,59OO*}QaO'#CvO+VQWO'#CwOOOO'#Dt'#DtOOOO'#Dc'#DcO+kOSO,59`OOQ`,59`,59`O+yO`O,59hOOQ`'#Dd'#DdO,OQaO'#DPO,WQPO,5:gO,]QaO'#DfO,bQPO,58{O,sQPO,5:kO,zQPO,5:kOOQ`,5:V,5:VOOQ`-E7_-E7_OOQ`,59o,59oOOQ`-E7`-E7`OOQO1G/f1G/fOOQO1G.l1G.lO-PQPO1G.lO&bQaO,59VO&bQaO,59VOOQ`1G.j1G.jOOOO,59b,59bOOOO,59c,59cOOOO-E7a-E7aOOQ`1G.z1G.zOOQ`1G/S1G/SOOQ`-E7b-E7bO-kQaO1G0RO-{QbO'#CeOOQO,5:Q,5:QOOQO-E7d-E7dO.lQaO1G0VOOQO1G.q1G.qO.|QPO1G.qO/WQPO7+%mO/]QaO7+%nOOQO'#DY'#DYOOQO7+%q7+%qO/mQaO7+%rOOQ`<<IX<<IXO0TQPO'#DeO0YQaO'#EOO0pQPO<<IYOOQO'#DZ'#DZO0uQPO<<I^OOQ`,5:P,5:POOQ`-E7c-E7cOOQ`AN>tAN>tO&bQaO'#D[OOQO'#Dg'#DgO1QQPOAN>xO1]QPO'#D^OOQOAN>xAN>xO1bQPOAN>xO1gQPO,59vO1nQPO,59vOOQO-E7e-E7eOOQOG24dG24dO1sQPOG24dO1xQPO,59xO1}QPO1G/bOOQOLD*OLD*OO/]QaO1G/dO/mQaO7+$|OOQO7+%O7+%OOOQO<<Hh<<Hh",
|
||||||
stateData: "3t~O!]OS~OPPOQUOkUOlUOmUOnUOqWOz[O!dSO!fTO!p`O~OPcOQUOkUOlUOmUOnUOqWOufOwgO!dSO!fTO!mdOY!bXZ!bX[!bX]!bXxWX~O_kO!pWX!tWXtWX~PzOYlOZlO[mO]mO~OYlOZlO[mO]mO!p!_X!t!_Xt!_X~OQUOkUOlUOmUOnUO!dSO!fTO~OPnO~P$YOhvO!fyO!htO!iuO~OY!bXZ!bX[!bX]!bX!p!_X!t!_Xt!_X~OPzOsrP~Ox}O!p!_X!t!_Xt!_X~OP!QO~P$YO!p!SO!t!SO~O!mdO!p!bX!t!bX!e!bXt!bX~OP!bXQ!bXk!bXl!bXm!bXn!bXq!bXu!bXw!bXx!bX!d!bX!f!bX~P&fOP!UO~OPcOqWOu!VO~P$YOPcOqWOufOwgOxTa!pTa!tTa!eTatTa~P$YOPPOqWOz[O~P$YO_!bX`!bXa!bXb!bXc!bXd!bXe!bXf!bX!eWX~PzO_![O`![Oa![Ob![Oc![Od![Oe!]Of!]O~OYlOZlO[mO]mO~P)rOYlOZlO[mO]mO!e!^O~OY!bXZ!bX[!bX]!bX_!bX`!bXa!bXb!bXc!bXd!bXe!bXf!bX~O!e!^O~P+POx}O!e!^O~OP!_O!dSO~O!f!`O!h!`O!i!`O!j!`O!k!`O!l!`O~OhvO!f!bO!htO!iuO~OPzOsrX~Os!dO~OP!eO~Ox}O!pSa!tSa!eSatSa~Os!hO~P)rO!mdOs!bX!p!bX!t!bX!e!bXt!bX~P+POs!hO~OYlOZlO[Xi]Xi!pXi!tXi!eXitXi~OPPOqWOz[O!p!lO~P$YOPcOqWOufOwgOxWX!pWX!tWX!eWXtWX~P$YOPPOqWOz[O!p!oO~P$YO!e^is^i~P)rOt!pO~OPPOqWOz[Ot!qP~P$YOPPOqWOz[Ot!qP!O!qP!Q!qP~P$YO!p!vO~OPPOqWOz[Ot!qX!O!qX!Q!qX~P$YOt!xO~Ot!}O!O!yO!Q!|O~Ot#SO!O!yO!Q!|O~Os#UO~Ot#SO~Os#VO~P)rOs#VO~Ot#WO~O!p#XO~O!p#YO~Ok]mZm~",
|
stateData: "2Y~O!^OS~OPPOQUORVOlUOmUOnUOoUOrXO{]O!eSO!gTO!qaO~OPdOQUORVOlUOmUOnUOoUOrXOveOxfO!eSO!gTOZ!cX[!cX]!cX^!cXyXX~O`jO!qXX!uXXuXX~P}OZkO[kO]lO^lO~OZkO[kO]lO^lO!q!`X!u!`Xu!`X~OQUORVOlUOmUOnUOoUO!eSO!gTO~OPmO~P$]OiuO!gxO!isO!jtO~O!nyO~OZ!cX[!cX]!cX^!cX!q!`X!u!`Xu!`X~OPzOtsP~Oy}O!q!`X!u!`Xu!`X~OPdO~P$]O!q!RO!u!RO~OPdOrXOv!TO~P$]OPdOrXOveOxfOyUa!qUa!uUa!fUauUa~P$]OPPOrXO{]O~P$]O`!cXa!cXb!cXc!cXd!cXe!cXf!cXg!cX!fXX~P}O`!YOa!YOb!YOc!YOd!YOe!YOf!ZOg!ZO~OZkO[kO]lO^lO~P(mOZkO[kO]lO^lO!f![O~O!f![OZ!cX[!cX]!cX^!cX`!cXa!cXb!cXc!cXd!cXe!cXf!cXg!cX~Oy}O!f![O~OP!]O!eSO~O!g!^O!i!^O!j!^O!k!^O!l!^O!m!^O~OiuO!g!`O!isO!jtO~OP!aO~OPzOtsX~Ot!cO~OP!dO~Oy}O!qTa!uTa!fTauTa~Ot!gO~P(mOt!gO~OZkO[kO]Yi^Yi!qYi!uYi!fYiuYi~OPPOrXO{]O!q!kO~P$]OPdOrXOveOxfOyXX!qXX!uXX!fXXuXX~P$]OPPOrXO{]O!q!nO~P$]O!f_it_i~P(mOu!oO~OPPOrXO{]Ou!rP~P$]OPPOrXO{]Ou!rP!P!rP!R!rP~P$]O!q!uO~OPPOrXO{]Ou!rX!P!rX!R!rX~P$]Ou!wO~Ou!|O!P!xO!R!{O~Ou#RO!P!xO!R!{O~Ot#TO~Ou#RO~Ot#UO~P(mOt#UO~Ou#VO~O!q#WO~O!q#XO~Ol^n[n~",
|
||||||
goto: "+u!tPPPP!u#U#d#j#U$VPPPP$lPPPPPPPP$xP%b%bPPPP%f&QP&gPPP#dPP&jP&v&y'SP'WP&j'^'d'l'r'x(R(YPPP(`(d(x)[)b*^PPP*zPPPPPP+O+OP+a+i+id^Obk!d!h!l!o!r#X#YRrSiYOSbk}!d!h!l!o!r#X#YXhPjn!e|UOPS[bgjklmn![!]!d!e!h!l!o!r!y#X#YR!_tdRObk!d!h!l!o!r#X#YQpSQ!YlR!ZmQrSQ!R[Q!i!]R#Q!y}UOPS[bgjklmn![!]!d!e!h!l!o!r!y#X#YTvTxdVObk!d!h!l!o!r#X#YiePS[gjlmn![!]!e!yd^Obk!d!h!l!o!r#X#YWfPjn!eR!VgR|We^Obk!d!h!l!o!r#X#YR!n!hQ!u!oQ#Z#XR#[#YT!z!u!{Q#O!uR#T!{QbOR!TbUjPn!eR!WjQxTR!axQ{WR!c{W!r!l!o#X#YR!w!rS!OZsR!g!OQ!{!uR#R!{TaObS_ObQ!XkQ!k!dQ!m!hZ!q!l!o!r#X#YdZObk!d!h!l!o!r#X#YQsSR!f}XiPjn!edQObk!d!h!l!o!r#X#YWfPjn!eQoSQ!P[Q!VgQ!YlQ!ZmQ!i![Q!j!]R#P!ydVObk!d!h!l!o!r#X#YfeP[gjlmn![!]!e!yRqSTwTxoXOPbgjkn!d!e!h!l!o!r#X#YQ!s!lV!t!o#X#Ye]Obk!d!h!l!o!r#X#Y",
|
goto: "+v!uPPPPP!v#V#e#k#V$WPPPP$mPPPPPPPP$yP%c%cPPPP%g&RP&hPPP#ePP&kP&w&z'TP'XP&k'_'e'm's'y(S(ZPPP(a(e(y)])c*_PPP*{PPPPPP+P+PP+b+j+jd_Ocj!c!g!k!n!q#W#XRqSiZOScj}!c!g!k!n!q#W#XXgPim!d|UOPS]cfijklm!Y!Z!c!d!g!k!n!q!x#W#XR!]sdROcj!c!g!k!n!q#W#XQoSQ!WkR!XlQqSQ!Q]Q!h!ZR#P!x}UOPS]cfijklm!Y!Z!c!d!g!k!n!q!x#W#XTuTwdWOcj!c!g!k!n!q#W#XidPS]fiklm!Y!Z!d!xd_Ocj!c!g!k!n!q#W#XWePim!dR!TfR|Xe_Ocj!c!g!k!n!q#W#XR!m!gQ!t!nQ#Y#WR#Z#XT!y!t!zQ!}!tR#S!zQcOR!ScUiPm!dR!UiQwTR!_wQ{XR!b{W!q!k!n#W#XR!v!qS!O[rR!f!OQ!z!tR#Q!zTbOcS`OcQ!VjQ!j!cQ!l!gZ!p!k!n!q#W#Xd[Ocj!c!g!k!n!q#W#XQrSR!e}XhPim!ddQOcj!c!g!k!n!q#W#XWePim!dQnSQ!P]Q!TfQ!WkQ!XlQ!h!YQ!i!ZR#O!xdWOcj!c!g!k!n!q#W#XfdP]fiklm!Y!Z!d!xRpSTvTwoYOPcfijm!c!d!g!k!n!q#W#XQ!r!kV!s!n#W#Xe^Ocj!c!g!k!n!q#W#X",
|
||||||
nodeNames: "⚠ Identifier Word Program PipeExpr FunctionCall PositionalArg ParenExpr FunctionCallOrIdentifier BinOp operator operator operator operator ConditionalOp operator operator operator operator operator operator operator operator String StringFragment Interpolation EscapeSeq Number Boolean Regex Null DotGet FunctionDef keyword Params colon end Underscore NamedArg NamedArgPrefix operator IfExpr keyword ThenBlock ThenBlock ElsifExpr keyword ElseExpr keyword Assign",
|
nodeNames: "⚠ Identifier Word IdentifierBeforeDot Program PipeExpr FunctionCall PositionalArg ParenExpr FunctionCallOrIdentifier BinOp operator operator operator operator ConditionalOp operator operator operator operator operator operator operator operator String StringFragment Interpolation EscapeSeq Number Boolean Regex Null DotGet FunctionDef keyword Params colon end Underscore NamedArg NamedArgPrefix operator IfExpr keyword ThenBlock ThenBlock ElsifExpr keyword ElseExpr keyword Assign",
|
||||||
maxTerm: 82,
|
maxTerm: 83,
|
||||||
context: trackScope,
|
context: trackScope,
|
||||||
nodeProps: [
|
nodeProps: [
|
||||||
["closedBy", 35,"end"],
|
["closedBy", 36,"end"],
|
||||||
["openedBy", 36,"colon"]
|
["openedBy", 37,"colon"]
|
||||||
],
|
],
|
||||||
propSources: [highlighting],
|
propSources: [highlighting],
|
||||||
skippedNodes: [0],
|
skippedNodes: [0],
|
||||||
repeatNodeCount: 7,
|
repeatNodeCount: 7,
|
||||||
tokenData: "!&X~R!SOX$_XY$|YZ%gZp$_pq$|qr&Qrt$_tu'Yuw$_wx'_xy'dyz'}z{(h{|)R|}$_}!O)l!O!P,b!P!Q,{!Q![*]![!]5j!]!^%g!^!_6T!_!`7_!`!a7x!a#O$_#O#P9S#P#R$_#R#S9X#S#T$_#T#U9r#U#X;W#X#Y=m#Y#ZDs#Z#];W#]#^JO#^#b;W#b#cKp#c#d! Y#d#f;W#f#g!!z#g#h;W#h#i!#q#i#o;W#o#p$_#p#q!%i#q;'S$_;'S;=`$v<%l~$_~O$_~~!&SS$dUhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_S$yP;=`<%l$__%TUhS!]ZOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V%nUhS!pROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V&VWhSOt$_uw$_x!_$_!_!`&o!`#O$_#P;'S$_;'S;=`$v<%lO$_V&vU`RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~'_O!h~~'dO!f~V'kUhS!dROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(UUhS!eROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(oUYRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)YU[RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)sWhS]ROt$_uw$_x!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V*dYhSkROt$_uw$_x!O$_!O!P+S!P!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V+XWhSOt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V+xWhSkROt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V,iU!mRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V-SWhSZROt$_uw$_x!P$_!P!Q-l!Q#O$_#P;'S$_;'S;=`$v<%lO$_V-q^hSOY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q$_!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mV.t^hSmROY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q2e!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mR/uXmROY/pZ!P/p!P!Q0b!Q!}/p!}#O1P#O#P2O#P;'S/p;'S;=`2_<%lO/pR0eP!P!Q0hR0mUmR#Z#[0h#]#^0h#a#b0h#g#h0h#i#j0h#m#n0hR1SVOY1PZ#O1P#O#P1i#P#Q/p#Q;'S1P;'S;=`1x<%lO1PR1lSOY1PZ;'S1P;'S;=`1x<%lO1PR1{P;=`<%l1PR2RSOY/pZ;'S/p;'S;=`2_<%lO/pR2bP;=`<%l/pV2jWhSOt$_uw$_x!P$_!P!Q3S!Q#O$_#P;'S$_;'S;=`$v<%lO$_V3ZbhSmROt$_uw$_x#O$_#P#Z$_#Z#[3S#[#]$_#]#^3S#^#a$_#a#b3S#b#g$_#g#h3S#h#i$_#i#j3S#j#m$_#m#n3S#n;'S$_;'S;=`$v<%lO$_V4h[hSOY4cYZ$_Zt4ctu1Puw4cwx1Px#O4c#O#P1i#P#Q.m#Q;'S4c;'S;=`5^<%lO4cV5aP;=`<%l4cV5gP;=`<%l.mT5qUhSsPOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V6[WaRhSOt$_uw$_x!_$_!_!`6t!`#O$_#P;'S$_;'S;=`$v<%lO$_V6{UbRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V7fU_RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V8PWcRhSOt$_uw$_x!_$_!_!`8i!`#O$_#P;'S$_;'S;=`$v<%lO$_V8pUdRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~9XO!i~V9`UhSuROt$_uw$_x#O$_#P;'S$_
;'S;=`$v<%lO$_V9w[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#b;W#b#c;{#c#o;W#o;'S$_;'S;=`$v<%lO$_U:tUwQhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_U;]YhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V<Q[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#X<v#X#o;W#o;'S$_;'S;=`$v<%lO$_V<}YeRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V=r^hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#a>n#a#b;W#b#cCR#c#o;W#o;'S$_;'S;=`$v<%lO$_V>s[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#h?i#h#o;W#o;'S$_;'S;=`$v<%lO$_V?n^hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#Y@j#Y#];W#]#^Aa#^#o;W#o;'S$_;'S;=`$v<%lO$_V@qY!QPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VAf[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZB[#Z#o;W#o;'S$_;'S;=`$v<%lO$_VBcY!OPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VCW[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#XC|#X#o;W#o;'S$_;'S;=`$v<%lO$_VDTYhStROt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VDx]hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#UEq#U#b;W#b#cIX#c#o;W#o;'S$_;'S;=`$v<%lO$_VEv[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aFl#a#o;W#o;'S$_;'S;=`$v<%lO$_VFq[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#hGg#h#o;W#o;'S$_;'S;=`$v<%lO$_VGl[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#YHb#Y#o;W#o;'S$_;'S;=`$v<%lO$_VHiYlRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VI`YqRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VJT[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZJy#Z#o;W#o;'S$_;'S;=`$v<%lO$_VKQYzPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__Kw[!jWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jLm#j#o;W#o;'S$_;'S;=`$v<%lO$_VLr[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aMh#a#o;W#o;'S$_;'S;=`$v<%lO$_VMm[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aNc#a#o;W#o;'S$_;'S;=`$v<%lO$_VNjYnRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V! 
_[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!!T#g#o;W#o;'S$_;'S;=`$v<%lO$_V!![YfRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_^!#RY!lWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__!#x[!kWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!$n#g#o;W#o;'S$_;'S;=`$v<%lO$_V!$s[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jGg#j#o;W#o;'S$_;'S;=`$v<%lO$_V!%pUxRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~!&XO!t~",
|
tokenData: "!&X~R!SOX$_XY$|YZ%gZp$_pq$|qr&Qrt$_tu'Yuw$_wx'_xy'dyz'}z{(h{|)R|}$_}!O)l!O!P,b!P!Q,{!Q![*]![!]5j!]!^%g!^!_6T!_!`7_!`!a7x!a#O$_#O#P9S#P#R$_#R#S9X#S#T$_#T#U9r#U#X;W#X#Y=m#Y#ZDs#Z#];W#]#^JO#^#b;W#b#cKp#c#d! Y#d#f;W#f#g!!z#g#h;W#h#i!#q#i#o;W#o#p$_#p#q!%i#q;'S$_;'S;=`$v<%l~$_~O$_~~!&SS$dUiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_S$yP;=`<%l$__%TUiS!^ZOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V%nUiS!qROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V&VWiSOt$_uw$_x!_$_!_!`&o!`#O$_#P;'S$_;'S;=`$v<%lO$_V&vUaRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~'_O!i~~'dO!g~V'kUiS!eROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(UUiS!fROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(oUZRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)YU]RiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)sWiS^ROt$_uw$_x!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V*dYiSlROt$_uw$_x!O$_!O!P+S!P!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V+XWiSOt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V+xWiSlROt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_T,iU!nPiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V-SWiS[ROt$_uw$_x!P$_!P!Q-l!Q#O$_#P;'S$_;'S;=`$v<%lO$_V-q^iSOY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q$_!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mV.t^iSnROY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q2e!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mR/uXnROY/pZ!P/p!P!Q0b!Q!}/p!}#O1P#O#P2O#P;'S/p;'S;=`2_<%lO/pR0eP!P!Q0hR0mUnR#Z#[0h#]#^0h#a#b0h#g#h0h#i#j0h#m#n0hR1SVOY1PZ#O1P#O#P1i#P#Q/p#Q;'S1P;'S;=`1x<%lO1PR1lSOY1PZ;'S1P;'S;=`1x<%lO1PR1{P;=`<%l1PR2RSOY/pZ;'S/p;'S;=`2_<%lO/pR2bP;=`<%l/pV2jWiSOt$_uw$_x!P$_!P!Q3S!Q#O$_#P;'S$_;'S;=`$v<%lO$_V3ZbiSnROt$_uw$_x#O$_#P#Z$_#Z#[3S#[#]$_#]#^3S#^#a$_#a#b3S#b#g$_#g#h3S#h#i$_#i#j3S#j#m$_#m#n3S#n;'S$_;'S;=`$v<%lO$_V4h[iSOY4cYZ$_Zt4ctu1Puw4cwx1Px#O4c#O#P1i#P#Q.m#Q;'S4c;'S;=`5^<%lO4cV5aP;=`<%l4cV5gP;=`<%l.mT5qUiStPOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V6[WbRiSOt$_uw$_x!_$_!_!`6t!`#O$_#P;'S$_;'S;=`$v<%lO$_V6{UcRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V7fU`RiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V8PWdRiSOt$_uw$_x!_$_!_!`8i!`#O$_#P;'S$_;'S;=`$v<%lO$_V8pUeRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~9XO!j~V9`UiSvROt$_uw$_x#O$_#P;'S$_
;'S;=`$v<%lO$_V9w[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#b;W#b#c;{#c#o;W#o;'S$_;'S;=`$v<%lO$_U:tUxQiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_U;]YiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V<Q[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#X<v#X#o;W#o;'S$_;'S;=`$v<%lO$_V<}YfRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V=r^iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#a>n#a#b;W#b#cCR#c#o;W#o;'S$_;'S;=`$v<%lO$_V>s[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#h?i#h#o;W#o;'S$_;'S;=`$v<%lO$_V?n^iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#Y@j#Y#];W#]#^Aa#^#o;W#o;'S$_;'S;=`$v<%lO$_V@qY!RPiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VAf[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZB[#Z#o;W#o;'S$_;'S;=`$v<%lO$_VBcY!PPiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VCW[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#XC|#X#o;W#o;'S$_;'S;=`$v<%lO$_VDTYiSuROt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VDx]iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#UEq#U#b;W#b#cIX#c#o;W#o;'S$_;'S;=`$v<%lO$_VEv[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aFl#a#o;W#o;'S$_;'S;=`$v<%lO$_VFq[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#hGg#h#o;W#o;'S$_;'S;=`$v<%lO$_VGl[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#YHb#Y#o;W#o;'S$_;'S;=`$v<%lO$_VHiYmRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VI`YrRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VJT[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZJy#Z#o;W#o;'S$_;'S;=`$v<%lO$_VKQY{PiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__Kw[!kWiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jLm#j#o;W#o;'S$_;'S;=`$v<%lO$_VLr[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aMh#a#o;W#o;'S$_;'S;=`$v<%lO$_VMm[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aNc#a#o;W#o;'S$_;'S;=`$v<%lO$_VNjYoRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V! 
_[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!!T#g#o;W#o;'S$_;'S;=`$v<%lO$_V!![YgRiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_^!#RY!mWiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__!#x[!lWiSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!$n#g#o;W#o;'S$_;'S;=`$v<%lO$_V!$s[iSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jGg#j#o;W#o;'S$_;'S;=`$v<%lO$_V!%pUyRiSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~!&XO!u~",
|
||||||
tokenizers: [0, 1, 2, 3, tokenizer],
|
tokenizers: [0, 1, 2, 3, tokenizer],
|
||||||
topRules: {"Program":[0,3]},
|
topRules: {"Program":[0,4]},
|
||||||
tokenPrec: 858
|
tokenPrec: 786
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@ -282,3 +282,40 @@ describe('Assign', () => {
|
||||||
end end`)
|
end end`)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe('DotGet whitespace sensitivity', () => {
|
||||||
|
test('no whitespace - DotGet works when identifier in scope', () => {
|
||||||
|
expect('basename = 5; basename.prop').toMatchTree(`
|
||||||
|
Assign
|
||||||
|
Identifier basename
|
||||||
|
operator =
|
||||||
|
Number 5
|
||||||
|
DotGet
|
||||||
|
IdentifierBeforeDot basename
|
||||||
|
Identifier prop`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('space before dot - NOT DotGet, parses as division', () => {
|
||||||
|
expect('basename = 5; basename / prop').toMatchTree(`
|
||||||
|
Assign
|
||||||
|
Identifier basename
|
||||||
|
operator =
|
||||||
|
Number 5
|
||||||
|
BinOp
|
||||||
|
Identifier basename
|
||||||
|
operator /
|
||||||
|
Identifier prop`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('dot followed by slash is Word, not DotGet', () => {
|
||||||
|
expect('basename ./cool').toMatchTree(`
|
||||||
|
FunctionCall
|
||||||
|
Identifier basename
|
||||||
|
PositionalArg
|
||||||
|
Word ./cool`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('identifier not in scope with dot becomes Word', () => {
|
||||||
|
expect('readme.txt').toMatchTree(`Word readme.txt`)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ describe('DotGet', () => {
|
||||||
operator =
|
operator =
|
||||||
Number 5
|
Number 5
|
||||||
DotGet
|
DotGet
|
||||||
Identifier obj
|
IdentifierBeforeDot obj
|
||||||
Identifier prop
|
Identifier prop
|
||||||
`)
|
`)
|
||||||
})
|
})
|
||||||
|
|
@ -26,7 +26,7 @@ describe('DotGet', () => {
|
||||||
Identifier config
|
Identifier config
|
||||||
colon :
|
colon :
|
||||||
DotGet
|
DotGet
|
||||||
Identifier config
|
IdentifierBeforeDot config
|
||||||
Identifier path
|
Identifier path
|
||||||
end end
|
end end
|
||||||
`)
|
`)
|
||||||
|
|
@ -40,7 +40,7 @@ describe('DotGet', () => {
|
||||||
Identifier x
|
Identifier x
|
||||||
colon :
|
colon :
|
||||||
DotGet
|
DotGet
|
||||||
Identifier x
|
IdentifierBeforeDot x
|
||||||
Identifier prop
|
Identifier prop
|
||||||
end end
|
end end
|
||||||
Word x.prop
|
Word x.prop
|
||||||
|
|
@ -59,10 +59,10 @@ end`).toMatchTree(`
|
||||||
Identifier y
|
Identifier y
|
||||||
colon :
|
colon :
|
||||||
DotGet
|
DotGet
|
||||||
Identifier x
|
IdentifierBeforeDot x
|
||||||
Identifier foo
|
Identifier foo
|
||||||
DotGet
|
DotGet
|
||||||
Identifier y
|
IdentifierBeforeDot y
|
||||||
Identifier bar
|
Identifier bar
|
||||||
end end
|
end end
|
||||||
`)
|
`)
|
||||||
|
|
@ -79,7 +79,7 @@ end`).toMatchTree(`
|
||||||
Identifier x
|
Identifier x
|
||||||
colon :
|
colon :
|
||||||
DotGet
|
DotGet
|
||||||
Identifier x
|
IdentifierBeforeDot x
|
||||||
Identifier outer
|
Identifier outer
|
||||||
FunctionDef
|
FunctionDef
|
||||||
keyword fn
|
keyword fn
|
||||||
|
|
@ -87,7 +87,7 @@ end`).toMatchTree(`
|
||||||
Identifier y
|
Identifier y
|
||||||
colon :
|
colon :
|
||||||
DotGet
|
DotGet
|
||||||
Identifier y
|
IdentifierBeforeDot y
|
||||||
Identifier inner
|
Identifier inner
|
||||||
end end
|
end end
|
||||||
end end
|
end end
|
||||||
|
|
@ -104,7 +104,7 @@ end`).toMatchTree(`
|
||||||
Identifier echo
|
Identifier echo
|
||||||
PositionalArg
|
PositionalArg
|
||||||
DotGet
|
DotGet
|
||||||
Identifier config
|
IdentifierBeforeDot config
|
||||||
Identifier path
|
Identifier path
|
||||||
`)
|
`)
|
||||||
})
|
})
|
||||||
|
|
@ -123,8 +123,18 @@ end`).toMatchTree(`
|
||||||
Identifier echo
|
Identifier echo
|
||||||
PositionalArg
|
PositionalArg
|
||||||
DotGet
|
DotGet
|
||||||
Identifier config
|
IdentifierBeforeDot config
|
||||||
Identifier path
|
Identifier path
|
||||||
`)
|
`)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test("dot get doesn't work with spaces", () => {
|
||||||
|
expect('obj . prop').toMatchTree(`
|
||||||
|
FunctionCall
|
||||||
|
Identifier obj
|
||||||
|
PositionalArg
|
||||||
|
Word .
|
||||||
|
PositionalArg
|
||||||
|
Identifier prop`)
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@ -1,55 +1,75 @@
|
||||||
import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
|
import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
|
||||||
import { Identifier, Word } from './shrimp.terms'
|
import { Identifier, Word, IdentifierBeforeDot } from './shrimp.terms'
|
||||||
import type { Scope } from './scopeTracker'
|
import type { Scope } from './scopeTracker'
|
||||||
|
|
||||||
// The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF.
|
// The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF.
|
||||||
|
|
||||||
export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack) => {
|
export const tokenizer = new ExternalTokenizer(
|
||||||
let ch = getFullCodePoint(input, 0)
|
(input: InputStream, stack: Stack) => {
|
||||||
if (!isWordChar(ch)) return
|
let ch = getFullCodePoint(input, 0)
|
||||||
|
console.log(`🌭 checking char ${String.fromCodePoint(ch)}`)
|
||||||
|
if (!isWordChar(ch)) return
|
||||||
|
|
||||||
let pos = getCharSize(ch)
|
let pos = getCharSize(ch)
|
||||||
let isValidIdentifier = isLowercaseLetter(ch) || isEmoji(ch)
|
let isValidIdentifier = isLowercaseLetter(ch) || isEmoji(ch)
|
||||||
const canBeWord = stack.canShift(Word)
|
const canBeWord = stack.canShift(Word)
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
ch = getFullCodePoint(input, pos)
|
ch = getFullCodePoint(input, pos)
|
||||||
|
|
||||||
// Check for dot and scope - property access detection
|
// Check for dot and scope - property access detection
|
||||||
if (ch === 46 /* . */ && isValidIdentifier) {
|
if (ch === 46 /* . */ && isValidIdentifier) {
|
||||||
const identifierText = input.read(input.pos, input.pos + pos)
|
// Build identifier text by peeking character by character
|
||||||
const scope = stack.context as Scope | undefined
|
let identifierText = ''
|
||||||
|
for (let i = 0; i < pos; i++) {
|
||||||
|
const charCode = input.peek(i)
|
||||||
|
if (charCode === -1) break
|
||||||
|
// Handle surrogate pairs for emoji
|
||||||
|
if (charCode >= 0xd800 && charCode <= 0xdbff && i + 1 < pos) {
|
||||||
|
const low = input.peek(i + 1)
|
||||||
|
if (low >= 0xdc00 && low <= 0xdfff) {
|
||||||
|
identifierText += String.fromCharCode(charCode, low)
|
||||||
|
i++ // Skip the low surrogate
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
identifierText += String.fromCharCode(charCode)
|
||||||
|
}
|
||||||
|
|
||||||
if (scope?.has(identifierText)) {
|
const scope = stack.context as Scope | undefined
|
||||||
// In scope - stop here, let grammar parse property access
|
|
||||||
input.advance(pos)
|
if (scope?.has(identifierText)) {
|
||||||
input.acceptToken(Identifier)
|
// In scope - stop here, let grammar parse property access
|
||||||
return
|
input.advance(pos)
|
||||||
|
input.acceptToken(IdentifierBeforeDot)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Not in scope - continue consuming as Word (fall through)
|
||||||
}
|
}
|
||||||
// Not in scope - continue consuming as Word (fall through)
|
|
||||||
|
if (!isWordChar(ch)) break
|
||||||
|
|
||||||
|
// Certain characters might end a word or identifier if they are followed by whitespace.
|
||||||
|
// This allows things like `a = hello; 2` or `if x: y` to parse correctly.
|
||||||
|
if (canBeWord && (ch === 59 /* ; */ || ch === 58) /* : */) {
|
||||||
|
const nextCh = getFullCodePoint(input, pos + 1)
|
||||||
|
if (!isWordChar(nextCh)) break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Track identifier validity
|
||||||
|
if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 && !isEmoji(ch)) {
|
||||||
|
if (!canBeWord) break
|
||||||
|
isValidIdentifier = false
|
||||||
|
}
|
||||||
|
|
||||||
|
pos += getCharSize(ch)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isWordChar(ch)) break
|
input.advance(pos)
|
||||||
|
input.acceptToken(isValidIdentifier ? Identifier : Word)
|
||||||
// Certain characters might end a word or identifier if they are followed by whitespace.
|
},
|
||||||
// This allows things like `a = hello; 2` or `if x: y` to parse correctly.
|
{ contextual: true }
|
||||||
if (canBeWord && (ch === 59 /* ; */ || ch === 58) /* : */) {
|
)
|
||||||
const nextCh = getFullCodePoint(input, pos + 1)
|
|
||||||
if (!isWordChar(nextCh)) break
|
|
||||||
}
|
|
||||||
|
|
||||||
// Track identifier validity
|
|
||||||
if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 && !isEmoji(ch)) {
|
|
||||||
if (!canBeWord) break
|
|
||||||
isValidIdentifier = false
|
|
||||||
}
|
|
||||||
|
|
||||||
pos += getCharSize(ch)
|
|
||||||
}
|
|
||||||
|
|
||||||
input.advance(pos)
|
|
||||||
input.acceptToken(isValidIdentifier ? Identifier : Word)
|
|
||||||
}, { contextual: true })
|
|
||||||
|
|
||||||
const isWhiteSpace = (ch: number): boolean => {
|
const isWhiteSpace = (ch: number): boolean => {
|
||||||
return ch === 32 /* space */ || ch === 10 /* \n */ || ch === 9 /* tab */ || ch === 13 /* \r */
|
return ch === 32 /* space */ || ch === 10 /* \n */ || ch === 9 /* tab */ || ch === 13 /* \r */
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user