From d8947137441a3bf838fd13978b7777d7313d17f7 Mon Sep 17 00:00:00 2001 From: Corey Johnson Date: Fri, 17 Oct 2025 07:42:07 -0700 Subject: [PATCH] feat(parser): complete DotGet implementation with scope tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fixed tokenizer input.read() to use absolute positions - Fixed FN_KEYWORD term ID (33 after DotGet added to expression) - Added DotGet to expression for use as function argument - All 8 DotGet tests passing 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/parser/scopeTracker.ts | 2 +- src/parser/shrimp.grammar | 2 +- src/parser/shrimp.terms.ts | 16 ++-- src/parser/shrimp.ts | 16 ++-- src/parser/tests/dot-get.test.ts | 130 +++++++++++++++++++++++++++++++ src/parser/tokenizer.ts | 2 +- 6 files changed, 149 insertions(+), 19 deletions(-) create mode 100644 src/parser/tests/dot-get.test.ts diff --git a/src/parser/scopeTracker.ts b/src/parser/scopeTracker.ts index 08c64dc..7c292ac 100644 --- a/src/parser/scopeTracker.ts +++ b/src/parser/scopeTracker.ts @@ -43,7 +43,7 @@ let pendingIdentifiers: string[] = [] let isInParams = false // Term ID for 'fn' keyword - verified by parsing and inspecting the tree -const FN_KEYWORD = 32 +const FN_KEYWORD = 33 export const trackScope = new ContextTracker({ start: new Scope(null, new Set()), diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index c9c4a9c..95509d8 100644 --- a/src/parser/shrimp.grammar +++ b/src/parser/shrimp.grammar @@ -174,7 +174,7 @@ ParenExpr { } expression { - expressionWithoutIdentifier | Identifier + expressionWithoutIdentifier | DotGet | Identifier } @skip {} { diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts index 251d3b3..b7aeb71 100644 --- a/src/parser/shrimp.terms.ts +++ b/src/parser/shrimp.terms.ts @@ -18,14 +18,14 @@ export const Boolean = 28, Regex = 29, Null = 30, - FunctionDef = 31, - Params = 33, - colon = 34, - end = 35, - Underscore = 36, - NamedArg = 37, - NamedArgPrefix = 38, - DotGet = 40, + DotGet = 31, + FunctionDef = 32, + Params = 34, + colon = 35, + end = 36, + Underscore = 37, + NamedArg = 38, + NamedArgPrefix = 39, IfExpr = 41, ThenBlock = 44, ElsifExpr = 45, diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 5826da6..c4d2886 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -5,21 +5,21 @@ import {trackScope} from "./scopeTracker" import {highlighting} from "./highlight" export const parser = LRParser.deserialize({ version: 14, - states: ".dQVQaOOO#OQbO'#CdO#cQPO'#CeO#qQPO'#DkO$qQaO'#CcO$xOSO'#CsOOQ`'#Do'#DoO%WQPO'#DnO%oQaO'#DyOOQ`'#C{'#C{OOQO'#Dl'#DlO%wQPO'#DkO&VQaO'#EOOOQO'#DV'#DVOOQO'#Dk'#DkO&^QPO'#DjOOQ`'#Dj'#DjOOQ`'#D`'#D`QVQaOOOOQ`'#Dn'#DnOOQ`'#Cb'#CbO&fQaO'#DROOQ`'#Dm'#DmOOQ`'#Da'#DaO&sQbO,58{O'dQaO,59pO'iQaO,59yO&VQaO,59PO&VQaO,59PO'vQbO'#CdO)RQPO'#CeO)cQPO,58}O)tQPO,58}O)oQPO,58}O*oQPO,58}O*wQaO'#CuO+PQWO'#CvOOOO'#Ds'#DsOOOO'#Db'#DbO+eOSO,59_OOQ`,59_,59_OOQ`'#Dc'#DcO+sQaO'#C}O+{QPO,5:eO,QQaO'#DeO,VQPO,58zO,hQPO,5:jO,oQPO,5:jOOQ`,5:U,5:UOOQ`-E7^-E7^OOQ`,59m,59mOOQ`-E7_-E7_OOQO1G/[1G/[OOQO1G/e1G/eOOQO1G.k1G.kO,tQPO1G.kO&VQaO,59UO&VQaO,59UOOQ`1G.i1G.iOOOO,59a,59aOOOO,59b,59bOOOO-E7`-E7`OOQ`1G.y1G.yOOQ`-E7a-E7aO-`QaO1G0PO-pQbO'#CdOOQO,5:P,5:POOQO-E7c-E7cO.aQaO1G0UOOQO1G.p1G.pO.qQPO1G.pO.{QPO7+%kO/QQaO7+%lOOQO'#DX'#DXOOQO7+%p7+%pO/bQaO7+%qOOQ`<rAN>rO&VQaO'#DZOOQO'#Df'#DfO0uQPOAN>wO1QQPO'#D]OOQOAN>wAN>wO1VQPOAN>wO1[QPO,59uO1cQPO,59uOOQO-E7d-E7dOOQOG24cG24cO1hQPOG24cO1mQPO,59wO1rQPO1G/aOOQOLD)}LD)}O/QQaO1G/cO/bQaO7+${OOQO7+$}7+$}OOQO<sAN>sO&VQaO'#DZOOQO'#Df'#DfO2lQPOAN>wO2wQPO'#D]OOQOAN>wAN>wO2|QPOAN>wO3RQPO,59uO3YQPO,59uOOQO-E7d-E7dOOQOG24cG24cO3_QPOG24cO3dQPO,59wO3iQPO1G/aOOQOLD)}LD)}O0wQaO1G/cO1XQaO7+${OOQO7+$}7+$}OOQO<n#a#b;W#b#cCR#c#o;W#o;'S$_;'S;=`$v<%lO$_V>s[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#h?i#h#o;W#o;'S$_;'S;=`$v<%lO$_V?n^hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#Y@j#Y#];W#]#^Aa#^#o;W#o;'S$_;'S;=`$v<%lO$_V@qY!QPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VAf[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZB[#Z#o;W#o;'S$_;'S;=`$v<%lO$_VBcY!OPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VCW[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#XC|#X#o;W#o;'S$_;'S;=`$v<%lO$_VDTYhSsROt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VDx]hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#UEq#U#b;W#b#cIX#c#o;W#o;'S$_;'S;=`$v<%lO$_VEv[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aFl#a#o;W#o;'S$_;'S;=`$v<%lO$_VFq[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#hGg#h#o;W#o;'S$_;'S;=`$v<%lO$_VGl[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#YHb#Y#o;W#o;'S$_;'S;=`$v<%lO$_VHiYlRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VI`YpRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VJT[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZJy#Z#o;W#o;'S$_;'S;=`$v<%lO$_VKQYzPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__Kw[!jWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jLm#j#o;W#o;'S$_;'S;=`$v<%lO$_VLr[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aMh#a#o;W#o;'S$_;'S;=`$v<%lO$_VMm[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aNc#a#o;W#o;'S$_;'S;=`$v<%lO$_VNjYnRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V! _[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!!T#g#o;W#o;'S$_;'S;=`$v<%lO$_V!![YfRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_^!#RY!lWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__!#x[!kWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!$n#g#o;W#o;'S$_;'S;=`$v<%lO$_V!$s[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jGg#j#o;W#o;'S$_;'S;=`$v<%lO$_V!%pUwRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~!&XO!t~", + tokenData: "!&X~R!SOX$_XY$|YZ%gZp$_pq$|qr&Qrt$_tu'Yuw$_wx'_xy'dyz'}z{(h{|)R|}$_}!O)l!O!P,b!P!Q,{!Q![*]![!]5j!]!^%g!^!_6T!_!`7_!`!a7x!a#O$_#O#P9S#P#R$_#R#S9X#S#T$_#T#U9r#U#X;W#X#Y=m#Y#ZDs#Z#];W#]#^JO#^#b;W#b#cKp#c#d! Y#d#f;W#f#g!!z#g#h;W#h#i!#q#i#o;W#o#p$_#p#q!%i#q;'S$_;'S;=`$v<%l~$_~O$_~~!&SS$dUhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_S$yP;=`<%l$__%TUhS!]ZOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V%nUhS!pROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V&VWhSOt$_uw$_x!_$_!_!`&o!`#O$_#P;'S$_;'S;=`$v<%lO$_V&vU`RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~'_O!h~~'dO!f~V'kUhS!dROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(UUhS!eROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(oUYRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)YU[RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)sWhS]ROt$_uw$_x!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V*dYhSkROt$_uw$_x!O$_!O!P+S!P!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V+XWhSOt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V+xWhSkROt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V,iU!mRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V-SWhSZROt$_uw$_x!P$_!P!Q-l!Q#O$_#P;'S$_;'S;=`$v<%lO$_V-q^hSOY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q$_!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mV.t^hSmROY.mYZ$_Zt.mtu/puw.mwx/px!P.m!P!Q2e!Q!}.m!}#O4c#O#P2O#P;'S.m;'S;=`5d<%lO.mR/uXmROY/pZ!P/p!P!Q0b!Q!}/p!}#O1P#O#P2O#P;'S/p;'S;=`2_<%lO/pR0eP!P!Q0hR0mUmR#Z#[0h#]#^0h#a#b0h#g#h0h#i#j0h#m#n0hR1SVOY1PZ#O1P#O#P1i#P#Q/p#Q;'S1P;'S;=`1x<%lO1PR1lSOY1PZ;'S1P;'S;=`1x<%lO1PR1{P;=`<%l1PR2RSOY/pZ;'S/p;'S;=`2_<%lO/pR2bP;=`<%l/pV2jWhSOt$_uw$_x!P$_!P!Q3S!Q#O$_#P;'S$_;'S;=`$v<%lO$_V3ZbhSmROt$_uw$_x#O$_#P#Z$_#Z#[3S#[#]$_#]#^3S#^#a$_#a#b3S#b#g$_#g#h3S#h#i$_#i#j3S#j#m$_#m#n3S#n;'S$_;'S;=`$v<%lO$_V4h[hSOY4cYZ$_Zt4ctu1Puw4cwx1Px#O4c#O#P1i#P#Q.m#Q;'S4c;'S;=`5^<%lO4cV5aP;=`<%l4cV5gP;=`<%l.mT5qUhSsPOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V6[WaRhSOt$_uw$_x!_$_!_!`6t!`#O$_#P;'S$_;'S;=`$v<%lO$_V6{UbRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V7fU_RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V8PWcRhSOt$_uw$_x!_$_!_!`8i!`#O$_#P;'S$_;'S;=`$v<%lO$_V8pUdRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~9XO!i~V9`UhSuROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V9w[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#b;W#b#c;{#c#o;W#o;'S$_;'S;=`$v<%lO$_U:tUwQhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_U;]YhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_Vn#a#b;W#b#cCR#c#o;W#o;'S$_;'S;=`$v<%lO$_V>s[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#h?i#h#o;W#o;'S$_;'S;=`$v<%lO$_V?n^hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#Y@j#Y#];W#]#^Aa#^#o;W#o;'S$_;'S;=`$v<%lO$_V@qY!QPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VAf[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZB[#Z#o;W#o;'S$_;'S;=`$v<%lO$_VBcY!OPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VCW[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#W;W#W#XC|#X#o;W#o;'S$_;'S;=`$v<%lO$_VDTYhStROt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VDx]hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#UEq#U#b;W#b#cIX#c#o;W#o;'S$_;'S;=`$v<%lO$_VEv[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aFl#a#o;W#o;'S$_;'S;=`$v<%lO$_VFq[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#g;W#g#hGg#h#o;W#o;'S$_;'S;=`$v<%lO$_VGl[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#X;W#X#YHb#Y#o;W#o;'S$_;'S;=`$v<%lO$_VHiYlRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VI`YqRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_VJT[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#Y;W#Y#ZJy#Z#o;W#o;'S$_;'S;=`$v<%lO$_VKQYzPhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__Kw[!jWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jLm#j#o;W#o;'S$_;'S;=`$v<%lO$_VLr[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aMh#a#o;W#o;'S$_;'S;=`$v<%lO$_VMm[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#`;W#`#aNc#a#o;W#o;'S$_;'S;=`$v<%lO$_VNjYnRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_V! _[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!!T#g#o;W#o;'S$_;'S;=`$v<%lO$_V!![YfRhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$_^!#RY!lWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#o;W#o;'S$_;'S;=`$v<%lO$__!#x[!kWhSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#f;W#f#g!$n#g#o;W#o;'S$_;'S;=`$v<%lO$_V!$s[hSOt$_uw$_x!_$_!_!`:m!`#O$_#P#T$_#T#i;W#i#jGg#j#o;W#o;'S$_;'S;=`$v<%lO$_V!%pUxRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~!&XO!t~", tokenizers: [0, 1, 2, 3, tokenizer], topRules: {"Program":[0,3]}, - tokenPrec: 775 + tokenPrec: 858 }) diff --git a/src/parser/tests/dot-get.test.ts b/src/parser/tests/dot-get.test.ts new file mode 100644 index 0000000..18e774f --- /dev/null +++ b/src/parser/tests/dot-get.test.ts @@ -0,0 +1,130 @@ +import { describe, test, expect } from 'bun:test' +import '../../testSetup' + +describe('DotGet', () => { + test('readme.txt is Word when readme not in scope', () => { + expect('readme.txt').toMatchTree(`Word readme.txt`) + }) + + test('obj.prop is DotGet when obj is assigned', () => { + expect('obj = 5; obj.prop').toMatchTree(` + Assign + Identifier obj + operator = + Number 5 + DotGet + Identifier obj + Identifier prop + `) + }) + + test('function parameters are in scope within function body', () => { + expect('fn config: config.path end').toMatchTree(` + FunctionDef + keyword fn + Params + Identifier config + colon : + DotGet + Identifier config + Identifier path + end end + `) + }) + + test('parameters out of scope outside function', () => { + expect('fn x: x.prop end; x.prop').toMatchTree(` + FunctionDef + keyword fn + Params + Identifier x + colon : + DotGet + Identifier x + Identifier prop + end end + Word x.prop + `) + }) + + test('multiple parameters work correctly', () => { + expect(`fn x y: + x.foo + y.bar +end`).toMatchTree(` + FunctionDef + keyword fn + Params + Identifier x + Identifier y + colon : + DotGet + Identifier x + Identifier foo + DotGet + Identifier y + Identifier bar + end end + `) + }) + + test('nested functions with scope isolation', () => { + expect(`fn x: + x.outer + fn y: y.inner end +end`).toMatchTree(` + FunctionDef + keyword fn + Params + Identifier x + colon : + DotGet + Identifier x + Identifier outer + FunctionDef + keyword fn + Params + Identifier y + colon : + DotGet + Identifier y + Identifier inner + end end + end end + `) + }) + + test('dot get works as function argument', () => { + expect('config = 42; echo config.path').toMatchTree(` + Assign + Identifier config + operator = + Number 42 + FunctionCall + Identifier echo + PositionalArg + DotGet + Identifier config + Identifier path + `) + }) + + test('mixed file paths and dot get', () => { + expect('config = 42; cat readme.txt; echo config.path').toMatchTree(` + Assign + Identifier config + operator = + Number 42 + FunctionCall + Identifier cat + PositionalArg + Word readme.txt + FunctionCall + Identifier echo + PositionalArg + DotGet + Identifier config + Identifier path + `) + }) +}) diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts index 3011d4b..03d874d 100644 --- a/src/parser/tokenizer.ts +++ b/src/parser/tokenizer.ts @@ -17,7 +17,7 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack // Check for dot and scope - property access detection if (ch === 46 /* . */ && isValidIdentifier) { - const identifierText = input.read(0, pos) + const identifierText = input.read(input.pos, input.pos + pos) const scope = stack.context as Scope | undefined if (scope?.has(identifierText)) {