From 7da4c1496293526f176eb8d43d864496d48c3764 Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Tue, 28 Oct 2025 16:30:45 -0700 Subject: [PATCH] parse arrays --- src/parser/shrimp.grammar | 6 ++- src/parser/shrimp.terms.ts | 33 +++++++------- src/parser/shrimp.ts | 22 +++++----- src/parser/tests/literals.test.ts | 72 +++++++++++++++++++++++++++++++ src/parser/tokenizer.ts | 8 +++- 5 files changed, 112 insertions(+), 29 deletions(-) create mode 100644 src/parser/tests/literals.test.ts diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index 0968765..d7edba5 100644 --- a/src/parser/shrimp.grammar +++ b/src/parser/shrimp.grammar @@ -191,6 +191,10 @@ EscapeSeq { "\\" ("$" | "n" | "t" | "r" | "\\" | "'") } +Array { + "[" (newlineOrSemicolon | expression)* "]" +} + // We need expressionWithoutIdentifier to avoid conflicts in consumeToTerminator. // Without this, when parsing "my-var" at statement level, the parser can't decide: // - ambiguousFunctionCall → FunctionCallOrIdentifier → Identifier @@ -200,7 +204,7 @@ EscapeSeq { // to go through ambiguousFunctionCall (which is what we want semantically). // Yes, it is annoying and I gave up trying to use GLR to fix it. expressionWithoutIdentifier { - ParenExpr | Word | String | Number | Boolean | Regex | @specialize[@name=Null] + ParenExpr | Word | String | Number | Boolean | Regex | Array | @specialize[@name=Null] } block { diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts index 6ea2f2a..9712d60 100644 --- a/src/parser/shrimp.terms.ts +++ b/src/parser/shrimp.terms.ts @@ -31,19 +31,20 @@ export const EscapeSeq = 29, Boolean = 30, Regex = 31, - Null = 32, - ConditionalOp = 33, - FunctionDef = 34, - Params = 35, - colon = 36, - keyword = 50, - PositionalArg = 38, - Underscore = 39, - NamedArg = 40, - NamedArgPrefix = 41, - IfExpr = 43, - SingleLineThenBlock = 45, - ThenBlock = 46, - ElseIfExpr = 47, - ElseExpr = 49, - Assign = 51 + Array = 32, + Null = 33, + ConditionalOp = 34, + FunctionDef = 35, + Params = 36, + colon = 37, + keyword = 51, + PositionalArg = 39, + Underscore = 40, + NamedArg = 41, + NamedArgPrefix = 42, + IfExpr = 44, + SingleLineThenBlock = 46, + ThenBlock = 47, + ElseIfExpr = 48, + ElseExpr = 50, + Assign = 52 diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 40ba69f..9e934e5 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -4,24 +4,24 @@ import {operatorTokenizer} from "./operatorTokenizer" import {tokenizer, specializeKeyword} from "./tokenizer" import {trackScope} from "./scopeTracker" import {highlighting} from "./highlight" -const spec_Identifier = {__proto__:null,null:64, end:74, if:88, elseif:96, else:100} +const spec_Identifier = {__proto__:null,null:66, end:76, if:90, elseif:98, else:102} export const parser = LRParser.deserialize({ version: 14, - states: "/SQYQbOOO!TOpO'#CqO#aQcO'#CtO$ZOSO'#CvO%aQcO'#DsOOQa'#Ds'#DsO&gQcO'#DrO'OQRO'#CuO'^QcO'#DnO'uQbO'#D{OOQ`'#DO'#DOO'}QbO'#CsOOQ`'#Do'#DoO(oQbO'#DnO(}QbO'#EROOQ`'#DX'#DXO)lQRO'#DaOOQ`'#Dn'#DnO)qQQO'#DmOOQ`'#Dm'#DmOOQ`'#Db'#DbQYQbOOO)yObO,59]OOQa'#Dr'#DrOOQ`'#DS'#DSO*RQbO'#DUOOQ`'#EQ'#EQOOQ`'#Df'#DfO*]QbO,59[O*pQbO'#CxO*xQWO'#CyOOOO'#Du'#DuOOOO'#Dc'#DcO+^OSO,59bOOQa,59b,59bO(}QbO,59aO(}QbO,59aOOQ`'#Dd'#DdO+lQbO'#DPO+tQQO,5:gO+yQRO,59_O-`QRO'#CuO-pQRO,59_O-|QQO,59_O.RQQO,59_O.ZQbO'#DgO.fQbO,59ZO.wQRO,5:mO/OQQO,5:mO/TQbO,59{OOQ`,5:X,5:XOOQ`-E7`-E7`OOQa1G.w1G.wOOQ`,59p,59pOOQ`-E7d-E7dOOOO,59d,59dOOOO,59e,59eOOOO-E7a-E7aOOQa1G.|1G.|OOQa1G.{1G.{O/_QcO1G.{OOQ`-E7b-E7bO/yQbO1G0ROOQa1G.y1G.yO(}QbO,59iO(}QbO,59iO!YQbO'#CtO$iQbO'#CpOOQ`,5:R,5:ROOQ`-E7e-E7eO0WQbO1G0XOOQ`1G/g1G/gO0eQbO7+%mO0jQbO7+%nOOQO1G/T1G/TO0zQRO1G/TOOQ`'#DZ'#DZO1UQbO7+%sO1ZQbO7+%tOOQ`<tAN>tO(}QbO'#D]OOQ`'#Dh'#DhO2nQbOAN>zO2yQQO'#D_OOQ`AN>zAN>zO3OQbOAN>zO3TQRO,59wO3[QQO,59wOOQ`-E7f-E7fOOQ`G24fG24fO3aQbOG24fO3fQQO,59yO3kQQO1G/cOOQ`LD*QLD*QO0jQbO1G/eO1ZQbO7+$}OOQ`7+%P7+%POOQ`<yAN>yO*VQbO'#D^OOQ`'#Dj'#DjO4QQbOAN?OO4]QQO'#D`OOQ`AN?OAN?OO4bQbOAN?OO4gQRO,59xO4nQQO,59xOOQ`-E7h-E7hOOQ`G24jG24jO4sQbOG24jO4xQQO,59zO4}QQO1G/dOOQ`LD*ULD*UO1|QbO1G/fO2mQbO7+%OOOQ`7+%Q7+%QOOQ`<i~RzOX#uXY$dYZ$}Zp#upq$dqs#ust%htu'Puw#uwx'Uxy'Zyz'tz{#u{|(_|}#u}!O(_!O!P#u!P!Q+R!Q![(|![!]3n!]!^$}!^#O#u#O#P4X#P#R#u#R#S4^#S#T#u#T#Y4w#Y#Z6V#Z#b4w#b#c:e#c#f4w#f#g;[#g#h4w#h#idS#zUkSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uS$aP;=`<%l#u^$kUkS!_YOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU%UUkS!qQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u^%oZkS!`YOY%hYZ#uZt%htu&buw%hwx&bx#O%h#O#P&b#P;'S%h;'S;=`&y<%lO%hY&gS!`YOY&bZ;'S&b;'S;=`&s<%lO&bY&vP;=`<%l&b^&|P;=`<%l%h~'UO!j~~'ZO!h~U'bUkS!eQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU'{UkS!sQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU(dWkSOt#uuw#ux!Q#u!Q![(|![#O#u#P;'S#u;'S;=`$^<%lO#uU)TYkSfQOt#uuw#ux!O#u!O!P)s!P!Q#u!Q![(|![#O#u#P;'S#u;'S;=`$^<%lO#uU)xWkSOt#uuw#ux!Q#u!Q![*b![#O#u#P;'S#u;'S;=`$^<%lO#uU*iWkSfQOt#uuw#ux!Q#u!Q![*b![#O#u#P;'S#u;'S;=`$^<%lO#uU+WWkSOt#uuw#ux!P#u!P!Q+p!Q#O#u#P;'S#u;'S;=`$^<%lO#uU+u^kSOY,qYZ#uZt,qtu-tuw,qwx-tx!P,q!P!Q#u!Q!},q!}#O2g#O#P0S#P;'S,q;'S;=`3h<%lO,qU,x^kSoQOY,qYZ#uZt,qtu-tuw,qwx-tx!P,q!P!Q0i!Q!},q!}#O2g#O#P0S#P;'S,q;'S;=`3h<%lO,qQ-yXoQOY-tZ!P-t!P!Q.f!Q!}-t!}#O/T#O#P0S#P;'S-t;'S;=`0c<%lO-tQ.iP!P!Q.lQ.qUoQ#Z#[.l#]#^.l#a#b.l#g#h.l#i#j.l#m#n.lQ/WVOY/TZ#O/T#O#P/m#P#Q-t#Q;'S/T;'S;=`/|<%lO/TQ/pSOY/TZ;'S/T;'S;=`/|<%lO/TQ0PP;=`<%l/TQ0VSOY-tZ;'S-t;'S;=`0c<%lO-tQ0fP;=`<%l-tU0nWkSOt#uuw#ux!P#u!P!Q1W!Q#O#u#P;'S#u;'S;=`$^<%lO#uU1_bkSoQOt#uuw#ux#O#u#P#Z#u#Z#[1W#[#]#u#]#^1W#^#a#u#a#b1W#b#g#u#g#h1W#h#i#u#i#j1W#j#m#u#m#n1W#n;'S#u;'S;=`$^<%lO#uU2l[kSOY2gYZ#uZt2gtu/Tuw2gwx/Tx#O2g#O#P/m#P#Q,q#Q;'S2g;'S;=`3b<%lO2gU3eP;=`<%l2gU3kP;=`<%l,qU3uUkStQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u~4^O!k~U4eUkSwQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU4|YkSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#uU5sUyQkSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU6[ZkSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#U6}#U#o4w#o;'S#u;'S;=`$^<%lO#uU7S[kSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#`4w#`#a7x#a#o4w#o;'S#u;'S;=`$^<%lO#uU7}[kSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#g4w#g#h8s#h#o4w#o;'S#u;'S;=`$^<%lO#uU8x[kSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#X4w#X#Y9n#Y#o4w#o;'S#u;'S;=`$^<%lO#uU9uYnQkSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^:lY!lWkSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^;cY!nWkSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^QUzQkSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u~>iO!w~", - tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!d~~", 11)], + repeatNodeCount: 8, + tokenData: "?s~R|OX#{XY$jYZ%TZp#{pq$jqs#{st%ntu'Vuw#{wx'[xy'ayz'zz{#{{|(e|}#{}!O(e!O!P#{!P!Q+X!Q![)S![!]3t!]!^%T!^!}#{!}#O4_#O#P4x#P#Q4}#Q#R#{#R#S5h#S#T#{#T#Y6R#Y#Z7a#Z#b6R#b#c;o#c#f6R#f#gY#g#o6R#o;'S#{;'S;=`$d<%lO#{U>_[kSOt#{uw#{x!_#{!_!`6v!`#O#{#P#T#{#T#i6R#i#j9}#j#o6R#o;'S#{;'S;=`$d<%lO#{U?[U{QkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~?sO!{~", + tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!f~~", 11)], topRules: {"Program":[0,18]}, specialized: [{term: 13, get: (value: any, stack: any) => (specializeKeyword(value, stack) << 1), external: specializeKeyword},{term: 13, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}], - tokenPrec: 860 + tokenPrec: 924 }) diff --git a/src/parser/tests/literals.test.ts b/src/parser/tests/literals.test.ts new file mode 100644 index 0000000..c48e0b3 --- /dev/null +++ b/src/parser/tests/literals.test.ts @@ -0,0 +1,72 @@ +import { expect, describe, test } from 'bun:test' + +import '../shrimp.grammar' // Importing this so changes cause it to retest! + +describe('array literals', () => { + test('work with numbers', () => { + expect('[1 2 3]').toMatchTree(` + Array + Number 1 + Number 2 + Number 3 + `) + }) + + test('work with strings', () => { + expect("['one' 'two' 'three']").toMatchTree(` + Array + String + StringFragment one + String + StringFragment two + String + StringFragment three + `) + }) + + test('work with identifiers', () => { + expect('[one two three]').toMatchTree(` + Array + Identifier one + Identifier two + Identifier three + `) + }) + + test('can be nested', () => { + expect('[one [two [three]]]').toMatchTree(` + Array + Identifier one + Array + Identifier two + Array + Identifier three + `) + }) + + test('can span multiple lines', () => { + expect(`[ + 1 + 2 + 3 + ]`).toMatchTree(` + Array + Number 1 + Number 2 + Number 3 + `) + }) + + test('can span multiple w/o calling functions', () => { + expect(`[ + one + two + three + ]`).toMatchTree(` + Array + Identifier one + Identifier two + Identifier three + `) + }) +}) diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts index 0db5545..cef4446 100644 --- a/src/parser/tokenizer.ts +++ b/src/parser/tokenizer.ts @@ -195,7 +195,13 @@ const isWhiteSpace = (ch: number): boolean => { } const isWordChar = (ch: number): boolean => { - return !isWhiteSpace(ch) && ch !== 10 /* \n */ && ch !== 41 /* ) */ && ch !== -1 /* EOF */ + return ( + !isWhiteSpace(ch) && + ch !== 10 /* \n */ && + ch !== 41 /* ) */ && + ch !== 93 /* ] */ && + ch !== -1 /* EOF */ + ) } const isLowercaseLetter = (ch: number): boolean => {