From 7da4c1496293526f176eb8d43d864496d48c3764 Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Tue, 28 Oct 2025 16:30:45 -0700 Subject: [PATCH 1/8] parse arrays --- src/parser/shrimp.grammar | 6 ++- src/parser/shrimp.terms.ts | 33 +++++++------- src/parser/shrimp.ts | 22 +++++----- src/parser/tests/literals.test.ts | 72 +++++++++++++++++++++++++++++++ src/parser/tokenizer.ts | 8 +++- 5 files changed, 112 insertions(+), 29 deletions(-) create mode 100644 src/parser/tests/literals.test.ts diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index 0968765..d7edba5 100644 --- a/src/parser/shrimp.grammar +++ b/src/parser/shrimp.grammar @@ -191,6 +191,10 @@ EscapeSeq { "\\" ("$" | "n" | "t" | "r" | "\\" | "'") } +Array { + "[" (newlineOrSemicolon | expression)* "]" +} + // We need expressionWithoutIdentifier to avoid conflicts in consumeToTerminator. // Without this, when parsing "my-var" at statement level, the parser can't decide: // - ambiguousFunctionCall → FunctionCallOrIdentifier → Identifier @@ -200,7 +204,7 @@ EscapeSeq { // to go through ambiguousFunctionCall (which is what we want semantically). // Yes, it is annoying and I gave up trying to use GLR to fix it. 
expressionWithoutIdentifier { - ParenExpr | Word | String | Number | Boolean | Regex | @specialize[@name=Null] + ParenExpr | Word | String | Number | Boolean | Regex | Array | @specialize[@name=Null] } block { diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts index 6ea2f2a..9712d60 100644 --- a/src/parser/shrimp.terms.ts +++ b/src/parser/shrimp.terms.ts @@ -31,19 +31,20 @@ export const EscapeSeq = 29, Boolean = 30, Regex = 31, - Null = 32, - ConditionalOp = 33, - FunctionDef = 34, - Params = 35, - colon = 36, - keyword = 50, - PositionalArg = 38, - Underscore = 39, - NamedArg = 40, - NamedArgPrefix = 41, - IfExpr = 43, - SingleLineThenBlock = 45, - ThenBlock = 46, - ElseIfExpr = 47, - ElseExpr = 49, - Assign = 51 + Array = 32, + Null = 33, + ConditionalOp = 34, + FunctionDef = 35, + Params = 36, + colon = 37, + keyword = 51, + PositionalArg = 39, + Underscore = 40, + NamedArg = 41, + NamedArgPrefix = 42, + IfExpr = 44, + SingleLineThenBlock = 46, + ThenBlock = 47, + ElseIfExpr = 48, + ElseExpr = 50, + Assign = 52 diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 40ba69f..9e934e5 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -4,24 +4,24 @@ import {operatorTokenizer} from "./operatorTokenizer" import {tokenizer, specializeKeyword} from "./tokenizer" import {trackScope} from "./scopeTracker" import {highlighting} from "./highlight" -const spec_Identifier = {__proto__:null,null:64, end:74, if:88, elseif:96, else:100} +const spec_Identifier = {__proto__:null,null:66, end:76, if:90, elseif:98, else:102} export const parser = LRParser.deserialize({ version: 14, - states: 
"/SQYQbOOO!TOpO'#CqO#aQcO'#CtO$ZOSO'#CvO%aQcO'#DsOOQa'#Ds'#DsO&gQcO'#DrO'OQRO'#CuO'^QcO'#DnO'uQbO'#D{OOQ`'#DO'#DOO'}QbO'#CsOOQ`'#Do'#DoO(oQbO'#DnO(}QbO'#EROOQ`'#DX'#DXO)lQRO'#DaOOQ`'#Dn'#DnO)qQQO'#DmOOQ`'#Dm'#DmOOQ`'#Db'#DbQYQbOOO)yObO,59]OOQa'#Dr'#DrOOQ`'#DS'#DSO*RQbO'#DUOOQ`'#EQ'#EQOOQ`'#Df'#DfO*]QbO,59[O*pQbO'#CxO*xQWO'#CyOOOO'#Du'#DuOOOO'#Dc'#DcO+^OSO,59bOOQa,59b,59bO(}QbO,59aO(}QbO,59aOOQ`'#Dd'#DdO+lQbO'#DPO+tQQO,5:gO+yQRO,59_O-`QRO'#CuO-pQRO,59_O-|QQO,59_O.RQQO,59_O.ZQbO'#DgO.fQbO,59ZO.wQRO,5:mO/OQQO,5:mO/TQbO,59{OOQ`,5:X,5:XOOQ`-E7`-E7`OOQa1G.w1G.wOOQ`,59p,59pOOQ`-E7d-E7dOOOO,59d,59dOOOO,59e,59eOOOO-E7a-E7aOOQa1G.|1G.|OOQa1G.{1G.{O/_QcO1G.{OOQ`-E7b-E7bO/yQbO1G0ROOQa1G.y1G.yO(}QbO,59iO(}QbO,59iO!YQbO'#CtO$iQbO'#CpOOQ`,5:R,5:ROOQ`-E7e-E7eO0WQbO1G0XOOQ`1G/g1G/gO0eQbO7+%mO0jQbO7+%nOOQO1G/T1G/TO0zQRO1G/TOOQ`'#DZ'#DZO1UQbO7+%sO1ZQbO7+%tOOQ`<tAN>tO(}QbO'#D]OOQ`'#Dh'#DhO2nQbOAN>zO2yQQO'#D_OOQ`AN>zAN>zO3OQbOAN>zO3TQRO,59wO3[QQO,59wOOQ`-E7f-E7fOOQ`G24fG24fO3aQbOG24fO3fQQO,59yO3kQQO1G/cOOQ`LD*QLD*QO0jQbO1G/eO1ZQbO7+$}OOQ`7+%P7+%POOQ`<yAN>yO*VQbO'#D^OOQ`'#Dj'#DjO4QQbOAN?OO4]QQO'#D`OOQ`AN?OAN?OO4bQbOAN?OO4gQRO,59xO4nQQO,59xOOQ`-E7h-E7hOOQ`G24jG24jO4sQbOG24jO4xQQO,59zO4}QQO1G/dOOQ`LD*ULD*UO1|QbO1G/fO2mQbO7+%OOOQ`7+%Q7+%QOOQ`<i~RzOX#uXY$dYZ$}Zp#upq$dqs#ust%htu'Puw#uwx'Uxy'Zyz'tz{#u{|(_|}#u}!O(_!O!P#u!P!Q+R!Q![(|![!]3n!]!^$}!^#O#u#O#P4X#P#R#u#R#S4^#S#T#u#T#Y4w#Y#Z6V#Z#b4w#b#c:e#c#f4w#f#g;[#g#h4w#h#idS#zUkSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uS$aP;=`<%l#u^$kUkS!_YOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU%UUkS!qQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u^%oZkS!`YOY%hYZ#uZt%htu&buw%hwx&bx#O%h#O#P&b#P;'S%h;'S;=`&y<%lO%hY&gS!`YOY&bZ;'S&b;'S;=`&s<%lO&bY&vP;=`<%l&b^&|P;=`<%l%h~'UO!j~~'ZO!h~U'bUkS!eQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU'{UkS!sQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU(dWkSOt#uuw#ux!Q#u!Q![(|![#O#u#P;'S#u;'S;=`$^<%lO#uU)TYkSfQOt#uuw#ux!O#u!O!P)s!P!Q#u!Q![(|![#O#u#P;'S#u;'S;=`$^<%lO#uU)xWkSOt#uuw#ux!Q#u!Q![*b![#O#u#P;'S#u;'S;=`$^<%lO#uU*iWkSfQOt#uuw#ux!Q#u!Q![*b![#O#u#P;'S#u;'S;=`$^<%lO#uU+WW
kSOt#uuw#ux!P#u!P!Q+p!Q#O#u#P;'S#u;'S;=`$^<%lO#uU+u^kSOY,qYZ#uZt,qtu-tuw,qwx-tx!P,q!P!Q#u!Q!},q!}#O2g#O#P0S#P;'S,q;'S;=`3h<%lO,qU,x^kSoQOY,qYZ#uZt,qtu-tuw,qwx-tx!P,q!P!Q0i!Q!},q!}#O2g#O#P0S#P;'S,q;'S;=`3h<%lO,qQ-yXoQOY-tZ!P-t!P!Q.f!Q!}-t!}#O/T#O#P0S#P;'S-t;'S;=`0c<%lO-tQ.iP!P!Q.lQ.qUoQ#Z#[.l#]#^.l#a#b.l#g#h.l#i#j.l#m#n.lQ/WVOY/TZ#O/T#O#P/m#P#Q-t#Q;'S/T;'S;=`/|<%lO/TQ/pSOY/TZ;'S/T;'S;=`/|<%lO/TQ0PP;=`<%l/TQ0VSOY-tZ;'S-t;'S;=`0c<%lO-tQ0fP;=`<%l-tU0nWkSOt#uuw#ux!P#u!P!Q1W!Q#O#u#P;'S#u;'S;=`$^<%lO#uU1_bkSoQOt#uuw#ux#O#u#P#Z#u#Z#[1W#[#]#u#]#^1W#^#a#u#a#b1W#b#g#u#g#h1W#h#i#u#i#j1W#j#m#u#m#n1W#n;'S#u;'S;=`$^<%lO#uU2l[kSOY2gYZ#uZt2gtu/Tuw2gwx/Tx#O2g#O#P/m#P#Q,q#Q;'S2g;'S;=`3b<%lO2gU3eP;=`<%l2gU3kP;=`<%l,qU3uUkStQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u~4^O!k~U4eUkSwQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU4|YkSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#uU5sUyQkSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU6[ZkSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#U6}#U#o4w#o;'S#u;'S;=`$^<%lO#uU7S[kSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#`4w#`#a7x#a#o4w#o;'S#u;'S;=`$^<%lO#uU7}[kSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#g4w#g#h8s#h#o4w#o;'S#u;'S;=`$^<%lO#uU8x[kSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#X4w#X#Y9n#Y#o4w#o;'S#u;'S;=`$^<%lO#uU9uYnQkSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^:lY!lWkSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^;cY!nWkSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^QUzQkSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u~>iO!w~", - tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!d~~", 11)], + repeatNodeCount: 8, + tokenData: "?s~R|OX#{XY$jYZ%TZp#{pq$jqs#{st%ntu'Vuw#{wx'[xy'ayz'zz{#{{|(e|}#{}!O(e!O!P#{!P!Q+X!Q![)S![!]3t!]!^%T!^!}#{!}#O4_#O#P4x#P#Q4}#Q#R#{#R#S5h#S#T#{#T#Y6R#Y#Z7a#Z#b6R#b#c;o#c#f6R#f#gY#g#o6R#o;'S#{;'S;=`$d<%lO#{U>_[kSOt#{uw#{x!_#{!_!`6v!`#O#{#P#T#{#T#i6R#i#j9}#j#o6R#o;'S#{;'S;=`$d<%lO#{U?[U{QkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~?sO!{~", + tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new 
LocalTokenGroup("[~RP!O!PU~ZO!f~~", 11)], topRules: {"Program":[0,18]}, specialized: [{term: 13, get: (value: any, stack: any) => (specializeKeyword(value, stack) << 1), external: specializeKeyword},{term: 13, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}], - tokenPrec: 860 + tokenPrec: 924 }) diff --git a/src/parser/tests/literals.test.ts b/src/parser/tests/literals.test.ts new file mode 100644 index 0000000..c48e0b3 --- /dev/null +++ b/src/parser/tests/literals.test.ts @@ -0,0 +1,72 @@ +import { expect, describe, test } from 'bun:test' + +import '../shrimp.grammar' // Importing this so changes cause it to retest! + +describe('array literals', () => { + test('work with numbers', () => { + expect('[1 2 3]').toMatchTree(` + Array + Number 1 + Number 2 + Number 3 + `) + }) + + test('work with strings', () => { + expect("['one' 'two' 'three']").toMatchTree(` + Array + String + StringFragment one + String + StringFragment two + String + StringFragment three + `) + }) + + test('work with identifiers', () => { + expect('[one two three]').toMatchTree(` + Array + Identifier one + Identifier two + Identifier three + `) + }) + + test('can be nested', () => { + expect('[one [two [three]]]').toMatchTree(` + Array + Identifier one + Array + Identifier two + Array + Identifier three + `) + }) + + test('can span multiple lines', () => { + expect(`[ + 1 + 2 + 3 + ]`).toMatchTree(` + Array + Number 1 + Number 2 + Number 3 + `) + }) + + test('can span multiple w/o calling functions', () => { + expect(`[ + one + two + three + ]`).toMatchTree(` + Array + Identifier one + Identifier two + Identifier three + `) + }) +}) diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts index 0db5545..cef4446 100644 --- a/src/parser/tokenizer.ts +++ b/src/parser/tokenizer.ts @@ -195,7 +195,13 @@ const isWhiteSpace = (ch: number): boolean => { } const isWordChar = (ch: number): boolean => { - return !isWhiteSpace(ch) && ch !== 10 /* \n */ && ch !== 41 /* ) */ && 
ch !== -1 /* EOF */ + return ( + !isWhiteSpace(ch) && + ch !== 10 /* \n */ && + ch !== 41 /* ) */ && + ch !== 93 /* ] */ && + ch !== -1 /* EOF */ + ) } const isLowercaseLetter = (ch: number): boolean => { From 339c09eb8c3f2b7a23f5e4c098321605eec7b75e Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Tue, 28 Oct 2025 16:47:33 -0700 Subject: [PATCH 2/8] compile array literals --- src/compiler/compiler.ts | 7 ++++++ src/compiler/tests/literals.test.ts | 36 +++++++++++++++++++++++++++++ src/testSetup.ts | 12 +++++----- 3 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 src/compiler/tests/literals.test.ts diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts index b7e6274..d05f2ce 100644 --- a/src/compiler/compiler.ts +++ b/src/compiler/compiler.ts @@ -468,6 +468,13 @@ export class Compiler { return instructions } + case terms.Array: { + const children = getAllChildren(node) + const instructions: ProgramItem[] = children.map((x) => this.#compileNode(x, input)).flat() + instructions.push(['MAKE_ARRAY', children.length]) + return instructions + } + default: throw new CompilerError( `Compiler doesn't know how to handle a "${node.type.name}" node.`, diff --git a/src/compiler/tests/literals.test.ts b/src/compiler/tests/literals.test.ts new file mode 100644 index 0000000..4be5579 --- /dev/null +++ b/src/compiler/tests/literals.test.ts @@ -0,0 +1,36 @@ +import { describe } from 'bun:test' +import { expect, test } from 'bun:test' + +describe('array literals', () => { + test('work with numbers', () => { + expect('[1 2 3]').toEvaluateTo([1, 2, 3]) + }) + + test('work with strings', () => { + expect("['one' 'two' 'three']").toEvaluateTo(['one', 'two', 'three']) + }) + + test('work with identifiers', () => { + expect('[one two three]').toEvaluateTo(['one', 'two', 'three']) + }) + + test('can be nested', () => { + expect('[one [two [three]]]').toEvaluateTo(['one', ['two', ['three']]]) + }) + + test('can span multiple lines', () => { + expect(`[ + 1 + 
2 + 3 + ]`).toEvaluateTo([1, 2, 3]) + }) + + test('can span multiple w/o calling functions', () => { + expect(`[ + one + two + three + ]`).toEvaluateTo(['one', 'two', 'three']) + }) +}) diff --git a/src/testSetup.ts b/src/testSetup.ts index 8e1f4b8..89203e1 100644 --- a/src/testSetup.ts +++ b/src/testSetup.ts @@ -93,11 +93,7 @@ expect.extend({ } }, - async toEvaluateTo( - received: unknown, - expected: unknown, - globals: Record = {} - ) { + async toEvaluateTo(received: unknown, expected: unknown, globals: Record = {}) { assert(typeof received === 'string', 'toEvaluateTo can only be used with string values') try { @@ -109,7 +105,7 @@ expect.extend({ if (expected instanceof RegExp) expected = String(expected) if (value instanceof RegExp) value = String(value) - if (value === expected) { + if (isEqual(value, expected)) { return { pass: true } } else { return { @@ -165,3 +161,7 @@ const trimWhitespace = (str: string): string => { }) .join('\n') } + +function isEqual(a: any, b: any): boolean { + return typeof a === 'object' ? 
JSON.stringify(a) === JSON.stringify(b) : a === b +} From 34c11776369a1d8a5b67d4ac5bbb59fbafe876fe Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Tue, 28 Oct 2025 17:03:41 -0700 Subject: [PATCH 3/8] more tests --- packages/ReefVM | 1 + src/compiler/tests/literals.test.ts | 60 +++++++++++++ src/parser/tests/literals.test.ts | 131 ++++++++++++++++++++++++++++ 3 files changed, 192 insertions(+) create mode 160000 packages/ReefVM diff --git a/packages/ReefVM b/packages/ReefVM new file mode 160000 index 0000000..97b6722 --- /dev/null +++ b/packages/ReefVM @@ -0,0 +1 @@ +Subproject commit 97b6722a113417398a1c47d583bfe07a906f87a0 diff --git a/src/compiler/tests/literals.test.ts b/src/compiler/tests/literals.test.ts index 4be5579..66d0c01 100644 --- a/src/compiler/tests/literals.test.ts +++ b/src/compiler/tests/literals.test.ts @@ -33,4 +33,64 @@ describe('array literals', () => { three ]`).toEvaluateTo(['one', 'two', 'three']) }) + + test('empty arrays', () => { + expect('[]').toEvaluateTo([]) + }) + + test('mixed types', () => { + expect("[1 'two' three true null]").toEvaluateTo([1, 'two', 'three', true, null]) + }) + + test('semicolons as separators', () => { + expect('[1; 2; 3]').toEvaluateTo([1, 2, 3]) + }) + + test('expressions in arrays', () => { + expect('[(1 + 2) (3 * 4)]').toEvaluateTo([3, 12]) + }) + + test('mixed separators - spaces and newlines', () => { + expect(`[1 2 +3 4]`).toEvaluateTo([1, 2, 3, 4]) + }) + + test('mixed separators - spaces and semicolons', () => { + expect('[1 2; 3 4]').toEvaluateTo([1, 2, 3, 4]) + }) + + test('empty lines within arrays', () => { + expect(`[1 + +2]`).toEvaluateTo([1, 2]) + }) + + test('comments within arrays', () => { + expect(`[1 # first + 2 # second + ]`).toEvaluateTo([1, 2]) + }) + + test('complex nested multiline', () => { + expect(`[ + [1 2] + [3 4] + [5 6] +]`).toEvaluateTo([[1, 2], [3, 4], [5, 6]]) + }) + + test('boolean and null literals', () => { + expect('[true false null]').toEvaluateTo([true, false, 
null]) + }) + + test('regex literals', () => { + expect('[//[0-9]+//]').toEvaluateTo([/[0-9]+/]) + }) + + test('trailing newlines', () => { + expect(`[ +1 +2 +]`).toEvaluateTo([1, 2]) + }) }) diff --git a/src/parser/tests/literals.test.ts b/src/parser/tests/literals.test.ts index c48e0b3..84d09a8 100644 --- a/src/parser/tests/literals.test.ts +++ b/src/parser/tests/literals.test.ts @@ -69,4 +69,135 @@ describe('array literals', () => { Identifier three `) }) + + test('empty arrays', () => { + expect('[]').toMatchTree(` + Array [] + `) + }) + + test('mixed types', () => { + expect("[1 'two' three true null]").toMatchTree(` + Array + Number 1 + String + StringFragment two + Identifier three + Boolean true + Null null + `) + }) + + test('semicolons as separators', () => { + expect('[1; 2; 3]').toMatchTree(` + Array + Number 1 + Number 2 + Number 3 + `) + }) + + test('expressions in arrays', () => { + expect('[(1 + 2) (3 * 4)]').toMatchTree(` + Array + ParenExpr + BinOp + Number 1 + Plus + + Number 2 + ParenExpr + BinOp + Number 3 + Star * + Number 4 + `) + }) + + test('mixed separators - spaces and newlines', () => { + expect(`[1 2 +3 4]`).toMatchTree(` + Array + Number 1 + Number 2 + Number 3 + Number 4 + `) + }) + + test('mixed separators - spaces and semicolons', () => { + expect('[1 2; 3 4]').toMatchTree(` + Array + Number 1 + Number 2 + Number 3 + Number 4 + `) + }) + + test('empty lines within arrays', () => { + expect(`[1 + +2]`).toMatchTree(` + Array + Number 1 + Number 2 + `) + }) + + test('comments within arrays', () => { + expect(`[ # something... 
+ 1 # first + 2 # second + ]`).toMatchTree(` + Array + Number 1 + Number 2 + `) + }) + + test('complex nested multiline', () => { + expect(`[ + [1 2] + [3 4] + [5 6] +]`).toMatchTree(` + Array + Array + Number 1 + Number 2 + Array + Number 3 + Number 4 + Array + Number 5 + Number 6 + `) + }) + + test('boolean and null literals', () => { + expect('[true false null]').toMatchTree(` + Array + Boolean true + Boolean false + Null null + `) + }) + + test('regex literals', () => { + expect('[//[0-9]+//]').toMatchTree(` + Array + Regex //[0-9]+// + `) + }) + + test('trailing newlines', () => { + expect(`[ +1 +2 +]`).toMatchTree(` + Array + Number 1 + Number 2 + `) + }) }) From 982054eb54acf0a50bb4aaa5691490c41ce53b04 Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Tue, 28 Oct 2025 21:09:15 -0700 Subject: [PATCH 4/8] [a=1 b=2 c=3] and [=] (empty dict) --- src/compiler/compiler.ts | 20 ++ src/compiler/tests/literals.test.ts | 63 +++++- src/parser/shrimp.grammar | 9 +- src/parser/shrimp.terms.ts | 29 +-- src/parser/shrimp.ts | 20 +- src/parser/tests/literals.test.ts | 289 ++++++++++++++++++++++++++++ src/testSetup.ts | 29 ++- src/utils/tree.ts | 27 +-- 8 files changed, 433 insertions(+), 53 deletions(-) diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts index d05f2ce..7ce259f 100644 --- a/src/compiler/compiler.ts +++ b/src/compiler/compiler.ts @@ -470,11 +470,31 @@ export class Compiler { case terms.Array: { const children = getAllChildren(node) + // todo: [ = ] const instructions: ProgramItem[] = children.map((x) => this.#compileNode(x, input)).flat() instructions.push(['MAKE_ARRAY', children.length]) return instructions } + case terms.Dict: { + const children = getAllChildren(node) + const instructions: ProgramItem[] = [] + + children.forEach((node) => { + const keyNode = node.firstChild + const valueNode = node.firstChild.nextSibling + + // name= -> name + const key = input.slice(keyNode.from, keyNode.to).slice(0, -1) + instructions.push(['PUSH', key]) + 
+ instructions.push(...this.#compileNode(valueNode, input)) + }) + + instructions.push(['MAKE_DICT', children.length]) + return instructions + } + default: throw new CompilerError( `Compiler doesn't know how to handle a "${node.type.name}" node.`, diff --git a/src/compiler/tests/literals.test.ts b/src/compiler/tests/literals.test.ts index 66d0c01..5c93e2a 100644 --- a/src/compiler/tests/literals.test.ts +++ b/src/compiler/tests/literals.test.ts @@ -76,7 +76,11 @@ describe('array literals', () => { [1 2] [3 4] [5 6] -]`).toEvaluateTo([[1, 2], [3, 4], [5, 6]]) +]`).toEvaluateTo([ + [1, 2], + [3, 4], + [5, 6], + ]) }) test('boolean and null literals', () => { @@ -94,3 +98,60 @@ describe('array literals', () => { ]`).toEvaluateTo([1, 2]) }) }) + +describe('dict literals', () => { + test('work with numbers', () => { + expect('[a=1 b=2 c=3]').toEvaluateTo({ a: 1, b: 2, c: 3 }) + }) + + test('work with strings', () => { + expect("[a='one' b='two' c='three']").toEvaluateTo({ a: 'one', b: 'two', c: 'three' }) + }) + + test('work with identifiers', () => { + expect('[a=one b=two c=three]').toEvaluateTo({ a: 'one', b: 'two', c: 'three' }) + }) + + test('can be nested', () => { + expect('[a=one b=[two [c=three]]]').toEvaluateTo({ a: 'one', b: ['two', { c: 'three' }] }) + }) + + test('can span multiple lines', () => { + expect(`[ + a=1 + b=2 + c=3 + ]`).toEvaluateTo({ a: 1, b: 2, c: 3 }) + }) + + test('empty dict', () => { + expect('[=]').toEvaluateTo({}) + expect('[ = ]').toEvaluateTo({}) + + test('mixed types', () => { + expect("[a=1 b='two' c=three d=true e=null]").toEvaluateTo({ + a: 1, + b: 'two', + c: 'three', + d: true, + e: null, + }) + + test('semicolons as separators', () => { + expect('[a=1; b=2; c=3]').toEvaluateTo({ a: 1, b: 2, c: 3 }) + }) + + test('expressions in dicts', () => { + expect('[a=(1 + 2) b=(3 * 4)]').toEvaluateTo({ a: 3, b: 12 }) + }) + + test('empty lines within dicts', () => { + expect(`[a=1 + + b=2 + + c=3]`).toEvaluateTo({ a: 1, b: 2, c: 3 }) + }) 
+ }) + }) +}) diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index d7edba5..ee89d1c 100644 --- a/src/parser/shrimp.grammar +++ b/src/parser/shrimp.grammar @@ -191,8 +191,13 @@ EscapeSeq { "\\" ("$" | "n" | "t" | "r" | "\\" | "'") } +Dict { + "[=]" | + "[" newlineOrSemicolon* NamedArg (newlineOrSemicolon | NamedArg)* "]" +} + Array { - "[" (newlineOrSemicolon | expression)* "]" + "[" newlineOrSemicolon* (expression (newlineOrSemicolon | expression)*)? "]" } // We need expressionWithoutIdentifier to avoid conflicts in consumeToTerminator. @@ -204,7 +209,7 @@ Array { // to go through ambiguousFunctionCall (which is what we want semantically). // Yes, it is annoying and I gave up trying to use GLR to fix it. expressionWithoutIdentifier { - ParenExpr | Word | String | Number | Boolean | Regex | Array | @specialize[@name=Null] + ParenExpr | Word | String | Number | Boolean | Regex | Dict | Array | @specialize[@name=Null] } block { diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts index 9712d60..69d6d47 100644 --- a/src/parser/shrimp.terms.ts +++ b/src/parser/shrimp.terms.ts @@ -31,20 +31,21 @@ export const EscapeSeq = 29, Boolean = 30, Regex = 31, - Array = 32, - Null = 33, - ConditionalOp = 34, + Dict = 32, + NamedArg = 33, + NamedArgPrefix = 34, FunctionDef = 35, Params = 36, colon = 37, - keyword = 51, - PositionalArg = 39, - Underscore = 40, - NamedArg = 41, - NamedArgPrefix = 42, - IfExpr = 44, - SingleLineThenBlock = 46, - ThenBlock = 47, - ElseIfExpr = 48, - ElseExpr = 50, - Assign = 52 + keyword = 52, + Underscore = 39, + Array = 40, + Null = 41, + ConditionalOp = 42, + PositionalArg = 43, + IfExpr = 45, + SingleLineThenBlock = 47, + ThenBlock = 48, + ElseIfExpr = 49, + ElseExpr = 51, + Assign = 53 diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 9e934e5..4865cb6 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -4,24 +4,24 @@ import {operatorTokenizer} from "./operatorTokenizer" import 
{tokenizer, specializeKeyword} from "./tokenizer" import {trackScope} from "./scopeTracker" import {highlighting} from "./highlight" -const spec_Identifier = {__proto__:null,null:66, end:76, if:90, elseif:98, else:102} +const spec_Identifier = {__proto__:null,end:76, null:82, if:92, elseif:100, else:104} export const parser = LRParser.deserialize({ version: 14, - states: "/xQYQbOOO!WOpO'#CqO#gQcO'#CtO$aOSO'#CvO%dQbO'#C|O&fQcO'#DuOOQa'#Du'#DuO'lQcO'#DtO(TQRO'#CuO(cQcO'#DpO(zQbO'#EQOOQ`'#DP'#DPO)SQbO'#CsOOQ`'#Dq'#DqO)wQbO'#DpO*VQbO'#EVOOQ`'#DY'#DYO*wQRO'#DbOOQ`'#Dp'#DpO*|QQO'#DoOOQ`'#Do'#DoOOQ`'#Dc'#DcQYQbOOO+UObO,59]OOQa'#Dt'#DtOOQ`'#DT'#DTO+^QbO'#DVOOQ`'#EU'#EUOOQ`'#Dh'#DhO+hQbO,59[O+{QbO'#CxO,TQWO'#CyOOOO'#Dw'#DwOOOO'#Dd'#DdO,iOSO,59bOOQa,59b,59bOOQ`'#De'#DeO,wQbO,59hOOQa,59h,59hO*VQbO,59aO*VQbO,59aOOQ`'#Df'#DfO-OQbO'#DQO-WQQO,5:lO-]QRO,59_O.rQRO'#CuO/SQRO,59_O/`QQO,59_O/eQQO,59_O/mQbO'#DiO/xQbO,59ZO0ZQRO,5:qO0bQQO,5:qO0gQbO,59|OOQ`,5:Z,5:ZOOQ`-E7a-E7aOOQa1G.w1G.wOOQ`,59q,59qOOQ`-E7f-E7fOOOO,59d,59dOOOO,59e,59eOOOO-E7b-E7bOOQa1G.|1G.|OOQ`-E7c-E7cOOQa1G/S1G/SOOQa1G.{1G.{O0qQcO1G.{OOQ`-E7d-E7dO1]QbO1G0WOOQa1G.y1G.yO*VQbO,59jO*VQbO,59jO!]QbO'#CtO%kQbO'#CpOOQ`,5:T,5:TOOQ`-E7g-E7gO1jQbO1G0]OOQ`1G/h1G/hO1wQbO7+%rO1|QbO7+%sOOQO1G/U1G/UO2^QRO1G/UOOQ`'#D['#D[O2hQbO7+%wO2mQbO7+%xOOQ`<yAN>yO*VQbO'#D^OOQ`'#Dj'#DjO4QQbOAN?OO4]QQO'#D`OOQ`AN?OAN?OO4bQbOAN?OO4gQRO,59xO4nQQO,59xOOQ`-E7h-E7hOOQ`G24jG24jO4sQbOG24jO4xQQO,59zO4}QQO1G/dOOQ`LD*ULD*UO1|QbO1G/fO2mQbO7+%OOOQ`7+%Q7+%QOOQ`<|AN>|O*iQbO'#D_OOQ`'#Dm'#DmO5xQbOAN?SO6TQQO'#DaOOQ`AN?SAN?SO6YQbOAN?SO6_QRO,59yO6fQQO,59yOOQ`-E7k-E7kOOQ`G24nG24nO6kQbOG24nO6pQQO,59{O6uQQO1G/eOOQ`LD*YLD*YO3_QbO1G/gO4eQbO7+%POOQ`7+%R7+%ROOQ`<Y#g#o6R#o;'S#{;'S;=`$d<%lO#{U>_[kSOt#{uw#{x!_#{!_!`6v!`#O#{#P#T#{#T#i6R#i#j9}#j#o6R#o;'S#{;'S;=`$d<%lO#{U?[U{QkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~?sO!{~", - tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!f~~", 11)], + repeatNodeCount: 10, + tokenData: 
"AO~R|OX#{XY$jYZ%TZp#{pq$jqs#{st%ntu'Vuw#{wx'[xy'ayz'zz{#{{|(e|}#{}!O(e!O!P#{!P!Q+X!Q![)S![!]3t!]!^%T!^!}#{!}#O4_#O#P6T#P#Q6Y#Q#R#{#R#S6s#S#T#{#T#Y7^#Y#Z8l#Z#b7^#b#ch#i#o7^#o#p#{#p#q@`#q;'S#{;'S;=`$d<%l~#{~O#{~~@yS$QUkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{S$gP;=`<%l#{^$qUkS!dYOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U%[UkS!vQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{^%uZkS!eYOY%nYZ#{Zt%ntu&huw%nwx&hx#O%n#O#P&h#P;'S%n;'S;=`'P<%lO%nY&mS!eYOY&hZ;'S&h;'S;=`&y<%lO&hY&|P;=`<%l&h^'SP;=`<%l%n~'[O!o~~'aO!m~U'hUkS!jQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U(RUkS!{QOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U(jWkSOt#{uw#{x!Q#{!Q![)S![#O#{#P;'S#{;'S;=`$d<%lO#{U)ZYkSfQOt#{uw#{x!O#{!O!P)y!P!Q#{!Q![)S![#O#{#P;'S#{;'S;=`$d<%lO#{U*OWkSOt#{uw#{x!Q#{!Q![*h![#O#{#P;'S#{;'S;=`$d<%lO#{U*oWkSfQOt#{uw#{x!Q#{!Q![*h![#O#{#P;'S#{;'S;=`$d<%lO#{U+^WkSOt#{uw#{x!P#{!P!Q+v!Q#O#{#P;'S#{;'S;=`$d<%lO#{U+{^kSOY,wYZ#{Zt,wtu-zuw,wwx-zx!P,w!P!Q#{!Q!},w!}#O2m#O#P0Y#P;'S,w;'S;=`3n<%lO,wU-O^kSoQOY,wYZ#{Zt,wtu-zuw,wwx-zx!P,w!P!Q0o!Q!},w!}#O2m#O#P0Y#P;'S,w;'S;=`3n<%lO,wQ.PXoQOY-zZ!P-z!P!Q.l!Q!}-z!}#O/Z#O#P0Y#P;'S-z;'S;=`0i<%lO-zQ.oP!P!Q.rQ.wUoQ#Z#[.r#]#^.r#a#b.r#g#h.r#i#j.r#m#n.rQ/^VOY/ZZ#O/Z#O#P/s#P#Q-z#Q;'S/Z;'S;=`0S<%lO/ZQ/vSOY/ZZ;'S/Z;'S;=`0S<%lO/ZQ0VP;=`<%l/ZQ0]SOY-zZ;'S-z;'S;=`0i<%lO-zQ0lP;=`<%l-zU0tWkSOt#{uw#{x!P#{!P!Q1^!Q#O#{#P;'S#{;'S;=`$d<%lO#{U1ebkSoQOt#{uw#{x#O#{#P#Z#{#Z#[1^#[#]#{#]#^1^#^#a#{#a#b1^#b#g#{#g#h1^#h#i#{#i#j1^#j#m#{#m#n1^#n;'S#{;'S;=`$d<%lO#{U2r[kSOY2mYZ#{Zt2mtu/Zuw2mwx/Zx#O2m#O#P/s#P#Q,w#Q;'S2m;'S;=`3h<%lO2mU3kP;=`<%l2mU3qP;=`<%l,wU3{UkSuQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U4fW!uQkSOt#{uw#{x!_#{!_!`5O!`#O#{#P;'S#{;'S;=`$d<%lO#{U5TVkSOt#{uw#{x#O#{#P#Q5j#Q;'S#{;'S;=`$d<%lO#{U5qU!tQkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~6YO!p~U6aU!zQkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U6zUkSwQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U7cYkSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#o7^#o;'S#{;'S;=`$d<%lO#{U8YUrQkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U8qZkSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#U9d#U#o7^#o;'S#{;'S;=`$d<%lO#{U9i[kSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#`
7^#`#a:_#a#o7^#o;'S#{;'S;=`$d<%lO#{U:d[kSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#g7^#g#h;Y#h#o7^#o;'S#{;'S;=`$d<%lO#{U;_[kSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#X7^#X#Yo[!rWkSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#f7^#f#g?e#g#o7^#o;'S#{;'S;=`$d<%lO#{U?j[kSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#i7^#i#j;Y#j#o7^#o;'S#{;'S;=`$d<%lO#{U@gU|QkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~AOO#P~", + tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!i~~", 11)], topRules: {"Program":[0,18]}, specialized: [{term: 13, get: (value: any, stack: any) => (specializeKeyword(value, stack) << 1), external: specializeKeyword},{term: 13, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}], - tokenPrec: 924 + tokenPrec: 1008 }) diff --git a/src/parser/tests/literals.test.ts b/src/parser/tests/literals.test.ts index 84d09a8..693da17 100644 --- a/src/parser/tests/literals.test.ts +++ b/src/parser/tests/literals.test.ts @@ -201,3 +201,292 @@ describe('array literals', () => { `) }) }) + +describe('dict literals', () => { + test('work with numbers', () => { + expect('[a=1 b=2 c=3]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) + + test('work with strings', () => { + expect("[a='one' b='two' c='three']").toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + String + StringFragment one + NamedArg + NamedArgPrefix b= + String + StringFragment two + NamedArg + NamedArgPrefix c= + String + StringFragment three + `) + }) + + test('work with identifiers', () => { + expect('[a=one b=two c=three]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Identifier one + NamedArg + NamedArgPrefix b= + Identifier two + NamedArg + NamedArgPrefix c= + Identifier three + `) + }) + + test('can be nested', () => { + expect('[a=one b=[two [c=three]]]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Identifier one + NamedArg + NamedArgPrefix b= + Array + 
Identifier two + Dict + NamedArg + NamedArgPrefix c= + Identifier three + `) + }) + + test('can span multiple lines', () => { + expect(`[ + a=1 + b=2 + c=3 + ]`).toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) + test('empty dict', () => { + expect('[=]').toMatchTree(` + Dict [=] + `) + + expect('[ = ]').toMatchTree(` + Array + Word = + `) + }) + + test('mixed types', () => { + expect("[a=1 b='two' c=three d=true e=null]").toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + String + StringFragment two + NamedArg + NamedArgPrefix c= + Identifier three + NamedArg + NamedArgPrefix d= + Boolean true + NamedArg + NamedArgPrefix e= + Null null + `) + }) + + test('semicolons as separators', () => { + expect('[a=1; b=2; c=3]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) + + test('expressions in dicts', () => { + expect('[a=(1 + 2) b=(3 * 4)]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + ParenExpr + BinOp + Number 1 + Plus + + Number 2 + NamedArg + NamedArgPrefix b= + ParenExpr + BinOp + Number 3 + Star * + Number 4 + `) + }) + + test('mixed separators - spaces and newlines', () => { + expect(`[a=1 b=2 +c=3]`).toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) + + test('empty lines within dicts', () => { + expect(`[a=1 + +b=2 + +c=3]`).toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) + + test('comments within dicts', () => { + expect(`[ # something... 
+ a=1 # first + b=2 # second + + c=3 + ]`).toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) + + test('complex nested multiline', () => { + expect(`[ + a=[a=1 b=2] + b=[b=3 c=4] + c=[c=5 d=6] +]`).toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix b= + Dict + NamedArg + NamedArgPrefix b= + Number 3 + NamedArg + NamedArgPrefix c= + Number 4 + NamedArg + NamedArgPrefix c= + Dict + NamedArg + NamedArgPrefix c= + Number 5 + NamedArg + NamedArgPrefix d= + Number 6 + `) + }) + + test('boolean and null literals', () => { + expect('[a=true b=false c=null]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Boolean true + NamedArg + NamedArgPrefix b= + Boolean false + NamedArg + NamedArgPrefix c= + Null null + `) + }) + + test('regex literals', () => { + expect('[pattern=//[0-9]+//]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix pattern= + Regex //[0-9]+// + `) + }) + + test('trailing newlines', () => { + expect(`[ +a=1 +b=2 +c=3 + +]`).toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) +}) diff --git a/src/testSetup.ts b/src/testSetup.ts index 89203e1..10b715e 100644 --- a/src/testSetup.ts +++ b/src/testSetup.ts @@ -109,7 +109,10 @@ expect.extend({ return { pass: true } } else { return { - message: () => `Expected evaluation to be ${expected}, but got ${value}`, + message: () => + `Expected evaluation to be ${JSON.stringify(expected)}, but got ${JSON.stringify( + value + )}`, pass: false, } } @@ -163,5 +166,27 @@ const trimWhitespace = (str: string): string => { } function isEqual(a: any, b: any): boolean { - return typeof a === 'object' ? 
JSON.stringify(a) === JSON.stringify(b) : a === b + if (a === null && b === null) return true + + switch (typeof a) { + case 'string': + case 'number': + case 'boolean': + case 'undefined': + return a === b + default: + return JSON.stringify(sortKeys(a)) === JSON.stringify(sortKeys(b)) + } +} + +function sortKeys(o: any): any { + if (Array.isArray(o)) return o.map(sortKeys) + if (o && typeof o === 'object' && o.constructor === Object) + return Object.keys(o) + .sort() + .reduce((r, k) => { + r[k] = sortKeys(o[k]) + return r + }, {} as any) + return o } diff --git a/src/utils/tree.ts b/src/utils/tree.ts index 1682d21..7a2b36a 100644 --- a/src/utils/tree.ts +++ b/src/utils/tree.ts @@ -1,6 +1,6 @@ import { Tree, TreeCursor } from '@lezer/common' import { assertNever } from '#utils/utils' -import { type Value } from 'reefvm' +import { type Value, fromValue } from 'reefvm' export const treeToString = (tree: Tree, input: string): string => { const lines: string[] = [] @@ -35,27 +35,6 @@ export const treeToString = (tree: Tree, input: string): string => { } export const VMResultToValue = (result: Value): unknown => { - if ( - result.type === 'number' || - result.type === 'boolean' || - result.type === 'string' || - result.type === 'regex' - ) { - return result.value - } else if (result.type === 'null') { - return null - } else if (result.type === 'array') { - return result.value.map(VMResultToValue) - } else if (result.type === 'dict') { - const obj: Record = {} - for (const [key, val] of Object.entries(result.value)) { - obj[key] = VMResultToValue(val) - } - - return obj - } else if (result.type === 'function') { - return Function - } else { - assertNever(result) - } + if (result.type === 'function') return Function + else return fromValue(result) } From 8112515278f4d3db99180c08c3287053833ad6f8 Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Tue, 28 Oct 2025 21:18:24 -0700 Subject: [PATCH 5/8] [ = ] --- src/compiler/compiler.ts | 13 ++++++++++- 
src/compiler/tests/literals.test.ts | 36 ++++++++++++++--------------- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts index 7ce259f..4f8b15b 100644 --- a/src/compiler/compiler.ts +++ b/src/compiler/compiler.ts @@ -470,7 +470,18 @@ export class Compiler { case terms.Array: { const children = getAllChildren(node) - // todo: [ = ] + + // We can easily parse [=] as an empty dict, but `[ = ]` is tougher. + // = can be a valid word, and also valid in words, so for now we cheat + // and check for arrays that look like `[ = ]` to interpret them as + // empty dicts + if (children.length === 1 && children[0].name === 'Word') { + const child = children[0] + if (input.slice(child.from, child.to) === '=') { + return [['MAKE_DICT', 0]] + } + } + const instructions: ProgramItem[] = children.map((x) => this.#compileNode(x, input)).flat() instructions.push(['MAKE_ARRAY', children.length]) return instructions diff --git a/src/compiler/tests/literals.test.ts b/src/compiler/tests/literals.test.ts index 5c93e2a..666a4b5 100644 --- a/src/compiler/tests/literals.test.ts +++ b/src/compiler/tests/literals.test.ts @@ -127,31 +127,31 @@ describe('dict literals', () => { test('empty dict', () => { expect('[=]').toEvaluateTo({}) expect('[ = ]').toEvaluateTo({}) + }) - test('mixed types', () => { - expect("[a=1 b='two' c=three d=true e=null]").toEvaluateTo({ - a: 1, - b: 'two', - c: 'three', - d: true, - e: null, - }) + test('mixed types', () => { + expect("[a=1 b='two' c=three d=true e=null]").toEvaluateTo({ + a: 1, + b: 'two', + c: 'three', + d: true, + e: null, + }) + }) - test('semicolons as separators', () => { - expect('[a=1; b=2; c=3]').toEvaluateTo({ a: 1, b: 2, c: 3 }) - }) + test('semicolons as separators', () => { + expect('[a=1; b=2; c=3]').toEvaluateTo({ a: 1, b: 2, c: 3 }) + }) - test('expressions in dicts', () => { - expect('[a=(1 + 2) b=(3 * 4)]').toEvaluateTo({ a: 3, b: 12 }) - }) + test('expressions in 
dicts', () => {
+    expect('[a=(1 + 2) b=(3 * 4)]').toEvaluateTo({ a: 3, b: 12 })
+  })
 
-  test('empty lines within dicts', () => {
-    expect(`[a=1
+  test('empty lines within dicts', () => {
+    expect(`[a=1
 
 b=2
 
 c=3]`).toEvaluateTo({ a: 1, b: 2, c: 3 })
-  })
-  })
 })
 })
From 1aa15701352594c979f16d6ebdf11f64848c581e Mon Sep 17 00:00:00 2001
From: Chris Wanstrath 
Date: Tue, 28 Oct 2025 21:36:02 -0700
Subject: [PATCH 6/8] add barus minimus docs

---
 CLAUDE.md      | 13 +++++++++++++
 example.shrimp | 24 ++++++++++++++++++++----
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 356bd09..e0e372d 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -207,6 +207,19 @@ Implementation files:
 
 **Why this matters**: This enables shell-like file paths (`readme.txt`) while supporting dictionary/array access (`config.path`) without quotes, determined entirely at parse time based on lexical scope.
 
+**Array and dict literals**: Square brackets `[]` create both arrays and dicts, distinguished by content:
+- **Arrays**: Space/newline/semicolon-separated args that work like calling a function → `[1 2 3]` (call functions using parens eg `[1 (double 4) 200]`)
+- **Dicts**: NamedArg syntax (key=value pairs) → `[a=1 b=2]`
+- **Empty array**: `[]` (standard empty brackets)
+- **Empty dict**: `[=]`, or `[ = ]` with spaces (the spaced form is supported as a special case)
+
+Implementation details:
+- Grammar rules (shrimp.grammar:194-201): Dict uses `NamedArg` nodes, Array uses `expression` nodes
+- Parser distinguishes at parse time based on whether first element contains `=`; the spaced empty dict `[ = ]` parses as an Array and is special-cased into an empty dict by the compiler
+- Both support multiline, comments, and nesting
+- Separators: spaces, newlines (`\n`), or semicolons (`;`) work interchangeably
+- Test files: `src/parser/tests/literals.test.ts` and `src/compiler/tests/literals.test.ts`
+
 **EOF handling**: The grammar uses `(statement | newlineOrSemicolon)+ eof?` to handle empty lines and end-of-file without infinite loops.
 
## Compiler Architecture diff --git a/example.shrimp b/example.shrimp index 53c563b..f1a9a05 100644 --- a/example.shrimp +++ b/example.shrimp @@ -42,13 +42,13 @@ a-file = file.txt 3 # symbols can be assigned to functions. The body of the function comes after a colon `:` -add = fn x y: x + y +add = do x y: x + y add 1 2 --- 3 # Functions can have multiple lines, they are terminated with `end` -sub = fn x y: +sub = do x y: x - y end @@ -82,9 +82,25 @@ add 1 (sub 5 2) 4 +# Arrays use square brackets with space-separated elements +numbers = [1 2 3] +shopping-list = [apples bananas carrots] +empty-array = [] + +# Dicts use square brackets with key=value pairs +config = [name=Shrimp version=1.0 debug=true] +empty-dict = [=] + +# Nested structures work naturally +nested = [ + users=[ + [name=Alice age=30] + [name=Bob age=25] + ] + settings=[debug=true timeout=5000] +] + # HOLD UP -- how do we handle arrays? -- how do we handle hashes? - conditionals - loops \ No newline at end of file From b03610761b6ad695026ea634acc2b01beaa7defd Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Tue, 28 Oct 2025 21:52:15 -0700 Subject: [PATCH 7/8] shh --- src/compiler/compiler.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts index 4f8b15b..c22e117 100644 --- a/src/compiler/compiler.ts +++ b/src/compiler/compiler.ts @@ -475,8 +475,8 @@ export class Compiler { // = can be a valid word, and also valid in words, so for now we cheat // and check for arrays that look like `[ = ]` to interpret them as // empty dicts - if (children.length === 1 && children[0].name === 'Word') { - const child = children[0] + if (children.length === 1 && children[0]!.name === 'Word') { + const child = children[0]! 
if (input.slice(child.from, child.to) === '=') { return [['MAKE_DICT', 0]] } @@ -493,13 +493,13 @@ export class Compiler { children.forEach((node) => { const keyNode = node.firstChild - const valueNode = node.firstChild.nextSibling + const valueNode = node.firstChild!.nextSibling // name= -> name - const key = input.slice(keyNode.from, keyNode.to).slice(0, -1) + const key = input.slice(keyNode!.from, keyNode!.to).slice(0, -1) instructions.push(['PUSH', key]) - instructions.push(...this.#compileNode(valueNode, input)) + instructions.push(...this.#compileNode(valueNode!, input)) }) instructions.push(['MAKE_DICT', children.length]) From e1ba9c630d4054bf95fc69b67e24617cd02f6ce0 Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Tue, 28 Oct 2025 21:52:41 -0700 Subject: [PATCH 8/8] important note --- src/compiler/compiler.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts index c22e117..bbedc09 100644 --- a/src/compiler/compiler.ts +++ b/src/compiler/compiler.ts @@ -472,7 +472,7 @@ export class Compiler { const children = getAllChildren(node) // We can easily parse [=] as an empty dict, but `[ = ]` is tougher. - // = can be a valid word, and also valid in words, so for now we cheat + // = can be a valid word, and is also valid inside words, so for now we cheat // and check for arrays that look like `[ = ]` to interpret them as // empty dicts if (children.length === 1 && children[0]!.name === 'Word') {