diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts index d05f2ce..7ce259f 100644 --- a/src/compiler/compiler.ts +++ b/src/compiler/compiler.ts @@ -470,11 +470,31 @@ export class Compiler { case terms.Array: { const children = getAllChildren(node) + // todo: '[ = ]' (with spaces) still reaches this Array case; only the literal '[=]' is an empty Dict const instructions: ProgramItem[] = children.map((x) => this.#compileNode(x, input)).flat() instructions.push(['MAKE_ARRAY', children.length]) return instructions } + case terms.Dict: { + const children = getAllChildren(node) + const instructions: ProgramItem[] = [] + + children.forEach((argNode) => { + const keyNode = argNode.firstChild + const valueNode = keyNode.nextSibling + + // name= -> name + const key = input.slice(keyNode.from, keyNode.to).slice(0, -1) + instructions.push(['PUSH', key]) + + instructions.push(...this.#compileNode(valueNode, input)) + }) + + instructions.push(['MAKE_DICT', children.length]) + return instructions + } + default: throw new CompilerError( `Compiler doesn't know how to handle a "${node.type.name}" node.`, diff --git a/src/compiler/tests/literals.test.ts b/src/compiler/tests/literals.test.ts index 66d0c01..5c93e2a 100644 --- a/src/compiler/tests/literals.test.ts +++ b/src/compiler/tests/literals.test.ts @@ -76,7 +76,11 @@ describe('array literals', () => { [1 2] [3 4] [5 6] -]`).toEvaluateTo([[1, 2], [3, 4], [5, 6]]) +]`).toEvaluateTo([ + [1, 2], + [3, 4], + [5, 6], + ]) }) test('boolean and null literals', () => { @@ -94,3 +98,60 @@ describe('array literals', () => { ]`).toEvaluateTo([1, 2]) }) }) + +describe('dict literals', () => { + test('work with numbers', () => { + expect('[a=1 b=2 c=3]').toEvaluateTo({ a: 1, b: 2, c: 3 }) + }) + + test('work with strings', () => { + expect("[a='one' b='two' c='three']").toEvaluateTo({ a: 'one', b: 'two', c: 'three' }) + }) + + test('work with identifiers', () => { + expect('[a=one b=two c=three]').toEvaluateTo({ a: 'one', b: 'two', c: 'three' }) + }) + + test('can be nested', () => { + expect('[a=one b=[two 
[c=three]]]').toEvaluateTo({ a: 'one', b: ['two', { c: 'three' }] }) + }) + + test('can span multiple lines', () => { + expect(`[ + a=1 + b=2 + c=3 + ]`).toEvaluateTo({ a: 1, b: 2, c: 3 }) + }) + + test('empty dict', () => { + expect('[=]').toEvaluateTo({}) + // todo: '[ = ]' (with spaces) parses as an Array holding a Word — see parser tests — not an empty dict + }) + + test('mixed types', () => { + expect("[a=1 b='two' c=three d=true e=null]").toEvaluateTo({ + a: 1, + b: 'two', + c: 'three', + d: true, + e: null, + }) + }) + + test('semicolons as separators', () => { + expect('[a=1; b=2; c=3]').toEvaluateTo({ a: 1, b: 2, c: 3 }) + }) + + test('expressions in dicts', () => { + expect('[a=(1 + 2) b=(3 * 4)]').toEvaluateTo({ a: 3, b: 12 }) + }) + + test('empty lines within dicts', () => { + expect(`[a=1 + + b=2 + + c=3]`).toEvaluateTo({ a: 1, b: 2, c: 3 }) + }) +}) diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index d7edba5..ee89d1c 100644 --- a/src/parser/shrimp.grammar +++ b/src/parser/shrimp.grammar @@ -191,8 +191,13 @@ EscapeSeq { "\\" ("$" | "n" | "t" | "r" | "\\" | "'") } +Dict { + "[=]" | + "[" newlineOrSemicolon* NamedArg (newlineOrSemicolon | NamedArg)* "]" +} + Array { - "[" (newlineOrSemicolon | expression)* "]" + "[" newlineOrSemicolon* (expression (newlineOrSemicolon | expression)*)? "]" } // We need expressionWithoutIdentifier to avoid conflicts in consumeToTerminator. // ... // to go through ambiguousFunctionCall (which is what we want semantically). // Yes, it is annoying and I gave up trying to use GLR to fix it. 
expressionWithoutIdentifier { - ParenExpr | Word | String | Number | Boolean | Regex | Array | @specialize[@name=Null] + ParenExpr | Word | String | Number | Boolean | Regex | Dict | Array | @specialize[@name=Null] } block { diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts index 9712d60..69d6d47 100644 --- a/src/parser/shrimp.terms.ts +++ b/src/parser/shrimp.terms.ts @@ -31,20 +31,21 @@ export const EscapeSeq = 29, Boolean = 30, Regex = 31, - Array = 32, - Null = 33, - ConditionalOp = 34, + Dict = 32, + NamedArg = 33, + NamedArgPrefix = 34, FunctionDef = 35, Params = 36, colon = 37, - keyword = 51, - PositionalArg = 39, - Underscore = 40, - NamedArg = 41, - NamedArgPrefix = 42, - IfExpr = 44, - SingleLineThenBlock = 46, - ThenBlock = 47, - ElseIfExpr = 48, - ElseExpr = 50, - Assign = 52 + keyword = 52, + Underscore = 39, + Array = 40, + Null = 41, + ConditionalOp = 42, + PositionalArg = 43, + IfExpr = 45, + SingleLineThenBlock = 47, + ThenBlock = 48, + ElseIfExpr = 49, + ElseExpr = 51, + Assign = 53 diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 9e934e5..4865cb6 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -4,24 +4,24 @@ import {operatorTokenizer} from "./operatorTokenizer" import {tokenizer, specializeKeyword} from "./tokenizer" import {trackScope} from "./scopeTracker" import {highlighting} from "./highlight" -const spec_Identifier = {__proto__:null,null:66, end:76, if:90, elseif:98, else:102} +const spec_Identifier = {__proto__:null,end:76, null:82, if:92, elseif:100, else:104} export const parser = LRParser.deserialize({ version: 14, - states: 
"/xQYQbOOO!WOpO'#CqO#gQcO'#CtO$aOSO'#CvO%dQbO'#C|O&fQcO'#DuOOQa'#Du'#DuO'lQcO'#DtO(TQRO'#CuO(cQcO'#DpO(zQbO'#EQOOQ`'#DP'#DPO)SQbO'#CsOOQ`'#Dq'#DqO)wQbO'#DpO*VQbO'#EVOOQ`'#DY'#DYO*wQRO'#DbOOQ`'#Dp'#DpO*|QQO'#DoOOQ`'#Do'#DoOOQ`'#Dc'#DcQYQbOOO+UObO,59]OOQa'#Dt'#DtOOQ`'#DT'#DTO+^QbO'#DVOOQ`'#EU'#EUOOQ`'#Dh'#DhO+hQbO,59[O+{QbO'#CxO,TQWO'#CyOOOO'#Dw'#DwOOOO'#Dd'#DdO,iOSO,59bOOQa,59b,59bOOQ`'#De'#DeO,wQbO,59hOOQa,59h,59hO*VQbO,59aO*VQbO,59aOOQ`'#Df'#DfO-OQbO'#DQO-WQQO,5:lO-]QRO,59_O.rQRO'#CuO/SQRO,59_O/`QQO,59_O/eQQO,59_O/mQbO'#DiO/xQbO,59ZO0ZQRO,5:qO0bQQO,5:qO0gQbO,59|OOQ`,5:Z,5:ZOOQ`-E7a-E7aOOQa1G.w1G.wOOQ`,59q,59qOOQ`-E7f-E7fOOOO,59d,59dOOOO,59e,59eOOOO-E7b-E7bOOQa1G.|1G.|OOQ`-E7c-E7cOOQa1G/S1G/SOOQa1G.{1G.{O0qQcO1G.{OOQ`-E7d-E7dO1]QbO1G0WOOQa1G.y1G.yO*VQbO,59jO*VQbO,59jO!]QbO'#CtO%kQbO'#CpOOQ`,5:T,5:TOOQ`-E7g-E7gO1jQbO1G0]OOQ`1G/h1G/hO1wQbO7+%rO1|QbO7+%sOOQO1G/U1G/UO2^QRO1G/UOOQ`'#D['#D[O2hQbO7+%wO2mQbO7+%xOOQ`<yAN>yO*VQbO'#D^OOQ`'#Dj'#DjO4QQbOAN?OO4]QQO'#D`OOQ`AN?OAN?OO4bQbOAN?OO4gQRO,59xO4nQQO,59xOOQ`-E7h-E7hOOQ`G24jG24jO4sQbOG24jO4xQQO,59zO4}QQO1G/dOOQ`LD*ULD*UO1|QbO1G/fO2mQbO7+%OOOQ`7+%Q7+%QOOQ`<|AN>|O*iQbO'#D_OOQ`'#Dm'#DmO5xQbOAN?SO6TQQO'#DaOOQ`AN?SAN?SO6YQbOAN?SO6_QRO,59yO6fQQO,59yOOQ`-E7k-E7kOOQ`G24nG24nO6kQbOG24nO6pQQO,59{O6uQQO1G/eOOQ`LD*YLD*YO3_QbO1G/gO4eQbO7+%POOQ`7+%R7+%ROOQ`<Y#g#o6R#o;'S#{;'S;=`$d<%lO#{U>_[kSOt#{uw#{x!_#{!_!`6v!`#O#{#P#T#{#T#i6R#i#j9}#j#o6R#o;'S#{;'S;=`$d<%lO#{U?[U{QkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~?sO!{~", - tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!f~~", 11)], + repeatNodeCount: 10, + tokenData: 
"AO~R|OX#{XY$jYZ%TZp#{pq$jqs#{st%ntu'Vuw#{wx'[xy'ayz'zz{#{{|(e|}#{}!O(e!O!P#{!P!Q+X!Q![)S![!]3t!]!^%T!^!}#{!}#O4_#O#P6T#P#Q6Y#Q#R#{#R#S6s#S#T#{#T#Y7^#Y#Z8l#Z#b7^#b#ch#i#o7^#o#p#{#p#q@`#q;'S#{;'S;=`$d<%l~#{~O#{~~@yS$QUkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{S$gP;=`<%l#{^$qUkS!dYOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U%[UkS!vQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{^%uZkS!eYOY%nYZ#{Zt%ntu&huw%nwx&hx#O%n#O#P&h#P;'S%n;'S;=`'P<%lO%nY&mS!eYOY&hZ;'S&h;'S;=`&y<%lO&hY&|P;=`<%l&h^'SP;=`<%l%n~'[O!o~~'aO!m~U'hUkS!jQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U(RUkS!{QOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U(jWkSOt#{uw#{x!Q#{!Q![)S![#O#{#P;'S#{;'S;=`$d<%lO#{U)ZYkSfQOt#{uw#{x!O#{!O!P)y!P!Q#{!Q![)S![#O#{#P;'S#{;'S;=`$d<%lO#{U*OWkSOt#{uw#{x!Q#{!Q![*h![#O#{#P;'S#{;'S;=`$d<%lO#{U*oWkSfQOt#{uw#{x!Q#{!Q![*h![#O#{#P;'S#{;'S;=`$d<%lO#{U+^WkSOt#{uw#{x!P#{!P!Q+v!Q#O#{#P;'S#{;'S;=`$d<%lO#{U+{^kSOY,wYZ#{Zt,wtu-zuw,wwx-zx!P,w!P!Q#{!Q!},w!}#O2m#O#P0Y#P;'S,w;'S;=`3n<%lO,wU-O^kSoQOY,wYZ#{Zt,wtu-zuw,wwx-zx!P,w!P!Q0o!Q!},w!}#O2m#O#P0Y#P;'S,w;'S;=`3n<%lO,wQ.PXoQOY-zZ!P-z!P!Q.l!Q!}-z!}#O/Z#O#P0Y#P;'S-z;'S;=`0i<%lO-zQ.oP!P!Q.rQ.wUoQ#Z#[.r#]#^.r#a#b.r#g#h.r#i#j.r#m#n.rQ/^VOY/ZZ#O/Z#O#P/s#P#Q-z#Q;'S/Z;'S;=`0S<%lO/ZQ/vSOY/ZZ;'S/Z;'S;=`0S<%lO/ZQ0VP;=`<%l/ZQ0]SOY-zZ;'S-z;'S;=`0i<%lO-zQ0lP;=`<%l-zU0tWkSOt#{uw#{x!P#{!P!Q1^!Q#O#{#P;'S#{;'S;=`$d<%lO#{U1ebkSoQOt#{uw#{x#O#{#P#Z#{#Z#[1^#[#]#{#]#^1^#^#a#{#a#b1^#b#g#{#g#h1^#h#i#{#i#j1^#j#m#{#m#n1^#n;'S#{;'S;=`$d<%lO#{U2r[kSOY2mYZ#{Zt2mtu/Zuw2mwx/Zx#O2m#O#P/s#P#Q,w#Q;'S2m;'S;=`3h<%lO2mU3kP;=`<%l2mU3qP;=`<%l,wU3{UkSuQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U4fW!uQkSOt#{uw#{x!_#{!_!`5O!`#O#{#P;'S#{;'S;=`$d<%lO#{U5TVkSOt#{uw#{x#O#{#P#Q5j#Q;'S#{;'S;=`$d<%lO#{U5qU!tQkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~6YO!p~U6aU!zQkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U6zUkSwQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U7cYkSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#o7^#o;'S#{;'S;=`$d<%lO#{U8YUrQkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U8qZkSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#U9d#U#o7^#o;'S#{;'S;=`$d<%lO#{U9i[kSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#`
7^#`#a:_#a#o7^#o;'S#{;'S;=`$d<%lO#{U:d[kSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#g7^#g#h;Y#h#o7^#o;'S#{;'S;=`$d<%lO#{U;_[kSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#X7^#X#Yo[!rWkSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#f7^#f#g?e#g#o7^#o;'S#{;'S;=`$d<%lO#{U?j[kSOt#{uw#{x!_#{!_!`8R!`#O#{#P#T#{#T#i7^#i#j;Y#j#o7^#o;'S#{;'S;=`$d<%lO#{U@gU|QkSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~AOO#P~", + tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!i~~", 11)], topRules: {"Program":[0,18]}, specialized: [{term: 13, get: (value: any, stack: any) => (specializeKeyword(value, stack) << 1), external: specializeKeyword},{term: 13, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}], - tokenPrec: 924 + tokenPrec: 1008 }) diff --git a/src/parser/tests/literals.test.ts b/src/parser/tests/literals.test.ts index 84d09a8..693da17 100644 --- a/src/parser/tests/literals.test.ts +++ b/src/parser/tests/literals.test.ts @@ -201,3 +201,292 @@ describe('array literals', () => { `) }) }) + +describe('dict literals', () => { + test('work with numbers', () => { + expect('[a=1 b=2 c=3]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) + + test('work with strings', () => { + expect("[a='one' b='two' c='three']").toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + String + StringFragment one + NamedArg + NamedArgPrefix b= + String + StringFragment two + NamedArg + NamedArgPrefix c= + String + StringFragment three + `) + }) + + test('work with identifiers', () => { + expect('[a=one b=two c=three]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Identifier one + NamedArg + NamedArgPrefix b= + Identifier two + NamedArg + NamedArgPrefix c= + Identifier three + `) + }) + + test('can be nested', () => { + expect('[a=one b=[two [c=three]]]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Identifier one + NamedArg + NamedArgPrefix b= + Array + 
Identifier two + Dict + NamedArg + NamedArgPrefix c= + Identifier three + `) + }) + + test('can span multiple lines', () => { + expect(`[ + a=1 + b=2 + c=3 + ]`).toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) + test('empty dict', () => { + expect('[=]').toMatchTree(` + Dict [=] + `) + + expect('[ = ]').toMatchTree(` + Array + Word = + `) + }) + + test('mixed types', () => { + expect("[a=1 b='two' c=three d=true e=null]").toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + String + StringFragment two + NamedArg + NamedArgPrefix c= + Identifier three + NamedArg + NamedArgPrefix d= + Boolean true + NamedArg + NamedArgPrefix e= + Null null + `) + }) + + test('semicolons as separators', () => { + expect('[a=1; b=2; c=3]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) + + test('expressions in dicts', () => { + expect('[a=(1 + 2) b=(3 * 4)]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + ParenExpr + BinOp + Number 1 + Plus + + Number 2 + NamedArg + NamedArgPrefix b= + ParenExpr + BinOp + Number 3 + Star * + Number 4 + `) + }) + + test('mixed separators - spaces and newlines', () => { + expect(`[a=1 b=2 +c=3]`).toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) + + test('empty lines within dicts', () => { + expect(`[a=1 + +b=2 + +c=3]`).toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) + + test('comments within dicts', () => { + expect(`[ # something... 
+ a=1 # first + b=2 # second + + c=3 + ]`).toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) + + test('complex nested multiline', () => { + expect(`[ + a=[a=1 b=2] + b=[b=3 c=4] + c=[c=5 d=6] +]`).toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix b= + Dict + NamedArg + NamedArgPrefix b= + Number 3 + NamedArg + NamedArgPrefix c= + Number 4 + NamedArg + NamedArgPrefix c= + Dict + NamedArg + NamedArgPrefix c= + Number 5 + NamedArg + NamedArgPrefix d= + Number 6 + `) + }) + + test('boolean and null literals', () => { + expect('[a=true b=false c=null]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Boolean true + NamedArg + NamedArgPrefix b= + Boolean false + NamedArg + NamedArgPrefix c= + Null null + `) + }) + + test('regex literals', () => { + expect('[pattern=//[0-9]+//]').toMatchTree(` + Dict + NamedArg + NamedArgPrefix pattern= + Regex //[0-9]+// + `) + }) + + test('trailing newlines', () => { + expect(`[ +a=1 +b=2 +c=3 + +]`).toMatchTree(` + Dict + NamedArg + NamedArgPrefix a= + Number 1 + NamedArg + NamedArgPrefix b= + Number 2 + NamedArg + NamedArgPrefix c= + Number 3 + `) + }) +}) diff --git a/src/testSetup.ts b/src/testSetup.ts index 89203e1..10b715e 100644 --- a/src/testSetup.ts +++ b/src/testSetup.ts @@ -109,7 +109,10 @@ expect.extend({ return { pass: true } } else { return { - message: () => `Expected evaluation to be ${expected}, but got ${value}`, + message: () => + `Expected evaluation to be ${JSON.stringify(expected)}, but got ${JSON.stringify( + value + )}`, pass: false, } } @@ -163,5 +166,27 @@ const trimWhitespace = (str: string): string => { } function isEqual(a: any, b: any): boolean { - return typeof a === 'object' ? 
JSON.stringify(a) === JSON.stringify(b) : a === b + if (a === null && b === null) return true + + switch (typeof a) { + case 'string': + case 'number': + case 'boolean': + case 'undefined': + return a === b + default: + return JSON.stringify(sortKeys(a)) === JSON.stringify(sortKeys(b)) + } +} + +function sortKeys(o: any): any { + if (Array.isArray(o)) return o.map(sortKeys) + if (o && typeof o === 'object' && o.constructor === Object) + return Object.keys(o) + .sort() + .reduce((r, k) => { + r[k] = sortKeys(o[k]) + return r + }, {} as any) + return o } diff --git a/src/utils/tree.ts b/src/utils/tree.ts index 1682d21..7a2b36a 100644 --- a/src/utils/tree.ts +++ b/src/utils/tree.ts @@ -1,6 +1,6 @@ import { Tree, TreeCursor } from '@lezer/common' import { assertNever } from '#utils/utils' -import { type Value } from 'reefvm' +import { type Value, fromValue } from 'reefvm' export const treeToString = (tree: Tree, input: string): string => { const lines: string[] = [] @@ -35,27 +35,6 @@ export const treeToString = (tree: Tree, input: string): string => { } export const VMResultToValue = (result: Value): unknown => { - if ( - result.type === 'number' || - result.type === 'boolean' || - result.type === 'string' || - result.type === 'regex' - ) { - return result.value - } else if (result.type === 'null') { - return null - } else if (result.type === 'array') { - return result.value.map(VMResultToValue) - } else if (result.type === 'dict') { - const obj: Record = {} - for (const [key, val] of Object.entries(result.value)) { - obj[key] = VMResultToValue(val) - } - - return obj - } else if (result.type === 'function') { - return Function - } else { - assertNever(result) - } + if (result.type === 'function') return Function + else return fromValue(result) }