diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts index d52576e..cc151fe 100644 --- a/src/compiler/compiler.ts +++ b/src/compiler/compiler.ts @@ -211,16 +211,37 @@ export class Compiler { } case terms.DotGet: { + // DotGet is parsed into a nested tree because it's hard to parse it into a flat one. + // However, we want a flat tree - so we're going to pretend like we are getting one from the parser. + // + // This: DotGet(config, DotGet(script, name)) + // Becomes: DotGet(config, script, name) const { objectName, property } = getDotGetParts(node, input) const instructions: ProgramItem[] = [] + instructions.push(['TRY_LOAD', objectName]) - if (property.type.id === terms.ParenExpr) { - instructions.push(...this.#compileNode(property, input)) - } else { - const propertyValue = input.slice(property.from, property.to) - instructions.push(['PUSH', propertyValue]) + + const flattenProperty = (prop: SyntaxNode): void => { + if (prop.type.id === terms.DotGet) { + const nestedParts = getDotGetParts(prop, input) + + const nestedObjectValue = input.slice(nestedParts.object.from, nestedParts.object.to) + instructions.push(['PUSH', nestedObjectValue]) + instructions.push(['DOT_GET']) + + flattenProperty(nestedParts.property) + } else { + if (prop.type.id === terms.ParenExpr) { + instructions.push(...this.#compileNode(prop, input)) + } else { + const propertyValue = input.slice(prop.from, prop.to) + instructions.push(['PUSH', propertyValue]) + } + instructions.push(['DOT_GET']) + } } - instructions.push(['DOT_GET']) + + flattenProperty(property) return instructions } diff --git a/src/compiler/tests/compiler.test.ts b/src/compiler/tests/compiler.test.ts index 45fdb39..46977bc 100644 --- a/src/compiler/tests/compiler.test.ts +++ b/src/compiler/tests/compiler.test.ts @@ -284,6 +284,43 @@ describe('dot get', () => { test('use parens expr with dot-get', () => { expect(`a = 1; arr = array 'a' 'b' 'c'; arr.(1 + a)`).toEvaluateTo('c', { array }) }) + + test('chained dot get: two levels', () => { + expect(`obj = [inner=[value=42]]; obj.inner.value`).toEvaluateTo(42) + }) + + test('chained dot get: three levels', () => { + expect(`obj = [a=[b=[c=123]]]; obj.a.b.c`).toEvaluateTo(123) + }) + + test('chained dot get: four levels', () => { + expect(`obj = [w=[x=[y=[z='deep']]]]; obj.w.x.y.z`).toEvaluateTo('deep') + }) + + test('chained dot get with numeric index', () => { + expect(`obj = [items=[1 2 3]]; obj.items.0`).toEvaluateTo(1) + }) + + test('chained dot get in expression', () => { + expect(`config = [server=[port=3000]]; config.server.port + 1`).toEvaluateTo(3001) + }) + + test('chained dot get as function argument', () => { + const double = (x: number) => x * 2 + expect(`obj = [val=[num=21]]; double obj.val.num`).toEvaluateTo(42, { double }) + }) + + test('chained dot get in binary operation', () => { + expect(`a = [x=[y=10]]; b = [x=[y=20]]; a.x.y + b.x.y`).toEvaluateTo(30) + }) + + test('chained dot get with parens at end', () => { + expect(`idx = 1; obj = [items=[10 20 30]]; obj.items.(idx)`).toEvaluateTo(20) + }) + + test('mixed chained and simple dot get', () => { + expect(`obj = [a=1 b=[c=2]]; obj.a + obj.b.c`).toEvaluateTo(3) + }) }) describe('default params', () => { diff --git a/src/compiler/utils.ts b/src/compiler/utils.ts index c424be2..2aae236 100644 --- a/src/compiler/utils.ts +++ b/src/compiler/utils.ts @@ -301,9 +301,9 @@ export const getDotGetParts = (node: SyntaxNode, input: string) => { ) } - if (![terms.Identifier, terms.Number, terms.ParenExpr].includes(property.type.id)) { + if (![terms.Identifier, terms.Number, terms.ParenExpr, terms.DotGet].includes(property.type.id)) { throw new CompilerError( - `DotGet property must be an Identifier or Number, got ${property.type.name}`, + `DotGet property must be an Identifier, Number, ParenExpr, or DotGet, got ${property.type.name}`, property.from, property.to ) @@ -311,7 +311,7 @@ export const getDotGetParts = (node: SyntaxNode, input: string) => { const objectName = input.slice(object.from, object.to) - return { objectName, property } + return { object, objectName, property } } export const getTryExprParts = (node: SyntaxNode, input: string) => { diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index 80adcdb..b8aa46a 100644 --- a/src/parser/shrimp.grammar +++ b/src/parser/shrimp.grammar @@ -239,7 +239,7 @@ expression { @skip {} { DotGet { - IdentifierBeforeDot dot (Number | Identifier | ParenExpr) + IdentifierBeforeDot dot (DotGet | Number | Identifier | ParenExpr) } String { diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 051f00e..b941d88 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -7,9 +7,9 @@ import {highlighting} from "./highlight" const spec_Identifier = {__proto__:null,while:78, null:112, catch:118, finally:124, end:126, if:134, else:140, try:158, throw:162, import:166} export const parser = LRParser.deserialize({ version: 14, - states: "=|QYQbOOO!mOpO'#DXO!rOSO'#D`OOQa'#D`'#D`O%mQcO'#DvO(mQcO'#EiOOQ`'#Ew'#EwO)WQRO'#DwO+]QcO'#EgO+vQbO'#DVOOQa'#Dy'#DyO.[QbO'#DzOOQa'#Ei'#EiO.cQcO'#EiO0aQcO'#EhO1fQcO'#EgO1sQRO'#ESOOQ`'#Eg'#EgO2[QbO'#EgO2cQQO'#EfOOQ`'#Ef'#EfOOQ`'#EU'#EUQYQbOOO2nQbO'#D[O2yQbO'#DpO3tQbO'#DSO4oQQO'#D|O3tQbO'#EOO4tQbO'#EQO4|ObO,59sO5XQbO'#DbO5aQWO'#DcOOOO'#Eo'#EoOOOO'#EZ'#EZO5uOSO,59zOOQa,59z,59zOOQ`'#DZ'#DZO6TQbO'#DoOOQ`'#Em'#EmOOQ`'#E^'#E^O6_QbO,5:^OOQa'#Eh'#EhO3tQbO,5:cO3tQbO,5:cO3tQbO,5:cO3tQbO,5:cO3tQbO,59pO3tQbO,59pO3tQbO,59pO3tQbO,59pOOQ`'#EW'#EWO+vQbO,59qO7XQcO'#DvO7`QcO'#EiO7gQRO,59qO7qQQO,59qO7vQQO,59qO8OQQO,59qO8ZQRO,59qO8sQRO,59qO8zQQO'#DQO9PQbO,5:fO9WQQO,5:eOOQa,5:f,5:fO9cQbO,5:fO9mQbO,5:oO9mQbO,5:nO:}QbO,5:gO;UQbO,59lOOQ`,5;Q,5;QO9mQbO'#EVOOQ`-E8S-E8SOOQ`'#EX'#EXO;pQbO'#D]O;{QbO'#D^OOQO'#EY'#EYO;sQQO'#D]O`QRO'#EvOOQO'#Ev'#EvO>gQQO,5:[O>lQRO,59nO>sQRO,59nO:}QbO,5:hO?RQcO,5:jO@aQcO,5:jO@}QcO,5:jOArQbO,5:lOOQ`'#Eb'#EbO4tQbO,5:lOOQa1G/_1G/_OOOO,59|,59|OOOO,59},59}OOOO-E8X-E8XOOQa1G/f1G/fOOQ`,5:Z,5:ZOOQ`-E8[-E8[OOQa1G/}1G/}OCkQcO1G/}OCuQcO1G/}OETQcO1G/}OE_QcO1G/}OElQcO1G/}OOQa1G/[1G/[OF}QcO1G/[OGUQcO1G/[OG]QcO1G/[OH[QcO1G/[OGdQcO1G/[OOQ`-E8U-E8UOHrQRO1G/]OH|QQO1G/]OIRQQO1G/]OIZQQO1G/]OIfQRO1G/]OImQRO1G/]OItQbO,59rOJOQQO1G/]OOQa1G/]1G/]OJWQQO1G0POOQa1G0Q1G0QOJcQbO1G0QOOQO'#E`'#E`OJWQQO1G0POOQa1G0P1G0POOQ`'#Ea'#EaOJcQbO1G0QOJmQbO1G0ZOKXQbO1G0YOKsQbO'#DjOLUQbO'#DjOLiQbO1G0ROOQ`-E8T-E8TOOQ`,5:q,5:qOOQ`-E8V-E8VOLtQQO,59wOOQO,59x,59xOOQO-E8W-E8WOL|QbO1G/bO:}QbO1G/vO:}QbO1G/YOMTQbO1G0SOM`QbO1G0WOM}QbO1G0WOOQ`-E8`-E8`ONUQQO7+$wOOQa7+$w7+$wON^QQO1G/^ONfQQO7+%kOOQa7+%k7+%kONqQbO7+%lOOQa7+%l7+%lOOQO-E8^-E8^OOQ`-E8_-E8_OOQ`'#E['#E[ON{QQO'#E[O! TQbO'#EuOOQ`,5:U,5:UO! hQbO'#DhO! mQQO'#DkOOQ`7+%m7+%mO! rQbO7+%mO! wQbO7+%mO!!PQbO7+$|O!!_QbO7+$|O!!oQbO7+%bO!!wQbO7+$tOOQ`7+%n7+%nO!!|QbO7+%nO!#RQbO7+%nO!#ZQbO7+%rOOQa<sAN>sOOQ`AN>SAN>SO!%zQbOAN>SO!&PQbOAN>SOOQ`-E8]-E8]OOQ`AN>hAN>hO!&XQbOAN>hO2yQbO,5:_O:}QbO,5:aOOQ`AN>tAN>tPItQbO'#EWOOQ`7+%Y7+%YOOQ`G23nG23nO!&^QbOG23nP!%^QbO'#DsOOQ`G24SG24SO!&cQQO1G/yOOQ`1G/{1G/{OOQ`LD)YLD)YO:}QbO7+%eOOQ`<cQRO'#EvOOQO'#Ev'#EvO>jQQO,5:[O>oQRO,59nO>vQRO,59nO;QQbO,5:hO?UQcO,5:jO@dQcO,5:jOAQQcO,5:jOAuQbO,5:lOOQ`'#Eb'#EbO4tQbO,5:lOOQa1G/_1G/_OOOO,59|,59|OOOO,59},59}OOOO-E8X-E8XOOQa1G/f1G/fOOQ`,5:Z,5:ZOOQ`-E8[-E8[OOQa1G/}1G/}OCnQcO1G/}OCxQcO1G/}OEWQcO1G/}OEbQcO1G/}OEoQcO1G/}OOQa1G/[1G/[OGQQcO1G/[OGXQcO1G/[OG`QcO1G/[OH_QcO1G/[OGgQcO1G/[OOQ`-E8U-E8UOHuQRO1G/]OIPQQO1G/]OIUQQO1G/]OI^QQO1G/]OIiQRO1G/]OIpQRO1G/]OIwQbO,59rOJRQQO1G/]OOQa1G/]1G/]OJZQQO1G0POOQa1G0Q1G0QOJfQbO1G0QOOQO'#E`'#E`OJZQQO1G0POOQa1G0P1G0POOQ`'#Ea'#EaOJfQbO1G0QOJpQbO1G0ZOK[QbO1G0YOKvQbO'#DjOLXQbO'#DjOLlQbO1G0ROOQ`-E8T-E8TOOQ`,5:q,5:qOOQ`-E8V-E8VOLwQQO,59wOOQO,59x,59xOOQO-E8W-E8WOMPQbO1G/bO;QQbO1G/vO;QQbO1G/YOMWQbO1G0SOMcQbO1G0WONQQbO1G0WOOQ`-E8`-E8`ONXQQO7+$wOOQa7+$w7+$wONaQQO1G/^ONiQQO7+%kOOQa7+%k7+%kONtQbO7+%lOOQa7+%l7+%lOOQO-E8^-E8^OOQ`-E8_-E8_OOQ`'#E['#E[O! OQQO'#E[O! WQbO'#EuOOQ`,5:U,5:UO! kQbO'#DhO! pQQO'#DkOOQ`7+%m7+%mO! uQbO7+%mO! zQbO7+%mO!!SQbO7+$|O!!bQbO7+$|O!!rQbO7+%bO!!zQbO7+$tOOQ`7+%n7+%nO!#PQbO7+%nO!#UQbO7+%nO!#^QbO7+%rOOQa<sAN>sOOQ`AN>SAN>SO!%}QbOAN>SO!&SQbOAN>SOOQ`-E8]-E8]OOQ`AN>hAN>hO!&[QbOAN>hO2yQbO,5:_O;QQbO,5:aOOQ`AN>tAN>tPIwQbO'#EWOOQ`7+%Y7+%YOOQ`G23nG23nO!&aQbOG23nP!%aQbO'#DsOOQ`G24SG24SO!&fQQO1G/yOOQ`1G/{1G/{OOQ`LD)YLD)YO;QQbO7+%eOOQ`< (specializeKeyword(value, stack) << 1), external: specializeKeyword},{term: 28, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}], - tokenPrec: 2370 + tokenPrec: 2373 }) diff --git a/src/parser/tests/dot-get.test.ts b/src/parser/tests/dot-get.test.ts index f781937..4181ce6 100644 --- a/src/parser/tests/dot-get.test.ts +++ b/src/parser/tests/dot-get.test.ts @@ -298,4 +298,154 @@ end`).toMatchTree(` Number 2 `) }) + + // NOTE: these are parsed as DotGet(meta, DotGet(script, name)) because that's easiest, + // but the compiler flattens them + test('chained dot get: meta.script.name', () => { + expect('meta = 42; meta.script.name').toMatchTree(` + Assign + AssignableIdentifier meta + Eq = + Number 42 + FunctionCallOrIdentifier + DotGet + IdentifierBeforeDot meta + DotGet + IdentifierBeforeDot script + Identifier name + `) + }) + + test('chained dot get: a.b.c.d', () => { + expect('a = 1; a.b.c.d').toMatchTree(` + Assign + AssignableIdentifier a + Eq = + Number 1 + FunctionCallOrIdentifier + DotGet + IdentifierBeforeDot a + DotGet + IdentifierBeforeDot b + DotGet + IdentifierBeforeDot c + Identifier d + `) + }) + + test('chained dot get in function call', () => { + expect('config = 1; echo config.db.host').toMatchTree(` + Assign + AssignableIdentifier config + Eq = + Number 1 + FunctionCall + Identifier echo + PositionalArg + DotGet + IdentifierBeforeDot config + DotGet + IdentifierBeforeDot db + Identifier host + `) + }) + + test('chained dot get with numeric index at end', () => { + expect('obj = 1; obj.items.0').toMatchTree(` + Assign + AssignableIdentifier obj + Eq = + Number 1 + FunctionCallOrIdentifier + DotGet + IdentifierBeforeDot obj + DotGet + IdentifierBeforeDot items + Number 0 + `) + }) + + test('chained dot get with ParenExpr at end', () => { + expect('obj = 1; obj.items.(i)').toMatchTree(` + Assign + AssignableIdentifier obj + Eq = + Number 1 + FunctionCallOrIdentifier + DotGet + IdentifierBeforeDot obj + DotGet + IdentifierBeforeDot items + ParenExpr + FunctionCallOrIdentifier + Identifier i + `) + }) + + test('not in scope remains Word with chained dots', () => { + expect('readme.md.bak').toMatchTree(`Word readme.md.bak`) + }) + + test('chained dot get in nested functions', () => { + expect(`do cfg: + do inner: + cfg.db.host + end +end`).toMatchTree(` + FunctionDef + Do do + Params + Identifier cfg + colon : + FunctionDef + Do do + Params + Identifier inner + colon : + FunctionCallOrIdentifier + DotGet + IdentifierBeforeDot cfg + DotGet + IdentifierBeforeDot db + Identifier host + keyword end + keyword end + `) + }) + + test('mixed simple and chained dot get', () => { + expect('obj = 1; obj.a; obj.b.c').toMatchTree(` + Assign + AssignableIdentifier obj + Eq = + Number 1 + FunctionCallOrIdentifier + DotGet + IdentifierBeforeDot obj + Identifier a + FunctionCallOrIdentifier + DotGet + IdentifierBeforeDot obj + DotGet + IdentifierBeforeDot b + Identifier c + `) + }) + + test.skip('chained numeric dot get: row.2.1.b', () => { + expect('row = []; row.2.1').toMatchTree(` + Assign + AssignableIdentifier row + Eq = + Array [] + FunctionCallOrIdentifier + DotGet + IdentifierBeforeDot row + DotGet + Number 2 + DotGet + Number 1 + Identifier b + `) + }) }) diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts index 8ad55c2..75c027a 100644 --- a/src/parser/tokenizer.ts +++ b/src/parser/tokenizer.ts @@ -1,5 +1,5 @@ import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr' -import { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, Do, CurlyString } from './shrimp.terms' +import { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, Do, CurlyString, DotGet } from './shrimp.terms' // doobie doobie do (we need the `do` keyword to know when we're defining params) export function specializeKeyword(ident: string) { @@ -187,11 +187,21 @@ const checkForDotGet = (input: InputStream, stack: Stack, pos: number): number | const identifierText = buildIdentifierText(input, pos) const context = stack.context as { scope: { has(name: string): boolean } } | undefined - // If identifier is in scope, this is property access (e.g., obj.prop) - // If not in scope, it should be consumed as a Word (e.g., file.txt) - return context?.scope.has(identifierText) || globals.includes(identifierText) - ? IdentifierBeforeDot - : null + // Check if identifier is in scope (lexical scope or globals) + const inScope = context?.scope.has(identifierText) || globals.includes(identifierText) + + // property access + if (inScope) return IdentifierBeforeDot + + // Not in scope - check if we're inside a DotGet chain + // Inside the @skip {} block where DotGet is defined, Word cannot be shifted + // but Identifier can be. This tells us we're at the RHS of a DotGet. + const canShiftIdentifier = stack.canShift(Identifier) + const canShiftWord = stack.canShift(Word) + const inDotGetChain = canShiftIdentifier && !canShiftWord + + // continue if we're inside a DotGet + return inDotGetChain ? IdentifierBeforeDot : null } // Decide between AssignableIdentifier and Identifier using grammar state + peek-ahead