From a5b2802c0c5bfbaecce4d437fa4b8d3fc915c7ff Mon Sep 17 00:00:00 2001 From: Chris Wanstrath <2+defunkt@users.noreply.github.com> Date: Tue, 25 Nov 2025 13:08:28 -0800 Subject: [PATCH] try to match lezer API more closely --- src/parser/node.ts | 193 +++++++++++++++++++++++++++++++++++++++++- src/parser/parser2.ts | 18 ++-- 2 files changed, 198 insertions(+), 13 deletions(-) diff --git a/src/parser/node.ts b/src/parser/node.ts index 31942da..15dd8fc 100644 --- a/src/parser/node.ts +++ b/src/parser/node.ts @@ -1,4 +1,5 @@ -import { type Token, TokenType } from "./tokenizer2" +import { type Token, TokenType } from './tokenizer2' +import * as term from './shrimp.terms' export type NodeType = | 'Program' @@ -110,15 +111,191 @@ export const operators: Record = { '|': 'operator', } +export class Tree { + constructor(public topNode: SyntaxNode) { } +} + +// TODO: TEMPORARY SHIM +class SyntaxNodeType { + constructor(public nodeType: NodeType) { } + + is(other: string) { + return this.nodeType === other + } + + get id(): number { + switch (this.nodeType) { + case 'Program': + return term.Program + + case 'Block': + return term.Block + + case 'FunctionCall': + return term.FunctionCall + + case 'FunctionCallOrIdentifier': + return term.FunctionCallOrIdentifier + + case 'FunctionCallWithBlock': + return term.FunctionCallWithBlock + + case 'PositionalArg': + return term.PositionalArg + + case 'NamedArg': + return term.NamedArg + + case 'FunctionDef': + return term.FunctionDef + + case 'Params': + return term.Params + + case 'NamedParam': + return term.NamedParam + + case 'Null': + return term.Null + + case 'Boolean': + return term.Boolean + + case 'Number': + return term.Number + + case 'String': + return term.String + + case 'StringFragment': + return term.StringFragment + + case 'CurlyString': + return term.CurlyString + + case 'DoubleQuote': + return term.DoubleQuote + + case 'EscapeSeq': + return term.EscapeSeq + + case 'Interpolation': + return term.Interpolation + + case 
'Regex': + return term.Regex + + case 'Identifier': + return term.Identifier + + case 'AssignableIdentifier': + return term.AssignableIdentifier + + case 'IdentifierBeforeDot': + return term.IdentifierBeforeDot + + case 'Word': + return term.Word + + case 'Array': + return term.Array + + case 'Dict': + return term.Dict + + case 'Comment': + return term.Comment + + case 'BinOp': + return term.BinOp + + case 'ConditionalOp': + return term.ConditionalOp + + case 'ParenExpr': + return term.ParenExpr + + case 'Assign': + return term.Assign + + case 'CompoundAssign': + return term.CompoundAssign + + case 'DotGet': + return term.DotGet + + case 'PipeExpr': + return term.PipeExpr + + case 'IfExpr': + return term.IfExpr + + case 'ElseIfExpr': + return term.ElseIfExpr + + case 'ElseExpr': + return term.ElseExpr + + case 'WhileExpr': + return term.WhileExpr + + case 'TryExpr': + return term.TryExpr + + case 'CatchExpr': + return term.CatchExpr + + case 'FinallyExpr': + return term.FinallyExpr + + case 'Throw': + return term.Throw + + case 'Eq': + return term.Eq + + case 'Modulo': + return term.Modulo + + case 'Plus': + return term.Plus + + case 'Star': + return term.Star + + case 'Slash': + return term.Slash + + case 'Import': + return term.Import + + case 'Do': + return term.Do + + case 'colon': + return term.colon + + case 'keyword': + return term.keyword + + } + return 0 + } + + get name(): string { + return this.nodeType + } +} + export class SyntaxNode { - type: NodeType + #type: NodeType from: number to: number parent: SyntaxNode | null children: SyntaxNode[] = [] constructor(type: NodeType, from: number, to: number, parent: SyntaxNode | null = null) { - this.type = type + this.#type = type this.from = from this.to = to this.parent = parent @@ -128,8 +305,16 @@ export class SyntaxNode { return new SyntaxNode(TokenType[token.type] as NodeType, token.from, token.to, parent ?? 
null) } + get type(): SyntaxNodeType { + return new SyntaxNodeType(this.#type) + } + + set type(name: NodeType) { + this.#type = name + } + get name(): string { - return this.type + return this.type.name } get isError(): boolean { diff --git a/src/parser/parser2.ts b/src/parser/parser2.ts index 3919476..64f62bc 100644 --- a/src/parser/parser2.ts +++ b/src/parser/parser2.ts @@ -118,12 +118,12 @@ export class Parser { expr = this.exprWithPrecedence() // check for destructuring - if (expr.type === 'Array' && this.is($T.Operator, '=')) + if (expr.type.is('Array') && this.is($T.Operator, '=')) return this.destructure(expr) // check for parens function call // ex: (ref my-func) my-arg - if (expr.type === 'ParenExpr' && !this.isExprEnd()) + if (expr.type.is('ParenExpr') && !this.isExprEnd()) expr = this.functionCall(expr) // one | echo @@ -321,7 +321,7 @@ export class Parser { } // atoms are the basic building blocks: literals, identifiers, words - atom(): SyntaxNode { + atom(): SyntaxNode { if (this.is($T.String)) return this.string() @@ -507,7 +507,7 @@ export class Parser { if (!this.scope.has(ident)) return this.word(left) - if (left.type === 'Identifier') left.type = 'IdentifierBeforeDot' + if (left.type.is('Identifier')) left.type = 'IdentifierBeforeDot' let parts = [] while (this.is($T.Operator, '.')) { @@ -527,7 +527,7 @@ export class Parser { const dotGet = this.dotGet() // dotget not in scope, regular Word - if (dotGet.type === 'Word') return dotGet + if (dotGet.type.is('Word')) return dotGet if (this.isExprEnd()) return this.functionCallOrIdentifier(dotGet) @@ -580,7 +580,7 @@ export class Parser { inner = this.dotGet() // if the dotGet was just a Word, bail - if (inner.type === 'Word') return inner + if (inner.type.is('Word')) return inner } inner ??= this.identifier() @@ -679,7 +679,7 @@ export class Parser { const prefix = SyntaxNode.from(this.expect($T.NamedArgPrefix)) const val = this.value() - if (!['Null', 'Boolean', 'Number', 
'String'].includes(val.type)) + if (!['Null', 'Boolean', 'Number', 'String'].includes(val.type.name)) - throw `[namedParam] default value must be Null|Bool|Num|Str, got ${val.type}\n\n ${this.input}\n` + throw `[namedParam] default value must be Null|Bool|Num|Str, got ${val.type.name}\n\n ${this.input}\n` const node = new SyntaxNode('NamedParam', prefix.from, val.to) @@ -887,9 +887,9 @@ function collapseDotGets(origNodes: SyntaxNode[]): SyntaxNode { while (nodes.length > 0) { const left = nodes.pop()! - if (left.type === 'Identifier') left.type = 'IdentifierBeforeDot' + if (left.type.is('Identifier')) left.type = 'IdentifierBeforeDot' - const dot = new SyntaxNode("DotGet", left.from, right.to); + const dot = new SyntaxNode("DotGet", left.from, right.to) dot.push(left, right) right = dot