From 597a25da80c3adf90ebd277a19b313806a267627 Mon Sep 17 00:00:00 2001 From: Corey Johnson Date: Fri, 10 Oct 2025 15:50:09 -0700 Subject: [PATCH] hell yeah --- packages/ReefVM | 2 +- src/compiler/compiler.test.ts | 57 +++++++- src/compiler/compiler.ts | 253 +++++++++++++++++----------------- src/compiler/compilerError.ts | 69 +++++++++- src/compiler/utils.ts | 158 +++++++++++++++++++++ src/editor/plugins/theme.tsx | 6 + src/parser/parser.test.ts | 234 +++++++++++++++++++++++++++---- src/parser/shrimp.grammar | 126 ++++++++++++----- src/parser/shrimp.terms.ts | 25 ++-- src/parser/shrimp.ts | 20 +-- src/parser/tokenizer.ts | 2 +- 11 files changed, 736 insertions(+), 216 deletions(-) create mode 100644 src/compiler/utils.ts diff --git a/packages/ReefVM b/packages/ReefVM index 146b0a2..82e7b18 160000 --- a/packages/ReefVM +++ b/packages/ReefVM @@ -1 +1 @@ -Subproject commit 146b0a28831161e03966746acce7d5fc7fe2229d +Subproject commit 82e7b181ec1b0a2df4d76ca529b4736c9e56383b diff --git a/src/compiler/compiler.test.ts b/src/compiler/compiler.test.ts index 4a13cad..1008eda 100644 --- a/src/compiler/compiler.test.ts +++ b/src/compiler/compiler.test.ts @@ -63,7 +63,7 @@ describe('compiler', () => { }) test('function', () => { - expect(`add = fn a b: a + b; add`).toEvaluateTo(Function) + expect(`fn a b: a + b`).toEvaluateTo(Function) }) test('function call', () => { @@ -73,6 +73,61 @@ describe('compiler', () => { test('function call with no args', () => { expect(`bloop = fn: 'bloop'; bloop`).toEvaluateTo('bloop') }) + + test('simple conditionals', () => { + expect(`(3 < 6)`).toEvaluateTo(true) + expect(`(10 > 20)`).toEvaluateTo(false) + expect(`(4 <= 9)`).toEvaluateTo(true) + expect(`(15 >= 20)`).toEvaluateTo(false) + expect(`(7 = 7)`).toEvaluateTo(true) + expect(`(5 != 5)`).toEvaluateTo(false) + expect(`('shave' and 'haircut')`).toEvaluateTo('haircut') + expect(`(false and witness)`).toEvaluateTo(false) + expect(`('pride' or 'prejudice')`).toEvaluateTo('pride') + 
expect(`(false or false)`).toEvaluateTo(false) + }) + + test('if', () => { + expect(`if 3 < 9: + shire + end`).toEvaluateTo('shire') + }) + + test('if else', () => { + expect(`if false: + grey + else: + white + end`).toEvaluateTo('white') + }) + + test('if elsif', () => { + expect(`if false: + boromir + elsif true: + frodo + end`).toEvaluateTo('frodo') + }) + + test('if elsif else', () => { + expect(`if false: + destroyed + elsif true: + fire + else: + darkness + end`).toEvaluateTo('fire') + + expect(`if false: + king + elsif false: + elf + elsif true: + dwarf + else: + scattered + end`).toEvaluateTo('dwarf') + }) }) describe('errors', () => { diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts index e304d2c..eef5396 100644 --- a/src/compiler/compiler.ts +++ b/src/compiler/compiler.ts @@ -4,12 +4,21 @@ import * as terms from '#parser/shrimp.terms' import type { SyntaxNode, Tree } from '@lezer/common' import { assert, errorMessage } from '#utils/utils' import { toBytecode, type Bytecode } from 'reefvm' -import { compile } from 'tailwindcss' +import { + checkTreeForErrors, + getAllChildren, + getAssignmentParts, + getBinaryParts, + getFunctionCallParts, + getFunctionDefParts, + getIfExprParts, + getNamedArgParts, +} from '#compiler/utils' export class Compiler { - fnCounter = 0 instructions: string[] = [] - labels = new Map() + fnLabels = new Map() + ifLabelCount = 0 bytecode: Bytecode constructor(public input: string) { @@ -24,17 +33,17 @@ export class Compiler { this.#compileCst(cst, input) // Add the labels - for (const [label, labelInstructions] of this.labels) { + for (const [label, labelInstructions] of this.fnLabels) { this.instructions.push(`${label}:`) this.instructions.push(...labelInstructions.map((instr) => ` ${instr}`)) this.instructions.push(' RETURN') } - // console.log(`🌭`, this.instructions.join('\n')) + // console.log(`\nšŸ¤– instructions:\n----------------\n${this.instructions.join('\n')}\n\n`) this.bytecode = 
toBytecode(this.instructions.join('\n')) } catch (error) { if (error instanceof CompilerError) { - throw new Error(`Compiler Error:\n${error.toReadableString(input)}`) + throw new Error(error.toReadableString(input)) } else { throw new Error(`Unknown error during compilation:\n${errorMessage(error)}`) } @@ -119,13 +128,13 @@ export class Compiler { case terms.FunctionDef: { const { paramNames, bodyNode } = getFunctionDefParts(node, input) const instructions: string[] = [] - const functionName = `.func_${this.labels.size}` + const functionName = `.func_${this.fnLabels.size}` const bodyInstructions: string[] = [] - if (this.labels.has(functionName)) { + if (this.fnLabels.has(functionName)) { throw new CompilerError(`Function name collision: ${functionName}`, node.from, node.to) } - this.labels.set(functionName, bodyInstructions) + this.fnLabels.set(functionName, bodyInstructions) instructions.push(`MAKE_FUNCTION (${paramNames}) ${functionName}`) bodyInstructions.push(...this.#compileNode(bodyNode, input)) @@ -134,9 +143,7 @@ export class Compiler { } case terms.FunctionCallOrIdentifier: { - // For now, just treat them all like identifiers, but we might - // need something like TRY_CALL in the future. 
- return [`TRY_LOAD ${value}`] + return [`TRY_CALL ${value}`] } /* @@ -173,126 +180,112 @@ export class Compiler { return instructions } + case terms.ThenBlock: { + const instructions = getAllChildren(node) + .map((child) => this.#compileNode(child, input)) + .flat() + + return instructions + } + + case terms.IfExpr: { + const { conditionNode, thenBlock, elseIfBlocks, elseThenBlock } = getIfExprParts( + node, + input + ) + const instructions: string[] = [] + instructions.push(...this.#compileNode(conditionNode, input)) + this.ifLabelCount++ + const elseLabel = `.else_${this.ifLabelCount}` + const endLabel = `.end_${this.ifLabelCount}` + + const thenBlockInstructions = this.#compileNode(thenBlock, input) + instructions.push(`JUMP_IF_FALSE #${thenBlockInstructions.length + 1}`) + instructions.push(...thenBlockInstructions) + instructions.push(`JUMP ${endLabel}`) + + // Else if + elseIfBlocks.forEach(({ conditional, thenBlock }, index) => { + instructions.push(...this.#compileNode(conditional, input)) + const elseIfInstructions = this.#compileNode(thenBlock, input) + instructions.push(`JUMP_IF_FALSE #${elseIfInstructions.length + 1}`) + instructions.push(...elseIfInstructions) + instructions.push(`JUMP ${endLabel}`) + }) + + // Else + instructions.push(`${elseLabel}:`) + if (elseThenBlock) { + const elseThenInstructions = this.#compileNode(elseThenBlock, input).map((i) => ` ${i}`) + instructions.push(...elseThenInstructions) + } else { + instructions.push(` PUSH null`) + } + + instructions.push(`${endLabel}:`) + + return instructions + } + + // - `EQ`, `NEQ`, `LT`, `GT`, `LTE`, `GTE` - Pop 2, push boolean + case terms.ConditionalOp: { + const instructions: string[] = [] + const { left, op, right } = getBinaryParts(node) + const leftInstructions: string[] = this.#compileNode(left, input) + const rightInstructions: string[] = this.#compileNode(right, input) + + const opValue = input.slice(op.from, op.to) + switch (opValue) { + case '=': + 
instructions.push(...leftInstructions, ...rightInstructions, 'EQ') + break + + case '!=': + instructions.push(...leftInstructions, ...rightInstructions, 'NEQ') + break + + case '<': + instructions.push(...leftInstructions, ...rightInstructions, 'LT') + break + + case '>': + instructions.push(...leftInstructions, ...rightInstructions, 'GT') + break + + case '<=': + instructions.push(...leftInstructions, ...rightInstructions, 'LTE') + break + + case '>=': + instructions.push(...leftInstructions, ...rightInstructions, 'GTE') + break + + case 'and': + instructions.push(...leftInstructions) + instructions.push('DUP') + instructions.push(`JUMP_IF_FALSE #${rightInstructions.length + 1}`) + instructions.push('POP') + instructions.push(...rightInstructions) + break + + case 'or': + instructions.push(...leftInstructions) + instructions.push('DUP') // duplicate the left value so the jump tests it (mirrors 'and'); a pushed constant would always short-circuit + instructions.push(`JUMP_IF_TRUE #${rightInstructions.length + 1}`) + instructions.push('POP') + instructions.push(...rightInstructions) + + break + + default: + throw new CompilerError(`Unsupported conditional operator: ${opValue}`, op.from, op.to) + } + + return instructions + } + default: throw new CompilerError(`Unsupported syntax node: ${node.type.name}`, node.from, node.to) } } } - -// Helper functions for extracting node parts -const getAllChildren = (node: SyntaxNode): SyntaxNode[] => { - const children: SyntaxNode[] = [] - let child = node.firstChild - while (child) { - children.push(child) - child = child.nextSibling - } - return children -} - -const getBinaryParts = (node: SyntaxNode) => { - const children = getAllChildren(node) - const [left, op, right] = children - - if (!left || !op || !right) { - throw new CompilerError(`BinOp expected 3 children, got ${children.length}`, node.from, node.to) - } - - return { left, op, right } -} - -const getAssignmentParts = (node: SyntaxNode) => { - const children = getAllChildren(node) - const [left, equals, right] = children - - if (!left || left.type.id !== terms.Identifier) 
{ - throw new CompilerError( - `Assign left child must be an Identifier, got ${left ? left.type.name : 'none'}`, - node.from, - node.to - ) - } else if (!equals || !right) { - throw new CompilerError( - `Assign expected 3 children, got ${children.length}`, - node.from, - node.to - ) - } - - return { identifier: left, right } -} - -const checkTreeForErrors = (tree: Tree, input: string): string[] => { - const errors: string[] = [] - tree.iterate({ - enter: (node) => { - if (node.type.isError) { - const errorText = input.slice(node.from, node.to) - errors.push(`Syntax error at ${node.from}-${node.to}: "${errorText}"`) - } - }, - }) - - return errors -} - -const getFunctionDefParts = (node: SyntaxNode, input: string) => { - const children = getAllChildren(node) - const [fnKeyword, paramsNode, colon, bodyNode] = children - - if (!fnKeyword || !paramsNode || !colon || !bodyNode) { - throw new CompilerError( - `FunctionDef expected 5 children, got ${children.length}`, - node.from, - node.to - ) - } - - const paramNames = getAllChildren(paramsNode) - .map((param) => { - if (param.type.id !== terms.Identifier) { - throw new CompilerError( - `FunctionDef params must be Identifiers, got ${param.type.name}`, - param.from, - param.to - ) - } - return input.slice(param.from, param.to) - }) - .join(' ') - - return { paramNames, bodyNode } -} - -const getFunctionCallParts = (node: SyntaxNode, input: string) => { - const [identifierNode, ...args] = getAllChildren(node) - - if (!identifierNode) { - throw new CompilerError(`FunctionCall expected at least 1 child, got 0`, node.from, node.to) - } - - const namedArgs = args.filter((arg) => arg.type.id === terms.NamedArg) - const positionalArgs = args - .filter((arg) => arg.type.id === terms.PositionalArg) - .map((arg) => { - const child = arg.firstChild - if (!child) throw new CompilerError(`PositionalArg has no child`, arg.from, arg.to) - - return child - }) - - return { identifierNode, namedArgs, positionalArgs } -} - -const 
getNamedArgParts = (node: SyntaxNode, input: string) => { - const children = getAllChildren(node) - const [namedArgPrefix, valueNode] = getAllChildren(node) - - if (!namedArgPrefix || !valueNode) { - const message = `NamedArg expected 2 children, got ${children.length}` - throw new CompilerError(message, node.from, node.to) - } - - const name = input.slice(namedArgPrefix.from, namedArgPrefix.to - 2) // Remove the trailing = - return { name, valueNode } -} diff --git a/src/compiler/compilerError.ts b/src/compiler/compilerError.ts index b009695..96c44f4 100644 --- a/src/compiler/compilerError.ts +++ b/src/compiler/compilerError.ts @@ -5,12 +5,71 @@ export class CompilerError extends Error { this.message = message } + // This code is A MESS, but I don't really care because once we get it right we'll never touch it again. toReadableString(input: string) { - const pointer = ' '.repeat(this.from) + '^'.repeat(this.to - this.from) - const message = `${this.message} at "${input.slice(this.from, this.to)}" (${this.from}:${ - this.to - })` + const lineInfo = this.lineAtPosition(input) + if (!lineInfo) { + return `${this.message} at position ${this.from}:${this.to}` + } - return `${input}\n${pointer}\n${message}` + const { lineNumber, columnStart, columnEnd } = lineInfo + const previousSevenLines = input.split('\n').slice(Math.max(0, lineNumber - 8), lineNumber) + const padding = lineNumber.toString().length + const ws = ' '.repeat(padding + 1) + const lines = previousSevenLines + .map((line, index) => { + const currentLineNumber = lineNumber - previousSevenLines.length + index + 1 + return `${grey(currentLineNumber.toString().padStart(padding))} │ ${line}` + }) + .join('\n') + + const underlineStartLen = (columnEnd - columnStart) / 2 + const underlineEndLen = columnEnd - columnStart - underlineStartLen + const underline = + ' '.repeat(columnStart - 1) + + '─'.repeat(underlineStartLen) + + '┬' + + '─'.repeat(underlineEndLen) + + const messageWithArrow = + ' 
'.repeat(columnStart + underlineStartLen - 1) + '╰── ' + blue(this.message) + + const message = `${green('')} +${ws}╭───┨ ${red('Compiler Error')} ā”ƒ +${ws}│ +${lines} +${ws}│ ${underline} +${ws}│ ${messageWithArrow} +${ws}╰─── + ` + + return `${message}` + } + + lineAtPosition(input: string) { + const lines = input.split('\n') + let currentPos = 0 + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (this.from >= currentPos && this.from <= currentPos + line.length) { + const columnStart = this.from - currentPos + 1 + const columnEnd = columnStart + (this.to - this.from) - 1 + + // If the error spans multiple lines, so just return the line start + if (columnEnd > line.length) { + return { lineNumber: i + 1, columnStart, columnEnd: line.length, text: line } + } + return { lineNumber: i + 1, columnStart, columnEnd, text: line } + } + currentPos += line.length + 1 // +1 for the newline character + } } } + +const red = (text: string) => `\x1b[31m${text}\x1b[0m` +const green = (text: string) => `\x1b[32m${text}\x1b[0m` +const blue = (text: string) => `\x1b[34m${text}\x1b[0m` +const grey = (text: string) => `\x1b[90m${text}\x1b[0m` +const underline = (text: string) => `\x1b[4m${text}\x1b[0m` +const bold = (text: string) => `\x1b[1m${text}\x1b[0m` diff --git a/src/compiler/utils.ts b/src/compiler/utils.ts new file mode 100644 index 0000000..3410dde --- /dev/null +++ b/src/compiler/utils.ts @@ -0,0 +1,158 @@ +import { CompilerError } from '#compiler/compilerError.ts' +import * as terms from '#parser/shrimp.terms' +import type { SyntaxNode, Tree } from '@lezer/common' + +export const checkTreeForErrors = (tree: Tree, input: string): string[] => { + const errors: string[] = [] + tree.iterate({ + enter: (node) => { + if (node.type.isError) { + const errorText = input.slice(node.from, node.to) + errors.push(`Syntax error at ${node.from}-${node.to}: "${errorText}"`) + } + }, + }) + + return errors +} + +export const getAllChildren = (node: SyntaxNode): 
SyntaxNode[] => { + const children: SyntaxNode[] = [] + let child = node.firstChild + while (child) { + children.push(child) + child = child.nextSibling + } + return children +} + +export const getBinaryParts = (node: SyntaxNode) => { + const children = getAllChildren(node) + const [left, op, right] = children + + if (!left || !op || !right) { + throw new CompilerError(`BinOp expected 3 children, got ${children.length}`, node.from, node.to) + } + + return { left, op, right } +} + +export const getAssignmentParts = (node: SyntaxNode) => { + const children = getAllChildren(node) + const [left, equals, right] = children + + if (!left || left.type.id !== terms.Identifier) { + throw new CompilerError( + `Assign left child must be an Identifier, got ${left ? left.type.name : 'none'}`, + node.from, + node.to + ) + } else if (!equals || !right) { + throw new CompilerError( + `Assign expected 3 children, got ${children.length}`, + node.from, + node.to + ) + } + + return { identifier: left, right } +} + +export const getFunctionDefParts = (node: SyntaxNode, input: string) => { + const children = getAllChildren(node) + const [fnKeyword, paramsNode, colon, bodyNode] = children + + if (!fnKeyword || !paramsNode || !colon || !bodyNode) { + throw new CompilerError( + `FunctionDef expected 5 children, got ${children.length}`, + node.from, + node.to + ) + } + + const paramNames = getAllChildren(paramsNode) + .map((param) => { + if (param.type.id !== terms.Identifier) { + throw new CompilerError( + `FunctionDef params must be Identifiers, got ${param.type.name}`, + param.from, + param.to + ) + } + return input.slice(param.from, param.to) + }) + .join(' ') + + return { paramNames, bodyNode } +} + +export const getFunctionCallParts = (node: SyntaxNode, input: string) => { + const [identifierNode, ...args] = getAllChildren(node) + + if (!identifierNode) { + throw new CompilerError(`FunctionCall expected at least 1 child, got 0`, node.from, node.to) + } + + const namedArgs = 
args.filter((arg) => arg.type.id === terms.NamedArg) + const positionalArgs = args + .filter((arg) => arg.type.id === terms.PositionalArg) + .map((arg) => { + const child = arg.firstChild + if (!child) throw new CompilerError(`PositionalArg has no child`, arg.from, arg.to) + + return child + }) + + return { identifierNode, namedArgs, positionalArgs } +} + +export const getNamedArgParts = (node: SyntaxNode, input: string) => { + const children = getAllChildren(node) + const [namedArgPrefix, valueNode] = children + + if (!namedArgPrefix || !valueNode) { + const message = `NamedArg expected 2 children, got ${children.length}` + throw new CompilerError(message, node.from, node.to) + } + + const name = input.slice(namedArgPrefix.from, namedArgPrefix.to - 2) // Remove the trailing = + return { name, valueNode } +} + +export const getIfExprParts = (node: SyntaxNode, input: string) => { + const children = getAllChildren(node) + + const [ifKeyword, conditionNode, _colon, thenBlock, ...rest] = children + if (!ifKeyword || !conditionNode || !thenBlock) { + throw new CompilerError( + `IfExpr expected at least 4 children, got ${children.length}`, + node.from, + node.to + ) + } + + let elseIfBlocks: { conditional: SyntaxNode; thenBlock: SyntaxNode }[] = [] + let elseThenBlock: SyntaxNode | undefined + rest.forEach((child) => { + const parts = getAllChildren(child) + + if (child.type.id === terms.ElseExpr) { + if (parts.length !== 3) { // keyword, colon, ThenBlock + const message = `ElseExpr expected 3 children, got ${parts.length}` + throw new CompilerError(message, child.from, child.to) + } + elseThenBlock = parts.at(-1) + } else if (child.type.id === terms.ElsifExpr) { + const [_keyword, conditional, _colon, thenBlock] = parts + if (!conditional || !thenBlock) { + const names = parts.map((p) => p.type.name).join(', ') + const message = `ElsifExpr expected conditional and thenBlock, got ${names}` + throw new CompilerError(message, child.from, child.to) + } + + elseIfBlocks.push({ conditional, 
thenBlock }) + } + }) + + return { conditionNode, thenBlock, elseThenBlock, elseIfBlocks } +} diff --git a/src/editor/plugins/theme.tsx b/src/editor/plugins/theme.tsx index 7dbbbc5..ee9d4a7 100644 --- a/src/editor/plugins/theme.tsx +++ b/src/editor/plugins/theme.tsx @@ -54,6 +54,12 @@ export const shrimpTheme = EditorView.theme( outline: 'none', height: '100%', }, + '.cm-matchingBracket': { + backgroundColor: '#FF5370', + }, + '.cm-nonmatchingBracket': { + backgroundColor: '#C3E88D', + }, }, { dark: true } ) diff --git a/src/parser/parser.test.ts b/src/parser/parser.test.ts index 0aa725c..2adb6c9 100644 --- a/src/parser/parser.test.ts +++ b/src/parser/parser.test.ts @@ -2,6 +2,7 @@ import { expect, describe, test } from 'bun:test' import { afterEach } from 'bun:test' import { resetCommandSource, setCommandSource } from '#editor/commands' import { beforeEach } from 'bun:test' + import './shrimp.grammar' // Importing this so changes cause it to retest! describe('calling functions', () => { @@ -81,7 +82,7 @@ describe('Identifier', () => { }) describe('Parentheses', () => { - test('parses expressions with parentheses correctly', () => { + test('allows binOps with parentheses correctly', () => { expect('(2 + 3)').toMatchTree(` ParenExpr BinOp @@ -90,6 +91,54 @@ describe('Parentheses', () => { Number 3`) }) + test('allows numbers, strings, and booleans with parentheses correctly', () => { + expect('(42)').toMatchTree(` + ParenExpr + Number 42`) + + expect("('hello')").toMatchTree(` + ParenExpr + String hello`) + + expect('(true)').toMatchTree(` + ParenExpr + Boolean true`) + + expect('(false)').toMatchTree(` + ParenExpr + Boolean false`) + }) + + test('allows function calls in parens', () => { + expect('(echo 3)').toMatchTree(` + ParenExpr + FunctionCall + Identifier echo + PositionalArg + Number 3`) + + expect('(echo)').toMatchTree(` + ParenExpr + FunctionCallOrIdentifier + Identifier echo`) + }) + + test('allows conditionals in parens', () => { + expect('(a > 
b)').toMatchTree(` + ParenExpr + ConditionalOp + Identifier a + operator > + Identifier b`) + + expect('(a and b)').toMatchTree(` + ParenExpr + ConditionalOp + Identifier a + operator and + Identifier b`) + }) + test('allows parens in function calls', () => { expect('echo (3 + 3)').toMatchTree(` FunctionCall @@ -198,19 +247,19 @@ describe('Fn', () => { test('parses function no parameters', () => { expect('fn: 1').toMatchTree(` FunctionDef - fn fn + keyword fn Params - : : + colon : Number 1`) }) test('parses function with single parameter', () => { expect('fn x: x + 1').toMatchTree(` FunctionDef - fn fn + keyword fn Params Identifier x - : : + colon : BinOp Identifier x operator + @@ -220,11 +269,11 @@ describe('Fn', () => { test('parses function with multiple parameters', () => { expect('fn x y: x * y').toMatchTree(` FunctionDef - fn fn + keyword fn Params Identifier x Identifier y - : : + colon : BinOp Identifier x operator * @@ -237,11 +286,11 @@ describe('Fn', () => { x + 9 end`).toMatchTree(` FunctionDef - fn fn + keyword fn Params Identifier x Identifier y - : : + colon : BinOp Identifier x operator * @@ -280,11 +329,11 @@ describe('newlines', () => { y = 2`).toMatchTree(` Assign Identifier x - = = + operator = Number 5 Assign Identifier y - = = + operator = Number 2`) }) @@ -292,11 +341,11 @@ y = 2`).toMatchTree(` expect(`x = 5; y = 2`).toMatchTree(` Assign Identifier x - = = + operator = Number 5 Assign Identifier y - = = + operator = Number 2`) }) @@ -304,7 +353,7 @@ y = 2`).toMatchTree(` expect(`a = hello; 2`).toMatchTree(` Assign Identifier a - = = + operator = FunctionCallOrIdentifier Identifier hello Number 2`) @@ -316,7 +365,7 @@ describe('Assign', () => { expect('x = 5').toMatchTree(` Assign Identifier x - = = + operator = Number 5`) }) @@ -324,7 +373,7 @@ describe('Assign', () => { expect('x = 5 + 3').toMatchTree(` Assign Identifier x - = = + operator = BinOp Number 5 operator + @@ -335,13 +384,13 @@ describe('Assign', () => { expect('add = fn a b: 
a + b').toMatchTree(` Assign Identifier add - = = + operator = FunctionDef - fn fn + keyword fn Params Identifier a Identifier b - : : + colon : BinOp Identifier a operator + @@ -349,6 +398,141 @@ describe('Assign', () => { }) }) +describe('if/elsif/else', () => { + test('parses single line if', () => { + expect(`if y = 1: 'cool'`).toMatchTree(` + IfExpr + keyword if + ConditionalOp + Identifier y + operator = + Number 1 + colon : + ThenBlock + String cool + `) + + expect('a = if x: 2').toMatchTree(` + Assign + Identifier a + operator = + IfExpr + keyword if + Identifier x + colon : + ThenBlock + Number 2 + `) + }) + + test('parses multiline if', () => { + expect(` + if x < 9: + yes + end`).toMatchTree(` + IfExpr + keyword if + ConditionalOp + Identifier x + operator < + Number 9 + colon : + ThenBlock + FunctionCallOrIdentifier + Identifier yes + end end + `) + }) + + test('parses multiline if with else', () => { + expect(`if with-else: + x + else: + y + end`).toMatchTree(` + IfExpr + keyword if + Identifier with-else + colon : + ThenBlock + FunctionCallOrIdentifier + Identifier x + ElseExpr + keyword else + colon : + ThenBlock + FunctionCallOrIdentifier + Identifier y + end end + `) + }) + + test('parses multiline if with elsif', () => { + expect(`if with-elsif: + x + elsif another-condition: + y + end`).toMatchTree(` + IfExpr + keyword if + Identifier with-elsif + colon : + ThenBlock + FunctionCallOrIdentifier + Identifier x + ElsifExpr + keyword elsif + Identifier another-condition + colon : + ThenBlock + FunctionCallOrIdentifier + Identifier y + end end + `) + }) + + test('parses multiline if with multiple elsif and else', () => { + expect(`if with-elsif-else: + x + elsif another-condition: + y + elsif yet-another-condition: + z + else: + oh-no + end`).toMatchTree(` + IfExpr + keyword if + Identifier with-elsif-else + colon : + ThenBlock + FunctionCallOrIdentifier + Identifier x + ElsifExpr + keyword elsif + Identifier another-condition + colon : + ThenBlock + 
FunctionCallOrIdentifier + Identifier y + ElsifExpr + keyword elsif + Identifier yet-another-condition + colon : + ThenBlock + FunctionCallOrIdentifier + Identifier z + ElseExpr + keyword else + colon : + ThenBlock + FunctionCallOrIdentifier + Identifier oh-no + end end + `) + }) +}) + describe('multiline', () => { test('parses multiline strings', () => { expect(`'first'\n'second'`).toMatchTree(` @@ -367,16 +551,16 @@ describe('multiline', () => { `).toMatchTree(` Assign Identifier add - = = + operator = FunctionDef - fn fn + keyword fn Params Identifier a Identifier b - : : + colon : Assign Identifier result - = = + operator = BinOp Identifier a operator + @@ -406,11 +590,11 @@ end Number 3 FunctionDef - fn fn + keyword fn Params Identifier x Identifier y - : : + colon : FunctionCallOrIdentifier Identifier x end end diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index 6f0c82f..b51dfda 100644 --- a/src/parser/shrimp.grammar +++ b/src/parser/shrimp.grammar @@ -2,12 +2,7 @@ @skip { space } -@top Program { (statement | newlineOrSemicolon)+ eof?} - -statement { - line newlineOrSemicolon | - line eof -} +@top Program { item* } @tokens { @precedence { Number "-" } @@ -21,10 +16,20 @@ statement { space { " " | "\t" } leftParen { "(" } rightParen { ")" } - ":" - "fn" - "end" - "=" + colon[closedBy="end", @name="colon"] { ":" } + end[openedBy="colon", @name="end"] { "end" } + "fn" [@name=keyword] + "if" [@name=keyword] + "elsif" [@name=keyword] + "else" [@name=keyword] + "and" [@name=operator] + "or" [@name=operator] + "!=" [@name=operator] + "<" [@name=operator] + "<=" [@name=operator] + ">" [@name=operator] + ">=" [@name=operator] + "=" [@name=operator] "+"[@name=operator] "-"[@name=operator] "*"[@name=operator] @@ -39,28 +44,30 @@ statement { call } -line { - FunctionCall | - FunctionCallOrIdentifier | +item { + consumeToTerminator newlineOrSemicolon | + consumeToTerminator eof | + newlineOrSemicolon // allow blank lines +} + + +consumeToTerminator { 
+ ambiguousFunctionCall | + IfExpr | FunctionDef | Assign | + BinOp | expressionWithoutIdentifier } -expression { - expressionWithoutIdentifier | Identifier -} - -expressionWithoutIdentifier { - BinOp | - valueWithoutIdentifier -} - - FunctionCallOrIdentifier { Identifier } +ambiguousFunctionCall { + FunctionCall | FunctionCallOrIdentifier +} + FunctionCall { Identifier arg+ } @@ -70,11 +77,11 @@ arg { } PositionalArg { - value + expression } NamedArg { - NamedArgPrefix value + NamedArgPrefix expression } FunctionDef { @@ -82,11 +89,46 @@ FunctionDef { } singleLineFunctionDef { - "fn" Params ":" expression + "fn" Params colon consumeToTerminator } multilineFunctionDef { - "fn" Params ":" newlineOrSemicolon (line newlineOrSemicolon)* "end" + "fn" Params colon newlineOrSemicolon block end +} + +IfExpr { + singleLineIf | multilineIf +} + +singleLineIf { + "if" (ConditionalOp | expression) colon ThenBlock { consumeToTerminator } +} + +multilineIf { + "if" (ConditionalOp | expression) colon newlineOrSemicolon ThenBlock ElsifExpr* ElseExpr? 
end +} + +ElsifExpr { + "elsif" (ConditionalOp | expression) colon newlineOrSemicolon ThenBlock +} + +ElseExpr { + "else" colon newlineOrSemicolon ThenBlock +} + +ThenBlock { + block +} + +ConditionalOp { + expression "=" expression | + expression "!=" expression | + expression "<" expression | + expression "<=" expression | + expression ">" expression | + expression ">=" expression | + expression "and" (expression | ConditionalOp) | + expression "or" (expression | ConditionalOp) } Params { @@ -94,24 +136,36 @@ Params { } Assign { - Identifier "=" line + Identifier "=" consumeToTerminator } BinOp { - expression !multiplicative "*" expression | - expression !multiplicative "/" expression | - expression !additive "+" expression | - expression !additive "-" expression + (expression | BinOp) !multiplicative "*" (expression | BinOp) | + (expression | BinOp) !multiplicative "/" (expression | BinOp) | + (expression | BinOp) !additive "+" (expression | BinOp) | + (expression | BinOp) !additive "-" (expression | BinOp) } ParenExpr { - leftParen (expressionWithoutIdentifier | FunctionCall | FunctionCallOrIdentifier) rightParen + leftParen (ambiguousFunctionCall | BinOp | expressionWithoutIdentifier | ConditionalOp ) rightParen } -value { - valueWithoutIdentifier | Identifier +expression { + expressionWithoutIdentifier | Identifier } -valueWithoutIdentifier { +// We need expressionWithoutIdentifier to avoid conflicts in consumeToTerminator. +// Without this, when parsing "my-var" at statement level, the parser can't decide: +// - ambiguousFunctionCall → FunctionCallOrIdentifier → Identifier +// - expression → Identifier +// Both want the same Identifier token! So we use expressionWithoutIdentifier +// to remove Identifier from the second path, forcing standalone identifiers +// to go through ambiguousFunctionCall (which is what we want semantically). +// Yes, it is annoying and I gave up trying to use GLR to fix it. 
+expressionWithoutIdentifier { ParenExpr | Word | String | Number | Boolean } + +block { + (consumeToTerminator newlineOrSemicolon)* +} \ No newline at end of file diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts index e909d7b..31f08df 100644 --- a/src/parser/shrimp.terms.ts +++ b/src/parser/shrimp.terms.ts @@ -7,12 +7,19 @@ export const PositionalArg = 5, ParenExpr = 6, BinOp = 7, - FunctionCallOrIdentifier = 12, - String = 13, - Number = 14, - Boolean = 15, - NamedArg = 16, - NamedArgPrefix = 17, - FunctionDef = 18, - Params = 20, - Assign = 23 + ConditionalOp = 12, + String = 21, + Number = 22, + Boolean = 23, + NamedArg = 24, + NamedArgPrefix = 25, + FunctionCallOrIdentifier = 26, + IfExpr = 27, + colon = 29, + ThenBlock = 31, + ElsifExpr = 32, + ElseExpr = 34, + end = 36, + FunctionDef = 37, + Params = 39, + Assign = 40 diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 2564936..8674a14 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -4,16 +4,20 @@ import {tokenizer} from "./tokenizer" import {highlighting} from "./highlight" export const parser = LRParser.deserialize({ version: 14, - states: "'[OVQTOOOqQPO'#DTO!zQUO'#DTO#XQPOOOOQO'#DS'#DSO#xQTO'#CbOOQS'#DQ'#DQO$PQTO'#DVOOQO'#Cn'#CnOOQO'#C}'#C}O$XQPO'#C|OOQS'#Cu'#CuQ$aQTOOOOQS'#DP'#DPOOQS'#Ca'#CaO$hQTO'#ClOOQS'#DO'#DOOOQS'#Cv'#CvO$oQUO,58zO%SQTO,59_O%^QTO,58}O%^QTO,58}O%eQPO,58|O%vQUO'#DTO%}QPO,58|OOQS'#Cw'#CwO&SQTO'#CpO&[QPO,59qOOQS,59h,59hOOQS-E6s-E6sQOQPOOOOQS,59W,59WOOQS-E6t-E6tOOQO1G.y1G.yOOQO'#DT'#DTOOQO1G.i1G.iO&aQPO1G.iOOQS1G.h1G.hOOQS-E6u-E6uO&xQTO1G/]O'SQPO7+$wO'hQTO7+$xO'uQPO'#CxO'zQTO<OAN>O", - stateData: "([~OoOS~OPQOQUO]UO^UO_UOcVOuTO{ZO~OWwXXwXYwXZwX{qX|qX~OP]OQUO]UO^UO_UOa_OuTOWwXXwXYwXZwX~OhcO{[X|[X~P!VOWdOXdOYeOZeO~OQUO]UO^UO_UOuTO~OPgO~P#gOPiOedP~O{lO|lO~O|nO~PVOP]O~P#gOP]Oa_O{Sa|SaxSa~P#gOPQOcVO~P#gOPrO~P#gOxuOWwXXwXYwXZwX~Ox[X~P!VOxuO~OPiOedX~OewO~OWdOXdOYViZVi{Vi|VixVi~OPrO{yO~P#gOWdOXdOYeOZeO{yq|yq~OPQOcVOf|O~P#gO{}O~OPQOcVOf!PO~P#gO^Z~", - 
goto: "%d{PPPP|!W!]!lPPPP|PPP!WP!wP#OPP!wP#R#X#`#fPPP#l#p#{$Q$YP$k$zP%]%]YXO[cy{RhTV`QbgkUOQT[_bcdegwy{cSOT[cdewy{ZXO[cy{RkVQ[ORm[SbQgRpbQjVRvjQ{yR!O{TZO[SYO[QqcTzy{VaQbgU^QbgRo_bSOT[cdewy{X]Q_bgYPO[cy{QfTVrdew[ROT[cy{QsdQteRxwZWO[cy{", - nodeNames: "⚠ Identifier Word Program FunctionCall PositionalArg ParenExpr BinOp operator operator operator operator FunctionCallOrIdentifier String Number Boolean NamedArg NamedArgPrefix FunctionDef fn Params : end Assign =", - maxTerm: 44, + states: "+pQVQTOOOtQPO'#CcO!SQPO'#D`O!yQTO'#CbOOQS'#Dd'#DdO#QQPO'#DcO#fQUO'#DcOOQO'#Da'#DaO$VQTO'#DgOOQO'#Cw'#CwO$^QTO'#DkOOQO'#DR'#DROOQO'#D`'#D`O$fQPO'#D_OOQS'#D_'#D_OOQS'#DV'#DVQVQTOOO$VQTO,58}O$VQTO,58}O%YQPO'#CcO%jQPO,58|O%vQPO,58|O&sQPO,58|O&zQUO'#DcOOQS'#Dc'#DcOOQS'#Ca'#CaO$VQTO'#CtOOQS'#Db'#DbOOQS'#DW'#DWO'hQUO,58zO'{QTO,59pO(YQPO,5:RO(aQPO,5:ROOQS'#DZ'#DZO(fQTO'#DTO(nQPO,5:VOOQS,59y,59yOOQS-E7T-E7TOOQO1G.i1G.iO(sQPO1G.iO$VQTO,59SO$VQTO,59SOOQS1G.h1G.hOOQS,59`,59`OOQS-E7U-E7UOOQO1G/[1G/[O)[QTO1G/mOOQS-E7X-E7XO)lQTO1G/qOOQO1G.n1G.nO)|QPO1G.nOOQO'#Cz'#CzOOQO7+%X7+%XO*WQTO7+%YOOQO7+%]7+%]O*nQTO7+%^O+OQPO'#DXO+TQTO'#DjOOQO'#C{'#C{O+kQPO<`O,WQPO'#DOOOQOAN>`AN>`O,RQPOAN>`OOQOAN>dAN>dO,]QPO,59hO,dQPO,59hOOQO-E7W-E7WOOQOG23zG23zO,iQPOG23zO,nQPO,59jO,sQPO1G/SOOQOLD)fLD)fO*nQTO1G/UO*WQTO7+$nOOQO7+$p7+$pOOQO<