diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..e9e226e --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "packages/ReefVM"] + path = packages/ReefVM + url = git@54.219.130.253:defunkt/ReefVM.git diff --git a/README.md b/README.md index 437b913..eb3e113 100644 --- a/README.md +++ b/README.md @@ -13,15 +13,9 @@ Shrimp is a shell-like scripting language that combines the simplicity of comman - **Unbound symbols become strings** - `echo hello` treats `hello` as a string if not defined - **Simplicity over cleverness** - Each feature should work one way, consistently. Two simple features that are easy to explain beat one complex feature that requires lots of explanation -## Current Status & Goals - -### Today's Implementation Goals -1. ✅ **Interpreter Setup** - Renamed evaluator to interpreter for clarity -2. **Command Execution** - Support calling external commands and built-in functions -3. **Variable Assignment** - Implement assignment with validation using Lezer context tracking - ### Parser Features -- ✅ Distinguishes between identifiers (assignable) and words (non-assignable) + +- ✅ Distinguishes between identifiers (assignable) and words (non-assignable) - ✅ Smart tokenization for named args (`lines=30` splits, but `./path=value` stays together) - ✅ Handles ambiguous cases (bare identifier could be function call or variable reference) @@ -30,6 +24,7 @@ Shrimp is a shell-like scripting language that combines the simplicity of comman See `src/parser/example.shrimp` for language examples and `src/parser/shrimp.grammar` for the full grammar. ### Key Token Types + - **Identifier** - Lowercase/emoji start, can contain dashes/numbers (assignable) - **Word** - Any non-whitespace that isn't a valid identifier (paths, URLs, etc.)
- **FunctionCall** - Identifier followed by arguments diff --git a/bun.lock b/bun.lock index b15057d..ca7e5c4 100644 --- a/bun.lock +++ b/bun.lock @@ -9,6 +9,7 @@ "bun-plugin-tailwind": "^0.0.15", "codemirror": "^6.0.2", "hono": "^4.9.8", + "reefvm": "workspace:*", "tailwindcss": "^4.1.11", }, "devDependencies": { @@ -17,6 +18,15 @@ "@types/bun": "latest", }, }, + "packages/ReefVM": { + "name": "reefvm", + "devDependencies": { + "@types/bun": "latest", + }, + "peerDependencies": { + "typescript": "^5", + }, + }, }, "packages": { "@codemirror/autocomplete": ["@codemirror/autocomplete@6.19.0", "", { "dependencies": { "@codemirror/language": "^6.0.0", "@codemirror/state": "^6.0.0", "@codemirror/view": "^6.17.0", "@lezer/common": "^1.0.0" } }, "sha512-61Hfv3cF07XvUxNeC3E7jhG8XNi1Yom1G0lRC936oLnlF+jrbrv8rc/J98XlYzcsAoTVupfsf5fLej1aI8kyIg=="], @@ -61,6 +71,8 @@ "hono": ["hono@4.9.8", "", {}, "sha512-JW8Bb4RFWD9iOKxg5PbUarBYGM99IcxFl2FPBo2gSJO11jjUDqlP1Bmfyqt8Z/dGhIQ63PMA9LdcLefXyIasyg=="], + "reefvm": ["reefvm@workspace:packages/ReefVM"], + "style-mod": ["style-mod@4.1.2", "", {}, "sha512-wnD1HyVqpJUI2+eKZ+eo1UwghftP6yuFheBqqe+bWCotBjC2K1YnteJILRMs3SM4V/0dLEW1SC27MWP5y+mwmw=="], "tailwindcss": ["tailwindcss@4.1.13", "", {}, "sha512-i+zidfmTqtwquj4hMEwdjshYYgMbOrPzb9a0M3ZgNa0JMoZeFC6bxZvO8yr8ozS6ix2SDz0+mvryPeBs2TFE+w=="], @@ -70,5 +82,9 @@ "undici-types": ["undici-types@7.12.0", "", {}, "sha512-goOacqME2GYyOZZfb5Lgtu+1IDmAlAEu5xnD3+xTzS10hT0vzpf0SPjkXwAw9Jm+4n/mQGDP3LO8CPbYROeBfQ=="], "w3c-keyname": ["w3c-keyname@2.2.8", "", {}, "sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ=="], + + "reefvm/@types/bun": ["@types/bun@1.2.23", "", { "dependencies": { "bun-types": "1.2.23" } }, "sha512-le8ueOY5b6VKYf19xT3McVbXqLqmxzPXHsQT/q9JHgikJ2X22wyTW3g3ohz2ZMnp7dod6aduIiq8A14Xyimm0A=="], + + "reefvm/@types/bun/bun-types": ["bun-types@1.2.23", "", { "dependencies": { "@types/node": "*" }, "peerDependencies": { "@types/react": "^19" } }, 
"sha512-R9f0hKAZXgFU3mlrA0YpE/fiDvwV0FT9rORApt2aQVWSuJDzZOyB5QLc0N/4HF57CS8IXJ6+L5E4W1bW6NS2Aw=="], } } diff --git a/package.json b/package.json index c83ee53..9553e6b 100644 --- a/package.json +++ b/package.json @@ -3,12 +3,16 @@ "version": "0.1.0", "private": true, "type": "module", + "workspaces": [ + "packages/*" + ], "scripts": { "pretest": "bun generate-parser", "serve": "bun --hot src/server/server.tsx", "generate-parser": "lezer-generator src/parser/shrimp.grammar --typeScript -o src/parser/shrimp.ts" }, "dependencies": { + "reefvm": "workspace:*", "@codemirror/view": "^6.38.3", "@lezer/generator": "^1.8.0", "bun-plugin-tailwind": "^0.0.15", diff --git a/packages/ReefVM b/packages/ReefVM new file mode 160000 index 0000000..146b0a2 --- /dev/null +++ b/packages/ReefVM @@ -0,0 +1 @@ +Subproject commit 146b0a28831161e03966746acce7d5fc7fe2229d diff --git a/src/compiler/compiler.test.ts b/src/compiler/compiler.test.ts new file mode 100644 index 0000000..5d8aad4 --- /dev/null +++ b/src/compiler/compiler.test.ts @@ -0,0 +1,89 @@ +import { describe } from 'bun:test' +import { expect, test } from 'bun:test' + +describe('compiler', () => { + test('number literal', () => { + expect('42').toEvaluateTo(42) + }) + + test('negative number', () => { + expect('-5').toEvaluateTo(-5) + }) + + test('string literal', () => { + expect(`'hello'`).toEvaluateTo('hello') + }) + + test('boolean true', () => { + expect('true').toEvaluateTo(true) + }) + + test('boolean false', () => { + expect('false').toEvaluateTo(false) + }) + + test('addition', () => { + expect('2 + 3').toEvaluateTo(5) + }) + + test('subtraction', () => { + expect('10 - 4').toEvaluateTo(6) + }) + + test('multiplication', () => { + expect('3 * 4').toEvaluateTo(12) + }) + + test('division', () => { + expect('15 / 3').toEvaluateTo(5) + }) + + test('assign number', () => { + expect('x = 5; x').toEvaluateTo(5) + }) + + test('emoji assignment to number', () => { + expect('💎 = 5; 💎').toEvaluateTo(5) + }) + + test('unbound 
/**
 * Compiles Shrimp source text into ReefVM bytecode.
 *
 * All work happens in the constructor: the input is parsed with the Lezer
 * parser, the tree is checked for error nodes, every top-level node is lowered
 * to textual VM instructions, the collected function bodies are appended as
 * labelled sections, and the resulting listing is handed to `toBytecode`.
 *
 * @throws Error - a `CompilerError` raised during compilation is re-thrown as a
 *   plain `Error` whose message starts with `Compiler Error:`; anything else is
 *   re-thrown as `Unknown error during compilation:`.
 */
export class Compiler {
  // Not read anywhere in this class: function labels are numbered from `labels.size`.
  fnCounter = 0
  /** Textual instructions of the main program, in emission order. */
  instructions: string[] = []
  /** Function label (e.g. `.func_0`) -> instructions of that function's body. */
  labels = new Map<string, string[]>()
  /** Result of `toBytecode` for the full instruction listing. */
  bytecode: Bytecode

  /**
   * @param input Shrimp source text to compile.
   */
  constructor(public input: string) {
    try {
      const cst = parser.parse(input)
      const errors = checkTreeForErrors(cst, input)

      if (errors.length > 0) {
        throw new CompilerError(`Syntax errors found:\n${errors.join('\n')}`, 0, input.length)
      }

      this.#compileCst(cst, input)

      // Function bodies are emitted after the main program (which ends in HALT),
      // each under its own label and terminated by RETURN.
      for (const [label, labelInstructions] of this.labels) {
        this.instructions.push(`${label}:`)
        this.instructions.push(...labelInstructions.map((instr) => ` ${instr}`))
        this.instructions.push(' RETURN')
      }

      // console.log(`🌭`, this.instructions.join('\n'))
      this.bytecode = toBytecode(this.instructions.join('\n'))
    } catch (error) {
      if (error instanceof CompilerError) {
        throw new Error(`Compiler Error:\n${error.toReadableString(input)}`)
      } else {
        throw new Error(`Unknown error during compilation:\n${errorMessage(error)}`)
      }
    }
  }

  /**
   * Lowers every direct child of the top `Program` node, in order, and then
   * terminates the main program with `HALT`.
   */
  #compileCst(cst: Tree, input: string) {
    const isProgram = cst.topNode.type.id === terms.Program
    assert(isProgram, `Expected Program node, got ${cst.topNode.type.name}`)

    let child = cst.topNode.firstChild
    while (child) {
      this.instructions.push(...this.#compileNode(child, input))
      child = child.nextSibling
    }

    this.instructions.push('HALT')
  }

  /**
   * Lowers a single syntax node to the list of textual instructions that
   * evaluates it. Function definitions additionally register their body under
   * a fresh label in `labels`.
   *
   * @throws CompilerError for node types or binary operators that are not handled.
   */
  #compileNode(node: SyntaxNode, input: string): string[] {
    const value = input.slice(node.from, node.to)
    switch (node.type.id) {
      case terms.Number:
        return [`PUSH ${value}`]

      case terms.String: {
        // Drop the surrounding quotes and every backslash.
        // NOTE(review): this also removes escaped backslashes and does not escape
        // embedded double quotes, so a source string containing `"` yields a
        // malformed `PUSH "..."` line — confirm against ReefVM's literal syntax.
        const strValue = value.slice(1, -1).replace(/\\/g, '')
        return [`PUSH "${strValue}"`]
      }

      case terms.Boolean: {
        return [`PUSH ${value}`]
      }

      case terms.Identifier: {
        return [`TRY_LOAD ${value}`]
      }

      // For now, just treat them all like identifiers
      case terms.FunctionCallOrIdentifier: {
        return [`TRY_LOAD ${value}`]
      }

      case terms.BinOp: {
        const { left, op, right } = getBinaryParts(node)
        const instructions: string[] = []
        // Operands first (left, then right), operator last.
        instructions.push(...this.#compileNode(left, input))
        instructions.push(...this.#compileNode(right, input))

        const opValue = input.slice(op.from, op.to)
        switch (opValue) {
          case '+':
            instructions.push('ADD')
            break
          case '-':
            instructions.push('SUB')
            break
          case '*':
            instructions.push('MUL')
            break
          case '/':
            instructions.push('DIV')
            break
          default:
            throw new CompilerError(`Unsupported binary operator: ${opValue}`, op.from, op.to)
        }

        return instructions
      }

      case terms.Assign: {
        const { identifier, right } = getAssignmentParts(node)
        const instructions: string[] = []
        instructions.push(...this.#compileNode(right, input))
        const identifierName = input.slice(identifier.from, identifier.to)
        instructions.push(`STORE ${identifierName}`)

        return instructions
      }

      case terms.ParenExpr: {
        const child = node.firstChild
        if (!child) return [] // I guess it is empty parentheses?

        return this.#compileNode(child, input)
      }

      case terms.FunctionDef: {
        const { paramNames, bodyNode } = getFunctionDefParts(node, input)
        const functionName = `.func_${this.labels.size}`
        if (this.labels.has(functionName)) {
          throw new CompilerError(`Function name collision: ${functionName}`, node.from, node.to)
        }

        // Register the label before compiling the body so that a function
        // defined inside this body is numbered after it.
        const bodyInstructions: string[] = []
        this.labels.set(functionName, bodyInstructions)
        bodyInstructions.push(...this.#compileNode(bodyNode, input))

        return [`MAKE_FUNCTION (${paramNames}) ${functionName}`]
      }

      /*
      ### Function Calls
      Stack order (bottom to top):

      LOAD fn
      PUSH arg1    ; Positional args
      PUSH arg2
      PUSH "name"  ; Named arg key
      PUSH "value" ; Named arg value
      PUSH 2       ; Positional count
      PUSH 1       ; Named count
      CALL
      */
      case terms.FunctionCall: {
        const { identifierNode, namedArgs, positionalArgs } = getFunctionCallParts(node, input)
        const instructions: string[] = []
        instructions.push(...this.#compileNode(identifierNode, input))

        positionalArgs.forEach((arg) => {
          instructions.push(...this.#compileNode(arg, input))
        })

        namedArgs.forEach((arg) => {
          const { name, valueNode } = getNamedArgParts(arg, input)
          instructions.push(`PUSH "${name}"`)
          instructions.push(...this.#compileNode(valueNode, input))
        })

        instructions.push(`PUSH ${positionalArgs.length}`)
        instructions.push(`PUSH ${namedArgs.length}`)
        instructions.push(`CALL`)
        return instructions
      }

      default:
        throw new CompilerError(`Unsupported syntax node: ${node.type.name}`, node.from, node.to)
    }
  }
}
// Helper functions for extracting node parts

/** Returns the direct children of `node`, in document order. */
const getAllChildren = (node: SyntaxNode): SyntaxNode[] => {
  const children: SyntaxNode[] = []
  let child = node.firstChild
  while (child) {
    children.push(child)
    child = child.nextSibling
  }
  return children
}

/**
 * Splits a BinOp node into its left operand, operator and right operand.
 *
 * @throws CompilerError when the node has fewer than three children.
 */
const getBinaryParts = (node: SyntaxNode) => {
  const children = getAllChildren(node)
  const [left, op, right] = children

  if (!left || !op || !right) {
    throw new CompilerError(`BinOp expected 3 children, got ${children.length}`, node.from, node.to)
  }

  return { left, op, right }
}

/**
 * Splits an Assign node into the target identifier and the right-hand side.
 *
 * @throws CompilerError when the first child is not an Identifier or when the
 *   `=` / right-hand side children are missing.
 */
const getAssignmentParts = (node: SyntaxNode) => {
  const children = getAllChildren(node)
  const [left, equals, right] = children

  if (!left || left.type.id !== terms.Identifier) {
    throw new CompilerError(
      `Assign left child must be an Identifier, got ${left ? left.type.name : 'none'}`,
      node.from,
      node.to
    )
  } else if (!equals || !right) {
    throw new CompilerError(
      `Assign expected 3 children, got ${children.length}`,
      node.from,
      node.to
    )
  }

  return { identifier: left, right }
}

/**
 * Walks the whole tree and returns one message per error node, each holding
 * the node's offsets and the source text it covers.
 */
const checkTreeForErrors = (tree: Tree, input: string): string[] => {
  const errors: string[] = []
  tree.iterate({
    enter: (node) => {
      if (node.type.isError) {
        const errorText = input.slice(node.from, node.to)
        errors.push(`Syntax error at ${node.from}-${node.to}: "${errorText}"`)
      }
    },
  })

  return errors
}

/**
 * Extracts the space-separated parameter names and the body node of a
 * FunctionDef (`fn <params> : <body>`).
 *
 * NOTE(review): only the fourth child is returned as the body; any children
 * after it (further expressions and the closing `end` of a multi-line
 * function) are ignored here — confirm this is intended.
 *
 * @throws CompilerError when fewer than four children are present or when a
 *   parameter is not an Identifier.
 */
const getFunctionDefParts = (node: SyntaxNode, input: string) => {
  const children = getAllChildren(node)
  const [fnKeyword, paramsNode, colon, bodyNode] = children

  if (!fnKeyword || !paramsNode || !colon || !bodyNode) {
    throw new CompilerError(
      `FunctionDef expected at least 4 children, got ${children.length}`,
      node.from,
      node.to
    )
  }

  const paramNames = getAllChildren(paramsNode)
    .map((param) => {
      if (param.type.id !== terms.Identifier) {
        throw new CompilerError(
          `FunctionDef params must be Identifiers, got ${param.type.name}`,
          param.from,
          param.to
        )
      }
      return input.slice(param.from, param.to)
    })
    .join(' ')

  return { paramNames, bodyNode }
}

/**
 * Splits a FunctionCall node into the callee node, the NamedArg nodes, and
 * the unwrapped value nodes of the PositionalArg children.
 *
 * @throws CompilerError when the call has no children or a PositionalArg is empty.
 */
const getFunctionCallParts = (node: SyntaxNode, input: string) => {
  const [identifierNode, ...args] = getAllChildren(node)

  if (!identifierNode) {
    throw new CompilerError(`FunctionCall expected at least 1 child, got 0`, node.from, node.to)
  }

  const namedArgs = args.filter((arg) => arg.type.id === terms.NamedArg)
  const positionalArgs = args
    .filter((arg) => arg.type.id === terms.PositionalArg)
    .map((arg) => {
      const child = arg.firstChild
      if (!child) throw new CompilerError(`PositionalArg has no child`, arg.from, arg.to)

      return child
    })

  return { identifierNode, namedArgs, positionalArgs }
}

/**
 * Splits a NamedArg node (`name=value`) into the argument name and its value node.
 *
 * @throws CompilerError when the prefix or the value child is missing.
 */
const getNamedArgParts = (node: SyntaxNode, input: string) => {
  const children = getAllChildren(node)
  const [namedArgPrefix, valueNode] = children

  if (!namedArgPrefix || !valueNode) {
    const message = `NamedArg expected 2 children, got ${children.length}`
    throw new CompilerError(message, node.from, node.to)
  }

  // The prefix token covers `name=`; drop only the single trailing `=`.
  const name = input.slice(namedArgPrefix.from, namedArgPrefix.to - 1)
  return { name, valueNode }
}
/**
 * Editor keymap: Cmd-Enter compiles and runs the whole document.
 */
export const shrimpKeymap = keymap.of([
  {
    key: 'Cmd-Enter',
    run: (view) => {
      const input = view.state.doc.toString()
      // Fire-and-forget: runInput catches its own errors and reports both
      // results and failures through outputSignal, so nothing is awaited here.
      void runInput(input)
      return true
    },
  },
])

/**
 * Compiles `input`, runs the resulting bytecode on a new VM and emits the
 * stringified result value on `outputSignal`. Any error thrown while compiling
 * or running is logged and emitted as `{ error }` instead of propagating.
 */
const runInput = async (input: string): Promise<void> => {
  try {
    const compiler = new Compiler(input)
    const vm = new VM(compiler.bytecode)
    const output = await vm.run()
    outputSignal.emit({ output: String(output.value) })
  } catch (error) {
    log.error(error)
    outputSignal.emit({ error: `${errorMessage(error)}` })
  }
}
() => { -// expect('10 - 4').toEvaluateTo(6) -// }) - -// test('multiplication', () => { -// expect('3 * 4').toEvaluateTo(12) -// }) - -// test('division', () => { -// expect('15 / 3').toEvaluateTo(5) -// }) - -// test('assign number', () => { -// expect('x = 5').toEvaluateTo(5) -// }) - -// test('emoji assignment to number', () => { -// expect('💎 = 5').toEvaluateTo(5) -// }) - -// test('assign string', () => { -// expect(`name = 'Alice'`).toEvaluateTo('Alice') -// }) - -// test('assign expression', () => { -// expect('sum = 2 + 3').toEvaluateTo(5) -// }) - -// test('parentheses', () => { -// expect('(2 + 3) * 4').toEvaluateTo(20) -// }) - -// test('simple command', () => { -// const commands: CommandShape[] = [ -// { -// command: 'echo', -// args: [{ name: 'text', type: 'string' }], -// execute: (text: string) => text, -// }, -// ] - -// withCommands(commands, () => { -// expect(`echo 'hello'`).toEvaluateTo('hello') -// }) -// }) - -// test.only('function', () => { -// expect(`add = fn a b: a + b; add 2 4`).toEvaluateTo(5) -// }) - -// const withCommands = (commands: CommandShape[], fn: () => void) => { -// try { -// setCommandSource(() => commands) -// fn() -// } catch (e) { -// throw e -// } finally { -// resetCommandSource() -// } -// } diff --git a/src/interpreter/evaluator.ts b/src/interpreter/evaluator.ts deleted file mode 100644 index 3d9231c..0000000 --- a/src/interpreter/evaluator.ts +++ /dev/null @@ -1,377 +0,0 @@ -import { Tree, type SyntaxNode } from '@lezer/common' -import * as terms from '../parser/shrimp.terms.ts' -import { RuntimeError } from '#interpreter/runtimeError.ts' -import { assert } from 'console' -import { assertNever } from '#utils/utils.tsx' -import { matchingCommands, type CommandShape } from '#editor/commands.ts' - -export const evaluate = (input: string, tree: Tree, context: Context) => { - let result = undefined - let child = tree.topNode.firstChild - try { - while (child) { - result = evaluateNode(child, input, context) - child = 
child.nextSibling - } - } catch (error) { - if (error instanceof RuntimeError) { - throw new Error(error.toReadableString(input)) - } else { - throw new Error('Unknown error during evaluation') - } - } - - return result -} - -const evaluateNode = (node: SyntaxNode, input: string, context: Context): any => { - try { - const evalNode = syntaxNodeToEvalNode(node, input, context) - return evaluateEvalNode(evalNode, input, context) - } catch (error) { - if (error instanceof RuntimeError) { - throw error - } else { - console.error(error) - throw new RuntimeError('Error evaluating node', node.from, node.to) - } - } -} - -type ResolvedArg = { - value: any - resolved: boolean -} - -const evaluateEvalNode = (evalNode: EvalNode, input: string, context: Context): any => { - switch (evalNode.kind) { - case 'number': - case 'string': - case 'boolean': - return evalNode.value - - case 'identifier': { - const name = evalNode.name - if (context.has(name)) { - return context.get(name) - } else { - throw new RuntimeError(`Undefined variable "${name}"`, evalNode.node.from, evalNode.node.to) - } - } - - case 'assignment': { - const name = evalNode.name - const value = evaluateEvalNode(evalNode.value, input, context) - context.set(name, value) - return value - } - - case 'binop': { - const left = evaluateEvalNode(evalNode.left, input, context) - const right = evaluateEvalNode(evalNode.right, input, context) - - if (evalNode.op === '+') { - return left + right - } else if (evalNode.op === '-') { - return left - right - } else if (evalNode.op === '*') { - return left * right - } else if (evalNode.op === '/') { - return left / right - } else { - throw new RuntimeError( - `Unsupported operator "${evalNode.op}"`, - evalNode.node.from, - evalNode.node.to - ) - } - } - - case 'function': { - const func = (...args: any[]) => { - if (args.length !== evalNode.params.length) { - throw new RuntimeError( - `Function expected ${evalNode.params.length} arguments, got ${args.length}`, - 
evalNode.node.from, - evalNode.node.to - ) - } - - // Create new context with parameter bindings - const localContext = new Map(context) - evalNode.params.forEach((param, index) => { - localContext.set(param, args[index]) - }) - - // Evaluate function body with new context - return evaluateEvalNode(evalNode.body, input, localContext) - } - - return func - } - - case 'command': { - const { match: command } = matchingCommands(evalNode.name) - if (!command) { - const { from, to } = evalNode.node - throw new RuntimeError(`Unknown command "${evalNode.name}"`, from, to) - } - - const resolvedArgs: ResolvedArg[] = command.args.map((argShape) => ({ - value: argShape.default, - resolved: argShape.optional ? true : argShape.default !== undefined, - })) - - // Filter the args into named and positional - const namedArgNodes: NamedArgEvalNode[] = [] - const positionalArgNodes: PositionalArgEvalNode[] = [] - evalNode.args.forEach((arg) => { - const isNamedArg = 'name' in arg && arg.name !== undefined - isNamedArg ? 
namedArgNodes.push(arg) : positionalArgNodes.push(arg) - }) - - // First set the named args - namedArgNodes.forEach((arg) => { - const shapeIndex = command.args.findIndex((def) => def.name === arg.name) - const shape = command.args[shapeIndex] - - if (!shape) { - const { from, to } = arg.node - throw new RuntimeError(`Unknown argument "${arg.name}"`, from, to) - } else if (resolvedArgs[shapeIndex]?.resolved) { - const { from, to } = arg.node - throw new RuntimeError(`Argument "${arg.name}" already set`, from, to) - } - - const value = evaluateEvalNode(arg.value, input, context) - resolvedArgs[shapeIndex] = { value, resolved: true } - }) - - // Now set the positional args in order - let unresolvedIndex = resolvedArgs.findIndex((arg) => !arg.resolved) - positionalArgNodes.forEach((arg) => { - const value = evaluateEvalNode(arg.value, input, context) - if (unresolvedIndex === -1) { - const { from, to } = arg.node - throw new RuntimeError(`Too many positional arguments`, from, to) - } - - resolvedArgs[unresolvedIndex] = { value, resolved: true } - unresolvedIndex = resolvedArgs.findIndex((arg) => !arg.resolved) - }) - - let executor - if (typeof command.execute === 'string') { - throw new RuntimeError( - `Path-based commands aren't supported yet...`, - evalNode.node.from, - evalNode.node.to - ) - // Dynamic imports are not supported in Bun test environment - // See: - // const { default: importedExecutor } = await import(command.execute) - // executor = importedExecutor - // if (typeof executor !== 'function') { - // throw new RuntimeError( - // `Module "${command.execute}" for command ${command.command} does not export a default function`, - // evalNode.node.from, - // evalNode.node.to - // ) - // } - } else { - executor = command.execute - } - - const argValues = resolvedArgs.map((arg) => arg.value) - const result = executor(...argValues) - return result - } - - default: - assertNever(evalNode) - } -} - -type Operators = '+' | '-' | '*' | '/' -type Context = Map 
-type NamedArgEvalNode = { kind: 'arg'; value: EvalNode; name: string; node: SyntaxNode } -type PositionalArgEvalNode = { kind: 'arg'; value: EvalNode; node: SyntaxNode } -type ArgEvalNode = NamedArgEvalNode | PositionalArgEvalNode -type IdentifierEvalNode = { kind: 'identifier'; name: string; node: SyntaxNode } -type EvalNode = - | { kind: 'number'; value: number; node: SyntaxNode } - | { kind: 'string'; value: string; node: SyntaxNode } - | { kind: 'boolean'; value: boolean; node: SyntaxNode } - | { kind: 'binop'; op: Operators; left: EvalNode; right: EvalNode; node: SyntaxNode } - | { kind: 'assignment'; name: string; value: EvalNode; node: SyntaxNode } - | { kind: 'command'; name: string; args: ArgEvalNode[]; node: SyntaxNode } - | { kind: 'function'; params: string[]; body: EvalNode; node: SyntaxNode } - | IdentifierEvalNode - -const syntaxNodeToEvalNode = (node: SyntaxNode, input: string, context: Context): EvalNode => { - const value = input.slice(node.from, node.to) - - switch (node.type.id) { - case terms.Number: - return { kind: 'number', value: parseFloat(value), node } - - case terms.String: - return { kind: 'string', value: value.slice(1, -1), node } // Remove quotes - - case terms.Boolean: - return { kind: 'boolean', value: value === 'true', node } - - case terms.Identifier: - return { kind: 'identifier', name: value, node } - - case terms.BinOp: { - const { left, op, right } = getBinaryParts(node) - const opString = input.slice(op.from, op.to) as Operators - const leftNode = syntaxNodeToEvalNode(left, input, context) - const rightNode = syntaxNodeToEvalNode(right, input, context) - return { kind: 'binop', op: opString, left: leftNode, right: rightNode, node } - } - - case terms.Assignment: { - const { identifier, value: expr } = getAssignmentParts(node) - const name = input.slice(identifier.from, identifier.to) - const value = syntaxNodeToEvalNode(expr, input, context) - return { kind: 'assignment', name, value, node } - } - - case terms.ParenExpr: { 
- const expr = getParenParts(node) - return syntaxNodeToEvalNode(expr, input, context) - } - - case terms.CommandCall: { - const { commandName, argNodes } = extractCommand(node, input) - - const args = argNodes.map((argNode) => { - const children = getAllChildren(argNode) - - if (argNode.type.id === terms.Arg) { - const [child] = children - if (!child) { - throw new Error(`Parser bug: Arg node has ${children.length} children, expected 1`) - } - const value = syntaxNodeToEvalNode(child, input, context) - return { kind: 'arg', value, node: argNode } as const - } - - if (argNode.type.id === terms.NamedArg) { - const [nameChild, valueChild] = children - if (!nameChild || !valueChild) { - throw new Error(`Parser bug: NamedArg node has ${children.length} children, expected 2`) - } - const namePrefix = input.slice(nameChild.from, nameChild.to) - const name = namePrefix.slice(0, -1) // Remove '=' - const value = syntaxNodeToEvalNode(valueChild, input, context) - return { kind: 'arg', name, value, node: argNode } as const - } - - throw new Error(`Parser bug: Unexpected arg node type: ${argNode.type.name}`) - }) - - return { kind: 'command', name: commandName, args, node } - } - - case terms.Function: { - const children = getAllChildren(node) - if (children.length < 3) { - throw new Error( - `Parser bug: Function node has ${children.length} children, expected at least 3` - ) - } - - // Structure: fn params : body - const [_fn, paramsNode, _colon, ...bodyNodes] = children - - // Extract parameter names - const paramNodes = getAllChildren(paramsNode) - const params = paramNodes.map((paramNode) => { - if (paramNode.type.id !== terms.Identifier) { - throw new Error(`Parser bug: Function parameter is not an identifier`) - } - return input.slice(paramNode.from, paramNode.to) - }) - - // For now, assume body is a single expression (the rest of the children) - const bodyNode = bodyNodes[0] - if (!bodyNode) { - throw new Error(`Parser bug: Function missing body`) - } - - const body = 
syntaxNodeToEvalNode(bodyNode, input, context) - return { kind: 'function', params, body, node } - } - } - - throw new RuntimeError(`Unsupported node type "${node.type.name}"`, node.from, node.to) -} - -// Helper functions for extracting node parts -const getAllChildren = (node: SyntaxNode): SyntaxNode[] => { - const children: SyntaxNode[] = [] - let child = node.firstChild - while (child) { - children.push(child) - child = child.nextSibling - } - return children -} - -const getBinaryParts = (node: SyntaxNode) => { - const children = getAllChildren(node) - const [left, op, right] = children - - if (!left || !op || !right) { - throw new RuntimeError(`BinOp expected 3 children, got ${children.length}`, node.from, node.to) - } - - return { left, op, right } -} - -const getAssignmentParts = (node: SyntaxNode) => { - const children = getAllChildren(node) - const [identifier, _equals, value] = children - - if (!identifier || !_equals || !value) { - throw new RuntimeError( - `Assignment expected 3 children, got ${children.length}`, - node.from, - node.to - ) - } - - return { identifier, value } -} - -const getParenParts = (node: SyntaxNode) => { - const children = getAllChildren(node) - const [_leftParen, expr, _rightParen] = children - - if (!_leftParen || !expr || !_rightParen) { - throw new RuntimeError( - `ParenExpr expected 3 children, got ${children.length}`, - node.from, - node.to - ) - } - - return expr -} - -const extractCommand = (node: SyntaxNode, input: string) => { - const children = getAllChildren(node) - const commandNode = children[0] // The Command node - - if (!commandNode || commandNode.type.id !== terms.Command) { - throw new RuntimeError('Invalid command structure', node.from, node.to) - } - - const commandNameNode = commandNode.firstChild ?? 
commandNode - const commandName = input.slice(commandNameNode.from, commandNameNode.to) - const argNodes = children.slice(1) // All the Arg/NamedArg nodes - return { commandName, commandNode, argNodes } -} diff --git a/src/parser/shrimp.test.ts b/src/parser/parser.test.ts similarity index 76% rename from src/parser/shrimp.test.ts rename to src/parser/parser.test.ts index 5348eba..7d11038 100644 --- a/src/parser/shrimp.test.ts +++ b/src/parser/parser.test.ts @@ -61,12 +61,6 @@ describe('calling functions', () => { `) }) - test.skip('when no commands match, falls back to Identifier', () => { - expect('omgwtf').toMatchTree(` - Identifier omgwtf - `) - }) - test('Incomplete namedArg', () => { expect('tail lines=').toMatchTree(` FunctionCall @@ -108,6 +102,16 @@ describe('Parentheses', () => { Number 3`) }) + test('a word can be contained in parens', () => { + expect('(basename ./cool)').toMatchTree(` + ParenExpr + FunctionCall + Identifier basename + PositionalArg + Word ./cool + `) + }) + test('nested parentheses', () => { expect('(2 + (1 * 4))').toMatchTree(` ParenExpr @@ -192,17 +196,16 @@ describe('BinOp', () => { describe('Fn', () => { test('parses function no parameters', () => { - expect('fn: 1 end').toMatchTree(` + expect('fn: 1').toMatchTree(` FunctionDef fn fn Params : : - Number 1 - end end`) + Number 1`) }) test('parses function with single parameter', () => { - expect('fn x: x + 1 end').toMatchTree(` + expect('fn x: x + 1').toMatchTree(` FunctionDef fn fn Params @@ -211,12 +214,11 @@ describe('Fn', () => { BinOp Identifier x operator + - Number 1 - end end`) + Number 1`) }) test('parses function with multiple parameters', () => { - expect('fn x y: x * y end').toMatchTree(` + expect('fn x y: x * y').toMatchTree(` FunctionDef fn fn Params @@ -226,8 +228,7 @@ describe('Fn', () => { BinOp Identifier x operator * - Identifier y - end end`) + Identifier y`) }) test('parses multiline function with multiple statements', () => { @@ -273,10 +274,55 @@ 
describe('ambiguity', () => { }) }) -describe('Assignment', () => { +describe('newlines', () => { + test('parses multiple statements separated by newlines', () => { + expect(`x = 5 +y = 2`).toMatchTree(` + Assign + Identifier x + = = + Number 5 + Assign + Identifier y + = = + Number 2`) + }) + + test('parses statements separated by semicolons', () => { + expect(`x = 5; y = 2`).toMatchTree(` + Assign + Identifier x + = = + Number 5 + Assign + Identifier y + = = + Number 2`) + }) + + test('parses statement with word and a semicolon', () => { + expect(`a = hello; 2`).toMatchTree(` + Assign + Identifier a + = = + FunctionCallOrIdentifier + Identifier hello + Number 2`) + }) +}) + +describe('Assign', () => { + test('parses simple assignment', () => { + expect('x = 5').toMatchTree(` + Assign + Identifier x + = = + Number 5`) + }) + test('parses assignment with addition', () => { expect('x = 5 + 3').toMatchTree(` - Assignment + Assign Identifier x = = BinOp @@ -286,8 +332,8 @@ describe('Assignment', () => { }) test('parses assignment with functions', () => { - expect('add = fn a b: a + b end').toMatchTree(` - Assignment + expect('add = fn a b: a + b').toMatchTree(` + Assign Identifier add = = FunctionDef @@ -299,7 +345,31 @@ describe('Assignment', () => { BinOp Identifier a operator + - Identifier b - end end`) + Identifier b`) + }) +}) + +describe('whitespace', () => { + test('trims leading and trailing whitespace in expected tree', () => { + expect(` + 3 + + + fn x y: + x +end + +`).toMatchTree(` + Number 3 + + FunctionDef + fn fn + Params + Identifier x + Identifier y + : : + Identifier x + end end + `) }) }) diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index f4c897d..304d33e 100644 --- a/src/parser/shrimp.grammar +++ b/src/parser/shrimp.grammar @@ -2,7 +2,12 @@ @skip { space } -@top Program { (line newline)* } +@top Program { (statement | newlineOrSemicolon)+ eof?} + +statement { + line newlineOrSemicolon | + line eof +} @tokens { @precedence { 
Number "-" } @@ -11,8 +16,9 @@ Number { "-"? $[0-9]+ ('.' $[0-9]+)? } Boolean { "true" | "false" } String { '\'' !["]* '\'' } - newline { "\n" | @eof } - space { " " } + newlineOrSemicolon { "\n" | ";" } + eof { @eof } + space { " " | "\t" } leftParen { "(" } rightParen { ")" } ":" @@ -37,7 +43,7 @@ line { FunctionCall | FunctionCallOrIdentifier | FunctionDef | - Assignment | + Assign | expressionWithoutIdentifier } @@ -76,18 +82,18 @@ FunctionDef { } singleLineFunctionDef { - "fn" Params ":" expression "end" + "fn" Params ":" expression } multiLineFunctionDef { - "fn" Params ":" newline (expression newline)* "end" + "fn" Params ":" newlineOrSemicolon (expression newlineOrSemicolon)* "end" } Params { Identifier* } -Assignment { +Assign { Identifier "=" line } diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts index c76ef37..e909d7b 100644 --- a/src/parser/shrimp.terms.ts +++ b/src/parser/shrimp.terms.ts @@ -15,4 +15,4 @@ export const NamedArgPrefix = 17, FunctionDef = 18, Params = 20, - Assignment = 23 + Assign = 23 diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index ef659f6..5de7b07 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -4,16 +4,16 @@ import {tokenizer} from "./tokenizers" import {highlighting} from "./highlight.js" export const parser = LRParser.deserialize({ version: 14, - states: "'UQVQTOOOnQPO'#DSO!tQUO'#DSO#OQPOOOOQO'#DR'#DRO#oQTO'#CbOOQS'#DP'#DPO#vQTO'#DUOOQO'#Cn'#CnOOQO'#C|'#C|O$OQPO'#CuQVQTOOOOQS'#DO'#DOOOQS'#Ca'#CaO$TQTO'#ClOOQS'#C}'#C}OOQS'#Cv'#CvO$[QUO,58zOVQTO,59_O$lQTO,58}O$lQTO,58}O$sQPO,58|O%UQUO'#DSO%]QPO,58|OOQS'#Cw'#CwO%bQTO'#CpO%jQPO,59pOOQS,59a,59aOOQS-E6s-E6sOOQS,59W,59WOOQS-E6t-E6tOOQO1G.y1G.yOOQO'#DS'#DSOOQO1G.i1G.iO%oQPO1G.iOOQS1G.h1G.hOOQS-E6u-E6uO&WQTO1G/[O&bQPO7+$vO&sQTO7+$wOOQO<OAN>O", + stateData: 
"(b~OoOS~OPQOQUO]UO^UO_UOcVOuTO{ZO~OWwXXwXYwXZwX{qX|qX~OP]OQUO]UO^UO_UOa_OuTOWwXXwXYwXZwX~OhcO{[X|[X~P!VOWdOXdOYeOZeO~OQUO]UO^UO_UOuTO~OPgO~P#gOPiOedP~O{lO|lO~O|nO~PVOP]O~P#gOP]Oa_O{Sa|SaxSa~P#gOPQOcVO~P#gOPrO~P#gOxuOWwXXwXYwXZwX~Ox[X~P!VOxuO~OPiOedX~OewO~OWdOXdOYViZVi{Vi|VixVi~OPrO{yO~P#gOWdOXdOYeOZeO{yq|yq~OPrOf|O~P#gOWdOXdOYeOZeO{}O~OPrOf!PO~P#gO^Z~", + goto: "%[{PPPP|!U!Z!jPPPP|PPP!UP!uP!zPP!uP!}#T#[#bPPP#h#l#s#x$QP$c$rP%V%VUXO[cRhTV`QbgkUOQT[_bcdegwy{cSOT[cdewy{VXO[cRkVQ[ORm[SbQgRpbQjVRvjQ{yR!O{TZO[SYO[RqcVaQbgU^QbgRo_bSOT[cdewy{X]Q_bgUPO[cQfTZrdewy{WROT[cQsdQteQxwTzy{VWO[c", + nodeNames: "⚠ Identifier Word Program FunctionCall PositionalArg ParenExpr BinOp operator operator operator operator FunctionCallOrIdentifier String Number Boolean NamedArg NamedArgPrefix FunctionDef fn Params : end Assign =", + maxTerm: 44, propSources: [highlighting], skippedNodes: [0], repeatNodeCount: 4, - tokenData: "(t~RcYZ!^pq!cwx!hxy#lyz#qz{#v{|#{}!O$Q!P!Q$s!Q![$Y![!]$x!_!`$}#T#X%S#X#Y%b#Y#Z&]#Z#h%S#h#i(U#i#o%S~~!^~!cOz~~!hOo~~!kUOr!hsw!hwx!}x;'S!h;'S;=`#f<%lO!h~#SU]~Or!hsw!hwx!}x;'S!h;'S;=`#f<%lO!h~#iP;=`<%l!h~#qOt~~#vOw~~#{OW~~$QOY~~$VPZ~!Q![$Y~$_Q^~!O!P$e!Q![$Y~$hP!Q![$k~$pP^~!Q![$k~$xOX~~$}Oe~~%SOh~Q%VQ!_!`%]#T#o%SQ%bOaQR%eS!_!`%]#T#b%S#b#c%q#c#o%SR%tS!_!`%]#T#W%S#W#X&Q#X#o%SR&VQfP!_!`%]#T#o%S~&`T!_!`%]#T#U&o#U#b%S#b#c'y#c#o%S~&rS!_!`%]#T#`%S#`#a'O#a#o%S~'RS!_!`%]#T#g%S#g#h'_#h#o%S~'bS!_!`%]#T#X%S#X#Y'n#Y#o%S~'sQ_~!_!`%]#T#o%SR(OQcP!_!`%]#T#o%S~(XS!_!`%]#T#f%S#f#g(e#g#o%S~(hS!_!`%]#T#i%S#i#j'_#j#o%S", + tokenData: 
")P~ReXY!dYZ!ipq!dwx!nxy#ryz#wz{#|{|$R}!O$W!P!Q$y!Q![$`![!]%O!]!^!i!_!`%T#T#X%Y#X#Y%h#Y#Z&c#Z#h%Y#h#i([#i#o%Y~~(z~!iOo~~!nO{~~!qUOr!nsw!nwx#Tx;'S!n;'S;=`#l<%lO!n~#YU]~Or!nsw!nwx#Tx;'S!n;'S;=`#l<%lO!n~#oP;=`<%l!n~#wOu~~#|Ox~~$ROW~~$WOY~~$]PZ~!Q![$`~$eQ^~!O!P$k!Q![$`~$nP!Q![$q~$vP^~!Q![$q~%OOX~~%TOe~~%YOh~Q%]Q!_!`%c#T#o%YQ%hOaQR%kS!_!`%c#T#b%Y#b#c%w#c#o%YR%zS!_!`%c#T#W%Y#W#X&W#X#o%YR&]QfP!_!`%c#T#o%Y~&fT!_!`%c#T#U&u#U#b%Y#b#c(P#c#o%Y~&xS!_!`%c#T#`%Y#`#a'U#a#o%Y~'XS!_!`%c#T#g%Y#g#h'e#h#o%Y~'hS!_!`%c#T#X%Y#X#Y't#Y#o%Y~'yQ_~!_!`%c#T#o%YR(UQcP!_!`%c#T#o%Y~(_S!_!`%c#T#f%Y#f#g(k#g#o%Y~(nS!_!`%c#T#i%Y#i#j'e#j#o%Y~)PO|~", tokenizers: [0, 1, tokenizer], topRules: {"Program":[0,3]}, - tokenPrec: 302 + tokenPrec: 337 }) diff --git a/src/parser/tokenizers.ts b/src/parser/tokenizers.ts index 07b6a8c..66c53a6 100644 --- a/src/parser/tokenizers.ts +++ b/src/parser/tokenizers.ts @@ -13,6 +13,16 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack ch = getFullCodePoint(input, pos) if (isWhitespace(ch) || ch === -1) break + // Certain characters might end a word or identifier if they are followed by whitespace. + // This allows things like `a = hello; 2` or a = (basename ./file.txt) + // to work as expected. 
+ if (canBeWord && (ch === 59 /* ; */ || ch === 41) /* ) */) { + const nextCh = getFullCodePoint(input, pos + 1) + if (isWhitespace(nextCh) || nextCh === -1) { + break + } + } + // Track identifier validity if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 && !isEmoji(ch)) { if (!canBeWord) break diff --git a/src/testSetup.ts b/src/testSetup.ts index 7f9c94b..0e73deb 100644 --- a/src/testSetup.ts +++ b/src/testSetup.ts @@ -2,8 +2,9 @@ import { expect } from 'bun:test' import { Tree, TreeCursor } from '@lezer/common' import { parser } from '#parser/shrimp' import { $ } from 'bun' -import { assert } from '#utils/utils' -import { evaluate } from '#interpreter/evaluator' +import { assert, assertNever, errorMessage } from '#utils/utils' +import { Compiler } from '#compiler/compiler' +import { VM, type Value } from 'reefvm' const regenerateParser = async () => { let generate = true @@ -32,7 +33,8 @@ declare module 'bun:test' { toMatchTree(expected: string): T toMatchExpression(expected: string): T toFailParse(): T - toEvaluateTo(expected: unknown): T + toEvaluateTo(expected: unknown): Promise + toFailEvaluation(): Promise } } @@ -91,40 +93,22 @@ expect.extend({ } }, - toEvaluateTo(received: unknown, expected: unknown) { + async toEvaluateTo(received: unknown, expected: unknown) { assert(typeof received === 'string', 'toEvaluateTo can only be used with string values') try { - const tree = parser.parse(received) - let hasErrors = false - tree.iterate({ - enter(n) { - if (n.type.isError) { - hasErrors = true - return false - } - }, - }) + const compiler = new Compiler(received) + const vm = new VM(compiler.bytecode) + await vm.run() + const result = await vm.run() + const value = VMResultToValue(result) - if (hasErrors) { - const actual = treeToString(tree, received) - return { - message: () => - `Expected input to evaluate successfully, but it had syntax errors:\n${actual}`, - pass: false, - } + if (value === expected) { + return { pass: true } } else { - const 
context = new Map() - const result = evaluate(received, tree, context) - if (Object.is(result, expected)) { - return { pass: true } - } else { - const expectedStr = JSON.stringify(expected) - const resultStr = JSON.stringify(result) - return { - message: () => `Expected evaluation to be ${expectedStr}, but got ${resultStr}`, - pass: false, - } + return { + message: () => `Expected evaluation to be ${expected}, but got ${value}`, + pass: false, } } } catch (error) { @@ -134,6 +118,26 @@ expect.extend({ } } }, + + async toFailEvaluation(received: unknown) { + assert(typeof received === 'string', 'toFailEvaluation can only be used with string values') + + try { + const compiler = new Compiler(received) + const vm = new VM(compiler.bytecode) + await vm.run() + + return { + message: () => `Expected evaluation to fail, but it succeeded.`, + pass: false, + } + } catch (error) { + return { + message: () => `Evaluation failed as expected: ${errorMessage(error)}`, + pass: true, + } + } + }, }) const treeToString = (tree: Tree, input: string): string => { @@ -187,9 +191,23 @@ const trimWhitespace = (str: string): string => { .join('\n') } -const expectString = (value: unknown): string => { - if (typeof value !== 'string') { - throw new Error('Expected a string input') +const VMResultToValue = (result: Value): unknown => { + if (result.type === 'number' || result.type === 'boolean' || result.type === 'string') { + return result.value + } else if (result.type === 'null') { + return null + } else if (result.type === 'array') { + return result.value.map(VMResultToValue) + } else if (result.type === 'dict') { + const obj: Record = {} + for (const [key, val] of Object.entries(result.value)) { + obj[key] = VMResultToValue(val) + } + + return obj + } else if (result.type === 'function') { + return Function + } else { + assertNever(result) } - return value } diff --git a/src/utils/utils.tsx b/src/utils/utils.tsx index 75eab6f..b6d535f 100644 --- a/src/utils/utils.tsx +++ 
b/src/utils/utils.tsx @@ -12,7 +12,7 @@ export const errorMessage = (error: unknown) => { return String(error) } -export function assert(condition: boolean, message: string): asserts condition { +export function assert(condition: any, message: string): asserts condition { if (!condition) { throw new Error(message) }