This commit is contained in:
Corey Johnson 2025-10-08 13:56:17 -07:00
parent 4e16d84b3e
commit 7f52e5e7e3
19 changed files with 607 additions and 558 deletions

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "packages/ReefVM"]
path = packages/ReefVM
url = git@54.219.130.253:defunkt/ReefVM.git

View File

@ -13,15 +13,9 @@ Shrimp is a shell-like scripting language that combines the simplicity of comman
- **Unbound symbols become strings** - `echo hello` treats `hello` as a string if not defined
- **Simplicity over cleverness** - Each feature should work one way, consistently. Two simple features that are easy to explain beat one complex feature that requires lots of explanation
## Current Status & Goals
### Today's Implementation Goals
1. ✅ **Interpreter Setup** - Renamed evaluator to interpreter for clarity
2. **Command Execution** - Support calling external commands and built-in functions
3. **Variable Assignment** - Implement assignment with validation using Lezer context tracking
### Parser Features
- ✅ Distinguishes between identifiers (assignable) and words (non-assignable)
- ✅ Distinguishes between identifiers (assignable) and words (non-assignable)
- ✅ Smart tokenization for named args (`lines=30` splits, but `./path=value` stays together)
- ✅ Handles ambiguous cases (bare identifier could be function call or variable reference)
@ -30,6 +24,7 @@ Shrimp is a shell-like scripting language that combines the simplicity of comman
See `src/parser/example.shrimp` for language examples and `src/parser/shrimp.grammar` for the full grammar.
### Key Token Types
- **Identifier** - Lowercase/emoji start, can contain dashes/numbers (assignable)
- **Word** - Any non-whitespace that isn't a valid identifier (paths, URLs, etc.)
- **FunctionCall** - Identifier followed by arguments

View File

@ -9,6 +9,7 @@
"bun-plugin-tailwind": "^0.0.15",
"codemirror": "^6.0.2",
"hono": "^4.9.8",
"reefvm": "workspace:*",
"tailwindcss": "^4.1.11",
},
"devDependencies": {
@ -17,6 +18,15 @@
"@types/bun": "latest",
},
},
"packages/ReefVM": {
"name": "reefvm",
"devDependencies": {
"@types/bun": "latest",
},
"peerDependencies": {
"typescript": "^5",
},
},
},
"packages": {
"@codemirror/autocomplete": ["@codemirror/autocomplete@6.19.0", "", { "dependencies": { "@codemirror/language": "^6.0.0", "@codemirror/state": "^6.0.0", "@codemirror/view": "^6.17.0", "@lezer/common": "^1.0.0" } }, "sha512-61Hfv3cF07XvUxNeC3E7jhG8XNi1Yom1G0lRC936oLnlF+jrbrv8rc/J98XlYzcsAoTVupfsf5fLej1aI8kyIg=="],
@ -61,6 +71,8 @@
"hono": ["hono@4.9.8", "", {}, "sha512-JW8Bb4RFWD9iOKxg5PbUarBYGM99IcxFl2FPBo2gSJO11jjUDqlP1Bmfyqt8Z/dGhIQ63PMA9LdcLefXyIasyg=="],
"reefvm": ["reefvm@workspace:packages/ReefVM"],
"style-mod": ["style-mod@4.1.2", "", {}, "sha512-wnD1HyVqpJUI2+eKZ+eo1UwghftP6yuFheBqqe+bWCotBjC2K1YnteJILRMs3SM4V/0dLEW1SC27MWP5y+mwmw=="],
"tailwindcss": ["tailwindcss@4.1.13", "", {}, "sha512-i+zidfmTqtwquj4hMEwdjshYYgMbOrPzb9a0M3ZgNa0JMoZeFC6bxZvO8yr8ozS6ix2SDz0+mvryPeBs2TFE+w=="],
@ -70,5 +82,9 @@
"undici-types": ["undici-types@7.12.0", "", {}, "sha512-goOacqME2GYyOZZfb5Lgtu+1IDmAlAEu5xnD3+xTzS10hT0vzpf0SPjkXwAw9Jm+4n/mQGDP3LO8CPbYROeBfQ=="],
"w3c-keyname": ["w3c-keyname@2.2.8", "", {}, "sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ=="],
"reefvm/@types/bun": ["@types/bun@1.2.23", "", { "dependencies": { "bun-types": "1.2.23" } }, "sha512-le8ueOY5b6VKYf19xT3McVbXqLqmxzPXHsQT/q9JHgikJ2X22wyTW3g3ohz2ZMnp7dod6aduIiq8A14Xyimm0A=="],
"reefvm/@types/bun/bun-types": ["bun-types@1.2.23", "", { "dependencies": { "@types/node": "*" }, "peerDependencies": { "@types/react": "^19" } }, "sha512-R9f0hKAZXgFU3mlrA0YpE/fiDvwV0FT9rORApt2aQVWSuJDzZOyB5QLc0N/4HF57CS8IXJ6+L5E4W1bW6NS2Aw=="],
}
}

View File

@ -3,12 +3,16 @@
"version": "0.1.0",
"private": true,
"type": "module",
"workspaces": [
"packages/*"
],
"scripts": {
"pretest": "bun generate-parser",
"serve": "bun --hot src/server/server.tsx",
"generate-parser": "lezer-generator src/parser/shrimp.grammar --typeScript -o src/parser/shrimp.ts"
},
"dependencies": {
"reefvm": "workspace:*",
"@codemirror/view": "^6.38.3",
"@lezer/generator": "^1.8.0",
"bun-plugin-tailwind": "^0.0.15",

1
packages/ReefVM Submodule

@ -0,0 +1 @@
Subproject commit 146b0a28831161e03966746acce7d5fc7fe2229d

View File

@ -0,0 +1,89 @@
import { describe } from 'bun:test'
import { expect, test } from 'bun:test'
describe('compiler', () => {
  // Each row is [test name, Shrimp source, expected value]. Every row is
  // registered as its own test, in this order, and checked with the project's
  // custom `toEvaluateTo` matcher.
  // NOTE(review): the 'function' row passes the `Function` constructor as the
  // expected value; presumably the matcher treats that as a type check. Confirm
  // against the matcher definition.
  const cases: [name: string, source: string, expected: unknown][] = [
    ['number literal', '42', 42],
    ['negative number', '-5', -5],
    ['string literal', `'hello'`, 'hello'],
    ['boolean true', 'true', true],
    ['boolean false', 'false', false],
    ['addition', '2 + 3', 5],
    ['subtraction', '10 - 4', 6],
    ['multiplication', '3 * 4', 12],
    ['division', '15 / 3', 5],
    ['assign number', 'x = 5; x', 5],
    ['emoji assignment to number', '💎 = 5; 💎', 5],
    ['unbound identifier', 'a = hello; a', 'hello'],
    ['assign string', `name = 'Alice'; name`, 'Alice'],
    ['assign expression', 'sum = 2 + 3; sum', 5],
    ['parentheses', '(2 + 3) * 4', 20],
    ['function', `add = fn a b: a + b; add`, Function],
    ['function call', `add = fn a b: a + b; add 2 9`, 11],
  ]

  for (const [name, source, expected] of cases) {
    test(name, () => {
      expect(source).toEvaluateTo(expected)
    })
  }
})
describe('errors', () => {
  // A binary operator with no right-hand operand is expected to be rejected.
  const danglingOperator = '2 + '

  test('syntax error', () => {
    expect(danglingOperator).toFailEvaluation()
  })
})
describe('multiline tests', () => {
  // Registered with plain `test`: a `.only` focus marker is a debugging aid and
  // must not be committed, because it changes which tests the runner executes.
  // The template literal is whitespace-sensitive Shrimp source; keep it as is.
  test('multiline function', () => {
    expect(`
add = fn a b:
result = a + b
result
add 3 4
`).toEvaluateTo(7)
  })
})

297
src/compiler/compiler.ts Normal file
View File

@ -0,0 +1,297 @@
import { CompilerError } from '#compiler/compilerError.ts'
import { parser } from '#parser/shrimp.ts'
import * as terms from '#parser/shrimp.terms'
import type { SyntaxNode, Tree } from '@lezer/common'
import { assert, errorMessage } from '#utils/utils'
import { toBytecode, type Bytecode } from 'reefvm'
import { compile } from 'tailwindcss'
/**
 * Compiles Shrimp source text into ReefVM bytecode.
 *
 * Construction does all the work: the input is parsed, the tree is checked for
 * error nodes, every top-level node is translated into textual instructions,
 * the collected function bodies are appended as labelled blocks, and the
 * resulting text is handed to `toBytecode`. The result is exposed as `bytecode`.
 *
 * Any failure is rethrown as a plain `Error`: a `CompilerError` is rendered
 * with `toReadableString`, anything else is wrapped as an unknown error.
 */
export class Compiler {
  fnCounter = 0
  /** Main instruction stream, one textual instruction per entry. */
  instructions: string[] = []
  /** Function label -> instructions of that function's body. */
  labels = new Map<string, string[]>()
  bytecode: Bytecode

  constructor(public input: string) {
    try {
      const tree = parser.parse(input)
      const syntaxErrors = checkTreeForErrors(tree, input)
      if (syntaxErrors.length > 0) {
        throw new CompilerError(
          `Syntax errors found:\n${syntaxErrors.join('\n')}`,
          0,
          input.length
        )
      }

      this.#compileCst(tree, input)

      // Function bodies go after the main program (which ends in HALT), each
      // under its own label and closed with RETURN.
      for (const [label, body] of this.labels) {
        this.instructions.push(`${label}:`)
        for (const line of body) {
          this.instructions.push(` ${line}`)
        }
        this.instructions.push(' RETURN')
      }

      this.bytecode = toBytecode(this.instructions.join('\n'))
    } catch (error) {
      if (!(error instanceof CompilerError)) {
        throw new Error(`Unknown error during compilation:\n${errorMessage(error)}`)
      }
      throw new Error(`Compiler Error:\n${error.toReadableString(input)}`)
    }
  }

  /** Compiles each child of the top-level Program node, then emits HALT. */
  #compileCst(cst: Tree, input: string) {
    const root = cst.topNode
    assert(root.type.id === terms.Program, `Expected Program node, got ${root.type.name}`)

    for (let statement = root.firstChild; statement; statement = statement.nextSibling) {
      this.instructions.push(...this.#compileNode(statement, input))
    }
    this.instructions.push('HALT')
  }

  /**
   * Translates one syntax node into the instructions that evaluate it.
   *
   * Function definitions additionally register their body in `labels`.
   * Throws `CompilerError` for node types or operators that are not handled.
   */
  #compileNode(node: SyntaxNode, input: string): string[] {
    const text = input.slice(node.from, node.to)

    switch (node.type.id) {
      case terms.Number:
      case terms.Boolean:
        return [`PUSH ${text}`]

      case terms.String: {
        // Drop the surrounding quotes and remove every backslash.
        const unquoted = text.slice(1, -1).replace(/\\/g, '')
        return [`PUSH "${unquoted}"`]
      }

      // For now a FunctionCallOrIdentifier is treated like a plain identifier.
      case terms.Identifier:
      case terms.FunctionCallOrIdentifier:
        return [`TRY_LOAD ${text}`]

      case terms.BinOp: {
        const { left, op, right } = getBinaryParts(node)
        // Both operands are compiled before the operator is validated.
        const leftCode = this.#compileNode(left, input)
        const rightCode = this.#compileNode(right, input)

        const symbol = input.slice(op.from, op.to)
        const opcodes = new Map<string, string>([
          ['+', 'ADD'],
          ['-', 'SUB'],
          ['*', 'MUL'],
          ['/', 'DIV'],
        ])
        const opcode = opcodes.get(symbol)
        if (opcode === undefined) {
          throw new CompilerError(`Unsupported binary operator: ${symbol}`, op.from, op.to)
        }
        return [...leftCode, ...rightCode, opcode]
      }

      case terms.Assign: {
        const { identifier, right } = getAssignmentParts(node)
        const valueCode = this.#compileNode(right, input)
        const target = input.slice(identifier.from, identifier.to)
        return [...valueCode, `STORE ${target}`]
      }

      case terms.ParenExpr: {
        // A ParenExpr without a child produces no instructions.
        const inner = node.firstChild
        return inner ? this.#compileNode(inner, input) : []
      }

      case terms.FunctionDef: {
        const { paramNames, bodyNode } = getFunctionDefParts(node, input)
        const label = `.func_${this.labels.size}`
        if (this.labels.has(label)) {
          throw new CompilerError(`Function name collision: ${label}`, node.from, node.to)
        }

        // Register the label before compiling the body, so a function defined
        // inside this body is given a different label.
        const body: string[] = []
        this.labels.set(label, body)
        const definition = [`MAKE_FUNCTION (${paramNames}) ${label}`]
        body.push(...this.#compileNode(bodyNode, input))
        return definition
      }

      /*
      ### Function Calls
      Stack order (bottom to top):
      LOAD fn
      PUSH arg1 ; Positional args
      PUSH arg2
      PUSH "name" ; Named arg key
      PUSH "value" ; Named arg value
      PUSH 2 ; Positional count
      PUSH 1 ; Named count
      CALL
      */
      case terms.FunctionCall: {
        const { identifierNode, namedArgs, positionalArgs } = getFunctionCallParts(node, input)
        const code = [...this.#compileNode(identifierNode, input)]

        for (const arg of positionalArgs) {
          code.push(...this.#compileNode(arg, input))
        }
        for (const arg of namedArgs) {
          const { name, valueNode } = getNamedArgParts(arg, input)
          code.push(`PUSH "${name}"`)
          code.push(...this.#compileNode(valueNode, input))
        }

        code.push(`PUSH ${positionalArgs.length}`, `PUSH ${namedArgs.length}`, `CALL`)
        return code
      }

      default:
        throw new CompilerError(`Unsupported syntax node: ${node.type.name}`, node.from, node.to)
    }
  }
}
// Helper functions for extracting node parts
/** Returns the direct children of `node`, walking `firstChild` then `nextSibling`. */
const getAllChildren = (node: SyntaxNode): SyntaxNode[] => {
  const collected: SyntaxNode[] = []
  for (let cursor = node.firstChild; cursor; cursor = cursor.nextSibling) {
    collected.push(cursor)
  }
  return collected
}
/**
 * Splits a BinOp node into its left operand, operator and right operand
 * (its first three children). Throws `CompilerError` if any of them is missing.
 */
const getBinaryParts = (node: SyntaxNode) => {
  const parts = getAllChildren(node)
  const left = parts[0]
  const op = parts[1]
  const right = parts[2]

  if (left && op && right) {
    return { left, op, right }
  }
  throw new CompilerError(`BinOp expected 3 children, got ${parts.length}`, node.from, node.to)
}
/**
 * Splits an Assign node into the identifier being assigned and the node that
 * produces its value.
 *
 * Throws `CompilerError` when the first child is not an Identifier, or when
 * the `=` or the right-hand side is missing.
 */
const getAssignmentParts = (node: SyntaxNode) => {
  const parts = getAllChildren(node)
  const [target, equalsSign, right] = parts

  if (!target || target.type.id !== terms.Identifier) {
    const found = target ? target.type.name : 'none'
    throw new CompilerError(
      `Assign left child must be an Identifier, got ${found}`,
      node.from,
      node.to
    )
  }
  if (!equalsSign || !right) {
    throw new CompilerError(`Assign expected 3 children, got ${parts.length}`, node.from, node.to)
  }

  return { identifier: target, right }
}
/**
 * Walks the whole tree and returns one message per error node, containing the
 * node's offsets and the source text it covers. An empty array means no error
 * nodes were found.
 */
const checkTreeForErrors = (tree: Tree, input: string): string[] => {
  const messages: string[] = []

  tree.iterate({
    enter(ref) {
      if (!ref.type.isError) return
      const { from, to } = ref
      messages.push(`Syntax error at ${from}-${to}: "${input.slice(from, to)}"`)
    },
  })

  return messages
}
/**
 * Extracts the parameter list and the body from a FunctionDef node.
 *
 * The first four children are read as: the `fn` keyword, the Params node, the
 * `:` and the body node.
 *
 * @returns `paramNames` - the parameter names joined by single spaces, and
 *          `bodyNode` - the fourth child of the definition.
 * @throws CompilerError when any of the first four children is missing, or
 *         when a child of Params is not an Identifier.
 *
 * NOTE(review): only the fourth child is returned as the body. If a definition
 * has further children (more statements, a closing `end`), they are ignored
 * here. Confirm whether that is intended.
 */
const getFunctionDefParts = (node: SyntaxNode, input: string) => {
  const children = getAllChildren(node)
  const [fnKeyword, paramsNode, colon, bodyNode] = children

  if (!fnKeyword || !paramsNode || !colon || !bodyNode) {
    // The guard requires four children, so the message says so.
    throw new CompilerError(
      `FunctionDef expected at least 4 children, got ${children.length}`,
      node.from,
      node.to
    )
  }

  const paramNames = getAllChildren(paramsNode)
    .map((param) => {
      if (param.type.id !== terms.Identifier) {
        throw new CompilerError(
          `FunctionDef params must be Identifiers, got ${param.type.name}`,
          param.from,
          param.to
        )
      }
      return input.slice(param.from, param.to)
    })
    .join(' ')

  return { paramNames, bodyNode }
}
/**
 * Splits a FunctionCall node into the callee and its arguments.
 *
 * The first child is the callee. Of the remaining children, NamedArg nodes are
 * returned as they are, while each PositionalArg is replaced by its first
 * child. Children of any other type are ignored.
 *
 * Throws `CompilerError` if there is no callee or a PositionalArg is empty.
 */
const getFunctionCallParts = (node: SyntaxNode, input: string) => {
  const children = getAllChildren(node)
  const identifierNode = children[0]
  if (!identifierNode) {
    throw new CompilerError(`FunctionCall expected at least 1 child, got 0`, node.from, node.to)
  }

  const namedArgs: SyntaxNode[] = []
  const positionalArgs: SyntaxNode[] = []
  for (const arg of children.slice(1)) {
    if (arg.type.id === terms.NamedArg) {
      namedArgs.push(arg)
    }
    if (arg.type.id === terms.PositionalArg) {
      const inner = arg.firstChild
      if (!inner) throw new CompilerError(`PositionalArg has no child`, arg.from, arg.to)
      positionalArgs.push(inner)
    }
  }

  return { identifierNode, namedArgs, positionalArgs }
}
/**
 * Splits a NamedArg node into the argument name and the node holding its value.
 *
 * The first child is the name prefix and the second is the value.
 *
 * @returns `name` - the prefix text without its trailing `=`, and
 *          `valueNode` - the second child.
 * @throws CompilerError when either child is missing.
 */
const getNamedArgParts = (node: SyntaxNode, input: string) => {
  const children = getAllChildren(node)
  const [namedArgPrefix, valueNode] = children

  if (!namedArgPrefix || !valueNode) {
    const message = `NamedArg expected 2 children, got ${children.length}`
    throw new CompilerError(message, node.from, node.to)
  }

  // Assumes the prefix token covers `name=` (TODO confirm against the
  // tokenizer). Only the single trailing "=" is removed; subtracting 2 would
  // also cut the last character of the name.
  const name = input.slice(namedArgPrefix.from, namedArgPrefix.to - 1)
  return { name, valueNode }
}

View File

@ -1,7 +1,7 @@
export class RuntimeError extends Error {
export class CompilerError extends Error {
constructor(message: string, private from: number, private to: number) {
super(message)
this.name = 'RuntimeError'
this.name = 'CompilerError'
this.message = message
}

View File

@ -1,24 +1,28 @@
import { outputSignal } from '#editor/editor'
import { evaluate } from '#interpreter/evaluator'
import { parser } from '#parser/shrimp'
import { Compiler } from '#compiler/compiler'
import { errorMessage, log } from '#utils/utils'
import { keymap } from '@codemirror/view'
import { run, VM } from 'reefvm'
export const shrimpKeymap = keymap.of([
{
key: 'Cmd-Enter',
run: (view) => {
const input = view.state.doc.toString()
const context = new Map<string, any>()
try {
const tree = parser.parse(input)
const output = evaluate(input, tree, context)
outputSignal.emit({ output: String(output) })
} catch (error) {
log.error(error)
outputSignal.emit({ error: `${errorMessage(error)}` })
}
runInput(input)
return true
},
},
])
/**
 * Compiles `input`, runs the bytecode on a new VM and emits the stringified
 * result value on `outputSignal`. Any error thrown while compiling or running
 * is logged and emitted on `outputSignal` as an error message instead.
 */
const runInput = async (input: string) => {
  try {
    const { bytecode } = new Compiler(input)
    const result = await new VM(bytecode).run()
    outputSignal.emit({ output: String(result.value) })
  } catch (error) {
    log.error(error)
    outputSignal.emit({ error: `${errorMessage(error)}` })
  }
}

View File

@ -1,87 +0,0 @@
// import { resetCommandSource, setCommandSource, type CommandShape } from '#editor/commands'
// import { expect, test } from 'bun:test'
// test('number literal', () => {
// expect('42').toEvaluateTo(42)
// })
// test('negative number', () => {
// expect('-5').toEvaluateTo(-5)
// })
// test('string literal', () => {
// expect(`'hello'`).toEvaluateTo('hello')
// })
// test('boolean true', () => {
// expect('true').toEvaluateTo(true)
// })
// test('boolean false', () => {
// expect('false').toEvaluateTo(false)
// })
// test('addition', () => {
// expect('2 + 3').toEvaluateTo(5)
// })
// test('subtraction', () => {
// expect('10 - 4').toEvaluateTo(6)
// })
// test('multiplication', () => {
// expect('3 * 4').toEvaluateTo(12)
// })
// test('division', () => {
// expect('15 / 3').toEvaluateTo(5)
// })
// test('assign number', () => {
// expect('x = 5').toEvaluateTo(5)
// })
// test('emoji assignment to number', () => {
// expect('💎 = 5').toEvaluateTo(5)
// })
// test('assign string', () => {
// expect(`name = 'Alice'`).toEvaluateTo('Alice')
// })
// test('assign expression', () => {
// expect('sum = 2 + 3').toEvaluateTo(5)
// })
// test('parentheses', () => {
// expect('(2 + 3) * 4').toEvaluateTo(20)
// })
// test('simple command', () => {
// const commands: CommandShape[] = [
// {
// command: 'echo',
// args: [{ name: 'text', type: 'string' }],
// execute: (text: string) => text,
// },
// ]
// withCommands(commands, () => {
// expect(`echo 'hello'`).toEvaluateTo('hello')
// })
// })
// test.only('function', () => {
// expect(`add = fn a b: a + b; add 2 4`).toEvaluateTo(5)
// })
// const withCommands = (commands: CommandShape[], fn: () => void) => {
// try {
// setCommandSource(() => commands)
// fn()
// } catch (e) {
// throw e
// } finally {
// resetCommandSource()
// }
// }

View File

@ -1,377 +0,0 @@
import { Tree, type SyntaxNode } from '@lezer/common'
import * as terms from '../parser/shrimp.terms.ts'
import { RuntimeError } from '#interpreter/runtimeError.ts'
import { assert } from 'console'
import { assertNever } from '#utils/utils.tsx'
import { matchingCommands, type CommandShape } from '#editor/commands.ts'
export const evaluate = (input: string, tree: Tree, context: Context) => {
let result = undefined
let child = tree.topNode.firstChild
try {
while (child) {
result = evaluateNode(child, input, context)
child = child.nextSibling
}
} catch (error) {
if (error instanceof RuntimeError) {
throw new Error(error.toReadableString(input))
} else {
throw new Error('Unknown error during evaluation')
}
}
return result
}
const evaluateNode = (node: SyntaxNode, input: string, context: Context): any => {
try {
const evalNode = syntaxNodeToEvalNode(node, input, context)
return evaluateEvalNode(evalNode, input, context)
} catch (error) {
if (error instanceof RuntimeError) {
throw error
} else {
console.error(error)
throw new RuntimeError('Error evaluating node', node.from, node.to)
}
}
}
type ResolvedArg = {
value: any
resolved: boolean
}
const evaluateEvalNode = (evalNode: EvalNode, input: string, context: Context): any => {
switch (evalNode.kind) {
case 'number':
case 'string':
case 'boolean':
return evalNode.value
case 'identifier': {
const name = evalNode.name
if (context.has(name)) {
return context.get(name)
} else {
throw new RuntimeError(`Undefined variable "${name}"`, evalNode.node.from, evalNode.node.to)
}
}
case 'assignment': {
const name = evalNode.name
const value = evaluateEvalNode(evalNode.value, input, context)
context.set(name, value)
return value
}
case 'binop': {
const left = evaluateEvalNode(evalNode.left, input, context)
const right = evaluateEvalNode(evalNode.right, input, context)
if (evalNode.op === '+') {
return left + right
} else if (evalNode.op === '-') {
return left - right
} else if (evalNode.op === '*') {
return left * right
} else if (evalNode.op === '/') {
return left / right
} else {
throw new RuntimeError(
`Unsupported operator "${evalNode.op}"`,
evalNode.node.from,
evalNode.node.to
)
}
}
case 'function': {
const func = (...args: any[]) => {
if (args.length !== evalNode.params.length) {
throw new RuntimeError(
`Function expected ${evalNode.params.length} arguments, got ${args.length}`,
evalNode.node.from,
evalNode.node.to
)
}
// Create new context with parameter bindings
const localContext = new Map(context)
evalNode.params.forEach((param, index) => {
localContext.set(param, args[index])
})
// Evaluate function body with new context
return evaluateEvalNode(evalNode.body, input, localContext)
}
return func
}
case 'command': {
const { match: command } = matchingCommands(evalNode.name)
if (!command) {
const { from, to } = evalNode.node
throw new RuntimeError(`Unknown command "${evalNode.name}"`, from, to)
}
const resolvedArgs: ResolvedArg[] = command.args.map((argShape) => ({
value: argShape.default,
resolved: argShape.optional ? true : argShape.default !== undefined,
}))
// Filter the args into named and positional
const namedArgNodes: NamedArgEvalNode[] = []
const positionalArgNodes: PositionalArgEvalNode[] = []
evalNode.args.forEach((arg) => {
const isNamedArg = 'name' in arg && arg.name !== undefined
isNamedArg ? namedArgNodes.push(arg) : positionalArgNodes.push(arg)
})
// First set the named args
namedArgNodes.forEach((arg) => {
const shapeIndex = command.args.findIndex((def) => def.name === arg.name)
const shape = command.args[shapeIndex]
if (!shape) {
const { from, to } = arg.node
throw new RuntimeError(`Unknown argument "${arg.name}"`, from, to)
} else if (resolvedArgs[shapeIndex]?.resolved) {
const { from, to } = arg.node
throw new RuntimeError(`Argument "${arg.name}" already set`, from, to)
}
const value = evaluateEvalNode(arg.value, input, context)
resolvedArgs[shapeIndex] = { value, resolved: true }
})
// Now set the positional args in order
let unresolvedIndex = resolvedArgs.findIndex((arg) => !arg.resolved)
positionalArgNodes.forEach((arg) => {
const value = evaluateEvalNode(arg.value, input, context)
if (unresolvedIndex === -1) {
const { from, to } = arg.node
throw new RuntimeError(`Too many positional arguments`, from, to)
}
resolvedArgs[unresolvedIndex] = { value, resolved: true }
unresolvedIndex = resolvedArgs.findIndex((arg) => !arg.resolved)
})
let executor
if (typeof command.execute === 'string') {
throw new RuntimeError(
`Path-based commands aren't supported yet...`,
evalNode.node.from,
evalNode.node.to
)
// Dynamic imports are not supported in Bun test environment
// See:
// const { default: importedExecutor } = await import(command.execute)
// executor = importedExecutor
// if (typeof executor !== 'function') {
// throw new RuntimeError(
// `Module "${command.execute}" for command ${command.command} does not export a default function`,
// evalNode.node.from,
// evalNode.node.to
// )
// }
} else {
executor = command.execute
}
const argValues = resolvedArgs.map((arg) => arg.value)
const result = executor(...argValues)
return result
}
default:
assertNever(evalNode)
}
}
type Operators = '+' | '-' | '*' | '/'
type Context = Map<string, any>
type NamedArgEvalNode = { kind: 'arg'; value: EvalNode; name: string; node: SyntaxNode }
type PositionalArgEvalNode = { kind: 'arg'; value: EvalNode; node: SyntaxNode }
type ArgEvalNode = NamedArgEvalNode | PositionalArgEvalNode
type IdentifierEvalNode = { kind: 'identifier'; name: string; node: SyntaxNode }
type EvalNode =
| { kind: 'number'; value: number; node: SyntaxNode }
| { kind: 'string'; value: string; node: SyntaxNode }
| { kind: 'boolean'; value: boolean; node: SyntaxNode }
| { kind: 'binop'; op: Operators; left: EvalNode; right: EvalNode; node: SyntaxNode }
| { kind: 'assignment'; name: string; value: EvalNode; node: SyntaxNode }
| { kind: 'command'; name: string; args: ArgEvalNode[]; node: SyntaxNode }
| { kind: 'function'; params: string[]; body: EvalNode; node: SyntaxNode }
| IdentifierEvalNode
const syntaxNodeToEvalNode = (node: SyntaxNode, input: string, context: Context): EvalNode => {
const value = input.slice(node.from, node.to)
switch (node.type.id) {
case terms.Number:
return { kind: 'number', value: parseFloat(value), node }
case terms.String:
return { kind: 'string', value: value.slice(1, -1), node } // Remove quotes
case terms.Boolean:
return { kind: 'boolean', value: value === 'true', node }
case terms.Identifier:
return { kind: 'identifier', name: value, node }
case terms.BinOp: {
const { left, op, right } = getBinaryParts(node)
const opString = input.slice(op.from, op.to) as Operators
const leftNode = syntaxNodeToEvalNode(left, input, context)
const rightNode = syntaxNodeToEvalNode(right, input, context)
return { kind: 'binop', op: opString, left: leftNode, right: rightNode, node }
}
case terms.Assignment: {
const { identifier, value: expr } = getAssignmentParts(node)
const name = input.slice(identifier.from, identifier.to)
const value = syntaxNodeToEvalNode(expr, input, context)
return { kind: 'assignment', name, value, node }
}
case terms.ParenExpr: {
const expr = getParenParts(node)
return syntaxNodeToEvalNode(expr, input, context)
}
case terms.CommandCall: {
const { commandName, argNodes } = extractCommand(node, input)
const args = argNodes.map((argNode) => {
const children = getAllChildren(argNode)
if (argNode.type.id === terms.Arg) {
const [child] = children
if (!child) {
throw new Error(`Parser bug: Arg node has ${children.length} children, expected 1`)
}
const value = syntaxNodeToEvalNode(child, input, context)
return { kind: 'arg', value, node: argNode } as const
}
if (argNode.type.id === terms.NamedArg) {
const [nameChild, valueChild] = children
if (!nameChild || !valueChild) {
throw new Error(`Parser bug: NamedArg node has ${children.length} children, expected 2`)
}
const namePrefix = input.slice(nameChild.from, nameChild.to)
const name = namePrefix.slice(0, -1) // Remove '='
const value = syntaxNodeToEvalNode(valueChild, input, context)
return { kind: 'arg', name, value, node: argNode } as const
}
throw new Error(`Parser bug: Unexpected arg node type: ${argNode.type.name}`)
})
return { kind: 'command', name: commandName, args, node }
}
case terms.Function: {
const children = getAllChildren(node)
if (children.length < 3) {
throw new Error(
`Parser bug: Function node has ${children.length} children, expected at least 3`
)
}
// Structure: fn params : body
const [_fn, paramsNode, _colon, ...bodyNodes] = children
// Extract parameter names
const paramNodes = getAllChildren(paramsNode)
const params = paramNodes.map((paramNode) => {
if (paramNode.type.id !== terms.Identifier) {
throw new Error(`Parser bug: Function parameter is not an identifier`)
}
return input.slice(paramNode.from, paramNode.to)
})
// For now, assume body is a single expression (the rest of the children)
const bodyNode = bodyNodes[0]
if (!bodyNode) {
throw new Error(`Parser bug: Function missing body`)
}
const body = syntaxNodeToEvalNode(bodyNode, input, context)
return { kind: 'function', params, body, node }
}
}
throw new RuntimeError(`Unsupported node type "${node.type.name}"`, node.from, node.to)
}
// Helper functions for extracting node parts
const getAllChildren = (node: SyntaxNode): SyntaxNode[] => {
const children: SyntaxNode[] = []
let child = node.firstChild
while (child) {
children.push(child)
child = child.nextSibling
}
return children
}
const getBinaryParts = (node: SyntaxNode) => {
const children = getAllChildren(node)
const [left, op, right] = children
if (!left || !op || !right) {
throw new RuntimeError(`BinOp expected 3 children, got ${children.length}`, node.from, node.to)
}
return { left, op, right }
}
const getAssignmentParts = (node: SyntaxNode) => {
const children = getAllChildren(node)
const [identifier, _equals, value] = children
if (!identifier || !_equals || !value) {
throw new RuntimeError(
`Assignment expected 3 children, got ${children.length}`,
node.from,
node.to
)
}
return { identifier, value }
}
const getParenParts = (node: SyntaxNode) => {
const children = getAllChildren(node)
const [_leftParen, expr, _rightParen] = children
if (!_leftParen || !expr || !_rightParen) {
throw new RuntimeError(
`ParenExpr expected 3 children, got ${children.length}`,
node.from,
node.to
)
}
return expr
}
const extractCommand = (node: SyntaxNode, input: string) => {
const children = getAllChildren(node)
const commandNode = children[0] // The Command node
if (!commandNode || commandNode.type.id !== terms.Command) {
throw new RuntimeError('Invalid command structure', node.from, node.to)
}
const commandNameNode = commandNode.firstChild ?? commandNode
const commandName = input.slice(commandNameNode.from, commandNameNode.to)
const argNodes = children.slice(1) // All the Arg/NamedArg nodes
return { commandName, commandNode, argNodes }
}

View File

@ -61,12 +61,6 @@ describe('calling functions', () => {
`)
})
test.skip('when no commands match, falls back to Identifier', () => {
expect('omgwtf').toMatchTree(`
Identifier omgwtf
`)
})
test('Incomplete namedArg', () => {
expect('tail lines=').toMatchTree(`
FunctionCall
@ -108,6 +102,16 @@ describe('Parentheses', () => {
Number 3`)
})
test('a word can be contained in parens', () => {
expect('(basename ./cool)').toMatchTree(`
ParenExpr
FunctionCall
Identifier basename
PositionalArg
Word ./cool
`)
})
test('nested parentheses', () => {
expect('(2 + (1 * 4))').toMatchTree(`
ParenExpr
@ -192,17 +196,16 @@ describe('BinOp', () => {
describe('Fn', () => {
test('parses function no parameters', () => {
expect('fn: 1 end').toMatchTree(`
expect('fn: 1').toMatchTree(`
FunctionDef
fn fn
Params
: :
Number 1
end end`)
Number 1`)
})
test('parses function with single parameter', () => {
expect('fn x: x + 1 end').toMatchTree(`
expect('fn x: x + 1').toMatchTree(`
FunctionDef
fn fn
Params
@ -211,12 +214,11 @@ describe('Fn', () => {
BinOp
Identifier x
operator +
Number 1
end end`)
Number 1`)
})
test('parses function with multiple parameters', () => {
expect('fn x y: x * y end').toMatchTree(`
expect('fn x y: x * y').toMatchTree(`
FunctionDef
fn fn
Params
@ -226,8 +228,7 @@ describe('Fn', () => {
BinOp
Identifier x
operator *
Identifier y
end end`)
Identifier y`)
})
test('parses multiline function with multiple statements', () => {
@ -273,10 +274,55 @@ describe('ambiguity', () => {
})
})
describe('Assignment', () => {
describe('newlines', () => {
test('parses multiple statements separated by newlines', () => {
expect(`x = 5
y = 2`).toMatchTree(`
Assign
Identifier x
= =
Number 5
Assign
Identifier y
= =
Number 2`)
})
test('parses statements separated by semicolons', () => {
expect(`x = 5; y = 2`).toMatchTree(`
Assign
Identifier x
= =
Number 5
Assign
Identifier y
= =
Number 2`)
})
test('parses statement with word and a semicolon', () => {
expect(`a = hello; 2`).toMatchTree(`
Assign
Identifier a
= =
FunctionCallOrIdentifier
Identifier hello
Number 2`)
})
})
describe('Assign', () => {
test('parses simple assignment', () => {
expect('x = 5').toMatchTree(`
Assign
Identifier x
= =
Number 5`)
})
test('parses assignment with addition', () => {
expect('x = 5 + 3').toMatchTree(`
Assignment
Assign
Identifier x
= =
BinOp
@ -286,8 +332,8 @@ describe('Assignment', () => {
})
test('parses assignment with functions', () => {
expect('add = fn a b: a + b end').toMatchTree(`
Assignment
expect('add = fn a b: a + b').toMatchTree(`
Assign
Identifier add
= =
FunctionDef
@ -299,7 +345,31 @@ describe('Assignment', () => {
BinOp
Identifier a
operator +
Identifier b
end end`)
Identifier b`)
})
})
describe('whitespace', () => {
test('trims leading and trailing whitespace in expected tree', () => {
expect(`
3
fn x y:
x
end
`).toMatchTree(`
Number 3
FunctionDef
fn fn
Params
Identifier x
Identifier y
: :
Identifier x
end end
`)
})
})

View File

@ -2,7 +2,12 @@
@skip { space }
@top Program { (line newline)* }
@top Program { (statement | newlineOrSemicolon)+ eof?}
statement {
line newlineOrSemicolon |
line eof
}
@tokens {
@precedence { Number "-" }
@ -11,8 +16,9 @@
Number { "-"? $[0-9]+ ('.' $[0-9]+)? }
Boolean { "true" | "false" }
String { '\'' !["]* '\'' }
newline { "\n" | @eof }
space { " " }
newlineOrSemicolon { "\n" | ";" }
eof { @eof }
space { " " | "\t" }
leftParen { "(" }
rightParen { ")" }
":"
@ -37,7 +43,7 @@ line {
FunctionCall |
FunctionCallOrIdentifier |
FunctionDef |
Assignment |
Assign |
expressionWithoutIdentifier
}
@ -76,18 +82,18 @@ FunctionDef {
}
singleLineFunctionDef {
"fn" Params ":" expression "end"
"fn" Params ":" expression
}
multiLineFunctionDef {
"fn" Params ":" newline (expression newline)* "end"
"fn" Params ":" newlineOrSemicolon (expression newlineOrSemicolon)* "end"
}
Params {
Identifier*
}
Assignment {
Assign {
Identifier "=" line
}

View File

@ -15,4 +15,4 @@ export const
NamedArgPrefix = 17,
FunctionDef = 18,
Params = 20,
Assignment = 23
Assign = 23

View File

@ -4,16 +4,16 @@ import {tokenizer} from "./tokenizers"
import {highlighting} from "./highlight.js"
export const parser = LRParser.deserialize({
version: 14,
states: "'UQVQTOOOnQPO'#DSO!tQUO'#DSO#OQPOOOOQO'#DR'#DRO#oQTO'#CbOOQS'#DP'#DPO#vQTO'#DUOOQO'#Cn'#CnOOQO'#C|'#C|O$OQPO'#CuQVQTOOOOQS'#DO'#DOOOQS'#Ca'#CaO$TQTO'#ClOOQS'#C}'#C}OOQS'#Cv'#CvO$[QUO,58zOVQTO,59_O$lQTO,58}O$lQTO,58}O$sQPO,58|O%UQUO'#DSO%]QPO,58|OOQS'#Cw'#CwO%bQTO'#CpO%jQPO,59pOOQS,59a,59aOOQS-E6s-E6sOOQS,59W,59WOOQS-E6t-E6tOOQO1G.y1G.yOOQO'#DS'#DSOOQO1G.i1G.iO%oQPO1G.iOOQS1G.h1G.hOOQS-E6u-E6uO&WQTO1G/[O&bQPO7+$vO&sQTO7+$wOOQO<<Hb<<HbO&}QPO'#CxO'`QTO<<HcOOQO<<Hc<<HcOOQS,59d,59dOOQS-E6v-E6vOOQOAN=}AN=}",
stateData: "'m~OoOS~OPQOQUO]UO^UO_UOcVOtTO~OWvXXvXYvXZvXzpX~OP[OQUO]UO^UO_UOa^OtTOWvXXvXYvXZvX~OhbOz[X~P!POWcOXcOYdOZdO~OQUO]UO^UO_UOtTO~OPfO~P#^OPhOedP~OzkO~OP[O~P#^OP[Oa^OzSawSa~P#^OPpO~P#^OwsOWvXXvXYvXZvX~Ow[X~P!POwsO~OPhOedX~OeuO~OWcOXcOYViZVizViwVifVi~OPpOzwO~P#^OWcOXcOYdOZdOfxO~OPpOf{O~P#^OWcOXcOYdOZdOz|O~OPpOf!OO~P#^O^Z~",
goto: "%VzPPPP{!T!Y!iPPPP{PPP!TP!tP!yPP!tP!|#S#Z#aPPP#g#n#s#{P$^$mP%Q%QUXOZbRgTV_QafkUOQTZ^abcdfuwzcSOTZbcduwzVXOZbRjVQZORlZSaQfRnaQiVRtiQzwR}zSYOZRobV`QafU]QafRm^bSOTZbcduwzX[Q^afUPOZbQeTZpcduwzWROTZbQqcQrdQvuTywzVWOZb",
nodeNames: "⚠ Identifier Word Program FunctionCall PositionalArg ParenExpr BinOp operator operator operator operator FunctionCallOrIdentifier String Number Boolean NamedArg NamedArgPrefix FunctionDef fn Params : end Assignment =",
maxTerm: 42,
states: "'[OVQTOOOqQPO'#DTO!zQUO'#DTO#XQPOOOOQO'#DS'#DSO#xQTO'#CbOOQS'#DQ'#DQO$PQTO'#DVOOQO'#Cn'#CnOOQO'#C}'#C}O$XQPO'#C|OOQS'#Cu'#CuQ$aQTOOOOQS'#DP'#DPOOQS'#Ca'#CaO$hQTO'#ClOOQS'#DO'#DOOOQS'#Cv'#CvO$oQUO,58zO%SQTO,59_O%^QTO,58}O%^QTO,58}O%eQPO,58|O%vQUO'#DTO%}QPO,58|OOQS'#Cw'#CwO&SQTO'#CpO&[QPO,59qOOQS,59h,59hOOQS-E6s-E6sQOQPOOOOQS,59W,59WOOQS-E6t-E6tOOQO1G.y1G.yOOQO'#DT'#DTOOQO1G.i1G.iO&aQPO1G.iOOQS1G.h1G.hOOQS-E6u-E6uO&xQTO1G/]O'SQPO7+$wO'hQTO7+$xO'rQPO'#CxO(TQTO<<HdOOQO<<Hd<<HdOOQS,59d,59dOOQS-E6v-E6vOOQOAN>OAN>O",
stateData: "(b~OoOS~OPQOQUO]UO^UO_UOcVOuTO{ZO~OWwXXwXYwXZwX{qX|qX~OP]OQUO]UO^UO_UOa_OuTOWwXXwXYwXZwX~OhcO{[X|[X~P!VOWdOXdOYeOZeO~OQUO]UO^UO_UOuTO~OPgO~P#gOPiOedP~O{lO|lO~O|nO~PVOP]O~P#gOP]Oa_O{Sa|SaxSa~P#gOPQOcVO~P#gOPrO~P#gOxuOWwXXwXYwXZwX~Ox[X~P!VOxuO~OPiOedX~OewO~OWdOXdOYViZVi{Vi|VixVi~OPrO{yO~P#gOWdOXdOYeOZeO{yq|yq~OPrOf|O~P#gOWdOXdOYeOZeO{}O~OPrOf!PO~P#gO^Z~",
goto: "%[{PPPP|!U!Z!jPPPP|PPP!UP!uP!zPP!uP!}#T#[#bPPP#h#l#s#x$QP$c$rP%V%VUXO[cRhTV`QbgkUOQT[_bcdegwy{cSOT[cdewy{VXO[cRkVQ[ORm[SbQgRpbQjVRvjQ{yR!O{TZO[SYO[RqcVaQbgU^QbgRo_bSOT[cdewy{X]Q_bgUPO[cQfTZrdewy{WROT[cQsdQteQxwTzy{VWO[c",
nodeNames: "⚠ Identifier Word Program FunctionCall PositionalArg ParenExpr BinOp operator operator operator operator FunctionCallOrIdentifier String Number Boolean NamedArg NamedArgPrefix FunctionDef fn Params : end Assign =",
maxTerm: 44,
propSources: [highlighting],
skippedNodes: [0],
repeatNodeCount: 4,
tokenData: "(t~RcYZ!^pq!cwx!hxy#lyz#qz{#v{|#{}!O$Q!P!Q$s!Q![$Y![!]$x!_!`$}#T#X%S#X#Y%b#Y#Z&]#Z#h%S#h#i(U#i#o%S~~!^~!cOz~~!hOo~~!kUOr!hsw!hwx!}x;'S!h;'S;=`#f<%lO!h~#SU]~Or!hsw!hwx!}x;'S!h;'S;=`#f<%lO!h~#iP;=`<%l!h~#qOt~~#vOw~~#{OW~~$QOY~~$VPZ~!Q![$Y~$_Q^~!O!P$e!Q![$Y~$hP!Q![$k~$pP^~!Q![$k~$xOX~~$}Oe~~%SOh~Q%VQ!_!`%]#T#o%SQ%bOaQR%eS!_!`%]#T#b%S#b#c%q#c#o%SR%tS!_!`%]#T#W%S#W#X&Q#X#o%SR&VQfP!_!`%]#T#o%S~&`T!_!`%]#T#U&o#U#b%S#b#c'y#c#o%S~&rS!_!`%]#T#`%S#`#a'O#a#o%S~'RS!_!`%]#T#g%S#g#h'_#h#o%S~'bS!_!`%]#T#X%S#X#Y'n#Y#o%S~'sQ_~!_!`%]#T#o%SR(OQcP!_!`%]#T#o%S~(XS!_!`%]#T#f%S#f#g(e#g#o%S~(hS!_!`%]#T#i%S#i#j'_#j#o%S",
tokenData: ")P~ReXY!dYZ!ipq!dwx!nxy#ryz#wz{#|{|$R}!O$W!P!Q$y!Q![$`![!]%O!]!^!i!_!`%T#T#X%Y#X#Y%h#Y#Z&c#Z#h%Y#h#i([#i#o%Y~~(z~!iOo~~!nO{~~!qUOr!nsw!nwx#Tx;'S!n;'S;=`#l<%lO!n~#YU]~Or!nsw!nwx#Tx;'S!n;'S;=`#l<%lO!n~#oP;=`<%l!n~#wOu~~#|Ox~~$ROW~~$WOY~~$]PZ~!Q![$`~$eQ^~!O!P$k!Q![$`~$nP!Q![$q~$vP^~!Q![$q~%OOX~~%TOe~~%YOh~Q%]Q!_!`%c#T#o%YQ%hOaQR%kS!_!`%c#T#b%Y#b#c%w#c#o%YR%zS!_!`%c#T#W%Y#W#X&W#X#o%YR&]QfP!_!`%c#T#o%Y~&fT!_!`%c#T#U&u#U#b%Y#b#c(P#c#o%Y~&xS!_!`%c#T#`%Y#`#a'U#a#o%Y~'XS!_!`%c#T#g%Y#g#h'e#h#o%Y~'hS!_!`%c#T#X%Y#X#Y't#Y#o%Y~'yQ_~!_!`%c#T#o%YR(UQcP!_!`%c#T#o%Y~(_S!_!`%c#T#f%Y#f#g(k#g#o%Y~(nS!_!`%c#T#i%Y#i#j'e#j#o%Y~)PO|~",
tokenizers: [0, 1, tokenizer],
topRules: {"Program":[0,3]},
tokenPrec: 302
tokenPrec: 337
})

View File

@ -13,6 +13,16 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack
ch = getFullCodePoint(input, pos)
if (isWhitespace(ch) || ch === -1) break
// While the token can still be a word, stop scanning at a `;` or `)` that is
// immediately followed by whitespace or by -1 (presumably end of input), so the
// character is left for the next token. This allows things like
// `a = hello; 2` or `a = (basename ./file.txt)` to work as expected.
if (canBeWord && (ch === 59 /* ; */ || ch === 41 /* ) */)) {
const nextCh = getFullCodePoint(input, pos + 1)
if (isWhitespace(nextCh) || nextCh === -1) {
break
}
}
// Track identifier validity
if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 && !isEmoji(ch)) {
if (!canBeWord) break

View File

@ -2,8 +2,9 @@ import { expect } from 'bun:test'
import { Tree, TreeCursor } from '@lezer/common'
import { parser } from '#parser/shrimp'
import { $ } from 'bun'
import { assert } from '#utils/utils'
import { evaluate } from '#interpreter/evaluator'
import { assert, assertNever, errorMessage } from '#utils/utils'
import { Compiler } from '#compiler/compiler'
import { VM, type Value } from 'reefvm'
const regenerateParser = async () => {
let generate = true
@ -32,7 +33,8 @@ declare module 'bun:test' {
toMatchTree(expected: string): T
toMatchExpression(expected: string): T
toFailParse(): T
toEvaluateTo(expected: unknown): T
toEvaluateTo(expected: unknown): Promise<T>
toFailEvaluation(): Promise<T>
}
}
@ -91,40 +93,22 @@ expect.extend({
}
},
toEvaluateTo(received: unknown, expected: unknown) {
async toEvaluateTo(received: unknown, expected: unknown) {
assert(typeof received === 'string', 'toEvaluateTo can only be used with string values')
try {
const tree = parser.parse(received)
let hasErrors = false
tree.iterate({
enter(n) {
if (n.type.isError) {
hasErrors = true
return false
}
},
})
// Compile the source and execute it exactly once; the value returned by that
// single run is converted to a plain JS value for comparison.
const compiler = new Compiler(received)
const vm = new VM(compiler.bytecode)
const result = await vm.run()
const value = VMResultToValue(result)
if (hasErrors) {
const actual = treeToString(tree, received)
return {
message: () =>
`Expected input to evaluate successfully, but it had syntax errors:\n${actual}`,
pass: false,
}
if (value === expected) {
return { pass: true }
} else {
const context = new Map<string, unknown>()
const result = evaluate(received, tree, context)
if (Object.is(result, expected)) {
return { pass: true }
} else {
const expectedStr = JSON.stringify(expected)
const resultStr = JSON.stringify(result)
return {
message: () => `Expected evaluation to be ${expectedStr}, but got ${resultStr}`,
pass: false,
}
return {
message: () => `Expected evaluation to be ${expected}, but got ${value}`,
pass: false,
}
}
} catch (error) {
@ -134,6 +118,26 @@ expect.extend({
}
}
},
/**
 * Matcher that passes when compiling or running the received source throws.
 *
 * @param received - Source text to compile and run; must be a string.
 * @returns A matcher result: `pass: true` if anything threw, `pass: false` otherwise.
 */
async toFailEvaluation(received: unknown) {
  assert(typeof received === 'string', 'toFailEvaluation can only be used with string values')

  // Record the outcome first, then build the matcher result outside the try block.
  let thrown: unknown
  let didThrow = false
  try {
    const bytecode = new Compiler(received).bytecode
    await new VM(bytecode).run()
  } catch (error) {
    thrown = error
    didThrow = true
  }

  if (didThrow) {
    return {
      message: () => `Evaluation failed as expected: ${errorMessage(thrown)}`,
      pass: true,
    }
  }

  return {
    message: () => `Expected evaluation to fail, but it succeeded.`,
    pass: false,
  }
},
})
const treeToString = (tree: Tree, input: string): string => {
@ -187,9 +191,23 @@ const trimWhitespace = (str: string): string => {
.join('\n')
}
const expectString = (value: unknown): string => {
if (typeof value !== 'string') {
throw new Error('Expected a string input')
const VMResultToValue = (result: Value): unknown => {
if (result.type === 'number' || result.type === 'boolean' || result.type === 'string') {
return result.value
} else if (result.type === 'null') {
return null
} else if (result.type === 'array') {
return result.value.map(VMResultToValue)
} else if (result.type === 'dict') {
const obj: Record<string, unknown> = {}
for (const [key, val] of Object.entries(result.value)) {
obj[key] = VMResultToValue(val)
}
return obj
} else if (result.type === 'function') {
return Function
} else {
assertNever(result)
}
return value
}

View File

@ -12,7 +12,7 @@ export const errorMessage = (error: unknown) => {
return String(error)
}
export function assert(condition: boolean, message: string): asserts condition {
export function assert(condition: any, message: string): asserts condition {
if (!condition) {
throw new Error(message)
}