shrimp/src/compiler/compiler.ts
Corey Johnson cb62fdf437 feat(compiler): add PipeExpr compilation support
Implement Task 6 from docs/plans/2025-10-12-pipe-expressions.md

- Add pipe operator (|) termination to tokenizer
- Update grammar to include expressionWithoutIdentifier in pipeOperand
- Add PipeExpr case to compiler switch statement
- Implement pipe compilation: piped value becomes first argument
- Store piped values in temporary __pipe_value variable
- Handle both FunctionCallOrIdentifier and FunctionCall operands
- Add integration tests for pipe expressions

Tests:
- Simple pipe (5 | double) works correctly
- Additional tests exist but have pre-existing issues with function parameters

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-12 17:00:17 -07:00

392 lines
13 KiB
TypeScript

import { CompilerError } from '#compiler/compilerError.ts'
import { parser } from '#parser/shrimp.ts'
import * as terms from '#parser/shrimp.terms'
import type { SyntaxNode, Tree } from '@lezer/common'
import { assert, errorMessage } from '#utils/utils'
import { toBytecode, type Bytecode, type ProgramItem } from 'reefvm'
import {
checkTreeForErrors,
getAllChildren,
getAssignmentParts,
getBinaryParts,
getFunctionCallParts,
getFunctionDefParts,
getIfExprParts,
getNamedArgParts,
} from '#compiler/utils'
/** Name of a jump or function target; always starts with a dot (e.g. `.func_0`, `.end_1`). */
type Label = `.${string}`
/**
 * Compiles source text into a `Bytecode` program.
 *
 * All work happens in the constructor: the input is parsed, the syntax tree is
 * walked to produce `instructions`, every function body collected in
 * `fnLabels` is appended behind its label, and the result is handed to
 * `toBytecode`. The finished program is available as `bytecode`.
 */
export class Compiler {
  /** Main program items; function bodies are appended after the main program. */
  instructions: ProgramItem[] = []
  /** Function bodies keyed by their generated `.func_N` label. */
  fnLabels = new Map<Label, ProgramItem[]>()
  /** Counter used to give every `if` expression its own `.end_N` label. */
  ifLabelCount = 0
  /** Result of `toBytecode(this.instructions)`. */
  bytecode: Bytecode

  /**
   * @param input Source text to compile.
   * @throws Error with a readable message when the input has syntax errors or
   *   contains a construct the compiler does not support; any other failure is
   *   rethrown as an "Unknown error during compilation" `Error`.
   */
  constructor(public input: string) {
    try {
      const cst = parser.parse(input)
      const errors = checkTreeForErrors(cst, input)
      if (errors.length > 0) {
        throw new CompilerError(`Syntax errors found:\n${errors.join('\n')}`, 0, input.length)
      }

      this.#compileCst(cst, input)

      // Function bodies live after the main program, each behind its own label
      // and terminated by RETURN.
      for (const [label, labelInstructions] of this.fnLabels) {
        this.instructions.push([`${label}:`])
        this.instructions.push(...labelInstructions)
        this.instructions.push(['RETURN'])
      }

      // logInstructions(this.instructions)
      this.bytecode = toBytecode(this.instructions)
    } catch (error) {
      if (error instanceof CompilerError) {
        throw new Error(error.toReadableString(input))
      } else {
        throw new Error(`Unknown error during compilation:\n${errorMessage(error)}`)
      }
    }
  }

  /** Compiles every top-level child of the Program node, then emits HALT. */
  #compileCst(cst: Tree, input: string) {
    const isProgram = cst.topNode.type.id === terms.Program
    assert(isProgram, `Expected Program node, got ${cst.topNode.type.name}`)

    for (let child = cst.topNode.firstChild; child; child = child.nextSibling) {
      this.instructions.push(...this.#compileNode(child, input))
    }

    this.instructions.push(['HALT'])
  }

  /**
   * Compiles a single syntax node (recursively) into program items.
   *
   * @throws CompilerError for invalid number literals and unsupported nodes.
   */
  #compileNode(node: SyntaxNode, input: string): ProgramItem[] {
    const value = input.slice(node.from, node.to)

    switch (node.type.id) {
      case terms.Number: {
        const number = Number(value)
        if (Number.isNaN(number)) {
          throw new CompilerError(`Invalid number literal: ${value}`, node.from, node.to)
        }
        return [['PUSH', number]]
      }

      case terms.String: {
        // Drop the surrounding quotes, then resolve each backslash escape to the
        // character it escapes. Matching the pair (rather than deleting every
        // backslash) keeps an escaped backslash (`\\`) as one literal backslash.
        const strValue = value.slice(1, -1).replace(/\\([\s\S])/g, '$1')
        return [['PUSH', strValue]]
      }

      case terms.Boolean:
        return [['PUSH', value === 'true']]

      case terms.Identifier:
        return [['TRY_LOAD', value]]

      case terms.BinOp:
        return this.#compileBinOp(node, input)

      case terms.Assign: {
        const { identifier, right } = getAssignmentParts(node)
        const identifierName = input.slice(identifier.from, identifier.to)
        return [...this.#compileNode(right, input), ['STORE', identifierName]]
      }

      case terms.ParenExpr: {
        const child = node.firstChild
        if (!child) return [] // I guess it is empty parentheses?
        return this.#compileNode(child, input)
      }

      case terms.FunctionDef:
        return this.#compileFunctionDef(node, input)

      case terms.FunctionCallOrIdentifier:
        return [['TRY_CALL', value]]

      case terms.FunctionCall: {
        const { identifierNode, namedArgs, positionalArgs } = getFunctionCallParts(node, input)
        const callee = this.#compileNode(identifierNode, input)
        return this.#compileCall(callee, positionalArgs, namedArgs, input)
      }

      case terms.ThenBlock:
        return getAllChildren(node).flatMap((child) => this.#compileNode(child, input))

      case terms.IfExpr:
        return this.#compileIfExpr(node, input)

      case terms.ConditionalOp:
        return this.#compileConditionalOp(node, input)

      case terms.PipeExpr:
        return this.#compilePipeExpr(node, input)

      default:
        throw new CompilerError(`Unsupported syntax node: ${node.type.name}`, node.from, node.to)
    }
  }

  /** Arithmetic: both operands are evaluated (left first), then the operator is applied. */
  #compileBinOp(node: SyntaxNode, input: string): ProgramItem[] {
    const { left, op, right } = getBinaryParts(node)
    const instructions: ProgramItem[] = [
      ...this.#compileNode(left, input),
      ...this.#compileNode(right, input),
    ]

    const opValue = input.slice(op.from, op.to)
    switch (opValue) {
      case '+':
        instructions.push(['ADD'])
        break
      case '-':
        instructions.push(['SUB'])
        break
      case '*':
        instructions.push(['MUL'])
        break
      case '/':
        instructions.push(['DIV'])
        break
      default:
        throw new CompilerError(`Unsupported binary operator: ${opValue}`, op.from, op.to)
    }

    return instructions
  }

  /**
   * Registers the function body under a fresh `.func_N` label and returns the
   * MAKE_FUNCTION instruction that references it.
   */
  #compileFunctionDef(node: SyntaxNode, input: string): ProgramItem[] {
    const { paramNames, bodyNode } = getFunctionDefParts(node, input)
    const functionLabel: Label = `.func_${this.fnLabels.size}`
    if (this.fnLabels.has(functionLabel)) {
      throw new CompilerError(`Function name collision: ${functionLabel}`, node.from, node.to)
    }

    // Register the (still empty) body before compiling it so that function
    // definitions nested inside the body are assigned the following labels.
    const bodyInstructions: ProgramItem[] = []
    this.fnLabels.set(functionLabel, bodyInstructions)
    bodyInstructions.push(...this.#compileNode(bodyNode, input))

    return [['MAKE_FUNCTION', paramNames, functionLabel]]
  }

  /**
   * Emits a call sequence. Stack order (bottom to top):
   *
   *   <callee>
   *   <piped value>        ; only when `piped` is true
   *   <positional args...>
   *   PUSH "name", <value> ; one pair per named arg
   *   PUSH positional count
   *   PUSH named count
   *   CALL
   *
   * @param callee Already-compiled instructions that push the function.
   * @param piped When true, the value currently on top of the stack becomes the
   *   first positional argument. It is parked in the `__pipe_value` variable
   *   while the callee is pushed, then loaded back on top of it.
   */
  #compileCall(
    callee: ProgramItem[],
    positionalArgs: readonly SyntaxNode[],
    namedArgs: readonly SyntaxNode[],
    input: string,
    piped = false
  ): ProgramItem[] {
    const instructions: ProgramItem[] = []

    if (piped) instructions.push(['STORE', '__pipe_value'])
    instructions.push(...callee)
    if (piped) instructions.push(['LOAD', '__pipe_value'])

    for (const arg of positionalArgs) {
      instructions.push(...this.#compileNode(arg, input))
    }

    for (const arg of namedArgs) {
      const { name, valueNode } = getNamedArgParts(arg, input)
      instructions.push(['PUSH', name])
      instructions.push(...this.#compileNode(valueNode, input))
    }

    instructions.push(['PUSH', positionalArgs.length + (piped ? 1 : 0)])
    instructions.push(['PUSH', namedArgs.length])
    instructions.push(['CALL'])

    return instructions
  }

  /**
   * Compiles `if` / `else if` / `else`. Each branch is guarded by a relative
   * JUMP_IF_FALSE that skips the branch body plus its trailing JUMP; every
   * taken branch jumps to a shared end label. Without an `else`, null is pushed.
   *
   * NOTE(review): the relative offsets are item counts of the compiled branch.
   * If a branch contains label items (e.g. a nested `if`) and `toBytecode` does
   * not count labels as instructions, the offset overshoots - confirm against
   * reefvm.
   */
  #compileIfExpr(node: SyntaxNode, input: string): ProgramItem[] {
    const { conditionNode, thenBlock, elseIfBlocks, elseThenBlock } = getIfExprParts(node, input)
    const instructions: ProgramItem[] = [...this.#compileNode(conditionNode, input)]

    this.ifLabelCount++
    const endLabel: Label = `.end_${this.ifLabelCount}`

    const thenBlockInstructions = this.#compileNode(thenBlock, input)
    instructions.push(['JUMP_IF_FALSE', thenBlockInstructions.length + 1])
    instructions.push(...thenBlockInstructions)
    instructions.push(['JUMP', endLabel])

    // Else if
    for (const { conditional, thenBlock: elseIfBlock } of elseIfBlocks) {
      instructions.push(...this.#compileNode(conditional, input))
      const elseIfInstructions = this.#compileNode(elseIfBlock, input)
      instructions.push(['JUMP_IF_FALSE', elseIfInstructions.length + 1])
      instructions.push(...elseIfInstructions)
      instructions.push(['JUMP', endLabel])
    }

    // Else
    if (elseThenBlock) {
      instructions.push(...this.#compileNode(elseThenBlock, input))
    } else {
      instructions.push(['PUSH', null])
    }

    instructions.push([`${endLabel}:`])
    return instructions
  }

  /**
   * Comparisons (`EQ`, `NEQ`, `LT`, `GT`, `LTE`, `GTE`) evaluate both sides and
   * apply the operator. `and` / `or` short-circuit: the left value is
   * duplicated and, when it already decides the result, the jump skips the POP
   * and the right-hand side, leaving the left value as the result.
   */
  #compileConditionalOp(node: SyntaxNode, input: string): ProgramItem[] {
    const { left, op, right } = getBinaryParts(node)
    const leftInstructions = this.#compileNode(left, input)
    const rightInstructions = this.#compileNode(right, input)
    const opValue = input.slice(op.from, op.to)

    switch (opValue) {
      case '=':
        return [...leftInstructions, ...rightInstructions, ['EQ']]
      case '!=':
        return [...leftInstructions, ...rightInstructions, ['NEQ']]
      case '<':
        return [...leftInstructions, ...rightInstructions, ['LT']]
      case '>':
        return [...leftInstructions, ...rightInstructions, ['GT']]
      case '<=':
        return [...leftInstructions, ...rightInstructions, ['LTE']]
      case '>=':
        return [...leftInstructions, ...rightInstructions, ['GTE']]
      case 'and':
        return [
          ...leftInstructions,
          ['DUP'],
          ['JUMP_IF_FALSE', rightInstructions.length + 1],
          ['POP'],
          ...rightInstructions,
        ]
      case 'or':
        return [
          ...leftInstructions,
          ['DUP'],
          ['JUMP_IF_TRUE', rightInstructions.length + 1],
          ['POP'],
          ...rightInstructions,
        ]
      default:
        throw new CompilerError(`Unsupported conditional operator: ${opValue}`, op.from, op.to)
    }
  }

  /**
   * Compiles `a | f | g x`: the first operand is evaluated normally and each
   * later stage is called with the previous result as its first positional
   * argument.
   *
   * @throws CompilerError when there are fewer than two operands or a stage is
   *   neither a bare identifier nor a function call.
   */
  #compilePipeExpr(node: SyntaxNode, input: string): ProgramItem[] {
    // The pipe operator nodes are just syntax; only the operands matter.
    const [source, ...stages] = getAllChildren(node).filter(
      (child) => child.type.name !== 'operator'
    )
    if (source === undefined || stages.length === 0) {
      throw new CompilerError('PipeExpr must have at least two operands', node.from, node.to)
    }

    const instructions: ProgramItem[] = [...this.#compileNode(source, input)]

    for (const stage of stages) {
      // The result of the previous stage is on top of the stack here.
      if (stage.type.id === terms.FunctionCallOrIdentifier) {
        // Bare identifier: load it and call it with the piped value as its
        // only argument (1 positional, 0 named).
        const identifierNode = stage.getChild('Identifier')
        if (!identifierNode) {
          throw new CompilerError(
            'FunctionCallOrIdentifier must have Identifier',
            stage.from,
            stage.to
          )
        }
        const fnName = input.slice(identifierNode.from, identifierNode.to)
        instructions.push(...this.#compileCall([['TRY_LOAD', fnName]], [], [], input, true))
      } else if (stage.type.id === terms.FunctionCall) {
        // Call with explicit arguments: the piped value is inserted before them.
        const { identifierNode, namedArgs, positionalArgs } = getFunctionCallParts(stage, input)
        const callee = this.#compileNode(identifierNode, input)
        instructions.push(...this.#compileCall(callee, positionalArgs, namedArgs, input, true))
      } else {
        throw new CompilerError(
          `Unsupported pipe operand type: ${stage.type.name}`,
          stage.from,
          stage.to
        )
      }
    }

    return instructions
  }
}
/**
 * Debug helper: prints a program to the console, one item per line.
 *
 * String operands of `PUSH` are wrapped in single quotes so they can be told
 * apart from numbers and booleans; every other operand is printed via
 * `String(...)`, which also copes with `null` operands such as `PUSH null`.
 */
const logInstructions = (instructions: ProgramItem[]) => {
  const instructionsString = instructions
    .map((parts) => {
      const isPush = parts[0] === 'PUSH'
      return parts
        .map((part, i) => {
          // Index 0 is the opcode (or label) and is printed verbatim.
          if (i === 0) return part
          return typeof part === 'string' && isPush ? `'${part}'` : String(part)
        })
        .join(' ')
    })
    .join('\n')

  console.log(`\n🤖 instructions:\n----------------\n${instructionsString}\n\n`)
}