hell yeah

This commit is contained in:
Corey Johnson 2025-10-10 15:50:09 -07:00
parent 560a946745
commit 597a25da80
11 changed files with 736 additions and 216 deletions

@ -1 +1 @@
Subproject commit 146b0a28831161e03966746acce7d5fc7fe2229d Subproject commit 82e7b181ec1b0a2df4d76ca529b4736c9e56383b

View File

@ -63,7 +63,7 @@ describe('compiler', () => {
}) })
test('function', () => { test('function', () => {
expect(`add = fn a b: a + b; add`).toEvaluateTo(Function) expect(`fn a b: a + b`).toEvaluateTo(Function)
}) })
test('function call', () => { test('function call', () => {
@ -73,6 +73,61 @@ describe('compiler', () => {
test('function call with no args', () => { test('function call with no args', () => {
expect(`bloop = fn: 'bloop'; bloop`).toEvaluateTo('bloop') expect(`bloop = fn: 'bloop'; bloop`).toEvaluateTo('bloop')
}) })
test('simple conditionals', () => {
expect(`(3 < 6)`).toEvaluateTo(true)
expect(`(10 > 20)`).toEvaluateTo(false)
expect(`(4 <= 9)`).toEvaluateTo(true)
expect(`(15 >= 20)`).toEvaluateTo(false)
expect(`(7 = 7)`).toEvaluateTo(true)
expect(`(5 != 5)`).toEvaluateTo(false)
expect(`('shave' and 'haircut')`).toEvaluateTo('haircut')
expect(`(false and witness)`).toEvaluateTo(false)
expect(`('pride' or 'prejudice')`).toEvaluateTo('pride')
expect(`(false or false)`).toEvaluateTo(false)
})
test('if', () => {
expect(`if 3 < 9:
shire
end`).toEvaluateTo('shire')
})
test('if else', () => {
expect(`if false:
grey
else:
white
end`).toEvaluateTo('white')
})
test('if elsif', () => {
expect(`if false:
boromir
elsif true:
frodo
end`).toEvaluateTo('frodo')
})
test('if elsif else', () => {
expect(`if false:
destroyed
elsif true:
fire
else:
darkness
end`).toEvaluateTo('fire')
expect(`if false:
king
elsif false:
elf
elsif true:
dwarf
else:
scattered
end`).toEvaluateTo('dwarf')
})
}) })
describe('errors', () => { describe('errors', () => {

View File

@ -4,12 +4,21 @@ import * as terms from '#parser/shrimp.terms'
import type { SyntaxNode, Tree } from '@lezer/common' import type { SyntaxNode, Tree } from '@lezer/common'
import { assert, errorMessage } from '#utils/utils' import { assert, errorMessage } from '#utils/utils'
import { toBytecode, type Bytecode } from 'reefvm' import { toBytecode, type Bytecode } from 'reefvm'
import { compile } from 'tailwindcss' import {
checkTreeForErrors,
getAllChildren,
getAssignmentParts,
getBinaryParts,
getFunctionCallParts,
getFunctionDefParts,
getIfExprParts,
getNamedArgParts,
} from '#compiler/utils'
export class Compiler { export class Compiler {
fnCounter = 0
instructions: string[] = [] instructions: string[] = []
labels = new Map<string, string[]>() fnLabels = new Map<string, string[]>()
ifLabelCount = 0
bytecode: Bytecode bytecode: Bytecode
constructor(public input: string) { constructor(public input: string) {
@ -24,17 +33,17 @@ export class Compiler {
this.#compileCst(cst, input) this.#compileCst(cst, input)
// Add the labels // Add the labels
for (const [label, labelInstructions] of this.labels) { for (const [label, labelInstructions] of this.fnLabels) {
this.instructions.push(`${label}:`) this.instructions.push(`${label}:`)
this.instructions.push(...labelInstructions.map((instr) => ` ${instr}`)) this.instructions.push(...labelInstructions.map((instr) => ` ${instr}`))
this.instructions.push(' RETURN') this.instructions.push(' RETURN')
} }
// console.log(`🌭`, this.instructions.join('\n')) // console.log(`\n🤖 instructions:\n----------------\n${this.instructions.join('\n')}\n\n`)
this.bytecode = toBytecode(this.instructions.join('\n')) this.bytecode = toBytecode(this.instructions.join('\n'))
} catch (error) { } catch (error) {
if (error instanceof CompilerError) { if (error instanceof CompilerError) {
throw new Error(`Compiler Error:\n${error.toReadableString(input)}`) throw new Error(error.toReadableString(input))
} else { } else {
throw new Error(`Unknown error during compilation:\n${errorMessage(error)}`) throw new Error(`Unknown error during compilation:\n${errorMessage(error)}`)
} }
@ -119,13 +128,13 @@ export class Compiler {
case terms.FunctionDef: { case terms.FunctionDef: {
const { paramNames, bodyNode } = getFunctionDefParts(node, input) const { paramNames, bodyNode } = getFunctionDefParts(node, input)
const instructions: string[] = [] const instructions: string[] = []
const functionName = `.func_${this.labels.size}` const functionName = `.func_${this.fnLabels.size}`
const bodyInstructions: string[] = [] const bodyInstructions: string[] = []
if (this.labels.has(functionName)) { if (this.fnLabels.has(functionName)) {
throw new CompilerError(`Function name collision: ${functionName}`, node.from, node.to) throw new CompilerError(`Function name collision: ${functionName}`, node.from, node.to)
} }
this.labels.set(functionName, bodyInstructions) this.fnLabels.set(functionName, bodyInstructions)
instructions.push(`MAKE_FUNCTION (${paramNames}) ${functionName}`) instructions.push(`MAKE_FUNCTION (${paramNames}) ${functionName}`)
bodyInstructions.push(...this.#compileNode(bodyNode, input)) bodyInstructions.push(...this.#compileNode(bodyNode, input))
@ -134,9 +143,7 @@ export class Compiler {
} }
case terms.FunctionCallOrIdentifier: { case terms.FunctionCallOrIdentifier: {
// For now, just treat them all like identifiers, but we might return [`TRY_CALL ${value}`]
// need something like TRY_CALL in the future.
return [`TRY_LOAD ${value}`]
} }
/* /*
@ -173,126 +180,112 @@ export class Compiler {
return instructions return instructions
} }
case terms.ThenBlock: {
  // A ThenBlock is a sequence of statements: compile each child in source order
  // and concatenate the resulting instruction lists into one flat list.
  return getAllChildren(node).flatMap((statement) => this.#compileNode(statement, input))
}
// Compiles an if / elsif / else expression into a flat instruction list:
// condition, then-block, every elsif (condition + block), and finally the else block.
// Each taken branch ends with `JUMP <endLabel>` so the remaining branches are skipped.
case terms.IfExpr: {
const { conditionNode, thenBlock, elseIfBlocks, elseThenBlock } = getIfExprParts(
node,
input
)
const instructions: string[] = []
instructions.push(...this.#compileNode(conditionNode, input))
// The counter is bumped before the branches are compiled, so an if-expression nested
// inside a branch receives a higher number and therefore distinct label names.
this.ifLabelCount++
const elseLabel = `.else_${this.ifLabelCount}`
const endLabel = `.end_${this.ifLabelCount}`
const thenBlockInstructions = this.#compileNode(thenBlock, input)
// `#N` is presumably a relative jump over the next N instructions (the block plus the
// trailing `JUMP <endLabel>`) — TODO confirm against the ReefVM documentation.
// NOTE(review): `.length` counts every string in the array, including label lines
// produced by a nested if-expression; verify the VM counts those the same way.
instructions.push(`JUMP_IF_FALSE #${thenBlockInstructions.length + 1}`)
instructions.push(...thenBlockInstructions)
instructions.push(`JUMP ${endLabel}`)
// Else if
// Same shape as the first branch, repeated per elsif. `index` is not used.
elseIfBlocks.forEach(({ conditional, thenBlock }, index) => {
instructions.push(...this.#compileNode(conditional, input))
const elseIfInstructions = this.#compileNode(thenBlock, input)
instructions.push(`JUMP_IF_FALSE #${elseIfInstructions.length + 1}`)
instructions.push(...elseIfInstructions)
instructions.push(`JUMP ${endLabel}`)
})
// Else
// The else label is emitted, but no jump in this block targets it: execution reaches
// the else code by falling through after the last failed condition.
instructions.push(`${elseLabel}:`)
if (elseThenBlock) {
const elseThenInstructions = this.#compileNode(elseThenBlock, input).map((i) => ` ${i}`)
instructions.push(...elseThenInstructions)
} else {
// With no else branch, `PUSH null` is emitted in its place.
instructions.push(` PUSH null`)
}
instructions.push(`${endLabel}:`)
return instructions
}
// - `EQ`, `NEQ`, `LT`, `GT`, `LTE`, `GTE` - Pop 2, push boolean
case terms.ConditionalOp: {
  // Compiles a comparison (`=`, `!=`, `<`, `>`, `<=`, `>=`) or a short-circuit `and` / `or`.
  //
  // Comparisons evaluate both operands and then emit a single opcode
  // (EQ, NEQ, LT, GT, LTE, GTE), each of which pops two values and pushes a boolean.
  //
  // `and` / `or` yield one of their operands rather than a boolean. The left value is
  // duplicated so that one copy can be tested by the conditional jump while the other
  // stays on the stack as the result when the right operand is skipped.
  // (This presumes the conditional jumps consume the value they test and that `#N`
  // skips the next N instructions — the DUP / POP pairing below relies on it.)
  const instructions: string[] = []
  const { left, op, right } = getBinaryParts(node)
  const leftInstructions: string[] = this.#compileNode(left, input)
  const rightInstructions: string[] = this.#compileNode(right, input)
  const opValue = input.slice(op.from, op.to)

  switch (opValue) {
    case '=':
      instructions.push(...leftInstructions, ...rightInstructions, 'EQ')
      break
    case '!=':
      instructions.push(...leftInstructions, ...rightInstructions, 'NEQ')
      break
    case '<':
      instructions.push(...leftInstructions, ...rightInstructions, 'LT')
      break
    case '>':
      instructions.push(...leftInstructions, ...rightInstructions, 'GT')
      break
    case '<=':
      instructions.push(...leftInstructions, ...rightInstructions, 'LTE')
      break
    case '>=':
      instructions.push(...leftInstructions, ...rightInstructions, 'GTE')
      break
    case 'and':
      // Falsy left: jump past POP + right, leaving the left value as the result.
      // Truthy left: drop it and evaluate the right operand instead.
      instructions.push(...leftInstructions)
      instructions.push('DUP')
      instructions.push(`JUMP_IF_FALSE #${rightInstructions.length + 1}`)
      instructions.push('POP')
      instructions.push(...rightInstructions)
      break
    case 'or':
      // Mirror image of `and`. The tested value must be a copy of the left operand:
      // testing a constant would make the jump unconditional, so the right operand
      // would never be evaluated and `false or x` would wrongly yield `false`.
      instructions.push(...leftInstructions)
      instructions.push('DUP')
      instructions.push(`JUMP_IF_TRUE #${rightInstructions.length + 1}`)
      instructions.push('POP')
      instructions.push(...rightInstructions)
      break
    default:
      throw new CompilerError(`Unsupported conditional operator: ${opValue}`, op.from, op.to)
  }

  return instructions
}
default: default:
throw new CompilerError(`Unsupported syntax node: ${node.type.name}`, node.from, node.to) throw new CompilerError(`Unsupported syntax node: ${node.type.name}`, node.from, node.to)
} }
} }
} }
// Helper functions for extracting node parts
const getAllChildren = (node: SyntaxNode): SyntaxNode[] => {
const children: SyntaxNode[] = []
let child = node.firstChild
while (child) {
children.push(child)
child = child.nextSibling
}
return children
}
const getBinaryParts = (node: SyntaxNode) => {
const children = getAllChildren(node)
const [left, op, right] = children
if (!left || !op || !right) {
throw new CompilerError(`BinOp expected 3 children, got ${children.length}`, node.from, node.to)
}
return { left, op, right }
}
const getAssignmentParts = (node: SyntaxNode) => {
const children = getAllChildren(node)
const [left, equals, right] = children
if (!left || left.type.id !== terms.Identifier) {
throw new CompilerError(
`Assign left child must be an Identifier, got ${left ? left.type.name : 'none'}`,
node.from,
node.to
)
} else if (!equals || !right) {
throw new CompilerError(
`Assign expected 3 children, got ${children.length}`,
node.from,
node.to
)
}
return { identifier: left, right }
}
const checkTreeForErrors = (tree: Tree, input: string): string[] => {
const errors: string[] = []
tree.iterate({
enter: (node) => {
if (node.type.isError) {
const errorText = input.slice(node.from, node.to)
errors.push(`Syntax error at ${node.from}-${node.to}: "${errorText}"`)
}
},
})
return errors
}
const getFunctionDefParts = (node: SyntaxNode, input: string) => {
const children = getAllChildren(node)
const [fnKeyword, paramsNode, colon, bodyNode] = children
if (!fnKeyword || !paramsNode || !colon || !bodyNode) {
throw new CompilerError(
`FunctionDef expected 5 children, got ${children.length}`,
node.from,
node.to
)
}
const paramNames = getAllChildren(paramsNode)
.map((param) => {
if (param.type.id !== terms.Identifier) {
throw new CompilerError(
`FunctionDef params must be Identifiers, got ${param.type.name}`,
param.from,
param.to
)
}
return input.slice(param.from, param.to)
})
.join(' ')
return { paramNames, bodyNode }
}
const getFunctionCallParts = (node: SyntaxNode, input: string) => {
const [identifierNode, ...args] = getAllChildren(node)
if (!identifierNode) {
throw new CompilerError(`FunctionCall expected at least 1 child, got 0`, node.from, node.to)
}
const namedArgs = args.filter((arg) => arg.type.id === terms.NamedArg)
const positionalArgs = args
.filter((arg) => arg.type.id === terms.PositionalArg)
.map((arg) => {
const child = arg.firstChild
if (!child) throw new CompilerError(`PositionalArg has no child`, arg.from, arg.to)
return child
})
return { identifierNode, namedArgs, positionalArgs }
}
const getNamedArgParts = (node: SyntaxNode, input: string) => {
const children = getAllChildren(node)
const [namedArgPrefix, valueNode] = getAllChildren(node)
if (!namedArgPrefix || !valueNode) {
const message = `NamedArg expected 2 children, got ${children.length}`
throw new CompilerError(message, node.from, node.to)
}
const name = input.slice(namedArgPrefix.from, namedArgPrefix.to - 2) // Remove the trailing =
return { name, valueNode }
}

View File

@ -5,12 +5,71 @@ export class CompilerError extends Error {
this.message = message this.message = message
} }
// This code is A MESS, but I don't really care because once we get it right we'll never touch it again.
toReadableString(input: string) { toReadableString(input: string) {
const pointer = ' '.repeat(this.from) + '^'.repeat(this.to - this.from) const lineInfo = this.lineAtPosition(input)
const message = `${this.message} at "${input.slice(this.from, this.to)}" (${this.from}:${ if (!lineInfo) {
this.to return `${this.message} at position ${this.from}:${this.to}`
})` }
return `${input}\n${pointer}\n${message}` const { lineNumber, columnStart, columnEnd } = lineInfo
const previousSevenLines = input.split('\n').slice(Math.max(0, lineNumber - 8), lineNumber)
const padding = lineNumber.toString().length
const ws = ' '.repeat(padding + 1)
const lines = previousSevenLines
.map((line, index) => {
const currentLineNumber = lineNumber - previousSevenLines.length + index + 1
return `${grey(currentLineNumber.toString().padStart(padding))}${line}`
})
.join('\n')
const underlineStartLen = (columnEnd - columnStart) / 2
const underlineEndLen = columnEnd - columnStart - underlineStartLen
const underline =
' '.repeat(columnStart - 1) +
'─'.repeat(underlineStartLen) +
'┬' +
'─'.repeat(underlineEndLen)
const messageWithArrow =
' '.repeat(columnStart + underlineStartLen - 1) + '╰── ' + blue(this.message)
const message = `${green('')}
${ws} ${red('Compiler Error')}
${ws}
${lines}
${ws} ${underline}
${ws} ${messageWithArrow}
${ws}
`
return `${message}`
}
// Locates the line of `input` on which this error starts.
// Returns the 1-based line number, the 1-based start/end columns of the error within
// that line, and the line's text; returns undefined when `from` lies on no line.
lineAtPosition(input: string) {
  let lineStart = 0 // offset in `input` of the first character of the current line

  for (const [index, text] of input.split('\n').entries()) {
    const lineEnd = lineStart + text.length
    const startsOnThisLine = this.from >= lineStart && this.from <= lineEnd

    if (!startsOnThisLine) {
      lineStart = lineEnd + 1 // +1 for the newline character
      continue
    }

    const columnStart = this.from - lineStart + 1
    const rawColumnEnd = columnStart + (this.to - this.from) - 1
    // An error that runs past the end of this line (e.g. it spans several lines)
    // is clamped to the end of the line it starts on.
    const columnEnd = rawColumnEnd > text.length ? text.length : rawColumnEnd

    return { lineNumber: index + 1, columnStart, columnEnd, text }
  }
}
}
// Builds a function that wraps text in the given ANSI SGR code and then resets styling.
const ansi = (code: number) => (text: string) => `\x1b[${code}m${text}\x1b[0m`

// Terminal styling helpers used when rendering compiler errors.
const red = ansi(31)
const green = ansi(32)
const blue = ansi(34)
const grey = ansi(90)
const underline = ansi(4)
const bold = ansi(1)

158
src/compiler/utils.ts Normal file
View File

@ -0,0 +1,158 @@
import { CompilerError } from '#compiler/compilerError.ts'
import * as terms from '#parser/shrimp.terms'
import type { SyntaxNode, Tree } from '@lezer/common'
/**
 * Collects a message for every error node found while iterating the parse tree.
 *
 * @param tree - Parse tree to scan.
 * @param input - Source text the tree was parsed from; used to quote the offending span.
 * @returns One `Syntax error at from-to: "text"` message per error node, in visit order
 *          (empty when the tree has no error nodes).
 */
export const checkTreeForErrors = (tree: Tree, input: string): string[] => {
  const messages: string[] = []

  tree.iterate({
    enter: (node) => {
      if (!node.type.isError) return
      const { from, to } = node
      messages.push(`Syntax error at ${from}-${to}: "${input.slice(from, to)}"`)
    },
  })

  return messages
}
/**
 * Returns the direct children of `node`, in order, by following `firstChild`
 * and then each `nextSibling` link until it runs out.
 *
 * @param node - Parent node.
 * @returns The children as an array (empty when the node has no children).
 */
export const getAllChildren = (node: SyntaxNode): SyntaxNode[] => {
  const collected: SyntaxNode[] = []
  for (let current = node.firstChild; current; current = current.nextSibling) {
    collected.push(current)
  }
  return collected
}
/**
 * Splits a binary node into its first three children: left operand, operator, right operand.
 *
 * @param node - Node expected to have at least three children.
 * @returns `{ left, op, right }`.
 * @throws CompilerError when any of the first three children is missing.
 */
export const getBinaryParts = (node: SyntaxNode) => {
  const parts = getAllChildren(node)
  const left = parts[0]
  const op = parts[1]
  const right = parts[2]

  if (left && op && right) {
    return { left, op, right }
  }

  throw new CompilerError(`BinOp expected 3 children, got ${parts.length}`, node.from, node.to)
}
/**
 * Splits an assignment node into the identifier being assigned and the value node.
 *
 * @param node - Node whose children are expected to be identifier, `=`, value.
 * @returns `{ identifier, right }`.
 * @throws CompilerError when the first child is missing or is not an Identifier,
 *         or when the operator or the value child is missing.
 */
export const getAssignmentParts = (node: SyntaxNode) => {
  const parts = getAllChildren(node)
  const identifier = parts[0]
  const equals = parts[1]
  const right = parts[2]

  if (!identifier || identifier.type.id !== terms.Identifier) {
    const found = identifier ? identifier.type.name : 'none'
    throw new CompilerError(
      `Assign left child must be an Identifier, got ${found}`,
      node.from,
      node.to
    )
  }

  if (!equals || !right) {
    throw new CompilerError(`Assign expected 3 children, got ${parts.length}`, node.from, node.to)
  }

  return { identifier, right }
}
/**
 * Splits a function definition node into its parameter names and its body node.
 *
 * The first four children are read as: `fn` keyword, params node, colon, body.
 * NOTE(review): only the fourth child is returned as the body. If a definition can have
 * more children after it (further statements, a closing `end`), they are ignored here —
 * confirm that this is what callers expect.
 *
 * @param node - The function definition node.
 * @param input - Source text; used to read each parameter's name.
 * @returns `paramNames` (parameter names joined by single spaces, empty string when there
 *          are none) and `bodyNode`.
 * @throws CompilerError when any of the first four children is missing, or when a child
 *         of the params node is not an Identifier.
 */
export const getFunctionDefParts = (node: SyntaxNode, input: string) => {
  const children = getAllChildren(node)
  const [fnKeyword, paramsNode, colon, bodyNode] = children

  if (!fnKeyword || !paramsNode || !colon || !bodyNode) {
    // The guard requires four children, so the message reports that number.
    throw new CompilerError(
      `FunctionDef expected at least 4 children, got ${children.length}`,
      node.from,
      node.to
    )
  }

  const paramNames = getAllChildren(paramsNode)
    .map((param) => {
      if (param.type.id !== terms.Identifier) {
        throw new CompilerError(
          `FunctionDef params must be Identifiers, got ${param.type.name}`,
          param.from,
          param.to
        )
      }
      return input.slice(param.from, param.to)
    })
    .join(' ')

  return { paramNames, bodyNode }
}
/**
 * Splits a function call node into the callee identifier and its arguments.
 *
 * Children after the first are sorted by node type: NamedArg nodes are returned as they
 * are, PositionalArg nodes are unwrapped to their first child, anything else is ignored.
 *
 * @param node - The function call node.
 * @param input - Source text (not read by this function).
 * @returns `{ identifierNode, namedArgs, positionalArgs }`.
 * @throws CompilerError when the node has no children, or when a PositionalArg has no child.
 */
export const getFunctionCallParts = (node: SyntaxNode, input: string) => {
  const [identifierNode, ...rest] = getAllChildren(node)
  if (!identifierNode) {
    throw new CompilerError(`FunctionCall expected at least 1 child, got 0`, node.from, node.to)
  }

  const namedArgs: SyntaxNode[] = []
  const positionalArgs: SyntaxNode[] = []

  for (const arg of rest) {
    if (arg.type.id === terms.NamedArg) {
      namedArgs.push(arg)
    }
    if (arg.type.id === terms.PositionalArg) {
      const wrapped = arg.firstChild
      if (!wrapped) throw new CompilerError(`PositionalArg has no child`, arg.from, arg.to)
      positionalArgs.push(wrapped)
    }
  }

  return { identifierNode, namedArgs, positionalArgs }
}
/**
 * Splits a named-argument node into the argument's name and the node holding its value.
 *
 * @param node - Node whose first two children are the name prefix and the value.
 * @param input - Source text; used to read the name out of the prefix token.
 * @returns `{ name, valueNode }`.
 * @throws CompilerError when either of the two children is missing.
 */
export const getNamedArgParts = (node: SyntaxNode, input: string) => {
  // Walk the children once; the same list feeds the destructuring and the error message.
  const children = getAllChildren(node)
  const [namedArgPrefix, valueNode] = children

  if (!namedArgPrefix || !valueNode) {
    const message = `NamedArg expected 2 children, got ${children.length}`
    throw new CompilerError(message, node.from, node.to)
  }

  // The name is the prefix token's text without its last two characters.
  // NOTE(review): the previous comment here read "Remove the trailing =", which is one
  // character, not two — confirm what the prefix token's span actually ends with.
  const name = input.slice(namedArgPrefix.from, namedArgPrefix.to - 2)
  return { name, valueNode }
}
/**
 * Splits an if-expression node into its condition, its then-block, any elsif branches
 * and the optional else block.
 *
 * The first four children are read as: `if` keyword, condition, colon, then-block.
 * Every later child is inspected by node type: ElsifExpr children contribute a
 * `{ conditional, thenBlock }` pair, an ElseExpr child contributes the else block, and
 * any other child is ignored.
 *
 * @param node - The if-expression node.
 * @param input - Source text (not read by this function).
 * @returns `{ conditionNode, thenBlock, elseThenBlock, elseIfBlocks }`; `elseThenBlock`
 *          is undefined when there is no else branch.
 * @throws CompilerError when the keyword, condition or then-block is missing, when an
 *         ElseExpr does not have exactly three children, or when an ElsifExpr lacks its
 *         condition or block.
 */
export const getIfExprParts = (node: SyntaxNode, input: string) => {
  const children = getAllChildren(node)
  const [ifKeyword, conditionNode, _colon, thenBlock, ...rest] = children

  if (!ifKeyword || !conditionNode || !thenBlock) {
    throw new CompilerError(
      `IfExpr expected at least 4 children, got ${children.length}`,
      node.from,
      node.to
    )
  }

  const elseIfBlocks: { conditional: SyntaxNode; thenBlock: SyntaxNode }[] = []
  let elseThenBlock: SyntaxNode | undefined

  for (const child of rest) {
    const parts = getAllChildren(child)

    if (child.type.id === terms.ElseExpr) {
      // An else branch is: keyword, colon, block — the message states the count checked.
      if (parts.length !== 3) {
        const message = `ElseExpr expected 3 children, got ${parts.length}`
        throw new CompilerError(message, child.from, child.to)
      }
      elseThenBlock = parts[2]
    } else if (child.type.id === terms.ElsifExpr) {
      const [_keyword, conditional, _elsifColon, elsifThenBlock] = parts
      if (!conditional || !elsifThenBlock) {
        const names = parts.map((p) => p.type.name).join(', ')
        const message = `ElsifExpr expected conditional and thenBlock, got ${names}`
        throw new CompilerError(message, child.from, child.to)
      }
      elseIfBlocks.push({ conditional, thenBlock: elsifThenBlock })
    }
  }

  return { conditionNode, thenBlock, elseThenBlock, elseIfBlocks }
}

View File

@ -54,6 +54,12 @@ export const shrimpTheme = EditorView.theme(
outline: 'none', outline: 'none',
height: '100%', height: '100%',
}, },
'.cm-matchingBracket': {
backgroundColor: '#FF5370',
},
'.cm-nonmatchingBracket': {
backgroundColor: '#C3E88D',
},
}, },
{ dark: true } { dark: true }
) )

View File

@ -2,6 +2,7 @@ import { expect, describe, test } from 'bun:test'
import { afterEach } from 'bun:test' import { afterEach } from 'bun:test'
import { resetCommandSource, setCommandSource } from '#editor/commands' import { resetCommandSource, setCommandSource } from '#editor/commands'
import { beforeEach } from 'bun:test' import { beforeEach } from 'bun:test'
import './shrimp.grammar' // Importing this so changes cause it to retest! import './shrimp.grammar' // Importing this so changes cause it to retest!
describe('calling functions', () => { describe('calling functions', () => {
@ -81,7 +82,7 @@ describe('Identifier', () => {
}) })
describe('Parentheses', () => { describe('Parentheses', () => {
test('parses expressions with parentheses correctly', () => { test('allows binOps with parentheses correctly', () => {
expect('(2 + 3)').toMatchTree(` expect('(2 + 3)').toMatchTree(`
ParenExpr ParenExpr
BinOp BinOp
@ -90,6 +91,54 @@ describe('Parentheses', () => {
Number 3`) Number 3`)
}) })
test('allows numbers, strings, and booleans with parentheses correctly', () => {
expect('(42)').toMatchTree(`
ParenExpr
Number 42`)
expect("('hello')").toMatchTree(`
ParenExpr
String hello`)
expect('(true)').toMatchTree(`
ParenExpr
Boolean true`)
expect('(false)').toMatchTree(`
ParenExpr
Boolean false`)
})
test('allows function calls in parens', () => {
expect('(echo 3)').toMatchTree(`
ParenExpr
FunctionCall
Identifier echo
PositionalArg
Number 3`)
expect('(echo)').toMatchTree(`
ParenExpr
FunctionCallOrIdentifier
Identifier echo`)
})
test('allows conditionals in parens', () => {
expect('(a > b)').toMatchTree(`
ParenExpr
ConditionalOp
Identifier a
operator >
Identifier b`)
expect('(a and b)').toMatchTree(`
ParenExpr
ConditionalOp
Identifier a
operator and
Identifier b`)
})
test('allows parens in function calls', () => { test('allows parens in function calls', () => {
expect('echo (3 + 3)').toMatchTree(` expect('echo (3 + 3)').toMatchTree(`
FunctionCall FunctionCall
@ -198,19 +247,19 @@ describe('Fn', () => {
test('parses function no parameters', () => { test('parses function no parameters', () => {
expect('fn: 1').toMatchTree(` expect('fn: 1').toMatchTree(`
FunctionDef FunctionDef
fn fn keyword fn
Params Params
: : colon :
Number 1`) Number 1`)
}) })
test('parses function with single parameter', () => { test('parses function with single parameter', () => {
expect('fn x: x + 1').toMatchTree(` expect('fn x: x + 1').toMatchTree(`
FunctionDef FunctionDef
fn fn keyword fn
Params Params
Identifier x Identifier x
: : colon :
BinOp BinOp
Identifier x Identifier x
operator + operator +
@ -220,11 +269,11 @@ describe('Fn', () => {
test('parses function with multiple parameters', () => { test('parses function with multiple parameters', () => {
expect('fn x y: x * y').toMatchTree(` expect('fn x y: x * y').toMatchTree(`
FunctionDef FunctionDef
fn fn keyword fn
Params Params
Identifier x Identifier x
Identifier y Identifier y
: : colon :
BinOp BinOp
Identifier x Identifier x
operator * operator *
@ -237,11 +286,11 @@ describe('Fn', () => {
x + 9 x + 9
end`).toMatchTree(` end`).toMatchTree(`
FunctionDef FunctionDef
fn fn keyword fn
Params Params
Identifier x Identifier x
Identifier y Identifier y
: : colon :
BinOp BinOp
Identifier x Identifier x
operator * operator *
@ -280,11 +329,11 @@ describe('newlines', () => {
y = 2`).toMatchTree(` y = 2`).toMatchTree(`
Assign Assign
Identifier x Identifier x
= = operator =
Number 5 Number 5
Assign Assign
Identifier y Identifier y
= = operator =
Number 2`) Number 2`)
}) })
@ -292,11 +341,11 @@ y = 2`).toMatchTree(`
expect(`x = 5; y = 2`).toMatchTree(` expect(`x = 5; y = 2`).toMatchTree(`
Assign Assign
Identifier x Identifier x
= = operator =
Number 5 Number 5
Assign Assign
Identifier y Identifier y
= = operator =
Number 2`) Number 2`)
}) })
@ -304,7 +353,7 @@ y = 2`).toMatchTree(`
expect(`a = hello; 2`).toMatchTree(` expect(`a = hello; 2`).toMatchTree(`
Assign Assign
Identifier a Identifier a
= = operator =
FunctionCallOrIdentifier FunctionCallOrIdentifier
Identifier hello Identifier hello
Number 2`) Number 2`)
@ -316,7 +365,7 @@ describe('Assign', () => {
expect('x = 5').toMatchTree(` expect('x = 5').toMatchTree(`
Assign Assign
Identifier x Identifier x
= = operator =
Number 5`) Number 5`)
}) })
@ -324,7 +373,7 @@ describe('Assign', () => {
expect('x = 5 + 3').toMatchTree(` expect('x = 5 + 3').toMatchTree(`
Assign Assign
Identifier x Identifier x
= = operator =
BinOp BinOp
Number 5 Number 5
operator + operator +
@ -335,13 +384,13 @@ describe('Assign', () => {
expect('add = fn a b: a + b').toMatchTree(` expect('add = fn a b: a + b').toMatchTree(`
Assign Assign
Identifier add Identifier add
= = operator =
FunctionDef FunctionDef
fn fn keyword fn
Params Params
Identifier a Identifier a
Identifier b Identifier b
: : colon :
BinOp BinOp
Identifier a Identifier a
operator + operator +
@ -349,6 +398,141 @@ describe('Assign', () => {
}) })
}) })
describe('if/elsif/else', () => {
test('parses single line if', () => {
expect(`if y = 1: 'cool'`).toMatchTree(`
IfExpr
keyword if
ConditionalOp
Identifier y
operator =
Number 1
colon :
ThenBlock
String cool
`)
expect('a = if x: 2').toMatchTree(`
Assign
Identifier a
operator =
IfExpr
keyword if
Identifier x
colon :
ThenBlock
Number 2
`)
})
test('parses multiline if', () => {
expect(`
if x < 9:
yes
end`).toMatchTree(`
IfExpr
keyword if
ConditionalOp
Identifier x
operator <
Number 9
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier yes
end end
`)
})
test('parses multiline if with else', () => {
expect(`if with-else:
x
else:
y
end`).toMatchTree(`
IfExpr
keyword if
Identifier with-else
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier x
ElseExpr
keyword else
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier y
end end
`)
})
test('parses multiline if with elsif', () => {
expect(`if with-elsif:
x
elsif another-condition:
y
end`).toMatchTree(`
IfExpr
keyword if
Identifier with-elsif
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier x
ElsifExpr
keyword elsif
Identifier another-condition
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier y
end end
`)
})
test('parses multiline if with multiple elsif and else', () => {
expect(`if with-elsif-else:
x
elsif another-condition:
y
elsif yet-another-condition:
z
else:
oh-no
end`).toMatchTree(`
IfExpr
keyword if
Identifier with-elsif-else
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier x
ElsifExpr
keyword elsif
Identifier another-condition
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier y
ElsifExpr
keyword elsif
Identifier yet-another-condition
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier z
ElseExpr
keyword else
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier oh-no
end end
`)
})
})
describe('multiline', () => { describe('multiline', () => {
test('parses multiline strings', () => { test('parses multiline strings', () => {
expect(`'first'\n'second'`).toMatchTree(` expect(`'first'\n'second'`).toMatchTree(`
@ -367,16 +551,16 @@ describe('multiline', () => {
`).toMatchTree(` `).toMatchTree(`
Assign Assign
Identifier add Identifier add
= = operator =
FunctionDef FunctionDef
fn fn keyword fn
Params Params
Identifier a Identifier a
Identifier b Identifier b
: : colon :
Assign Assign
Identifier result Identifier result
= = operator =
BinOp BinOp
Identifier a Identifier a
operator + operator +
@ -406,11 +590,11 @@ end
Number 3 Number 3
FunctionDef FunctionDef
fn fn keyword fn
Params Params
Identifier x Identifier x
Identifier y Identifier y
: : colon :
FunctionCallOrIdentifier FunctionCallOrIdentifier
Identifier x Identifier x
end end end end

View File

@ -2,12 +2,7 @@
@skip { space } @skip { space }
@top Program { (statement | newlineOrSemicolon)+ eof?} @top Program { item* }
statement {
line newlineOrSemicolon |
line eof
}
@tokens { @tokens {
@precedence { Number "-" } @precedence { Number "-" }
@ -21,10 +16,20 @@ statement {
space { " " | "\t" } space { " " | "\t" }
leftParen { "(" } leftParen { "(" }
rightParen { ")" } rightParen { ")" }
":" colon[closedBy="end", @name="colon"] { ":" }
"fn" end[openedBy="colon", @name="end"] { "end" }
"end" "fn" [@name=keyword]
"=" "if" [@name=keyword]
"elsif" [@name=keyword]
"else" [@name=keyword]
"and" [@name=operator]
"or" [@name=operator]
"!=" [@name=operator]
"<" [@name=operator]
"<=" [@name=operator]
">" [@name=operator]
">=" [@name=operator]
"=" [@name=operator]
"+"[@name=operator] "+"[@name=operator]
"-"[@name=operator] "-"[@name=operator]
"*"[@name=operator] "*"[@name=operator]
@ -39,28 +44,30 @@ statement {
call call
} }
line { item {
FunctionCall | consumeToTerminator newlineOrSemicolon |
FunctionCallOrIdentifier | consumeToTerminator eof |
newlineOrSemicolon // allow blank lines
}
consumeToTerminator {
ambiguousFunctionCall |
IfExpr |
FunctionDef | FunctionDef |
Assign | Assign |
BinOp |
expressionWithoutIdentifier expressionWithoutIdentifier
} }
expression {
expressionWithoutIdentifier | Identifier
}
expressionWithoutIdentifier {
BinOp |
valueWithoutIdentifier
}
FunctionCallOrIdentifier { FunctionCallOrIdentifier {
Identifier Identifier
} }
ambiguousFunctionCall {
FunctionCall | FunctionCallOrIdentifier
}
FunctionCall { FunctionCall {
Identifier arg+ Identifier arg+
} }
@ -70,11 +77,11 @@ arg {
} }
PositionalArg { PositionalArg {
value expression
} }
NamedArg { NamedArg {
NamedArgPrefix value NamedArgPrefix expression
} }
FunctionDef { FunctionDef {
@ -82,11 +89,46 @@ FunctionDef {
} }
singleLineFunctionDef { singleLineFunctionDef {
"fn" Params ":" expression "fn" Params colon consumeToTerminator
} }
multilineFunctionDef { multilineFunctionDef {
"fn" Params ":" newlineOrSemicolon (line newlineOrSemicolon)* "end" "fn" Params colon newlineOrSemicolon block end
}
IfExpr {
singleLineIf | multilineIf
}
singleLineIf {
"if" (ConditionalOp | expression) colon ThenBlock { consumeToTerminator }
}
multilineIf {
"if" (ConditionalOp | expression) colon newlineOrSemicolon ThenBlock ElsifExpr* ElseExpr? end
}
ElsifExpr {
"elsif" (ConditionalOp | expression) colon newlineOrSemicolon ThenBlock
}
ElseExpr {
"else" colon newlineOrSemicolon ThenBlock
}
ThenBlock {
block
}
ConditionalOp {
expression "=" expression |
expression "!=" expression |
expression "<" expression |
expression "<=" expression |
expression ">" expression |
expression ">=" expression |
expression "and" (expression | ConditionalOp) |
expression "or" (expression | ConditionalOp)
} }
Params { Params {
@ -94,24 +136,36 @@ Params {
} }
Assign { Assign {
Identifier "=" line Identifier "=" consumeToTerminator
} }
BinOp { BinOp {
expression !multiplicative "*" expression | (expression | BinOp) !multiplicative "*" (expression | BinOp) |
expression !multiplicative "/" expression | (expression | BinOp) !multiplicative "/" (expression | BinOp) |
expression !additive "+" expression | (expression | BinOp) !additive "+" (expression | BinOp) |
expression !additive "-" expression (expression | BinOp) !additive "-" (expression | BinOp)
} }
ParenExpr { ParenExpr {
leftParen (expressionWithoutIdentifier | FunctionCall | FunctionCallOrIdentifier) rightParen leftParen (ambiguousFunctionCall | BinOp | expressionWithoutIdentifier | ConditionalOp ) rightParen
} }
value { expression {
valueWithoutIdentifier | Identifier expressionWithoutIdentifier | Identifier
} }
valueWithoutIdentifier { // We need expressionWithoutIdentifier to avoid conflicts in consumeToTerminator.
// Without this, when parsing "my-var" at statement level, the parser can't decide:
// - ambiguousFunctionCall → FunctionCallOrIdentifier → Identifier
// - expression → Identifier
// Both want the same Identifier token! So we use expressionWithoutIdentifier
// to remove Identifier from the second path, forcing standalone identifiers
// to go through ambiguousFunctionCall (which is what we want semantically).
// Yes, it is annoying and I gave up trying to use GLR to fix it.
expressionWithoutIdentifier {
ParenExpr | Word | String | Number | Boolean ParenExpr | Word | String | Number | Boolean
} }
block {
(consumeToTerminator newlineOrSemicolon)*
}

View File

@ -7,12 +7,19 @@ export const
PositionalArg = 5, PositionalArg = 5,
ParenExpr = 6, ParenExpr = 6,
BinOp = 7, BinOp = 7,
FunctionCallOrIdentifier = 12, ConditionalOp = 12,
String = 13, String = 21,
Number = 14, Number = 22,
Boolean = 15, Boolean = 23,
NamedArg = 16, NamedArg = 24,
NamedArgPrefix = 17, NamedArgPrefix = 25,
FunctionDef = 18, FunctionCallOrIdentifier = 26,
Params = 20, IfExpr = 27,
Assign = 23 colon = 29,
ThenBlock = 31,
ElsifExpr = 32,
ElseExpr = 34,
end = 36,
FunctionDef = 37,
Params = 39,
Assign = 40

View File

@ -4,16 +4,20 @@ import {tokenizer} from "./tokenizer"
import {highlighting} from "./highlight" import {highlighting} from "./highlight"
export const parser = LRParser.deserialize({ export const parser = LRParser.deserialize({
version: 14, version: 14,
states: "'[OVQTOOOqQPO'#DTO!zQUO'#DTO#XQPOOOOQO'#DS'#DSO#xQTO'#CbOOQS'#DQ'#DQO$PQTO'#DVOOQO'#Cn'#CnOOQO'#C}'#C}O$XQPO'#C|OOQS'#Cu'#CuQ$aQTOOOOQS'#DP'#DPOOQS'#Ca'#CaO$hQTO'#ClOOQS'#DO'#DOOOQS'#Cv'#CvO$oQUO,58zO%SQTO,59_O%^QTO,58}O%^QTO,58}O%eQPO,58|O%vQUO'#DTO%}QPO,58|OOQS'#Cw'#CwO&SQTO'#CpO&[QPO,59qOOQS,59h,59hOOQS-E6s-E6sQOQPOOOOQS,59W,59WOOQS-E6t-E6tOOQO1G.y1G.yOOQO'#DT'#DTOOQO1G.i1G.iO&aQPO1G.iOOQS1G.h1G.hOOQS-E6u-E6uO&xQTO1G/]O'SQPO7+$wO'hQTO7+$xO'uQPO'#CxO'zQTO<<HdOOQO<<Hd<<HdOOQS,59d,59dOOQS-E6v-E6vOOQOAN>OAN>O", states: "+pQVQTOOOtQPO'#CcO!SQPO'#D`O!yQTO'#CbOOQS'#Dd'#DdO#QQPO'#DcO#fQUO'#DcOOQO'#Da'#DaO$VQTO'#DgOOQO'#Cw'#CwO$^QTO'#DkOOQO'#DR'#DROOQO'#D`'#D`O$fQPO'#D_OOQS'#D_'#D_OOQS'#DV'#DVQVQTOOO$VQTO,58}O$VQTO,58}O%YQPO'#CcO%jQPO,58|O%vQPO,58|O&sQPO,58|O&zQUO'#DcOOQS'#Dc'#DcOOQS'#Ca'#CaO$VQTO'#CtOOQS'#Db'#DbOOQS'#DW'#DWO'hQUO,58zO'{QTO,59pO(YQPO,5:RO(aQPO,5:ROOQS'#DZ'#DZO(fQTO'#DTO(nQPO,5:VOOQS,59y,59yOOQS-E7T-E7TOOQO1G.i1G.iO(sQPO1G.iO$VQTO,59SO$VQTO,59SOOQS1G.h1G.hOOQS,59`,59`OOQS-E7U-E7UOOQO1G/[1G/[O)[QTO1G/mOOQS-E7X-E7XO)lQTO1G/qOOQO1G.n1G.nO)|QPO1G.nOOQO'#Cz'#CzOOQO7+%X7+%XO*WQTO7+%YOOQO7+%]7+%]O*nQTO7+%^O+OQPO'#DXO+TQTO'#DjOOQO'#C{'#C{O+kQPO<<HtO+vQPO<<HxOOQS,59s,59sOOQS-E7V-E7VO$VQTO'#C|OOQO'#DY'#DYO+{QPOAN>`O,WQPO'#DOOOQOAN>`AN>`O,RQPOAN>`OOQOAN>dAN>dO,]QPO,59hO,dQPO,59hOOQO-E7W-E7WOOQOG23zG23zO,iQPOG23zO,nQPO,59jO,sQPO1G/SOOQOLD)fLD)fO*nQTO1G/UO*WQTO7+$nOOQO7+$p7+$pOOQO<<HY<<HY",
stateData: "([~OoOS~OPQOQUO]UO^UO_UOcVOuTO{ZO~OWwXXwXYwXZwX{qX|qX~OP]OQUO]UO^UO_UOa_OuTOWwXXwXYwXZwX~OhcO{[X|[X~P!VOWdOXdOYeOZeO~OQUO]UO^UO_UOuTO~OPgO~P#gOPiOedP~O{lO|lO~O|nO~PVOP]O~P#gOP]Oa_O{Sa|SaxSa~P#gOPQOcVO~P#gOPrO~P#gOxuOWwXXwXYwXZwX~Ox[X~P!VOxuO~OPiOedX~OewO~OWdOXdOYViZVi{Vi|VixVi~OPrO{yO~P#gOWdOXdOYeOZeO{yq|yq~OPQOcVOf|O~P#gO{}O~OPQOcVOf!PO~P#gO^Z~", stateData: ",{~O!QOS~OPUOQSOeSOfSOgSOlWOvYO!XRO!]^O~OWaOXaOYbOZbO~OWaOXaOYbOZbO!]!SX!a!SX~OQSOeSOfSOgSO!XRO~OPgO~P!hOW!VXX!VXY!VXZ!VX!]!SX!a!SX~OPhO]nOijOW!VXX!VXY!VXZ!VX!]jX!ajX~P!hOPhO~P!hOPqOmwP~O!]tO!atO~O]xO^xO_xO`xOaxObxOcyOdyO~OWaOXaOYbOZbO~P$nOWaOXaOYbOZbO!YzO~OW!VXX!VXY!VXZ!VX]!VX^!VX_!VX`!VXa!VXb!VXc!VXd!VX~O!YzO~P%{OPhOQSOeSOfSOgSOijO!XRO!YjX~P%{OPhOijO!]Sa!aSa!YSa~P!hOPUOlWOvYO~P!hOm!OO~P$nOm!OO~OPqOmwX~Om!QO~OWaOXaOYViZVi!]Vi!aVi!YVi~OPUOlWOvYO!]!VO~P!hOPUOlWOvYO!]!XO~P!hO!Y[im[i~P$nOPUOlWOvYOq!^Ps!^Pt!^P~P!hOPUOlWOvYOt!^P~P!hO!]!_O~OPUOlWOvYOq!^Xs!^Xt!^X~P!hOq!aOs!dOt!eO~Ot!gO~Oq!aOs!dOt!kO~Om!mO~Om!nO~P$nOm!nO~Ot!oO~O!]!pO~O!]!qO~OfZ~",
goto: "%d{PPPP|!W!]!lPPPP|PPP!WP!wP#OPP!wP#R#X#`#fPPP#l#p#{$Q$YP$k$zP%]%]YXO[cy{RhTV`QbgkUOQT[_bcdegwy{cSOT[cdewy{ZXO[cy{RkVQ[ORm[SbQgRpbQjVRvjQ{yR!O{TZO[SYO[QqcTzy{VaQbgU^QbgRo_bSOT[cdewy{X]Q_bgYPO[cy{QfTVrdew[ROT[cy{QsdQteRxwZWO[cy{", goto: "(o!aPPPP!b!o!t#]PPPP#rPPPPPPPPPPP!oP!b$OPP$[$_$hP$lPP$OP$r$O$u${%S%]%cPPP%i%m&R&b&g'bPP'}'}P(Z(c(cgVOR`n!O!Q!V!X!Z!p!qVkUgm{SORUW`abgjmnxy!O!Q!V!X!Z!a!p!qdQO`n!O!Q!V!X!Z!p!qQdRQvaRwbQeRQpWQ!RyR!i!ae[O`n!O!Q!V!X!Z!p!qR!U!OQ!]!VQ!r!pR!s!qT!b!]!cQ!f!]R!l!cRsYQ`ORu`SmUgR|mW!Z!V!X!p!qR!`!ZQ!c!]R!j!cQrYR!PrT_O`S]O`Q}nQ!T!OQ!W!QZ!Y!V!X!Z!p!qd[O`n!O!Q!V!X!Z!p!qReRVlUgmdPO`n!O!Q!V!X!Z!p!qQcRUiUgmQoWQvaQwbQ{jQ!RxQ!SyR!h!adTO`n!O!Q!V!X!Z!p!qQfRehUWabgjmxy!aeXO`n!O!Q!V!X!Z!p!qU![!V!p!qR!^!XeZO`n!O!Q!V!X!Z!p!q",
nodeNames: "⚠ Identifier Word Program FunctionCall PositionalArg ParenExpr BinOp operator operator operator operator FunctionCallOrIdentifier String Number Boolean NamedArg NamedArgPrefix FunctionDef fn Params : end Assign =", nodeNames: "⚠ Identifier Word Program FunctionCall PositionalArg ParenExpr BinOp operator operator operator operator ConditionalOp operator operator operator operator operator operator operator operator String Number Boolean NamedArg NamedArgPrefix FunctionCallOrIdentifier IfExpr keyword colon ThenBlock ThenBlock ElsifExpr keyword ElseExpr keyword end FunctionDef keyword Params Assign",
maxTerm: 44, maxTerm: 63,
nodeProps: [
["closedBy", 29,"end"],
["openedBy", 36,"colon"]
],
propSources: [highlighting], propSources: [highlighting],
skippedNodes: [0], skippedNodes: [0],
repeatNodeCount: 4, repeatNodeCount: 5,
tokenData: "(j~ReXY!dYZ!ipq!dwx!nxy#]yz#bz{#g{|#l}!O#q!P!Q$d!Q![#y![!]$i!]!^!i!_!`$n#T#X$s#X#Y%R#Y#Z%|#Z#h$s#h#i'u#i#o$s~~(e~!iOo~~!nO{~~!qTOw!nwx#Qx;'S!n;'S;=`#V<%lO!n~#VO]~~#YP;=`<%l!n~#bOu~~#gOx~~#lOW~~#qOY~~#vPZ~!Q![#y~$OQ^~!O!P$U!Q![#y~$XP!Q![$[~$aP^~!Q![$[~$iOX~~$nOe~~$sOh~Q$vQ!_!`$|#T#o$sQ%ROaQR%US!_!`$|#T#b$s#b#c%b#c#o$sR%eS!_!`$|#T#W$s#W#X%q#X#o$sR%vQfP!_!`$|#T#o$s~&PT!_!`$|#T#U&`#U#b$s#b#c'j#c#o$s~&cS!_!`$|#T#`$s#`#a&o#a#o$s~&rS!_!`$|#T#g$s#g#h'O#h#o$s~'RS!_!`$|#T#X$s#X#Y'_#Y#o$s~'dQ_~!_!`$|#T#o$sR'oQcP!_!`$|#T#o$s~'xS!_!`$|#T#f$s#f#g(U#g#o$s~(XS!_!`$|#T#i$s#i#j'O#j#o$s~(jO|~", tokenData: "-m~RmXY!|YZ#Rpq!|qr#Wwx#cxy$Qyz$Vz{$[{|$a}!O$f!P!Q%X!Q![$n![!]%^!]!^#R!^!_%c!_!`%p!`!a%u#T#U&S#U#X&h#X#Y']#Y#Z)y#Z#]&h#]#^+r#^#c&h#c#d,^#d#h&h#h#i,x#i#o&h~~-h~#RO!Q~~#WO!]~~#ZP!_!`#^~#cO^~~#fTOw#cwx#ux;'S#c;'S;=`#z<%lO#c~#zOe~~#}P;=`<%l#c~$VO!X~~$[O!Y~~$aOW~~$fOY~~$kPZ~!Q![$n~$sQf~!O!P$y!Q![$n~$|P!Q![%P~%UPf~!Q![%P~%^OX~~%cOm~~%hP_~!_!`%k~%pO`~~%uO]~~%zPa~!_!`%}~&SOb~~&VS!_!`&c#T#b&h#b#c&q#c#o&hQ&hOiQQ&kQ!_!`&c#T#o&h~&tS!_!`&c#T#W&h#W#X'Q#X#o&h~'VQc~!_!`&c#T#o&hR'`U!_!`&c#T#`&h#`#a'r#a#b&h#b#c)_#c#o&hR'uS!_!`&c#T#g&h#g#h(R#h#o&hR(UU!_!`&c#T#X&h#X#Y(h#Y#]&h#]#^(s#^#o&hR(mQsP!_!`&c#T#o&hR(vS!_!`&c#T#Y&h#Y#Z)S#Z#o&hR)XQqP!_!`&c#T#o&hR)bS!_!`&c#T#W&h#W#X)n#X#o&hR)sQtP!_!`&c#T#o&h~)|T!_!`&c#T#U*]#U#b&h#b#c+g#c#o&h~*`S!_!`&c#T#`&h#`#a*l#a#o&h~*oS!_!`&c#T#g&h#g#h*{#h#o&h~+OS!_!`&c#T#X&h#X#Y+[#Y#o&h~+aQg~!_!`&c#T#o&hR+lQvP!_!`&c#T#o&hR+uS!_!`&c#T#Y&h#Y#Z,R#Z#o&hR,WQlP!_!`&c#T#o&h~,aS!_!`&c#T#f&h#f#g,m#g#o&h~,rQd~!_!`&c#T#o&h~,{S!_!`&c#T#f&h#f#g-X#g#o&h~-[S!_!`&c#T#i&h#i#j*{#j#o&h~-mO!a~",
tokenizers: [0, 1, tokenizer], tokenizers: [0, 1, tokenizer],
topRules: {"Program":[0,3]}, topRules: {"Program":[0,3]},
tokenPrec: 331 tokenPrec: 546
}) })

View File

@ -16,7 +16,7 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack
// Certain characters might end a word or identifier if they are followed by whitespace. // Certain characters might end a word or identifier if they are followed by whitespace.
// This allows things like `a = hello; 2` or a = (basename ./file.txt) // This allows things like `a = hello; 2` or a = (basename ./file.txt)
// to work as expected. // to work as expected.
if (canBeWord && (ch === 59 /* ; */ || ch === 41) /* ) */) { if ((canBeWord && (ch === 59 /* ; */ || ch === 41)) /* ) */ || ch === 58 /* : */) {
const nextCh = getFullCodePoint(input, pos + 1) const nextCh = getFullCodePoint(input, pos + 1)
if (isWhitespace(nextCh) || nextCh === -1) { if (isWhitespace(nextCh) || nextCh === -1) {
break break