hell yeah

This commit is contained in:
Corey Johnson 2025-10-10 15:50:09 -07:00
parent 560a946745
commit 597a25da80
11 changed files with 736 additions and 216 deletions

@ -1 +1 @@
Subproject commit 146b0a28831161e03966746acce7d5fc7fe2229d
Subproject commit 82e7b181ec1b0a2df4d76ca529b4736c9e56383b

View File

@ -63,7 +63,7 @@ describe('compiler', () => {
})
test('function', () => {
expect(`add = fn a b: a + b; add`).toEvaluateTo(Function)
expect(`fn a b: a + b`).toEvaluateTo(Function)
})
test('function call', () => {
@ -73,6 +73,61 @@ describe('compiler', () => {
test('function call with no args', () => {
expect(`bloop = fn: 'bloop'; bloop`).toEvaluateTo('bloop')
})
test('simple conditionals', () => {
expect(`(3 < 6)`).toEvaluateTo(true)
expect(`(10 > 20)`).toEvaluateTo(false)
expect(`(4 <= 9)`).toEvaluateTo(true)
expect(`(15 >= 20)`).toEvaluateTo(false)
expect(`(7 = 7)`).toEvaluateTo(true)
expect(`(5 != 5)`).toEvaluateTo(false)
expect(`('shave' and 'haircut')`).toEvaluateTo('haircut')
expect(`(false and witness)`).toEvaluateTo(false)
expect(`('pride' or 'prejudice')`).toEvaluateTo('pride')
expect(`(false or false)`).toEvaluateTo(false)
})
test('if', () => {
expect(`if 3 < 9:
shire
end`).toEvaluateTo('shire')
})
test('if else', () => {
expect(`if false:
grey
else:
white
end`).toEvaluateTo('white')
})
test('if elsif', () => {
expect(`if false:
boromir
elsif true:
frodo
end`).toEvaluateTo('frodo')
})
test('if elsif else', () => {
expect(`if false:
destroyed
elsif true:
fire
else:
darkness
end`).toEvaluateTo('fire')
expect(`if false:
king
elsif false:
elf
elsif true:
dwarf
else:
scattered
end`).toEvaluateTo('dwarf')
})
})
describe('errors', () => {

View File

@ -4,12 +4,21 @@ import * as terms from '#parser/shrimp.terms'
import type { SyntaxNode, Tree } from '@lezer/common'
import { assert, errorMessage } from '#utils/utils'
import { toBytecode, type Bytecode } from 'reefvm'
import { compile } from 'tailwindcss'
import {
checkTreeForErrors,
getAllChildren,
getAssignmentParts,
getBinaryParts,
getFunctionCallParts,
getFunctionDefParts,
getIfExprParts,
getNamedArgParts,
} from '#compiler/utils'
export class Compiler {
fnCounter = 0
instructions: string[] = []
labels = new Map<string, string[]>()
fnLabels = new Map<string, string[]>()
ifLabelCount = 0
bytecode: Bytecode
constructor(public input: string) {
@ -24,17 +33,17 @@ export class Compiler {
this.#compileCst(cst, input)
// Add the labels
for (const [label, labelInstructions] of this.labels) {
for (const [label, labelInstructions] of this.fnLabels) {
this.instructions.push(`${label}:`)
this.instructions.push(...labelInstructions.map((instr) => ` ${instr}`))
this.instructions.push(' RETURN')
}
// console.log(`🌭`, this.instructions.join('\n'))
// console.log(`\n🤖 instructions:\n----------------\n${this.instructions.join('\n')}\n\n`)
this.bytecode = toBytecode(this.instructions.join('\n'))
} catch (error) {
if (error instanceof CompilerError) {
throw new Error(`Compiler Error:\n${error.toReadableString(input)}`)
throw new Error(error.toReadableString(input))
} else {
throw new Error(`Unknown error during compilation:\n${errorMessage(error)}`)
}
@ -119,13 +128,13 @@ export class Compiler {
case terms.FunctionDef: {
const { paramNames, bodyNode } = getFunctionDefParts(node, input)
const instructions: string[] = []
const functionName = `.func_${this.labels.size}`
const functionName = `.func_${this.fnLabels.size}`
const bodyInstructions: string[] = []
if (this.labels.has(functionName)) {
if (this.fnLabels.has(functionName)) {
throw new CompilerError(`Function name collision: ${functionName}`, node.from, node.to)
}
this.labels.set(functionName, bodyInstructions)
this.fnLabels.set(functionName, bodyInstructions)
instructions.push(`MAKE_FUNCTION (${paramNames}) ${functionName}`)
bodyInstructions.push(...this.#compileNode(bodyNode, input))
@ -134,9 +143,7 @@ export class Compiler {
}
case terms.FunctionCallOrIdentifier: {
// For now, just treat them all like identifiers, but we might
// need something like TRY_CALL in the future.
return [`TRY_LOAD ${value}`]
return [`TRY_CALL ${value}`]
}
/*
@ -173,126 +180,112 @@ export class Compiler {
return instructions
}
case terms.ThenBlock: {
  // A ThenBlock emits nothing of its own: compile every child in source
  // order and splice the resulting instruction lists together.
  return getAllChildren(node).flatMap((statement) => this.#compileNode(statement, input))
}
// Compiles an if / elsif / else expression into one linear instruction list.
// Layout produced below:
//   <condition>, JUMP_IF_FALSE #n, <then body>, JUMP .end_N
//   (repeated once per elsif)
//   .else_N:  <else body>  or  PUSH null
//   .end_N:
case terms.IfExpr: {
const { conditionNode, thenBlock, elseIfBlocks, elseThenBlock } = getIfExprParts(
node,
input
)
const instructions: string[] = []
instructions.push(...this.#compileNode(conditionNode, input))
// The label names are captured as strings right after the increment, so nested
// ifs compiled further down (which bump the counter again) cannot change them.
this.ifLabelCount++
const elseLabel = `.else_${this.ifLabelCount}`
const endLabel = `.end_${this.ifLabelCount}`
const thenBlockInstructions = this.#compileNode(thenBlock, input)
// NOTE(review): `#n` is presumably a relative offset counted in instructions; the +1
// would then skip the trailing `JUMP` as well. If a nested if contributes label
// lines (`.else_N:` / `.end_N:`), `.length` counts those too — confirm whether the
// VM counts labels toward relative offsets.
instructions.push(`JUMP_IF_FALSE #${thenBlockInstructions.length + 1}`)
instructions.push(...thenBlockInstructions)
instructions.push(`JUMP ${endLabel}`)
// Else if
// Each elsif repeats the same condition / conditional-skip / body / jump-to-end shape.
// NOTE(review): the `index` callback parameter is unused.
elseIfBlocks.forEach(({ conditional, thenBlock }, index) => {
instructions.push(...this.#compileNode(conditional, input))
const elseIfInstructions = this.#compileNode(thenBlock, input)
instructions.push(`JUMP_IF_FALSE #${elseIfInstructions.length + 1}`)
instructions.push(...elseIfInstructions)
instructions.push(`JUMP ${endLabel}`)
})
// Else
// NOTE(review): nothing in this block jumps to the else label; the else code is
// reached by the relative JUMP_IF_FALSE of the last failed condition landing here.
instructions.push(`${elseLabel}:`)
if (elseThenBlock) {
const elseThenInstructions = this.#compileNode(elseThenBlock, input).map((i) => ` ${i}`)
instructions.push(...elseThenInstructions)
} else {
// No else branch: emit `PUSH null` instead (presumably so the expression still
// leaves a value behind when no condition matched — confirm).
instructions.push(` PUSH null`)
}
instructions.push(`${endLabel}:`)
return instructions
}
// Compiles a ConditionalOp node (a comparison or a logical `and` / `or`).
// - `EQ`, `NEQ`, `LT`, `GT`, `LTE`, `GTE` - Pop 2, push boolean
// - `and` / `or` short-circuit and evaluate to one of their operands (not a
//   coerced boolean), e.g. `'pride' or 'prejudice'` evaluates to 'pride'.
// Throws a CompilerError for any operator text not listed below.
case terms.ConditionalOp: {
  const instructions: string[] = []
  const { left, op, right } = getBinaryParts(node)
  // Both operands are always compiled (left first); for `and` / `or` the right-hand
  // instructions are only *executed* when the jump below is not taken.
  const leftInstructions: string[] = this.#compileNode(left, input)
  const rightInstructions: string[] = this.#compileNode(right, input)
  const opValue = input.slice(op.from, op.to)

  switch (opValue) {
    case '=':
      instructions.push(...leftInstructions, ...rightInstructions, 'EQ')
      break

    case '!=':
      instructions.push(...leftInstructions, ...rightInstructions, 'NEQ')
      break

    case '<':
      instructions.push(...leftInstructions, ...rightInstructions, 'LT')
      break

    case '>':
      instructions.push(...leftInstructions, ...rightInstructions, 'GT')
      break

    case '<=':
      instructions.push(...leftInstructions, ...rightInstructions, 'LTE')
      break

    case '>=':
      instructions.push(...leftInstructions, ...rightInstructions, 'GTE')
      break

    case 'and':
      // Duplicate the left value and test the copy: when it is falsy, skip the POP and
      // the right operand so the left value stays as the result; otherwise discard the
      // left value and evaluate the right operand.
      instructions.push(...leftInstructions)
      instructions.push('DUP')
      instructions.push(`JUMP_IF_FALSE #${rightInstructions.length + 1}`)
      instructions.push('POP')
      instructions.push(...rightInstructions)
      break

    case 'or':
      // Mirror image of `and`. This previously emitted `PUSH 9` instead of `DUP`, so the
      // jump tested a constant rather than the left value and the right operand was
      // never evaluated (`false or 'x'` produced false).
      // NOTE(review): assumes JUMP_IF_TRUE treats its operand the same way
      // JUMP_IF_FALSE does in the `and` branch — confirm against the VM.
      instructions.push(...leftInstructions)
      instructions.push('DUP')
      instructions.push(`JUMP_IF_TRUE #${rightInstructions.length + 1}`)
      instructions.push('POP')
      instructions.push(...rightInstructions)
      break

    default:
      throw new CompilerError(`Unsupported conditional operator: ${opValue}`, op.from, op.to)
  }

  return instructions
}
default:
throw new CompilerError(`Unsupported syntax node: ${node.type.name}`, node.from, node.to)
}
}
}
// Helper functions for extracting node parts
const getAllChildren = (node: SyntaxNode): SyntaxNode[] => {
const children: SyntaxNode[] = []
let child = node.firstChild
while (child) {
children.push(child)
child = child.nextSibling
}
return children
}
const getBinaryParts = (node: SyntaxNode) => {
const children = getAllChildren(node)
const [left, op, right] = children
if (!left || !op || !right) {
throw new CompilerError(`BinOp expected 3 children, got ${children.length}`, node.from, node.to)
}
return { left, op, right }
}
const getAssignmentParts = (node: SyntaxNode) => {
const children = getAllChildren(node)
const [left, equals, right] = children
if (!left || left.type.id !== terms.Identifier) {
throw new CompilerError(
`Assign left child must be an Identifier, got ${left ? left.type.name : 'none'}`,
node.from,
node.to
)
} else if (!equals || !right) {
throw new CompilerError(
`Assign expected 3 children, got ${children.length}`,
node.from,
node.to
)
}
return { identifier: left, right }
}
const checkTreeForErrors = (tree: Tree, input: string): string[] => {
const errors: string[] = []
tree.iterate({
enter: (node) => {
if (node.type.isError) {
const errorText = input.slice(node.from, node.to)
errors.push(`Syntax error at ${node.from}-${node.to}: "${errorText}"`)
}
},
})
return errors
}
const getFunctionDefParts = (node: SyntaxNode, input: string) => {
const children = getAllChildren(node)
const [fnKeyword, paramsNode, colon, bodyNode] = children
if (!fnKeyword || !paramsNode || !colon || !bodyNode) {
throw new CompilerError(
`FunctionDef expected 5 children, got ${children.length}`,
node.from,
node.to
)
}
const paramNames = getAllChildren(paramsNode)
.map((param) => {
if (param.type.id !== terms.Identifier) {
throw new CompilerError(
`FunctionDef params must be Identifiers, got ${param.type.name}`,
param.from,
param.to
)
}
return input.slice(param.from, param.to)
})
.join(' ')
return { paramNames, bodyNode }
}
const getFunctionCallParts = (node: SyntaxNode, input: string) => {
const [identifierNode, ...args] = getAllChildren(node)
if (!identifierNode) {
throw new CompilerError(`FunctionCall expected at least 1 child, got 0`, node.from, node.to)
}
const namedArgs = args.filter((arg) => arg.type.id === terms.NamedArg)
const positionalArgs = args
.filter((arg) => arg.type.id === terms.PositionalArg)
.map((arg) => {
const child = arg.firstChild
if (!child) throw new CompilerError(`PositionalArg has no child`, arg.from, arg.to)
return child
})
return { identifierNode, namedArgs, positionalArgs }
}
const getNamedArgParts = (node: SyntaxNode, input: string) => {
const children = getAllChildren(node)
const [namedArgPrefix, valueNode] = getAllChildren(node)
if (!namedArgPrefix || !valueNode) {
const message = `NamedArg expected 2 children, got ${children.length}`
throw new CompilerError(message, node.from, node.to)
}
const name = input.slice(namedArgPrefix.from, namedArgPrefix.to - 2) // Remove the trailing =
return { name, valueNode }
}

View File

@ -5,12 +5,71 @@ export class CompilerError extends Error {
this.message = message
}
// This code is A MESS, but I don't really care because once we get it right we'll never touch it again.
// Renders this error as a readable report for the given source text: the lines leading
// up to the error position, an underline beneath the offending columns, and the error
// message, wrapped in ANSI colours. Falls back to a one-line
// "<message> at position from:to" string when `lineAtPosition` finds no line.
toReadableString(input: string) {
// NOTE(review): `pointer`, this first `message`, and the `return` that follows the
// `lineInfo` check look like the previous implementation left interleaved with the new
// one — `message` is declared a second time further down and that `return` makes the
// rest of the method unreachable. Confirm which lines belong to the current revision.
const pointer = ' '.repeat(this.from) + '^'.repeat(this.to - this.from)
const message = `${this.message} at "${input.slice(this.from, this.to)}" (${this.from}:${
this.to
})`
const lineInfo = this.lineAtPosition(input)
if (!lineInfo) {
return `${this.message} at position ${this.from}:${this.to}`
}
return `${input}\n${pointer}\n${message}`
const { lineNumber, columnStart, columnEnd } = lineInfo
// Despite the name, this slice ends at `lineNumber` (exclusive index), so it contains
// the error line itself plus up to seven lines before it.
const previousSevenLines = input.split('\n').slice(Math.max(0, lineNumber - 8), lineNumber)
// Width of the line-number gutter, taken from the error line's own number.
const padding = lineNumber.toString().length
const ws = ' '.repeat(padding + 1)
const lines = previousSevenLines
.map((line, index) => {
const currentLineNumber = lineNumber - previousSevenLines.length + index + 1
return `${grey(currentLineNumber.toString().padStart(padding))}${line}`
})
.join('\n')
// NOTE(review): this division can yield a non-integer (odd span width);
// String.prototype.repeat truncates fractional counts, so the two underline halves
// can come out one character shorter than the span in total.
const underlineStartLen = (columnEnd - columnStart) / 2
const underlineEndLen = columnEnd - columnStart - underlineStartLen
const underline =
' '.repeat(columnStart - 1) +
'─'.repeat(underlineStartLen) +
'┬' +
'─'.repeat(underlineEndLen)
// The arrow is indented to sit under the '┬' joint of the underline.
const messageWithArrow =
' '.repeat(columnStart + underlineStartLen - 1) + '╰── ' + blue(this.message)
const message = `${green('')}
${ws} ${red('Compiler Error')}
${ws}
${lines}
${ws} ${underline}
${ws} ${messageWithArrow}
${ws}
`
return `${message}`
}
/**
 * Finds the line of `input` that contains `this.from`.
 *
 * @param input Full source text the error offsets refer to.
 * @returns The 1-based line number, the 1-based start column, the end column
 *   (clamped to the line length when the span runs past the end of the line)
 *   and the line text; `undefined` when no line contains `this.from`.
 */
lineAtPosition(input: string) {
  let lineStart = 0

  for (const [index, text] of input.split('\n').entries()) {
    const containsStart = this.from >= lineStart && this.from <= lineStart + text.length

    if (containsStart) {
      const columnStart = this.from - lineStart + 1
      const unclampedEnd = columnStart + (this.to - this.from) - 1
      // A span that runs past this line is cut off at the end of the line.
      const columnEnd = unclampedEnd > text.length ? text.length : unclampedEnd
      return { lineNumber: index + 1, columnStart, columnEnd, text }
    }

    lineStart += text.length + 1 // step over the newline character as well
  }

  return undefined
}
}
// ANSI escape helpers: each wraps `text` in one SGR code and then resets all attributes.
function red(text: string) {
  return '\x1b[31m' + text + '\x1b[0m'
}
function green(text: string) {
  return '\x1b[32m' + text + '\x1b[0m'
}
function blue(text: string) {
  return '\x1b[34m' + text + '\x1b[0m'
}
function grey(text: string) {
  return '\x1b[90m' + text + '\x1b[0m'
}
function underline(text: string) {
  return '\x1b[4m' + text + '\x1b[0m'
}
function bold(text: string) {
  return '\x1b[1m' + text + '\x1b[0m'
}

158
src/compiler/utils.ts Normal file
View File

@ -0,0 +1,158 @@
import { CompilerError } from '#compiler/compilerError.ts'
import * as terms from '#parser/shrimp.terms'
import type { SyntaxNode, Tree } from '@lezer/common'
/**
 * Walks the whole parse tree and reports every error node.
 *
 * @param tree Parse tree to inspect.
 * @param input Source text the tree was parsed from (used to quote the offending text).
 * @returns One message per error node, in visiting order; empty when there are none.
 */
export const checkTreeForErrors = (tree: Tree, input: string): string[] => {
  const messages: string[] = []

  tree.iterate({
    enter: (node) => {
      if (!node.type.isError) return
      const { from, to } = node
      messages.push(`Syntax error at ${from}-${to}: "${input.slice(from, to)}"`)
    },
  })

  return messages
}
/**
 * Collects the direct children of `node`, in order, by following
 * `firstChild` and then each `nextSibling` link.
 *
 * @returns The children in sibling order; empty when `node` has no first child.
 */
export const getAllChildren = (node: SyntaxNode): SyntaxNode[] => {
  const collected: SyntaxNode[] = []
  for (let current = node.firstChild; current; current = current.nextSibling) {
    collected.push(current)
  }
  return collected
}
/**
 * Splits a binary node into its first three children: left operand, operator, right operand.
 *
 * @throws CompilerError when any of the three is missing.
 */
export const getBinaryParts = (node: SyntaxNode) => {
  const parts = getAllChildren(node)
  const left = parts[0]
  const op = parts[1]
  const right = parts[2]

  if (left && op && right) {
    return { left, op, right }
  }

  throw new CompilerError(`BinOp expected 3 children, got ${parts.length}`, node.from, node.to)
}
/**
 * Extracts the target identifier and the right-hand side of an Assign node.
 *
 * @returns `identifier` (first child) and `right` (third child).
 * @throws CompilerError when the first child is missing or is not an Identifier,
 *   or when the second or third child is missing.
 */
export const getAssignmentParts = (node: SyntaxNode) => {
  const children = getAllChildren(node)
  const [target, equalsSign, value] = children

  if (!target || target.type.id !== terms.Identifier) {
    const found = target ? target.type.name : 'none'
    throw new CompilerError(
      `Assign left child must be an Identifier, got ${found}`,
      node.from,
      node.to
    )
  }

  if (!equalsSign || !value) {
    throw new CompilerError(
      `Assign expected 3 children, got ${children.length}`,
      node.from,
      node.to
    )
  }

  return { identifier: target, right: value }
}
/**
 * Extracts the parameter names and the body node of a FunctionDef node.
 *
 * The first four children are read as: `fn` keyword, Params, colon, body.
 *
 * @param node The FunctionDef node.
 * @param input Source text, used to slice out each parameter name.
 * @returns `paramNames` as a single space-separated string and `bodyNode`, the
 *   fourth child. NOTE(review): any children after the fourth are not returned —
 *   confirm how multi-statement bodies are expected to reach the caller.
 * @throws CompilerError when fewer than four children are present, or when a
 *   child of Params is not an Identifier.
 */
export const getFunctionDefParts = (node: SyntaxNode, input: string) => {
  const children = getAllChildren(node)
  const [fnKeyword, paramsNode, colon, bodyNode] = children

  if (!fnKeyword || !paramsNode || !colon || !bodyNode) {
    // The guard requires the first four children, so the message says so
    // (it previously claimed 5, which did not match the check).
    throw new CompilerError(
      `FunctionDef expected at least 4 children, got ${children.length}`,
      node.from,
      node.to
    )
  }

  const paramNames = getAllChildren(paramsNode)
    .map((param) => {
      if (param.type.id !== terms.Identifier) {
        throw new CompilerError(
          `FunctionDef params must be Identifiers, got ${param.type.name}`,
          param.from,
          param.to
        )
      }
      return input.slice(param.from, param.to)
    })
    .join(' ')

  return { paramNames, bodyNode }
}
/**
 * Splits a FunctionCall node into the callee identifier and its arguments.
 *
 * @param node The FunctionCall node.
 * @param input Not read by this helper.
 * @returns `identifierNode` (first child), `namedArgs` (the NamedArg children
 *   themselves) and `positionalArgs` (the first child of each PositionalArg).
 * @throws CompilerError when the node has no children or a PositionalArg is empty.
 */
export const getFunctionCallParts = (node: SyntaxNode, input: string) => {
  const children = getAllChildren(node)
  const identifierNode = children[0]
  if (!identifierNode) {
    throw new CompilerError(`FunctionCall expected at least 1 child, got 0`, node.from, node.to)
  }

  const namedArgs: SyntaxNode[] = []
  const positionalArgs: SyntaxNode[] = []

  for (const arg of children.slice(1)) {
    if (arg.type.id === terms.NamedArg) {
      namedArgs.push(arg)
    }
    if (arg.type.id === terms.PositionalArg) {
      // Positional arguments are unwrapped to the value node they contain.
      const inner = arg.firstChild
      if (!inner) throw new CompilerError(`PositionalArg has no child`, arg.from, arg.to)
      positionalArgs.push(inner)
    }
  }

  return { identifierNode, namedArgs, positionalArgs }
}
/**
 * Extracts the argument name and value node from a NamedArg node.
 *
 * @param node The NamedArg node; its first child is the name prefix, its second the value.
 * @param input Source text, used to slice the name out of the prefix.
 * @throws CompilerError when either child is missing.
 */
export const getNamedArgParts = (node: SyntaxNode, input: string) => {
  const children = getAllChildren(node)
  const prefixNode = children[0]
  const valueNode = children[1]

  if (!(prefixNode && valueNode)) {
    throw new CompilerError(
      `NamedArg expected 2 children, got ${children.length}`,
      node.from,
      node.to
    )
  }

  // Strip the trailing `=` from the prefix.
  // NOTE(review): this drops the last TWO characters of the prefix, not one — confirm
  // the prefix token really ends with two non-name characters.
  const name = input.slice(prefixNode.from, prefixNode.to - 2)
  return { name, valueNode }
}
/**
 * Breaks an IfExpr node into its condition, then-block, elsif branches and else block.
 *
 * The leading children are read as: `if` keyword, condition, colon, then-block.
 * Every remaining child is inspected: ElsifExpr children become entries of
 * `elseIfBlocks` (in source order), an ElseExpr child supplies `elseThenBlock`,
 * and any other child (for example the closing `end`) is ignored.
 *
 * @param node The IfExpr node.
 * @param input Not read by this helper.
 * @returns `conditionNode`, `thenBlock`, `elseIfBlocks` and `elseThenBlock`
 *   (`undefined` when there is no else branch).
 * @throws CompilerError when the keyword, condition or then-block is missing, when an
 *   ElseExpr does not have exactly 3 children, or when an ElsifExpr lacks its
 *   condition or then-block.
 */
export const getIfExprParts = (node: SyntaxNode, input: string) => {
  const children = getAllChildren(node)
  const [ifKeyword, conditionNode, _colon, thenBlock, ...rest] = children

  if (!ifKeyword || !conditionNode || !thenBlock) {
    throw new CompilerError(
      `IfExpr expected at least 4 children, got ${children.length}`,
      node.from,
      node.to
    )
  }

  const elseIfBlocks: { conditional: SyntaxNode; thenBlock: SyntaxNode }[] = []
  let elseThenBlock: SyntaxNode | undefined

  for (const child of rest) {
    const parts = getAllChildren(child)

    if (child.type.id === terms.ElseExpr) {
      // An else branch is read as keyword, colon, then-block. The message now matches
      // the check (it previously said "expected 1 child").
      if (parts.length !== 3) {
        const message = `ElseExpr expected 3 children, got ${parts.length}`
        throw new CompilerError(message, child.from, child.to)
      }
      elseThenBlock = parts.at(-1)
    } else if (child.type.id === terms.ElsifExpr) {
      const [_keyword, conditional, _elsifColon, elsifThenBlock] = parts
      if (!conditional || !elsifThenBlock) {
        const names = parts.map((p) => p.type.name).join(', ')
        const message = `ElsifExpr expected conditional and thenBlock, got ${names}`
        throw new CompilerError(message, child.from, child.to)
      }
      elseIfBlocks.push({ conditional, thenBlock: elsifThenBlock })
    }
  }

  return { conditionNode, thenBlock, elseThenBlock, elseIfBlocks }
}

View File

@ -54,6 +54,12 @@ export const shrimpTheme = EditorView.theme(
outline: 'none',
height: '100%',
},
'.cm-matchingBracket': {
backgroundColor: '#FF5370',
},
'.cm-nonmatchingBracket': {
backgroundColor: '#C3E88D',
},
},
{ dark: true }
)

View File

@ -2,6 +2,7 @@ import { expect, describe, test } from 'bun:test'
import { afterEach } from 'bun:test'
import { resetCommandSource, setCommandSource } from '#editor/commands'
import { beforeEach } from 'bun:test'
import './shrimp.grammar' // Importing this so changes cause it to retest!
describe('calling functions', () => {
@ -81,7 +82,7 @@ describe('Identifier', () => {
})
describe('Parentheses', () => {
test('parses expressions with parentheses correctly', () => {
test('allows binOps with parentheses correctly', () => {
expect('(2 + 3)').toMatchTree(`
ParenExpr
BinOp
@ -90,6 +91,54 @@ describe('Parentheses', () => {
Number 3`)
})
test('allows numbers, strings, and booleans with parentheses correctly', () => {
expect('(42)').toMatchTree(`
ParenExpr
Number 42`)
expect("('hello')").toMatchTree(`
ParenExpr
String hello`)
expect('(true)').toMatchTree(`
ParenExpr
Boolean true`)
expect('(false)').toMatchTree(`
ParenExpr
Boolean false`)
})
test('allows function calls in parens', () => {
expect('(echo 3)').toMatchTree(`
ParenExpr
FunctionCall
Identifier echo
PositionalArg
Number 3`)
expect('(echo)').toMatchTree(`
ParenExpr
FunctionCallOrIdentifier
Identifier echo`)
})
test('allows conditionals in parens', () => {
expect('(a > b)').toMatchTree(`
ParenExpr
ConditionalOp
Identifier a
operator >
Identifier b`)
expect('(a and b)').toMatchTree(`
ParenExpr
ConditionalOp
Identifier a
operator and
Identifier b`)
})
test('allows parens in function calls', () => {
expect('echo (3 + 3)').toMatchTree(`
FunctionCall
@ -198,19 +247,19 @@ describe('Fn', () => {
test('parses function no parameters', () => {
expect('fn: 1').toMatchTree(`
FunctionDef
fn fn
keyword fn
Params
: :
colon :
Number 1`)
})
test('parses function with single parameter', () => {
expect('fn x: x + 1').toMatchTree(`
FunctionDef
fn fn
keyword fn
Params
Identifier x
: :
colon :
BinOp
Identifier x
operator +
@ -220,11 +269,11 @@ describe('Fn', () => {
test('parses function with multiple parameters', () => {
expect('fn x y: x * y').toMatchTree(`
FunctionDef
fn fn
keyword fn
Params
Identifier x
Identifier y
: :
colon :
BinOp
Identifier x
operator *
@ -237,11 +286,11 @@ describe('Fn', () => {
x + 9
end`).toMatchTree(`
FunctionDef
fn fn
keyword fn
Params
Identifier x
Identifier y
: :
colon :
BinOp
Identifier x
operator *
@ -280,11 +329,11 @@ describe('newlines', () => {
y = 2`).toMatchTree(`
Assign
Identifier x
= =
operator =
Number 5
Assign
Identifier y
= =
operator =
Number 2`)
})
@ -292,11 +341,11 @@ y = 2`).toMatchTree(`
expect(`x = 5; y = 2`).toMatchTree(`
Assign
Identifier x
= =
operator =
Number 5
Assign
Identifier y
= =
operator =
Number 2`)
})
@ -304,7 +353,7 @@ y = 2`).toMatchTree(`
expect(`a = hello; 2`).toMatchTree(`
Assign
Identifier a
= =
operator =
FunctionCallOrIdentifier
Identifier hello
Number 2`)
@ -316,7 +365,7 @@ describe('Assign', () => {
expect('x = 5').toMatchTree(`
Assign
Identifier x
= =
operator =
Number 5`)
})
@ -324,7 +373,7 @@ describe('Assign', () => {
expect('x = 5 + 3').toMatchTree(`
Assign
Identifier x
= =
operator =
BinOp
Number 5
operator +
@ -335,13 +384,13 @@ describe('Assign', () => {
expect('add = fn a b: a + b').toMatchTree(`
Assign
Identifier add
= =
operator =
FunctionDef
fn fn
keyword fn
Params
Identifier a
Identifier b
: :
colon :
BinOp
Identifier a
operator +
@ -349,6 +398,141 @@ describe('Assign', () => {
})
})
describe('if/elsif/else', () => {
test('parses single line if', () => {
expect(`if y = 1: 'cool'`).toMatchTree(`
IfExpr
keyword if
ConditionalOp
Identifier y
operator =
Number 1
colon :
ThenBlock
String cool
`)
expect('a = if x: 2').toMatchTree(`
Assign
Identifier a
operator =
IfExpr
keyword if
Identifier x
colon :
ThenBlock
Number 2
`)
})
test('parses multiline if', () => {
expect(`
if x < 9:
yes
end`).toMatchTree(`
IfExpr
keyword if
ConditionalOp
Identifier x
operator <
Number 9
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier yes
end end
`)
})
test('parses multiline if with else', () => {
expect(`if with-else:
x
else:
y
end`).toMatchTree(`
IfExpr
keyword if
Identifier with-else
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier x
ElseExpr
keyword else
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier y
end end
`)
})
test('parses multiline if with elsif', () => {
expect(`if with-elsif:
x
elsif another-condition:
y
end`).toMatchTree(`
IfExpr
keyword if
Identifier with-elsif
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier x
ElsifExpr
keyword elsif
Identifier another-condition
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier y
end end
`)
})
test('parses multiline if with multiple elsif and else', () => {
expect(`if with-elsif-else:
x
elsif another-condition:
y
elsif yet-another-condition:
z
else:
oh-no
end`).toMatchTree(`
IfExpr
keyword if
Identifier with-elsif-else
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier x
ElsifExpr
keyword elsif
Identifier another-condition
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier y
ElsifExpr
keyword elsif
Identifier yet-another-condition
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier z
ElseExpr
keyword else
colon :
ThenBlock
FunctionCallOrIdentifier
Identifier oh-no
end end
`)
})
})
describe('multiline', () => {
test('parses multiline strings', () => {
expect(`'first'\n'second'`).toMatchTree(`
@ -367,16 +551,16 @@ describe('multiline', () => {
`).toMatchTree(`
Assign
Identifier add
= =
operator =
FunctionDef
fn fn
keyword fn
Params
Identifier a
Identifier b
: :
colon :
Assign
Identifier result
= =
operator =
BinOp
Identifier a
operator +
@ -406,11 +590,11 @@ end
Number 3
FunctionDef
fn fn
keyword fn
Params
Identifier x
Identifier y
: :
colon :
FunctionCallOrIdentifier
Identifier x
end end

View File

@ -2,12 +2,7 @@
@skip { space }
@top Program { (statement | newlineOrSemicolon)+ eof?}
statement {
line newlineOrSemicolon |
line eof
}
@top Program { item* }
@tokens {
@precedence { Number "-" }
@ -21,10 +16,20 @@ statement {
space { " " | "\t" }
leftParen { "(" }
rightParen { ")" }
":"
"fn"
"end"
"="
colon[closedBy="end", @name="colon"] { ":" }
end[openedBy="colon", @name="end"] { "end" }
"fn" [@name=keyword]
"if" [@name=keyword]
"elsif" [@name=keyword]
"else" [@name=keyword]
"and" [@name=operator]
"or" [@name=operator]
"!=" [@name=operator]
"<" [@name=operator]
"<=" [@name=operator]
">" [@name=operator]
">=" [@name=operator]
"=" [@name=operator]
"+"[@name=operator]
"-"[@name=operator]
"*"[@name=operator]
@ -39,28 +44,30 @@ statement {
call
}
line {
FunctionCall |
FunctionCallOrIdentifier |
item {
consumeToTerminator newlineOrSemicolon |
consumeToTerminator eof |
newlineOrSemicolon // allow blank lines
}
consumeToTerminator {
ambiguousFunctionCall |
IfExpr |
FunctionDef |
Assign |
BinOp |
expressionWithoutIdentifier
}
expression {
expressionWithoutIdentifier | Identifier
}
expressionWithoutIdentifier {
BinOp |
valueWithoutIdentifier
}
FunctionCallOrIdentifier {
Identifier
}
ambiguousFunctionCall {
FunctionCall | FunctionCallOrIdentifier
}
FunctionCall {
Identifier arg+
}
@ -70,11 +77,11 @@ arg {
}
PositionalArg {
value
expression
}
NamedArg {
NamedArgPrefix value
NamedArgPrefix expression
}
FunctionDef {
@ -82,11 +89,46 @@ FunctionDef {
}
singleLineFunctionDef {
"fn" Params ":" expression
"fn" Params colon consumeToTerminator
}
multilineFunctionDef {
"fn" Params ":" newlineOrSemicolon (line newlineOrSemicolon)* "end"
"fn" Params colon newlineOrSemicolon block end
}
IfExpr {
singleLineIf | multilineIf
}
singleLineIf {
"if" (ConditionalOp | expression) colon ThenBlock { consumeToTerminator }
}
multilineIf {
"if" (ConditionalOp | expression) colon newlineOrSemicolon ThenBlock ElsifExpr* ElseExpr? end
}
ElsifExpr {
"elsif" (ConditionalOp | expression) colon newlineOrSemicolon ThenBlock
}
ElseExpr {
"else" colon newlineOrSemicolon ThenBlock
}
ThenBlock {
block
}
ConditionalOp {
expression "=" expression |
expression "!=" expression |
expression "<" expression |
expression "<=" expression |
expression ">" expression |
expression ">=" expression |
expression "and" (expression | ConditionalOp) |
expression "or" (expression | ConditionalOp)
}
Params {
@ -94,24 +136,36 @@ Params {
}
Assign {
Identifier "=" line
Identifier "=" consumeToTerminator
}
BinOp {
expression !multiplicative "*" expression |
expression !multiplicative "/" expression |
expression !additive "+" expression |
expression !additive "-" expression
(expression | BinOp) !multiplicative "*" (expression | BinOp) |
(expression | BinOp) !multiplicative "/" (expression | BinOp) |
(expression | BinOp) !additive "+" (expression | BinOp) |
(expression | BinOp) !additive "-" (expression | BinOp)
}
ParenExpr {
leftParen (expressionWithoutIdentifier | FunctionCall | FunctionCallOrIdentifier) rightParen
leftParen (ambiguousFunctionCall | BinOp | expressionWithoutIdentifier | ConditionalOp ) rightParen
}
value {
valueWithoutIdentifier | Identifier
expression {
expressionWithoutIdentifier | Identifier
}
valueWithoutIdentifier {
// We need expressionWithoutIdentifier to avoid conflicts in consumeToTerminator.
// Without this, when parsing "my-var" at statement level, the parser can't decide:
// - ambiguousFunctionCall → FunctionCallOrIdentifier → Identifier
// - expression → Identifier
// Both want the same Identifier token! So we use expressionWithoutIdentifier
// to remove Identifier from the second path, forcing standalone identifiers
// to go through ambiguousFunctionCall (which is what we want semantically).
// Yes, it is annoying and I gave up trying to use GLR to fix it.
expressionWithoutIdentifier {
ParenExpr | Word | String | Number | Boolean
}
block {
(consumeToTerminator newlineOrSemicolon)*
}

View File

@ -7,12 +7,19 @@ export const
PositionalArg = 5,
ParenExpr = 6,
BinOp = 7,
FunctionCallOrIdentifier = 12,
String = 13,
Number = 14,
Boolean = 15,
NamedArg = 16,
NamedArgPrefix = 17,
FunctionDef = 18,
Params = 20,
Assign = 23
ConditionalOp = 12,
String = 21,
Number = 22,
Boolean = 23,
NamedArg = 24,
NamedArgPrefix = 25,
FunctionCallOrIdentifier = 26,
IfExpr = 27,
colon = 29,
ThenBlock = 31,
ElsifExpr = 32,
ElseExpr = 34,
end = 36,
FunctionDef = 37,
Params = 39,
Assign = 40

View File

@ -4,16 +4,20 @@ import {tokenizer} from "./tokenizer"
import {highlighting} from "./highlight"
export const parser = LRParser.deserialize({
version: 14,
states: "'[OVQTOOOqQPO'#DTO!zQUO'#DTO#XQPOOOOQO'#DS'#DSO#xQTO'#CbOOQS'#DQ'#DQO$PQTO'#DVOOQO'#Cn'#CnOOQO'#C}'#C}O$XQPO'#C|OOQS'#Cu'#CuQ$aQTOOOOQS'#DP'#DPOOQS'#Ca'#CaO$hQTO'#ClOOQS'#DO'#DOOOQS'#Cv'#CvO$oQUO,58zO%SQTO,59_O%^QTO,58}O%^QTO,58}O%eQPO,58|O%vQUO'#DTO%}QPO,58|OOQS'#Cw'#CwO&SQTO'#CpO&[QPO,59qOOQS,59h,59hOOQS-E6s-E6sQOQPOOOOQS,59W,59WOOQS-E6t-E6tOOQO1G.y1G.yOOQO'#DT'#DTOOQO1G.i1G.iO&aQPO1G.iOOQS1G.h1G.hOOQS-E6u-E6uO&xQTO1G/]O'SQPO7+$wO'hQTO7+$xO'uQPO'#CxO'zQTO<<HdOOQO<<Hd<<HdOOQS,59d,59dOOQS-E6v-E6vOOQOAN>OAN>O",
stateData: "([~OoOS~OPQOQUO]UO^UO_UOcVOuTO{ZO~OWwXXwXYwXZwX{qX|qX~OP]OQUO]UO^UO_UOa_OuTOWwXXwXYwXZwX~OhcO{[X|[X~P!VOWdOXdOYeOZeO~OQUO]UO^UO_UOuTO~OPgO~P#gOPiOedP~O{lO|lO~O|nO~PVOP]O~P#gOP]Oa_O{Sa|SaxSa~P#gOPQOcVO~P#gOPrO~P#gOxuOWwXXwXYwXZwX~Ox[X~P!VOxuO~OPiOedX~OewO~OWdOXdOYViZVi{Vi|VixVi~OPrO{yO~P#gOWdOXdOYeOZeO{yq|yq~OPQOcVOf|O~P#gO{}O~OPQOcVOf!PO~P#gO^Z~",
goto: "%d{PPPP|!W!]!lPPPP|PPP!WP!wP#OPP!wP#R#X#`#fPPP#l#p#{$Q$YP$k$zP%]%]YXO[cy{RhTV`QbgkUOQT[_bcdegwy{cSOT[cdewy{ZXO[cy{RkVQ[ORm[SbQgRpbQjVRvjQ{yR!O{TZO[SYO[QqcTzy{VaQbgU^QbgRo_bSOT[cdewy{X]Q_bgYPO[cy{QfTVrdew[ROT[cy{QsdQteRxwZWO[cy{",
nodeNames: "⚠ Identifier Word Program FunctionCall PositionalArg ParenExpr BinOp operator operator operator operator FunctionCallOrIdentifier String Number Boolean NamedArg NamedArgPrefix FunctionDef fn Params : end Assign =",
maxTerm: 44,
states: "+pQVQTOOOtQPO'#CcO!SQPO'#D`O!yQTO'#CbOOQS'#Dd'#DdO#QQPO'#DcO#fQUO'#DcOOQO'#Da'#DaO$VQTO'#DgOOQO'#Cw'#CwO$^QTO'#DkOOQO'#DR'#DROOQO'#D`'#D`O$fQPO'#D_OOQS'#D_'#D_OOQS'#DV'#DVQVQTOOO$VQTO,58}O$VQTO,58}O%YQPO'#CcO%jQPO,58|O%vQPO,58|O&sQPO,58|O&zQUO'#DcOOQS'#Dc'#DcOOQS'#Ca'#CaO$VQTO'#CtOOQS'#Db'#DbOOQS'#DW'#DWO'hQUO,58zO'{QTO,59pO(YQPO,5:RO(aQPO,5:ROOQS'#DZ'#DZO(fQTO'#DTO(nQPO,5:VOOQS,59y,59yOOQS-E7T-E7TOOQO1G.i1G.iO(sQPO1G.iO$VQTO,59SO$VQTO,59SOOQS1G.h1G.hOOQS,59`,59`OOQS-E7U-E7UOOQO1G/[1G/[O)[QTO1G/mOOQS-E7X-E7XO)lQTO1G/qOOQO1G.n1G.nO)|QPO1G.nOOQO'#Cz'#CzOOQO7+%X7+%XO*WQTO7+%YOOQO7+%]7+%]O*nQTO7+%^O+OQPO'#DXO+TQTO'#DjOOQO'#C{'#C{O+kQPO<<HtO+vQPO<<HxOOQS,59s,59sOOQS-E7V-E7VO$VQTO'#C|OOQO'#DY'#DYO+{QPOAN>`O,WQPO'#DOOOQOAN>`AN>`O,RQPOAN>`OOQOAN>dAN>dO,]QPO,59hO,dQPO,59hOOQO-E7W-E7WOOQOG23zG23zO,iQPOG23zO,nQPO,59jO,sQPO1G/SOOQOLD)fLD)fO*nQTO1G/UO*WQTO7+$nOOQO7+$p7+$pOOQO<<HY<<HY",
stateData: ",{~O!QOS~OPUOQSOeSOfSOgSOlWOvYO!XRO!]^O~OWaOXaOYbOZbO~OWaOXaOYbOZbO!]!SX!a!SX~OQSOeSOfSOgSO!XRO~OPgO~P!hOW!VXX!VXY!VXZ!VX!]!SX!a!SX~OPhO]nOijOW!VXX!VXY!VXZ!VX!]jX!ajX~P!hOPhO~P!hOPqOmwP~O!]tO!atO~O]xO^xO_xO`xOaxObxOcyOdyO~OWaOXaOYbOZbO~P$nOWaOXaOYbOZbO!YzO~OW!VXX!VXY!VXZ!VX]!VX^!VX_!VX`!VXa!VXb!VXc!VXd!VX~O!YzO~P%{OPhOQSOeSOfSOgSOijO!XRO!YjX~P%{OPhOijO!]Sa!aSa!YSa~P!hOPUOlWOvYO~P!hOm!OO~P$nOm!OO~OPqOmwX~Om!QO~OWaOXaOYViZVi!]Vi!aVi!YVi~OPUOlWOvYO!]!VO~P!hOPUOlWOvYO!]!XO~P!hO!Y[im[i~P$nOPUOlWOvYOq!^Ps!^Pt!^P~P!hOPUOlWOvYOt!^P~P!hO!]!_O~OPUOlWOvYOq!^Xs!^Xt!^X~P!hOq!aOs!dOt!eO~Ot!gO~Oq!aOs!dOt!kO~Om!mO~Om!nO~P$nOm!nO~Ot!oO~O!]!pO~O!]!qO~OfZ~",
goto: "(o!aPPPP!b!o!t#]PPPP#rPPPPPPPPPPP!oP!b$OPP$[$_$hP$lPP$OP$r$O$u${%S%]%cPPP%i%m&R&b&g'bPP'}'}P(Z(c(cgVOR`n!O!Q!V!X!Z!p!qVkUgm{SORUW`abgjmnxy!O!Q!V!X!Z!a!p!qdQO`n!O!Q!V!X!Z!p!qQdRQvaRwbQeRQpWQ!RyR!i!ae[O`n!O!Q!V!X!Z!p!qR!U!OQ!]!VQ!r!pR!s!qT!b!]!cQ!f!]R!l!cRsYQ`ORu`SmUgR|mW!Z!V!X!p!qR!`!ZQ!c!]R!j!cQrYR!PrT_O`S]O`Q}nQ!T!OQ!W!QZ!Y!V!X!Z!p!qd[O`n!O!Q!V!X!Z!p!qReRVlUgmdPO`n!O!Q!V!X!Z!p!qQcRUiUgmQoWQvaQwbQ{jQ!RxQ!SyR!h!adTO`n!O!Q!V!X!Z!p!qQfRehUWabgjmxy!aeXO`n!O!Q!V!X!Z!p!qU![!V!p!qR!^!XeZO`n!O!Q!V!X!Z!p!q",
nodeNames: "⚠ Identifier Word Program FunctionCall PositionalArg ParenExpr BinOp operator operator operator operator ConditionalOp operator operator operator operator operator operator operator operator String Number Boolean NamedArg NamedArgPrefix FunctionCallOrIdentifier IfExpr keyword colon ThenBlock ThenBlock ElsifExpr keyword ElseExpr keyword end FunctionDef keyword Params Assign",
maxTerm: 63,
nodeProps: [
["closedBy", 29,"end"],
["openedBy", 36,"colon"]
],
propSources: [highlighting],
skippedNodes: [0],
repeatNodeCount: 4,
tokenData: "(j~ReXY!dYZ!ipq!dwx!nxy#]yz#bz{#g{|#l}!O#q!P!Q$d!Q![#y![!]$i!]!^!i!_!`$n#T#X$s#X#Y%R#Y#Z%|#Z#h$s#h#i'u#i#o$s~~(e~!iOo~~!nO{~~!qTOw!nwx#Qx;'S!n;'S;=`#V<%lO!n~#VO]~~#YP;=`<%l!n~#bOu~~#gOx~~#lOW~~#qOY~~#vPZ~!Q![#y~$OQ^~!O!P$U!Q![#y~$XP!Q![$[~$aP^~!Q![$[~$iOX~~$nOe~~$sOh~Q$vQ!_!`$|#T#o$sQ%ROaQR%US!_!`$|#T#b$s#b#c%b#c#o$sR%eS!_!`$|#T#W$s#W#X%q#X#o$sR%vQfP!_!`$|#T#o$s~&PT!_!`$|#T#U&`#U#b$s#b#c'j#c#o$s~&cS!_!`$|#T#`$s#`#a&o#a#o$s~&rS!_!`$|#T#g$s#g#h'O#h#o$s~'RS!_!`$|#T#X$s#X#Y'_#Y#o$s~'dQ_~!_!`$|#T#o$sR'oQcP!_!`$|#T#o$s~'xS!_!`$|#T#f$s#f#g(U#g#o$s~(XS!_!`$|#T#i$s#i#j'O#j#o$s~(jO|~",
repeatNodeCount: 5,
tokenData: "-m~RmXY!|YZ#Rpq!|qr#Wwx#cxy$Qyz$Vz{$[{|$a}!O$f!P!Q%X!Q![$n![!]%^!]!^#R!^!_%c!_!`%p!`!a%u#T#U&S#U#X&h#X#Y']#Y#Z)y#Z#]&h#]#^+r#^#c&h#c#d,^#d#h&h#h#i,x#i#o&h~~-h~#RO!Q~~#WO!]~~#ZP!_!`#^~#cO^~~#fTOw#cwx#ux;'S#c;'S;=`#z<%lO#c~#zOe~~#}P;=`<%l#c~$VO!X~~$[O!Y~~$aOW~~$fOY~~$kPZ~!Q![$n~$sQf~!O!P$y!Q![$n~$|P!Q![%P~%UPf~!Q![%P~%^OX~~%cOm~~%hP_~!_!`%k~%pO`~~%uO]~~%zPa~!_!`%}~&SOb~~&VS!_!`&c#T#b&h#b#c&q#c#o&hQ&hOiQQ&kQ!_!`&c#T#o&h~&tS!_!`&c#T#W&h#W#X'Q#X#o&h~'VQc~!_!`&c#T#o&hR'`U!_!`&c#T#`&h#`#a'r#a#b&h#b#c)_#c#o&hR'uS!_!`&c#T#g&h#g#h(R#h#o&hR(UU!_!`&c#T#X&h#X#Y(h#Y#]&h#]#^(s#^#o&hR(mQsP!_!`&c#T#o&hR(vS!_!`&c#T#Y&h#Y#Z)S#Z#o&hR)XQqP!_!`&c#T#o&hR)bS!_!`&c#T#W&h#W#X)n#X#o&hR)sQtP!_!`&c#T#o&h~)|T!_!`&c#T#U*]#U#b&h#b#c+g#c#o&h~*`S!_!`&c#T#`&h#`#a*l#a#o&h~*oS!_!`&c#T#g&h#g#h*{#h#o&h~+OS!_!`&c#T#X&h#X#Y+[#Y#o&h~+aQg~!_!`&c#T#o&hR+lQvP!_!`&c#T#o&hR+uS!_!`&c#T#Y&h#Y#Z,R#Z#o&hR,WQlP!_!`&c#T#o&h~,aS!_!`&c#T#f&h#f#g,m#g#o&h~,rQd~!_!`&c#T#o&h~,{S!_!`&c#T#f&h#f#g-X#g#o&h~-[S!_!`&c#T#i&h#i#j*{#j#o&h~-mO!a~",
tokenizers: [0, 1, tokenizer],
topRules: {"Program":[0,3]},
tokenPrec: 331
tokenPrec: 546
})

View File

@ -16,7 +16,7 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack
// Certain characters might end a word or identifier if they are followed by whitespace.
// This allows things like `a = hello; 2` or a = (basename ./file.txt)
// to work as expected.
if (canBeWord && (ch === 59 /* ; */ || ch === 41) /* ) */) {
if ((canBeWord && (ch === 59 /* ; */ || ch === 41)) /* ) */ || ch === 58 /* : */) {
const nextCh = getFullCodePoint(input, pos + 1)
if (isWhitespace(nextCh) || nextCh === -1) {
break