904 lines
24 KiB
TypeScript
904 lines
24 KiB
TypeScript
import { Scanner, type Token, TokenType } from './tokenizer2'
|
|
import { SyntaxNode, operators, precedence, conditionals, compounds } from './node'
|
|
import { globals } from './tokenizer'
|
|
import { parseString } from './stringParser'
|
|
|
|
const $T = TokenType
|
|
|
|
// Convenience entry point: builds a fresh Parser and returns the tree it
// produces for `input`.
export const parse = (input: string): SyntaxNode => new Parser().parse(input)
|
|
|
|
// A set of known names chained to an optional enclosing scope.
// A Scope built without a parent is the global scope and is seeded with
// every name in `globals`.
class Scope {
  parent?: Scope
  set = new Set<string>()

  constructor(parent?: Scope) {
    this.parent = parent
    if (parent) return

    // root scope: preload the global names
    for (const name of globals) this.add(name)
  }

  // record `key` in this scope only
  add(key: string) {
    this.set.add(key)
  }

  // true when `key` is recorded here or in any ancestor scope
  has(key: string): boolean {
    for (let scope: Scope | undefined = this; scope; scope = scope.parent)
      if (scope.set.has(key)) return true

    return false
  }
}
|
|
|
|
export class Parser {
|
|
// tokens produced by the Scanner for the current input
tokens: Token[] = []
// index into `tokens` of the token currently being looked at
pos = 0
// number of parens() calls currently in progress
inParens = 0
// source text handed to parse()
input = ''
// innermost scope of known names
scope = new Scope()
// true while testExpr() is parsing a condition
inTestExpr = false
|
|
|
|
// Tokenizes `input` and parses it into a 'Program' node spanning the whole
// input, with one child per top-level statement.
//
// Throws (a string, like the rest of this parser) when a statement fails to
// consume any token, since looping again would never terminate.
parse(input: string): SyntaxNode {
  const scanner = new Scanner()

  // reset every piece of per-parse state so an instance can be reused, even
  // after an earlier parse threw while inside parens
  this.tokens = scanner.tokenize(input)
  this.pos = 0
  this.inParens = 0
  this.input = input
  this.scope = new Scope()
  this.inTestExpr = false

  const node = new SyntaxNode('Program', 0, input.length)

  while (!this.isEOF()) {
    // blank separators between statements carry no meaning
    if (this.is($T.Newline) || this.is($T.Semicolon)) {
      this.next()
      continue
    }

    const prevPos = this.pos
    const stmt = this.statement()
    if (stmt) node.add(stmt)

    // template literal so the offending input is actually shown in the message
    if (this.pos === prevPos && !this.isEOF())
      throw `parser didn't advance - you need to call next()\n\n ${this.input}\n`
  }

  return node
}
|
|
|
|
//
|
|
// parse foundation nodes - statements, expressions
|
|
//
|
|
|
|
// a statement is one line of code: either a comment or an expression.
// returns null when only separators remain before EOF or a block-ending keyword.
statement(): SyntaxNode | null {
  if (this.is($T.Comment)) return this.comment()

  // skip blank separators in front of the statement
  while (this.isAny($T.Newline, $T.Semicolon)) this.next()

  const nothingToParse = this.isEOF() || this.isExprEndKeyword()
  return nothingToParse ? null : this.expression()
}
|
|
|
|
// expressions show up in four places:
// 1. a line of code
// 2. the right side of an assignment
// 3. if/while conditions
// 4. inside (parens)
//
// `allowPipe` is false when parsing the right side of a pipe, so pipes
// are collected flat by pipe() instead of nesting.
expression(allowPipe = true): SyntaxNode {
  const startsWithIdent = this.is($T.Identifier)
  let expr: SyntaxNode

  if (
    startsWithIdent &&
    (this.nextIs($T.Operator, '=') || compounds.some(sym => this.nextIs($T.Operator, sym)))
  ) {
    // x = value, or a compound assignment
    expr = this.assign()
  } else if (this.is($T.Keyword)) {
    // if, while, do, etc
    expr = this.keywords()
  } else if (this.nextIs($T.Operator, '.')) {
    // dotget
    expr = this.dotGetFunctionCall()
  } else if (startsWithIdent && this.nextIsExprEnd()) {
    // bare-function-call
    expr = this.functionCallOrIdentifier()
  } else if (startsWithIdent && !this.nextIs($T.Operator)) {
    // echo hello world
    expr = this.functionCall()
  } else {
    // everything else
    expr = this.exprWithPrecedence()
  }

  // [ a b ] = ...  is a destructuring assignment
  if (expr.type.is('Array') && this.is($T.Operator, '='))
    return this.destructure(expr)

  // parens used as the callee, ex: (ref my-func) my-arg
  if (expr.type.is('ParenExpr') && !this.isExprEnd())
    expr = this.functionCall(expr)

  // one | echo
  return allowPipe && this.isPipe() ? this.pipe(expr) : expr
}
|
|
|
|
// piping | stuff | is | cool
// builds one flat PipeExpr whose children alternate expression, pipe
// operator, expression, ... starting with `left`.
pipe(left: SyntaxNode): SyntaxNode {
  // newlines around a pipe are only skipped when we're not inside parens
  const mayCrossLines = this.inParens === 0
  const skipNewlines = (): void => {
    while (mayCrossLines && this.is($T.Newline)) this.next()
  }

  const segments: SyntaxNode[] = [left]

  while (this.isPipe()) {
    skipNewlines()

    const bar = this.op('|')
    bar.type = 'operator'
    segments.push(bar)

    skipNewlines()

    // right side is parsed without pipes so the chain stays flat
    segments.push(this.expression(false))
  }

  const first = segments[0]!
  const last = segments.at(-1)!
  return new SyntaxNode('PipeExpr', first.from, last.to).push(...segments)
}
|
|
|
|
// Pratt-style parsing of infix operators.
// `minBp` is the lowest binding precedence an operator must have to be
// folded into the expression being built at this level.
exprWithPrecedence(minBp = 0): SyntaxNode {
  let lhs = this.value()

  for (;;) {
    if (!this.is($T.Operator)) return lhs

    const symbol = this.current().value!
    const bp = precedence[symbol]

    // unknown operator, or one that binds too loosely for this level
    if (bp === undefined || bp < minBp) return lhs

    const opNode = this.op()

    // '**' is right-associative so it recurses with the same bp;
    // everything else needs a strictly higher bp on its right side
    const rhs = this.exprWithPrecedence(symbol === '**' ? bp : bp + 1)

    const kind = conditionals.has(symbol) ? 'ConditionalOp' : 'BinOp'
    const combined = new SyntaxNode(kind, lhs.from, rhs.to)
    combined.push(lhs, opNode, rhs)

    lhs = combined
  }
}
|
|
|
|
// dispatches on the current keyword: if, while, do, try, throw, import.
// any other token makes expect() throw.
keywords(): SyntaxNode {
  if (this.is($T.Keyword)) {
    switch (this.current().value) {
      case 'if': return this.if()
      case 'while': return this.while()
      case 'do': return this.do()
      case 'try': return this.try()
      case 'throw': return this.throw()
      case 'import': return this.import()
    }
  }

  // no keyword has this value, so expect() always throws here
  return this.expect($T.Keyword, 'if/while/do/import') as never
}
|
|
|
|
// a value is an atom, or something bracketed that behaves like one.
// values are used as:
// 1. function arguments
// 2. array/dict members
// 3. operands of binary operations
// 4. anywhere an expression can be used
value(): SyntaxNode {
  if (this.is($T.OpenParen)) return this.parens()
  if (this.is($T.OpenBracket)) return this.arrayOrDict()

  // a following '.' means dotget, otherwise it's a plain atom
  return this.nextIs($T.Operator, '.') ? this.dotGet() : this.atom()
}
|
|
|
|
//
|
|
// parse specific nodes
|
|
//
|
|
|
|
// parses one function argument.
// `raw` = true returns the parsed node itself; otherwise the node is
// wrapped in a PositionalArg covering the same span.
arg(raw = false): SyntaxNode {
  // 'do' may appear as an argument without parens; anything else is a value()
  const inner = this.is($T.Keyword, 'do') ? this.do() : this.value()
  if (raw) return inner

  const wrapped = new SyntaxNode('PositionalArg', inner.from, inner.to)
  wrapped.add(inner)
  return wrapped
}
|
|
|
|
// [ 1 2 3 ]
// newlines and semicolons between members are skipped; comments are kept
// as children alongside the values.
array(): SyntaxNode {
  const open = this.expect($T.OpenBracket)
  const items: SyntaxNode[] = []

  while (!(this.isEOF() || this.is($T.CloseBracket))) {
    if (this.isAny($T.Semicolon, $T.Newline)) this.next()
    else if (this.is($T.Comment)) items.push(this.comment())
    else items.push(this.value())
  }

  const close = this.expect($T.CloseBracket)
  return new SyntaxNode('Array', open.from, close.to).push(...items)
}
|
|
|
|
// decides between dict() and array() by peeking past the opening bracket,
// ignoring leading comments, semicolons and newlines. nothing is consumed here.
arrayOrDict(): SyntaxNode {
  for (let offset = 1; ; offset++) {
    const tok = this.peek(offset)
    if (!tok || tok.type === $T.CloseBracket) break

    // a named-arg prefix means it's definitely a dict
    if (tok.type === $T.NamedArgPrefix) return this.dict()

    // '=' is how the empty dict is written
    if (tok.type === $T.Operator && tok.value === '=') return this.dict()

    // first real member isn't dict-like: probably an array
    const skippable =
      tok.type === $T.Comment || tok.type === $T.Semicolon || tok.type === $T.Newline
    if (!skippable) break
  }

  return this.array()
}
|
|
|
|
// x = true
// '=' yields an Assign node, any other operator a CompoundAssign node.
assign(): SyntaxNode {
  const target = this.assignableIdentifier()

  // has to be read before op() consumes the operator token
  const isPlainAssign = this.current().value === '='
  const op = this.op()
  const rhs = this.expression()

  const kind = isPlainAssign ? 'Assign' : 'CompoundAssign'
  return new SyntaxNode(kind, target.from, rhs.to).push(target, op, rhs)
}
|
|
|
|
// the identifier on the left of an assignment (TODO: legacy lezer quirk).
// the name is added to the current scope as a side effect.
assignableIdentifier(): SyntaxNode {
  const tok = this.expect($T.Identifier)

  const node = SyntaxNode.from(tok)
  node.type = 'AssignableIdentifier'

  this.scope.add(tok.value!)
  return node
}
|
|
|
|
// atoms are the leaves of the tree: literals, identifiers, words.
// throws on any other token (after consuming it).
atom(): SyntaxNode {
  if (this.is($T.String)) return this.string()

  const simple = [$T.Null, $T.Boolean, $T.Number, $T.Identifier, $T.Word, $T.Regex, $T.Underscore]
  if (this.isAny(...simple)) return SyntaxNode.from(this.next())

  const bad = this.next()
  throw `[atom] unexpected token ${TokenType[bad.type]}: ${JSON.stringify(bad)}\n\n ${this.input}\n`
}
|
|
|
|
// blocks in if, do, special calls, etc
// `: something end`
//
// parses the colon and every statement up to (not including) the next
// block-ending keyword or EOF.
//
// `blockNode` determines whether we return [colon, BlockNode] or just a
// flat list like [colon, stmt1, stmt2].
//
// throws when `blockNode` is true and the block has no statements, since a
// Block node takes its span from its first and last statement.
block(blockNode = true): SyntaxNode[] {
  const stmts: SyntaxNode[] = []
  const colon = this.colon()

  while (!this.isExprEndKeyword() && !this.isEOF()) {
    const stmt = this.statement()
    if (stmt) stmts.push(stmt)
  }

  if (!blockNode) return [colon, ...stmts]

  const first = stmts[0]
  const last = stmts.at(-1)

  // previously this dereferenced undefined and surfaced as an opaque TypeError
  if (!first || !last)
    throw `[block] expected at least one statement after ':'\n\n ${this.input}\n`

  const block = new SyntaxNode('Block', first.from, last.to)
  block.push(...stmts)
  return [colon, block]
}
|
|
|
|
// catch err: block
// the error variable is optional.
catch(): SyntaxNode {
  const keyword = this.keyword('catch')
  const catchVar = this.is($T.Identifier) ? this.identifier() : undefined
  const body = this.block()

  const node = new SyntaxNode('CatchExpr', keyword.from, body.at(-1)!.to)
  const children = catchVar ? [keyword, catchVar, ...body] : [keyword, ...body]
  return node.push(...children)
}
|
|
|
|
// consumes a ':' token and returns it as a 'colon' node
colon(): SyntaxNode {
  const tok = this.expect($T.Colon)
  const node = SyntaxNode.from(tok)
  node.type = 'colon' // TODO lezer legacy
  return node
}
|
|
|
|
// # comment
// consumes the Comment token and turns it into a node
comment(): SyntaxNode {
  const tok = this.expect($T.Comment)
  return SyntaxNode.from(tok)
}
|
|
|
|
// [ a b c ] = [ 1 2 3 ]
// `array` is the already-parsed left side; the current token must be '='.
destructure(array: SyntaxNode): SyntaxNode {
  const eq = this.op('=')
  const rhs = this.expression()

  // the source text of every child of the left side becomes a known name,
  // but only once the right side has been parsed
  array.children.forEach(child => {
    this.scope.add(this.input.slice(child.from, child.to))
  })

  return new SyntaxNode('Assign', array.from, rhs.to).push(array, eq, rhs)
}
|
|
|
|
// [ a=1 b=true c='three' ]
// members are named args or plain args; comments are kept as children,
// newlines and semicolons are skipped.
dict(): SyntaxNode {
  const open = this.expect($T.OpenBracket)

  // empty dict: '=' immediately followed by the closing bracket
  if (this.is($T.Operator, '=') && this.nextIs($T.CloseBracket)) {
    this.next() // the '='
    return new SyntaxNode('Dict', open.from, this.next().to)
  }

  const entries: SyntaxNode[] = []

  while (!(this.isEOF() || this.is($T.CloseBracket))) {
    if (this.isAny($T.Semicolon, $T.Newline)) {
      this.next()
      continue
    }

    if (this.is($T.Comment)) entries.push(this.comment())
    else if (this.is($T.NamedArgPrefix)) entries.push(this.namedArg())
    else entries.push(this.arg())
  }

  const close = this.expect($T.CloseBracket)
  return new SyntaxNode('Dict', open.from, close.to).push(...entries)
}
|
|
|
|
// FunctionDef `do x y: something end`
// params and body are parsed inside a child scope, which is popped again
// before returning. optional catch/finally sections may precede 'end'.
do(): SyntaxNode {
  const doNode = this.keyword('do')
  doNode.type = 'Do'
  this.scope = new Scope(this.scope)

  const params: SyntaxNode[] = []
  while (!this.is($T.Colon) && !this.isExprEnd()) {
    // register the param name; named params carry a trailing '='
    const raw = this.current().value!
    this.scope.add(raw.endsWith('=') ? raw.slice(0, -1) : raw)

    if (this.is($T.Identifier))
      params.push(this.identifier())
    else if (this.is($T.NamedArgPrefix))
      params.push(this.namedParam())
    else
      throw `[do] expected Identifier or NamedArgPrefix, got ${JSON.stringify(this.current())}\n\n ${this.input}\n`
  }

  // body statements are kept flat (no Block wrapper)
  const body = this.block(false)
  const catchNode = this.is($T.Keyword, 'catch') ? this.catch() : undefined
  const finalNode = this.is($T.Keyword, 'finally') ? this.finally() : undefined
  const end = this.keyword('end')

  // the FunctionDef span ends at the last piece before 'end'
  const tail = finalNode
    ? finalNode.children.at(-1)
    : catchNode
      ? catchNode.children.at(-1)
      : body.at(-1)

  const node = new SyntaxNode('FunctionDef', doNode.from, tail!.to)
  node.add(doNode)

  // Params is always present; with no params its span is 0..0
  const paramsNode = new SyntaxNode('Params', params[0]?.from ?? 0, params.at(-1)?.to ?? 0)
  if (params.length) paramsNode.push(...params)
  node.add(paramsNode)

  this.scope = this.scope.parent!

  node.push(...body)
  if (catchNode) node.push(catchNode)
  if (finalNode) node.push(finalNode)

  return node.push(end)
}
|
|
|
|
// config.path
// when the leading identifier isn't in scope the whole thing is parsed as
// a Word instead (see word()).
dotGet(): SyntaxNode {
  const head = this.identifier()

  // not in scope, just return Word
  if (!this.scope.has(this.input.slice(head.from, head.to)))
    return this.word(head)

  if (head.type.is('Identifier')) head.type = 'IdentifierBeforeDot'

  // each '.' is followed by either a (paren expr) or an atom
  const parts: SyntaxNode[] = []
  while (this.is($T.Operator, '.')) {
    this.next()
    parts.push(this.is($T.OpenParen) ? this.parens() : this.atom())
  }

  const node = new SyntaxNode('DotGet', head.from, parts.at(-1)!.to)

  // TODO lezer legacy - we can do a flat DotGet if we remove this
  return parts.length > 1
    ? node.push(head, collapseDotGets(parts))
    : node.push(head, ...parts)
}
|
|
|
|
// dotget at the start of an expression: (something.blah) or (something.blah arg1)
dotGetFunctionCall(): SyntaxNode {
  const target = this.dotGet()

  // head wasn't in scope, so dotGet() gave us a plain Word
  if (target.type.is('Word')) return target

  return this.isExprEnd()
    ? this.functionCallOrIdentifier(target)
    : this.functionCall(target)
}
|
|
|
|
// finally: block
// used by both do() and try()
finally(): SyntaxNode {
  const keyword = this.keyword('finally')
  const body = this.block()

  return new SyntaxNode('FinallyExpr', keyword.from, body.at(-1)!.to).push(keyword, ...body)
}
|
|
|
|
// callee followed by args, up to the end of the expression or a pipe.
// `fn` is an already-parsed callee; without it an identifier is consumed.
// outside of a test expression, a trailing `: ... end` turns the call into
// a FunctionCallWithBlock.
functionCall(fn?: SyntaxNode): SyntaxNode {
  const callee = fn ?? this.identifier()

  const args: SyntaxNode[] = []
  while (!(this.isExprEnd() || this.is($T.Operator, '|')))
    args.push(this.is($T.NamedArgPrefix) ? this.namedArg() : this.arg())

  const call = new SyntaxNode('FunctionCall', callee.from, (args.at(-1) || callee).to)
  call.push(callee, ...args)

  if (this.inTestExpr || !this.is($T.Colon)) return call

  const body = this.block()
  const end = this.keyword('end')
  return new SyntaxNode('FunctionCallWithBlock', call.from, end.to).push(call, ...body, end)
}
|
|
|
|
// a bare identifier (or dotget) with no args: could be a call or a variable.
// `inner` is an already-parsed node to wrap; without it one is parsed here.
// outside of a test expression, a trailing `: ... end` turns it into a
// FunctionCallWithBlock.
functionCallOrIdentifier(inner?: SyntaxNode) {
  if (!inner) {
    if (this.nextIs($T.Operator, '.')) {
      inner = this.dotGet()

      // if the dotGet was just a Word, bail
      if (inner.type.is('Word')) return inner
    } else {
      inner = this.identifier()
    }
  }

  const wrapper = new SyntaxNode('FunctionCallOrIdentifier', inner.from, inner.to)
  wrapper.push(inner)

  if (this.inTestExpr || !this.is($T.Colon)) return wrapper

  const body = this.block()
  const end = this.keyword('end')
  return new SyntaxNode('FunctionCallWithBlock', wrapper.from, end.to).push(wrapper, ...body, end)
}
|
|
|
|
// function and variable names: consumes one Identifier token
identifier(): SyntaxNode {
  const tok = this.expect($T.Identifier)
  return SyntaxNode.from(tok)
}
|
|
|
|
// if something: blah end
// if something: blah else: blah end
// if something: blah else if something: blah else: blah end
//
// NOTE(review): ElseIfExpr and ElseExpr take their `from` from the first
// block of the `if`, not from their own `else` keyword - confirm that is
// intended before relying on those spans.
if(): SyntaxNode {
  const ifWord = this.keyword('if')
  const test = this.testExpr()
  const ifBlock = this.block()
  const ifBody = ifBlock.at(-1)!

  const node = new SyntaxNode('IfExpr', ifWord.from, ifBody.to)
  node.push(ifWord, test, ...ifBlock)

  // any number of `else if test: block`
  while (this.is($T.Keyword, 'else') && this.nextIs($T.Keyword, 'if')) {
    const branch = [
      this.keyword('else'),
      this.keyword('if'),
      this.testExpr(),
      ...this.block(),
    ]
    const elseIfNode = new SyntaxNode('ElseIfExpr', ifBody.from, branch.at(-1)!.to)
    elseIfNode.push(...branch)
    node.push(elseIfNode)
  }

  // optional trailing `else: block`
  if (this.is($T.Keyword, 'else') && this.nextIs($T.Colon)) {
    const branch = [this.keyword('else'), ...this.block()]
    const elseNode = new SyntaxNode('ElseExpr', ifBody.from, branch.at(-1)!.to)
    elseNode.push(...branch)
    node.push(elseNode)
  }

  return node.push(this.keyword('end'))
}
|
|
|
|
// import name1 name2 opt= value
// every argument up to the end of the expression is either an identifier
// or a named arg (prefix followed by a value).
//
// throws when nothing follows the keyword, since the Import node takes its
// end position from its last argument.
import(): SyntaxNode {
  const keyword = this.keyword('import')

  const args: SyntaxNode[] = []
  while (!this.isExprEnd()) {
    if (this.is($T.NamedArgPrefix)) {
      const prefix = SyntaxNode.from(this.next())
      const val = this.value()
      const arg = new SyntaxNode('NamedArg', prefix.from, val.to)
      arg.push(prefix, val)
      args.push(arg)
    } else {
      args.push(this.identifier())
    }
  }

  // previously this dereferenced undefined and surfaced as an opaque TypeError
  const last = args.at(-1)
  if (!last)
    throw `[import] expected at least one name after "import"\n\n ${this.input}\n`

  const node = new SyntaxNode('Import', keyword.from, last.to)
  node.add(keyword)
  return node.push(...args)
}
|
|
|
|
// consumes the keyword called `name` and returns it as a 'keyword' node
keyword(name: string): SyntaxNode {
  const tok = this.expect($T.Keyword, name)
  const node = SyntaxNode.from(tok)
  node.type = 'keyword' // TODO lezer legacy
  return node
}
|
|
|
|
// abc= true
// the value is parsed as a raw arg, i.e. without a PositionalArg wrapper
namedArg(): SyntaxNode {
  const prefix = SyntaxNode.from(this.expect($T.NamedArgPrefix))
  const val = this.arg(true)

  return new SyntaxNode('NamedArg', prefix.from, val.to).push(prefix, val)
}
|
|
|
|
// abc= null|true|123|'hi'
// a named parameter in a function definition. the default value must parse
// to a Null, Boolean, Number or String node; anything else throws.
namedParam(): SyntaxNode {
  const prefix = SyntaxNode.from(this.expect($T.NamedArgPrefix))
  const val = this.value()

  // report the type by name - the same field the check uses - rather than
  // stringifying the type object
  if (!['Null', 'Boolean', 'Number', 'String'].includes(val.type.name))
    throw `[namedParam] default value must be Null|Bool|Num|Str, got ${val.type.name}\n\n ${this.input}\n`

  const node = new SyntaxNode('NamedParam', prefix.from, val.to)
  return node.push(prefix, val)
}
|
|
|
|
// operators like + - =
// consumes an Operator token (a specific one when `op` is given) and
// returns a node typed by the `operators` table. throws when the operator
// has no entry in that table.
op(op?: string): SyntaxNode {
  const token = this.expect($T.Operator, op || undefined)
  const name = operators[token.value!]

  if (name) return new SyntaxNode(name, token.from, token.to)

  throw `[op] operator not registered: ${token.value!}\n\n ${this.input}\n`
}
|
|
|
|
// ( expressions in parens )
// returns a ParenExpr spanning both parens with the inner expression as its
// only child. `inParens` is raised for the duration so pipe handling knows
// not to look past newlines.
parens(): SyntaxNode {
  this.inParens++

  try {
    const open = this.expect($T.OpenParen)
    const child = this.expression()
    const close = this.expect($T.CloseParen)

    const node = new SyntaxNode('ParenExpr', open.from, close.to)
    node.add(child)
    return node
  } finally {
    // always undo the increment, even when parsing throws, so the counter
    // can't stay raised on a Parser that is used again
    this.inParens--
  }
}
|
|
|
|
// 'hell yes' "hell no" { hell if i know }
// consumes the String token and hands its span to parseString()
string(): SyntaxNode {
  const { from, to } = this.expect($T.String)
  return parseString(this.input, from, to, this)
}
|
|
|
|
// if TEST: blah end
// parses the condition with `inTestExpr` raised, so function calls inside
// it leave the trailing colon alone.
//
// NOTE(review): the flag is reset to false afterwards rather than restored
// to its previous value - confirm nested test expressions behave as wanted.
testExpr(): SyntaxNode {
  this.inTestExpr = true
  const test = this.expression()
  this.inTestExpr = false

  return test
}
|
|
|
|
// throw blah
// the thrown thing is a single value()
throw(): SyntaxNode {
  const keyword = this.keyword('throw')
  const val = this.value()

  return new SyntaxNode('Throw', keyword.from, val.to).push(keyword, val)
}
|
|
|
|
// try: blah catch e: blah end
// catch and finally are both optional; 'end' is required.
try(): SyntaxNode {
  const tryWord = this.keyword('try')
  const tryBlock = this.block()

  const catchNode = this.is($T.Keyword, 'catch') ? this.catch() : undefined
  const finalNode = this.is($T.Keyword, 'finally') ? this.finally() : undefined
  const end = this.keyword('end')

  // the TryExpr span ends at the last piece before 'end'
  const tail = finalNode
    ? finalNode.children.at(-1)
    : catchNode
      ? catchNode.children.at(-1)
      : tryBlock.at(-1)

  const node = new SyntaxNode('TryExpr', tryWord.from, tail!.to)
  node.push(tryWord, ...tryBlock)
  if (catchNode) node.push(catchNode)
  if (finalNode) node.push(finalNode)

  return node.push(end)
}
|
|
|
|
// while test: blah end
while(): SyntaxNode {
  const keyword = this.keyword('while')

  // evaluated left to right: condition, block, then the closing 'end'
  const rest = [this.testExpr(), ...this.block(), this.keyword('end')]
  const end = rest.at(-1)!

  return new SyntaxNode('WhileExpr', keyword.from, end.to).push(keyword, ...rest)
}
|
|
|
|
// readme.txt (when `readme` isn't in scope)
// glues dot-separated pieces into a single Word node. `start` is an
// already-consumed first piece; without it a Word token is consumed.
word(start?: SyntaxNode): SyntaxNode {
  const first = start ?? this.expect($T.Word)
  let last: SyntaxNode | Token = first

  while (this.is($T.Operator, '.')) {
    this.next() // the dot

    // only these token types extend the word
    if (this.isAny($T.Word, $T.Identifier, $T.Number))
      last = this.next()
  }

  return new SyntaxNode('Word', first.from, last.to)
}
|
|
|
|
//
|
|
// helpers
|
|
//
|
|
|
|
// the token at `pos`; past the end of the token list this hands back a
// placeholder Newline token at 0..0 instead of undefined
current(): Token {
  const token = this.tokens[this.pos]
  if (token) return token

  return { type: TokenType.Newline, from: 0, to: 0 }
}
|
|
|
|
// the token `offset` positions after the current one, without consuming
// anything; undefined when that runs past the end
peek(offset = 1): Token | undefined {
  const index = this.pos + offset
  return this.tokens[index]
}
|
|
|
|
// starting at the token after the current one, skip any Newline tokens and
// check whether the first non-newline token has the given type (and value)
peekPastNewlines(type: TokenType, value?: string): boolean {
  let offset = 1
  while (this.peek(offset)?.type === $T.Newline) offset++

  const found = this.peek(offset)
  if (!found) return false

  return found.type === type && (value === undefined || found.value === value)
}
|
|
|
|
// returns the current token and moves on to the following one
next(): Token {
  const consumed = this.current()
  this.pos += 1
  return consumed
}
|
|
|
|
// does the current token have this type (and, when given, this value)?
is(type: TokenType, value?: string): boolean {
  const token = this.current()
  const typeMatches = !!token && token.type === type

  return typeMatches && (value === undefined || token.value === value)
}
|
|
|
|
// does the current token have any of these types?
isAny(...type: TokenType[]): boolean {
  for (const candidate of type)
    if (this.is(candidate)) return true

  return false
}
|
|
|
|
// like is(), but for the token right after the current one
nextIs(type: TokenType, value?: string): boolean {
  const upcoming = this.peek()
  if (!upcoming) return false

  return upcoming.type === type && (value === undefined || upcoming.value === value)
}
|
|
|
|
// like isAny(), but for the token right after the current one
nextIsAny(...type: TokenType[]): boolean {
  for (const candidate of type)
    if (this.nextIs(candidate)) return true

  return false
}
|
|
|
|
// is the current token one that ends an expression? that's a colon,
// semicolon, newline, closing paren/bracket, or a block-ending keyword
isExprEnd(): boolean {
  if (this.isAny($T.Colon, $T.Semicolon, $T.Newline, $T.CloseParen, $T.CloseBracket))
    return true

  return this.isExprEndKeyword() || !this.current()
}
|
|
|
|
// would the token after the current one end an expression?
// running out of tokens counts as an end too.
nextIsExprEnd(): boolean {
  const upcoming = this.peek()
  if (!upcoming) return true

  switch (upcoming.type) {
    case $T.Colon:
    case $T.Semicolon:
    case $T.Newline:
    case $T.CloseBracket:
    case $T.CloseParen:
      return true

    // pipes act like expression end for function arg parsing
    case $T.Operator:
      return upcoming.value === '|'

    case $T.Keyword: {
      const word = upcoming.value
      return word === 'end' || word === 'else' || word === 'catch' || word === 'finally'
    }

    default:
      return false
  }
}
|
|
|
|
// is the current token one of the keywords that close a block section?
isExprEndKeyword(): boolean {
  return ['end', 'else', 'catch', 'finally'].some(word => this.is($T.Keyword, word))
}
|
|
|
|
// are we at a pipe? true when the current token is '|', or - outside of
// parens only - when we're on a newline and the next non-newline token
// is '|'.
isPipe(): boolean {
  if (this.is($T.Operator, '|')) return true

  // inside parens, only look for pipes on same line (don't look past newlines)
  if (this.inParens !== 0) return false

  // pipe() can only skip Newline tokens before it expects '|'. reporting a
  // pipe while sitting on any other token (e.g. 'end' directly followed by
  // '|') would make it throw, so the look-ahead only applies from a newline.
  return this.is($T.Newline) && this.peekPastNewlines($T.Operator, '|')
}
|
|
|
|
// consumes and returns the current token when it matches `type` (and
// `value`, when given); otherwise throws a message describing what was
// expected, what was found, and the token position.
expect(type: TokenType, value?: string): Token | never {
  if (this.is(type, value)) return this.next()

  const token = this.current()
  const wanted = `${TokenType[type]}${value ? ` "${value}"` : ''}`
  const found = `${TokenType[token?.type || 0]}${token?.value ? ` "${token.value}"` : ''}`

  throw `expected ${wanted}, got ${found} at position ${this.pos}\n\n ${this.input}\n`
}
|
|
|
|
// true once `pos` has moved past the last token
isEOF(): boolean {
  return this.tokens.length <= this.pos
}
|
|
}
|
|
|
|
// TODO lezer legacy
// folds a flat list of dot parts into right-nested DotGet nodes:
// [a, b, c] becomes DotGet(a, DotGet(b, c)). Identifier parts that end up
// on the left of a dot are retyped to IdentifierBeforeDot. the input array
// itself is not modified.
function collapseDotGets(origNodes: SyntaxNode[]): SyntaxNode {
  const leading = origNodes.slice(0, -1)
  const innermost = origNodes[origNodes.length - 1]!

  // walk from the end so each step wraps everything to its right
  return leading.reduceRight((right, left) => {
    if (left.type.is('Identifier')) left.type = 'IdentifierBeforeDot'

    const dot = new SyntaxNode("DotGet", left.from, right.to)
    dot.push(left, right)
    return dot
  }, innermost)
}
|