Compare commits

...

43 Commits

Author SHA1 Message Date
757a50e23e fix ./bin/shrimp parse 2025-12-02 17:11:39 -08:00
cb7cdaea62 trim keys in inspect 2025-12-02 17:11:39 -08:00
688181654e enable [ a = true ] 2025-12-02 17:11:39 -08:00
728c5df9eb bun check 2025-12-02 17:11:39 -08:00
04e14cd83e wrong return type 2025-12-02 17:11:39 -08:00
b2d298ec6f fix search/replace 2025-12-02 17:11:39 -08:00
Chris Wanstrath
5ad6125527 you too 2025-12-02 17:11:39 -08:00
Chris Wanstrath
f160093c4d match lezer API 2025-12-02 17:11:39 -08:00
Chris Wanstrath
1ea130f8e0 pipes end expressions 2025-12-02 17:11:39 -08:00
Chris Wanstrath
ae9896c8a2 switch bin/shrimp to new parser 2025-12-02 17:11:39 -08:00
Chris Wanstrath
0d3f9867e6 we get globals for free now 2025-12-02 17:11:39 -08:00
Chris Wanstrath
cbc75f5ed7 use new parser in curlys 2025-12-02 17:11:39 -08:00
Chris Wanstrath
a836591854 keywords are magical 2025-12-02 17:11:39 -08:00
Chris Wanstrath
d0005d9ccd fix | 2025-12-02 17:11:39 -08:00
Chris Wanstrath
cc604bea49 fix dot.get + thing 2025-12-02 17:11:39 -08:00
Chris Wanstrath
2c2b277b29 throw takes an expression 2025-12-02 17:11:39 -08:00
Chris Wanstrath
1682a7ccb7 fix curly strings 2025-12-02 17:11:39 -08:00
Chris Wanstrath
0e92525b54 regex flags, bad regexs become Words 2025-12-02 17:11:39 -08:00
Chris Wanstrath
6a6675d30f fix bitwise precedence 2025-12-02 17:11:39 -08:00
Chris Wanstrath
d003d65a15 disable errors... for now! 2025-12-02 17:11:39 -08:00
Chris Wanstrath
579d755205 make more compiler tests pass 2025-12-02 17:11:39 -08:00
Chris Wanstrath
566beb87ef do allowed in arg/dict values 2025-12-02 17:11:39 -08:00
Chris Wanstrath
9e4471ad38 try to match lezer API more closely 2025-12-02 17:11:39 -08:00
Chris Wanstrath
3eac0a27a5 hwhitespace 2025-12-02 17:11:39 -08:00
Chris Wanstrath
e38e8d4f1e minor 2025-12-02 17:11:39 -08:00
abd78108c8 new parser(-ish) 2025-12-02 17:11:39 -08:00
ae46988219 sorry lezer... 2025-12-02 17:11:39 -08:00
e4bdddc762 Merge pull request 'Cache the parsing' (#28) from less-parsing into main
Reviewed-on: #28
2025-12-01 21:51:33 +00:00
7feb3cd7b0 Merge remote-tracking branch 'origin/main' into less-parsing 2025-12-01 13:51:24 -08:00
1fec471da9 Merge pull request 'broken-shrimp' (#50) from broken-shrimp into main
Reviewed-on: #50
2025-12-01 21:49:04 +00:00
09d2420508 add some arg help 2025-11-24 16:04:03 -08:00
028ccf2bf9 Delete 2025-01-24-autocomplete-design.md 2025-11-24 12:20:54 -08:00
1458da58cc Shrimp was broken 2025-11-24 12:19:58 -08:00
4a27a8b474 Delete shrimp-0.0.1.vsix 2025-11-24 09:42:24 -08:00
13adbe4c0e Merge branch 'less-parsing' of 54.219.130.253:probablycorey/shrimp into less-parsing 2025-11-07 07:30:21 -08:00
b3ec6995db Update server.ts 2025-11-07 07:30:10 -08:00
854ed02625 Merge branch 'mini-fix' into less-parsing 2025-11-07 07:28:56 -08:00
c325bca611 Merge branch 'main' into less-parsing 2025-11-07 15:28:12 +00:00
1082cc1281 Forgot to set globals in server 2025-11-07 07:27:20 -08:00
5b363c833a Merge remote-tracking branch 'origin/main' into less-parsing 2025-11-06 16:59:53 -08:00
d6aea4b0f9 cache it 2025-11-06 13:36:22 -08:00
44b30d2339 light cleanup 2025-11-06 12:45:02 -08:00
3aa75843ac get rid of this 2025-11-06 12:44:31 -08:00
48 changed files with 4820 additions and 277 deletions

2
.gitignore vendored
View File

@ -35,3 +35,5 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
/tmp /tmp
/docs /docs
*.vsix

View File

@ -1,7 +1,7 @@
#!/usr/bin/env bun #!/usr/bin/env bun
import { colors, globals as prelude } from '../src/prelude' import { colors, globals as prelude } from '../src/prelude'
import { treeToString } from '../src/utils/tree' import { treeToString2 } from '../src/utils/tree'
import { runCode, runFile, compileFile, parseCode } from '../src' import { runCode, runFile, compileFile, parseCode } from '../src'
import { resolve } from 'path' import { resolve } from 'path'
import { bytecodeToString } from 'reefvm' import { bytecodeToString } from 'reefvm'
@ -143,7 +143,7 @@ async function main() {
process.exit(1) process.exit(1)
} }
const input = readFileSync(file, 'utf-8') const input = readFileSync(file, 'utf-8')
console.log(treeToString(parseCode(input), input)) console.log(treeToString2(parseCode(input).topNode, input))
return return
} }

View File

@ -16,6 +16,8 @@
"@lezer/highlight": "^1.2.1", "@lezer/highlight": "^1.2.1",
"@lezer/lr": "^1.4.2", "@lezer/lr": "^1.4.2",
"@types/bun": "latest", "@types/bun": "latest",
"diff": "^8.0.2",
"kleur": "^4.1.5",
}, },
}, },
}, },
@ -60,8 +62,12 @@
"csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="], "csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],
"diff": ["diff@8.0.2", "", {}, "sha512-sSuxWU5j5SR9QQji/o2qMvqRNYRDOcBTgsJ/DeCf4iSN4gW+gNMXM7wFIP+fdXZxoNiAnHUTGjCr+TSWXdRDKg=="],
"hono": ["hono@4.10.4", "", {}, "sha512-YG/fo7zlU3KwrBL5vDpWKisLYiM+nVstBQqfr7gCPbSYURnNEP9BDxEMz8KfsDR9JX0lJWDRNc6nXX31v7ZEyg=="], "hono": ["hono@4.10.4", "", {}, "sha512-YG/fo7zlU3KwrBL5vDpWKisLYiM+nVstBQqfr7gCPbSYURnNEP9BDxEMz8KfsDR9JX0lJWDRNc6nXX31v7ZEyg=="],
"kleur": ["kleur@4.1.5", "", {}, "sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ=="],
"reefvm": ["reefvm@git+https://git.nose.space/defunkt/reefvm#3e2e68b31f504347225a4d705c7568a0957d629e", { "peerDependencies": { "typescript": "^5" } }, "3e2e68b31f504347225a4d705c7568a0957d629e"], "reefvm": ["reefvm@git+https://git.nose.space/defunkt/reefvm#3e2e68b31f504347225a4d705c7568a0957d629e", { "peerDependencies": { "typescript": "^5" } }, "3e2e68b31f504347225a4d705c7568a0957d629e"],
"style-mod": ["style-mod@4.1.3", "", {}, "sha512-i/n8VsZydrugj3Iuzll8+x/00GH2vnYsk1eomD8QiRrSAeW6ItbCQDtfXCeJHd0iwiNagqjQkvpvREEPtW3IoQ=="], "style-mod": ["style-mod@4.1.3", "", {}, "sha512-i/n8VsZydrugj3Iuzll8+x/00GH2vnYsk1eomD8QiRrSAeW6ItbCQDtfXCeJHd0iwiNagqjQkvpvREEPtW3IoQ=="],

1
examples/find.shrimp Normal file
View File

@ -0,0 +1 @@
echo

View File

@ -10,7 +10,8 @@
"repl": "bun generate-parser && bun bin/repl", "repl": "bun generate-parser && bun bin/repl",
"update-reef": "rm -rf ~/.bun/install/cache/ && rm bun.lock && bun update reefvm", "update-reef": "rm -rf ~/.bun/install/cache/ && rm bun.lock && bun update reefvm",
"cli:install": "ln -s \"$(pwd)/bin/shrimp\" ~/.bun/bin/shrimp", "cli:install": "ln -s \"$(pwd)/bin/shrimp\" ~/.bun/bin/shrimp",
"cli:remove": "rm ~/.bun/bin/shrimp" "cli:remove": "rm ~/.bun/bin/shrimp",
"check": "bunx tsc --noEmit"
}, },
"dependencies": { "dependencies": {
"@codemirror/view": "^6.38.3", "@codemirror/view": "^6.38.3",
@ -24,7 +25,9 @@
"devDependencies": { "devDependencies": {
"@lezer/highlight": "^1.2.1", "@lezer/highlight": "^1.2.1",
"@lezer/lr": "^1.4.2", "@lezer/lr": "^1.4.2",
"@types/bun": "latest" "@types/bun": "latest",
"diff": "^8.0.2",
"kleur": "^4.1.5"
}, },
"prettier": { "prettier": {
"semi": false, "semi": false,

View File

@ -1,9 +1,10 @@
import { CompilerError } from '#compiler/compilerError.ts' import { CompilerError } from '#compiler/compilerError.ts'
import { parse } from '#parser/parser2'
import { SyntaxNode, Tree } from '#parser/node'
import { parser } from '#parser/shrimp.ts' import { parser } from '#parser/shrimp.ts'
import * as terms from '#parser/shrimp.terms' import * as terms from '#parser/shrimp.terms'
import { setGlobals } from '#parser/tokenizer' import { setGlobals } from '#parser/tokenizer'
import { tokenizeCurlyString } from '#parser/curlyTokenizer' import { tokenizeCurlyString } from '#parser/curlyTokenizer'
import type { SyntaxNode, Tree } from '@lezer/common'
import { assert, errorMessage } from '#utils/utils' import { assert, errorMessage } from '#utils/utils'
import { toBytecode, type Bytecode, type ProgramItem, bytecodeToString } from 'reefvm' import { toBytecode, type Bytecode, type ProgramItem, bytecodeToString } from 'reefvm'
import { import {
@ -63,13 +64,14 @@ export class Compiler {
constructor(public input: string, globals?: string[] | Record<string, any>) { constructor(public input: string, globals?: string[] | Record<string, any>) {
try { try {
if (globals) setGlobals(Array.isArray(globals) ? globals : Object.keys(globals)) if (globals) setGlobals(Array.isArray(globals) ? globals : Object.keys(globals))
const cst = parser.parse(input) const ast = parse(input)
const errors = checkTreeForErrors(cst) const cst = new Tree(ast)
// const errors = checkTreeForErrors(cst)
const firstError = errors[0] // const firstError = errors[0]
if (firstError) { // if (firstError) {
throw firstError // throw firstError
} // }
this.#compileCst(cst, input) this.#compileCst(cst, input)
this.bytecode = toBytecode(this.instructions) this.bytecode = toBytecode(this.instructions)

View File

@ -1,16 +1,17 @@
import { CompilerError } from '#compiler/compilerError.ts' import { CompilerError } from '#compiler/compilerError.ts'
import * as terms from '#parser/shrimp.terms' import * as terms from '#parser/shrimp.terms'
import type { SyntaxNode, Tree } from '@lezer/common' import type { SyntaxNode, Tree } from '#parser/node'
export const checkTreeForErrors = (tree: Tree): CompilerError[] => { export const checkTreeForErrors = (tree: Tree): CompilerError[] => {
const errors: CompilerError[] = [] const errors: CompilerError[] = []
tree.iterate({
enter: (node) => { // tree.iterate({
if (node.type.isError) { // enter: (node) => {
errors.push(new CompilerError(`Unexpected syntax.`, node.from, node.to)) // if (node.type.isError) {
} // errors.push(new CompilerError(`Unexpected syntax.`, node.from, node.to))
}, // }
}) // },
// })
return errors return errors
} }

View File

@ -245,7 +245,7 @@ const commandShapes: CommandShape[] = [
] as const ] as const
let commandSource = () => commandShapes let commandSource = () => commandShapes
export const setCommandSource = (do: () => CommandShape[]) => { export const setCommandSource = (fn: () => CommandShape[]) => {
commandSource = fn commandSource = fn
} }

View File

@ -1,15 +1,15 @@
import { readFileSync } from 'fs' import { readFileSync } from 'fs'
import { VM, fromValue, toValue, isValue, type Bytecode } from 'reefvm' import { VM, fromValue, toValue, isValue, type Bytecode } from 'reefvm'
import { type Tree } from '@lezer/common'
import { Compiler } from '#compiler/compiler' import { Compiler } from '#compiler/compiler'
import { parser } from '#parser/shrimp' import { parse } from '#parser/parser2'
import { Tree } from '#parser/node'
import { globals as parserGlobals, setGlobals as setParserGlobals } from '#parser/tokenizer' import { globals as parserGlobals, setGlobals as setParserGlobals } from '#parser/tokenizer'
import { globals as prelude } from '#prelude' import { globals as prelude } from '#prelude'
export { Compiler } from '#compiler/compiler' export { Compiler } from '#compiler/compiler'
export { parser } from '#parser/shrimp' export { parse } from '#parser/parser2'
export { type SyntaxNode, Tree } from '#parser/node'
export { globals as prelude } from '#prelude' export { globals as prelude } from '#prelude'
export type { Tree } from '@lezer/common'
export { type Value, type Bytecode } from 'reefvm' export { type Value, type Bytecode } from 'reefvm'
export { toValue, fromValue, isValue, Scope, VM, bytecodeToString } from 'reefvm' export { toValue, fromValue, isValue, Scope, VM, bytecodeToString } from 'reefvm'
@ -105,8 +105,8 @@ export function parseCode(code: string, globals?: Record<string, any>): Tree {
const globalNames = [...Object.keys(prelude), ...(globals ? Object.keys(globals) : [])] const globalNames = [...Object.keys(prelude), ...(globals ? Object.keys(globals) : [])]
setParserGlobals(globalNames) setParserGlobals(globalNames)
const result = parser.parse(code) const result = parse(code)
setParserGlobals(oldGlobals) setParserGlobals(oldGlobals)
return result return new Tree(result)
} }

View File

@ -1,5 +1,6 @@
import { parser } from '#parser/shrimp.ts' import { parser } from '#parser/shrimp.ts'
import type { SyntaxNode } from '@lezer/common' import { parse } from '#parser/parser2'
import type { SyntaxNode } from '#parser/node'
import { isIdentStart, isIdentChar } from './tokenizer' import { isIdentStart, isIdentChar } from './tokenizer'
// Turns a { curly string } into strings and nodes for interpolation // Turns a { curly string } into strings and nodes for interpolation
@ -37,7 +38,7 @@ export const tokenizeCurlyString = (value: string): (string | [string, SyntaxNod
} }
const input = value.slice(start + 2, pos) // skip '$(' const input = value.slice(start + 2, pos) // skip '$('
tokens.push([input, parser.parse(input).topNode]) tokens.push([input, parse(input)])
start = ++pos // skip ')' start = ++pos // skip ')'
} else { } else {
char = value[++pos] char = value[++pos]
@ -48,7 +49,7 @@ export const tokenizeCurlyString = (value: string): (string | [string, SyntaxNod
char = value[++pos] char = value[++pos]
const input = value.slice(start + 1, pos) // skip '$' const input = value.slice(start + 1, pos) // skip '$'
tokens.push([input, parser.parse(input).topNode]) tokens.push([input, parse(input)])
start = pos-- // backtrack and start over start = pos-- // backtrack and start over
} }
} }

426
src/parser/node.ts Normal file
View File

@ -0,0 +1,426 @@
import { type Token, TokenType } from './tokenizer2'
import * as term from './shrimp.terms'
/**
 * Every name a `SyntaxNode` in this file can be tagged with.
 *
 * Most members have a matching `case` in `SyntaxNodeType.id`, which maps the
 * name to a numeric constant from `./shrimp.terms`. The lowercase members
 * (`colon`, `keyword`, `operator`) are spelled that way in the source and
 * must be matched with the same casing.
 */
export type NodeType =
| 'Program'
| 'Block'
| 'FunctionCall'
| 'FunctionCallOrIdentifier'
| 'FunctionCallWithBlock'
| 'PositionalArg'
| 'NamedArg'
| 'NamedArgPrefix'
| 'FunctionDef'
| 'Params'
| 'NamedParam'
| 'Null'
| 'Boolean'
| 'Number'
| 'String'
| 'StringFragment'
| 'CurlyString'
| 'DoubleQuote'
| 'EscapeSeq'
| 'Interpolation'
| 'Regex'
| 'Identifier'
| 'AssignableIdentifier'
| 'IdentifierBeforeDot'
| 'Word'
| 'Array'
| 'Dict'
| 'Comment'
| 'BinOp'
| 'ConditionalOp'
| 'ParenExpr'
| 'Assign'
| 'CompoundAssign'
| 'DotGet'
| 'PipeExpr'
| 'IfExpr'
| 'ElseIfExpr'
| 'ElseExpr'
| 'WhileExpr'
| 'TryExpr'
| 'CatchExpr'
| 'FinallyExpr'
| 'Throw'
| 'Eq'
| 'Modulo'
| 'Plus'
| 'Star'
| 'Slash'
| 'Import'
| 'Do'
| 'Underscore'
| 'colon'
| 'keyword'
| 'operator'
// TODO: remove this when we switch from lezer
/**
 * Lookup from an operator's source spelling to a node-name string.
 *
 * Every value in this table is a string; the pipe `|` maps to the generic
 * `'operator'` name rather than a dedicated one.
 *
 * NOTE(review): longer spellings are mostly listed before their prefixes
 * (`>>>` before `>>`, `??=` before `??`), but `*` is listed before `**`.
 * Confirm whether any consumer depends on key iteration order.
 */
export const operators: Record<string, any> = {
// Logic
'and': 'And',
'or': 'Or',
// Bitwise
'band': 'Band',
'bor': 'Bor',
'bxor': 'Bxor',
'>>>': 'Ushr',
'>>': 'Shr',
'<<': 'Shl',
// Comparison
'>=': 'Gte',
'<=': 'Lte',
'>': 'Gt',
'<': 'Lt',
'!=': 'Neq',
'==': 'EqEq',
// Compound assignment operators
'??=': 'NullishEq',
'+=': 'PlusEq',
'-=': 'MinusEq',
'*=': 'StarEq',
'/=': 'SlashEq',
'%=': 'ModuloEq',
// Nullish coalescing
'??': 'NullishCoalesce',
// Math
'*': 'Star',
'**': 'StarStar',
'=': 'Eq',
'/': 'Slash',
'+': 'Plus',
'-': 'Minus',
'%': 'Modulo',
// Dotget
'.': 'Dot',
// Pipe
'|': 'operator',
}
/**
 * Thin wrapper that exposes a root `SyntaxNode` through a tree-shaped API
 * (`topNode`, `length`, `cursor()`).
 */
export class Tree {
  /** Root node of the parsed program. */
  public topNode: SyntaxNode

  constructor(topNode: SyntaxNode) {
    this.topNode = topNode
  }

  /** End offset of the root node. */
  get length(): number {
    const root = this.topNode
    return root.to
  }

  /**
   * Snapshot describing the root node: its type, span and the node itself.
   * The returned object is a plain record, not a live cursor.
   */
  cursor() {
    const node = this.topNode
    const { type, from, to } = node
    return { type, from, to, node }
  }
}
// TODO: TEMPORARY SHIM
/**
 * Small value object wrapping a `NodeType` name so that callers can use
 * `.is(...)`, `.id` and `.name` on a node's type.
 */
class SyntaxNodeType {
constructor(public nodeType: NodeType) { }
/** True when the wrapped name is exactly `other` (case-sensitive string compare). */
is(other: string) {
return this.nodeType === other
}
/**
 * Numeric term id for the wrapped name, read from `./shrimp.terms`.
 *
 * Names without a `case` below fall through to `return 0`; of the current
 * `NodeType` members that is `'NamedArgPrefix'` and `'operator'`.
 * NOTE(review): confirm 0 is the intended id for those two names.
 */
get id(): number {
switch (this.nodeType) {
case 'Program':
return term.Program
case 'Block':
return term.Block
case 'FunctionCall':
return term.FunctionCall
case 'FunctionCallOrIdentifier':
return term.FunctionCallOrIdentifier
case 'FunctionCallWithBlock':
return term.FunctionCallWithBlock
case 'PositionalArg':
return term.PositionalArg
case 'NamedArg':
return term.NamedArg
case 'FunctionDef':
return term.FunctionDef
case 'Params':
return term.Params
case 'NamedParam':
return term.NamedParam
case 'Null':
return term.Null
case 'Boolean':
return term.Boolean
case 'Number':
return term.Number
case 'String':
return term.String
case 'StringFragment':
return term.StringFragment
case 'CurlyString':
return term.CurlyString
case 'DoubleQuote':
return term.DoubleQuote
case 'EscapeSeq':
return term.EscapeSeq
case 'Interpolation':
return term.Interpolation
case 'Regex':
return term.Regex
case 'Identifier':
return term.Identifier
case 'AssignableIdentifier':
return term.AssignableIdentifier
case 'IdentifierBeforeDot':
return term.IdentifierBeforeDot
case 'Word':
return term.Word
case 'Array':
return term.Array
case 'Dict':
return term.Dict
case 'Comment':
return term.Comment
case 'BinOp':
return term.BinOp
case 'ConditionalOp':
return term.ConditionalOp
case 'ParenExpr':
return term.ParenExpr
case 'Assign':
return term.Assign
case 'CompoundAssign':
return term.CompoundAssign
case 'DotGet':
return term.DotGet
case 'PipeExpr':
return term.PipeExpr
case 'IfExpr':
return term.IfExpr
case 'ElseIfExpr':
return term.ElseIfExpr
case 'ElseExpr':
return term.ElseExpr
case 'WhileExpr':
return term.WhileExpr
case 'TryExpr':
return term.TryExpr
case 'CatchExpr':
return term.CatchExpr
case 'FinallyExpr':
return term.FinallyExpr
case 'Throw':
return term.Throw
case 'Eq':
return term.Eq
case 'Modulo':
return term.Modulo
case 'Plus':
return term.Plus
case 'Star':
return term.Star
case 'Slash':
return term.Slash
case 'Import':
return term.Import
case 'Do':
return term.Do
case 'Underscore':
return term.Underscore
case 'colon':
return term.colon
case 'keyword':
return term.keyword
}
// no case matched: unknown/unmapped names report id 0
return 0
}
/** The wrapped name, unchanged. */
get name(): string {
return this.nodeType
}
}
/**
 * A node of the hand-built syntax tree: a type name, a `[from, to)` source
 * span, a parent link and an ordered list of children.
 *
 * `type` reads back as a `SyntaxNodeType` wrapper but is assigned with a bare
 * `NodeType` string.
 */
export class SyntaxNode {
  #type: NodeType
  from: number
  to: number
  parent: SyntaxNode | null
  children: SyntaxNode[] = []

  constructor(type: NodeType, from: number, to: number, parent: SyntaxNode | null = null) {
    this.#type = type
    this.from = from
    this.to = to
    this.parent = parent
  }

  /** Builds a leaf node from a token, using the token type's enum name as the node type. */
  static from(token: Token, parent?: SyntaxNode): SyntaxNode {
    const typeName = TokenType[token.type] as NodeType
    return new SyntaxNode(typeName, token.from, token.to, parent ?? null)
  }

  /** A fresh wrapper around the current type name on every read. */
  get type(): SyntaxNodeType {
    return new SyntaxNodeType(this.#type)
  }

  set type(name: NodeType) {
    this.#type = name
  }

  get name(): string {
    return this.type.name
  }

  /** Always false for nodes produced by this class. */
  get isError(): boolean {
    return false
  }

  get firstChild(): SyntaxNode | null {
    return this.children[0] ?? null
  }

  get lastChild(): SyntaxNode | null {
    return this.children[this.children.length - 1] ?? null
  }

  /** The sibling right after this node, or null at the end / when detached. */
  get nextSibling(): SyntaxNode | null {
    const siblings = this.parent?.children
    if (!siblings) return null

    const index = siblings.indexOf(this)
    // not listed under the parent, or already the last child
    if (index < 0 || index >= siblings.length - 1) return null
    return siblings[index + 1]!
  }

  /** The sibling right before this node, or null at the start / when detached. */
  get prevSibling(): SyntaxNode | null {
    const siblings = this.parent?.children
    if (!siblings) return null

    const index = siblings.indexOf(this)
    if (index <= 0) return null
    return siblings[index - 1]!
  }

  /** Appends one child and re-parents it to this node. */
  add(node: SyntaxNode) {
    this.push(node)
  }

  /** Appends any number of children, re-parents them, and returns this node for chaining. */
  push(...nodes: SyntaxNode[]): SyntaxNode {
    nodes.forEach(child => child.parent = this)
    this.children.push(...nodes)
    return this
  }

  toString(): string {
    return this.type.name
  }
}
// Operator precedence (binding power) - higher = tighter binding
/**
 * Binding power for each infix operator, keyed by its source spelling.
 *
 * Operators that share a number are parsed at the same level. Spellings
 * missing from this table (for example `=`, `.` and `|`) look up as
 * `undefined`, which the precedence-climbing loops in parser2.ts treat as
 * "stop parsing infix operators here".
 */
export const precedence: Record<string, number> = {
// Logical
'or': 10,
'and': 20,
// Comparison
'==': 30,
'!=': 30,
'<': 30,
'>': 30,
'<=': 30,
'>=': 30,
// Nullish coalescing
'??': 35,
// Bitwise shifts (lower precedence than addition)
'<<': 37,
'>>': 37,
'>>>': 37,
// Addition/Subtraction
'+': 40,
'-': 40,
// Bitwise AND/OR/XOR (higher precedence than addition)
'band': 45,
'bor': 45,
'bxor': 45,
// Multiplication/Division/Modulo
'*': 50,
'/': 50,
'%': 50,
// Exponentiation (right-associative)
'**': 60,
}
// Operators whose infix node is tagged 'ConditionalOp' instead of 'BinOp'
// by the precedence-climbing code in parser2.ts.
export const conditionals = new Set([
'==', '!=', '<', '>', '<=', '>=', '??', 'and', 'or'
])
// Compound-assignment spellings; parser2.ts checks the token after an
// identifier against this list (plus plain '=') to decide on an assignment.
export const compounds = [
'??=', '+=', '-=', '*=', '/=', '%='
]

949
src/parser/parser2.ts Normal file
View File

@ -0,0 +1,949 @@
import { Scanner, type Token, TokenType } from './tokenizer2'
import { SyntaxNode, operators, precedence, conditionals, compounds } from './node'
import { globals } from './tokenizer'
import { parseString } from './stringParser'
// Short alias for the TokenType enum, used throughout the parser.
const $T = TokenType
/**
 * Parses `input` with a brand-new `Parser` and returns the resulting
 * `Program` node.
 */
export const parse = (input: string): SyntaxNode => new Parser().parse(input)
/**
 * A set of known names with an optional link to an enclosing scope.
 * A scope created without a parent is the global scope and starts out
 * holding every name in `globals`.
 */
class Scope {
  parent?: Scope
  set = new Set<string>()

  constructor(parent?: Scope) {
    this.parent = parent
    // only the root scope is seeded with the global names
    if (parent) return
    for (const name of globals) this.add(name)
  }

  /** Registers `key` in this scope only. */
  add(key: string) {
    this.set.add(key)
  }

  /** True when `key` is known here or in any enclosing scope. */
  has(key: string): boolean {
    let scope: Scope | undefined = this
    while (scope) {
      if (scope.set.has(key)) return true
      scope = scope.parent
    }
    return false
  }
}
/**
 * Hand-written parser that turns a token list into a `SyntaxNode` tree.
 * All parsing state lives on the instance; `parse()` is the entry point.
 */
export class Parser {
// tokens produced by Scanner.tokenize() for the current input
tokens: Token[] = []
// cursor position; parse() checks that every statement moves it forward
pos = 0
// pipe() only skips newlines around `|` while this is 0
// NOTE(review): presumably the current paren nesting depth, maintained by parens() — not visible here
inParens = 0
// source text being parsed; used to slice names and to build error messages
input = ''
// names currently considered in scope
scope = new Scope
// while true, functionCall() will not consume a trailing `: ... end` block
inTestExpr = false
/**
 * Tokenizes `input` and parses it into a `Program` node spanning the whole
 * input, with one child per top-level statement.
 *
 * All per-parse state is reset first, so a `Parser` instance can be reused,
 * including after an earlier call threw part-way through.
 *
 * @param input source text to parse
 * @returns the `Program` node
 * @throws a string message when a statement fails to consume any token
 */
parse(input: string): SyntaxNode {
  const scanner = new Scanner()
  this.tokens = scanner.tokenize(input)
  this.pos = 0
  // reset alongside the other state so a failed parse cannot leak its depth
  this.inParens = 0
  this.input = input
  this.scope = new Scope()
  this.inTestExpr = false

  const node = new SyntaxNode('Program', 0, input.length)

  while (!this.isEOF()) {
    // blank lines and stray separators between statements
    if (this.is($T.Newline) || this.is($T.Semicolon)) {
      this.next()
      continue
    }

    const prevPos = this.pos
    const stmt = this.statement()
    if (stmt) node.add(stmt)

    // guard against an infinite loop when a sub-parser forgets to advance;
    // template literal so the offending input is actually interpolated
    if (this.pos === prevPos && !this.isEOF())
      throw `parser didn't advance - you need to call next()\n\n ${this.input}\n`
  }

  return node
}
//
// parse foundation nodes - statements, expressions
//
// statement is a line of code
/**
 * Parses one statement: a comment, or an expression after skipping leading
 * newlines/semicolons. Returns null at end of input or when the next token
 * is a keyword that ends the enclosing expression.
 */
statement(): SyntaxNode | null {
  if (this.is($T.Comment)) return this.comment()

  // swallow separators in front of the statement
  while (this.is($T.Newline) || this.is($T.Semicolon)) {
    this.next()
  }

  const nothingToParse = this.isEOF() || this.isExprEndKeyword()
  return nothingToParse ? null : this.expression()
}
// expressions can be found in four places:
// 1. line of code
// 2. right side of assignment
// 3. if/while conditions
// 4. inside (parens)
/**
 * Parses one expression by picking a sub-parser from the current and next
 * token, then applying a fixed series of follow-up checks to the result.
 *
 * The order of the branches matters: the first matching branch wins, and the
 * follow-up checks run top to bottom on whatever it produced.
 *
 * @param allowPipe when false, a following `|` is left unconsumed for the
 *   caller; pipe() passes false so each right-hand side stays flat
 * @returns the parsed expression node
 */
expression(allowPipe = true): SyntaxNode {
let expr
// x = value
if (this.is($T.Identifier) && (
this.nextIs($T.Operator, '=') || compounds.some(x => this.nextIs($T.Operator, x))
))
expr = this.assign()
// if, while, do, etc
else if (this.is($T.Keyword))
expr = this.keywords()
// dotget
else if (this.nextIs($T.Operator, '.'))
expr = this.dotGetFunctionCall()
// echo hello world
else if (this.is($T.Identifier) && !this.nextIs($T.Operator) && !this.nextIsExprEnd())
expr = this.functionCall()
// bare-function-call
else if (this.is($T.Identifier) && this.nextIsExprEnd())
expr = this.functionCallOrIdentifier()
// everything else
else
expr = this.exprWithPrecedence()
// check for destructuring
// (returns immediately, so a destructuring assignment never reaches the pipe check below)
if (expr.type.is('Array') && this.is($T.Operator, '='))
return this.destructure(expr)
// check for parens function call
// ex: (ref my-func) my-arg
if (expr.type.is('ParenExpr') && !this.isExprEnd())
expr = this.functionCall(expr)
// if dotget is followed by binary operator, continue parsing as binary expression
if (expr.type.is('DotGet') && this.is($T.Operator) && !this.is($T.Operator, '|'))
expr = this.dotGetBinOp(expr)
// one | echo
if (allowPipe && this.isPipe())
return this.pipe(expr)
// regular
else
return expr
}
// piping | stuff | is | cool
/**
 * Builds a flat `PipeExpr` from `left` and every `| expr` that follows.
 * Children alternate expression, operator, expression, ...
 *
 * Newlines around each `|` are skipped only when parsing started outside
 * parentheses (`inParens` was 0 on entry).
 */
pipe(left: SyntaxNode): SyntaxNode {
  const mayCrossLines = this.inParens === 0
  const eatNewlines = () => {
    if (!mayCrossLines) return
    while (this.is($T.Newline)) this.next()
  }

  const segments: SyntaxNode[] = [left]

  while (this.isPipe()) {
    eatNewlines()

    const bar = this.op('|')
    bar.type = 'operator'
    segments.push(bar)

    eatNewlines()

    // right-hand side must not swallow further pipes; this loop handles them
    segments.push(this.expression(false))
  }

  const first = segments[0]!
  const last = segments.at(-1)!
  return new SyntaxNode('PipeExpr', first.from, last.to).push(...segments)
}
// Pratt parser - parses expressions with precedence climbing
// bp = binding precedence
/**
 * Precedence-climbing parser for infix expressions.
 *
 * Parses a value, then keeps folding `left op right` while the next operator
 * has a binding power of at least `minBp`. Operators missing from the
 * `precedence` table end the loop.
 *
 * @param minBp lowest binding power this call is allowed to consume
 */
exprWithPrecedence(minBp = 0): SyntaxNode {
  let lhs = this.value()

  for (;;) {
    if (!this.is($T.Operator)) break

    const symbol = this.current().value!
    const power = precedence[symbol]
    // unknown operator, or one that binds looser than the caller allows
    if (power === undefined || power < minBp) break

    const opNode = this.op()

    // `**` is right-associative, so its right side may reuse the same power
    const rhs = this.exprWithPrecedence(symbol === '**' ? power : power + 1)

    const kind = conditionals.has(symbol) ? 'ConditionalOp' : 'BinOp'
    lhs = new SyntaxNode(kind, lhs.from, rhs.to).push(lhs, opNode, rhs)
  }

  return lhs
}
// if, while, do, etc
/**
 * Dispatches on the current keyword token to the matching sub-parser.
 * When no keyword in the table matches, falls through to `expect`, whose
 * result is typed `never` here.
 */
keywords(): SyntaxNode {
  const handlers: [string, () => SyntaxNode][] = [
    ['if', () => this.if()],
    ['while', () => this.while()],
    ['do', () => this.do()],
    ['try', () => this.try()],
    ['throw', () => this.throw()],
    ['import', () => this.import()],
  ]

  for (const [word, handle] of handlers) {
    if (this.is($T.Keyword, word)) return handle()
  }

  return this.expect($T.Keyword, 'if/while/do/import') as never
}
// value can be an atom or a (parens that gets turned into an atom)
// values are used in a few places:
// 1. function arguments
// 2. array/dict members
// 3. binary operations
// 4. anywhere an expression can be used
/**
 * Parses a single value: a parenthesised expression, an array/dict literal,
 * a dotted access, or a plain atom, checked in that order.
 */
value(): SyntaxNode {
  if (this.is($T.OpenParen)) {
    return this.parens()
  } else if (this.is($T.OpenBracket)) {
    return this.arrayOrDict()
  } else if (this.nextIs($T.Operator, '.')) {
    // something.property
    return this.dotGet()
  } else {
    return this.atom()
  }
}
//
// parse specific nodes
//
// raw determines whether we just want the SyntaxNodes or we want to
// wrap them in a PositionalArg
/**
 * Parses one argument. A `do` keyword starts a function definition without
 * needing parentheses; anything else is parsed as a value.
 *
 * @param raw when true the parsed node is returned as-is; otherwise it is
 *   wrapped in a `PositionalArg` node with the same span
 */
arg(raw = false): SyntaxNode {
  const inner = this.is($T.Keyword, 'do') ? this.do() : this.value()
  if (raw) return inner

  const wrapper = new SyntaxNode('PositionalArg', inner.from, inner.to)
  wrapper.add(inner)
  return wrapper
}
// [ 1 2 3 ]
/**
 * Parses `[ ... ]` as an `Array` node. Newlines and semicolons between
 * members are dropped; comments are kept as children alongside the values.
 */
array(): SyntaxNode {
  const open = this.expect($T.OpenBracket)
  const items = []

  while (!(this.is($T.CloseBracket) || this.isEOF())) {
    if (this.is($T.Semicolon) || this.is($T.Newline)) {
      // separators carry no meaning inside the brackets
      this.next()
    } else if (this.is($T.Comment)) {
      items.push(this.comment())
    } else {
      items.push(this.value())
    }
  }

  const close = this.expect($T.CloseBracket)
  return new SyntaxNode('Array', open.from, close.to).push(...items)
}
// which are we dealing with? ignores leading newlines and comments
/**
 * Looks ahead from the opening bracket (without consuming anything) to decide
 * whether the literal is a dict or an array, then delegates to dict() or
 * array().
 *
 * The scan skips comments, semicolons and newlines and stops at the first
 * other token or at the closing bracket.
 */
arrayOrDict(): SyntaxNode {
// `peek` is the lookahead distance; peek(1) is the token after the bracket
let peek = 1
let curr = this.peek(peek++)
let isDict = false
while (curr && curr.type !== $T.CloseBracket) {
// definitely a dict
if (curr.type === $T.NamedArgPrefix) {
isDict = true
break
}
// empty dict
if (curr.type === $T.Operator && curr.value === '=') {
isDict = true
break
}
// [ a = true ]
// (`peek` was already advanced, so this is the token right after `curr`)
const next = this.peek(peek)
if (next?.type === $T.Operator && next.value === '=') {
isDict = true
break
}
// probably an array
if (curr.type !== $T.Comment && curr.type !== $T.Semicolon && curr.type !== $T.Newline)
break
curr = this.peek(peek++)
}
return isDict ? this.dict() : this.array()
}
// x = true
/**
 * Parses `name <op> expression`. The node is an `Assign` when the operator
 * is exactly `=`, otherwise a `CompoundAssign`. Children are the
 * identifier, the operator and the right-hand expression.
 */
assign(): SyntaxNode {
  const target = this.assignableIdentifier()

  // remember the operator token before op() consumes it
  const opToken = this.current()!
  const opNode = this.op()
  const rhs = this.expression()

  const kind = opToken.value === '=' ? 'Assign' : 'CompoundAssign'
  return new SyntaxNode(kind, target.from, rhs.to).push(target, opNode, rhs)
}
// identifier used in assignment (TODO: legacy lezer quirk)
/**
 * Consumes an identifier on the left of an assignment, records its name in
 * the current scope and returns it tagged as `AssignableIdentifier`.
 */
assignableIdentifier(): SyntaxNode {
  const token = this.expect($T.Identifier)

  const node = SyntaxNode.from(token)
  node.type = 'AssignableIdentifier'

  // the name becomes known to everything parsed after this point
  this.scope.add(token.value!)
  return node
}
// atoms are the basic building blocks: literals, identifiers, words
/**
 * Parses a leaf value. Strings go through string(); null, boolean, number,
 * identifier, word, regex and underscore tokens become a node directly.
 *
 * @throws a string message for any other token (the token is consumed first)
 */
atom(): SyntaxNode {
  if (this.is($T.String)) return this.string()

  const isLeaf = this.isAny(
    $T.Null, $T.Boolean, $T.Number, $T.Identifier, $T.Word, $T.Regex, $T.Underscore
  )
  const token = this.next()
  if (isLeaf) return SyntaxNode.from(token)

  throw `[atom] unexpected token ${TokenType[token.type]}: ${JSON.stringify(token)}\n\n ${this.input}\n`
}
// blocks in if, do, special calls, etc
// `: something end`
//
// `blockNode` determines whether we return [colon, BlockNode, end] or
// just a list of statements like [colon, stmt1, stmt2, end]
/**
 * Parses `: stmt stmt ...` up to, but not including, the keyword that ends
 * the block.
 *
 * @param blockNode when true the statements are wrapped in a single `Block`
 *   node and `[colon, Block]` is returned; when false the statements are
 *   returned inline as `[colon, stmt1, stmt2, ...]`
 * @returns the colon node followed by the block contents
 */
block(blockNode = true): SyntaxNode[] {
  const stmts: SyntaxNode[] = []
  const colon = this.colon()

  while (!this.isExprEndKeyword() && !this.isEOF()) {
    const stmt = this.statement()
    if (stmt) stmts.push(stmt)
  }

  if (!blockNode) return [colon, ...stmts]

  // An empty body has no statement to take a span from; anchor the Block
  // right after the colon instead of dereferencing an undefined statement.
  const from = stmts[0]?.from ?? colon.to
  const to = stmts.at(-1)?.to ?? colon.to

  const block = new SyntaxNode('Block', from, to)
  block.push(...stmts)
  return [colon, block]
}
// catch err: block
/**
 * Parses `catch [name]: block` into a `CatchExpr` whose children are the
 * keyword, the optional error identifier, and the block parts.
 */
catch(): SyntaxNode {
  const keyword = this.keyword('catch')
  const binding = this.is($T.Identifier) ? this.identifier() : undefined
  const body = this.block()

  const node = new SyntaxNode('CatchExpr', keyword.from, body.at(-1)!.to)
  const children = binding ? [keyword, binding, ...body] : [keyword, ...body]
  return node.push(...children)
}
// colon
/** Consumes a colon token and returns it as a node tagged `colon` (lowercase). */
colon(): SyntaxNode {
  const token = this.expect($T.Colon)
  const node = SyntaxNode.from(token)
  node.type = 'colon' // TODO lezer legacy
  return node
}
// # comment
/** Consumes a comment token and returns it as a node. */
comment(): SyntaxNode {
  const token = this.expect($T.Comment)
  return SyntaxNode.from(token)
}
// [ a b c ] = [ 1 2 3 ]
/**
 * Parses the `= expression` that follows an already-parsed array literal and
 * returns an `Assign` node of `[array, =, expression]`.
 *
 * After the right-hand side is parsed, the source text of every child of
 * `array` is added to the current scope as a name.
 */
destructure(array: SyntaxNode): SyntaxNode {
  const eq = this.op('=')
  const val = this.expression()

  array.children
    .map(child => this.input.slice(child.from, child.to))
    .forEach(name => this.scope.add(name))

  return new SyntaxNode('Assign', array.from, val.to).push(array, eq, val)
}
// [ a=1 b=true c='three' ]
/**
 * Parses `[ key=value ... ]` as a `Dict` node.
 *
 * Handles the empty form (`[=]`), entries written with spaces around `=`
 * (`a = 1`), entries that start with a NamedArgPrefix token, and plain
 * positional entries. Newlines and semicolons are dropped; comments are kept
 * as children.
 */
dict(): SyntaxNode {
const open = this.expect($T.OpenBracket)
// empty dict [=] or [ = ]
if (this.is($T.Operator, '=') && this.nextIs($T.CloseBracket)) {
const _op = this.next()
const close = this.next()
return new SyntaxNode('Dict', open.from, close.to)
}
const values = []
while (!this.is($T.CloseBracket) && !this.isEOF()) {
if (this.is($T.Semicolon) || this.is($T.Newline)) {
this.next()
continue
}
if (this.is($T.Comment)) {
values.push(this.comment())
continue
}
// check for named arg with space after it (vs connected)
if (this.nextIs($T.Operator, '=')) {
const ident = this.identifier()
const op = this.op('=')
// raw=true: the value is not wrapped in a PositionalArg
const val = this.arg(true)
// the prefix node spans `name =` and is created without children
const prefix = new SyntaxNode('NamedArgPrefix', ident.from, op.to)
const node = new SyntaxNode('NamedArg', ident.from, val.to)
node.add(prefix)
node.add(val)
values.push(node)
} else {
values.push(this.is($T.NamedArgPrefix) ? this.namedArg() : this.arg())
}
}
const close = this.expect($T.CloseBracket)
const node = new SyntaxNode('Dict', open.from, close.to)
return node.push(...values)
}
// FunctionDef `do x y: something end`
/**
 * Parses a function definition: `do params...: body [catch ...] [finally ...] end`.
 *
 * A nested Scope is pushed for the duration of the definition, each
 * parameter name is registered in it, and the previous scope is restored
 * before returning. The resulting `FunctionDef` children are, in order:
 * the `Do` keyword, a `Params` node, the colon and body statements, the
 * optional catch and finally nodes, and the `end` keyword.
 *
 * @throws a string message when a parameter is neither an Identifier nor a
 *   NamedArgPrefix token
 */
do(): SyntaxNode {
const doNode = this.keyword('do')
doNode.type = 'Do'
this.scope = new Scope(this.scope)
const params = []
while (!this.is($T.Colon) && !this.isExprEnd()) {
let varName = this.current().value!
// a token value ending in `=` is registered without that trailing `=`
if (varName.endsWith('=')) varName = varName.slice(0, varName.length - 1)
this.scope.add(varName)
let arg
if (this.is($T.Identifier))
arg = this.identifier()
else if (this.is($T.NamedArgPrefix))
arg = this.namedParam()
else
throw `[do] expected Identifier or NamedArgPrefix, got ${JSON.stringify(this.current())}\n\n ${this.input}\n`
params.push(arg)
}
// blockNode=false: body statements are returned inline after the colon
const block = this.block(false)
let catchNode, finalNode
if (this.is($T.Keyword, 'catch'))
catchNode = this.catch()
if (this.is($T.Keyword, 'finally'))
finalNode = this.finally()
let end = this.keyword('end')
// `last` supplies the end offset of the FunctionDef span
// NOTE(review): the span ends at the last body/catch/finally child, not at `end.to`,
// even though `end` is pushed as a child — confirm this is intended
let last = block.at(-1)
if (finalNode) last = finalNode.children.at(-1)!
else if (catchNode) last = catchNode.children.at(-1)!
const node = new SyntaxNode('FunctionDef', doNode.from, last!.to)
node.add(doNode)
// with no parameters the Params node is still added, spanning 0..0
const paramsNode = new SyntaxNode(
'Params',
params[0]?.from ?? 0,
params.at(-1)?.to ?? 0
)
if (params.length) paramsNode.push(...params)
node.add(paramsNode)
this.scope = this.scope.parent!
node.push(...block)
if (catchNode) node.push(catchNode)
if (finalNode) node.push(finalNode)
return node.push(end)
}
// config.path
/**
 * Parses a dotted access such as `config.path`.
 *
 * If the leading identifier is not a known name in the current scope, the
 * result of `this.word(left)` is returned instead of a DotGet.
 * Otherwise each `.part` is consumed, where a part is either a parenthesised
 * expression or an atom, and a `DotGet` node is returned.
 */
dotGet(): SyntaxNode {
const left = this.identifier()
const ident = this.input.slice(left.from, left.to)
// not in scope, just return Word
if (!this.scope.has(ident))
return this.word(left)
if (left.type.is('Identifier')) left.type = 'IdentifierBeforeDot'
let parts = []
while (this.is($T.Operator, '.')) {
this.next()
parts.push(this.is($T.OpenParen) ? this.parens() : this.atom())
}
// TODO lezer legacy - we can do a flat DotGet if we remove this
// with more than one part, the parts are first combined by collapseDotGets
// NOTE(review): collapseDotGets is defined outside this view; presumably it nests the parts
const nodes = parts.length > 1 ? collapseDotGets(parts) : undefined
// NOTE(review): assumes at least one part was parsed; callers seen here only
// call dotGet() when the token after the identifier is `.`
const node = new SyntaxNode('DotGet', left.from, parts.at(-1)!.to)
return nodes ? node.push(left, nodes!) : node.push(left, ...parts)
}
// continue parsing dotget/word binary operation
//
// Starting from an already-parsed left operand, keeps consuming
// `operator right-hand-side` pairs while the current token is a registered
// operator other than the pipe, folding each pair into a BinOp/ConditionalOp.
dotGetBinOp(left: SyntaxNode): SyntaxNode {
  let acc = left
  for (;;) {
    // pipes and non-operators end the chain
    if (!this.is($T.Operator) || this.is($T.Operator, '|')) return acc

    const symbol = this.current().value!
    const bindingPower = precedence[symbol]
    // operators without a precedence entry are not handled here
    if (bindingPower === undefined) return acc

    const opNode = this.op()
    const rhs = this.exprWithPrecedence(bindingPower + 1)
    const kind = conditionals.has(symbol) ? 'ConditionalOp' : 'BinOp'
    const combined = new SyntaxNode(kind, acc.from, rhs.to)
    combined.push(acc, opNode, rhs)
    acc = combined
  }
}
// dotget in a statement/expression (something.blah) or (something.blah arg1)
//
// Returns the dotGet/Word untouched when a binary operator (not a pipe)
// follows, or when the dotGet degraded to a Word because its head is not in
// scope. Otherwise wraps it as a call, with or without arguments.
dotGetFunctionCall(): SyntaxNode {
  const target = this.dotGet()

  // a following binary operator is left for the expression parser
  const beforeBinaryOp = this.is($T.Operator) && !this.is($T.Operator, '|')
  if (beforeBinaryOp || target.type.is('Word')) return target

  return this.isExprEnd()
    ? this.functionCallOrIdentifier(target)
    : this.functionCall(target)
}
// can be used in functions or try block
//
// Parses `finally` followed by a block; the resulting FinallyExpr spans from
// the keyword to the end of the block's last node.
finally(): SyntaxNode {
  const finallyKeyword = this.keyword('finally')
  const body = this.block()
  const end = body.at(-1)!.to
  return new SyntaxNode('FinallyExpr', finallyKeyword.from, end).push(finallyKeyword, ...body)
}
// you're lookin at it
//
// Parses a call: the callee (given, or an identifier read here) followed by
// positional/named args up to the end of the expression. Outside of a test
// expression, a trailing `:` starts a block that is closed by `end`, and the
// call is wrapped in a FunctionCallWithBlock.
functionCall(fn?: SyntaxNode): SyntaxNode {
  const callee = fn ?? this.identifier()

  const args: SyntaxNode[] = []
  while (!this.isExprEnd()) {
    const arg = this.is($T.NamedArgPrefix) ? this.namedArg() : this.arg()
    args.push(arg)
  }

  // with no args the call ends where the callee ends
  const lastPart = args.at(-1) || callee
  const call = new SyntaxNode('FunctionCall', callee.from, lastPart.to)
  call.push(callee, ...args)

  const hasBlock = !this.inTestExpr && this.is($T.Colon)
  if (!hasBlock) return call

  const body = this.block()
  const endKeyword = this.keyword('end')
  const withBlock = new SyntaxNode('FunctionCallWithBlock', call.from, endKeyword.to)
  return withBlock.push(call, ...body, endKeyword)
}
// bare identifier in an expression
//
// Wraps `inner` (or a dotGet / identifier parsed here) in a
// FunctionCallOrIdentifier node. A dotGet that turned out to be a Word is
// returned as-is. Outside of a test expression, a trailing `:` attaches a
// block closed by `end`.
functionCallOrIdentifier(inner?: SyntaxNode) {
  let target = inner

  if (!target && this.nextIs($T.Operator, '.')) {
    target = this.dotGet()
    // if the dotGet was just a Word, bail
    if (target.type.is('Word')) return target
  }

  target = target ?? this.identifier()

  const wrapped = new SyntaxNode('FunctionCallOrIdentifier', target.from, target.to)
  wrapped.push(target)

  // no block: either we're in a test expression or there is no colon
  if (this.inTestExpr || !this.is($T.Colon)) return wrapped

  const body = this.block()
  const endKeyword = this.keyword('end')
  const withBlock = new SyntaxNode('FunctionCallWithBlock', wrapped.from, endKeyword.to)
  return withBlock.push(wrapped, ...body, endKeyword)
}
// function and variable names
//
// Consumes an Identifier token (throws via expect() otherwise) and converts
// it to a SyntaxNode.
identifier(): SyntaxNode {
  const token = this.expect($T.Identifier)
  return SyntaxNode.from(token)
}
// if something: blah end
// if something: blah else: blah end
// if something: blah else if something: blah else: blah end
//
// Parses an `if` expression: the test, the first block, any number of
// `else if` clauses, an optional `else:` clause and the closing `end`.
// Each ElseIfExpr / ElseExpr node starts at its own `else` keyword so that
// its range covers the children pushed into it.
if(): SyntaxNode {
  const ifNode = this.keyword('if')
  const test = this.testExpr()
  const ifBlock = this.block()

  const node = new SyntaxNode('IfExpr', ifNode.from, ifBlock.at(-1)!.to)
  node.push(ifNode, test)
  node.push(...ifBlock)

  // `else if` needs both keywords; a lone `else` is handled below
  while (this.is($T.Keyword, 'else') && this.nextIs($T.Keyword, 'if')) {
    const elseWord = this.keyword('else')
    const ifWord = this.keyword('if')
    const elseIfTest = this.testExpr()
    const elseIfBlock = this.block()

    // start at this clause's `else`, not at the first if-block
    const elseIfNode = new SyntaxNode('ElseIfExpr', elseWord.from, elseIfBlock.at(-1)!.to)
    elseIfNode.push(elseWord, ifWord, elseIfTest)
    elseIfNode.push(...elseIfBlock)
    node.push(elseIfNode)
  }

  if (this.is($T.Keyword, 'else') && this.nextIs($T.Colon)) {
    const elseWord = this.keyword('else')
    const elseBlock = this.block()

    // start at this clause's `else`, not at the first if-block
    const elseNode = new SyntaxNode('ElseExpr', elseWord.from, elseBlock.at(-1)!.to)
    elseNode.push(elseWord)
    elseNode.push(...elseBlock)
    node.push(elseNode)
  }

  return node.push(this.keyword('end'))
}
// import str math list
// import str only=ends-with?
//
// Parses the `import` keyword followed by identifiers and/or named args up to
// the end of the expression. Named arg values are read with value(), so they
// are stored directly under the NamedArg node. A bare `import` with nothing
// after it yields an Import node that ends at the keyword rather than
// crashing on a missing last argument.
import(): SyntaxNode {
  const keyword = this.keyword('import')
  const args: SyntaxNode[] = []

  while (!this.isExprEnd()) {
    if (this.is($T.NamedArgPrefix)) {
      const prefix = SyntaxNode.from(this.next())
      const val = this.value()
      const arg = new SyntaxNode('NamedArg', prefix.from, val.to)
      arg.push(prefix, val)
      args.push(arg)
    } else {
      args.push(this.identifier())
    }
  }

  // same fallback functionCall() uses when there are no args
  const last = args.at(-1) ?? keyword
  const node = new SyntaxNode('Import', keyword.from, last.to)
  node.add(keyword)
  return node.push(...args)
}
// if, while, do, etc
//
// Consumes the Keyword token with the given name (throws via expect() if the
// current token is anything else) and returns it as a node typed 'keyword'.
keyword(name: string): SyntaxNode {
  const token = this.expect($T.Keyword, name)
  const keywordNode = SyntaxNode.from(token)
  keywordNode.type = 'keyword' // TODO lezer legacy
  return keywordNode
}
// abc= true
//
// Parses a named argument at a call site: the `name=` prefix token followed
// by an argument parsed with arg(true).
namedArg(): SyntaxNode {
  const prefixToken = this.expect($T.NamedArgPrefix)
  const prefix = SyntaxNode.from(prefixToken)
  const value = this.arg(true)
  return new SyntaxNode('NamedArg', prefix.from, value.to).push(prefix, value)
}
// abc= null|true|123|'hi'
//
// Parses a named parameter in a function definition. The default value must
// be a Null, Boolean, Number or String node; anything else throws.
namedParam(): SyntaxNode {
  const prefix = SyntaxNode.from(this.expect($T.NamedArgPrefix))
  const defaultValue = this.value()

  const allowed = ['Null', 'Boolean', 'Number', 'String']
  if (allowed.includes(defaultValue.type.name)) {
    const param = new SyntaxNode('NamedParam', prefix.from, defaultValue.to)
    return param.push(prefix, defaultValue)
  }

  throw `[namedParam] default value must be Null|Bool|Num|Str, got ${defaultValue.type}\n\n ${this.input}\n`
}
// operators like + - =
//
// Consumes an Operator token (a specific one when `op` is given) and returns
// a node named after its entry in the `operators` table. Throws when the
// operator has no entry.
op(op?: string): SyntaxNode {
  // an empty/missing `op` means "any operator"
  const token = this.expect($T.Operator, op || undefined)
  const name = operators[token.value!]
  if (name) return new SyntaxNode(name, token.from, token.to)
  throw `[op] operator not registered: ${token.value!}\n\n ${this.input}\n`
}
// ( expressions in parens )
//
// Parses `( expression )` into a ParenExpr. `inParens` counts the nesting
// depth while the inner expression is being parsed.
parens(): SyntaxNode {
  this.inParens += 1
  const open = this.expect($T.OpenParen)
  const inner = this.expression()
  const close = this.expect($T.CloseParen)
  this.inParens -= 1

  const parenExpr = new SyntaxNode('ParenExpr', open.from, close.to)
  parenExpr.add(inner)
  return parenExpr
}
// 'hell yes' "hell no" { hell if i know }
//
// Consumes a String token and delegates to parseString() to build the String
// node with its fragments and interpolations.
string(): SyntaxNode {
  const { from, to } = this.expect($T.String)
  return parseString(this.input, from, to, this)
}
// if TEST: blah end
//
// Parses the condition of an if/while. `inTestExpr` is raised while the
// expression is parsed so that a following `:` is not treated as the start of
// a function-call block; it is set back to false afterwards (not restored to
// a previous value).
testExpr(): SyntaxNode {
  this.inTestExpr = true
  const condition = this.expression()
  this.inTestExpr = false
  return condition
}
// throw blah
//
// Parses `throw` followed by a full expression.
throw(): SyntaxNode {
  const throwKeyword = this.keyword('throw')
  const thrown = this.expression()
  return new SyntaxNode('Throw', throwKeyword.from, thrown.to).push(throwKeyword, thrown)
}
// try: blah catch e: blah end
//
// Parses `try` with its block, optional catch and finally clauses, and the
// closing `end`. The TryExpr is constructed with a range ending at the last
// node of the finally clause, else the catch clause, else the try block.
try(): SyntaxNode {
  const tryKeyword = this.keyword('try')
  const body = this.block()

  const catchNode = this.is($T.Keyword, 'catch') ? this.catch() : undefined
  const finallyNode = this.is($T.Keyword, 'finally') ? this.finally() : undefined
  const endKeyword = this.keyword('end')

  let tail = body.at(-1)
  if (finallyNode) tail = finallyNode.children.at(-1)
  else if (catchNode) tail = catchNode.children.at(-1)

  const tryExpr = new SyntaxNode('TryExpr', tryKeyword.from, tail!.to)
  tryExpr.push(tryKeyword, ...body)
  if (catchNode) tryExpr.push(catchNode)
  if (finallyNode) tryExpr.push(finallyNode)
  return tryExpr.push(endKeyword)
}
// while test: blah end
//
// Parses a while loop; the WhileExpr spans from the `while` keyword through
// the closing `end`.
while(): SyntaxNode {
  const whileKeyword = this.keyword('while')
  const condition = this.testExpr()
  const body = this.block()
  const endKeyword = this.keyword('end')

  const loop = new SyntaxNode('WhileExpr', whileKeyword.from, endKeyword.to)
  return loop.push(whileKeyword, condition, ...body, endKeyword)
}
// readme.txt (when `readme` isn't in scope)
//
// Builds a single Word node out of a starting node/token and any following
// `.part` segments. Every dot is consumed; the Word's end only advances when
// the dot is followed by a Word, Identifier or Number token.
word(start?: SyntaxNode): SyntaxNode {
  const first = start ?? this.expect($T.Word)
  let end = first.to

  while (this.is($T.Operator, '.')) {
    this.next() // consume the dot
    if (this.isAny($T.Word, $T.Identifier, $T.Number)) end = this.next().to
  }

  return new SyntaxNode('Word', first.from, end)
}
//
// helpers
//
// Token at the cursor. Past the end of the token list this returns a
// synthetic Newline token (from 0, to 0) instead of undefined.
current(): Token {
  const token = this.tokens[this.pos]
  return token || { type: TokenType.Newline, from: 0, to: 0 }
}
// Token `offset` positions ahead of the cursor (default: the next one), or
// undefined when that index is outside the token list.
peek(offset = 1): Token | undefined {
  const index = this.pos + offset
  return this.tokens[index]
}
// look past newlines to check for a specific token
//
// Starts at the token after the cursor, skips any run of Newline tokens, and
// reports whether the first non-newline token has the given type (and value,
// when one is supplied).
peekPastNewlines(type: TokenType, value?: string): boolean {
  let distance = 1
  let token = this.peek(distance)
  while (token && token.type === $T.Newline) {
    distance += 1
    token = this.peek(distance)
  }

  if (!token) return false
  return token.type === type && (value === undefined || token.value === value)
}
// Returns the token at the cursor and advances the cursor by one.
next(): Token {
  const consumed = this.current()
  this.pos += 1
  return consumed
}
// True when the token at the cursor has the given type and, if `value` is
// supplied, that exact value.
is(type: TokenType, value?: string): boolean {
  const token = this.current()
  if (!token) return false
  return token.type === type && (value === undefined || token.value === value)
}
// True when the token at the cursor matches any of the given types.
isAny(...type: TokenType[]): boolean {
  for (const candidate of type) {
    if (this.is(candidate)) return true
  }
  return false
}
// Like is(), but for the token right after the cursor. False when there is
// no such token.
nextIs(type: TokenType, value?: string): boolean {
  const upcoming = this.peek()
  if (!upcoming) return false
  return upcoming.type === type && (value === undefined || upcoming.value === value)
}
// True when the token right after the cursor matches any of the given types.
nextIsAny(...type: TokenType[]): boolean {
  for (const candidate of type) {
    if (this.nextIs(candidate)) return true
  }
  return false
}
// True when the cursor sits on something that terminates an expression:
// colon, semicolon, newline, closing paren/bracket, a pipe, or one of the
// end/else/catch/finally keywords.
isExprEnd(): boolean {
  if (this.isAny($T.Colon, $T.Semicolon, $T.Newline, $T.CloseParen, $T.CloseBracket)) return true
  if (this.is($T.Operator, '|')) return true
  if (this.isExprEndKeyword()) return true
  return !this.current()
}
// Same question as isExprEnd(), asked about the token right after the
// cursor. A missing next token also counts as an expression end.
nextIsExprEnd(): boolean {
  // pipes act like expression end for function arg parsing
  if (this.nextIs($T.Operator, '|')) return true

  if (this.nextIsAny($T.Colon, $T.Semicolon, $T.Newline, $T.CloseBracket, $T.CloseParen)) return true

  if (this.nextIs($T.Keyword, 'end')) return true
  if (this.nextIs($T.Keyword, 'else')) return true
  if (this.nextIs($T.Keyword, 'catch')) return true
  if (this.nextIs($T.Keyword, 'finally')) return true

  return !this.peek()
}
// True when the cursor is on one of the keywords that close or continue a
// block: end, else, catch, finally.
isExprEndKeyword(): boolean {
  const closers = ['end', 'else', 'catch', 'finally']
  return closers.some(word => this.is($T.Keyword, word))
}
// True when the cursor is on a pipe, or (outside of parens only) when a
// pipe is the first token found after skipping newlines past the cursor.
isPipe(): boolean {
  if (this.is($T.Operator, '|')) return true

  // inside parens, only look for pipes on same line (don't look past newlines)
  return this.inParens === 0 && this.peekPastNewlines($T.Operator, '|')
}
// Consumes and returns the current token if it has the given type (and
// value, when supplied). Otherwise throws a string describing what was
// expected, what was found, the cursor position and the full input.
expect(type: TokenType, value?: string): Token | never {
  if (this.is(type, value)) return this.next()

  const token = this.current()
  throw `expected ${TokenType[type]}${value ? ` "${value}"` : ''}, got ${TokenType[token?.type || 0]}${token?.value ? ` "${token.value}"` : ''} at position ${this.pos}\n\n ${this.input}\n`
}
// True once the cursor has moved past the last token.
isEOF(): boolean {
  return this.tokens.length <= this.pos
}
}
// TODO lezer legacy
//
// Folds a flat list of dot-access parts into right-nested DotGet nodes:
// [a, b, c] becomes DotGet(a, DotGet(b, c)). Every part that ends up on the
// left of a dot and is an Identifier is retyped to IdentifierBeforeDot.
// The input array itself is not modified.
function collapseDotGets(origNodes: SyntaxNode[]): SyntaxNode {
  let right = origNodes[origNodes.length - 1]!

  // walk from the second-to-last part back to the first
  for (let i = origNodes.length - 2; i >= 0; i--) {
    const left = origNodes[i]!
    if (left.type.is('Identifier')) left.type = 'IdentifierBeforeDot'

    const dot = new SyntaxNode("DotGet", left.from, right.to)
    dot.push(left, right)
    right = dot
  }

  return right
}

258
src/parser/stringParser.ts Normal file
View File

@ -0,0 +1,258 @@
import { SyntaxNode } from './node'
/**
 * Build a String node for the string literal located at input[from, to).
 *
 * The range includes the delimiters, e.g. "'hello $name'". The opening
 * character selects the flavour:
 *  - `"`  one DoubleQuote child covering the whole range (no interpolation or escapes)
 *  - `{`  curly string: interpolation, no escapes
 *  - `'`  single-quoted string: interpolation and escapes
 *
 * @param input  full source text
 * @param from   offset of the opening delimiter
 * @param to     offset just past the closing delimiter
 * @param parser parser used for `$(expr)` interpolations
 * @returns the String node with its children attached
 * @throws a string message when the first character is none of the above
 */
export const parseString = (input: string, from: number, to: number, parser: any): SyntaxNode => {
  const stringNode = new SyntaxNode('String', from, to)
  const opener = input.slice(from, to)[0]

  switch (opener) {
    case '"':
      stringNode.add(new SyntaxNode('DoubleQuote', from, to))
      return stringNode
    case '{':
      parseCurlyString(stringNode, input, from, to, parser)
      return stringNode
    case "'":
      parseSingleQuoteString(stringNode, input, from, to, parser)
      return stringNode
    default:
      throw `Unknown string type starting with: ${opener}`
  }
}
/**
* Parse single-quoted string: 'hello $name\n'
* Supports: interpolation ($var, $(expr)), escape sequences (\n, \$, etc)
*
* Walks the characters between the quotes and appends children to
* `stringNode` in source order: StringFragment for plain text, EscapeSeq for
* a backslash plus the character after it (always two characters), and
* Interpolation for `$name` / `$(expr)`.
*
* `from` is the offset of the opening quote and `to` is the offset just past
* the closing quote; all child positions are offsets into `input`.
*/
const parseSingleQuoteString = (stringNode: SyntaxNode, input: string, from: number, to: number, parser: any) => {
let pos = from + 1 // Skip opening '
let fragmentStart = pos // start of the plain-text run not yet emitted
while (pos < to - 1) { // -1 to skip closing '
const char = input[pos]
// Escape sequence
// (a backslash directly before the closing quote is not treated as an escape)
if (char === '\\' && pos + 1 < to - 1) {
// Push accumulated fragment
if (pos > fragmentStart) {
const frag = new SyntaxNode('StringFragment', fragmentStart, pos)
stringNode.add(frag)
}
// Add escape sequence node
const escNode = new SyntaxNode('EscapeSeq', pos, pos + 2)
stringNode.add(escNode)
pos += 2
fragmentStart = pos
continue
}
// Interpolation
if (char === '$') {
// Push accumulated fragment
if (pos > fragmentStart) {
const frag = new SyntaxNode('StringFragment', fragmentStart, pos)
stringNode.add(frag)
}
pos++ // Skip $
// Parse interpolation content
if (input[pos] === '(') {
// Expression interpolation: $(expr)
const interpStart = pos - 1 // Include the $
const exprResult = parseInterpolationExpr(input, pos, parser)
const interpNode = new SyntaxNode('Interpolation', interpStart, exprResult.endPos)
interpNode.add(exprResult.node)
stringNode.add(interpNode)
pos = exprResult.endPos
} else {
// Variable interpolation: $name
// NOTE(review): if `$` is not followed by an identifier character,
// identEnd === pos and a zero-length Identifier is emitted — confirm intended.
const interpStart = pos - 1
const identEnd = findIdentifierEnd(input, pos, to - 1)
const identNode = new SyntaxNode('FunctionCallOrIdentifier', pos, identEnd)
const innerIdent = new SyntaxNode('Identifier', pos, identEnd)
identNode.add(innerIdent)
const interpNode = new SyntaxNode('Interpolation', interpStart, identEnd)
interpNode.add(identNode)
stringNode.add(interpNode)
pos = identEnd
}
// plain text resumes right after the interpolation
fragmentStart = pos
continue
}
pos++
}
// Push final fragment
if (pos > fragmentStart && fragmentStart < to - 1) {
const frag = new SyntaxNode('StringFragment', fragmentStart, pos)
stringNode.add(frag)
}
}
/**
* Parse curly string: { hello $name }
* Supports: interpolation ($var, $(expr)), nested braces
* Does NOT support: escape sequences (raw content)
*
* Appends children to `stringNode` in source order: CurlyString nodes for
* raw text and Interpolation nodes for `$name` / `$(expr)`. The first
* CurlyString fragment includes the opening `{` and the last one includes
* the closing `}`. Nested `{ }` pairs are tracked with a depth counter and
* stay part of the raw text.
*
* `from` is the offset of the opening brace and `to` bounds the scan; all
* child positions are offsets into `input`.
*/
const parseCurlyString = (stringNode: SyntaxNode, input: string, from: number, to: number, parser: any) => {
let pos = from + 1 // Skip opening {
let fragmentStart = from // Include the opening { in the fragment
let depth = 1 // brace nesting; 1 = inside the outermost pair
while (pos < to && depth > 0) {
const char = input[pos]
// Track brace nesting
if (char === '{') {
depth++
pos++
continue
}
if (char === '}') {
depth--
if (depth === 0) {
// Push final fragment including closing }
const frag = new SyntaxNode('CurlyString', fragmentStart, pos + 1)
stringNode.add(frag)
break
}
pos++
continue
}
// Interpolation
if (char === '$') {
// Push accumulated fragment
if (pos > fragmentStart) {
const frag = new SyntaxNode('CurlyString', fragmentStart, pos)
stringNode.add(frag)
}
pos++ // Skip $
// Parse interpolation content
if (input[pos] === '(') {
// Expression interpolation: $(expr)
const interpStart = pos - 1
const exprResult = parseInterpolationExpr(input, pos, parser)
const interpNode = new SyntaxNode('Interpolation', interpStart, exprResult.endPos)
interpNode.add(exprResult.node)
stringNode.add(interpNode)
pos = exprResult.endPos
} else {
// Variable interpolation: $name
// NOTE(review): if `$` is not followed by an identifier character,
// identEnd === pos and a zero-length Identifier is emitted — confirm intended.
const interpStart = pos - 1
const identEnd = findIdentifierEnd(input, pos, to)
const identNode = new SyntaxNode('FunctionCallOrIdentifier', pos, identEnd)
const innerIdent = new SyntaxNode('Identifier', pos, identEnd)
identNode.add(innerIdent)
const interpNode = new SyntaxNode('Interpolation', interpStart, identEnd)
interpNode.add(identNode)
stringNode.add(interpNode)
pos = identEnd
}
// raw text resumes right after the interpolation
fragmentStart = pos
continue
}
pos++
}
}
/**
 * Parse a parenthesized expression interpolation: $(a + b)
 *
 * Scans forward from the opening paren to its matching close paren (nested
 * parens are balanced), parses the text in between with `parser.parse`, and
 * shifts the resulting node positions so they refer to the full input.
 *
 * @param input  full source text
 * @param pos    position of the opening ( in the full input string
 * @param parser object exposing `parse(source)`
 * @returns the expression wrapped in a ParenExpr, and the position after the closing )
 */
const parseInterpolationExpr = (input: string, pos: number, parser: any): { node: SyntaxNode, endPos: number } => {
  // Find matching closing paren, starting just after the opening one
  let closeParen = pos + 1
  for (let depth = 1; closeParen < input.length; closeParen++) {
    const ch = input[closeParen]
    if (ch === '(') {
      depth++
    } else if (ch === ')') {
      depth--
      if (depth === 0) break
    }
  }

  const contentStart = pos + 1
  const exprContent = input.slice(contentStart, closeParen) // Content between ( and )

  // Use the main parser to parse the expression
  const parsed = parser.parse(exprContent)

  // Get the first real node (skip Program wrapper)
  const innerNode = parsed.firstChild || parsed

  // Positions are relative to exprContent; shift them into the full input
  adjustNodePositions(innerNode, contentStart)

  // Wrap in ParenExpr - use positions in the full string
  const parenNode = new SyntaxNode('ParenExpr', pos, closeParen + 1)
  parenNode.add(innerNode)

  return { node: parenNode, endPos: closeParen + 1 }
}
/**
 * Shift `from` and `to` of a node and of every descendant by `offset`.
 * Mutates the nodes in place.
 */
const adjustNodePositions = (node: SyntaxNode, offset: number) => {
  // explicit work list instead of recursion; each node is visited once
  const pending: SyntaxNode[] = [node]
  while (pending.length > 0) {
    const current = pending.pop()!
    current.from += offset
    current.to += offset
    pending.push(...current.children)
  }
}
/**
 * Matches exactly one identifier character, given as a whole code point:
 * lowercase ASCII letter, digit, dash, question mark, or an emoji
 * (Unicode Extended_Pictographic).
 */
const IDENTIFIER_CHAR = /^(?:[a-z0-9\-?]|\p{Extended_Pictographic})$/u
/**
 * Find the end position of an identifier starting at pos
 * Identifiers: lowercase letters, digits, dashes, `?` and emoji
 *
 * The scan advances one code point at a time so that emoji made of a
 * surrogate pair are consumed whole, and it never runs past `maxPos` or the
 * end of `input`.
 *
 * @param input  text to scan
 * @param pos    offset where the identifier starts
 * @param maxPos exclusive upper bound for the returned offset
 * @returns offset just past the last identifier character (equals `pos` when
 *          the character at `pos` is not an identifier character)
 */
const findIdentifierEnd = (input: string, pos: number, maxPos: number): number => {
  let end = pos
  while (end < maxPos) {
    const codePoint = input.codePointAt(end)
    // ran off the end of the input
    if (codePoint === undefined) break

    const char = String.fromCodePoint(codePoint)
    // Stop at non-identifier characters, and never split a code point at maxPos
    if (end + char.length > maxPos || !IDENTIFIER_CHAR.test(char)) break

    end += char.length
  }
  return end
}

View File

@ -810,44 +810,6 @@ describe('Nullish coalescing operator', () => {
}) })
}) })
describe('DotGet whitespace sensitivity', () => {
test('no whitespace - DotGet works when identifier in scope', () => {
expect('basename = 5; basename.prop').toMatchTree(`
Assign
AssignableIdentifier basename
Eq =
Number 5
FunctionCallOrIdentifier
DotGet
IdentifierBeforeDot basename
Identifier prop`)
})
test('space before dot - NOT DotGet, parses as division', () => {
expect('basename = 5; basename / prop').toMatchTree(`
Assign
AssignableIdentifier basename
Eq =
Number 5
BinOp
Identifier basename
Slash /
Identifier prop`)
})
test('dot followed by slash is Word, not DotGet', () => {
expect('basename ./cool').toMatchTree(`
FunctionCall
Identifier basename
PositionalArg
Word ./cool`)
})
test('identifier not in scope with dot becomes Word', () => {
expect('readme.txt').toMatchTree(`Word readme.txt`)
})
})
describe('Comments', () => { describe('Comments', () => {
test('are greedy', () => { test('are greedy', () => {
expect(` expect(`
@ -897,61 +859,6 @@ basename = 5 # very astute
}) })
}) })
describe('Array destructuring', () => {
test('parses array pattern with two variables', () => {
expect('[ a b ] = [ 1 2 3 4]').toMatchTree(`
Assign
Array
Identifier a
Identifier b
Eq =
Array
Number 1
Number 2
Number 3
Number 4`)
})
test('parses array pattern with one variable', () => {
expect('[ x ] = [ 42 ]').toMatchTree(`
Assign
Array
Identifier x
Eq =
Array
Number 42`)
})
test('parses array pattern with emoji identifiers', () => {
expect('[ 🚀 💎 ] = [ 1 2 ]').toMatchTree(`
Assign
Array
Identifier 🚀
Identifier 💎
Eq =
Array
Number 1
Number 2`)
})
test('works with dotget', () => {
expect('[ a ] = [ [1 2 3] ]; a.1').toMatchTree(`
Assign
Array
Identifier a
Eq =
Array
Array
Number 1
Number 2
Number 3
FunctionCallOrIdentifier
DotGet
IdentifierBeforeDot a
Number 1`)
})
})
describe('Conditional ops', () => { describe('Conditional ops', () => {
test('or can be chained', () => { test('or can be chained', () => {
expect(` expect(`
@ -1037,34 +944,3 @@ Assign
`) `)
}) })
}) })
describe('import', () => {
test('parses single import', () => {
expect(`import str`).toMatchTree(`
Import
keyword import
Identifier str
`)
})
test('parses multiple imports', () => {
expect(`import str math list`).toMatchTree(`
Import
keyword import
Identifier str
Identifier math
Identifier list
`)
})
test('parses named args', () => {
expect(`import str only=ends-with?`).toMatchTree(`
Import
keyword import
Identifier str
NamedArg
NamedArgPrefix only=
Identifier ends-with?
`)
})
})

View File

@ -24,6 +24,7 @@ describe('if/else if/else', () => {
Eq = Eq =
IfExpr IfExpr
keyword if keyword if
FunctionCallOrIdentifier
Identifier x Identifier x
colon : colon :
Block Block
@ -59,6 +60,7 @@ describe('if/else if/else', () => {
end`).toMatchTree(` end`).toMatchTree(`
IfExpr IfExpr
keyword if keyword if
FunctionCallOrIdentifier
Identifier with-else Identifier with-else
colon : colon :
Block Block
@ -82,6 +84,7 @@ describe('if/else if/else', () => {
end`).toMatchTree(` end`).toMatchTree(`
IfExpr IfExpr
keyword if keyword if
FunctionCallOrIdentifier
Identifier with-else-if Identifier with-else-if
colon : colon :
Block Block
@ -90,6 +93,7 @@ describe('if/else if/else', () => {
ElseIfExpr ElseIfExpr
keyword else keyword else
keyword if keyword if
FunctionCallOrIdentifier
Identifier another-condition Identifier another-condition
colon : colon :
Block Block
@ -111,6 +115,7 @@ describe('if/else if/else', () => {
end`).toMatchTree(` end`).toMatchTree(`
IfExpr IfExpr
keyword if keyword if
FunctionCallOrIdentifier
Identifier with-else-if-else Identifier with-else-if-else
colon : colon :
Block Block
@ -119,6 +124,7 @@ describe('if/else if/else', () => {
ElseIfExpr ElseIfExpr
keyword else keyword else
keyword if keyword if
FunctionCallOrIdentifier
Identifier another-condition Identifier another-condition
colon : colon :
Block Block
@ -127,6 +133,7 @@ describe('if/else if/else', () => {
ElseIfExpr ElseIfExpr
keyword else keyword else
keyword if keyword if
FunctionCallOrIdentifier
Identifier yet-another-condition Identifier yet-another-condition
colon : colon :
Block Block
@ -173,7 +180,7 @@ describe('if/else if/else', () => {
`) `)
}) })
test('parses function calls in if tests', () => { test("parses paren'd function calls in if tests", () => {
expect(`if (var? 'abc'): true end`).toMatchTree(` expect(`if (var? 'abc'): true end`).toMatchTree(`
IfExpr IfExpr
keyword if keyword if
@ -214,7 +221,7 @@ describe('if/else if/else', () => {
`) `)
}) })
test('parses function calls in else-if tests', () => { test("parses paren'd function calls in else-if tests", () => {
expect(`if false: true else if (var? 'abc'): true end`).toMatchTree(` expect(`if false: true else if (var? 'abc'): true end`).toMatchTree(`
IfExpr IfExpr
keyword if keyword if

View File

@ -0,0 +1,58 @@
import { expect, describe, test } from 'bun:test'
import '../shrimp.grammar' // Importing this so changes cause it to retest!
// Parse-tree expectations for assigning to an array pattern: `[ a b ] = [ ... ]`.
describe('Array destructuring', () => {
// the pattern on the left parses as an Array of Identifiers
test('parses array pattern with two variables', () => {
expect('[ a b ] = [ 1 2 3 4]').toMatchTree(`
Assign
Array
Identifier a
Identifier b
Eq =
Array
Number 1
Number 2
Number 3
Number 4`)
})
test('parses array pattern with one variable', () => {
expect('[ x ] = [ 42 ]').toMatchTree(`
Assign
Array
Identifier x
Eq =
Array
Number 42`)
})
// emoji are valid identifiers inside the pattern
test('parses array pattern with emoji identifiers', () => {
expect('[ 🚀 💎 ] = [ 1 2 ]').toMatchTree(`
Assign
Array
Identifier 🚀
Identifier 💎
Eq =
Array
Number 1
Number 2`)
})
// a destructured name is in scope afterwards, so `a.1` parses as a DotGet
test('works with dotget', () => {
expect('[ a ] = [ [1 2 3] ]; a.1').toMatchTree(`
Assign
Array
Identifier a
Eq =
Array
Array
Number 1
Number 2
Number 3
FunctionCallOrIdentifier
DotGet
IdentifierBeforeDot a
Number 1`)
})
})

View File

@ -1,6 +1,44 @@
import { describe, test, expect } from 'bun:test' import { describe, test, expect } from 'bun:test'
import '../../testSetup' import '../../testSetup'
describe('DotGet whitespace sensitivity', () => {
test('no whitespace - DotGet works when identifier in scope', () => {
expect('basename = 5; basename.prop').toMatchTree(`
Assign
AssignableIdentifier basename
Eq =
Number 5
FunctionCallOrIdentifier
DotGet
IdentifierBeforeDot basename
Identifier prop`)
})
test('space before dot - NOT DotGet, parses as division', () => {
expect('basename = 5; basename / prop').toMatchTree(`
Assign
AssignableIdentifier basename
Eq =
Number 5
BinOp
Identifier basename
Slash /
Identifier prop`)
})
test('dot followed by slash is Word, not DotGet', () => {
expect('basename ./cool').toMatchTree(`
FunctionCall
Identifier basename
PositionalArg
Word ./cool`)
})
test('identifier not in scope with dot becomes Word', () => {
expect('readme.txt').toMatchTree(`Word readme.txt`)
})
})
describe('DotGet', () => { describe('DotGet', () => {
test('readme.txt is Word when readme not in scope', () => { test('readme.txt is Word when readme not in scope', () => {
expect('readme.txt').toMatchTree(`Word readme.txt`) expect('readme.txt').toMatchTree(`Word readme.txt`)
@ -199,7 +237,7 @@ end`).toMatchTree(`
`) `)
}) })
test("dot get doesn't work with spaces", () => { test.skip("dot get doesn't work with spaces", () => {
expect('obj . prop').toMatchTree(` expect('obj . prop').toMatchTree(`
FunctionCall FunctionCall
Identifier obj Identifier obj

View File

@ -139,10 +139,23 @@ describe('try/catch/finally/throw', () => {
`) `)
}) })
test('parses throw statement with BinOp', () => {
expect("throw 'error message:' + msg").toMatchTree(`
Throw
keyword throw
BinOp
String
StringFragment error message:
Plus +
Identifier msg
`)
})
test('parses throw statement with identifier', () => { test('parses throw statement with identifier', () => {
expect('throw error-object').toMatchTree(` expect('throw error-object').toMatchTree(`
Throw Throw
keyword throw keyword throw
FunctionCallOrIdentifier
Identifier error-object Identifier error-object
`) `)
}) })

View File

@ -43,6 +43,58 @@ describe('calling functions', () => {
`) `)
}) })
test('call with function', () => {
expect(`tail do x: x end`).toMatchTree(`
FunctionCall
Identifier tail
PositionalArg
FunctionDef
Do do
Params
Identifier x
colon :
FunctionCallOrIdentifier
Identifier x
keyword end
`)
})
test('call with arg and function', () => {
expect(`tail true do x: x end`).toMatchTree(`
FunctionCall
Identifier tail
PositionalArg
Boolean true
PositionalArg
FunctionDef
Do do
Params
Identifier x
colon :
FunctionCallOrIdentifier
Identifier x
keyword end
`)
})
test('call with function in named arg', () => {
expect(`tail callback=do x: x end`).toMatchTree(`
FunctionCall
Identifier tail
NamedArg
NamedArgPrefix callback=
FunctionDef
Do do
Params
Identifier x
colon :
FunctionCallOrIdentifier
Identifier x
keyword end
`)
})
test('command with arg that is also a command', () => { test('command with arg that is also a command', () => {
expect('tail tail').toMatchTree(` expect('tail tail').toMatchTree(`
FunctionCall FunctionCall
@ -57,7 +109,7 @@ describe('calling functions', () => {
`) `)
}) })
test('Incomplete namedArg', () => { test.skip('Incomplete namedArg', () => {
expect('tail lines=').toMatchTree(` expect('tail lines=').toMatchTree(`
FunctionCall FunctionCall
Identifier tail Identifier tail

View File

@ -0,0 +1,34 @@
import { expect, describe, test } from 'bun:test'
import '../shrimp.grammar' // Importing this so changes cause it to retest!
// Parse-tree expectations for the `import` statement.
describe('import', () => {
test('parses single import', () => {
expect(`import str`).toMatchTree(`
Import
keyword import
Identifier str
`)
})
// several module names can follow one `import`
test('parses multiple imports', () => {
expect(`import str math list`).toMatchTree(`
Import
keyword import
Identifier str
Identifier math
Identifier list
`)
})
// a `name=value` pair becomes a NamedArg with the value directly beneath it
test('parses named args', () => {
expect(`import str only=ends-with?`).toMatchTree(`
Import
keyword import
Identifier str
NamedArg
NamedArgPrefix only=
Identifier ends-with?
`)
})
})

View File

@ -336,6 +336,22 @@ describe('dict literals', () => {
`) `)
}) })
test('work with functions', () => {
expect(`[trap=do x: x end]`).toMatchTree(`
Dict
NamedArg
NamedArgPrefix trap=
FunctionDef
Do do
Params
Identifier x
colon :
FunctionCallOrIdentifier
Identifier x
keyword end
`)
})
test('can be nested', () => { test('can be nested', () => {
expect('[a=one b=[two [c=three]]]').toMatchTree(` expect('[a=one b=[two [c=three]]]').toMatchTree(`
Dict Dict
@ -371,14 +387,35 @@ describe('dict literals', () => {
Number 3 Number 3
`) `)
}) })
test('can have spaces between equals', () => {
expect(`[
a = 1
b = 2
c = 3
]`).toMatchTree(`
Dict
NamedArg
NamedArgPrefix a =
Number 1
NamedArg
NamedArgPrefix b =
Number 2
NamedArg
NamedArgPrefix c =
Number 3
`)
})
test('empty dict', () => { test('empty dict', () => {
expect('[=]').toMatchTree(` expect('[=]').toMatchTree(`
Dict [=] Dict [=]
`) `)
})
test('empty dict w whitespace', () => {
expect('[ = ]').toMatchTree(` expect('[ = ]').toMatchTree(`
Array Dict [ = ]
Word =
`) `)
}) })

View File

@ -176,6 +176,43 @@ describe('pipe expressions', () => {
Identifier echo Identifier echo
`) `)
}) })
test('parenthesized expressions can be piped', () => {
expect(`(1 + 2) | echo`).toMatchTree(`
PipeExpr
ParenExpr
BinOp
Number 1
Plus +
Number 2
operator |
FunctionCallOrIdentifier
Identifier echo
`)
})
test('complex parenthesized expressions with pipes', () => {
expect(`((math.random) * 10 + 1) | math.floor`).toMatchTree(`
PipeExpr
ParenExpr
BinOp
BinOp
ParenExpr
FunctionCallOrIdentifier
DotGet
IdentifierBeforeDot math
Identifier random
Star *
Number 10
Plus +
Number 1
operator |
FunctionCallOrIdentifier
DotGet
IdentifierBeforeDot math
Identifier floor
`)
})
}) })
describe('pipe continuation', () => { describe('pipe continuation', () => {
@ -309,7 +346,7 @@ grep h`).toMatchTree(`
Identifier split Identifier split
PositionalArg PositionalArg
String String
StringFragment StringFragment (space)
operator | operator |
FunctionCall FunctionCall
Identifier map Identifier map
@ -333,3 +370,41 @@ grep h`).toMatchTree(`
`) `)
}) })
}) })
describe('Underscore', () => {
test('works in pipes', () => {
expect(`sub 3 1 | div (sub 110 9 | sub 1) _ | div 5`).toMatchTree(`
PipeExpr
FunctionCall
Identifier sub
PositionalArg
Number 3
PositionalArg
Number 1
operator |
FunctionCall
Identifier div
PositionalArg
ParenExpr
PipeExpr
FunctionCall
Identifier sub
PositionalArg
Number 110
PositionalArg
Number 9
operator |
FunctionCall
Identifier sub
PositionalArg
Number 1
PositionalArg
Underscore _
operator |
FunctionCall
Identifier div
PositionalArg
Number 5
`)
})
})

View File

@ -0,0 +1,729 @@
import { expect, describe, test } from 'bun:test'
// Literal constants: `null` and both booleans each scan to a dedicated token type.
describe('constant types', () => {
  test('null', () => {
    const source = `null`
    expect(source).toBeToken('Null')
  })

  test('boolean', () => {
    // the token text is the literal itself
    for (const literal of ['true', 'false']) {
      expect(literal).toMatchToken('Boolean', literal)
    }
  })
})
// Number literals: decimal, prefixed (0b / 0x / 0o), signed, and underscore-separated forms.
describe('numbers', () => {
  test('non-numbers', () => {
    // digit-led text that is not a well-formed number falls back to Word
    for (const source of [`1st`, `1_`]) {
      expect(source).toMatchToken('Word', source)
    }
    // a trailing dot is split off as its own Operator token
    expect(`100.`).toMatchTokens(
      { type: 'Number', value: '100' },
      { type: 'Operator', value: '.' },
    )
  })

  // Each entry is [test name, inputs]; every input must scan to exactly one
  // Number token whose value is the whole input.
  const numberCases: Array<[string, string[]]> = [
    ['simple numbers', [
      `1`, `200`, `5.20`, `0.20`, `-20`, `+20`,
      `-2134.34`, `+20.5325`, `1_000`, `53_232_220`,
    ]],
    ['binary numbers', ['0b110']],
    ['hex numbers', ['0xdeadbeef', '0x02d3f4']],
    ['hex numbers uppercase', ['0xFF']],
    ['octal numbers', ['0o644', '0o055']],
    ['negative binary', ['-0b110']],
    ['negative hex', ['-0xFF']],
    ['negative octal', ['-0o755']],
    ['positive prefix binary', ['+0b110']],
    ['positive prefix hex', ['+0xFF']],
    ['positive prefix octal', ['+0o644']],
    ['underscores in number', [`1_000`, `1_0`, '0b11_0', '0xdead_beef', '0o64_4']],
  ]

  for (const [name, sources] of numberCases) {
    test(name, () => {
      for (const source of sources) {
        expect(source).toMatchToken('Number', source)
      }
    })
  }
})
// Identifier scanning: what may start, continue, and end an identifier, and
// which look-alikes degrade to Word.
describe('identifiers', () => {
  // asserts that every source scans to one Identifier token equal to the source
  const expectIdentifiers = (sources: string[]) => {
    for (const source of sources) {
      expect(source).toMatchToken('Identifier', source)
    }
  }

  test('regular', () => {
    for (const source of ['name', 'bobby-mcgee', 'starts-with?']) {
      expect(source).toBeToken('Identifier')
    }
    // [input, expected token text] - surrounding whitespace is not part of the token
    const valued: Array<[string, string]> = [
      ['📢', '📢'],
      [' 📢 ', '📢'],
      [' oink-🐷-oink', 'oink-🐷-oink'],
      ['$', '$'],
      ['$cool', '$cool'],
    ]
    for (const [source, value] of valued) {
      expect(source).toMatchToken('Identifier', value)
    }
  })

  test('one character identifiers', () => {
    expectIdentifiers(['a', 'z', '$', '📢'])
    expect('?').toBeToken('Word') // ? alone is not valid identifier start
  })

  test('two character identifiers', () => {
    // `?` is valid at the end
    expectIdentifiers(['ab', 'a1', 'a-', 'a?', 'ab?'])
  })

  test('three+ character identifiers', () => {
    // `?` is valid at the end
    expectIdentifiers(['abc', 'a-b', 'a1b', 'abc?', 'a-b-c?'])
  })

  test('edge cases', () => {
    expect('-bobby-mcgee').toBeToken('Word')
    expectIdentifiers(['starts-with??', 'starts?with?', 'a??b'])
    expect('oink-oink!').toBeToken('Word')
    for (const source of ['dog#pound', 'http://website.com']) {
      expect(source).toMatchToken('Word', source)
    }
    expectIdentifiers(['school$cool'])

    // a colon at the end of the input splits off as its own token
    expect('EXIT:').toMatchTokens(
      { type: 'Word', value: 'EXIT' },
      { type: 'Colon' },
    )

    const ifStatement = `if y == 1: 'cool' end`
    expect(ifStatement).toMatchTokens(
      { type: 'Keyword', value: 'if' },
      { type: 'Identifier', value: 'y' },
      { type: 'Operator', value: '==' },
      { type: 'Number', value: '1' },
      { type: 'Colon' },
      { type: 'String', value: `'cool'` },
      { type: 'Keyword', value: 'end' },
    )
  })
})
// Path-like input: text that is not a valid identifier stays a single Word,
// while identifier.identifier is split around the dot.
describe('paths', () => {
  test('starting with ./', () => {
    const path = './tmp'
    expect(path).toMatchToken('Word', path)
  })

  test('starting with /', () => {
    const path = '/home/chris/dev'
    expect(path).toMatchToken('Word', path)
  })

  test('identifiers with dots tokenize separately', () => {
    const expected = [
      { type: 'Identifier', value: 'readme' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'txt' },
    ]
    expect('readme.txt').toMatchTokens(...expected)
  })

  test('words (non-identifiers) consume dots', () => {
    const path = 'README.md'
    expect(path).toMatchToken('Word', path)
  })

  test('all sorts of weird stuff', () => {
    expect('dog#pound').toMatchToken('Word', 'dog#pound')
    // NOTE(review): single-argument form - confirm the matcher treats the lone
    // argument as the expected value; the neighbouring calls pass ('Word', value).
    expect('my/kinda/place').toMatchToken('my/kinda/place')
    const url = 'file://%/$##/@40!/index.php'
    expect(url).toMatchToken('Word', url)
  })
})
// Quoted strings: the String token value keeps the delimiters and the raw
// (unprocessed) escape sequences.
describe('strings', () => {
  // every source must scan to one String token whose value is the whole source
  const expectWholeString = (sources: string[]) => {
    for (const source of sources) {
      expect(source).toMatchToken('String', source)
    }
  }

  test('single quoted', () => {
    expectWholeString([`'hello world'`])
    // NOTE(review): single-argument form - confirm the matcher treats the lone
    // argument as the expected value; the other calls pass ('String', value).
    expect(`'it\\'s a beautiful world'`).toMatchToken("'it\\'s a beautiful world'")
  })

  test('double quoted', () => {
    expectWholeString([`"hello world"`, `"it's a beautiful world"`])
  })

  test('empty strings', () => {
    expectWholeString([`''`, `""`])
  })

  test('escape sequences', () => {
    expectWholeString([
      `'hello\\nworld'`,
      `'tab\\there'`,
      `'quote\\''`,
      `'backslash\\\\'`,
      `'dollar\\$sign'`,
    ])
  })

  test('unclosed strings - error case', () => {
    // These should either fail or produce unexpected results
    expectWholeString([`'hello`, `"world`])
  })
})
// Curly-brace strings: `{ ... }` scans to a single String token whose value
// includes the braces. Each case expects the token text to equal the input exactly.
describe('curly strings', () => {
test('curly quoted', () => {
expect('{ one two three }').toMatchToken('String', `{ one two three }`)
})
// newlines inside the braces stay part of the same String token
test('work on multiple lines', () => {
expect(`{
one
two
three }`).toMatchToken('String', `{
one
two
three }`)
})
// nested braces must balance; the token ends at the brace matching the opener
test('can contain other curlies', () => {
expect(`{ { one }
two
{ three } }`).toMatchToken('String', `{ { one }
two
{ three } }`)
})
test('empty curly string', () => {
expect('{}').toMatchToken('String', '{}')
})
// an unbalanced opener currently yields a String running to the end of the input
test('unclosed curly string - error case', () => {
// Should either fail or produce unexpected results
expect('{ hello').toMatchToken('String', '{ hello')
expect('{ nested { unclosed }').toMatchToken('String', '{ nested { unclosed }')
})
})
// Every operator spelling, scanned on its own, yields one Operator token with that text.
describe('operators', () => {
  test('math operators', () => {
    // grouped by role; checked in this order
    const groups: Record<string, string[]> = {
      assignment: ['='],
      logic: ['or', 'and'],
      bitwise: ['band', 'bor', 'bxor', '>>>', '>>', '<<'],
      'compound assignment': ['??=', '+=', '-=', '*=', '/=', '%='],
      nullish: ['??'],
      math: ['**', '*', '/', '+', '-', '%'],
      comparison: ['>=', '<=', '!=', '==', '>', '<'],
      'property access': ['.'],
    }

    for (const spellings of Object.values(groups)) {
      for (const spelling of spellings) {
        expect(spelling).toMatchToken('Operator', spelling)
      }
    }
  })
})
// Reserved words scan to Keyword tokens carrying their own text.
describe('keywords', () => {
  test('keywords', () => {
    const reserved = [
      `import`, `end`, `do`, `while`, `if`,
      `else`, `try`, `catch`, `finally`, `throw`,
    ]
    for (const word of reserved) {
      expect(word).toMatchToken('Keyword', word)
    }
  })
})
// Regex literals are delimited by `//` on both sides; the token keeps the delimiters.
describe('regex', () => {
  test('use double slash', () => {
    const literal = `//[0-9]+//`
    expect(literal).toMatchToken('Regex', literal)
  })
})
// Single-character punctuation tokens.
describe('punctuation', () => {
  test('underscore', () => {
    // only a lone `_` is the Underscore token; `__` is an ordinary Word
    expect(`_`).toBeToken('Underscore')
    expect(`__`).toMatchToken('Word', '__')
  })

  // [test name, source, expected token type]
  const singles: Array<[string, string, string]> = [
    ['semicolon', `;`, 'Semicolon'],
    ['newline', '\n', 'Newline'],
    ['colon', ':', 'Colon'],
  ]
  for (const [name, source, type] of singles) {
    test(name, () => {
      expect(source).toBeToken(type)
    })
  }
})
// `#` starts a comment; the Comment token value includes the `#`.
describe('comments', () => {
  test('comments', () => {
    for (const comment of [`# hey friends`, `#hey-friends`]) {
      expect(comment).toMatchToken('Comment', comment)
    }
  })
})
// Each bracket character scans to its own open/close token type.
describe('brackets', () => {
  test('parens', () => {
    const pairs: Array<[string, string]> = [[`(`, 'OpenParen'], [`)`, 'CloseParen']]
    for (const [source, type] of pairs) expect(source).toBeToken(type)
  })

  test('staples', () => {
    const pairs: Array<[string, string]> = [[`[`, 'OpenBracket'], [`]`, 'CloseBracket']]
    for (const [source, type] of pairs) expect(source).toBeToken(type)
  })
})
// Token sequences: how whitespace, newlines, brackets, comments, colons,
// semicolons and dots separate (or fail to separate) adjacent tokens.
describe('multiple tokens', () => {
test('constants work fine', () => {
expect(`null true false`).toMatchTokens(
{ type: 'Null' },
{ type: 'Boolean', value: 'true' },
{ type: 'Boolean', value: 'false' },
)
})
test('numbers', () => {
expect(`100 -400.42 null`).toMatchTokens(
{ type: 'Number', value: '100' },
{ type: 'Number', value: '-400.42' },
{ type: 'Null' },
)
})
// outside of brackets every newline is reported as a Newline token
test('whitespace', () => {
expect(`
'hello world'
'goodbye world'
`).toMatchTokens(
{ type: 'Newline' },
{ type: 'String', value: "'hello world'" },
{ type: 'Newline' },
{ type: 'Newline' },
{ type: 'String', value: "'goodbye world'" },
{ type: 'Newline' },
)
})
// inside ( ) no Newline tokens are expected
test('newline in parens is ignored', () => {
expect(`(
'hello world'
'goodbye world'
)`).toMatchTokens(
{ type: 'OpenParen' },
{ type: 'String', value: "'hello world'" },
{ type: 'String', value: "'goodbye world'" },
{ type: 'CloseParen' },
)
})
// inside [ ] no Newline tokens are expected either
test('newline in brackets is ignored', () => {
expect(`[
a b
c d
e
f
]`).toMatchTokens(
{ type: 'OpenBracket' },
{ type: 'Identifier', value: "a" },
{ type: 'Identifier', value: "b" },
{ type: 'Identifier', value: "c" },
{ type: 'Identifier', value: "d" },
{ type: 'Identifier', value: "e" },
{ type: 'Identifier', value: "f" },
{ type: 'CloseBracket' },
)
})
test('function call', () => {
expect('echo hello world').toMatchTokens(
{ type: 'Identifier', value: 'echo' },
{ type: 'Identifier', value: 'hello' },
{ type: 'Identifier', value: 'world' },
)
})
test('assignment', () => {
expect('x = 5').toMatchTokens(
{ type: 'Identifier', value: 'x' },
{ type: 'Operator', value: '=' },
{ type: 'Number', value: '5' },
)
})
test('math expression', () => {
expect('1 + 2 * 3').toMatchTokens(
{ type: 'Number', value: '1' },
{ type: 'Operator', value: '+' },
{ type: 'Number', value: '2' },
{ type: 'Operator', value: '*' },
{ type: 'Number', value: '3' },
)
})
// a comment runs to the end of its line and is reported as a Comment token
test('inline comment', () => {
expect('x = 5 # set x').toMatchTokens(
{ type: 'Identifier', value: 'x' },
{ type: 'Operator', value: '=' },
{ type: 'Number', value: '5' },
{ type: 'Comment', value: '# set x' },
)
})
test('line comment', () => {
expect('x = 5 \n# hello\n set x').toMatchTokens(
{ type: 'Identifier', value: 'x' },
{ type: 'Operator', value: '=' },
{ type: 'Number', value: '5' },
{ type: 'Newline' },
{ type: 'Comment', value: '# hello' },
{ type: 'Newline' },
{ type: 'Identifier', value: 'set' },
{ type: 'Identifier', value: 'x' },
)
})
// a colon followed by whitespace (or a line break) ends the preceding token
test('colons separate tokens', () => {
expect('x do: y').toMatchTokens(
{ type: 'Identifier', value: 'x' },
{ type: 'Keyword', value: 'do' },
{ type: 'Colon' },
{ type: 'Identifier', value: 'y' },
)
expect('x: y').toMatchTokens(
{ type: 'Identifier', value: 'x' },
{ type: 'Colon' },
{ type: 'Identifier', value: 'y' },
)
expect('5: y').toMatchTokens(
{ type: 'Number', value: '5' },
{ type: 'Colon' },
{ type: 'Identifier', value: 'y' },
)
expect(`if (var? 'abc'): y`).toMatchTokens(
{ type: 'Keyword', value: 'if' },
{ type: 'OpenParen' },
{ type: 'Identifier', value: 'var?' },
{ type: 'String', value: `'abc'` },
{ type: 'CloseParen' },
{ type: 'Colon' },
{ type: 'Identifier', value: 'y' },
)
expect(`
do x:
y
end`).toMatchTokens(
{ type: 'Newline' },
{ type: 'Keyword', value: 'do' },
{ type: 'Identifier', value: 'x' },
{ type: 'Colon' },
{ type: 'Newline' },
{ type: 'Identifier', value: 'y' },
{ type: 'Newline' },
{ type: 'Keyword', value: 'end' },
)
})
test('semicolons separate statements', () => {
expect('x; y').toMatchTokens(
{ type: 'Identifier', value: 'x' },
{ type: 'Semicolon' },
{ type: 'Identifier', value: 'y' },
)
})
// unlike newlines, semicolons are still reported inside parentheses
test('semicolons in parens', () => {
expect('(x; y)').toMatchTokens(
{ type: 'OpenParen' },
{ type: 'Identifier', value: 'x' },
{ type: 'Semicolon' },
{ type: 'Identifier', value: 'y' },
{ type: 'CloseParen' },
)
})
// a dot that follows whitespace starts a Word instead of acting as an operator
test('dot operator beginning word with slash', () => {
expect(`(basename ./cool)`).toMatchTokens(
{ 'type': 'OpenParen' },
{ 'type': 'Identifier', 'value': 'basename' },
{ 'type': 'Word', 'value': './cool' },
{ 'type': 'CloseParen' }
)
})
test('dot word after identifier with space', () => {
expect(`expand-path .git`).toMatchTokens(
{ 'type': 'Identifier', 'value': 'expand-path' },
{ 'type': 'Word', 'value': '.git' },
)
})
// with no whitespace before it, the dot is a property-access Operator
test('dot operator after identifier without space', () => {
expect(`config.path`).toMatchTokens(
{ 'type': 'Identifier', 'value': 'config' },
{ 'type': 'Operator', 'value': '.' },
{ 'type': 'Identifier', 'value': 'path' },
)
})
})
// Nested brackets: each opener and closer is its own token, in source order.
describe('nesting edge cases', () => {
  const openParen = { type: 'OpenParen' }
  const closeParen = { type: 'CloseParen' }

  test('deeply nested parens', () => {
    expect('((nested))').toMatchTokens(
      openParen,
      openParen,
      { type: 'Identifier', value: 'nested' },
      closeParen,
      closeParen,
    )
  })

  test('mixed nesting', () => {
    expect('([combo])').toMatchTokens(
      openParen,
      { type: 'OpenBracket' },
      { type: 'Identifier', value: 'combo' },
      { type: 'CloseBracket' },
      closeParen,
    )
  })
})
// Digit-led text that is not a valid literal for its radix must scan as one Word.
describe('invalid numbers that should be words', () => {
  // [test name, inputs]; each input is expected back verbatim as a Word
  const wordCases: Array<[string, string[]]> = [
    ['invalid binary', ['0b2', '0b123']],
    ['invalid octal', ['0o8', '0o999']],
    ['invalid hex', ['0xGGG', '0xZZZ']],
    ['multiple decimal points', ['1.2.3']],
  ]

  for (const [name, sources] of wordCases) {
    test(name, () => {
      for (const source of sources) {
        expect(source).toMatchToken('Word', source)
      }
    })
  }
})
// Non-ASCII scripts and symbols are accepted as identifier characters.
describe('unicode and emoji', () => {
  // [test name, inputs]; each input is expected back verbatim as an Identifier
  const identifierCases: Array<[string, string[]]> = [
    ['greek letters', ['αβγ', 'delta-δ']],
    ['math symbols', ['∑', '∏']],
    ['CJK characters', ['你好', 'こんにちは']],
  ]

  for (const [name, sources] of identifierCases) {
    test(name, () => {
      for (const source of sources) {
        expect(source).toMatchToken('Identifier', source)
      }
    })
  }
})
// Inputs with no content: blanks and tabs yield no tokens, newlines yield Newline tokens.
describe('empty and whitespace input', () => {
  // [test name, input] - each of these must produce an empty token list
  const tokenless: Array<[string, string]> = [
    ['empty string', ''],
    ['only whitespace', ' '],
    ['only tabs', '\t\t\t'],
  ]
  for (const [name, source] of tokenless) {
    test(name, () => {
      expect(source).toMatchTokens()
    })
  }

  test('only newlines', () => {
    const newline = { type: 'Newline' }
    expect('\n\n\n').toMatchTokens(newline, newline, newline)
  })
})
// `name=` directly after an identifier scans as a NamedArgPrefix (the `=` included),
// and the value that follows is a separate token.
describe('named args', () => {
  const argValue = { type: 'Identifier', value: 'arg' }

  test("don't need spaces", () => {
    expect(`named=arg`).toMatchTokens({ type: 'NamedArgPrefix', value: 'named=' }, argValue)
  })

  test("can have spaces", () => {
    expect(`named= arg`).toMatchTokens({ type: 'NamedArgPrefix', value: 'named=' }, argValue)
  })

  test("can include numbers", () => {
    expect(`named123= arg`).toMatchTokens({ type: 'NamedArgPrefix', value: 'named123=' }, argValue)
  })
})
// The `.` operator: when it splits tokens (identifier.something) and when it
// stays inside a Word (paths and other non-identifier text).
describe('dot operator', () => {
  // small builders for the expected-token shapes used below
  const ident = (value: string) => ({ type: 'Identifier', value })
  const num = (value: string) => ({ type: 'Number', value })
  const op = (value: string) => ({ type: 'Operator', value })
  const dot = () => op('.')
  const open = () => ({ type: 'OpenParen' })
  const close = () => ({ type: 'CloseParen' })

  test('standalone dot', () => {
    expect('.').toMatchToken('Operator', '.')
  })

  test('dot between identifiers tokenizes as separate tokens', () => {
    expect('config.path').toMatchTokens(ident('config'), dot(), ident('path'))
  })

  test('dot with number', () => {
    expect('array.0').toMatchTokens(ident('array'), dot(), num('0'))
  })

  test('chained dots', () => {
    expect('a.b.c').toMatchTokens(ident('a'), dot(), ident('b'), dot(), ident('c'))
  })

  test('identifier-like paths tokenize separately', () => {
    expect('readme.txt').toMatchTokens(ident('readme'), dot(), ident('txt'))
  })

  test('word-like paths remain as single token', () => {
    for (const path of ['./file.txt', 'README.TXT']) {
      expect(path).toMatchToken('Word', path)
    }
  })

  test('dot with paren expression', () => {
    expect('obj.(1 + 2)').toMatchTokens(
      ident('obj'),
      dot(),
      open(),
      num('1'),
      op('+'),
      num('2'),
      close(),
    )
  })

  test('chained dot with paren expression', () => {
    expect('obj.items.(i)').toMatchTokens(
      ident('obj'),
      dot(),
      ident('items'),
      dot(),
      open(),
      ident('i'),
      close(),
    )
  })
})

592
src/parser/tokenizer2.ts Normal file
View File

@ -0,0 +1,592 @@
// Debug switch: the DEBUG environment variable's value when it is set to a
// non-empty string, otherwise `false`. When truthy, Scanner.push logs every token it emits.
const DEBUG = process.env.DEBUG || false
/**
 * One lexical token produced by the Scanner.
 *
 * `from` and `to` are offsets into the scanned input such that
 * `input.slice(from, to)` is the token's text. `value` holds that text and is
 * only present for the token types listed in `valueTokens`.
 */
export type Token = {
type: TokenType
value?: string,
from: number,
to: number,
}
/**
 * Kinds of token the Scanner can emit.
 *
 * This is a numeric enum: members are numbered from 0 in declaration order
 * (so `Comment` is 0), and `TokenType[n]` maps a number back to its name.
 * Reordering members therefore changes their numeric values.
 */
export enum TokenType {
Comment,
Keyword,
Operator,
Newline,
Semicolon,
Colon,
Underscore,
OpenParen,
CloseParen,
OpenBracket,
CloseBracket,
Identifier,
Word,
NamedArgPrefix,
Null,
Boolean,
Number,
String,
Regex,
}
// Token types whose source text is copied into `Token.value` by Scanner.push.
// Types not listed here (Newline, Semicolon, Colon, the brackets, Null) are
// emitted with `type`, `from` and `to` only.
const valueTokens = new Set([
TokenType.Comment,
TokenType.Keyword, TokenType.Operator,
TokenType.Identifier, TokenType.Word, TokenType.NamedArgPrefix,
TokenType.Boolean, TokenType.Number, TokenType.String, TokenType.Regex,
TokenType.Underscore
])
// Every spelling that is classified as an Operator token once a whole word has
// been read. Membership is all that matters; grouping is for readers.
const operators = new Set<string>([
  '=',                                    // assignment
  'or', 'and',                            // logic
  'band', 'bor', 'bxor', '>>>', '>>', '<<', // bitwise
  '??=', '+=', '-=', '*=', '/=', '%=',    // compound assignment
  '??',                                   // nullish
  '**', '*', '/', '+', '-', '%',          // math
  '>=', '<=', '!=', '==', '>', '<',       // comparison
  '.',                                    // property access
  '|',                                    // pipe
])
// Reserved words: a scanned word that appears here is emitted as a Keyword
// token instead of an Identifier. ('if' used to be listed twice; the Set
// silently swallowed the duplicate.)
const keywords = new Set([
  'import',
  'end',
  'do',
  'if',
  'while',
  'else',
  'try',
  'catch',
  'finally',
  'throw',
])
// helper
/**
 * Tagged-template shorthand for a character code: c`#` is the UTF-16 code of `#`.
 *
 * The template (including any interpolated values) is assembled into a string
 * and the code of its first code unit is returned; an empty template yields NaN.
 */
function c(strings: TemplateStringsArray, ...values: any[]) {
  let text = ""
  strings.forEach((chunk, index) => {
    // there is one fewer value than there are chunks, hence the fallback
    text += chunk + (values[index] ?? "")
  })
  return text.charCodeAt(0)
}
/**
 * Inverse of the `c` helper: turns a code point back into a string.
 *
 * The scanner works on full code points (an emoji is one number above 0xFFFF),
 * which `String.fromCharCode` would silently truncate to 16 bits, so this uses
 * `String.fromCodePoint` instead.
 *
 * @param c a Unicode code point (integer in 0..0x10FFFF)
 * @returns the one-character string for that code point (two UTF-16 code units
 *          for code points above 0xFFFF)
 * @throws RangeError if `c` is not a valid code point
 */
function s(c: number): string {
  return String.fromCodePoint(c)
}
/**
 * Hand-written scanner that turns shrimp source text into a flat `Token[]`.
 *
 * Cursor model: `char` is the code point currently being examined and `pos` is
 * the offset of the code unit right after it, so `pos - getCharSize(char)` is
 * the offset of `char` itself. A `char` of 0 means end of input. `start` is the
 * offset where the token being read begins.
 *
 * Changes relative to the first version of this class:
 *  - `reset()` clears the bracket depth counters and hands out a fresh token
 *    array, so one `tokenize` call can no longer influence (or overwrite the
 *    result of) another.
 *  - an unmatched `)` / `]` no longer drives the depth negative, which used to
 *    suppress every later Newline token.
 *  - strings treat a backslash as escaping exactly the next character, so
 *    `'a\\'` closes at its final quote; unclosed strings end at end of input.
 *  - a `//` with no closing `//` is re-read as an ordinary Word instead of
 *    being dropped, and the opener's second slash can no longer double as
 *    half of the closing delimiter.
 */
export class Scanner {
  input = ''
  pos = 0
  start = 0
  char = 0
  prev = 0
  // nesting depth of ( ) and [ ]; newlines are only reported at depth 0
  inParen = 0
  inBracket = 0
  tokens: Token[] = []
  // whether the character before `char` was whitespace (or start of input)
  prevIsWhitespace = true

  /** Puts the scanner back into its initial state. */
  reset() {
    this.input = ''
    this.pos = 0
    this.start = 0
    this.char = 0
    this.prev = 0
    this.inParen = 0
    this.inBracket = 0
    // fresh array: the one returned by a previous tokenize() stays untouched
    this.tokens = []
    this.prevIsWhitespace = true
  }

  /** Returns the code point `count` code units after the cursor without consuming it (0 past the end). */
  peek(count = 0): number {
    return getFullCodePoint(this.input, this.pos + count)
  }

  /** Advances to the next code point, updating `prev` / `prevIsWhitespace`, and returns it. */
  next(): number {
    this.prevIsWhitespace = isWhitespace(this.char)
    this.prev = this.char
    this.char = this.peek()
    this.pos += getCharSize(this.char)
    return this.char
  }

  /**
   * Appends a token covering `[from, to)`.
   *
   * @param type token type
   * @param from start offset; defaults to `this.start`
   * @param to end offset (exclusive); defaults to the offset of the current `char`
   */
  push(type: TokenType, from?: number, to?: number) {
    from ??= this.start
    to ??= this.pos - getCharSize(this.char)
    if (to < from) to = from

    this.tokens.push(Object.assign({}, {
      type,
      from,
      to,
    }, valueTokens.has(type) ? { value: this.input.slice(from, to) } : {}))

    if (DEBUG) {
      const tok = this.tokens.at(-1)
      console.log(`≫ PUSH(${from},${to})`, TokenType[tok?.type || 0], '—', tok?.value)
    }

    this.start = this.pos
  }

  /** Emits a token for the current character; only used for one-code-unit characters. */
  pushChar(type: TokenType) {
    this.push(type, this.pos - 1, this.pos)
  }

  // turn shrimp code into shrimp tokens that get fed into the parser
  tokenize(input: string): Token[] {
    this.reset()
    this.input = input
    this.next()

    // the order of these checks matters: earlier rules win
    while (this.char > 0) {
      const char = this.char

      if (char === c`#`) {
        this.readComment()
        continue
      }

      if (isBracket(char)) {
        this.readBracket()
        continue
      }

      if (isStringDelim(char)) {
        this.readString(char)
        continue
      }

      if (char === c`{`) {
        this.readCurlyString()
        continue
      }

      if (isIdentStart(char)) {
        this.readWordOrIdent(true) // true = started with identifier char
        continue
      }

      // a sign only starts a number when a digit follows immediately
      if (isDigit(char) || ((char === c`-` || char === c`+`) && isDigit(this.peek()))) {
        this.readNumber()
        continue
      }

      if (char === c`:`) {
        this.pushChar(TokenType.Colon)
        this.next()
        continue
      }

      // whitespace-sensitive dot as operator (property access) only after identifier/number
      if (char === c`.`) {
        if (this.canBeDotGet(this.tokens.at(-1))) {
          this.pushChar(TokenType.Operator)
          this.next()
          continue
        }
      }

      if (char === c`/` && this.peek() === c`/`) {
        this.readRegex()
        continue
      }

      if (isWordChar(char)) {
        this.readWordOrIdent(false) // false = didn't start with identifier char
        continue
      }

      if (char === c`\n`) {
        // newlines inside ( ) or [ ] are not statement separators
        if (this.inParen === 0 && this.inBracket === 0)
          this.pushChar(TokenType.Newline)
        this.next()
        continue
      }

      if (char === c`;`) {
        this.pushChar(TokenType.Semicolon)
        this.next()
        continue
      }

      // anything else (spaces, tabs, \r, ...) is skipped
      this.next()
    }

    return this.tokens
  }

  /** Reads from `#` up to (not including) the end of the line. */
  readComment() {
    this.start = this.pos - 1
    while (this.char !== c`\n` && this.char > 0) this.next()
    this.push(TokenType.Comment)
  }

  /** Emits the bracket token for the current character and tracks nesting depth. */
  readBracket() {
    switch (this.char) {
      case c`(`:
        this.inParen++
        this.pushChar(TokenType.OpenParen); break
      case c`)`:
        // never go below zero: a stray closer must not hide later newlines
        if (this.inParen > 0) this.inParen--
        this.pushChar(TokenType.CloseParen); break
      case c`[`:
        this.inBracket++
        this.pushChar(TokenType.OpenBracket); break
      case c`]`:
        if (this.inBracket > 0) this.inBracket--
        this.pushChar(TokenType.CloseBracket); break
    }
    this.next()
  }

  /**
   * Reads a quoted string, delimiters included. A backslash escapes the next
   * character, whatever it is. An unclosed string runs to the end of the input.
   */
  readString(delim: number) {
    this.start = this.pos - 1
    this.next() // skip opening delim

    while (this.char > 0 && this.char !== delim) {
      // step over the backslash so the escaped character is consumed below
      if (this.char === c`\\`) this.next()
      if (this.char > 0) this.next()
    }

    // only consume a closing delimiter that is really there
    if (this.char === delim) this.next()
    this.push(TokenType.String)
  }

  /** Reads a `{ ... }` string with balanced nested braces, braces included. */
  readCurlyString() {
    this.start = this.pos - 1
    let depth = 1
    this.next()

    while (depth > 0 && this.char > 0) {
      if (this.char === c`{`) depth++
      if (this.char === c`}`) depth--
      this.next()
    }

    this.push(TokenType.String)
  }

  /**
   * Reads a run of word characters and classifies it (Underscore, Null,
   * Boolean, Keyword, Operator, Identifier, NamedArgPrefix or Word).
   *
   * @param startedWithIdentChar whether the first character can start an
   *        identifier; only then may a `.` split the run for property access
   */
  readWordOrIdent(startedWithIdentChar: boolean) {
    this.start = this.pos - getCharSize(this.char)

    while (isWordChar(this.char)) {
      // stop at colon if followed by whitespace (e.g., 'do x: echo x end')
      if (this.char === c`:`) {
        const nextCh = this.peek()
        if (isWhitespace(nextCh) || nextCh === 0) break
      }

      // stop at equal sign (named arg) - but only if what we've read so far is an identifier
      if (this.char === c`=`) {
        const soFar = this.input.slice(this.start, this.pos - getCharSize(this.char))
        if (isIdentifer(soFar)) {
          // the `=` belongs to the prefix token
          this.next()
          break
        }
      }

      // stop at dot only if it would create a valid property access
      // AND only if we started with an identifier character (not for Words like README.txt)
      if (startedWithIdentChar && this.char === c`.`) {
        const nextCh = this.peek()
        if (isIdentStart(nextCh) || isDigit(nextCh) || nextCh === c`(`) {
          const soFar = this.input.slice(this.start, this.pos - getCharSize(this.char))
          if (isIdentifer(soFar)) break
        }
      }

      this.next()
    }

    const word = this.input.slice(this.start, this.pos - getCharSize(this.char))

    // classify the token based on what we read
    if (word === '_')
      this.push(TokenType.Underscore)
    else if (word === 'null')
      this.push(TokenType.Null)
    else if (word === 'true' || word === 'false')
      this.push(TokenType.Boolean)
    else if (isKeyword(word))
      this.push(TokenType.Keyword)
    else if (isOperator(word))
      this.push(TokenType.Operator)
    else if (isIdentifer(word))
      this.push(TokenType.Identifier)
    else if (word.endsWith('='))
      this.push(TokenType.NamedArgPrefix)
    else
      this.push(TokenType.Word)
  }

  /** Reads digit-led (or sign-led) text; emits Number if it is a valid literal, Word otherwise. */
  readNumber() {
    this.start = this.pos - 1

    while (isWordChar(this.char)) {
      // stop at dot unless it's part of the number
      if (this.char === c`.`) {
        const nextCh = this.peek()
        if (!isDigit(nextCh)) break
      }

      // stop at colon
      if (this.char === c`:`) {
        const nextCh = this.peek()
        if (isWhitespace(nextCh) || nextCh === 0) break
      }

      this.next()
    }

    const ident = this.input.slice(this.start, this.pos - getCharSize(this.char))
    this.push(isNumber(ident) ? TokenType.Number : TokenType.Word)
  }

  /**
   * Reads a `//pattern//flags` literal. Emits Regex when the pattern compiles
   * with the given flags, Word when it does not. If no closing `//` exists the
   * scanner rewinds and reads the text as an ordinary word.
   */
  readRegex() {
    // remember the cursor so an unterminated regex can be re-read as a Word
    const resume = {
      pos: this.pos,
      char: this.char,
      prev: this.prev,
      prevIsWhitespace: this.prevIsWhitespace,
    }

    this.start = this.pos - 1
    this.next() // now on the 2nd / of the opening delimiter
    this.next() // step past it so it cannot double as half of the closing //

    while (this.char > 0) {
      if (this.char === c`/` && this.peek() === c`/`) {
        this.next() // skip /
        this.next() // skip /

        // read regex flags
        while (this.char > 0 && isIdentStart(this.char))
          this.next()

        // validate regex
        const to = this.pos - getCharSize(this.char)
        const regexText = this.input.slice(this.start, to)
        const [, pattern, flags] = regexText.match(/^\/\/(.*)\/\/([gimsuy]*)$/) || []

        let valid = false
        if (pattern) {
          try {
            new RegExp(pattern, flags)
            valid = true
          } catch {
            // pattern does not compile - emitted as Word below
          }
        }

        // invalid regex is treated as Word
        this.push(valid ? TokenType.Regex : TokenType.Word)
        return
      }

      this.next()
    }

    // reached end of input without a closing `//`: rewind and read a plain word
    this.pos = resume.pos
    this.char = resume.char
    this.prev = resume.prev
    this.prevIsWhitespace = resume.prevIsWhitespace
    this.readWordOrIdent(false)
  }

  /**
   * Whether a `.` at the cursor is property access: it must directly follow
   * (no whitespace) an Identifier, Number, `)` or `]` token.
   */
  canBeDotGet(lastToken?: Token): boolean {
    return !this.prevIsWhitespace && !!lastToken &&
      (lastToken.type === TokenType.Identifier ||
        lastToken.type === TokenType.Number ||
        lastToken.type === TokenType.CloseParen ||
        lastToken.type === TokenType.CloseBracket)
  }
}
// Accepted number spellings. Each pattern is: optional sign, optional radix
// prefix, digit groups separated by single underscores, and an optional
// fractional part with the same grouping rule.
//
// The groups are written `(_digits)*` rather than `(_?digits)*`: both accept
// the same integer parts, but the optional underscore nests one quantifier
// inside another and backtracks exponentially on a long digit run followed by
// a non-digit (e.g. forty 1s and an `x`).
const NUMBER_PATTERNS: readonly RegExp[] = [
  // regular number
  /^[+-]?\d+(_\d+)*(\.\d+(_\d+)*)?$/,
  // binary
  /^[+-]?0b[01]+(_[01]+)*(\.[01]+(_[01]+)*)?$/,
  // octal
  /^[+-]?0o[0-7]+(_[0-7]+)*(\.[0-7]+(_[0-7]+)*)?$/,
  // hex (case-insensitive, as before)
  /^[+-]?0x[0-9a-f]+(_[0-9a-f]+)*(\.[0-9a-f]+(_[0-9a-f]+)*)?$/i,
]

/**
 * Whether `word` is a complete number literal.
 *
 * Underscores are allowed only between digits, in every radix and in the
 * fractional part alike: `1_000`, `0b1_0` and `0xa_b` are numbers, while `1_`
 * and `0b1.1_` are not.
 *
 * @param word the full candidate text (no surrounding whitespace)
 * @returns true when the whole of `word` matches one of the number spellings
 */
const isNumber = (word: string): boolean => {
  return NUMBER_PATTERNS.some(pattern => pattern.test(word))
}
/**
 * Whether `s` as a whole is a valid identifier: a first character accepted by
 * isIdentStart, interior characters accepted by isIdentChar, and a last
 * character accepted by isIdentEnd. The empty string is not an identifier.
 */
const isIdentifer = (s: string): boolean => {
  // decode into full code points so a surrogate pair counts as one character
  const points: number[] = []
  for (let offset = 0; offset < s.length;) {
    const point = getFullCodePoint(s, offset)
    points.push(point)
    offset += getCharSize(point)
  }

  if (points.length === 0) return false

  const [first, ...rest] = points
  if (!isIdentStart(first!)) return false
  if (rest.length === 0) return true

  // what is left after removing the final character is the interior
  const last = rest.pop()!
  return rest.every(point => isIdentChar(point)) && isIdentEnd(last)
}
// True for the two quote characters that open and close a string literal.
const isStringDelim = (ch: number): boolean => {
  switch (ch) {
    case 39: /* ' */
    case 34: /* " */
      return true
    default:
      return false
  }
}
// Whether `char` may begin an identifier: a lowercase ASCII letter, `$`, or a
// code point from the ranges accepted by isEmojiOrUnicode. A string argument
// is judged by its first UTF-16 code unit.
const isIdentStart = (char: number | string): boolean => {
  const code = typeof char === 'number' ? char : char.charCodeAt(0)
  if (code === 36 /* $ */) return true
  return isLowercaseLetter(code) || isEmojiOrUnicode(code)
}
// Whether `char` may appear after the first character of an identifier:
// anything that can start one, plus digits, `-` and `?`. A string argument is
// judged by its first UTF-16 code unit.
const isIdentChar = (char: number | string): boolean => {
  const code = typeof char === 'number' ? char : char.charCodeAt(0)
  if (code === 45 /* - */ || code === 63 /* ? */) return true
  return isIdentStart(code) || isDigit(code)
}
// Whether `char` may be the last character of an identifier. Currently the
// same rule as for interior characters.
const isIdentEnd = (char: number | string): boolean => isIdentChar(char)
// True for the ASCII letters a-z (codes 97 through 122).
const isLowercaseLetter = (ch: number): boolean => {
  const LOWER_A = 97
  const LOWER_Z = 122
  return LOWER_A <= ch && ch <= LOWER_Z
}
// True for the ASCII digits 0-9 (codes 48 through 57).
const isDigit = (ch: number): boolean => {
  const ZERO = 48
  const NINE = 57
  return ZERO <= ch && ch <= NINE
}
// True for space, tab, carriage return and line feed, and also for the two
// end-of-input markers (0 and -1), so end of input counts as a token boundary.
const isWhitespace = (ch: number): boolean => {
  switch (ch) {
    case 32: /* space */
    case 9:  /* tab */
    case 13: /* \r */
    case 10: /* \n */
    case -1: /* EOF */
    case 0:  /* EOF */
      return true
    default:
      return false
  }
}
// Whether `ch` can be part of a word: anything except whitespace / end of
// input, `;`, `)` and `]`. (Opening brackets, `:`, `=` and `.` are word
// characters here; callers decide when those end a token.)
const isWordChar = (ch: number): boolean => {
  if (isWhitespace(ch)) return false

  switch (ch) {
    case 10: /* \n */
    case 59: /* ; */
    case 41: /* ) */
    case 93: /* ] */
    case -1: /* EOF */
      return false
    default:
      return true
  }
}
// Whether `word`, taken as a whole, is one of the spellings in `operators`.
const isOperator = (word: string): boolean => operators.has(word)
// Whether `word`, taken as a whole, is one of the reserved words in `keywords`.
const isKeyword = (word: string): boolean => keywords.has(word)
// True for the four bracket characters the scanner tokenizes individually.
const isBracket = (char: number): boolean => {
  switch (char) {
    case 40: /* ( */
    case 41: /* ) */
    case 91: /* [ */
    case 93: /* ] */
      return true
    default:
      return false
  }
}
// Number of UTF-16 code units a code point occupies in a JS string:
// 2 for anything above 0xFFFF (e.g. emoji), 1 otherwise.
const getCharSize = (ch: number) => {
  if (ch > 0xffff) return 2
  return 1
}
/**
 * Returns the Unicode code point that starts at UTF-16 offset `pos` of `input`.
 *
 * A high surrogate followed by a low surrogate is combined into the single
 * code point above 0xFFFF; a lone surrogate is returned as is. Offsets outside
 * the string yield 0, which the scanner treats as end of input.
 *
 * This is exactly what the built-in `String.prototype.codePointAt` does, so it
 * is used instead of decoding the surrogate pair by hand.
 *
 * @param input the text being scanned
 * @param pos integer offset in UTF-16 code units
 * @returns the code point at `pos`, or 0 when `pos` is out of range
 */
const getFullCodePoint = (input: string, pos: number): number => {
  return input.codePointAt(pos) ?? 0
}
// Inclusive [low, high] code point ranges that may start an identifier in
// addition to a-z and `$`. Checked in the order listed.
const IDENT_UNICODE_RANGES: ReadonlyArray<readonly [number, number]> = [
  [0x1f600, 0x1f64f], // Basic Emoticons
  [0x1f300, 0x1f5ff], // Miscellaneous Symbols and Pictographs
  [0x1f680, 0x1f6ff], // Transport and Map Symbols
  [0x1f1e6, 0x1f1ff], // Regional Indicator Symbols (flags)
  [0x2600, 0x26ff],   // Miscellaneous Symbols (hearts, stars, weather)
  [0x2700, 0x27bf],   // Dingbats (scissors, pencils, etc)
  [0x1f900, 0x1f9ff], // Supplemental Symbols and Pictographs (newer emojis)
  [0x1fa70, 0x1faff], // Symbols and Pictographs Extended-A (newest emojis)
  [0x1f018, 0x1f270], // Various Asian Characters with emoji presentation
  [0xfe00, 0xfe0f],   // Variation Selectors (for emoji presentation)
  [0x238c, 0x2454],   // Additional miscellaneous items
  [0x20d0, 0x20ff],   // Combining Diacritical Marks for Symbols
  [0x00a0, 0x00ff],   // Latin-1 Supplement (includes ², ³, ¹ and other special chars)
  [0x0370, 0x03ff],   // Greek and Coptic
  [0x1d400, 0x1d7ff], // Mathematical Alphanumeric Symbols
  [0x2200, 0x22ff],   // Mathematical Operators
  [0x2070, 0x209f],   // Superscripts and Subscripts
  [0x2190, 0x21ff],   // Arrows
  [0x3040, 0x309f],   // Hiragana
  [0x30a0, 0x30ff],   // Katakana
  [0x4e00, 0x9fff],   // CJK Unified Ideographs
]

// Whether the code point `ch` falls inside any of the ranges above.
const isEmojiOrUnicode = (ch: number): boolean => {
  for (const [low, high] of IDENT_UNICODE_RANGES) {
    if (ch >= low && ch <= high) return true
  }
  return false
}

View File

@ -191,8 +191,8 @@ export function formatValue(value: Value, inner = false): string {
return `${colors.blue}[${colors.reset}${items}${colors.blue}]${colors.reset}` return `${colors.blue}[${colors.reset}${items}${colors.blue}]${colors.reset}`
} }
case 'dict': { case 'dict': {
const entries = Array.from(value.value.entries()) const entries = Array.from(value.value.entries()).reverse()
.map(([k, v]) => `${k}${colors.blue}=${colors.reset}${formatValue(v, true)}`) .map(([k, v]) => `${k.trim()}${colors.blue}=${colors.reset}${formatValue(v, true)}`)
.join(' ') .join(' ')
if (entries.length === 0) if (entries.length === 0)
return `${colors.blue}[=]${colors.reset}` return `${colors.blue}[=]${colors.reset}`

View File

@ -1,90 +1,89 @@
import { expect, describe, test } from 'bun:test' import { expect, describe, test } from 'bun:test'
import { globals } from '#prelude'
describe('var and var?', () => { describe('var and var?', () => {
test('var? checks if a variable exists', async () => { test('var? checks if a variable exists', async () => {
await expect(`var? 'nada'`).toEvaluateTo(false, globals) await expect(`var? 'nada'`).toEvaluateTo(false)
await expect(`var? 'info'`).toEvaluateTo(false, globals) await expect(`var? 'info'`).toEvaluateTo(false)
await expect(`abc = abc; var? 'abc'`).toEvaluateTo(true, globals) await expect(`abc = abc; var? 'abc'`).toEvaluateTo(true)
await expect(`var? 'var?'`).toEvaluateTo(true, globals) await expect(`var? 'var?'`).toEvaluateTo(true)
await expect(`var? 'dict'`).toEvaluateTo(true, globals) await expect(`var? 'dict'`).toEvaluateTo(true)
await expect(`var? dict`).toEvaluateTo(true, globals) await expect(`var? dict`).toEvaluateTo(true)
}) })
test('var returns a value or null', async () => { test('var returns a value or null', async () => {
await expect(`var 'nada'`).toEvaluateTo(null, globals) await expect(`var 'nada'`).toEvaluateTo(null)
await expect(`var nada`).toEvaluateTo(null, globals) await expect(`var nada`).toEvaluateTo(null)
await expect(`var 'info'`).toEvaluateTo(null, globals) await expect(`var 'info'`).toEvaluateTo(null)
await expect(`abc = my-string; var 'abc'`).toEvaluateTo('my-string', globals) await expect(`abc = my-string; var 'abc'`).toEvaluateTo('my-string')
await expect(`abc = my-string; var abc`).toEvaluateTo(null, globals) await expect(`abc = my-string; var abc`).toEvaluateTo(null)
}) })
}) })
describe('type predicates', () => { describe('type predicates', () => {
test('string? checks for string type', async () => { test('string? checks for string type', async () => {
await expect(`string? 'hello'`).toEvaluateTo(true, globals) await expect(`string? 'hello'`).toEvaluateTo(true)
await expect(`string? 42`).toEvaluateTo(false, globals) await expect(`string? 42`).toEvaluateTo(false)
}) })
test('number? checks for number type', async () => { test('number? checks for number type', async () => {
await expect(`number? 42`).toEvaluateTo(true, globals) await expect(`number? 42`).toEvaluateTo(true)
await expect(`number? 'hello'`).toEvaluateTo(false, globals) await expect(`number? 'hello'`).toEvaluateTo(false)
}) })
test('boolean? checks for boolean type', async () => { test('boolean? checks for boolean type', async () => {
await expect(`boolean? true`).toEvaluateTo(true, globals) await expect(`boolean? true`).toEvaluateTo(true)
await expect(`boolean? 42`).toEvaluateTo(false, globals) await expect(`boolean? 42`).toEvaluateTo(false)
}) })
test('array? checks for array type', async () => { test('array? checks for array type', async () => {
await expect(`array? [1 2 3]`).toEvaluateTo(true, globals) await expect(`array? [1 2 3]`).toEvaluateTo(true)
await expect(`array? 42`).toEvaluateTo(false, globals) await expect(`array? 42`).toEvaluateTo(false)
}) })
test('dict? checks for dict type', async () => { test('dict? checks for dict type', async () => {
await expect(`dict? [a=1]`).toEvaluateTo(true, globals) await expect(`dict? [a=1]`).toEvaluateTo(true)
await expect(`dict? []`).toEvaluateTo(false, globals) await expect(`dict? []`).toEvaluateTo(false)
}) })
test('null? checks for null type', async () => { test('null? checks for null type', async () => {
await expect(`null? null`).toEvaluateTo(true, globals) await expect(`null? null`).toEvaluateTo(true)
await expect(`null? 42`).toEvaluateTo(false, globals) await expect(`null? 42`).toEvaluateTo(false)
}) })
test('some? checks for non-null', async () => { test('some? checks for non-null', async () => {
await expect(`some? 42`).toEvaluateTo(true, globals) await expect(`some? 42`).toEvaluateTo(true)
await expect(`some? null`).toEvaluateTo(false, globals) await expect(`some? null`).toEvaluateTo(false)
}) })
}) })
describe('introspection', () => { describe('introspection', () => {
test('type returns proper types', async () => { test('type returns proper types', async () => {
await expect(`type 'hello'`).toEvaluateTo('string', globals) await expect(`type 'hello'`).toEvaluateTo('string')
await expect(`type 42`).toEvaluateTo('number', globals) await expect(`type 42`).toEvaluateTo('number')
await expect(`type true`).toEvaluateTo('boolean', globals) await expect(`type true`).toEvaluateTo('boolean')
await expect(`type false`).toEvaluateTo('boolean', globals) await expect(`type false`).toEvaluateTo('boolean')
await expect(`type null`).toEvaluateTo('null', globals) await expect(`type null`).toEvaluateTo('null')
await expect(`type [1 2 3]`).toEvaluateTo('array', globals) await expect(`type [1 2 3]`).toEvaluateTo('array')
await expect(`type [a=1 b=2]`).toEvaluateTo('dict', globals) await expect(`type [a=1 b=2]`).toEvaluateTo('dict')
}) })
test('inspect formats values', async () => { test('inspect formats values', async () => {
await expect(`inspect 'hello'`).toEvaluateTo("\u001b[32m'hello\u001b[32m'\u001b[0m", globals) await expect(`inspect 'hello'`).toEvaluateTo("\u001b[32m'hello\u001b[32m'\u001b[0m")
}) })
test('describe describes values', async () => { test('describe describes values', async () => {
await expect(`describe 'hello'`).toEvaluateTo("#<string: \u001b[32m'hello\u001b[32m'\u001b[0m>", globals) await expect(`describe 'hello'`).toEvaluateTo("#<string: \u001b[32m'hello\u001b[32m'\u001b[0m>")
}) })
}) })
describe('environment', () => { describe('environment', () => {
test('args is an array', async () => { test('args is an array', async () => {
await expect(`array? $.args`).toEvaluateTo(true, globals) await expect(`array? $.args`).toEvaluateTo(true)
}) })
test('args can be accessed', async () => { test('args can be accessed', async () => {
await expect(`type $.args`).toEvaluateTo('array', globals) await expect(`type $.args`).toEvaluateTo('array')
}) })
test('argv includes more than just the args', async () => { test('argv includes more than just the args', async () => {
@ -106,35 +105,35 @@ describe('ref', () => {
describe('$ global dictionary', () => { describe('$ global dictionary', () => {
test('$.args is an array', async () => { test('$.args is an array', async () => {
await expect(`$.args | array?`).toEvaluateTo(true, globals) await expect(`$.args | array?`).toEvaluateTo(true)
}) })
test('$.args can be accessed', async () => { test('$.args can be accessed', async () => {
await expect(`$.args | type`).toEvaluateTo('array', globals) await expect(`$.args | type`).toEvaluateTo('array')
}) })
test('$.script.name is a string', async () => { test('$.script.name is a string', async () => {
await expect(`$.script.name | string?`).toEvaluateTo(true, globals) await expect(`$.script.name | string?`).toEvaluateTo(true)
}) })
test('$.script.path is a string', async () => { test('$.script.path is a string', async () => {
await expect(`$.script.path | string?`).toEvaluateTo(true, globals) await expect(`$.script.path | string?`).toEvaluateTo(true)
}) })
test('$.env is a dict', async () => { test('$.env is a dict', async () => {
await expect(`$.env | dict?`).toEvaluateTo(true, globals) await expect(`$.env | dict?`).toEvaluateTo(true)
}) })
test('$.pid is a number', async () => { test('$.pid is a number', async () => {
await expect(`$.pid | number?`).toEvaluateTo(true, globals) await expect(`$.pid | number?`).toEvaluateTo(true)
await expect(`$.pid > 0`).toEvaluateTo(true, globals) await expect(`$.pid > 0`).toEvaluateTo(true)
}) })
test('$.cwd is a string', async () => { test('$.cwd is a string', async () => {
await expect(`$.cwd | string?`).toEvaluateTo(true, globals) await expect(`$.cwd | string?`).toEvaluateTo(true)
}) })
test('$.cwd returns current working directory', async () => { test('$.cwd returns current working directory', async () => {
await expect(`$.cwd`).toEvaluateTo(process.cwd(), globals) await expect(`$.cwd`).toEvaluateTo(process.cwd())
}) })
}) })

View File

@ -1,42 +1,41 @@
import { expect, describe, test } from 'bun:test' import { expect, describe, test } from 'bun:test'
import { globals } from '#prelude'
describe('loading a file', () => { describe('loading a file', () => {
test(`imports all a file's functions`, async () => { test(`imports all a file's functions`, async () => {
expect(` expect(`
math = load ./src/prelude/tests/math.sh math = load ./src/prelude/tests/math.sh
math.double 4 math.double 4
`).toEvaluateTo(8, globals) `).toEvaluateTo(8)
expect(` expect(`
math = load ./src/prelude/tests/math.sh math = load ./src/prelude/tests/math.sh
math.double (math.double 4) math.double (math.double 4)
`).toEvaluateTo(16, globals) `).toEvaluateTo(16)
expect(` expect(`
math = load ./src/prelude/tests/math.sh math = load ./src/prelude/tests/math.sh
dbl = ref math.double dbl = ref math.double
dbl (dbl 2) dbl (dbl 2)
`).toEvaluateTo(8, globals) `).toEvaluateTo(8)
expect(` expect(`
math = load ./src/prelude/tests/math.sh math = load ./src/prelude/tests/math.sh
math.pi math.pi
`).toEvaluateTo(3.14, globals) `).toEvaluateTo(3.14)
expect(` expect(`
math = load ./src/prelude/tests/math.sh math = load ./src/prelude/tests/math.sh
math | at 🥧 math | at 🥧
`).toEvaluateTo(3.14159265359, globals) `).toEvaluateTo(3.14159265359)
expect(` expect(`
math = load ./src/prelude/tests/math.sh math = load ./src/prelude/tests/math.sh
math.🥧 math.🥧
`).toEvaluateTo(3.14159265359, globals) `).toEvaluateTo(3.14159265359)
expect(` expect(`
math = load ./src/prelude/tests/math.sh math = load ./src/prelude/tests/math.sh
math.add1 5 math.add1 5
`).toEvaluateTo(6, globals) `).toEvaluateTo(6)
}) })
}) })

View File

@ -1,5 +1,4 @@
import { expect, describe, test } from 'bun:test' import { expect, describe, test } from 'bun:test'
import { globals } from '#prelude'
describe('string operations', () => { describe('string operations', () => {
test('to-upper converts to uppercase', async () => { test('to-upper converts to uppercase', async () => {

View File

@ -1,12 +1,16 @@
import { expect } from 'bun:test' import { expect } from 'bun:test'
import { diffLines } from 'diff'
import color from 'kleur'
import { Scanner, TokenType, type Token } from '#parser/tokenizer2'
import { parser } from '#parser/shrimp' import { parser } from '#parser/shrimp'
import { setGlobals } from '#parser/tokenizer' import { setGlobals } from '#parser/tokenizer'
import { parse } from '#parser/parser2'
import { globals as prelude } from '#prelude' import { globals as prelude } from '#prelude'
import { $ } from 'bun' import { $ } from 'bun'
import { assert, errorMessage } from '#utils/utils' import { assert, errorMessage } from '#utils/utils'
import { Compiler } from '#compiler/compiler' import { Compiler } from '#compiler/compiler'
import { run, VM } from 'reefvm' import { run, VM } from 'reefvm'
import { treeToString, VMResultToValue } from '#utils/tree' import { treeToString2, treeToString, VMResultToValue } from '#utils/tree'
const regenerateParser = async () => { const regenerateParser = async () => {
let generate = true let generate = true
@ -37,6 +41,9 @@ declare module 'bun:test' {
toFailParse(): T toFailParse(): T
toEvaluateTo(expected: unknown, globals?: Record<string, any>): Promise<T> toEvaluateTo(expected: unknown, globals?: Record<string, any>): Promise<T>
toFailEvaluation(): Promise<T> toFailEvaluation(): Promise<T>
toBeToken(expected: string): T
toMatchToken(typeOrValue: string, value?: string): T
toMatchTokens(...tokens: { type: string, value?: string }[]): T
} }
} }
@ -46,8 +53,8 @@ expect.extend({
const allGlobals = { ...prelude, ...(globals || {}) } const allGlobals = { ...prelude, ...(globals || {}) }
setGlobals(Object.keys(allGlobals)) setGlobals(Object.keys(allGlobals))
const tree = parser.parse(received) const tree = parse(received)
const actual = treeToString(tree, received) const actual = treeToString2(tree, received)
const normalizedExpected = trimWhitespace(expected) const normalizedExpected = trimWhitespace(expected)
try { try {
@ -144,8 +151,107 @@ expect.extend({
} }
} }
}, },
/**
 * Matcher: tokenizes the received string and checks that the FIRST token's
 * type equals the `TokenType` member named by `expected`.
 * Fails (rather than throws) when tokenization errors out or yields no tokens.
 */
toBeToken(received: unknown, expected: string) {
  assert(typeof received === 'string', 'toBeToken can only be used with string values')

  try {
    const [first] = tokenize(received)

    // Nothing was scanned, so there is no type to compare against.
    if (!first) {
      return {
        pass: false,
        message: () => `Expected token type to be ${expected}, but got ${first}`,
      }
    }

    // Resolve the enum member by name; an unknown name yields undefined and never matches.
    const wantedType = TokenType[expected as keyof typeof TokenType]

    return {
      pass: first.type === wantedType,
      message: () => `Expected token type to be ${expected}, but got ${TokenType[first.type]}`,
    }
  } catch (error) {
    return {
      pass: false,
      message: () => `Tokenization failed: ${errorMessage(error)}`,
    }
  }
},
/**
 * Matcher: tokenizes the received string and checks its FIRST token.
 *
 * - `toMatchToken(value)`       — only the token's value is compared.
 * - `toMatchToken(type, value)` — the token's type (a `TokenType` member name)
 *   and its value are both compared.
 *
 * The two call shapes are told apart by whether `value` was supplied at all,
 * not by its truthiness, so an explicit empty-string value (`''`) is still
 * treated as the two-argument form.
 */
toMatchToken(received: unknown, typeOrValue: string, value?: string) {
  assert(typeof received === 'string', 'toMatchToken can only be used with string values')

  // `value !== undefined` (not `value ?`) so that '' is a legal expected value.
  const hasType = value !== undefined
  const expectedValue = hasType ? value : typeOrValue
  const expectedType = hasType ? typeOrValue : undefined

  try {
    const tokens = tokenize(received)
    const token = tokens[0] as Token

    // Nothing was scanned at all.
    if (!token) {
      return {
        message: () => `Expected token to be ${expectedValue.replaceAll('\n', '\\n')}, got ${token}`,
        pass: false,
      }
    }

    // Type is only checked in the two-argument form.
    if (expectedType !== undefined && TokenType[expectedType as keyof typeof TokenType] !== token.type) {
      return {
        message: () => `Expected token to be ${expectedType}, but got ${TokenType[token.type]}`,
        pass: false
      }
    }

    return {
      message: () => `Expected token to be ${expectedValue.replaceAll('\n', '\\n')}, but got ${token.value}`,
      pass: token.value === expectedValue
    }
  } catch (error) {
    return {
      message: () => `Tokenization failed: ${errorMessage(error)} `,
      pass: false,
    }
  }
},
/**
 * Matcher: tokenizes the received string and compares the whole token list
 * (as `{ type, value }` records) against the expected `tokens`.
 * Comparison is done on the pretty-printed JSON of both lists; on mismatch the
 * failure message contains a line diff of the two JSON documents.
 */
toMatchTokens(received: unknown, ...tokens: { type: string, value?: string }[]) {
  assert(typeof received === 'string', 'toMatchTokens can only be used with string values')

  try {
    const scanned = tokenize(received).map(toHumanToken)

    // Tokens were expected but the scanner produced none.
    if (tokens.length > 0 && scanned.length === 0) {
      return {
        pass: false,
        message: () => `Expected tokens ${JSON.stringify(tokens)}, got nothing`,
      }
    }

    const wantedJson = JSON.stringify(tokens, null, 2)
    const scannedJson = JSON.stringify(scanned, null, 2)

    return {
      pass: wantedJson === scannedJson,
      message: () => `Tokens don't match: \n\n${diff(scannedJson, wantedJson)}`,
    }
  } catch (error) {
    return {
      pass: false,
      message: () => `Tokenization failed: ${errorMessage(error)} `,
    }
  }
}
}) })
// Runs a fresh Scanner over `code` and returns every token it produces.
const tokenize = (code: string): Token[] => new Scanner().tokenize(code)
// Converts a token into a plain record whose `type` is the TokenType member
// name (looked up via TokenType[...]) instead of the raw enum value.
const toHumanToken = (tok: Token): { type: string, value?: string } => {
  const { type, value } = tok
  return { type: TokenType[type], value }
}
const trimWhitespace = (str: string): string => { const trimWhitespace = (str: string): string => {
const lines = str.split('\n').filter((line) => line.trim().length > 0) const lines = str.split('\n').filter((line) => line.trim().length > 0)
const firstLine = lines[0] const firstLine = lines[0]
@ -157,10 +263,33 @@ const trimWhitespace = (str: string): string => {
if (!line.startsWith(leadingWhitespace)) { if (!line.startsWith(leadingWhitespace)) {
let foundWhitespace = line.match(/^(\s*)/)?.[1] || '' let foundWhitespace = line.match(/^(\s*)/)?.[1] || ''
throw new Error( throw new Error(
`Line has inconsistent leading whitespace: "${line}" (found "${foundWhitespace}", expected "${leadingWhitespace}")` `Line has inconsistent leading whitespace: "${line}"(found "${foundWhitespace}", expected "${leadingWhitespace}")`
) )
} }
return line.slice(leadingWhitespace.length) return line.slice(leadingWhitespace.length)
}) })
.join('\n') .join('\n')
} }
// Renders a colored line diff between two strings (both trimmed first).
// Returns '' when the trimmed strings are equal. Otherwise each chunk from
// diffLines(b, a) is prefixed with '+', '-' or ' ', colored green/red when
// added/removed, newline-terminated, and the chunks are joined with '\n'.
const diff = (a: string, b: string): string => {
  const first = a.trim()
  const second = b.trim()

  if (first === second) return ''

  return diffLines(second, first)
    .map((part) => {
      const marker = part.added ? "+" : part.removed ? "-" : " "
      let rendered = marker + part.value

      if (part.added) rendered = color.green(rendered)
      else if (part.removed) rendered = color.red(rendered)

      // A colored chunk ends with the color-reset escape, so look for the newline before it too.
      const terminated = rendered.endsWith("\n") || rendered.endsWith("\n\u001b[39m")
      return terminated ? rendered : rendered + "\n"
    })
    .join('\n')
}

View File

@ -1,5 +1,39 @@
import { Tree, TreeCursor } from '@lezer/common' import { Tree, TreeCursor } from '@lezer/common'
import { type Value, fromValue } from 'reefvm' import { type Value, fromValue } from 'reefvm'
import { SyntaxNode } from '#parser/node'
// Formats a single node as one line: indentation, the node name and, for leaf
// nodes only, the source text the node covers. Nodes with children print just
// their name.
const nodeToString = (node: SyntaxNode, input: string, depth = 0): string => {
  const label = `${' '.repeat(depth)}${node.name}`

  // Parent nodes: children are printed separately by the caller.
  if (node.firstChild) return label

  const raw = input.slice(node.from, node.to)

  // Only strip quotes from whole String nodes (legacy DoubleQuote), not StringFragment/EscapeSeq/CurlyString
  let shown = node.name === 'String' ? raw.slice(1, -1) : raw

  // A lone space would be invisible in the output, so spell it out.
  if (shown === ' ') shown = '(space)'

  return shown ? `${label} ${shown}` : label
}
/**
 * Renders a node and all of its following siblings (recursively including
 * their children, one depth level deeper) as newline-separated lines.
 * A node named `Program` is skipped: printing starts at its first child.
 */
export const treeToString2 = (tree: SyntaxNode, input: string, depth = 0): string => {
  const out: string[] = []
  const start: SyntaxNode | null = tree.name === 'Program' ? tree.firstChild : tree

  for (let current = start; current; current = current.nextSibling) {
    out.push(nodeToString(current, input, depth))

    const child = current.firstChild
    if (child) out.push(treeToString2(child, input, depth + 1))
  }

  return out.join('\n')
}
export const treeToString = (tree: Tree, input: string): string => { export const treeToString = (tree: Tree, input: string): string => {
const lines: string[] = [] const lines: string[] = []

View File

View File

@ -1,4 +0,0 @@
node_modules
client/dist
server/dist
*.vsix

View File

@ -19,7 +19,7 @@
"shrimp" "shrimp"
], ],
"extensions": [ "extensions": [
".sh" ".shrimp"
], ],
"configuration": "./language-configuration.json" "configuration": "./language-configuration.json"
} }
@ -80,11 +80,12 @@
"publisher": "shrimp-lang", "publisher": "shrimp-lang",
"scripts": { "scripts": {
"vscode:prepublish": "bun run package", "vscode:prepublish": "bun run package",
"compile": "bun run compile:client && bun run compile:server", "generate-prelude-metadata": "bun scripts/generate-prelude-metadata.ts",
"compile": "bun run generate-prelude-metadata && bun run compile:client && bun run compile:server",
"compile:client": "bun build client/src/extension.ts --outdir client/dist --target node --format cjs --external vscode", "compile:client": "bun build client/src/extension.ts --outdir client/dist --target node --format cjs --external vscode",
"compile:server": "bun build server/src/server.ts --outdir server/dist --target node --format cjs", "compile:server": "bun build server/src/server.ts --outdir server/dist --target node --format cjs",
"watch": "bun run compile:client --watch & bun run compile:server --watch", "watch": "bun run compile:client --watch & bun run compile:server --watch",
"package": "bun run compile:client --minify && bun run compile:server --minify", "package": "bun run generate-prelude-metadata && bun run compile:client --minify && bun run compile:server --minify",
"check-types": "tsc --noEmit", "check-types": "tsc --noEmit",
"build-and-install": "bun run package && bunx @vscode/vsce package --allow-missing-repository && code --install-extension shrimp-*.vsix" "build-and-install": "bun run package && bunx @vscode/vsce package --allow-missing-repository && code --install-extension shrimp-*.vsix"
}, },

View File

@ -0,0 +1,117 @@
#!/usr/bin/env bun
/**
* Generates prelude metadata for the VSCode extension.
* - Prelude names (for parser scope tracking)
* - Function signatures (for autocomplete)
*/
import { writeFileSync } from 'fs'
import { join } from 'path'
import { globals } from '../../src/prelude'
/**
 * Extracts parameter names from a function by scanning its source text
 * (`fn.toString()`).
 *
 * Handles:
 * - regular functions, methods and parenthesized arrow functions
 * - bare single-parameter arrows (`x => ...`, `async x => ...`)
 * - default values containing commas, parentheses, brackets or braces
 *   (`b = [1, 2]`, `a = Math.max(1, 2)`), which are skipped as a whole
 * - string and template-literal defaults containing commas or brackets
 *
 * Rest parameters keep their `...` prefix. Anything after a top-level `=` or
 * `:` in a parameter (default value / type annotation) is dropped. A
 * parameter named `this` is omitted.
 *
 * @param fn The function to inspect.
 * @returns Parameter names in declaration order; `[]` when none are found.
 */
const extractParams = (fn: Function): string[] => {
  const src = fn.toString()
  const openIdx = src.indexOf('(')
  const arrowIdx = src.indexOf('=>')

  // Bare arrow function: the arrow appears before any parenthesis, so the
  // single parameter is whatever precedes `=>` (minus an `async` prefix).
  if (arrowIdx !== -1 && (openIdx === -1 || arrowIdx < openIdx)) {
    const bare = src
      .slice(0, arrowIdx)
      .replace(/^\s*async\s+/, '')
      .trim()
    return bare && bare !== 'this' ? [bare] : []
  }

  if (openIdx === -1) return []

  const params: string[] = []
  let current = '' // text of the parameter currently being read
  let depth = 0 // bracket nesting inside the parameter list
  let quote = '' // active string delimiter, '' when outside a string
  let skipping = false // true once a top-level `=` or `:` has been seen

  const flush = () => {
    const name = current.trim()
    if (name && name !== 'this') params.push(name)
    current = ''
    skipping = false
  }

  for (let i = openIdx + 1; i < src.length; i++) {
    const char = src.charAt(i)

    if (quote) {
      if (!skipping) current += char
      if (char === '\\') {
        // Escaped character: consume it so an escaped quote does not end the string.
        i++
        if (!skipping) current += src.charAt(i)
      } else if (char === quote) {
        quote = ''
      }
      continue
    }

    if (char === '"' || char === "'" || char === '`') {
      quote = char
    } else if (char === '(' || char === '[' || char === '{') {
      depth++
    } else if (char === ')' || char === ']' || char === '}') {
      // An unmatched closer is the end of the parameter list itself.
      if (depth === 0) break
      depth--
    } else if (depth === 0) {
      if (char === ',') {
        flush()
        continue
      }
      // Handle defaults and types: ignore everything up to the next top-level comma.
      if (char === '=' || char === ':') skipping = true
    }

    if (!skipping) current += char
  }
  flush()

  return params
}
// Builds `{ [name]: { params } }` for every function-valued member of a module;
// non-function members are ignored.
const generateModuleMetadata = (module: Record<string, any>) =>
  Object.entries(module).reduce<Record<string, { params: string[] }>>((collected, [key, member]) => {
    if (typeof member === 'function') {
      collected[key] = { params: extractParams(member) }
    }
    return collected
  }, {})
// Generate names list: every top-level key of the prelude `globals`, sorted.
const names = Object.keys(globals).sort()
// Generate module metadata: one entry per global whose value is a non-null
// object (excluding `$`), mapping its function members to their parameter names.
// NOTE(review): `typeof value === 'object'` is also true for arrays — confirm
// no prelude global is an array, or it will be emitted as a "module".
const moduleMetadata: Record<string, any> = {}
for (const [name, value] of Object.entries(globals)) {
if (typeof value === 'object' && value !== null && name !== '$') {
moduleMetadata[name] = generateModuleMetadata(value)
}
}
// Generate dollar metadata: each key of `globals.$` is recorded with an empty
// params list (these entries are treated as properties, not callables).
const dollarMetadata: Record<string, { params: string[] }> = {}
if (globals.$ && typeof globals.$ === 'object') {
for (const key of Object.keys(globals.$)) {
dollarMetadata[key] = { params: [] }
}
}
// Write prelude-names.ts: the sorted names serialized as a `const` array literal.
const namesOutput = `// Auto-generated by scripts/generate-prelude-metadata.ts
// Do not edit manually - run 'bun run generate-prelude-metadata' to regenerate
export const PRELUDE_NAMES = ${JSON.stringify(names, null, 2)} as const
`
// Output paths are resolved relative to this script's directory (import.meta.dir).
// NOTE(review): writeFileSync does not create missing parent directories —
// presumably server/src/metadata already exists in the repo; confirm.
const namesPath = join(import.meta.dir, '../server/src/metadata/prelude-names.ts')
writeFileSync(namesPath, namesOutput)
// Write prelude-completions.ts: the CompletionMetadata type plus the module and
// dollar metadata serialized as a single `completions` const.
const completionsOutput = `// Auto-generated by scripts/generate-prelude-metadata.ts
// Do not edit manually - run 'bun run generate-prelude-metadata' to regenerate
export type CompletionMetadata = {
params: string[]
description?: string
}
export const completions = {
modules: ${JSON.stringify(moduleMetadata, null, 2)},
dollar: ${JSON.stringify(dollarMetadata, null, 2)},
} as const
`
const completionsPath = join(import.meta.dir, '../server/src/metadata/prelude-completions.ts')
writeFileSync(completionsPath, completionsOutput)
// Report how much was generated and where it was written.
console.log(`✓ Generated ${names.length} prelude names to server/src/metadata/prelude-names.ts`)
console.log(
`✓ Generated completions for ${
Object.keys(moduleMetadata).length
} modules to server/src/metadata/prelude-completions.ts`
)

View File

@ -0,0 +1,52 @@
import { CompletionItem, CompletionItemKind } from 'vscode-languageserver/node'
import { TextDocument } from 'vscode-languageserver-textdocument'
import { completions } from '../metadata/prelude-completions'
import { analyzeCompletionContext } from './contextAnalyzer'
/**
 * Entry point for Shrimp completions.
 * Classifies the cursor position via `analyzeCompletionContext` and returns
 * module member completions, `$` property completions, or nothing.
 */
export const provideCompletions = (
  document: TextDocument,
  position: { line: number; character: number }
): CompletionItem[] => {
  const context = analyzeCompletionContext(document, position)

  switch (context.type) {
    case 'module':
      return buildModuleCompletions(context.moduleName)
    case 'dollar':
      return buildDollarCompletions()
    default:
      return [] // No completions for other contexts yet
  }
}
/**
 * Produces one Method completion per function recorded for `moduleName` in the
 * generated completions metadata. The item's detail shows the parameter list.
 * Returns an empty list when the module has no metadata entry.
 */
const buildModuleCompletions = (moduleName: string): CompletionItem[] => {
  const members = completions.modules[moduleName as keyof typeof completions.modules]
  if (!members) return []

  const items: CompletionItem[] = []
  for (const [label, { params }] of Object.entries(members)) {
    items.push({
      label,
      kind: CompletionItemKind.Method,
      detail: `(${params.join(', ')})`,
      insertText: label,
    })
  }
  return items
}
/**
 * Produces one Property completion per key recorded under `completions.dollar`
 * (the members reachable as `$.<name>`).
 */
const buildDollarCompletions = (): CompletionItem[] =>
  Object.keys(completions.dollar).map((property) => ({
    label: property,
    kind: CompletionItemKind.Property,
    insertText: property,
  }))

View File

@ -0,0 +1,66 @@
import { TextDocument } from 'vscode-languageserver-textdocument'
import { SyntaxNode } from '@lezer/common'
import { parser } from '../../../../src/parser/shrimp'
import * as Terms from '../../../../src/parser/shrimp.terms'
/**
 * Result of classifying the cursor position for completion purposes.
 * Discriminated on `type`.
 */
export type CompletionContext =
// Cursor follows `<module>.`; `moduleName` is the text left of the dot.
| { type: 'module'; moduleName: string }
// Cursor follows `$.`.
| { type: 'dollar' }
// No recognised completion context.
| { type: 'none' }
/**
 * Parses the whole document and inspects the innermost node ending at the
 * cursor to decide which completion context applies:
 * - `dollar` when the left side of the enclosing DotGet is `$`
 * - `module` when the left side is one of the supported module names
 * - `none` otherwise
 * Diagnostic information about the resolved node is logged on every call.
 */
export const analyzeCompletionContext = (
  document: TextDocument,
  position: { line: number; character: number }
): CompletionContext => {
  const source = document.getText()
  const cursor = document.offsetAt(position)

  // Find node at cursor - could be DotGet or Identifier inside DotGet
  const node = parser.parse(source).resolveInner(cursor, -1)

  console.log(`🔍 Node at cursor: ${node.name} (type: ${node.type.id})`)
  console.log(`🔍 Parent: ${node.parent?.name} (type: ${node.parent?.type.id})`)
  console.log(`🔍 Node text: "${source.slice(node.from, node.to)}"`)

  const supportedModules = ['dict', 'list', 'str', 'math', 'fs', 'json', 'load']

  // Maps the text left of the dot to a context, or null when it is not recognised.
  const classify = (leftSide: string): CompletionContext | null => {
    if (leftSide === '$') return { type: 'dollar' }
    return supportedModules.includes(leftSide) ? { type: 'module', moduleName: leftSide } : null
  }

  // Case 1: Incomplete DotGet (dict. or $.) - resolveInner returns the DotGet node directly
  if (node.type.id === Terms.DotGet) {
    const leftSide = extractLeftSide(node, source)
    console.log(`✅ Case 1: DotGet found, left side: "${leftSide}"`)

    const matched = classify(leftSide)
    if (matched) return matched
  }

  // Case 2: Partial identifier (dict.g or $.e) - resolveInner returns Identifier, parent is DotGet
  if (node.type.id === Terms.Identifier && node.parent?.type.id === Terms.DotGet) {
    const leftSide = extractLeftSide(node.parent, source)
    console.log(`✅ Case 2: Identifier in DotGet found, left side: "${leftSide}"`)

    const matched = classify(leftSide)
    if (matched) return matched
  }

  console.log(`❌ No matching context found`)
  return { type: 'none' }
}
/**
 * Returns the source text covered by the first child of a DotGet node
 * (the part before the dot), or '' when the node has no children.
 */
const extractLeftSide = (dotGetNode: SyntaxNode, text: string): string => {
  const head = dotGetNode.firstChild
  return head ? text.slice(head.from, head.to) : ''
}

View File

@ -1,12 +1,12 @@
import { TextDocument, Position } from 'vscode-languageserver-textdocument' import { TextDocument, Position } from 'vscode-languageserver-textdocument'
import { Diagnostic, DiagnosticSeverity } from 'vscode-languageserver/node' import { Diagnostic, DiagnosticSeverity } from 'vscode-languageserver/node'
import { parser } from '../../../src/parser/shrimp' import { Tree } from '@lezer/common'
import { Compiler } from '../../../src/compiler/compiler' import { Compiler } from '../../../src/compiler/compiler'
import { CompilerError } from '../../../src/compiler/compilerError' import { CompilerError } from '../../../src/compiler/compilerError'
export const buildDiagnostics = (textDocument: TextDocument): Diagnostic[] => { export const buildDiagnostics = (textDocument: TextDocument, tree: Tree): Diagnostic[] => {
const text = textDocument.getText() const text = textDocument.getText()
const diagnostics = getParseErrors(textDocument) const diagnostics = getParseErrors(textDocument, tree)
if (diagnostics.length > 0) { if (diagnostics.length > 0) {
return diagnostics return diagnostics
@ -59,9 +59,7 @@ const unknownDiagnostic = (message: string): Diagnostic => {
return diagnostic return diagnostic
} }
const getParseErrors = (textDocument: TextDocument): Diagnostic[] => { const getParseErrors = (textDocument: TextDocument, tree: Tree): Diagnostic[] => {
const tree = parser.parse(textDocument.getText())
const ranges: { start: Position; end: Position }[] = [] const ranges: { start: Position; end: Position }[] = []
tree.iterate({ tree.iterate({
enter(n) { enter(n) {

View File

@ -1,7 +1,7 @@
import { SyntaxNode } from '@lezer/common' import { SyntaxNode } from '@lezer/common'
import { TextDocument } from 'vscode-languageserver-textdocument' import { TextDocument } from 'vscode-languageserver-textdocument'
import * as Terms from '../../../src/parser/shrimp.terms' import * as Terms from '../../../src/parser/shrimp.terms'
import { globals } from '../../../src/prelude' import { PRELUDE_NAMES } from './metadata/prelude-names'
/** /**
* Tracks variables in scope at a given position in the parse tree. * Tracks variables in scope at a given position in the parse tree.
@ -13,8 +13,7 @@ export class EditorScopeAnalyzer {
constructor(document: TextDocument) { constructor(document: TextDocument) {
this.document = document this.document = document
const preludeKeys = Object.keys(globals) this.scopeCache.set(0, new Set(PRELUDE_NAMES))
this.scopeCache.set(0, new Set(preludeKeys))
} }
/** /**

View File

@ -0,0 +1,732 @@
// Auto-generated by scripts/generate-prelude-metadata.ts
// Do not edit manually - run 'bun run generate-prelude-metadata' to regenerate
export type CompletionMetadata = {
params: string[]
description?: string
}
export const completions = {
modules: {
"dict": {
"keys": {
"params": [
"dict"
]
},
"values": {
"params": [
"dict"
]
},
"entries": {
"params": [
"dict"
]
},
"has?": {
"params": [
"dict",
"key"
]
},
"get": {
"params": [
"dict",
"key",
"defaultValue"
]
},
"set": {
"params": [
"dict",
"key",
"value"
]
},
"merge": {
"params": [
"...dicts"
]
},
"empty?": {
"params": [
"dict"
]
},
"map": {
"params": [
"dict",
"cb"
]
},
"filter": {
"params": [
"dict",
"cb"
]
},
"from-entries": {
"params": [
"entries"
]
}
},
"fs": {
"ls": {
"params": [
"path"
]
},
"mkdir": {
"params": [
"path"
]
},
"rmdir": {
"params": [
"path"
]
},
"pwd": {
"params": []
},
"cd": {
"params": [
"path"
]
},
"read": {
"params": [
"path"
]
},
"cat": {
"params": [
"path"
]
},
"read-bytes": {
"params": [
"path"
]
},
"write": {
"params": [
"path",
"content"
]
},
"append": {
"params": [
"path",
"content"
]
},
"delete": {
"params": [
"path"
]
},
"rm": {
"params": [
"path"
]
},
"copy": {
"params": [
"from",
"to"
]
},
"move": {
"params": [
"from",
"to"
]
},
"mv": {
"params": [
"from",
"to"
]
},
"basename": {
"params": [
"path"
]
},
"dirname": {
"params": [
"path"
]
},
"extname": {
"params": [
"path"
]
},
"join": {
"params": [
"...paths"
]
},
"resolve": {
"params": [
"...paths"
]
},
"stat": {
"params": [
"path"
]
},
"exists?": {
"params": [
"path"
]
},
"file?": {
"params": [
"path"
]
},
"dir?": {
"params": [
"path"
]
},
"symlink?": {
"params": [
"path"
]
},
"exec?": {
"params": [
"path"
]
},
"size": {
"params": [
"path"
]
},
"chmod": {
"params": [
"path",
"mode"
]
},
"symlink": {
"params": [
"target",
"path"
]
},
"readlink": {
"params": [
"path"
]
},
"glob": {
"params": [
"pattern"
]
},
"watch": {
"params": [
"path",
"callback"
]
},
"cp": {
"params": [
"from",
"to"
]
}
},
"json": {
"encode": {
"params": [
"s"
]
},
"decode": {
"params": [
"s"
]
},
"parse": {
"params": [
"s"
]
},
"stringify": {
"params": [
"s"
]
}
},
"list": {
"slice": {
"params": [
"list",
"start",
"end"
]
},
"map": {
"params": [
"list",
"cb"
]
},
"filter": {
"params": [
"list",
"cb"
]
},
"reject": {
"params": [
"list",
"cb"
]
},
"reduce": {
"params": [
"list",
"cb",
"initial"
]
},
"find": {
"params": [
"list",
"cb"
]
},
"empty?": {
"params": [
"list"
]
},
"contains?": {
"params": [
"list",
"item"
]
},
"includes?": {
"params": [
"list",
"item"
]
},
"has?": {
"params": [
"list",
"item"
]
},
"any?": {
"params": [
"list",
"cb"
]
},
"all?": {
"params": [
"list",
"cb"
]
},
"push": {
"params": [
"list",
"item"
]
},
"pop": {
"params": [
"list"
]
},
"shift": {
"params": [
"list"
]
},
"unshift": {
"params": [
"list",
"item"
]
},
"splice": {
"params": [
"list",
"start",
"deleteCount",
"...items"
]
},
"insert": {
"params": [
"list",
"index",
"item"
]
},
"reverse": {
"params": [
"list"
]
},
"sort": {
"params": [
"list",
"cb"
]
},
"concat": {
"params": [
"...lists"
]
},
"flatten": {
"params": [
"list",
"depth"
]
},
"unique": {
"params": [
"list"
]
},
"zip": {
"params": [
"list1",
"list2"
]
},
"first": {
"params": [
"list"
]
},
"last": {
"params": [
"list"
]
},
"rest": {
"params": [
"list"
]
},
"take": {
"params": [
"list",
"n"
]
},
"drop": {
"params": [
"list",
"n"
]
},
"append": {
"params": [
"list",
"item"
]
},
"prepend": {
"params": [
"list",
"item"
]
},
"index-of": {
"params": [
"list",
"item"
]
},
"sum": {
"params": [
"list"
]
},
"count": {
"params": [
"list",
"cb"
]
},
"partition": {
"params": [
"list",
"cb"
]
},
"compact": {
"params": [
"list"
]
},
"group-by": {
"params": [
"list",
"cb"
]
}
},
"math": {
"abs": {
"params": [
"n"
]
},
"floor": {
"params": [
"n"
]
},
"ceil": {
"params": [
"n"
]
},
"round": {
"params": [
"n"
]
},
"min": {
"params": [
"...nums"
]
},
"max": {
"params": [
"...nums"
]
},
"pow": {
"params": [
"base",
"exp"
]
},
"sqrt": {
"params": [
"n"
]
},
"random": {
"params": []
},
"clamp": {
"params": [
"n",
"min",
"max"
]
},
"sign": {
"params": [
"n"
]
},
"trunc": {
"params": [
"n"
]
},
"even?": {
"params": [
"n"
]
},
"odd?": {
"params": [
"n"
]
},
"positive?": {
"params": [
"n"
]
},
"negative?": {
"params": [
"n"
]
},
"zero?": {
"params": [
"n"
]
}
},
"str": {
"join": {
"params": [
"arr",
"sep"
]
},
"split": {
"params": [
"str",
"sep"
]
},
"to-upper": {
"params": [
"str"
]
},
"to-lower": {
"params": [
"str"
]
},
"trim": {
"params": [
"str"
]
},
"starts-with?": {
"params": [
"str",
"prefix"
]
},
"ends-with?": {
"params": [
"str",
"suffix"
]
},
"contains?": {
"params": [
"str",
"substr"
]
},
"empty?": {
"params": [
"str"
]
},
"index-of": {
"params": [
"str",
"search"
]
},
"last-index-of": {
"params": [
"str",
"search"
]
},
"replace": {
"params": [
"str",
"search",
"replacement"
]
},
"replace-all": {
"params": [
"str",
"search",
"replacement"
]
},
"slice": {
"params": [
"str",
"start",
"end"
]
},
"substring": {
"params": [
"str",
"start",
"end"
]
},
"repeat": {
"params": [
"str",
"count"
]
},
"pad-start": {
"params": [
"str",
"length",
"pad"
]
},
"pad-end": {
"params": [
"str",
"length",
"pad"
]
},
"lines": {
"params": [
"str"
]
},
"chars": {
"params": [
"str"
]
},
"match": {
"params": [
"str",
"regex"
]
},
"test?": {
"params": [
"str",
"regex"
]
}
}
},
dollar: {
"args": {
"params": []
},
"argv": {
"params": []
},
"env": {
"params": []
},
"pid": {
"params": []
},
"cwd": {
"params": []
},
"script": {
"params": []
}
},
} as const

View File

@ -0,0 +1,40 @@
// Auto-generated by scripts/generate-prelude-metadata.ts
// Do not edit manually - run 'bun run generate-prelude-metadata' to regenerate
/**
 * Names of the prelude globals, listed in alphabetical order.
 *
 * The `as const` assertion makes this a readonly tuple of string literals, so
 * its element type is the union of these exact names rather than `string`.
 *
 * NOTE(review): presumably mirrors the top-level keys of the runtime prelude;
 * confirm against the generator script before relying on that.
 */
export const PRELUDE_NAMES = [
"$",
"array?",
"at",
"bnot",
"boolean?",
"dec",
"describe",
"dict",
"dict?",
"each",
"echo",
"empty?",
"exit",
"fs",
"function?",
"identity",
"import",
"inc",
"inspect",
"json",
"length",
"list",
"load",
"math",
"not",
"null?",
"number?",
"range",
"ref",
"some?",
"str",
"string?",
"type",
"var",
"var?"
] as const

View File

@ -1,6 +1,6 @@
import { parser } from '../../../src/parser/shrimp' import { parser } from '../../../src/parser/shrimp'
import * as Terms from '../../../src/parser/shrimp.terms' import * as Terms from '../../../src/parser/shrimp.terms'
import { SyntaxNode } from '@lezer/common' import { SyntaxNode, Tree } from '@lezer/common'
import { TextDocument } from 'vscode-languageserver-textdocument' import { TextDocument } from 'vscode-languageserver-textdocument'
import { import {
SemanticTokensBuilder, SemanticTokensBuilder,
@ -28,9 +28,7 @@ export const TOKEN_MODIFIERS = [
SemanticTokenModifiers.readonly, SemanticTokenModifiers.readonly,
] ]
export function buildSemanticTokens(document: TextDocument): number[] { export function buildSemanticTokens(document: TextDocument, tree: Tree): number[] {
const text = document.getText()
const tree = parser.parse(text)
const builder = new SemanticTokensBuilder() const builder = new SemanticTokensBuilder()
const scopeTracker = new EditorScopeAnalyzer(document) const scopeTracker = new EditorScopeAnalyzer(document)

View File

@ -1,8 +1,13 @@
import { TextDocument } from 'vscode-languageserver-textdocument' import { TextDocument } from 'vscode-languageserver-textdocument'
import { buildDiagnostics } from './diagnostics' import { buildDiagnostics } from './diagnostics'
import { buildSemanticTokens, TOKEN_MODIFIERS, TOKEN_TYPES } from './semanticTokens' import { buildSemanticTokens, TOKEN_MODIFIERS, TOKEN_TYPES } from './semanticTokens'
import { provideCompletions } from './completion/completionProvider'
import { provideSignatureHelp } from './signatureHelp'
import { PRELUDE_NAMES } from './metadata/prelude-names'
import { parser } from '../../../src/parser/shrimp' import { parser } from '../../../src/parser/shrimp'
import { setGlobals } from '../../../src/parser/tokenizer'
import { Compiler } from '../../../src/compiler/compiler' import { Compiler } from '../../../src/compiler/compiler'
import { Tree } from '@lezer/common'
import { import {
InitializeResult, InitializeResult,
TextDocuments, TextDocuments,
@ -10,19 +15,30 @@ import {
createConnection, createConnection,
ProposedFeatures, ProposedFeatures,
CompletionItemKind, CompletionItemKind,
TextDocumentChangeEvent,
} from 'vscode-languageserver/node' } from 'vscode-languageserver/node'
import { setGlobals } from '../../../src/parser/tokenizer'
import { globals } from '../../../src/prelude'
// Initialize parser with prelude globals so it knows dict/list/str are in scope
setGlobals(PRELUDE_NAMES)
const connection = createConnection(ProposedFeatures.all) const connection = createConnection(ProposedFeatures.all)
const documents = new TextDocuments(TextDocument) const documents = new TextDocuments(TextDocument)
documents.listen(connection) documents.listen(connection)
const documentTrees = new Map<string, Tree>()
// Server capabilities // Server capabilities
connection.onInitialize(handleInitialize) connection.onInitialize(handleInitialize)
// Language features // Language features
connection.languages.semanticTokens.on(handleSemanticTokens) connection.languages.semanticTokens.on(handleSemanticTokens)
documents.onDidOpen(handleDocumentOpen)
documents.onDidChangeContent(handleDocumentChange) documents.onDidChangeContent(handleDocumentChange)
documents.onDidClose(handleDocumentClose)
connection.onCompletion(handleCompletion) connection.onCompletion(handleCompletion)
connection.onSignatureHelp(handleSignatureHelp)
// Debug commands // Debug commands
connection.onRequest('shrimp/parseTree', handleParseTree) connection.onRequest('shrimp/parseTree', handleParseTree)
@ -31,10 +47,7 @@ connection.onRequest('shrimp/bytecode', handleBytecode)
// Start listening // Start listening
connection.listen() connection.listen()
// ============================================================================
// Handler implementations // Handler implementations
// ============================================================================
function handleInitialize(): InitializeResult { function handleInitialize(): InitializeResult {
connection.console.log('🦐 Server initialized with capabilities') connection.console.log('🦐 Server initialized with capabilities')
const result: InitializeResult = { const result: InitializeResult = {
@ -43,6 +56,9 @@ function handleInitialize(): InitializeResult {
completionProvider: { completionProvider: {
triggerCharacters: ['.'], triggerCharacters: ['.'],
}, },
signatureHelpProvider: {
triggerCharacters: [' '],
},
semanticTokensProvider: { semanticTokensProvider: {
legend: { legend: {
tokenTypes: TOKEN_TYPES, tokenTypes: TOKEN_TYPES,
@ -56,27 +72,84 @@ function handleInitialize(): InitializeResult {
return result return result
} }
/**
 * Handles a document-open event: parses the document's full text and stores
 * the resulting tree in `documentTrees`, keyed by the document URI.
 */
function handleDocumentOpen(event: TextDocumentChangeEvent<TextDocument>) {
const document = event.document
// Hand the prelude's top-level names to the tokenizer before parsing.
// NOTE(review): presumably required so global identifiers are recognized
// during parsing - confirm against setGlobals in the tokenizer.
setGlobals(Object.keys(globals))
const tree = parser.parse(document.getText())
// Cache the tree under the document URI.
documentTrees.set(document.uri, tree)
}
function handleSemanticTokens(params: any) { function handleSemanticTokens(params: any) {
const document = documents.get(params.textDocument.uri) const document = documents.get(params.textDocument.uri)
if (!document) return { data: [] } if (!document) return { data: [] }
const data = buildSemanticTokens(document) const tree = documentTrees.get(params.textDocument.uri)
if (!tree) return { data: [] }
const data = buildSemanticTokens(document, tree)
return { data } return { data }
} }
function handleDocumentChange(change: any) { function handleDocumentChange(change: TextDocumentChangeEvent<TextDocument>) {
const textDocument = change.document const document = change.document
const diagnostics = buildDiagnostics(textDocument)
connection.sendDiagnostics({ uri: textDocument.uri, diagnostics }) // Parse and cache
setGlobals(Object.keys(globals))
const tree = parser.parse(document.getText())
documentTrees.set(document.uri, tree)
// Build diagnostics using cached tree
const diagnostics = buildDiagnostics(document, tree)
connection.sendDiagnostics({ uri: document.uri, diagnostics })
}
function handleDocumentClose(event: TextDocumentChangeEvent<TextDocument>) {
documentTrees.delete(event.document.uri)
} }
function handleCompletion(params: any) { function handleCompletion(params: any) {
const keywords = ['if', 'else', 'do', 'end', 'and', 'or', 'true', 'false', 'null'] const document = documents.get(params.textDocument.uri)
if (!document) {
console.log('❌ No document found')
return []
}
return keywords.map((keyword) => ({ const position = params.position
const text = document.getText()
const offset = document.offsetAt(position)
console.log(`📍 Text around cursor: "${text.slice(Math.max(0, offset - 10), offset + 10)}"`)
// First try context-aware completions (module/dollar)
const contextCompletions = provideCompletions(document, position)
console.log(`🎯 Context completions count: ${contextCompletions.length}`)
if (contextCompletions.length > 0) {
console.log(
`✅ Returning ${contextCompletions.length} completions:`,
contextCompletions.map((c) => c.label).join(', ')
)
return contextCompletions
}
// Fall back to keywords + prelude globals (for Ctrl+Space in general context)
console.log(`⌨️ Falling back to keywords + prelude globals`)
const keywords = ['if', 'else', 'do', 'end', 'and', 'or', 'true', 'false', 'null']
const keywordCompletions = keywords.map((keyword) => ({
label: keyword, label: keyword,
kind: CompletionItemKind.Keyword, kind: CompletionItemKind.Keyword,
})) }))
const preludeCompletions = PRELUDE_NAMES.map((name) => ({
label: name,
kind: CompletionItemKind.Function,
}))
return [...keywordCompletions, ...preludeCompletions]
}
function handleSignatureHelp(params: any) {
const document = documents.get(params.textDocument.uri)
if (!document) return
return provideSignatureHelp(document, params.position)
} }
function handleParseTree(params: { uri: string }) { function handleParseTree(params: { uri: string }) {
@ -84,8 +157,13 @@ function handleParseTree(params: { uri: string }) {
const document = documents.get(params.uri) const document = documents.get(params.uri)
if (!document) return 'Document not found' if (!document) return 'Document not found'
const tree = documentTrees.get(params.uri)
if (!tree) {
connection.console.error(`🦐 No cached tree for ${params.uri}`)
return 'No cached parse tree available'
}
const text = document.getText() const text = document.getText()
const tree = parser.parse(text)
const cursor = tree.cursor() const cursor = tree.cursor()
let formatted = '' let formatted = ''

View File

@ -0,0 +1,105 @@
import { SignatureHelp, SignatureInformation, ParameterInformation } from 'vscode-languageserver/node'
import { TextDocument } from 'vscode-languageserver-textdocument'
import { Tree, SyntaxNode } from '@lezer/common'
import { parser } from '../../../src/parser/shrimp'
import { completions } from './metadata/prelude-completions'
/**
 * Builds signature help for the function call surrounding (or just before)
 * the given position.
 *
 * @param document - The text document being edited.
 * @param position - Zero-based line/character position of the cursor.
 * @returns A single-signature `SignatureHelp`, or `undefined` when the cursor
 *   is not in a recognizable call or the function has no known parameters.
 */
export const provideSignatureHelp = (
  document: TextDocument,
  position: { line: number; character: number }
): SignatureHelp | undefined => {
  const text = document.getText()
  const tree = parser.parse(text)
  const cursorPos = document.offsetAt(position)

  const context = findCallContext(tree, cursorPos, text)
  if (!context) return

  const params = lookupFunctionParams(context.funcName)
  if (!params) return

  // Clamp to the last declared parameter so extra arguments keep highlighting
  // a trailing rest parameter, and never go below 0: a function with no
  // parameters would otherwise yield -1, which is not a valid index.
  const activeParameter = Math.max(0, Math.min(context.argCount, params.length - 1))

  return {
    signatures: [buildSignature(context.funcName, params)],
    activeParameter,
  }
}
/**
 * Finds the call expression the cursor belongs to and describes it.
 *
 * A call node qualifies when the cursor is strictly inside its span or at most
 * five characters past its end. Among qualifying nodes the one with the
 * smallest span wins; ties go to the node visited first in pre-order.
 *
 * @param tree - Parsed syntax tree of the document.
 * @param cursorPos - Cursor offset into `text`.
 * @param text - Full document text, used to slice out the function name.
 * @returns The function name and the number of arguments that end at or
 *   before the cursor, or `undefined` when no usable call is found.
 */
const findCallContext = (tree: Tree, cursorPos: number, text: string) => {
  const callNodeNames = ['FunctionCall', 'FunctionCallOrIdentifier']
  const argNodeNames = ['PositionalArg', 'NamedArg']

  const qualifies = (node: SyntaxNode): boolean => {
    if (!callNodeNames.includes(node.name)) return false
    const endsJustBeforeCursor = node.to <= cursorPos && cursorPos <= node.to + 5
    const containsCursor = node.from < cursorPos && cursorPos < node.to
    return endsJustBeforeCursor || containsCursor
  }

  // Gather every qualifying call in pre-order (node first, then its children).
  const candidates: SyntaxNode[] = []
  const collect = (node: SyntaxNode): void => {
    if (qualifies(node)) candidates.push(node)
    for (let kid = node.firstChild; kid; kid = kid.nextSibling) {
      collect(kid)
    }
  }
  collect(tree.topNode)

  // Strict comparison keeps the earliest candidate when spans are equal.
  const call = candidates.reduce<SyntaxNode | undefined>(
    (best, node) => (!best || node.to - node.from < best.to - best.from ? node : best),
    undefined
  )
  if (!call) return

  // The callee is the call's first child; only dotted and plain names are supported.
  const callee = call.firstChild
  if (!callee) return
  if (callee.name !== 'DotGet' && callee.name !== 'Identifier') return

  const funcName = text.slice(callee.from, callee.to)
  if (!funcName) return

  // Count the arguments that are fully to the left of the cursor.
  let argCount = 0
  for (let arg = call.firstChild; arg; arg = arg.nextSibling) {
    if (argNodeNames.includes(arg.name) && arg.to <= cursorPos) {
      argCount += 1
    }
  }

  return { funcName, argCount }
}
/**
 * Looks up the declared parameter names for a module-qualified function.
 *
 * @param funcName - Name as written at the call site, e.g. "list.map".
 * @returns The parameter names from the completions metadata, or `undefined`
 *   when the name is not dotted or the module/method is not found.
 */
const lookupFunctionParams = (funcName: string): string[] | undefined => {
  if (!funcName.includes('.')) {
    // TODO: Handle top-level prelude functions (print, range, etc.)
    return undefined
  }

  // Handle module functions: "list.map" → modules.list.map
  const segments = funcName.split('.')
  const moduleEntry = completions.modules[segments[0] as keyof typeof completions.modules]
  const methodEntry = moduleEntry?.[segments[1] as keyof typeof moduleEntry]
  return methodEntry?.params as string[] | undefined
}
/**
 * Creates the signature entry shown to the user for one function.
 *
 * @param funcName - Function name as it should appear in the label.
 * @param params - Parameter names, in declaration order.
 * @returns A signature labelled `name(p1, p2, ...)` with one parameter entry
 *   per name.
 */
const buildSignature = (funcName: string, params: string[]): SignatureInformation => ({
  label: `${funcName}(${params.join(', ')})`,
  parameters: params.map((paramName): ParameterInformation => ({ label: paramName })),
})

Binary file not shown.

View File

@ -0,0 +1,41 @@
import { parser } from '../../src/parser/shrimp'
import { setGlobals } from '../../src/parser/tokenizer'
import { PRELUDE_NAMES } from '../server/src/prelude-names'
// Set globals for DotGet detection.
// PRELUDE_NAMES is a readonly tuple; spreading it yields a fresh mutable array
// that type-checks without an `as unknown as` double assertion.
setGlobals([...PRELUDE_NAMES])

// Test cases - does incomplete DotGet parse correctly?
const testCases = [
  'dict.',
  'dict.g',
  'dict.get',
  '$.',
  '$.e',
  '$.env',
]

for (const code of testCases) {
  console.log(`\nTesting: "${code}"`)
  const tree = parser.parse(code)
  const cursor = tree.cursor()

  // Print the parse tree. The cursor is shared and mutated in place, so each
  // descent into children is paired with a parent() call to restore position.
  const printTree = (depth = 0) => {
    const indent = ' '.repeat(depth)
    console.log(`${indent}${cursor.name} [${cursor.from}-${cursor.to}]`)
    if (cursor.firstChild()) {
      do {
        printTree(depth + 1)
      } while (cursor.nextSibling())
      cursor.parent()
    }
  }
  printTree()

  // Check at cursor position (end of string); side -1 resolves the node
  // ending at that offset rather than one starting there.
  const node = tree.resolveInner(code.length, -1)
  console.log(`Node at end: ${node.name} (type: ${node.type.id})`)
}