tell the parser about builtin global functions

This commit is contained in:
Chris Wanstrath 2025-10-29 10:12:55 -07:00
parent 0eca3685f5
commit 3496b29072
5 changed files with 22 additions and 7 deletions

View File

@ -1,6 +1,7 @@
import { CompilerError } from '#compiler/compilerError.ts' import { CompilerError } from '#compiler/compilerError.ts'
import { parser } from '#parser/shrimp.ts' import { parser } from '#parser/shrimp.ts'
import * as terms from '#parser/shrimp.terms' import * as terms from '#parser/shrimp.terms'
import { setGlobals } from '#parser/tokenizer'
import type { SyntaxNode, Tree } from '@lezer/common' import type { SyntaxNode, Tree } from '@lezer/common'
import { assert, errorMessage } from '#utils/utils' import { assert, errorMessage } from '#utils/utils'
import { toBytecode, type Bytecode, type ProgramItem, bytecodeToString } from 'reefvm' import { toBytecode, type Bytecode, type ProgramItem, bytecodeToString } from 'reefvm'
@ -53,8 +54,9 @@ export class Compiler {
bytecode: Bytecode bytecode: Bytecode
pipeCounter = 0 pipeCounter = 0
constructor(public input: string) { constructor(public input: string, globals?: string[]) {
try { try {
if (globals) setGlobals(globals)
const cst = parser.parse(input) const cst = parser.parse(input)
const errors = checkTreeForErrors(cst) const errors = checkTreeForErrors(cst)

View File

@ -6,6 +6,13 @@ export function specializeKeyword(ident: string) {
return ident === 'do' ? Do : -1 return ident === 'do' ? Do : -1
} }
/**
 * Names of builtin globals known to the tokenizer.
 *
 * The dot-get check consults this list in addition to the parse-time scope, so
 * `name.prop` is treated as property access when `name` is listed here.
 * The array is only ever mutated in place by `setGlobals`, so its identity is stable.
 */
export const globals: string[] = []

/**
 * Replaces the contents of `globals` with the given names.
 *
 * @param newGlobals - identifier names to register as builtin globals.
 */
export const setGlobals = (newGlobals: string[]) => {
  // Snapshot first: if the caller passes `globals` itself, clearing it below
  // would otherwise empty the input before anything is copied.
  const snapshot = [...newGlobals]
  globals.length = 0
  // Push one at a time instead of spreading into push(), which can exceed the
  // engine's argument limit for very large arrays.
  for (const name of snapshot) globals.push(name)
}
// The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF. // The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF.
export const tokenizer = new ExternalTokenizer( export const tokenizer = new ExternalTokenizer(
@ -152,7 +159,7 @@ const checkForDotGet = (input: InputStream, stack: Stack, pos: number): number |
// If identifier is in scope, this is property access (e.g., obj.prop) // If identifier is in scope, this is property access (e.g., obj.prop)
// If not in scope, it should be consumed as a Word (e.g., file.txt) // If not in scope, it should be consumed as a Word (e.g., file.txt)
return context?.scope.has(identifierText) ? IdentifierBeforeDot : null return context?.scope.has(identifierText) || globals.includes(identifierText) ? IdentifierBeforeDot : null
} }
// Decide between AssignableIdentifier and Identifier using grammar state + peek-ahead // Decide between AssignableIdentifier and Identifier using grammar state + peek-ahead

View File

@ -49,6 +49,9 @@ export const globalFunctions = {
'to-upper': (str: string) => str.toUpperCase(), 'to-upper': (str: string) => str.toUpperCase(),
'to-lower': (str: string) => str.toLowerCase(), 'to-lower': (str: string) => str.toLowerCase(),
trim: (str: string) => str.trim(), trim: (str: string) => str.trim(),
str: {
trim: (str: string) => str.trim(),
},
// collections // collections
at: (collection: any, index: number | string) => collection[index], at: (collection: any, index: number | string) => collection[index],

View File

@ -13,8 +13,8 @@ describe('string operations', () => {
}) })
test('trim removes whitespace', async () => { test('trim removes whitespace', async () => {
await expect(`trim ' hello '`).toEvaluateTo('hello', globalFunctions) await expect(`str.trim ' hello '`).toEvaluateTo('hello', globalFunctions)
await expect(`trim '\\n\\thello\\t\\n'`).toEvaluateTo('hello', globalFunctions) await expect(`str.trim '\\n\\thello\\t\\n'`).toEvaluateTo('hello', globalFunctions)
}) })
test('split divides string by separator', async () => { test('split divides string by separator', async () => {

View File

@ -1,9 +1,10 @@
import { expect } from 'bun:test' import { expect } from 'bun:test'
import { parser } from '#parser/shrimp' import { parser } from '#parser/shrimp'
import { setGlobals } from '#parser/tokenizer'
import { $ } from 'bun' import { $ } from 'bun'
import { assert, errorMessage } from '#utils/utils' import { assert, errorMessage } from '#utils/utils'
import { Compiler } from '#compiler/compiler' import { Compiler } from '#compiler/compiler'
import { run, VM, type TypeScriptFunction } from 'reefvm' import { run, VM } from 'reefvm'
import { treeToString, VMResultToValue } from '#utils/tree' import { treeToString, VMResultToValue } from '#utils/tree'
const regenerateParser = async () => { const regenerateParser = async () => {
@ -30,7 +31,7 @@ await regenerateParser()
// Type declaration for TypeScript // Type declaration for TypeScript
declare module 'bun:test' { declare module 'bun:test' {
interface Matchers<T> { interface Matchers<T> {
toMatchTree(expected: string): T toMatchTree(expected: string, globals?: Record<string, any>): T
toMatchExpression(expected: string): T toMatchExpression(expected: string): T
toFailParse(): T toFailParse(): T
toEvaluateTo(expected: unknown, globals?: Record<string, any>): Promise<T> toEvaluateTo(expected: unknown, globals?: Record<string, any>): Promise<T>
@ -39,9 +40,10 @@ declare module 'bun:test' {
} }
expect.extend({ expect.extend({
toMatchTree(received: unknown, expected: string) { toMatchTree(received: unknown, expected: string, globals?: Record<string, any>) {
assert(typeof received === 'string', 'toMatchTree can only be used with string values') assert(typeof received === 'string', 'toMatchTree can only be used with string values')
if (globals) setGlobals(Object.keys(globals))
const tree = parser.parse(received) const tree = parser.parse(received)
const actual = treeToString(tree, received) const actual = treeToString(tree, received)
const normalizedExpected = trimWhitespace(expected) const normalizedExpected = trimWhitespace(expected)
@ -97,6 +99,7 @@ expect.extend({
assert(typeof received === 'string', 'toEvaluateTo can only be used with string values') assert(typeof received === 'string', 'toEvaluateTo can only be used with string values')
try { try {
if (globals) setGlobals(Object.keys(globals))
const compiler = new Compiler(received) const compiler = new Compiler(received)
const result = await run(compiler.bytecode, globals) const result = await run(compiler.bytecode, globals)
let value = VMResultToValue(result) let value = VMResultToValue(result)