From e0fafc0088e3c90ca7eb590d1c2d380b866d91f1 Mon Sep 17 00:00:00 2001 From: Corey Johnson Date: Mon, 6 Oct 2025 13:18:47 -0700 Subject: [PATCH] wip --- README.md | 103 ++------ src/evaluator/evaluator.test.ts | 138 +++++------ src/parser/old-shrimp.grammar | 79 +++++++ src/parser/shrimp.grammar | 119 +++++----- src/parser/shrimp.terms.ts | 33 +-- src/parser/shrimp.test.ts | 400 ++++++++++++++------------------ src/parser/shrimp.ts | 22 +- src/parser/tokenizers.ts | 91 ++------ src/testSetup.ts | 9 +- 9 files changed, 446 insertions(+), 548 deletions(-) create mode 100644 src/parser/old-shrimp.grammar diff --git a/README.md b/README.md index d647a09..2360f9b 100644 --- a/README.md +++ b/README.md @@ -1,89 +1,36 @@ -# Shrimp Parser - Development Context +# Shrimp Language ## Overview -Building a command-line language parser using Lezer (CodeMirror's parser system) with TypeScript. The goal is to create a prototype that can parse commands with arguments, similar to shell syntax, with inline hints for autocompletion. +Shrimp is a shell-like scripting language that combines the simplicity of command-line interfaces with functional programming concepts. Built using Lezer (CodeMirror's parser system) with TypeScript. -## Current Architecture +## Language Design Philosophy -### Grammar Structure (`shrimp.grammar`) +- **Everything is an expression** - Commands, assignments, and functions all return values +- **Whitespace matters** - Spaces distinguish operators from identifiers (e.g., `x-1` is an identifier, `x - 1` is subtraction) +- **Shell-like command syntax** - `echo hello world` works naturally +- **Named arguments without quotes** - `tail file.txt lines=30` +- **Unbound symbols become strings** - `echo hello` treats `hello` as a string if not defined +- **Simplicity over cleverness** - Each feature should work one way, consistently. 
Two simple features that are easy to explain beat one complex feature that requires lots of explanation -- **Commands**: Can be complete (`Command`) or partial (`CommandPartial`) for autocomplete -- **Arguments**: Positional or named (with `name=value` syntax) -- **Key Challenge**: Handling arbitrary text (like file paths) as arguments without conflicting with operators/keywords +## Current Status & Goals -### Tokenizer Setup (`tokenizers.ts`) +### Today's Implementation Goals +1. **Interpreter Setup** - Rename evaluator to interpreter for clarity +2. **Command Execution** - Support calling external commands and built-in functions +3. **Variable Assignment** - Implement assignment with validation using Lezer context tracking -- **Main tokenizer**: Returns `Command`, `CommandPartial`, or `Identifier` based on context -- **Command matching**: Uses `matchCommand()` to check against available commands -- **Context-aware**: Uses `stack.canShift()` to return appropriate token based on parse position -- **Issue**: Second occurrence of command name (e.g., `tail tail`) should be `Identifier` not `Command` +### Parser Features +- ✅ Distinguishes between identifiers (assignable) and words (non-assignable) +- ✅ Smart tokenization for named args (`lines=30` splits, but `./path=value` stays together) +- ✅ Handles ambiguous cases (bare identifier could be function call or variable reference) -### Key Design Decisions +## Grammar Architecture -1. **External tokenizers over regular tokens** for commands to enable: +See `src/parser/example.shrimp` for language examples and `src/parser/shrimp.grammar` for the full grammar. - - Dynamic command list (can change at runtime) - - Partial matching for autocomplete - - Context-aware tokenization - -2. **Virtual semicolons** for statement boundaries: - - - Using `insertSemicolon` external tokenizer - - Inserts at newlines/EOF to keep parser "inside" CommandCall - - Prevents `tail t` from parsing as two separate commands - -3. 
**UnquotedArg token** for paths/arbitrary text: - - Accepts anything except whitespace/parens/equals - - Only valid in command argument context - - Avoids conflicts with operators elsewhere - -### Current Problems - -1. **Parser completes CommandCall too early** - - - After `tail `, cursor shows position in `Program` not `CommandCall` - - Makes hint system harder to implement - -2. **Command token in wrong context** - - - `tail tail` - second "tail" returns `Command` token but should be `Identifier` - - Need better context checking in tokenizer - -3. **Inline hints need to be smarter** - - Must look backward to find command context - - Handle cases where parser has "completed" the command - -### Test Infrastructure - -- Custom test matchers: `toMatchTree`, `toEvaluateTo` -- Command source injection for testing: `setCommandSource()` -- Tests in `shrimp.test.ts` - -### File Structure - -``` -src/parser/ - shrimp.grammar - Lezer grammar definition - tokenizers.ts - External tokenizers - shrimp.ts - Generated parser - -src/editor/ - commands.ts - Command definitions - plugins/ - inlineHints.tsx - Autocomplete hint UI -``` - -## Next Steps - -1. Fix tokenizer context checking with `stack.canShift()` -2. Improve hint detection for "after command with space" case -3. Consider if grammar structure changes would help - -## Key Concepts to Remember - -- Lezer is LR parser - builds tree bottom-up -- External tokenizers run at each position -- `@skip { space }` makes whitespace invisible to parser -- Token precedence matters for overlap resolution -- `stack.canShift(tokenId)` checks if token is valid at current position +### Key Token Types +- **Identifier** - Lowercase/emoji start, can contain dashes/numbers (assignable) +- **Word** - Any non-whitespace that isn't a valid identifier (paths, URLs, etc.) 
+- **FunctionCall** - Identifier followed by arguments +- **FunctionCallOrIdentifier** - Ambiguous case resolved at runtime diff --git a/src/evaluator/evaluator.test.ts b/src/evaluator/evaluator.test.ts index efab176..ec4921c 100644 --- a/src/evaluator/evaluator.test.ts +++ b/src/evaluator/evaluator.test.ts @@ -1,87 +1,87 @@ -import { resetCommandSource, setCommandSource, type CommandShape } from '#editor/commands' -import { expect, test } from 'bun:test' +// import { resetCommandSource, setCommandSource, type CommandShape } from '#editor/commands' +// import { expect, test } from 'bun:test' -test('number literal', () => { - expect('42').toEvaluateTo(42) -}) +// test('number literal', () => { +// expect('42').toEvaluateTo(42) +// }) -test('negative number', () => { - expect('-5').toEvaluateTo(-5) -}) +// test('negative number', () => { +// expect('-5').toEvaluateTo(-5) +// }) -test('string literal', () => { - expect(`'hello'`).toEvaluateTo('hello') -}) +// test('string literal', () => { +// expect(`'hello'`).toEvaluateTo('hello') +// }) -test('boolean true', () => { - expect('true').toEvaluateTo(true) -}) +// test('boolean true', () => { +// expect('true').toEvaluateTo(true) +// }) -test('boolean false', () => { - expect('false').toEvaluateTo(false) -}) +// test('boolean false', () => { +// expect('false').toEvaluateTo(false) +// }) -test('addition', () => { - expect('2 + 3').toEvaluateTo(5) -}) +// test('addition', () => { +// expect('2 + 3').toEvaluateTo(5) +// }) -test('subtraction', () => { - expect('10 - 4').toEvaluateTo(6) -}) +// test('subtraction', () => { +// expect('10 - 4').toEvaluateTo(6) +// }) -test('multiplication', () => { - expect('3 * 4').toEvaluateTo(12) -}) +// test('multiplication', () => { +// expect('3 * 4').toEvaluateTo(12) +// }) -test('division', () => { - expect('15 / 3').toEvaluateTo(5) -}) +// test('division', () => { +// expect('15 / 3').toEvaluateTo(5) +// }) -test('assign number', () => { - expect('x = 5').toEvaluateTo(5) -}) +// 
test('assign number', () => { +// expect('x = 5').toEvaluateTo(5) +// }) -test('emoji assignment to number', () => { - expect('💎 = 5').toEvaluateTo(5) -}) +// test('emoji assignment to number', () => { +// expect('💎 = 5').toEvaluateTo(5) +// }) -test('assign string', () => { - expect(`name = 'Alice'`).toEvaluateTo('Alice') -}) +// test('assign string', () => { +// expect(`name = 'Alice'`).toEvaluateTo('Alice') +// }) -test('assign expression', () => { - expect('sum = 2 + 3').toEvaluateTo(5) -}) +// test('assign expression', () => { +// expect('sum = 2 + 3').toEvaluateTo(5) +// }) -test('parentheses', () => { - expect('(2 + 3) * 4').toEvaluateTo(20) -}) +// test('parentheses', () => { +// expect('(2 + 3) * 4').toEvaluateTo(20) +// }) -test('simple command', () => { - const commands: CommandShape[] = [ - { - command: 'echo', - args: [{ name: 'text', type: 'string' }], - execute: (text: string) => text, - }, - ] +// test('simple command', () => { +// const commands: CommandShape[] = [ +// { +// command: 'echo', +// args: [{ name: 'text', type: 'string' }], +// execute: (text: string) => text, +// }, +// ] - withCommands(commands, () => { - expect(`echo 'hello'`).toEvaluateTo('hello') - }) -}) +// withCommands(commands, () => { +// expect(`echo 'hello'`).toEvaluateTo('hello') +// }) +// }) -test.only('function', () => { - expect(`add = fn a b: a + b; add 2 4`).toEvaluateTo(5) -}) +// test.only('function', () => { +// expect(`add = fn a b: a + b; add 2 4`).toEvaluateTo(5) +// }) -const withCommands = (commands: CommandShape[], fn: () => void) => { - try { - setCommandSource(() => commands) - fn() - } catch (e) { - throw e - } finally { - resetCommandSource() - } -} +// const withCommands = (commands: CommandShape[], fn: () => void) => { +// try { +// setCommandSource(() => commands) +// fn() +// } catch (e) { +// throw e +// } finally { +// resetCommandSource() +// } +// } diff --git a/src/parser/old-shrimp.grammar b/src/parser/old-shrimp.grammar new file mode 
100644 index 0000000..c33a3c6 --- /dev/null +++ b/src/parser/old-shrimp.grammar @@ -0,0 +1,79 @@ +@external propSource highlighting from "./highlight.js" +@top Program { line* } + +line { + CommandCall semi | + expr semi +} + +@skip { space } + +@tokens { + @precedence { Number "-"} + space { @whitespace+ } + Number { "-"? $[0-9]+ ('.' $[0-9]+)? } + Boolean { "true" | "false" } + String { '\'' !["]* '\'' } + NamedArgPrefix { $[a-z]+ $[a-z0-9\-]* "=" } // matches "lines=", "follow=", etc. + + fn[@name=keyword] { "fn" } + equals[@name=operator] { "=" } + ":"[@name=colon] + "+"[@name=operator] + "-"[@name=operator] + "*"[@name=operator] + "/"[@name=operator] + leftParen[@name=paren] { "(" } + rightParen[@name=paren] { ")" } +} + +@external tokens tokenizer from "./tokenizers" { + Identifier, + Command, + CommandPartial +} + +@external tokens argTokenizer from "./tokenizers" { + UnquotedArg +} + +@external tokens insertSemicolon from "./tokenizers" { insertedSemi } + +@precedence { + multiplicative @left, + additive @left, + namedComplete @left, + function @right + assignment @right +} + +expr { + Assignment | + Function | + BinOp | + atom +} + +semi { insertedSemi | ";" } + +argValue { atom | UnquotedArg } + +CommandCall { (Command | CommandPartial) (NamedArg | PartialNamedArg | Arg)* } +Arg { !namedComplete argValue } +NamedArg { NamedArgPrefix !namedComplete argValue } // Required atom, higher precedence +PartialNamedArg { NamedArgPrefix } // Just the prefix + +Assignment { Identifier !assignment equals expr } + +Function { !function fn Params ":" expr } +Params { Identifier* } + +BinOp { + expr !multiplicative "*" expr | + expr !multiplicative "/" expr | + expr !additive "+" expr | + expr !additive "-" expr +} + +ParenExpr { leftParen expr rightParen } +atom { Identifier ~command | Number | String | Boolean | ParenExpr } diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index c33a3c6..2cc57ec 100644 --- a/src/parser/shrimp.grammar +++ 
b/src/parser/shrimp.grammar @@ -1,79 +1,78 @@ @external propSource highlighting from "./highlight.js" -@top Program { line* } +@top Program { (Expression newline)* } -line { - CommandCall semi | - expr semi -} - -@skip { space } - -@tokens { - @precedence { Number "-"} - space { @whitespace+ } +@tokens { Number { "-"? $[0-9]+ ('.' $[0-9]+)? } Boolean { "true" | "false" } String { '\'' !["]* '\'' } - NamedArgPrefix { $[a-z]+ $[a-z0-9\-]* "=" } // matches "lines=", "follow=", etc. - - fn[@name=keyword] { "fn" } - equals[@name=operator] { "=" } - ":"[@name=colon] + newline { "\n" | @eof } + space { " " } + leftParen { "(" } + rightParen { ")" } "+"[@name=operator] "-"[@name=operator] "*"[@name=operator] - "/"[@name=operator] - leftParen[@name=paren] { "(" } - rightParen[@name=paren] { ")" } + "/"[@name=operator] } -@external tokens tokenizer from "./tokenizers" { - Identifier, - Command, - CommandPartial -} - -@external tokens argTokenizer from "./tokenizers" { - UnquotedArg -} - -@external tokens insertSemicolon from "./tokenizers" { insertedSemi } - -@precedence { +@external tokens tokenizer from "./tokenizers" { Identifier, Word } +@precedence { multiplicative @left, - additive @left, - namedComplete @left, - function @right - assignment @right + additive @left } -expr { - Assignment | - Function | +Expression { + FunctionCall | + FunctionCallOrIdentifier | BinOp | - atom + ParenExpr | + Word | + String | + Number | + Boolean } -semi { insertedSemi | ";" } -argValue { atom | UnquotedArg } - -CommandCall { (Command | CommandPartial) (NamedArg | PartialNamedArg | Arg)* } -Arg { !namedComplete argValue } -NamedArg { NamedArgPrefix !namedComplete argValue } // Required atom, higher precedence -PartialNamedArg { NamedArgPrefix } // Just the prefix - -Assignment { Identifier !assignment equals expr } - -Function { !function fn Params ":" expr } -Params { Identifier* } - -BinOp { - expr !multiplicative "*" expr | - expr !multiplicative "/" expr | - expr !additive "+" expr 
| - expr !additive "-" expr +FunctionCallOrIdentifier { + Identifier } -ParenExpr { leftParen expr rightParen } -atom { Identifier ~command | Number | String | Boolean | ParenExpr } +FunctionCall { + Identifier (~ambig space arg)+ +} + +arg { + PositionalArg | NamedArg | IncompleteNamedArg +} + +PositionalArg { + value +} + +NamedArg { + Identifier "=" value +} + +IncompleteNamedArg { + Identifier "=" +} + +BinOp { + operand ~ambig space !multiplicative "*" space operand | + operand ~ambig space !multiplicative "/" space operand | + operand ~ambig space !additive "+" space operand | + operand ~ambig space !additive "-" space operand +} + +operand { + value | BinOp +} + + +ParenExpr { + leftParen Expression rightParen +} + +value { + ParenExpr | Identifier | Word | String | Number | Boolean +} diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts index d556989..376b5c8 100644 --- a/src/parser/shrimp.terms.ts +++ b/src/parser/shrimp.terms.ts @@ -1,25 +1,16 @@ // This file was generated by lezer-generator. You probably shouldn't edit it. 
export const Identifier = 1, - Command = 2, - CommandPartial = 3, - UnquotedArg = 4, - insertedSemi = 32, - Program = 5, - CommandCall = 6, - NamedArg = 7, - NamedArgPrefix = 8, + Word = 2, + Program = 3, + Expression = 4, + FunctionCall = 5, + PositionalArg = 6, + ParenExpr = 7, + String = 8, Number = 9, - String = 10, - Boolean = 11, - ParenExpr = 12, - leftParen = 13, - Assignment = 14, - equals = 15, - Function = 16, - fn = 17, - Params = 18, - BinOp = 20, - rightParen = 25, - PartialNamedArg = 26, - Arg = 27 + Boolean = 10, + NamedArg = 11, + IncompleteNamedArg = 12, + FunctionCallOrIdentifier = 13, + BinOp = 14 diff --git a/src/parser/shrimp.test.ts b/src/parser/shrimp.test.ts index a5d188c..8c9c6d7 100644 --- a/src/parser/shrimp.test.ts +++ b/src/parser/shrimp.test.ts @@ -1,212 +1,192 @@ -// import { expect, describe, test } from 'bun:test' -// import { afterEach } from 'bun:test' -// import { resetCommandSource, setCommandSource } from '#editor/commands' -// import { beforeEach } from 'bun:test' -// import './shrimp.grammar' // Importing this so changes cause it to retest! +import { expect, describe, test } from 'bun:test' +import { afterEach } from 'bun:test' +import { resetCommandSource, setCommandSource } from '#editor/commands' +import { beforeEach } from 'bun:test' +import './shrimp.grammar' // Importing this so changes cause it to retest! 
-// describe('calling commands', () => { -// beforeEach(() => { -// setCommandSource(() => [ -// { command: 'tail', args: [{ name: 'path', type: 'string' }] }, -// { command: 'head', args: [{ name: 'path', type: 'string' }] }, -// { command: 'echo', args: [{ name: 'path', type: 'string' }] }, -// ]) -// }) +describe('calling functions', () => { + beforeEach(() => { + setCommandSource(() => [ + { + command: 'echo', + args: [{ name: 'path', type: 'string' }], + execute: (p: any) => p, + }, + ]) + }) -// afterEach(() => { -// resetCommandSource() -// }) + afterEach(() => { + resetCommandSource() + }) -// test('basic', () => { -// expect('tail path').toMatchTree(` -// CommandCall -// Command tail -// Arg -// Identifier path -// `) + test('call with no args', () => { + expect('tail').toMatchTree(` + Expression + FunctionCallOrIdentifier + Identifier tail + `) + }) -// expect('tai').toMatchTree(` -// CommandCall -// CommandPartial tai -// `) -// }) + test('call with arg', () => { + expect('tail path').toMatchTree(` + Expression + FunctionCall + Identifier tail + PositionalArg + Identifier path + `) + }) -// test('command with arg that is also a command', () => { -// expect('tail tail').toMatchTree(` -// CommandCall -// Command tail -// Arg -// Identifier tail -// `) + test('call with arg and named arg', () => { + expect('tail path lines=30').toMatchTree(` + Expression + FunctionCall + Identifier tail + PositionalArg + Identifier path + NamedArg + Identifier lines + Number 30 + `) + }) -// expect('tai').toMatchTree(` -// CommandCall -// CommandPartial tai -// `) -// }) + test('command with arg that is also a command', () => { + expect('tail tail').toMatchTree(` + Expression + FunctionCall + Identifier tail + PositionalArg + Identifier tail + `) -// test('when no commands match, falls back to Identifier', () => { -// expect('omgwtf').toMatchTree(` -// Identifier omgwtf -// `) -// }) + expect('tai').toMatchTree(` + Expression + FunctionCallOrIdentifier + Identifier tai + `) 
+ }) -// // In shrimp.test.ts, add to the 'calling commands' section -// test('arg', () => { -// expect('tail l').toMatchTree(` -// CommandCall -// Command tail -// Arg -// Identifier l -// `) -// }) + test.skip('when no commands match, falls back to Identifier', () => { + expect('omgwtf').toMatchTree(` + Identifier omgwtf + `) + }) -// test('partial namedArg', () => { -// expect('tail lines=').toMatchTree(` -// CommandCall -// Command tail -// PartialNamedArg -// NamedArgPrefix lines= -// `) -// }) + test('Incomplete namedArg', () => { + expect('tail lines=').toMatchTree(` + Expression + FunctionCall + Identifier tail + IncompleteNamedArg + Identifier lines + `) + }) +}) -// test('complete namedArg', () => { -// expect('tail lines=10').toMatchTree(` -// CommandCall -// Command tail -// NamedArg -// NamedArgPrefix lines= -// Number 10 -// `) -// }) +describe('Identifier', () => { + test('fails on underscores and capital letters', () => { + expect('myVar').toFailParse() + expect('underscore_var').toFailParse() + expect('_leadingUnderscore').toFailParse() + expect('trailingUnderscore_').toFailParse() + expect('mixed-123_var').toFailParse() + }) -// test('mixed positional and named args', () => { -// expect('tail ../file.txt lines=5').toMatchTree(` -// CommandCall -// Command tail -// Arg -// UnquotedArg ../file.txt -// NamedArg -// NamedArgPrefix lines= -// Number 5 -// `) -// }) + test('parses identifiers with emojis and dashes', () => { + expect('moo-😊-34').toMatchTree(` + Expression + FunctionCallOrIdentifier + Identifier moo-😊-34`) + }) +}) -// test('named args', () => { -// expect(`tail lines='5' path`).toMatchTree(` -// CommandCall -// Command tail -// NamedArg -// NamedArgPrefix lines= -// String 5 -// Arg -// Identifier path -// `) -// }) +describe('Parentheses', () => { + test('parses expressions with parentheses correctly', () => { + expect('(2 + 3)').toMatchTree(` + Expression + ParenExpr + Expression + BinOp + Number 2 + operator + + Number 3`) + }) -// 
test('complex args', () => { -// expect(`tail lines=(2 + 3) filter='error' (a + b)`).toMatchTree(` -// CommandCall -// Command tail -// NamedArg -// NamedArgPrefix lines= -// paren ( -// BinOp -// Number 2 -// operator + -// Number 3 -// paren ) -// NamedArg -// NamedArgPrefix filter= -// String error + test('allows parens in function calls', () => { + expect('echo (3 + 3)').toMatchTree(` + Expression + FunctionCall + Identifier echo + PositionalArg + ParenExpr + Expression + BinOp + Number 3 + operator + + Number 3`) + }) +}) -// Arg -// paren ( -// BinOp -// Identifier a -// operator + -// Identifier b -// paren ) -// `) -// }) -// }) +describe('BinOp', () => { + test('addition tests', () => { + expect('2 + 3').toMatchTree(` + Expression + BinOp + Number 2 + operator + + Number 3 + `) + }) -// describe('Identifier', () => { -// test('parses simple identifiers', () => { -// expect('hyphenated-var').toMatchTree(`Identifier hyphenated-var`) -// expect('var').toMatchTree(`Identifier var`) -// expect('var123').toMatchTree(`Identifier var123`) -// }) + test('subtraction tests', () => { + expect('5 - 2').toMatchTree(` + Expression + BinOp + Number 5 + operator - + Number 2 + `) + }) -// test('fails on underscores and capital letters', () => { -// expect('myVar').toFailParse() -// expect('underscore_var').toFailParse() -// expect('_leadingUnderscore').toFailParse() -// expect('trailingUnderscore_').toFailParse() -// expect('mixed-123_var').toFailParse() -// }) + test('multiplication tests', () => { + expect('4 * 3').toMatchTree(` + Expression + BinOp + Number 4 + operator * + Number 3 + `) + }) -// test('parses identifiers with emojis', () => { -// expect('var😊').toMatchTree(`Identifier var😊`) -// expect('😊').toMatchTree(`Identifier 😊`) -// }) -// }) + test('division tests', () => { + expect('8 / 2').toMatchTree(` + Expression + BinOp + Number 8 + operator / + Number 2 + `) + }) -// describe('BinOp', () => { -// test('addition tests', () => { -// expect('2 + 
3').toMatchTree(` -// BinOp -// Number 2 -// operator + -// Number 3 -// `) -// }) - -// test('subtraction tests', () => { -// expect('5 - 2').toMatchTree(` -// BinOp -// Number 5 -// operator - -// Number 2 -// `) -// }) - -// test('multiplication tests', () => { -// expect('4 * 3').toMatchTree(` -// BinOp -// Number 4 -// operator * -// Number 3 -// `) -// }) - -// test('division tests', () => { -// expect('8 / 2').toMatchTree(` -// BinOp -// Number 8 -// operator / -// Number 2 -// `) -// }) - -// test('mixed operations with precedence', () => { -// expect('2 + 3 * 4 - 5 / 1').toMatchTree(` -// BinOp -// BinOp -// Number 2 -// operator + -// BinOp -// Number 3 -// operator * -// Number 4 -// operator - -// BinOp -// Number 5 -// operator / -// Number 1 -// `) -// }) -// }) + test('mixed operations with precedence', () => { + expect('2 + 3 * 4 - 5 / 1').toMatchTree(` + Expression + BinOp + BinOp + Number 2 + operator + + BinOp + Number 3 + operator * + Number 4 + operator - + BinOp + Number 5 + operator / + Number 1 + `) + }) +}) // describe('Fn', () => { // test('parses function with single parameter', () => { @@ -291,41 +271,3 @@ // Identifier b`) // }) // }) - -// describe('Parentheses', () => { -// test('parses expressions with parentheses correctly', () => { -// expect('(2 + 3) * 4').toMatchTree(` -// BinOp -// paren ( -// BinOp -// Number 2 -// operator + -// Number 3 -// paren ) -// operator * -// Number 4`) -// }) - -// test('parses nested parentheses correctly', () => { -// expect('((1 + 2) * (3 - 4)) / 5').toMatchTree(` -// BinOp -// paren ( -// BinOp -// paren ( -// BinOp -// Number 1 -// operator + -// Number 2 -// paren ) -// operator * -// paren ( -// BinOp -// Number 3 -// operator - -// Number 4 -// paren ) -// paren ) -// operator / -// Number 5`) -// }) -// }) diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 1217c02..8a9f492 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -1,19 +1,19 @@ // This file was generated by 
lezer-generator. You probably shouldn't edit it. import {LRParser} from "@lezer/lr" -import {tokenizer, argTokenizer, insertSemicolon} from "./tokenizers" +import {tokenizer} from "./tokenizers" import {highlighting} from "./highlight.js" export const parser = LRParser.deserialize({ version: 14, - states: "%jQVQTOOOqQaO'#DRO!]QTO'#ClO!eQaO'#DPOOQ`'#DS'#DSO!yQTO'#ChOOQl'#DR'#DRO#vQnO'#CbO!qQaO'#DPOOQS'#Cx'#CxQVQTOOO!yQTO,59UOOQS'#Cz'#CzO$QQTO'#CnO$YQPO,59WO!yQTO,59[O!yQTO,59[OOQS'#DT'#DTOOQS,59k,59kO$_QPO,59SOOQl'#DQ'#DQO%UQnO'#CvOOQl'#Cw'#CwOOQl'#Cy'#CyO%cQnO,58|OOQS-E6v-E6vO%mQaO1G.pOOQS-E6x-E6xO!yQTO1G.rOOQ`1G.v1G.vO&UQaO1G.vOOQl1G.n1G.nOOQl,58},58}OOQl-E6w-E6wO&mQaO7+$^", - stateData: "'X~OrOS~OPPOQVORVOXUOYUOZUO]TOaQO~O_ZOeuXfuXguXhuXpuXxuXiuX~OP[OcbP~Oe_Of_Og`Oh`OpaOxaO~OPPOXUOYUOZUO]TOaQO~OPUOSdOWeOXUOYUOZUO]TO~OpUXxUX~P#_OP[OcbX~OclO~Oe_Of_Og`Oh`OioO~OPUOSdOXUOYUOZUO]TO~OWjXpjXxjX~P$pOpUaxUa~P#_Oe_Of_Og`Oh`Op^ix^ii^i~Oe_Of_Ogdihdipdixdiidi~Oe_Of_Og`Oh`Op`qx`qi`q~OXh~", - goto: "$PxPPPPPPy}PPPP!RP!_P!_P!hP!_PPPPP}}!k!q!wPPPP!}#R#Y#h#{TWOYTgVheUOTVYZ_`ehl_SOTYZ_`lR^QQYORiYQhVRqhQ]QRk]TXOYSfVhRpe^SOTYZ_`lVdVehSROYQcTQjZQm_Qn`RrlTbRW", - nodeNames: "⚠ Identifier Command CommandPartial UnquotedArg Program CommandCall NamedArg NamedArgPrefix Number String Boolean ParenExpr paren Assignment operator Function keyword Params colon BinOp operator operator operator operator paren PartialNamedArg Arg", - maxTerm: 40, + states: "$nQQOTOOOQOTO'#CcOfOPO'#CtOqOPO'#CtOOOO'#Cx'#CxO!POPO'#CxO![OPOOOOOO'#C`'#C`O!aOPO'#CoQQOTOOO!fOPO,58}O!|OTO'#CpO#TOPO,58{O#`OQO,59UOOOS,59Z,59ZOOOS-E6m-E6mOOOO1G.i1G.iOOOO'#Ct'#CtO#nOPO'#CtOOOO'#Cb'#CbOOOO'#Cs'#CsOOOO,59[,59[OOOO-E6n-E6nO#|OPO1G.pO$ROTO,59SO$cOTO7+$[OOOO1G.m1G.mOOOO< { let ch = getFullCodePoint(input, 0) - if (!isLowercaseLetter(ch) && !isEmoji(ch)) return + if (isWhitespace(ch) || ch === -1) return let pos = getCharSize(ch) - let text = String.fromCodePoint(ch) + let isValidIdentifier = isLowercaseLetter(ch) || isEmoji(ch) - // 
Continue consuming identifier characters while (true) { ch = getFullCodePoint(input, pos) + if (isWhitespace(ch) || ch === -1) break - if (isLowercaseLetter(ch) || isDigit(ch) || ch === 45 /* - */ || isEmoji(ch)) { - text += String.fromCodePoint(ch) - pos += getCharSize(ch) - } else { - break + // Only stop at = if we could parse a NamedArg here + if (ch === 61 /* = */ && isValidIdentifier) { + break // Stop, let grammar handle identifier = value } + + // Track identifier validity + if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 && !isEmoji(ch)) { + isValidIdentifier = false + } + + pos += getCharSize(ch) } input.advance(pos) - - if (!stack.canShift(Command) && !stack.canShift(CommandPartial)) { - input.acceptToken(Identifier) - return - } - - const { match, partialMatches } = matchingCommands(text) - if (match) { - input.acceptToken(Command) - } else if (partialMatches.length > 0) { - input.acceptToken(CommandPartial) - } else { - input.acceptToken(Identifier) - } + input.acceptToken(isValidIdentifier ? 
Identifier : Word) }) -export const argTokenizer = new ExternalTokenizer((input: InputStream, stack: Stack) => { - // Only match if we're in a command argument position - if (!stack.canShift(UnquotedArg)) return - - const firstCh = input.peek(0) - - // Don't match if it starts with tokens we handle elsewhere - if ( - firstCh === 39 /* ' */ || - firstCh === 40 /* ( */ || - firstCh === 45 /* - (for negative numbers) */ || - (firstCh >= 48 && firstCh <= 57) /* 0-9 (numbers) */ - ) - return - - // Read everything that's not a space, newline, or paren - let pos = 0 - while (true) { - const ch = input.peek(pos) - if ( - ch === -1 || - ch === 32 /* space */ || - ch === 10 /* \n */ || - ch === 40 /* ( */ || - ch === 41 /* ) */ || - ch === 61 /* = */ - ) - break - pos++ - } - - if (pos > 0) { - input.advance(pos) - input.acceptToken(UnquotedArg) - } -}) - -export const insertSemicolon = new ExternalTokenizer((input: InputStream, stack: Stack) => { - const next = input.peek(0) - - // We're at a newline or end of file - if (next === 10 /* \n */ || next === -1 /* EOF */) { - // Check if insertedSemi would be valid here - if (stack.canShift(insertedSemi)) { - // Don't advance! Virtual token has zero width - input.acceptToken(insertedSemi, 0) - } - } -}) +const isWhitespace = (ch: number): boolean => { + return ch === 32 /* space */ || ch === 10 /* \n */ || ch === 9 /* tab */ || ch === 13 /* \r */ +} const isLowercaseLetter = (ch: number): boolean => { return ch >= 97 && ch <= 122 // a-z diff --git a/src/testSetup.ts b/src/testSetup.ts index d9bedd0..6d990ab 100644 --- a/src/testSetup.ts +++ b/src/testSetup.ts @@ -30,6 +30,7 @@ await regenerateParser() declare module 'bun:test' { interface Matchers { toMatchTree(expected: string): T + toMatchExpression(expected: string): T toFailParse(): T toEvaluateTo(expected: unknown): T } @@ -153,13 +154,7 @@ const treeToString = (tree: Tree, input: string): string => { cursor.parent() } else { const cleanText = nodeName === 'String' ? 
text.slice(1, -1) : text - // Node names that should be displayed as single tokens (operators, keywords) - const singleTokens = ['+', '-', '*', '/', '->', 'fn', '=', 'equals'] - if (singleTokens.includes(nodeName)) { - lines.push(`${indent}${nodeName}`) - } else { - lines.push(`${indent}${nodeName} ${cleanText}`) - } + lines.push(`${indent}${nodeName} ${cleanText}`) } }