Corey Johnson 2025-10-06 13:18:47 -07:00
parent eff09931ad
commit e0fafc0088
9 changed files with 446 additions and 548 deletions

README.md
View File

@@ -1,89 +1,36 @@
# Shrimp Parser - Development Context
# Shrimp Language
## Overview
Building a command-line language parser using Lezer (CodeMirror's parser system) with TypeScript. The goal is to create a prototype that can parse commands with arguments, similar to shell syntax, with inline hints for autocompletion.
Shrimp is a shell-like scripting language that combines the simplicity of command-line interfaces with functional programming concepts. Built using Lezer (CodeMirror's parser system) with TypeScript.
## Current Architecture
## Language Design Philosophy
### Grammar Structure (`shrimp.grammar`)
- **Everything is an expression** - Commands, assignments, and functions all return values
- **Whitespace matters** - Spaces distinguish operators from identifiers (e.g., `x-1` is an identifier, `x - 1` is subtraction; see the sketch after this list)
- **Shell-like command syntax** - `echo hello world` works naturally
- **Named arguments without quotes** - `tail file.txt lines=30`
- **Unbound symbols become strings** - `echo hello` treats `hello` as a string if not defined
- **Simplicity over cleverness** - Each feature should work one way, consistently. Two simple features that are easy to explain beat one complex feature that requires lots of explanation
- **Commands**: Can be complete (`Command`) or partial (`CommandPartial`) for autocomplete
- **Arguments**: Positional or named (with `name=value` syntax)
- **Key Challenge**: Handling arbitrary text (like file paths) as arguments without conflicting with operators/keywords
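A quick illustration of the whitespace rule above, written as a sketch against the project's `toMatchTree` matcher (the tree shapes mirror the tests that ship with this commit; the snippet itself is not part of the diff):

```typescript
import { expect, test } from 'bun:test'

test('no spaces: the dash is part of the identifier', () => {
  expect('x-1').toMatchTree(`
    Expression
      FunctionCallOrIdentifier
        Identifier x-1`)
})

test('with spaces: the dash is subtraction', () => {
  expect('5 - 1').toMatchTree(`
    Expression
      BinOp
        Number 5
        operator -
        Number 1`)
})
```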
## Current Status & Goals
### Tokenizer Setup (`tokenizers.ts`)
### Today's Implementation Goals
1. **Interpreter Setup** - Rename evaluator to interpreter for clarity
2. **Command Execution** - Support calling external commands and built-in functions
3. **Variable Assignment** - Implement assignment with validation using Lezer context tracking
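For goal 3, one hypothetical shape (nothing below is in this commit): Lezer's `ContextTracker` can carry a value through the parse, which external tokenizers can read via `stack.context` when validating an assignment. The `Assignment` and `equals` terms are assumed to exist as they did in the previous grammar.

```typescript
import { ContextTracker } from '@lezer/lr'
// Assumes the grammar exports these terms, as the earlier grammar version did.
import { Assignment, equals } from './shrimp.terms'

// Tracks whether the parse is currently inside the right-hand side of an
// assignment. The grammar would reference it with:
//   @context assignmentTracker from "./assignmentTracker"
export const assignmentTracker = new ContextTracker<boolean>({
  start: false,
  // shifting `=` moves us into an assignment's right-hand side
  shift: (inAssignment, term) => (term === equals ? true : inAssignment),
  // reducing the Assignment node moves us back out
  reduce: (inAssignment, term) => (term === Assignment ? false : inAssignment),
})
```

An external tokenizer can then check `stack.context` before deciding how strictly to validate the token it is about to emit.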
- **Main tokenizer**: Returns `Command`, `CommandPartial`, or `Identifier` based on context
- **Command matching**: Uses `matchCommand()` to check against available commands
- **Context-aware**: Uses `stack.canShift()` to return appropriate token based on parse position
- **Issue**: The second occurrence of a command name (e.g., `tail tail`) should be an `Identifier`, not a `Command`
### Parser Features
- ✅ Distinguishes between identifiers (assignable) and words (non-assignable)
- ✅ Smart tokenization for named args (`lines=30` splits, but `./path=value` stays together)
- ✅ Handles ambiguous cases (bare identifier could be function call or variable reference)
### Key Design Decisions
## Grammar Architecture
1. **External tokenizers over regular tokens** for commands to enable:
See `src/parser/example.shrimp` for language examples and `src/parser/shrimp.grammar` for the full grammar.
- Dynamic command list (can change at runtime)
- Partial matching for autocomplete
- Context-aware tokenization
2. **Virtual semicolons** for statement boundaries:
- Using `insertSemicolon` external tokenizer
- Inserts at newlines/EOF to keep the parser "inside" CommandCall (sketched in code after this list)
- Prevents `tail t` from parsing as two separate commands
3. **UnquotedArg token** for paths/arbitrary text:
- Accepts anything except whitespace/parens/equals
- Only valid in command argument context
- Avoids conflicts with operators elsewhere
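Decision 2 in code, condensed from the `insertSemicolon` tokenizer in `tokenizers.ts` (the full version appears further down): the key move is accepting a zero-width token so the newline itself is never consumed.

```typescript
import { ExternalTokenizer } from '@lezer/lr'
import { insertedSemi } from './shrimp.terms'

export const insertSemicolon = new ExternalTokenizer((input, stack) => {
  const next = input.peek(0)
  // At a newline or EOF, and only where the grammar could accept a statement end,
  // emit insertedSemi with zero width so the newline is left untouched.
  if ((next === 10 /* \n */ || next === -1 /* EOF */) && stack.canShift(insertedSemi)) {
    input.acceptToken(insertedSemi, 0)
  }
})
```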
### Current Problems
1. **Parser completes CommandCall too early**
- After `tail `, cursor shows position in `Program` not `CommandCall`
- Makes hint system harder to implement
2. **Command token in wrong context**
- `tail tail` - second "tail" returns `Command` token but should be `Identifier`
- Need better context checking in tokenizer
3. **Inline hints need to be smarter**
- Must look backward to find command context
- Handle cases where parser has "completed" the command
### Test Infrastructure
- Custom test matchers: `toMatchTree`, `toEvaluateTo`
- Command source injection for testing: `setCommandSource()`
- Tests in `shrimp.test.ts`
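The matchers are presumably registered with `expect.extend` in the test setup file. A minimal sketch of what one of them could look like; the real implementation differs, and `toFailParse` here simply looks for error nodes, since Lezer never throws on bad input:

```typescript
import { expect } from 'bun:test'
import { parser } from './shrimp'

expect.extend({
  toFailParse(received: unknown) {
    const tree = parser.parse(String(received))
    let sawError = false
    tree.iterate({
      enter: (node) => {
        if (node.type.isError) sawError = true
      },
    })
    return {
      pass: sawError,
      message: () => `expected ${JSON.stringify(received)} ${sawError ? 'not ' : ''}to fail parsing`,
    }
  },
})
```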
### File Structure
```
src/parser/
shrimp.grammar - Lezer grammar definition
tokenizers.ts - External tokenizers
shrimp.ts - Generated parser
src/editor/
commands.ts - Command definitions
plugins/
inlineHints.tsx - Autocomplete hint UI
```
## Next Steps
1. Fix tokenizer context checking with `stack.canShift()`
2. Improve hint detection for "after command with space" case
3. Consider if grammar structure changes would help
## Key Concepts to Remember
- Lezer is an LR parser - it builds the tree bottom-up
- External tokenizers run at each position
- `@skip { space }` makes whitespace invisible to the parser
- Token precedence matters for overlap resolution
- `stack.canShift(tokenId)` checks whether a token is valid at the current position
### Key Token Types
- **Identifier** - Lowercase/emoji start, can contain dashes/numbers (assignable)
- **Word** - Any non-whitespace that isn't a valid identifier (paths, URLs, etc.)
- **FunctionCall** - Identifier followed by arguments
- **FunctionCallOrIdentifier** - Ambiguous case resolved at runtime
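The last two are easiest to see side by side; these trees mirror the ones asserted in `shrimp.test.ts` below:

```typescript
import { expect, test } from 'bun:test'

test('bare identifier vs call with an argument', () => {
  // Alone, an identifier stays ambiguous until evaluation...
  expect('tail').toMatchTree(`
    Expression
      FunctionCallOrIdentifier
        Identifier tail`)

  // ...but followed by an argument it parses as a FunctionCall.
  expect('tail path').toMatchTree(`
    Expression
      FunctionCall
        Identifier tail
        PositionalArg
          Identifier path`)
})
```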

View File

@@ -1,87 +1,87 @@
import { resetCommandSource, setCommandSource, type CommandShape } from '#editor/commands'
import { expect, test } from 'bun:test'
// import { resetCommandSource, setCommandSource, type CommandShape } from '#editor/commands'
// import { expect, test } from 'bun:test'
test('number literal', () => {
expect('42').toEvaluateTo(42)
})
// test('number literal', () => {
// expect('42').toEvaluateTo(42)
// })
test('negative number', () => {
expect('-5').toEvaluateTo(-5)
})
// test('negative number', () => {
// expect('-5').toEvaluateTo(-5)
// })
test('string literal', () => {
expect(`'hello'`).toEvaluateTo('hello')
})
// test('string literal', () => {
// expect(`'hello'`).toEvaluateTo('hello')
// })
test('boolean true', () => {
expect('true').toEvaluateTo(true)
})
// test('boolean true', () => {
// expect('true').toEvaluateTo(true)
// })
test('boolean false', () => {
expect('false').toEvaluateTo(false)
})
// test('boolean false', () => {
// expect('false').toEvaluateTo(false)
// })
test('addition', () => {
expect('2 + 3').toEvaluateTo(5)
})
// test('addition', () => {
// expect('2 + 3').toEvaluateTo(5)
// })
test('subtraction', () => {
expect('10 - 4').toEvaluateTo(6)
})
// test('subtraction', () => {
// expect('10 - 4').toEvaluateTo(6)
// })
test('multiplication', () => {
expect('3 * 4').toEvaluateTo(12)
})
// test('multiplication', () => {
// expect('3 * 4').toEvaluateTo(12)
// })
test('division', () => {
expect('15 / 3').toEvaluateTo(5)
})
// test('division', () => {
// expect('15 / 3').toEvaluateTo(5)
// })
test('assign number', () => {
expect('x = 5').toEvaluateTo(5)
})
// test('assign number', () => {
// expect('x = 5').toEvaluateTo(5)
// })
test('emoji assignment to number', () => {
expect('💎 = 5').toEvaluateTo(5)
})
// test('emoji assignment to number', () => {
// expect('💎 = 5').toEvaluateTo(5)
// })
test('assign string', () => {
expect(`name = 'Alice'`).toEvaluateTo('Alice')
})
// test('assign string', () => {
// expect(`name = 'Alice'`).toEvaluateTo('Alice')
// })
test('assign expression', () => {
expect('sum = 2 + 3').toEvaluateTo(5)
})
// test('assign expression', () => {
// expect('sum = 2 + 3').toEvaluateTo(5)
// })
test('parentheses', () => {
expect('(2 + 3) * 4').toEvaluateTo(20)
})
// test('parentheses', () => {
// expect('(2 + 3) * 4').toEvaluateTo(20)
// })
test('simple command', () => {
const commands: CommandShape[] = [
{
command: 'echo',
args: [{ name: 'text', type: 'string' }],
execute: (text: string) => text,
},
]
// test('simple command', () => {
// const commands: CommandShape[] = [
// {
// command: 'echo',
// args: [{ name: 'text', type: 'string' }],
// execute: (text: string) => text,
// },
// ]
withCommands(commands, () => {
expect(`echo 'hello'`).toEvaluateTo('hello')
})
})
// withCommands(commands, () => {
// expect(`echo 'hello'`).toEvaluateTo('hello')
// })
// })
test.only('function', () => {
expect(`add = fn a b: a + b; add 2 4`).toEvaluateTo(5)
})
// test.only('function', () => {
// expect(`add = fn a b: a + b; add 2 4`).toEvaluateTo(5)
// })
const withCommands = (commands: CommandShape[], fn: () => void) => {
try {
setCommandSource(() => commands)
fn()
} catch (e) {
throw e
} finally {
resetCommandSource()
}
}
// const withCommands = (commands: CommandShape[], fn: () => void) => {
// try {
// setCommandSource(() => commands)
// fn()
// } catch (e) {
// throw e
// } finally {
// resetCommandSource()
// }
// }

View File

@@ -0,0 +1,79 @@
@external propSource highlighting from "./highlight.js"
@top Program { line* }
line {
CommandCall semi |
expr semi
}
@skip { space }
@tokens {
@precedence { Number "-"}
space { @whitespace+ }
Number { "-"? $[0-9]+ ('.' $[0-9]+)? }
Boolean { "true" | "false" }
String { '\'' ![']* '\'' }
NamedArgPrefix { $[a-z]+ $[a-z0-9\-]* "=" } // matches "lines=", "follow=", etc.
fn[@name=keyword] { "fn" }
equals[@name=operator] { "=" }
":"[@name=colon]
"+"[@name=operator]
"-"[@name=operator]
"*"[@name=operator]
"/"[@name=operator]
leftParen[@name=paren] { "(" }
rightParen[@name=paren] { ")" }
}
@external tokens tokenizer from "./tokenizers" {
Identifier,
Command,
CommandPartial
}
@external tokens argTokenizer from "./tokenizers" {
UnquotedArg
}
@external tokens insertSemicolon from "./tokenizers" { insertedSemi }
@precedence {
multiplicative @left,
additive @left,
namedComplete @left,
function @right
assignment @right
}
expr {
Assignment |
Function |
BinOp |
atom
}
semi { insertedSemi | ";" }
argValue { atom | UnquotedArg }
CommandCall { (Command | CommandPartial) (NamedArg | PartialNamedArg | Arg)* }
Arg { !namedComplete argValue }
NamedArg { NamedArgPrefix !namedComplete argValue } // Required atom, higher precedence
PartialNamedArg { NamedArgPrefix } // Just the prefix
Assignment { Identifier !assignment equals expr }
Function { !function fn Params ":" expr }
Params { Identifier* }
BinOp {
expr !multiplicative "*" expr |
expr !multiplicative "/" expr |
expr !additive "+" expr |
expr !additive "-" expr
}
ParenExpr { leftParen expr rightParen }
atom { Identifier ~command | Number | String | Boolean | ParenExpr }

View File

@@ -1,79 +1,78 @@
@external propSource highlighting from "./highlight.js"
@top Program { line* }
line {
CommandCall semi |
expr semi
}
@skip { space }
@top Program { (Expression newline)* }
@tokens {
@precedence { Number "-"}
space { @whitespace+ }
Number { "-"? $[0-9]+ ('.' $[0-9]+)? }
Boolean { "true" | "false" }
String { '\'' ![']* '\'' }
NamedArgPrefix { $[a-z]+ $[a-z0-9\-]* "=" } // matches "lines=", "follow=", etc.
fn[@name=keyword] { "fn" }
equals[@name=operator] { "=" }
":"[@name=colon]
newline { "\n" | @eof }
space { " " }
leftParen { "(" }
rightParen { ")" }
"+"[@name=operator]
"-"[@name=operator]
"*"[@name=operator]
"/"[@name=operator]
leftParen[@name=paren] { "(" }
rightParen[@name=paren] { ")" }
}
@external tokens tokenizer from "./tokenizers" {
Identifier,
Command,
CommandPartial
}
@external tokens argTokenizer from "./tokenizers" {
UnquotedArg
}
@external tokens insertSemicolon from "./tokenizers" { insertedSemi }
@external tokens tokenizer from "./tokenizers" { Identifier, Word }
@precedence {
multiplicative @left,
additive @left,
namedComplete @left,
function @right
assignment @right
additive @left
}
expr {
Assignment |
Function |
Expression {
FunctionCall |
FunctionCallOrIdentifier |
BinOp |
atom
ParenExpr |
Word |
String |
Number |
Boolean
}
semi { insertedSemi | ";" }
argValue { atom | UnquotedArg }
FunctionCallOrIdentifier {
Identifier
}
CommandCall { (Command | CommandPartial) (NamedArg | PartialNamedArg | Arg)* }
Arg { !namedComplete argValue }
NamedArg { NamedArgPrefix !namedComplete argValue } // Required atom, higher precedence
PartialNamedArg { NamedArgPrefix } // Just the prefix
FunctionCall {
Identifier (~ambig space arg)+
}
Assignment { Identifier !assignment equals expr }
arg {
PositionalArg | NamedArg | IncompleteNamedArg
}
Function { !function fn Params ":" expr }
Params { Identifier* }
PositionalArg {
value
}
NamedArg {
Identifier "=" value
}
IncompleteNamedArg {
Identifier "="
}
BinOp {
expr !multiplicative "*" expr |
expr !multiplicative "/" expr |
expr !additive "+" expr |
expr !additive "-" expr
operand ~ambig space !multiplicative "*" space operand |
operand ~ambig space !multiplicative "/" space operand |
operand ~ambig space !additive "+" space operand |
operand ~ambig space !additive "-" space operand
}
ParenExpr { leftParen expr rightParen }
atom { Identifier ~command | Number | String | Boolean | ParenExpr }
operand {
value | BinOp
}
ParenExpr {
leftParen Expression rightParen
}
value {
ParenExpr | Identifier | Word | String | Number | Boolean
}
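A quick way to see what the generated parser produces for a line of Shrimp, using the standard Lezer tree-cursor API (the test suite's `toMatchTree` matcher prints the same information more neatly):

```typescript
import { parser } from './shrimp'

const input = 'tail path lines=30'
const tree = parser.parse(input)
const cursor = tree.cursor()

do {
  // Print each node name together with the source text it covers.
  console.log(cursor.name, JSON.stringify(input.slice(cursor.from, cursor.to)))
} while (cursor.next())

// Per the tests, the shape is roughly:
// Program > Expression > FunctionCall > Identifier, PositionalArg, NamedArg
```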

View File

@@ -1,25 +1,16 @@
// This file was generated by lezer-generator. You probably shouldn't edit it.
export const
Identifier = 1,
Command = 2,
CommandPartial = 3,
UnquotedArg = 4,
insertedSemi = 32,
Program = 5,
CommandCall = 6,
NamedArg = 7,
NamedArgPrefix = 8,
Word = 2,
Program = 3,
Expression = 4,
FunctionCall = 5,
PositionalArg = 6,
ParenExpr = 7,
String = 8,
Number = 9,
String = 10,
Boolean = 11,
ParenExpr = 12,
leftParen = 13,
Assignment = 14,
equals = 15,
Function = 16,
fn = 17,
Params = 18,
BinOp = 20,
rightParen = 25,
PartialNamedArg = 26,
Arg = 27
Boolean = 10,
NamedArg = 11,
IncompleteNamedArg = 12,
FunctionCallOrIdentifier = 13,
BinOp = 14

View File

@@ -1,212 +1,192 @@
// import { expect, describe, test } from 'bun:test'
// import { afterEach } from 'bun:test'
// import { resetCommandSource, setCommandSource } from '#editor/commands'
// import { beforeEach } from 'bun:test'
// import './shrimp.grammar' // Importing this so changes cause it to retest!
import { expect, describe, test } from 'bun:test'
import { afterEach } from 'bun:test'
import { resetCommandSource, setCommandSource } from '#editor/commands'
import { beforeEach } from 'bun:test'
import './shrimp.grammar' // Importing this so changes cause it to retest!
// describe('calling commands', () => {
// beforeEach(() => {
// setCommandSource(() => [
// { command: 'tail', args: [{ name: 'path', type: 'string' }] },
// { command: 'head', args: [{ name: 'path', type: 'string' }] },
// { command: 'echo', args: [{ name: 'path', type: 'string' }] },
// ])
// })
describe('calling functions', () => {
beforeEach(() => {
setCommandSource(() => [
{
command: 'echo',
args: [{ name: 'path', type: 'string' }],
execute: (p: any) => p,
},
])
})
// afterEach(() => {
// resetCommandSource()
// })
afterEach(() => {
resetCommandSource()
})
// test('basic', () => {
// expect('tail path').toMatchTree(`
// CommandCall
// Command tail
// Arg
// Identifier path
// `)
test('call with no args', () => {
expect('tail').toMatchTree(`
Expression
FunctionCallOrIdentifier
Identifier tail
`)
})
// expect('tai').toMatchTree(`
// CommandCall
// CommandPartial tai
// `)
// })
test('call with arg', () => {
expect('tail path').toMatchTree(`
Expression
FunctionCall
Identifier tail
PositionalArg
Identifier path
`)
})
// test('command with arg that is also a command', () => {
// expect('tail tail').toMatchTree(`
// CommandCall
// Command tail
// Arg
// Identifier tail
// `)
test('call with arg and named arg', () => {
expect('tail path lines=30').toMatchTree(`
Expression
FunctionCall
Identifier tail
PositionalArg
Identifier path
NamedArg
Identifier lines
Number 30
`)
})
// expect('tai').toMatchTree(`
// CommandCall
// CommandPartial tai
// `)
// })
test('command with arg that is also a command', () => {
expect('tail tail').toMatchTree(`
Expression
FunctionCall
Identifier tail
PositionalArg
Identifier tail
`)
// test('when no commands match, falls back to Identifier', () => {
// expect('omgwtf').toMatchTree(`
// Identifier omgwtf
// `)
// })
expect('tai').toMatchTree(`
Expression
FunctionCallOrIdentifier
Identifier tai
`)
})
// // In shrimp.test.ts, add to the 'calling commands' section
// test('arg', () => {
// expect('tail l').toMatchTree(`
// CommandCall
// Command tail
// Arg
// Identifier l
// `)
// })
test.skip('when no commands match, falls back to Identifier', () => {
expect('omgwtf').toMatchTree(`
Identifier omgwtf
`)
})
// test('partial namedArg', () => {
// expect('tail lines=').toMatchTree(`
// CommandCall
// Command tail
// PartialNamedArg
// NamedArgPrefix lines=
// `)
// })
test('Incomplete namedArg', () => {
expect('tail lines=').toMatchTree(`
Expression
FunctionCall
Identifier tail
IncompleteNamedArg
Identifier lines
`)
})
})
// test('complete namedArg', () => {
// expect('tail lines=10').toMatchTree(`
// CommandCall
// Command tail
// NamedArg
// NamedArgPrefix lines=
// Number 10
// `)
// })
describe('Identifier', () => {
test('fails on underscores and capital letters', () => {
expect('myVar').toFailParse()
expect('underscore_var').toFailParse()
expect('_leadingUnderscore').toFailParse()
expect('trailingUnderscore_').toFailParse()
expect('mixed-123_var').toFailParse()
})
// test('mixed positional and named args', () => {
// expect('tail ../file.txt lines=5').toMatchTree(`
// CommandCall
// Command tail
// Arg
// UnquotedArg ../file.txt
// NamedArg
// NamedArgPrefix lines=
// Number 5
// `)
// })
test('parses identifiers with emojis and dashes', () => {
expect('moo-😊-34').toMatchTree(`
Expression
FunctionCallOrIdentifier
Identifier moo-😊-34`)
})
})
// test('named args', () => {
// expect(`tail lines='5' path`).toMatchTree(`
// CommandCall
// Command tail
// NamedArg
// NamedArgPrefix lines=
// String 5
// Arg
// Identifier path
// `)
// })
describe('Parentheses', () => {
test('parses expressions with parentheses correctly', () => {
expect('(2 + 3)').toMatchTree(`
Expression
ParenExpr
Expression
BinOp
Number 2
operator +
Number 3`)
})
// test('complex args', () => {
// expect(`tail lines=(2 + 3) filter='error' (a + b)`).toMatchTree(`
// CommandCall
// Command tail
// NamedArg
// NamedArgPrefix lines=
// paren (
// BinOp
// Number 2
// operator +
// Number 3
// paren )
// NamedArg
// NamedArgPrefix filter=
// String error
test('allows parens in function calls', () => {
expect('echo (3 + 3)').toMatchTree(`
Expression
FunctionCall
Identifier echo
PositionalArg
ParenExpr
Expression
BinOp
Number 3
operator +
Number 3`)
})
})
// Arg
// paren (
// BinOp
// Identifier a
// operator +
// Identifier b
// paren )
// `)
// })
// })
describe('BinOp', () => {
test('addition tests', () => {
expect('2 + 3').toMatchTree(`
Expression
BinOp
Number 2
operator +
Number 3
`)
})
// describe('Identifier', () => {
// test('parses simple identifiers', () => {
// expect('hyphenated-var').toMatchTree(`Identifier hyphenated-var`)
// expect('var').toMatchTree(`Identifier var`)
// expect('var123').toMatchTree(`Identifier var123`)
// })
test('subtraction tests', () => {
expect('5 - 2').toMatchTree(`
Expression
BinOp
Number 5
operator -
Number 2
`)
})
// test('fails on underscores and capital letters', () => {
// expect('myVar').toFailParse()
// expect('underscore_var').toFailParse()
// expect('_leadingUnderscore').toFailParse()
// expect('trailingUnderscore_').toFailParse()
// expect('mixed-123_var').toFailParse()
// })
test('multiplication tests', () => {
expect('4 * 3').toMatchTree(`
Expression
BinOp
Number 4
operator *
Number 3
`)
})
// test('parses identifiers with emojis', () => {
// expect('var😊').toMatchTree(`Identifier var😊`)
// expect('😊').toMatchTree(`Identifier 😊`)
// })
// })
test('division tests', () => {
expect('8 / 2').toMatchTree(`
Expression
BinOp
Number 8
operator /
Number 2
`)
})
// describe('BinOp', () => {
// test('addition tests', () => {
// expect('2 + 3').toMatchTree(`
// BinOp
// Number 2
// operator +
// Number 3
// `)
// })
// test('subtraction tests', () => {
// expect('5 - 2').toMatchTree(`
// BinOp
// Number 5
// operator -
// Number 2
// `)
// })
// test('multiplication tests', () => {
// expect('4 * 3').toMatchTree(`
// BinOp
// Number 4
// operator *
// Number 3
// `)
// })
// test('division tests', () => {
// expect('8 / 2').toMatchTree(`
// BinOp
// Number 8
// operator /
// Number 2
// `)
// })
// test('mixed operations with precedence', () => {
// expect('2 + 3 * 4 - 5 / 1').toMatchTree(`
// BinOp
// BinOp
// Number 2
// operator +
// BinOp
// Number 3
// operator *
// Number 4
// operator -
// BinOp
// Number 5
// operator /
// Number 1
// `)
// })
// })
test('mixed operations with precedence', () => {
expect('2 + 3 * 4 - 5 / 1').toMatchTree(`
Expression
BinOp
BinOp
Number 2
operator +
BinOp
Number 3
operator *
Number 4
operator -
BinOp
Number 5
operator /
Number 1
`)
})
})
// describe('Fn', () => {
// test('parses function with single parameter', () => {
@@ -291,41 +271,3 @@
// Identifier b`)
// })
// })
// describe('Parentheses', () => {
// test('parses expressions with parentheses correctly', () => {
// expect('(2 + 3) * 4').toMatchTree(`
// BinOp
// paren (
// BinOp
// Number 2
// operator +
// Number 3
// paren )
// operator *
// Number 4`)
// })
// test('parses nested parentheses correctly', () => {
// expect('((1 + 2) * (3 - 4)) / 5').toMatchTree(`
// BinOp
// paren (
// BinOp
// paren (
// BinOp
// Number 1
// operator +
// Number 2
// paren )
// operator *
// paren (
// BinOp
// Number 3
// operator -
// Number 4
// paren )
// paren )
// operator /
// Number 5`)
// })
// })

View File

@@ -1,19 +1,19 @@
// This file was generated by lezer-generator. You probably shouldn't edit it.
import {LRParser} from "@lezer/lr"
import {tokenizer, argTokenizer, insertSemicolon} from "./tokenizers"
import {tokenizer} from "./tokenizers"
import {highlighting} from "./highlight.js"
export const parser = LRParser.deserialize({
version: 14,
states: "%jQVQTOOOqQaO'#DRO!]QTO'#ClO!eQaO'#DPOOQ`'#DS'#DSO!yQTO'#ChOOQl'#DR'#DRO#vQnO'#CbO!qQaO'#DPOOQS'#Cx'#CxQVQTOOO!yQTO,59UOOQS'#Cz'#CzO$QQTO'#CnO$YQPO,59WO!yQTO,59[O!yQTO,59[OOQS'#DT'#DTOOQS,59k,59kO$_QPO,59SOOQl'#DQ'#DQO%UQnO'#CvOOQl'#Cw'#CwOOQl'#Cy'#CyO%cQnO,58|OOQS-E6v-E6vO%mQaO1G.pOOQS-E6x-E6xO!yQTO1G.rOOQ`1G.v1G.vO&UQaO1G.vOOQl1G.n1G.nOOQl,58},58}OOQl-E6w-E6wO&mQaO7+$^",
stateData: "'X~OrOS~OPPOQVORVOXUOYUOZUO]TOaQO~O_ZOeuXfuXguXhuXpuXxuXiuX~OP[OcbP~Oe_Of_Og`Oh`OpaOxaO~OPPOXUOYUOZUO]TOaQO~OPUOSdOWeOXUOYUOZUO]TO~OpUXxUX~P#_OP[OcbX~OclO~Oe_Of_Og`Oh`OioO~OPUOSdOXUOYUOZUO]TO~OWjXpjXxjX~P$pOpUaxUa~P#_Oe_Of_Og`Oh`Op^ix^ii^i~Oe_Of_Ogdihdipdixdiidi~Oe_Of_Og`Oh`Op`qx`qi`q~OXh~",
goto: "$PxPPPPPPy}PPPP!RP!_P!_P!hP!_PPPPP}}!k!q!wPPPP!}#R#Y#h#{TWOYTgVheUOTVYZ_`ehl_SOTYZ_`lR^QQYORiYQhVRqhQ]QRk]TXOYSfVhRpe^SOTYZ_`lVdVehSROYQcTQjZQm_Qn`RrlTbRW",
nodeNames: "⚠ Identifier Command CommandPartial UnquotedArg Program CommandCall NamedArg NamedArgPrefix Number String Boolean ParenExpr paren Assignment operator Function keyword Params colon BinOp operator operator operator operator paren PartialNamedArg Arg",
maxTerm: 40,
states: "$nQQOTOOOQOTO'#CcOfOPO'#CtOqOPO'#CtOOOO'#Cx'#CxO!POPO'#CxO![OPOOOOOO'#C`'#C`O!aOPO'#CoQQOTOOO!fOPO,58}O!|OTO'#CpO#TOPO,58{O#`OQO,59UOOOS,59Z,59ZOOOS-E6m-E6mOOOO1G.i1G.iOOOO'#Ct'#CtO#nOPO'#CtOOOO'#Cb'#CbOOOO'#Cs'#CsOOOO,59[,59[OOOO-E6n-E6nO#|OPO1G.pO$ROTO,59SO$cOTO7+$[OOOO1G.m1G.mOOOO<<Gv<<Gv",
stateData: "$j~OPROQQOWQOXQOYQOiPO~OfhXmSXjSX~OfZOfhXm]Xj]X~OflXmSXjSX~Of]O~Om^O~Oj`O~OQaOWaOXaOYaOiPO~OPbO~P!kOfZOmTajTa~O_gO`gOagObgO~OkhOfhXmhXjhX~OfiO~OPaOf[am[aj[a~P!kOPaO~P!kO",
goto: "#UmPPPPnuz}PPPzzu!XPPPP!a!gPP!m!pPPP!|SWOXRYPVVOPXRdZUQOPXVaZhiQSiVTOPXQXOR_XQ[RRf[ReZWSOPXiQcZRjhUUOPXRki",
nodeNames: "⚠ Identifier Word Program Expression FunctionCall PositionalArg ParenExpr String Number Boolean NamedArg IncompleteNamedArg FunctionCallOrIdentifier BinOp operator operator operator operator",
maxTerm: 29,
propSources: [highlighting],
skippedNodes: [0],
repeatNodeCount: 3,
tokenData: "*W~RjX^!spq!swx#hxy$lyz$qz{$v{|${}!O%Q!P!Q%s!Q![%Y![!]%x!]!^%}!_!`&S#T#Y&X#Y#Z&m#Z#h&X#h#i)[#i#o&X#y#z!s$f$g!s#BY#BZ!s$IS$I_!s$I|$JO!s$JT$JU!s$KV$KW!s&FU&FV!s~!xYr~X^!spq!s#y#z!s$f$g!s#BY#BZ!s$IS$I_!s$I|$JO!s$JT$JU!s$KV$KW!s&FU&FV!s~#kUOr#hsw#hwx#}x;'S#h;'S;=`$f<%lO#h~$SUY~Or#hsw#hwx#}x;'S#h;'S;=`$f<%lO#h~$iP;=`<%l#h~$qO]~~$vOi~~${Oe~~%QOg~~%VPh~!Q![%Y~%_QX~!O!P%e!Q![%Y~%hP!Q![%k~%pPX~!Q![%k~%xOf~~%}Oc~~&SOx~~&XO_~Q&[S}!O&X!Q![&X!_!`&h#T#o&XQ&mOWQ~&pV}!O&X!Q![&X!_!`&h#T#U'V#U#b&X#b#c(y#c#o&X~'YU}!O&X!Q![&X!_!`&h#T#`&X#`#a'l#a#o&X~'oU}!O&X!Q![&X!_!`&h#T#g&X#g#h(R#h#o&X~(UU}!O&X!Q![&X!_!`&h#T#X&X#X#Y(h#Y#o&X~(mSZ~}!O&X!Q![&X!_!`&h#T#o&XR)OSaP}!O&X!Q![&X!_!`&h#T#o&X~)_U}!O&X!Q![&X!_!`&h#T#f&X#f#g)q#g#o&X~)tU}!O&X!Q![&X!_!`&h#T#i&X#i#j(R#j#o&X",
tokenizers: [0, 1, tokenizer, argTokenizer, insertSemicolon],
topRules: {"Program":[0,5]},
tokenPrec: 282
repeatNodeCount: 2,
tokenData: "%i~R^YZ}pq!Swx!Xxy#]yz#bz{#g{|#l}!O#q!P!Q$d!Q![#y!_!`$i#Y#Z$n#h#i%]~~}~!SOm~~!XOf~~![UOr!Xsw!Xwx!nx;'S!X;'S;=`#V<%lO!X~!sUW~Or!Xsw!Xwx!nx;'S!X;'S;=`#V<%lO!X~#YP;=`<%l!X~#bOi~~#gOj~~#lO_~~#qOa~R#vPbQ!Q![#yP$OQXP!O!P$U!Q![#yP$XP!Q![$[P$aPXP!Q![$[~$iO`~~$nOk~~$qP#T#U$t~$wP#`#a$z~$}P#g#h%Q~%TP#X#Y%W~%]OY~~%`P#f#g%c~%fP#i#j%Q",
tokenizers: [0, 1, tokenizer],
topRules: {"Program":[0,3]},
tokenPrec: 0
})

View File

@@ -1,92 +1,37 @@
import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
import { CommandPartial, Command, Identifier, UnquotedArg, insertedSemi } from './shrimp.terms'
import { matchingCommands } from '#editor/commands'
import { Identifier, Word, NamedArg } from './shrimp.terms'
export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack) => {
let ch = getFullCodePoint(input, 0)
if (!isLowercaseLetter(ch) && !isEmoji(ch)) return
if (isWhitespace(ch) || ch === -1) return
let pos = getCharSize(ch)
let text = String.fromCodePoint(ch)
let isValidIdentifier = isLowercaseLetter(ch) || isEmoji(ch)
// Continue consuming identifier characters
while (true) {
ch = getFullCodePoint(input, pos)
if (isWhitespace(ch) || ch === -1) break
// Only stop at = if we could parse a NamedArg here
if (ch === 61 /* = */ && isValidIdentifier) {
break // Stop, let grammar handle identifier = value
}
// Track identifier validity
if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 && !isEmoji(ch)) {
isValidIdentifier = false
}
if (isLowercaseLetter(ch) || isDigit(ch) || ch === 45 /* - */ || isEmoji(ch)) {
text += String.fromCodePoint(ch)
pos += getCharSize(ch)
} else {
break
}
}
input.advance(pos)
if (!stack.canShift(Command) && !stack.canShift(CommandPartial)) {
input.acceptToken(Identifier)
return
}
const { match, partialMatches } = matchingCommands(text)
if (match) {
input.acceptToken(Command)
} else if (partialMatches.length > 0) {
input.acceptToken(CommandPartial)
} else {
input.acceptToken(Identifier)
}
input.acceptToken(isValidIdentifier ? Identifier : Word)
})
export const argTokenizer = new ExternalTokenizer((input: InputStream, stack: Stack) => {
// Only match if we're in a command argument position
if (!stack.canShift(UnquotedArg)) return
const firstCh = input.peek(0)
// Don't match if it starts with tokens we handle elsewhere
if (
firstCh === 39 /* ' */ ||
firstCh === 40 /* ( */ ||
firstCh === 45 /* - (for negative numbers) */ ||
(firstCh >= 48 && firstCh <= 57) /* 0-9 (numbers) */
)
return
// Read everything that's not a space, newline, or paren
let pos = 0
while (true) {
const ch = input.peek(pos)
if (
ch === -1 ||
ch === 32 /* space */ ||
ch === 10 /* \n */ ||
ch === 40 /* ( */ ||
ch === 41 /* ) */ ||
ch === 61 /* = */
)
break
pos++
}
if (pos > 0) {
input.advance(pos)
input.acceptToken(UnquotedArg)
}
})
export const insertSemicolon = new ExternalTokenizer((input: InputStream, stack: Stack) => {
const next = input.peek(0)
// We're at a newline or end of file
if (next === 10 /* \n */ || next === -1 /* EOF */) {
// Check if insertedSemi would be valid here
if (stack.canShift(insertedSemi)) {
// Don't advance! Virtual token has zero width
input.acceptToken(insertedSemi, 0)
}
}
})
const isWhitespace = (ch: number): boolean => {
return ch === 32 /* space */ || ch === 10 /* \n */ || ch === 9 /* tab */ || ch === 13 /* \r */
}
const isLowercaseLetter = (ch: number): boolean => {
return ch >= 97 && ch <= 122 // a-z

View File

@@ -30,6 +30,7 @@ await regenerateParser()
declare module 'bun:test' {
interface Matchers<T> {
toMatchTree(expected: string): T
toMatchExpression(expected: string): T
toFailParse(): T
toEvaluateTo(expected: unknown): T
}
@@ -153,15 +154,9 @@ const treeToString = (tree: Tree, input: string): string => {
cursor.parent()
} else {
const cleanText = nodeName === 'String' ? text.slice(1, -1) : text
// Node names that should be displayed as single tokens (operators, keywords)
const singleTokens = ['+', '-', '*', '/', '->', 'fn', '=', 'equals']
if (singleTokens.includes(nodeName)) {
lines.push(`${indent}${nodeName}`)
} else {
lines.push(`${indent}${nodeName} ${cleanText}`)
}
}
}
const cursor = tree.cursor()
if (cursor.firstChild()) {