// shrimp/src/parser/shrimp.grammar
//
// 203 lines
// 4.5 KiB
// Plaintext

// Node props for syntax highlighting are supplied by the `highlighting` export of ./highlight.
@external propSource highlighting from "./highlight"
// Parse context is tracked by the `trackScope` export of ./scopeTracker.
@context trackScope from "./scopeTracker"
// Default skip set: `space` tokens (a space or a tab) may appear between the tokens of any
// rule that is not declared inside a more specific @skip block.
@skip { space }
// Root node: a program is zero or more `item`s.
@top Program { item* }
// Operator tokens are produced by the external `operatorTokenizer`, not by the @tokens block.
// NOTE(review): Lezer prioritises tokenizers by declaration position, so this presumably has
// to stay ahead of the @tokens block — confirm before moving it.
@external tokens operatorTokenizer from "./operatorTokenizer" { Star, Slash, Plus, Minus, And, Or, Eq, Neq, Lt, Lte, Gt, Gte }
// Built-in (non-external) tokens.
@tokens {
// Token-level conflict resolution: Number is listed ahead of Regex.
@precedence { Number Regex }
// Literal string text: one or more characters other than "'", "\" and "$".
StringFragment { !['\\$]+ }
// Prefix of a named argument: one or more lowercase ASCII letters immediately followed by "=".
NamedArgPrefix { $[a-z]+ "=" }
// Optional sign, one or more digits, optional fractional part (".", then one or more digits).
Number { ("-" | "+")? $[0-9]+ ('.' $[0-9]+)? }
Boolean { "true" | "false" }
// Statement terminator: a single newline or a semicolon.
newlineOrSemicolon { "\n" | ";" }
// Matches the end of the input.
eof { @eof }
// A single space or tab; this is the token used by the default @skip rule.
space { " " | "\t" }
leftParen { "(" }
rightParen { ")" }
// ":" token, named "colon" and tagged with the closedBy="end" prop.
colon[closedBy="end", @name="colon"] { ":" }
Underscore { "_" }
// Regex literal. Opens with "//", followed by one or more of:
//   - any character except "/", "\", newline and "[",
//   - a backslash followed by any non-newline character,
//   - a bracketed class "[...]" made of non-newline, non-"\", non-"]" characters or backslash escapes.
// The closing "//" and any trailing flags (letters from g, i, m, s, u, y) are optional.
Regex { "//" (![/\\\n[] | "\\" ![\n] | "[" (![\n\\\]] | "\\" ![\n])* "]")+ ("//" $[gimsuy]*)? } // Stolen from the lezer JavaScript grammar
// The literal "|" token is given the node name `operator`.
"|"[@name=operator]
}
// Identifier-like tokens are produced by the external tokenizer exported from ./tokenizer.
@external tokens tokenizer from "./tokenizer" { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot }
// Named precedence levels, referenced with `!name` markers in PipeExpr and BinOp.
// In Lezer, levels listed earlier take precedence over levels listed later; `@left` makes
// a level left-associative.
// NOTE(review): no rule visible in this file uses a `!call` marker — confirm whether the
// `call` level is still needed.
@precedence {
pipe @left,
multiplicative @left,
additive @left,
call
}
// One top-level entry of a Program: either a statement followed by its
// terminator (newline, ";" or end of input), or a terminator on its own.
item {
  consumeToTerminator (newlineOrSemicolon | eof) |
  newlineOrSemicolon // a bare terminator, i.e. a blank line
}
// A single statement, excluding its terminator. Callers (item, block, Assign, the
// single-line `do`/`if` forms) decide what may follow it.
// A bare Identifier is reachable here only through ambiguousFunctionCall, because the last
// alternative is expressionWithoutIdentifier rather than expression.
consumeToTerminator {
PipeExpr |
ambiguousFunctionCall |
DotGet |
IfExpr |
FunctionDef |
Assign |
BinOp |
expressionWithoutIdentifier
}
// A pipeline: two or more operands joined by "|". The `!pipe` marker applies the
// left-associative `pipe` precedence level at each "|".
PipeExpr {
pipeOperand (!pipe "|" pipeOperand)+
}
// One stage of a pipeline: a call with arguments, or a bare identifier.
pipeOperand {
FunctionCall | FunctionCallOrIdentifier
}
// A lone Identifier wrapped in its own node. As the name indicates, it may stand for a call
// without arguments or for a plain identifier; the grammar does not distinguish the two.
FunctionCallOrIdentifier {
Identifier
}
// Statement-level form of "identifier, optionally followed by arguments". Same alternatives
// as pipeOperand; being a lowercase rule it does not produce a tree node of its own.
ambiguousFunctionCall {
FunctionCall | FunctionCallOrIdentifier
}
// A call: an Identifier followed by one or more arguments.
FunctionCall {
Identifier arg+
}
// An argument is either positional or named.
arg {
PositionalArg | NamedArg
}
// Positional argument: an expression, a function definition, or the "_" token.
PositionalArg {
expression | FunctionDef | Underscore
}
// Named argument: a NamedArgPrefix token (lowercase letters + "=") followed by the same
// kinds of value a positional argument accepts.
NamedArg {
NamedArgPrefix (expression | FunctionDef | Underscore)
}
// Function definition: `do <params>: ... end`, in a single-line or a multi-line form.
// `do` and `end` are Identifier tokens specialized into tokens named `keyword`.
FunctionDef {
singleLineFunctionDef | multilineFunctionDef
}
// Single-line form: exactly one statement between the colon and `end`, with no terminator.
singleLineFunctionDef {
@specialize[@name=keyword]<Identifier, "do"> Params colon consumeToTerminator @specialize[@name=keyword]<Identifier, "end">
}
// Multi-line form: the colon is followed by a terminator, then a `block` of terminated
// statements, then `end`.
multilineFunctionDef {
@specialize[@name=keyword]<Identifier, "do"> Params colon newlineOrSemicolon block @specialize[@name=keyword]<Identifier, "end">
}
// Conditional: a single-line or a multi-line form. `if`, `elseif`, `else` and `end` are
// Identifier tokens specialized into tokens named `keyword`.
IfExpr {
singleLineIf | multilineIf
}
// Single-line form: `if <condition>: <statement>`. There is no `end` and no else branch.
// The statement is wrapped in an inline rule that is also named ThenBlock.
singleLineIf {
@specialize[@name=keyword]<Identifier, "if"> (ConditionalOp | expression) colon ThenBlock { consumeToTerminator }
}
// Multi-line form: the colon is followed by a terminator and a ThenBlock, then any number of
// `elseif` branches, an optional `else` branch, and a closing `end`.
multilineIf {
@specialize[@name=keyword]<Identifier, "if"> (ConditionalOp | expression) colon newlineOrSemicolon ThenBlock ElseIfExpr* ElseExpr? @specialize[@name=keyword]<Identifier, "end">
}
// `elseif <condition>:` followed by a terminator and the branch body.
ElseIfExpr {
@specialize[@name=keyword]<Identifier, "elseif"> (ConditionalOp | expression) colon newlineOrSemicolon ThenBlock
}
// `else:` followed by a terminator and the branch body.
ElseExpr {
@specialize[@name=keyword]<Identifier, "else"> colon newlineOrSemicolon ThenBlock
}
// Body of a multi-line branch: a `block` wrapped in a named node.
ThenBlock {
block
}
// A condition.
//   - Comparison: two expressions joined by one of Eq, Neq, Lt, Lte, Gt, Gte.
//   - Logical: an expression joined by And / Or to either an expression or
//     another ConditionalOp, so chains nest on the right-hand side.
ConditionalOp {
  expression (Eq | Neq | Lt | Lte | Gt | Gte) expression |
  expression (And | Or) (expression | ConditionalOp)
}
// Parameter list of a `do` function definition: zero or more AssignableIdentifier tokens.
Params {
AssignableIdentifier*
}
// Assignment: target, the Eq operator token, then any statement as the value. Because the
// value is a full consumeToTerminator, it may itself be another Assign.
Assign {
AssignableIdentifier Eq consumeToTerminator
}
// Binary arithmetic. Each operand is an expression or a nested BinOp.
// Star and Slash use the `multiplicative` precedence level, Plus and Minus the
// `additive` level (both declared in the top-level @precedence block).
BinOp {
  (expression | BinOp) !multiplicative (Star | Slash) (expression | BinOp) |
  (expression | BinOp) !additive (Plus | Minus) (expression | BinOp)
}
// Parenthesised form. The content is one of: a call or bare identifier, a BinOp, a
// non-identifier expression, a condition, a pipeline, or a function definition.
ParenExpr {
leftParen (ambiguousFunctionCall | BinOp | expressionWithoutIdentifier | ConditionalOp | PipeExpr | FunctionDef) rightParen
}
// Any operand-level expression: everything in expressionWithoutIdentifier, plus DotGet and
// a bare Identifier.
expression {
expressionWithoutIdentifier | DotGet | Identifier
}
// Local token group containing only `dot`; within this file it is used by DotGet alone.
@local tokens {
dot { "." }
}
// Rules declared in this block use an empty skip set, so no whitespace is skipped between
// their tokens.
@skip {} {
// Property access: IdentifierBeforeDot, ".", Identifier.
DotGet {
IdentifierBeforeDot dot Identifier
}
// Single-quoted string: zero or more stringContent pieces between "'" delimiters.
String { "'" stringContent* "'" }
}
// One piece of a String body: literal text, a "$" interpolation, or a backslash escape.
stringContent {
StringFragment |
Interpolation |
EscapeSeq
}
// String interpolation: "$" followed by an Identifier or by a parenthesised
// expression.
Interpolation {
  "$" (Identifier | ParenExpr)
}
// Escape sequence inside a String: a backslash followed by one of $, n, t, r, \ or '.
// NOTE(review): this rule is declared outside the `@skip {}` block, so the default
// `@skip { space }` may apply between the backslash and the escaped character — confirm.
EscapeSeq {
"\\" ("$" | "n" | "t" | "r" | "\\" | "'")
}
// Why expressionWithoutIdentifier exists: it avoids a conflict in consumeToTerminator.
// Without it, a bare identifier such as "my-var" at statement level could be parsed as either
//   - ambiguousFunctionCall → FunctionCallOrIdentifier → Identifier, or
//   - expression → Identifier
// and both paths claim the same Identifier token. Leaving Identifier out of this rule
// removes the second path, so standalone identifiers are forced through
// ambiguousFunctionCall (which is what we want semantically).
// Yes, it is annoying; attempts to resolve the conflict with GLR instead were abandoned.
//
// The last alternative specializes an Identifier token with the text "null" into a token
// named Null.
expressionWithoutIdentifier {
ParenExpr | Word | String | Number | Boolean | Regex | @specialize[@name=Null]<Identifier, "null">
}
// Body of a multi-line construct (`do ... end`, and the `if` / `elseif` / `else`
// branches via ThenBlock): zero or more statements, each followed by a newline
// or ";".
// A bare terminator is also accepted, so empty lines inside a body parse
// cleanly, the same way `item` accepts them at the top level. `newlineOrSemicolon`
// is an anonymous token, so the extra alternative adds no nodes to the tree.
block {
  (consumeToTerminator newlineOrSemicolon | newlineOrSemicolon)*
}