shrimp/src/parser/shrimp.grammar

253 lines
5.7 KiB
Plaintext

@external propSource highlighting from "./highlight"
@context trackScope from "./parserScopeContext"
@skip { space | Comment }
@top Program { item* }
@external tokens operatorTokenizer from "./operatorTokenizer" { Star, Slash, Plus, Minus, And, Or, Eq, EqEq, Neq, Lt, Lte, Gt, Gte, Modulo, PlusEq, MinusEq, StarEq, SlashEq, ModuloEq }
@tokens {
@precedence { Number Regex }
StringFragment { !['\\$]+ }
NamedArgPrefix { $[a-z-]+ "=" }
Number {
("-" | "+")? "0x" $[0-9a-fA-F]+ |
("-" | "+")? "0b" $[01]+ |
("-" | "+")? $[0-9]+ ('.' $[0-9]+)?
}
Boolean { "true" | "false" }
newlineOrSemicolon { "\n" | ";" }
eof { @eof }
space { " " | "\t" }
Comment { "#" ![\n]* }
leftParen { "(" }
rightParen { ")" }
colon[closedBy="end", @name="colon"] { ":" }
Underscore { "_" }
Regex { "//" (![/\\\n[] | "\\" ![\n] | "[" (![\n\\\]] | "\\" ![\n])* "]")+ ("//" $[gimsuy]*)? } // Stolen from the lezer JavaScript grammar
"|"[@name=operator]
}
end { @specialize[@name=keyword]<Identifier, "end"> }
while { @specialize[@name=keyword]<Identifier, "while"> }
if { @specialize[@name=keyword]<Identifier, "if"> }
else { @specialize[@name=keyword]<Identifier, "else"> }
try { @specialize[@name=keyword]<Identifier, "try"> }
catch { @specialize[@name=keyword]<Identifier, "catch"> }
finally { @specialize[@name=keyword]<Identifier, "finally"> }
throw { @specialize[@name=keyword]<Identifier, "throw"> }
null { @specialize[@name=Null]<Identifier, "null"> }
@external tokens tokenizer from "./tokenizer" { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot }
@external specialize {Identifier} specializeKeyword from "./tokenizer" { Do }
@precedence {
pipe @left,
or @left,
and @left,
comparison @left,
multiplicative @left,
additive @left,
call
}
item {
consumeToTerminator newlineOrSemicolon |
consumeToTerminator eof |
newlineOrSemicolon // allow blank lines
}
consumeToTerminator {
PipeExpr |
WhileExpr |
FunctionCallWithBlock |
ambiguousFunctionCall |
TryExpr |
Throw |
IfExpr |
FunctionDef |
CompoundAssign |
Assign |
BinOp |
ConditionalOp |
expressionWithoutIdentifier
}
PipeExpr {
pipeOperand (!pipe "|" pipeOperand)+
}
pipeOperand {
consumeToTerminator
}
WhileExpr {
while (ConditionalOp | expression) colon Block end
}
Block {
consumeToTerminator | newlineOrSemicolon block
}
FunctionCallWithBlock {
ambiguousFunctionCall colon Block CatchExpr? FinallyExpr? end
}
FunctionCallOrIdentifier {
DotGet | Identifier
}
ambiguousFunctionCall {
FunctionCall | FunctionCallOrIdentifier
}
FunctionCall {
(DotGet | Identifier | ParenExpr) arg+
}
arg {
PositionalArg | NamedArg
}
PositionalArg {
expression | FunctionDef | Underscore
}
NamedArg {
NamedArgPrefix (expression | FunctionDef | Underscore)
}
FunctionDef {
Do Params colon (consumeToTerminator | newlineOrSemicolon block) CatchExpr? FinallyExpr? end
}
ifTest {
ConditionalOp | expression | FunctionCall
}
IfExpr {
if ifTest colon Block ElseIfExpr* ElseExpr? end
}
ElseIfExpr {
else if ifTest colon Block
}
ElseExpr {
else colon Block
}
TryExpr {
try colon Block CatchExpr? FinallyExpr? end
}
CatchExpr {
catch Identifier colon Block
}
FinallyExpr {
finally colon Block
}
Throw {
throw (BinOp | ConditionalOp | expression)
}
ConditionalOp {
expression !comparison EqEq expression |
expression !comparison Neq expression |
expression !comparison Lt expression |
expression !comparison Lte expression |
expression !comparison Gt expression |
expression !comparison Gte expression |
(expression | ConditionalOp) !and And (expression | ConditionalOp) |
(expression | ConditionalOp) !or Or (expression | ConditionalOp)
}
Params {
Identifier* NamedParam*
}
NamedParam {
NamedArgPrefix (String | Number | Boolean | null)
}
Assign {
(AssignableIdentifier | Array) Eq consumeToTerminator
}
CompoundAssign {
AssignableIdentifier (PlusEq | MinusEq | StarEq | SlashEq | ModuloEq) consumeToTerminator
}
BinOp {
expression !multiplicative Modulo expression |
(expression | BinOp) !multiplicative Star (expression | BinOp) |
(expression | BinOp) !multiplicative Slash (expression | BinOp) |
(expression | BinOp) !additive Plus (expression | BinOp) |
(expression | BinOp) !additive Minus (expression | BinOp)
}
ParenExpr {
leftParen (IfExpr | ambiguousFunctionCall | BinOp | expressionWithoutIdentifier | ConditionalOp | PipeExpr | FunctionDef) rightParen
}
expression {
expressionWithoutIdentifier | DotGet | Identifier
}
@local tokens {
dot { "." }
}
@skip {} {
DotGet {
IdentifierBeforeDot dot (Number | Identifier | ParenExpr)
}
String { "'" stringContent* "'" }
}
stringContent {
StringFragment |
Interpolation |
EscapeSeq
}
Interpolation {
"$" Identifier |
"$" ParenExpr
}
EscapeSeq {
"\\" ("$" | "n" | "t" | "r" | "\\" | "'")
}
Dict {
"[=]" |
"[" newlineOrSemicolon* NamedArg (newlineOrSemicolon | NamedArg)* "]"
}
Array {
"[" newlineOrSemicolon* (expression (newlineOrSemicolon | expression)*)? "]"
}
// We need expressionWithoutIdentifier to avoid conflicts in consumeToTerminator.
// Without this, when parsing "my-var" at statement level, the parser can't decide:
// - ambiguousFunctionCall → FunctionCallOrIdentifier → Identifier
// - expression → Identifier
// Both want the same Identifier token! So we use expressionWithoutIdentifier
// to remove Identifier from the second path, forcing standalone identifiers
// to go through ambiguousFunctionCall (which is what we want semantically).
// Yes, it is annoying and I gave up trying to use GLR to fix it.
expressionWithoutIdentifier {
ParenExpr | Word | String | Number | Boolean | Regex | Dict | Array | null
}
block {
(consumeToTerminator? newlineOrSemicolon)*
}