shrimp/src/parser/tests/basics.test.ts
2025-12-02 17:11:39 -08:00

947 lines
21 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { expect, describe, test } from 'bun:test'
import '../shrimp.grammar' // Importing this so changes cause it to retest!
// Suite: the `null` literal, standalone and as an assignment value.
// NOTE(review): `toMatchTree` is a custom matcher defined outside this file;
// presumably it compares the parse of the input against the expected outline — confirm.
describe('null', () => {
// A bare `null` is expected to produce a single Null entry.
test('parses null', () => {
expect('null').toMatchTree(`Null null`)
})
// `null` to the right of `=` is expected as a Null entry within an Assign.
test('parses null in assignments', () => {
expect('a = null').toMatchTree(`
Assign
AssignableIdentifier a
Eq =
Null null`)
})
// A name that merely begins with `null` (`null-jk`) is expected to stay an
// AssignableIdentifier rather than being split into a Null literal.
test('does not parse null in identifier', () => {
expect('null-jk = 5').toMatchTree(`
Assign
AssignableIdentifier null-jk
Eq =
Number 5`)
})
})
// Suite: identifier tokenization, including emoji, dashes, mathematical
// symbols and a trailing `?`.
describe('Identifier', () => {
// Emoji, dashes and digits are all expected to belong to one Identifier.
test('parses identifiers with emojis and dashes', () => {
expect('moo-😊-34').toMatchTree(`
FunctionCallOrIdentifier
Identifier moo-😊-34`)
})
// The mathematical italic pi is expected to be accepted as an Identifier.
test('parses mathematical unicode symbols like 𝜋 as identifiers', () => {
expect('𝜋').toMatchTree(`
FunctionCallOrIdentifier
Identifier 𝜋`)
})
// A trailing `?` is expected to be part of the identifier, both when the
// identifier is followed by an argument (FunctionCall) and when it stands alone.
test('parses identifiers with queries', () => {
expect('even? 20').toMatchTree(`
FunctionCall
Identifier even?
PositionalArg
Number 20`)
expect('even?').toMatchTree(`
FunctionCallOrIdentifier
Identifier even?`)
})
})
// Suite: one nested suite per Unicode category; every test asserts that the
// sample character (alone, or joined to an ASCII suffix with `-`) is expected
// to parse as a single Identifier.
// NOTE(review): several nested suite titles below say "(not currently supported)",
// yet each test in them asserts a successful Identifier parse, and the
// 'Identifier' suite in this file asserts the same for 𝜋. Either the titles are
// stale or these tests are expected to fail — confirm and align the titles.
describe('Unicode Symbol Support', () => {
// Emoji blocks; test titles name the code point range each sample is taken from.
describe('Emoji (currently supported)', () => {
test('Basic Emoticons (U+1F600-U+1F64F)', () => {
expect('😀').toMatchTree(`
FunctionCallOrIdentifier
Identifier 😀`)
expect('😊-counter').toMatchTree(`
FunctionCallOrIdentifier
Identifier 😊-counter`)
})
test('Miscellaneous Symbols and Pictographs (U+1F300-U+1F5FF)', () => {
expect('🌍').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🌍`)
expect('🔥-handler').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🔥-handler`)
})
test('Transport and Map Symbols (U+1F680-U+1F6FF)', () => {
expect('🚀').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🚀`)
expect('🚀-launch').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🚀-launch`)
})
// Only a single regional indicator is exercised here, not a full two-symbol flag.
test('Regional Indicator Symbols / Flags (U+1F1E6-U+1F1FF)', () => {
// Note: Flags are typically two regional indicators combined
expect('🇺').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🇺`)
})
test('Supplemental Symbols and Pictographs (U+1F900-U+1F9FF)', () => {
expect('🤖').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🤖`)
expect('🦀-lang').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🦀-lang`)
})
test('Dingbats (U+2700-U+27BF)', () => {
expect('✂').toMatchTree(`
FunctionCallOrIdentifier
Identifier ✂`)
expect('✨-magic').toMatchTree(`
FunctionCallOrIdentifier
Identifier ✨-magic`)
})
test('Miscellaneous Symbols (U+2600-U+26FF)', () => {
expect('⚡').toMatchTree(`
FunctionCallOrIdentifier
Identifier ⚡`)
expect('☀-bright').toMatchTree(`
FunctionCallOrIdentifier
Identifier ☀-bright`)
})
})
// Single Greek lowercase letters as identifiers.
describe('Greek Letters (not currently supported)', () => {
test('Greek lowercase alpha α (U+03B1)', () => {
expect('α').toMatchTree(`
FunctionCallOrIdentifier
Identifier α`)
})
test('Greek lowercase beta β (U+03B2)', () => {
expect('β').toMatchTree(`
FunctionCallOrIdentifier
Identifier β`)
})
test('Greek lowercase lambda λ (U+03BB)', () => {
expect('λ').toMatchTree(`
FunctionCallOrIdentifier
Identifier λ`)
})
test('Greek lowercase pi π (U+03C0)', () => {
// Note: This is different from mathematical pi 𝜋
expect('π').toMatchTree(`
FunctionCallOrIdentifier
Identifier π`)
})
})
// Characters from the Mathematical Alphanumeric Symbols block (code points
// given in the test titles are above U+FFFF).
describe('Mathematical Alphanumeric Symbols (not currently supported)', () => {
test('Mathematical italic small pi 𝜋 (U+1D70B)', () => {
expect('𝜋').toMatchTree(`
FunctionCallOrIdentifier
Identifier 𝜋`)
})
test('Mathematical bold small x 𝐱 (U+1D431)', () => {
expect('𝐱').toMatchTree(`
FunctionCallOrIdentifier
Identifier 𝐱`)
})
test('Mathematical script capital F 𝓕 (U+1D4D5)', () => {
expect('𝓕').toMatchTree(`
FunctionCallOrIdentifier
Identifier 𝓕`)
})
})
// Standalone mathematical operator symbols used as identifiers.
describe('Mathematical Operators (not currently supported)', () => {
test('Infinity symbol ∞ (U+221E)', () => {
expect('∞').toMatchTree(`
FunctionCallOrIdentifier
Identifier ∞`)
})
test('Sum symbol ∑ (U+2211)', () => {
expect('∑').toMatchTree(`
FunctionCallOrIdentifier
Identifier ∑`)
})
test('Integral symbol ∫ (U+222B)', () => {
expect('∫').toMatchTree(`
FunctionCallOrIdentifier
Identifier ∫`)
})
})
// Superscript/subscript digits embedded after or between ASCII letters.
describe('Superscripts and Subscripts (not currently supported)', () => {
test('Superscript two ² (U+00B2)', () => {
expect('x²').toMatchTree(`
FunctionCallOrIdentifier
Identifier x²`)
})
test('Subscript two ₂ (U+2082)', () => {
expect('h₂o').toMatchTree(`
FunctionCallOrIdentifier
Identifier h₂o`)
})
})
// Standalone arrow symbols used as identifiers.
describe('Arrows (not currently supported)', () => {
test('Rightward arrow → (U+2192)', () => {
expect('→').toMatchTree(`
FunctionCallOrIdentifier
Identifier →`)
})
test('Leftward arrow ← (U+2190)', () => {
expect('←').toMatchTree(`
FunctionCallOrIdentifier
Identifier ←`)
})
test('Double rightward arrow ⇒ (U+21D2)', () => {
expect('⇒').toMatchTree(`
FunctionCallOrIdentifier
Identifier ⇒`)
})
})
// One sample each of Hiragana, Katakana and a CJK ideograph.
describe('CJK Symbols (not currently supported)', () => {
test('Hiragana あ (U+3042)', () => {
expect('あ').toMatchTree(`
FunctionCallOrIdentifier
Identifier あ`)
})
test('Katakana カ (U+30AB)', () => {
expect('カ').toMatchTree(`
FunctionCallOrIdentifier
Identifier カ`)
})
test('CJK Unified Ideograph 中 (U+4E2D)', () => {
expect('中').toMatchTree(`
FunctionCallOrIdentifier
Identifier 中`)
})
})
})
// Suite: parenthesized expressions (ParenExpr) — literals, operators and
// function calls inside parens, nesting, words as arguments, and inputs that
// span several lines between `(` and `)`.
describe('Parentheses', () => {
test('allows binOps with parentheses correctly', () => {
expect('(2 + 3)').toMatchTree(`
ParenExpr
BinOp
Number 2
Plus +
Number 3`)
})
// Each literal kind (number, string, both booleans) is wrapped on its own.
test('allows numbers, strings, and booleans with parentheses correctly', () => {
expect('(42)').toMatchTree(`
ParenExpr
Number 42`)
expect("('hello')").toMatchTree(`
ParenExpr
String
StringFragment hello`)
expect('(true)').toMatchTree(`
ParenExpr
Boolean true`)
expect('(false)').toMatchTree(`
ParenExpr
Boolean false`)
})
// With an argument the call is a FunctionCall; a lone name in parens is
// expected as FunctionCallOrIdentifier.
test('allows function calls in parens', () => {
expect('(echo 3)').toMatchTree(`
ParenExpr
FunctionCall
Identifier echo
PositionalArg
Number 3`)
expect('(echo)').toMatchTree(`
ParenExpr
FunctionCallOrIdentifier
Identifier echo`)
})
// Both a comparison (`>`) and a logical operator (`and`) are expected as ConditionalOp.
test('allows conditionals in parens', () => {
expect('(a > b)').toMatchTree(`
ParenExpr
ConditionalOp
Identifier a
Gt >
Identifier b`)
expect('(a and b)').toMatchTree(`
ParenExpr
ConditionalOp
Identifier a
And and
Identifier b`)
})
// A parenthesized expression can itself be a positional argument.
test('allows parens in function calls', () => {
expect('echo (3 + 3)').toMatchTree(`
FunctionCall
Identifier echo
PositionalArg
ParenExpr
BinOp
Number 3
Plus +
Number 3`)
})
// `./cool` is expected as a Word argument, with the closing paren not absorbed into it.
test('a word can be contained in parens', () => {
expect('(basename ./cool)').toMatchTree(`
ParenExpr
FunctionCall
Identifier basename
PositionalArg
Word ./cool
`)
})
// Runs the same assertion once per operator: an argument that begins with the
// operator text immediately followed by `cool*` is expected as a single Word.
test('a word start with an operator', () => {
const operators = ['*', '/', '+', '-', 'and', 'or', '=', '!=', '>=', '<=', '>', '<']
for (const operator of operators) {
expect(`find ${operator}cool*`).toMatchTree(`
FunctionCall
Identifier find
PositionalArg
Word ${operator}cool*
`)
}
})
// `cool*wow` has no spaces around `*`, so it is expected as one Word, not a BinOp.
test('a word can look like a binop', () => {
expect('find cool*wow').toMatchTree(`
FunctionCall
Identifier find
PositionalArg
Word cool*wow
`)
})
test('nested parentheses', () => {
expect('(2 + (1 * 4))').toMatchTree(`
ParenExpr
BinOp
Number 2
Plus +
ParenExpr
BinOp
Number 1
Star *
Number 4`)
})
// A parenthesized function call used as the right operand of `+`.
test('Function in parentheses', () => {
expect('4 + (echo 3)').toMatchTree(`
BinOp
Number 4
Plus +
ParenExpr
FunctionCall
Identifier echo
PositionalArg
Number 3`)
})
// Two layouts are checked: function name on the same line as `(`, and function
// name on the line after `(`. Both expect the same outline.
test('function call with named args on multiple lines in parens', () => {
expect(`(tail
arg1=true
arg2=30
)`).toMatchTree(`
ParenExpr
FunctionCall
Identifier tail
NamedArg
NamedArgPrefix arg1=
Boolean true
NamedArg
NamedArgPrefix arg2=
Number 30
`)
expect(`(
tail
arg1=true
arg2=30
)`).toMatchTree(`
ParenExpr
FunctionCall
Identifier tail
NamedArg
NamedArgPrefix arg1=
Boolean true
NamedArg
NamedArgPrefix arg2=
Number 30
`)
})
// Newlines directly inside the parens around a binary operation.
test('binop with newlines in parens', () => {
expect(`(
1 + 2
)`).toMatchTree(`
ParenExpr
BinOp
Number 1
Plus +
Number 2`)
})
// Newlines directly inside the parens around a comparison.
test('comparison with newlines in parens', () => {
expect(`(
1 < 2
)`).toMatchTree(`
ParenExpr
ConditionalOp
Number 1
Lt <
Number 2`)
})
// Each identifier on its own line is expected as a separate PositionalArg.
test('function call with multiple identifiers on separate lines in parens', () => {
expect(`(echo
arg1
arg2
arg3
)`).toMatchTree(`
ParenExpr
FunctionCall
Identifier echo
PositionalArg
Identifier arg1
PositionalArg
Identifier arg2
PositionalArg
Identifier arg3`)
})
// Same as the previous test, but the function name starts on the line after `(`.
// NOTE(review): the title contains the typo "mulitline"; left unchanged here
// because the title is a runtime string.
test('function call with mulitline identifiers starting separate lines in parens', () => {
expect(`(
echo
arg1
arg2
arg3
)`).toMatchTree(`
ParenExpr
FunctionCall
Identifier echo
PositionalArg
Identifier arg1
PositionalArg
Identifier arg2
PositionalArg
Identifier arg3`)
})
})
// Suite: `_` digit separators in numeric literals. Every expected Number
// entry carries the literal text unchanged, underscores and sign included.
describe('Number literals', () => {
test('allows underscores in integer literals', () => {
expect('10_000').toMatchTree(`Number 10_000`)
expect('1_000_000').toMatchTree(`Number 1_000_000`)
expect('100_000').toMatchTree(`Number 100_000`)
})
// Separators are exercised on both sides of the decimal point.
test('allows underscores in decimal literals', () => {
expect('3.14_159').toMatchTree(`Number 3.14_159`)
expect('1_000.50').toMatchTree(`Number 1_000.50`)
expect('0.000_001').toMatchTree(`Number 0.000_001`)
})
// The leading `-` is expected to be part of the Number text.
test('allows underscores in negative numbers', () => {
expect('-10_000').toMatchTree(`Number -10_000`)
expect('-3.14_159').toMatchTree(`Number -3.14_159`)
})
// The leading `+` is expected to be part of the Number text.
test('allows underscores in positive numbers with explicit sign', () => {
expect('+10_000').toMatchTree(`Number +10_000`)
expect('+3.14_159').toMatchTree(`Number +3.14_159`)
})
// Separated literals as operands of a binary operation.
test('works in expressions', () => {
expect('1_000 + 2_000').toMatchTree(`
BinOp
Number 1_000
Plus +
Number 2_000`)
})
// A separated literal as a positional argument.
test('works in function calls', () => {
expect('echo 10_000').toMatchTree(`
FunctionCall
Identifier echo
PositionalArg
Number 10_000`)
})
})
// Suite: arithmetic binary operators. Each single-operator test expects a
// BinOp with the operator's named token (Plus, Minus, Star, Slash, Modulo).
describe('BinOp', () => {
test('addition tests', () => {
expect('2 + 3').toMatchTree(`
BinOp
Number 2
Plus +
Number 3
`)
})
test('subtraction tests', () => {
expect('5 - 2').toMatchTree(`
BinOp
Number 5
Minus -
Number 2
`)
})
test('multiplication tests', () => {
expect('4 * 3').toMatchTree(`
BinOp
Number 4
Star *
Number 3
`)
})
test('division tests', () => {
expect('8 / 2').toMatchTree(`
BinOp
Number 8
Slash /
Number 2
`)
})
test('modulo tests', () => {
expect('4 % 3').toMatchTree(`
BinOp
Number 4
Modulo %
Number 3
`)
})
// The expected outline gives `3 * 4` and `5 / 1` their own BinOp entries, i.e.
// `*` and `/` are expected to group before `+` and `-`.
test('mixed operations with precedence', () => {
expect('2 + 3 * 4 - 5 / 1').toMatchTree(`
BinOp
BinOp
Number 2
Plus +
BinOp
Number 3
Star *
Number 4
Minus -
BinOp
Number 5
Slash /
Number 1
`)
})
})
// Suite: inputs where `-` could be read either as an operator or as part of
// a neighbouring token. Each test has a distinct title so that failures can
// be told apart in reports and a single case can be selected by name.
describe('ambiguity', () => {
// `-3` directly after `+` is expected as one negative Number, not a subtraction.
test('parses a signed number after a binary operator', () => {
expect('a + -3').toMatchTree(`
BinOp
Identifier a
Plus +
Number -3
`)
})
// The dashes in `a-var` and `a-thing` are expected to stay inside the identifiers.
test('parses dashed identifiers around a binary operator', () => {
expect('a-var + a-thing').toMatchTree(`
BinOp
Identifier a-var
Plus +
Identifier a-thing
`)
})
})
// Suite: statement separators — a newline and `;` are both expected to split
// the input into consecutive top-level statements.
describe('newlines', () => {
test('parses multiple statements separated by newlines', () => {
expect(`x = 5
y = 2`).toMatchTree(`
Assign
AssignableIdentifier x
Eq =
Number 5
Assign
AssignableIdentifier y
Eq =
Number 2`)
})
// Same two assignments as above, separated by `;` on one line.
test('parses statements separated by semicolons', () => {
expect(`x = 5; y = 2`).toMatchTree(`
Assign
AssignableIdentifier x
Eq =
Number 5
Assign
AssignableIdentifier y
Eq =
Number 2`)
})
// `hello` directly before `;` is expected as an Identifier (the `;` is not
// absorbed into it), followed by a separate Number statement.
test('parses statement with word and a semicolon', () => {
expect(`a = hello; 2`).toMatchTree(`
Assign
AssignableIdentifier a
Eq =
FunctionCallOrIdentifier
Identifier hello
Number 2`)
})
})
// Suite: plain `=` assignment with a literal, an arithmetic expression and a
// function definition on the right-hand side.
describe('Assign', () => {
test('parses simple assignment', () => {
expect('x = 5').toMatchTree(`
Assign
AssignableIdentifier x
Eq =
Number 5`)
})
test('parses assignment with addition', () => {
expect('x = 5 + 3').toMatchTree(`
Assign
AssignableIdentifier x
Eq =
BinOp
Number 5
Plus +
Number 3`)
})
// `do a b: ... end` is expected as a FunctionDef with a Params entry listing
// `a` and `b`, a `colon` token, the body expression and a closing `end` keyword.
test('parses assignment with functions', () => {
expect('add = do a b: a + b end').toMatchTree(`
Assign
AssignableIdentifier add
Eq =
FunctionDef
Do do
Params
Identifier a
Identifier b
colon :
BinOp
Identifier a
Plus +
Identifier b
keyword end`)
})
})
// Suite: compound assignment operators. Each operator is expected to yield a
// CompoundAssign with its own named token (PlusEq, MinusEq, StarEq, SlashEq,
// ModuloEq, NullishEq).
describe('CompoundAssign', () => {
test('parses += operator', () => {
expect('x += 5').toMatchTree(`
CompoundAssign
AssignableIdentifier x
PlusEq +=
Number 5`)
})
test('parses -= operator', () => {
expect('count -= 1').toMatchTree(`
CompoundAssign
AssignableIdentifier count
MinusEq -=
Number 1`)
})
test('parses *= operator', () => {
expect('total *= 2').toMatchTree(`
CompoundAssign
AssignableIdentifier total
StarEq *=
Number 2`)
})
test('parses /= operator', () => {
expect('value /= 10').toMatchTree(`
CompoundAssign
AssignableIdentifier value
SlashEq /=
Number 10`)
})
test('parses %= operator', () => {
expect('remainder %= 3').toMatchTree(`
CompoundAssign
AssignableIdentifier remainder
ModuloEq %=
Number 3`)
})
// The right-hand side may be a full binary expression.
test('parses compound assignment with expression', () => {
expect('x += 1 + 2').toMatchTree(`
CompoundAssign
AssignableIdentifier x
PlusEq +=
BinOp
Number 1
Plus +
Number 2`)
})
// The right-hand side may be a function call with positional arguments.
test('parses compound assignment with function call', () => {
expect('total += add 5 3').toMatchTree(`
CompoundAssign
AssignableIdentifier total
PlusEq +=
FunctionCall
Identifier add
PositionalArg
Number 5
PositionalArg
Number 3`)
})
test('parses ??= operator', () => {
expect('x ??= 5').toMatchTree(`
CompoundAssign
AssignableIdentifier x
NullishEq ??=
Number 5`)
})
// A lone dashed name on the right of `??=` is expected as FunctionCallOrIdentifier.
test('parses ??= with expression', () => {
expect('config ??= get-default').toMatchTree(`
CompoundAssign
AssignableIdentifier config
NullishEq ??=
FunctionCallOrIdentifier
Identifier get-default`)
})
})
// Suite: the `??` operator. The first four tests check that `?` / `??`
// attached to other characters without spaces remain part of an Identifier or
// Word; the remaining tests check the space-separated operator form.
describe('Nullish coalescing operator', () => {
test('? can still end an identifier', () => {
expect('what?').toMatchTree(`
FunctionCallOrIdentifier
Identifier what?`)
})
test('?? can still end an identifier', () => {
expect('what??').toMatchTree(`
FunctionCallOrIdentifier
Identifier what??`)
})
// `??` in the middle of a name: the expected entry is an Identifier.
test('?? can still be in a word', () => {
expect('what??the').toMatchTree(`
FunctionCallOrIdentifier
Identifier what??the`)
})
// A token that begins with `??` is expected as a Word.
test('?? can still start a word', () => {
expect('??what??the').toMatchTree(`
Word ??what??the`)
})
// Space-separated `??` is expected as a ConditionalOp with a NullishCoalesce token.
test('parses ?? operator', () => {
expect('x ?? 5').toMatchTree(`
ConditionalOp
Identifier x
NullishCoalesce ??
Number 5`)
})
// Chained form: the expected outline contains two ConditionalOp entries,
// with `a ?? b` listed before the second `??` and `c`.
test('parses chained ?? operators', () => {
expect('a ?? b ?? c').toMatchTree(`
ConditionalOp
ConditionalOp
Identifier a
NullishCoalesce ??
Identifier b
NullishCoalesce ??
Identifier c`)
})
// Dashed identifiers on both sides of `??`.
test('parses ?? with expressions', () => {
expect('get-value ?? default-value').toMatchTree(`
ConditionalOp
Identifier get-value
NullishCoalesce ??
Identifier default-value`)
})
// The right operand may be a parenthesized function call.
test('parses ?? with parenthesized function call', () => {
expect('get-value ?? (default 10)').toMatchTree(`
ConditionalOp
Identifier get-value
NullishCoalesce ??
ParenExpr
FunctionCall
Identifier default
PositionalArg
Number 10`)
})
})
// Suite: `#` comments — trailing and standalone comments, and places where
// `#` is expected NOT to start a comment (inside a word, inside a string).
describe('Comments', () => {
// Each expected Comment entry runs from `#` to the end of its line, with or
// without a space after the `#`. The second input also has comments on their
// own lines before and after the statements.
test('are greedy', () => {
expect(`
x = 5 # one banana
y = 2 #two bananas`).toMatchTree(`
Assign
AssignableIdentifier x
Eq =
Number 5
Comment # one banana
Assign
AssignableIdentifier y
Eq =
Number 2
Comment #two bananas`)
expect(`
# some comment
basename = 5 # very astute
basename / prop
# good info`).toMatchTree(`
Comment # some comment
Assign
AssignableIdentifier basename
Eq =
Number 5
Comment # very astute
BinOp
Identifier basename
Slash /
Identifier prop
Comment # good info`)
})
// A `#` in the middle of a token is expected to stay inside the Word.
test('words with # are not considered comments', () => {
expect('find my#hashtag-file.txt').toMatchTree(`
FunctionCall
Identifier find
PositionalArg
Word my#hashtag-file.txt`)
})
// A `#` inside a quoted string is expected to stay inside the StringFragment.
// NOTE(review): the title contains the typo "hastags"; left unchanged here
// because the title is a runtime string.
test('hastags in strings are not comments', () => {
expect("'this is not a #comment'").toMatchTree(`
String
StringFragment this is not a #comment`)
})
})
// Suite: chaining of `or` / `and` between `==` comparisons inside an `if`
// condition. Both tests wrap the `if` in a one-parameter `do x: ... end`
// function assigned to `is-positive`, and differ only in the logical operator.
describe('Conditional ops', () => {
// Three `==` comparisons joined by two `or` operators; each comparison and
// each `or` combination is expected as its own ConditionalOp entry.
test('or can be chained', () => {
expect(`
is-positive = do x:
if x == 3 or x == 4 or x == 5:
true
end
end
`).toMatchTree(`
Assign
AssignableIdentifier is-positive
Eq =
FunctionDef
Do do
Params
Identifier x
colon :
IfExpr
keyword if
ConditionalOp
ConditionalOp
ConditionalOp
Identifier x
EqEq ==
Number 3
Or or
ConditionalOp
Identifier x
EqEq ==
Number 4
Or or
ConditionalOp
Identifier x
EqEq ==
Number 5
colon :
Block
Boolean true
keyword end
keyword end
`)
})
// Same input and expected outline as the `or` test, with `and` / And tokens.
test('and can be chained', () => {
expect(`
is-positive = do x:
if x == 3 and x == 4 and x == 5:
true
end
end
`).toMatchTree(`
Assign
AssignableIdentifier is-positive
Eq =
FunctionDef
Do do
Params
Identifier x
colon :
IfExpr
keyword if
ConditionalOp
ConditionalOp
ConditionalOp
Identifier x
EqEq ==
Number 3
And and
ConditionalOp
Identifier x
EqEq ==
Number 4
And and
ConditionalOp
Identifier x
EqEq ==
Number 5
colon :
Block
Boolean true
keyword end
keyword end
`)
})
})