Merge remote-tracking branch 'origin/main' into dotget-function-calls

This commit is contained in:
Corey Johnson 2025-10-27 12:45:53 -07:00
commit ffdd666685
8 changed files with 305 additions and 16 deletions

View File

@ -303,7 +303,8 @@ export class Compiler {
return instructions return instructions
} }
case terms.ThenBlock: { case terms.ThenBlock:
case terms.SingleLineThenBlock: {
const instructions = getAllChildren(node) const instructions = getAllChildren(node)
.map((child) => this.#compileNode(child, input)) .map((child) => this.#compileNode(child, input))
.flat() .flat()
@ -468,7 +469,11 @@ export class Compiler {
} }
default: default:
throw new CompilerError(`Unsupported syntax node: ${node.type.name}`, node.from, node.to) throw new CompilerError(
`Compiler doesn't know how to handle a "${node.type.name}" node.`,
node.from,
node.to
)
} }
} }
} }

View File

@ -85,6 +85,21 @@ describe('compiler', () => {
expect(`bloop = do: 'bloop' end; bloop`).toEvaluateTo('bloop') expect(`bloop = do: 'bloop' end; bloop`).toEvaluateTo('bloop')
}) })
// Defines `abc` as a `do:` function whose body is an `if false:` block followed
// by `true`, then calls it; the whole program is expected to evaluate to `true`.
test('function call with if statement and multiple expressions', () => {
  const program = `
abc = do:
if false:
echo nope
end
true
end
abc
`
  expect(program).toEvaluateTo(true)
})
test('simple conditionals', () => { test('simple conditionals', () => {
expect(`(3 < 6)`).toEvaluateTo(true) expect(`(3 < 6)`).toEvaluateTo(true)
expect(`(10 > 20)`).toEvaluateTo(false) expect(`(10 > 20)`).toEvaluateTo(false)
@ -139,6 +154,10 @@ describe('compiler', () => {
scattered scattered
end`).toEvaluateTo('dwarf') end`).toEvaluateTo('dwarf')
}) })
// Single-line `if <cond>: <body> end` form: with a true condition the program
// is expected to evaluate to the body's value.
test('single line if', () => {
  const source = `if 3 < 9: shire end`
  const expected = 'shire'
  expect(source).toEvaluateTo(expected)
})
}) })
describe('errors', () => { describe('errors', () => {

View File

@ -2,7 +2,7 @@
@context trackScope from "./scopeTracker" @context trackScope from "./scopeTracker"
@skip { space } @skip { space | comment }
@top Program { item* } @top Program { item* }
@ -18,6 +18,7 @@
newlineOrSemicolon { "\n" | ";" } newlineOrSemicolon { "\n" | ";" }
eof { @eof } eof { @eof }
space { " " | "\t" } space { " " | "\t" }
comment { "#" ![\n]* }
leftParen { "(" } leftParen { "(" }
rightParen { ")" } rightParen { ")" }
colon[closedBy="end", @name="colon"] { ":" } colon[closedBy="end", @name="colon"] { ":" }
@ -104,7 +105,7 @@ IfExpr {
} }
singleLineIf { singleLineIf {
@specialize[@name=keyword]<Identifier, "if"> (ConditionalOp | expression) colon ThenBlock { consumeToTerminator } @specialize[@name=keyword]<Identifier, "if"> (ConditionalOp | expression) colon SingleLineThenBlock @specialize[@name=keyword]<Identifier, "end">
} }
multilineIf { multilineIf {
@ -123,6 +124,10 @@ ThenBlock {
block block
} }
// Body of a single-line `if`: exactly one `consumeToTerminator`, with no
// newline/semicolon separators of its own. Used by `singleLineIf`, which
// follows it with the `end` keyword.
SingleLineThenBlock {
consumeToTerminator
}
ConditionalOp { ConditionalOp {
expression Eq expression | expression Eq expression |
expression Neq expression | expression Neq expression |
@ -199,5 +204,5 @@ expressionWithoutIdentifier {
} }
block { block {
(consumeToTerminator newlineOrSemicolon)* (consumeToTerminator? newlineOrSemicolon)*
} }

View File

@ -42,6 +42,7 @@ export const
NamedArg = 40, NamedArg = 40,
NamedArgPrefix = 41, NamedArgPrefix = 41,
IfExpr = 43, IfExpr = 43,
SingleLineThenBlock = 45,
ThenBlock = 46, ThenBlock = 46,
ElseIfExpr = 47, ElseIfExpr = 47,
ElseExpr = 49, ElseExpr = 49,

View File

@ -30,6 +30,204 @@ describe('Identifier', () => {
FunctionCallOrIdentifier FunctionCallOrIdentifier
Identifier moo-😊-34`) Identifier moo-😊-34`)
}) })
// A lone 𝜋 should come back from the parser as a plain identifier.
test('parses mathematical unicode symbols like 𝜋 as identifiers', () => {
  const source = '𝜋'
  const expectedTree = `
FunctionCallOrIdentifier
Identifier 𝜋`
  expect(source).toMatchTree(expectedTree)
})
})
describe('Unicode Symbol Support', () => {
// Emoji blocks that are valid in identifiers, each checked on its own and
// (where applicable) combined with a `-suffix`. Every expected tree echoes the
// input text after `Identifier`.
describe('Emoji (currently supported)', () => {
  test('Basic Emoticons (U+1F600-U+1F64F)', () => {
    expect('😀').toMatchTree(`
FunctionCallOrIdentifier
Identifier 😀`)
    expect('😊-counter').toMatchTree(`
FunctionCallOrIdentifier
Identifier 😊-counter`)
  })
  test('Miscellaneous Symbols and Pictographs (U+1F300-U+1F5FF)', () => {
    expect('🌍').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🌍`)
    expect('🔥-handler').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🔥-handler`)
  })
  test('Transport and Map Symbols (U+1F680-U+1F6FF)', () => {
    expect('🚀').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🚀`)
    expect('🚀-launch').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🚀-launch`)
  })
  test('Regional Indicator Symbols / Flags (U+1F1E6-U+1F1FF)', () => {
    // Note: Flags are typically two regional indicators combined; this checks
    // a single regional indicator on its own.
    expect('🇺').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🇺`)
  })
  test('Supplemental Symbols and Pictographs (U+1F900-U+1F9FF)', () => {
    expect('🤖').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🤖`)
    expect('🦀-lang').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🦀-lang`)
  })
  test('Dingbats (U+2700-U+27BF)', () => {
    // The expected identifier text must include the symbol itself, matching
    // the input exactly.
    expect('✂').toMatchTree(`
FunctionCallOrIdentifier
Identifier ✂`)
    expect('✨-magic').toMatchTree(`
FunctionCallOrIdentifier
Identifier ✨-magic`)
  })
  test('Miscellaneous Symbols (U+2600-U+26FF)', () => {
    expect('⚡').toMatchTree(`
FunctionCallOrIdentifier
Identifier ⚡`)
    expect('☀-bright').toMatchTree(`
FunctionCallOrIdentifier
Identifier ☀-bright`)
  })
})
// Greek letters (U+0370-U+03FF) are accepted as identifier characters, so each
// of these is expected to parse as a plain identifier. The suite name no
// longer claims they are unsupported, since the assertions require support.
describe('Greek Letters', () => {
  test('Greek lowercase alpha α (U+03B1)', () => {
    expect('α').toMatchTree(`
FunctionCallOrIdentifier
Identifier α`)
  })
  test('Greek lowercase beta β (U+03B2)', () => {
    expect('β').toMatchTree(`
FunctionCallOrIdentifier
Identifier β`)
  })
  test('Greek lowercase lambda λ (U+03BB)', () => {
    expect('λ').toMatchTree(`
FunctionCallOrIdentifier
Identifier λ`)
  })
  test('Greek lowercase pi π (U+03C0)', () => {
    // Note: This is different from mathematical pi 𝜋 (U+1D70B)
    expect('π').toMatchTree(`
FunctionCallOrIdentifier
Identifier π`)
  })
})
// Mathematical Alphanumeric Symbols (U+1D400-U+1D7FF) are accepted as
// identifier characters; each symbol is expected to parse as an identifier.
// The suite name no longer claims they are unsupported, since the assertions
// require support.
describe('Mathematical Alphanumeric Symbols', () => {
  test('Mathematical italic small pi 𝜋 (U+1D70B)', () => {
    expect('𝜋').toMatchTree(`
FunctionCallOrIdentifier
Identifier 𝜋`)
  })
  test('Mathematical bold small x 𝐱 (U+1D431)', () => {
    expect('𝐱').toMatchTree(`
FunctionCallOrIdentifier
Identifier 𝐱`)
  })
  test('Mathematical script capital F 𝓕 (U+1D4D5)', () => {
    expect('𝓕').toMatchTree(`
FunctionCallOrIdentifier
Identifier 𝓕`)
  })
})
// Mathematical Operators (U+2200-U+22FF) are accepted as identifier
// characters. Each expected tree echoes the operator after `Identifier`, the
// same way every other identifier test echoes its input.
describe('Mathematical Operators', () => {
  test('Infinity symbol ∞ (U+221E)', () => {
    expect('∞').toMatchTree(`
FunctionCallOrIdentifier
Identifier ∞`)
  })
  test('Sum symbol ∑ (U+2211)', () => {
    expect('∑').toMatchTree(`
FunctionCallOrIdentifier
Identifier ∑`)
  })
  test('Integral symbol ∫ (U+222B)', () => {
    expect('∫').toMatchTree(`
FunctionCallOrIdentifier
Identifier ∫`)
  })
})
// Superscript/subscript characters are accepted inside identifiers. The
// expected identifier text must be the full input, including the
// superscript or subscript character.
describe('Superscripts and Subscripts', () => {
  test('Superscript two ² (U+00B2)', () => {
    expect('x²').toMatchTree(`
FunctionCallOrIdentifier
Identifier x²`)
  })
  test('Subscript two ₂ (U+2082)', () => {
    expect('h₂o').toMatchTree(`
FunctionCallOrIdentifier
Identifier h₂o`)
  })
})
// Arrows (U+2190-U+21FF) are accepted as identifier characters. Each expected
// tree echoes the arrow after `Identifier`, the same way every other
// identifier test echoes its input.
describe('Arrows', () => {
  test('Rightward arrow → (U+2192)', () => {
    expect('→').toMatchTree(`
FunctionCallOrIdentifier
Identifier →`)
  })
  test('Leftward arrow ← (U+2190)', () => {
    expect('←').toMatchTree(`
FunctionCallOrIdentifier
Identifier ←`)
  })
  test('Double rightward arrow ⇒ (U+21D2)', () => {
    expect('⇒').toMatchTree(`
FunctionCallOrIdentifier
Identifier ⇒`)
  })
})
// Hiragana (U+3040-U+309F), Katakana (U+30A0-U+30FF) and CJK Unified
// Ideographs (U+4E00-U+9FFF) are accepted as identifier characters. Each
// expected tree echoes the character after `Identifier`.
describe('CJK Symbols', () => {
  test('Hiragana あ (U+3042)', () => {
    expect('あ').toMatchTree(`
FunctionCallOrIdentifier
Identifier あ`)
  })
  test('Katakana カ (U+30AB)', () => {
    expect('カ').toMatchTree(`
FunctionCallOrIdentifier
Identifier カ`)
  })
  test('CJK Unified Ideograph 中 (U+4E2D)', () => {
    expect('中').toMatchTree(`
FunctionCallOrIdentifier
Identifier 中`)
  })
})
}) })
describe('Parentheses', () => { describe('Parentheses', () => {
@ -349,3 +547,27 @@ describe('DotGet whitespace sensitivity', () => {
expect('readme.txt').toMatchTree(`Word readme.txt`) expect('readme.txt').toMatchTree(`Word readme.txt`)
}) })
}) })
// `#` comments are skipped by the parser: the expected trees below contain
// only the surrounding statements, with no node for any comment text.
describe('Comments', () => {
  test('are barely there', () => {
    // Trailing comments after two assignments.
    const trailingComments = `x = 5 # one banana\ny = 2 # two bananas`
    const trailingCommentsTree = `
Assign
AssignableIdentifier x
Eq =
Number 5
Assign
AssignableIdentifier y
Eq =
Number 2`

    // Whole-line comments before and after code, plus a trailing comment.
    const mixedComments =
      '# some comment\nbasename = 5 # very astute\n basename / prop\n# good info'
    const mixedCommentsTree = `
Assign
AssignableIdentifier basename
Eq =
Number 5
BinOp
Identifier basename
Slash /
Identifier prop`

    expect(trailingComments).toMatchTree(trailingCommentsTree)
    expect(mixedComments).toMatchTree(mixedCommentsTree)
  })
})

View File

@ -4,7 +4,7 @@ import '../shrimp.grammar' // Importing this so changes cause it to retest!
describe('if/elseif/else', () => { describe('if/elseif/else', () => {
test('parses single line if', () => { test('parses single line if', () => {
expect(`if y = 1: 'cool'`).toMatchTree(` expect(`if y = 1: 'cool' end`).toMatchTree(`
IfExpr IfExpr
keyword if keyword if
ConditionalOp ConditionalOp
@ -12,12 +12,13 @@ describe('if/elseif/else', () => {
Eq = Eq =
Number 1 Number 1
colon : colon :
ThenBlock SingleLineThenBlock
String String
StringFragment cool StringFragment cool
keyword end
`) `)
expect('a = if x: 2').toMatchTree(` expect('a = if x: 2 end').toMatchTree(`
Assign Assign
AssignableIdentifier a AssignableIdentifier a
Eq = Eq =
@ -25,8 +26,9 @@ describe('if/elseif/else', () => {
keyword if keyword if
Identifier x Identifier x
colon : colon :
ThenBlock SingleLineThenBlock
Number 2 Number 2
keyword end
`) `)
}) })
@ -138,7 +140,7 @@ describe('if/elseif/else', () => {
}) })
test('does not parse identifiers that start with if', () => { test('does not parse identifiers that start with if', () => {
expect('iffy = if true: 2').toMatchTree(` expect('iffy = if true: 2 end').toMatchTree(`
Assign Assign
AssignableIdentifier iffy AssignableIdentifier iffy
Eq = Eq =
@ -146,8 +148,9 @@ describe('if/elseif/else', () => {
keyword if keyword if
Boolean true Boolean true
colon : colon :
ThenBlock SingleLineThenBlock
Number 2 Number 2
keyword end
`) `)
}) })
}) })

View File

@ -71,4 +71,20 @@ end
keyword end keyword end
`) `)
}) })
// A multiline `do:` ... `end` function should parse to a FunctionDef whose
// body holds just the number.
// NOTE(review): the test name mentions empty lines, but the source string as
// shown here contains none between `do:` and `end` — confirm blank lines were
// not lost from this fixture.
test('multiline with empty lines', () => {
  const source = `
do:
2
end
`
  const expectedTree = `
FunctionDef
keyword do
Params
colon :
Number 2
keyword end
`
  expect(source).toMatchTree(expectedTree)
})
}) })

View File

@ -19,7 +19,7 @@ export const tokenizer = new ExternalTokenizer(
// Don't consume things that start with - or + followed by a digit (negative/positive numbers) // Don't consume things that start with - or + followed by a digit (negative/positive numbers)
if ((ch === 45 /* - */ || ch === 43) /* + */ && isDigit(input.peek(1))) return if ((ch === 45 /* - */ || ch === 43) /* + */ && isDigit(input.peek(1))) return
const isValidStart = isLowercaseLetter(ch) || isEmoji(ch) const isValidStart = isLowercaseLetter(ch) || isEmojiOrUnicode(ch)
const canBeWord = stack.canShift(Word) const canBeWord = stack.canShift(Word)
// Consume all word characters, tracking if it remains a valid identifier // Consume all word characters, tracking if it remains a valid identifier
@ -111,8 +111,8 @@ const consumeWordToken = (
if (!isWordChar(nextCh)) break if (!isWordChar(nextCh)) break
} }
// Track identifier validity: must be lowercase, digit, dash, or emoji // Track identifier validity: must be lowercase, digit, dash, or emoji/unicode
if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 /* - */ && !isEmoji(ch)) { if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 /* - */ && !isEmojiOrUnicode(ch)) {
if (!canBeWord) break if (!canBeWord) break
isValidIdentifier = false isValidIdentifier = false
} }
@ -222,7 +222,7 @@ const getFullCodePoint = (input: InputStream, pos: number): number => {
return ch return ch
} }
const isEmoji = (ch: number): boolean => { const isEmojiOrUnicode = (ch: number): boolean => {
return ( return (
// Basic Emoticons // Basic Emoticons
(ch >= 0x1f600 && ch <= 0x1f64f) || (ch >= 0x1f600 && ch <= 0x1f64f) ||
@ -247,7 +247,25 @@ const isEmoji = (ch: number): boolean => {
// Additional miscellaneous items // Additional miscellaneous items
(ch >= 0x238c && ch <= 0x2454) || (ch >= 0x238c && ch <= 0x2454) ||
// Combining Diacritical Marks for Symbols // Combining Diacritical Marks for Symbols
(ch >= 0x20d0 && ch <= 0x20ff) (ch >= 0x20d0 && ch <= 0x20ff) ||
// Latin-1 Supplement (includes ², ³, ¹ and other special chars)
(ch >= 0x00a0 && ch <= 0x00ff) ||
// Greek and Coptic (U+0370-U+03FF)
(ch >= 0x0370 && ch <= 0x03ff) ||
// Mathematical Alphanumeric Symbols (U+1D400-U+1D7FF)
(ch >= 0x1d400 && ch <= 0x1d7ff) ||
// Mathematical Operators (U+2200-U+22FF)
(ch >= 0x2200 && ch <= 0x22ff) ||
// Superscripts and Subscripts (U+2070-U+209F)
(ch >= 0x2070 && ch <= 0x209f) ||
// Arrows (U+2190-U+21FF)
(ch >= 0x2190 && ch <= 0x21ff) ||
// Hiragana (U+3040-U+309F)
(ch >= 0x3040 && ch <= 0x309f) ||
// Katakana (U+30A0-U+30FF)
(ch >= 0x30a0 && ch <= 0x30ff) ||
// CJK Unified Ideographs (U+4E00-U+9FFF)
(ch >= 0x4e00 && ch <= 0x9fff)
) )
} }