Compare commits
2 Commits
cdcaf5c9d3
...
0fc1f9f895
| Author | SHA1 | Date | |
|---|---|---|---|
| 0fc1f9f895 | |||
| 7cf7ac3703 |
|
|
@ -30,6 +30,204 @@ describe('Identifier', () => {
|
||||||
FunctionCallOrIdentifier
|
FunctionCallOrIdentifier
|
||||||
Identifier moo-😊-34`)
|
Identifier moo-😊-34`)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test('parses mathematical unicode symbols like 𝜋 as identifiers', () => {
|
||||||
|
expect('𝜋').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 𝜋`)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('Unicode Symbol Support', () => {
|
||||||
|
describe('Emoji (currently supported)', () => {
|
||||||
|
test('Basic Emoticons (U+1F600-U+1F64F)', () => {
|
||||||
|
expect('😀').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 😀`)
|
||||||
|
|
||||||
|
expect('😊-counter').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 😊-counter`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Miscellaneous Symbols and Pictographs (U+1F300-U+1F5FF)', () => {
|
||||||
|
expect('🌍').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 🌍`)
|
||||||
|
|
||||||
|
expect('🔥-handler').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 🔥-handler`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Transport and Map Symbols (U+1F680-U+1F6FF)', () => {
|
||||||
|
expect('🚀').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 🚀`)
|
||||||
|
|
||||||
|
expect('🚀-launch').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 🚀-launch`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Regional Indicator Symbols / Flags (U+1F1E6-U+1F1FF)', () => {
|
||||||
|
// Note: Flags are typically two regional indicators combined
|
||||||
|
expect('🇺').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 🇺`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Supplemental Symbols and Pictographs (U+1F900-U+1F9FF)', () => {
|
||||||
|
expect('🤖').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 🤖`)
|
||||||
|
|
||||||
|
expect('🦀-lang').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 🦀-lang`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Dingbats (U+2700-U+27BF)', () => {
|
||||||
|
expect('✂').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier ✂`)
|
||||||
|
|
||||||
|
expect('✨-magic').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier ✨-magic`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Miscellaneous Symbols (U+2600-U+26FF)', () => {
|
||||||
|
expect('⚡').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier ⚡`)
|
||||||
|
|
||||||
|
expect('☀-bright').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier ☀-bright`)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('Greek Letters (not currently supported)', () => {
|
||||||
|
test('Greek lowercase alpha α (U+03B1)', () => {
|
||||||
|
expect('α').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier α`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Greek lowercase beta β (U+03B2)', () => {
|
||||||
|
expect('β').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier β`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Greek lowercase lambda λ (U+03BB)', () => {
|
||||||
|
expect('λ').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier λ`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Greek lowercase pi π (U+03C0)', () => {
|
||||||
|
// Note: This is different from mathematical pi 𝜋
|
||||||
|
expect('π').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier π`)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('Mathematical Alphanumeric Symbols (not currently supported)', () => {
|
||||||
|
test('Mathematical italic small pi 𝜋 (U+1D70B)', () => {
|
||||||
|
expect('𝜋').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 𝜋`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Mathematical bold small x 𝐱 (U+1D431)', () => {
|
||||||
|
expect('𝐱').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 𝐱`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Mathematical script capital F 𝓕 (U+1D4D5)', () => {
|
||||||
|
expect('𝓕').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 𝓕`)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('Mathematical Operators (not currently supported)', () => {
|
||||||
|
test('Infinity symbol ∞ (U+221E)', () => {
|
||||||
|
expect('∞').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier ∞`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Sum symbol ∑ (U+2211)', () => {
|
||||||
|
expect('∑').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier ∑`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Integral symbol ∫ (U+222B)', () => {
|
||||||
|
expect('∫').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier ∫`)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('Superscripts and Subscripts (not currently supported)', () => {
|
||||||
|
test('Superscript two ² (U+00B2)', () => {
|
||||||
|
expect('x²').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier x²`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Subscript two ₂ (U+2082)', () => {
|
||||||
|
expect('h₂o').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier h₂o`)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('Arrows (not currently supported)', () => {
|
||||||
|
test('Rightward arrow → (U+2192)', () => {
|
||||||
|
expect('→').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier →`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Leftward arrow ← (U+2190)', () => {
|
||||||
|
expect('←').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier ←`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Double rightward arrow ⇒ (U+21D2)', () => {
|
||||||
|
expect('⇒').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier ⇒`)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('CJK Symbols (not currently supported)', () => {
|
||||||
|
test('Hiragana あ (U+3042)', () => {
|
||||||
|
expect('あ').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier あ`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('Katakana カ (U+30AB)', () => {
|
||||||
|
expect('カ').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier カ`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('CJK Unified Ideograph 中 (U+4E2D)', () => {
|
||||||
|
expect('中').toMatchTree(`
|
||||||
|
FunctionCallOrIdentifier
|
||||||
|
Identifier 中`)
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('Parentheses', () => {
|
describe('Parentheses', () => {
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ export const tokenizer = new ExternalTokenizer(
|
||||||
// Don't consume things that start with - or + followed by a digit (negative/positive numbers)
|
// Don't consume things that start with - or + followed by a digit (negative/positive numbers)
|
||||||
if ((ch === 45 /* - */ || ch === 43) /* + */ && isDigit(input.peek(1))) return
|
if ((ch === 45 /* - */ || ch === 43) /* + */ && isDigit(input.peek(1))) return
|
||||||
|
|
||||||
const isValidStart = isLowercaseLetter(ch) || isEmoji(ch)
|
const isValidStart = isLowercaseLetter(ch) || isEmojiOrUnicode(ch)
|
||||||
const canBeWord = stack.canShift(Word)
|
const canBeWord = stack.canShift(Word)
|
||||||
|
|
||||||
// Consume all word characters, tracking if it remains a valid identifier
|
// Consume all word characters, tracking if it remains a valid identifier
|
||||||
|
|
@ -106,8 +106,8 @@ const consumeWordToken = (
|
||||||
if (!isWordChar(nextCh)) break
|
if (!isWordChar(nextCh)) break
|
||||||
}
|
}
|
||||||
|
|
||||||
// Track identifier validity: must be lowercase, digit, dash, or emoji
|
// Track identifier validity: must be lowercase, digit, dash, or emoji/unicode
|
||||||
if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 /* - */ && !isEmoji(ch)) {
|
if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 /* - */ && !isEmojiOrUnicode(ch)) {
|
||||||
if (!canBeWord) break
|
if (!canBeWord) break
|
||||||
isValidIdentifier = false
|
isValidIdentifier = false
|
||||||
}
|
}
|
||||||
|
|
@ -217,7 +217,7 @@ const getFullCodePoint = (input: InputStream, pos: number): number => {
|
||||||
return ch
|
return ch
|
||||||
}
|
}
|
||||||
|
|
||||||
const isEmoji = (ch: number): boolean => {
|
const isEmojiOrUnicode = (ch: number): boolean => {
|
||||||
return (
|
return (
|
||||||
// Basic Emoticons
|
// Basic Emoticons
|
||||||
(ch >= 0x1f600 && ch <= 0x1f64f) ||
|
(ch >= 0x1f600 && ch <= 0x1f64f) ||
|
||||||
|
|
@ -242,7 +242,25 @@ const isEmoji = (ch: number): boolean => {
|
||||||
// Additional miscellaneous items
|
// Additional miscellaneous items
|
||||||
(ch >= 0x238c && ch <= 0x2454) ||
|
(ch >= 0x238c && ch <= 0x2454) ||
|
||||||
// Combining Diacritical Marks for Symbols
|
// Combining Diacritical Marks for Symbols
|
||||||
(ch >= 0x20d0 && ch <= 0x20ff)
|
(ch >= 0x20d0 && ch <= 0x20ff) ||
|
||||||
|
// Latin-1 Supplement (includes ², ³, ¹ and other special chars)
|
||||||
|
(ch >= 0x00a0 && ch <= 0x00ff) ||
|
||||||
|
// Greek and Coptic (U+0370-U+03FF)
|
||||||
|
(ch >= 0x0370 && ch <= 0x03ff) ||
|
||||||
|
// Mathematical Alphanumeric Symbols (U+1D400-U+1D7FF)
|
||||||
|
(ch >= 0x1d400 && ch <= 0x1d7ff) ||
|
||||||
|
// Mathematical Operators (U+2200-U+22FF)
|
||||||
|
(ch >= 0x2200 && ch <= 0x22ff) ||
|
||||||
|
// Superscripts and Subscripts (U+2070-U+209F)
|
||||||
|
(ch >= 0x2070 && ch <= 0x209f) ||
|
||||||
|
// Arrows (U+2190-U+21FF)
|
||||||
|
(ch >= 0x2190 && ch <= 0x21ff) ||
|
||||||
|
// Hiragana (U+3040-U+309F)
|
||||||
|
(ch >= 0x3040 && ch <= 0x309f) ||
|
||||||
|
// Katakana (U+30A0-U+30FF)
|
||||||
|
(ch >= 0x30a0 && ch <= 0x30ff) ||
|
||||||
|
// CJK Unified Ideographs (U+4E00-U+9FFF)
|
||||||
|
(ch >= 0x4e00 && ch <= 0x9fff)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user