emoji
This commit is contained in:
parent
7585f0e8a2
commit
0168d7f933
|
|
@ -1,4 +1,7 @@
|
||||||
|
|
||||||
[serve.static]
|
[serve.static]
|
||||||
plugins = ["bun-plugin-tailwind"]
|
plugins = ["bun-plugin-tailwind"]
|
||||||
env = "BUN_PUBLIC_*"
|
env = "BUN_PUBLIC_*"
|
||||||
|
|
||||||
|
[test]
|
||||||
|
preload = ["./src/testSetup.ts"]
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@
|
||||||
"private": true,
|
"private": true,
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
"pretest": "bun generate-parser",
|
||||||
"serve": "bun --hot src/server/server.tsx",
|
"serve": "bun --hot src/server/server.tsx",
|
||||||
"generate-parser": "lezer-generator src/parser/shrimp.grammar --typeScript -o src/parser/shrimp.ts"
|
"generate-parser": "lezer-generator src/parser/shrimp.grammar --typeScript -o src/parser/shrimp.ts"
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -1,17 +1,14 @@
|
||||||
@external propSource highlighting from "./highlight.js"
|
@external propSource highlighting from "./highlight.js"
|
||||||
|
|
||||||
@top Program { expr* }
|
@top Program { expr* }
|
||||||
|
|
||||||
@skip { space }
|
@skip { space }
|
||||||
|
|
||||||
@tokens {
|
@tokens {
|
||||||
@precedence { fn Boolean Identifier }
|
|
||||||
|
|
||||||
space { @whitespace+ }
|
space { @whitespace+ }
|
||||||
Number { $[0-9]+ ('.' $[0-9]+)? }
|
Number { $[0-9]+ ('.' $[0-9]+)? }
|
||||||
Boolean { "true" | "false" }
|
Boolean { "true" | "false" }
|
||||||
String { '"' !["]* '"' }
|
String { '"' !["]* '"' }
|
||||||
Identifier { $[A-Za-z_]$[A-Za-z_0-9-]* }
|
|
||||||
fn[@name=Keyword] { "fn" }
|
fn[@name=Keyword] { "fn" }
|
||||||
equals[@name=Operator] { "=" }
|
equals[@name=Operator] { "=" }
|
||||||
":"[@name=Colon]
|
":"[@name=Colon]
|
||||||
|
|
@ -23,6 +20,10 @@
|
||||||
rightParen[@name=Paren] { ")" }
|
rightParen[@name=Paren] { ")" }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@external tokens identifierTokenizer from "./tokenizers" {
|
||||||
|
Identifier
|
||||||
|
}
|
||||||
|
|
||||||
@precedence {
|
@precedence {
|
||||||
multiplicative @left,
|
multiplicative @left,
|
||||||
additive @left,
|
additive @left,
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
// This file was generated by lezer-generator. You probably shouldn't edit it.
|
// This file was generated by lezer-generator. You probably shouldn't edit it.
|
||||||
export const
|
export const
|
||||||
Program = 1,
|
Identifier = 1,
|
||||||
Assignment = 2,
|
Program = 2,
|
||||||
Identifier = 3,
|
Assignment = 3,
|
||||||
equals = 4,
|
equals = 4,
|
||||||
Function = 5,
|
Function = 5,
|
||||||
fn = 6,
|
fn = 6,
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,27 @@
|
||||||
import { regenerateParser } from '@/parser/test-helper'
|
import { expect, describe, test } from 'bun:test'
|
||||||
import { expect, beforeAll, describe, test } from 'bun:test'
|
|
||||||
|
describe('Identifier', () => {
|
||||||
|
test('parses simple identifiers', () => {
|
||||||
|
expect('hyphenated-var').toMatchTree(`Identifier hyphenated-var`)
|
||||||
|
expect('var').toMatchTree(`Identifier var`)
|
||||||
|
expect('var123').toMatchTree(`Identifier var123`)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('fails on underscores and capital letters', () => {
|
||||||
|
expect('myVar').toFailParse()
|
||||||
|
expect('underscore_var').toFailParse()
|
||||||
|
expect('_leadingUnderscore').toFailParse()
|
||||||
|
expect('trailingUnderscore_').toFailParse()
|
||||||
|
expect('mixed-123_var').toFailParse()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('parses identifiers with emojis', () => {
|
||||||
|
expect('var😊').toMatchTree(`Identifier var😊`)
|
||||||
|
expect('😊').toMatchTree(`Identifier 😊`)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
describe('BinOp', () => {
|
describe('BinOp', () => {
|
||||||
beforeAll(() => regenerateParser())
|
|
||||||
|
|
||||||
test('addition tests', () => {
|
test('addition tests', () => {
|
||||||
expect('2 + 3').toMatchTree(`
|
expect('2 + 3').toMatchTree(`
|
||||||
BinOp
|
BinOp
|
||||||
|
|
@ -60,8 +78,6 @@ describe('BinOp', () => {
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('Fn', () => {
|
describe('Fn', () => {
|
||||||
beforeAll(() => regenerateParser())
|
|
||||||
|
|
||||||
test('parses function with single parameter', () => {
|
test('parses function with single parameter', () => {
|
||||||
expect('fn x: x + 1').toMatchTree(`
|
expect('fn x: x + 1').toMatchTree(`
|
||||||
Function
|
Function
|
||||||
|
|
@ -109,8 +125,6 @@ describe('Fn', () => {
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('Identifier', () => {
|
describe('Identifier', () => {
|
||||||
beforeAll(() => regenerateParser())
|
|
||||||
|
|
||||||
test('parses hyphenated identifiers correctly', () => {
|
test('parses hyphenated identifiers correctly', () => {
|
||||||
expect('my-var - another-var').toMatchTree(`
|
expect('my-var - another-var').toMatchTree(`
|
||||||
BinOp
|
BinOp
|
||||||
|
|
@ -133,8 +147,6 @@ describe('Identifier', () => {
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('Assignment', () => {
|
describe('Assignment', () => {
|
||||||
beforeAll(() => regenerateParser())
|
|
||||||
|
|
||||||
test('parses assignment with addition', () => {
|
test('parses assignment with addition', () => {
|
||||||
expect('x = 5 + 3').toMatchTree(`
|
expect('x = 5 + 3').toMatchTree(`
|
||||||
Assignment
|
Assignment
|
||||||
|
|
@ -165,8 +177,6 @@ describe('Assignment', () => {
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('Parentheses', () => {
|
describe('Parentheses', () => {
|
||||||
beforeAll(() => regenerateParser())
|
|
||||||
|
|
||||||
test('parses expressions with parentheses correctly', () => {
|
test('parses expressions with parentheses correctly', () => {
|
||||||
expect('(2 + 3) * 4').toMatchTree(`
|
expect('(2 + 3) * 4').toMatchTree(`
|
||||||
BinOp
|
BinOp
|
||||||
|
|
@ -205,8 +215,6 @@ describe('Parentheses', () => {
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('multiline', () => {
|
describe('multiline', () => {
|
||||||
beforeAll(() => regenerateParser())
|
|
||||||
|
|
||||||
test('parses multiline expressions', () => {
|
test('parses multiline expressions', () => {
|
||||||
expect(`
|
expect(`
|
||||||
5 + 4
|
5 + 4
|
||||||
|
|
|
||||||
|
|
@ -1,18 +1,19 @@
|
||||||
// This file was generated by lezer-generator. You probably shouldn't edit it.
|
// This file was generated by lezer-generator. You probably shouldn't edit it.
|
||||||
import {LRParser} from "@lezer/lr"
|
import {LRParser} from "@lezer/lr"
|
||||||
|
import {identifierTokenizer} from "./tokenizers"
|
||||||
import {highlighting} from "./highlight.js"
|
import {highlighting} from "./highlight.js"
|
||||||
export const parser = LRParser.deserialize({
|
export const parser = LRParser.deserialize({
|
||||||
version: 14,
|
version: 14,
|
||||||
states: "$OQVQPOOOkQPO'#CuO!fQPO'#CaO!nQPO'#CoOOQO'#Cu'#CuOVQPO'#CuOOQO'#Ct'#CtQVQPOOOVQPO,58xOOQO'#Cp'#CpO#cQPO'#CcO#kQPO,58{OVQPO,59POVQPO,59PO#pQPO,59aOOQO-E6m-E6mO$RQPO1G.dOOQO-E6n-E6nOVQPO1G.gOOQO1G.k1G.kO$yQPO1G.kOOQO1G.{1G.{O%qQPO7+$R",
|
states: "$OQVQROOOkQRO'#CuO!fQRO'#CaO!nQRO'#CoOOQQ'#Cu'#CuOVQRO'#CuOOQQ'#Ct'#CtQVQROOOVQRO,58yOOQQ'#Cp'#CpO#cQRO'#CcO#kQPO,58{OVQRO,59POVQRO,59PO#pQPO,59aOOQQ-E6m-E6mO$RQRO1G.eOOQQ-E6n-E6nOVQRO1G.gOOQQ1G.k1G.kO$yQRO1G.kOOQQ1G.{1G.{O%qQRO7+$R",
|
||||||
stateData: "&n~OgOS~ORPOUQO^SO_SO`SOaTO~OSWORiXUiXYiXZiX[iX]iX^iX_iX`iXaiXeiXbiX~ORXOWVP~OY[OZ[O[]O]]ORcXUcX^cX_cX`cXacXecX~ORXOWVX~OWbO~OY[OZ[O[]O]]ObeO~OY[OZ[O[]O]]ORQiUQi^Qi_Qi`QiaQieQibQi~OY[OZ[ORXiUXi[Xi]Xi^Xi_Xi`XiaXieXibXi~OY[OZ[O[]O]]ORTqUTq^Tq_Tq`TqaTqeTqbTq~OU`R`~",
|
stateData: "&i~OgOS~OPPOUQO^SO_SO`SOaTO~OSWOPiXUiXYiXZiX[iX]iX^iX_iX`iXaiXeiXbiX~OPXOWVP~OY[OZ[O[]O]]OPcXUcX^cX_cX`cXacXecX~OPXOWVX~OWbO~OY[OZ[O[]O]]ObeO~OY[OZ[O[]O]]OPRiURi^Ri_Ri`RiaRieRibRi~OY[OZ[OPXiUXi[Xi]Xi^Xi_Xi`XiaXieXibXi~OY[OZ[O[]O]]OPTqUTq^Tq_Tq`TqaTqeTqbTq~O",
|
||||||
goto: "!hjPPkPPkPtPkPPPPPPPPPw}PPP!Tk_UOTVW[]bRZQQVOR_VQYQRaYSROVQ^TQ`WQc[Qd]Rfb",
|
goto: "!hjPPPkPkPtPkPPPPPPPPPw}PPP!Tk_UOTVW[]bRZQQVOR_VQYQRaYSROVQ^TQ`WQc[Qd]Rfb",
|
||||||
nodeNames: "⚠ Program Assignment Identifier Operator Function Keyword Params Colon BinOp Operator Operator Operator Operator Number String Boolean Paren Paren",
|
nodeNames: "⚠ Identifier Program Assignment Operator Function Keyword Params Colon BinOp Operator Operator Operator Operator Number String Boolean Paren Paren",
|
||||||
maxTerm: 25,
|
maxTerm: 25,
|
||||||
propSources: [highlighting],
|
propSources: [highlighting],
|
||||||
skippedNodes: [0],
|
skippedNodes: [0],
|
||||||
repeatNodeCount: 2,
|
repeatNodeCount: 2,
|
||||||
tokenData: "*f~RkX^!vpq!vrs#kxy$Yyz$_z{$d{|$i}!O$n!P!Q$s!Q![$x![!]%c!_!`%h!c!}%m#R#S%m#T#Y%m#Y#Z&R#Z#h%m#h#i)`#i#o%m#y#z!v$f$g!v#BY#BZ!v$IS$I_!v$I|$JO!v$JT$JU!v$KV$KW!v&FU&FV!v~!{Yg~X^!vpq!v#y#z!v$f$g!v#BY#BZ!v$IS$I_!v$I|$JO!v$JT$JU!v$KV$KW!v&FU&FV!v~#nTOr#krs#}s;'S#k;'S;=`$S<%lO#k~$SO_~~$VP;=`<%l#k~$_Oa~~$dOb~~$iOY~~$nO[~~$sO]~~$xOZ~~$}Q^~!O!P%T!Q![$x~%WP!Q![%Z~%`P^~!Q![%Z~%hOW~~%mOS~~%rTR~}!O%m!Q![%m!c!}%m#R#S%m#T#o%m~&WWR~}!O%m!Q![%m!c!}%m#R#S%m#T#U&p#U#b%m#b#c(x#c#o%m~&uVR~}!O%m!Q![%m!c!}%m#R#S%m#T#`%m#`#a'[#a#o%m~'aVR~}!O%m!Q![%m!c!}%m#R#S%m#T#g%m#g#h'v#h#o%m~'{VR~}!O%m!Q![%m!c!}%m#R#S%m#T#X%m#X#Y(b#Y#o%m~(iT`~R~}!O%m!Q![%m!c!}%m#R#S%m#T#o%m~)PTU~R~}!O%m!Q![%m!c!}%m#R#S%m#T#o%m~)eVR~}!O%m!Q![%m!c!}%m#R#S%m#T#f%m#f#g)z#g#o%m~*PVR~}!O%m!Q![%m!c!}%m#R#S%m#T#i%m#i#j'v#j#o%m",
|
tokenData: "&a~RfX^!gpq!grs#[xy#yyz$Oz{$T{|$Y}!O$_!P!Q$d!Q![$i![!]%S!_!`%X#Y#Z%^#h#i&T#y#z!g$f$g!g#BY#BZ!g$IS$I_!g$I|$JO!g$JT$JU!g$KV$KW!g&FU&FV!g~!lYg~X^!gpq!g#y#z!g$f$g!g#BY#BZ!g$IS$I_!g$I|$JO!g$JT$JU!g$KV$KW!g&FU&FV!g~#_TOr#[rs#ns;'S#[;'S;=`#s<%lO#[~#sO_~~#vP;=`<%l#[~$OOa~~$TOb~~$YOY~~$_O[~~$dO]~~$iOZ~~$nQ^~!O!P$t!Q![$i~$wP!Q![$z~%PP^~!Q![$z~%XOW~~%^OS~~%aQ#T#U%g#b#c&O~%jP#`#a%m~%pP#g#h%s~%vP#X#Y%y~&OO`~~&TOU~~&WP#f#g&Z~&^P#i#j%s",
|
||||||
tokenizers: [0],
|
tokenizers: [0, identifierTokenizer],
|
||||||
topRules: {"Program":[0,1]},
|
topRules: {"Program":[0,2]},
|
||||||
tokenPrec: 255
|
tokenPrec: 0
|
||||||
})
|
})
|
||||||
|
|
|
||||||
82
src/parser/tokenizers.ts
Normal file
82
src/parser/tokenizers.ts
Normal file
|
|
@ -0,0 +1,82 @@
|
||||||
|
import { ExternalTokenizer, InputStream } from '@lezer/lr'
|
||||||
|
import { Identifier } from './shrimp.terms'
|
||||||
|
|
||||||
|
/**
 * Reports whether a character code is an ASCII lowercase letter.
 *
 * @param ch - Code point (or UTF-16 code unit) to classify.
 * @returns `true` when `ch` is within 97–122 (`a`–`z`), otherwise `false`.
 */
function isLowercaseLetter(ch: number): boolean {
  return ch >= 97 && ch <= 122 // a-z
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a character code is an ASCII decimal digit.
 *
 * @param ch - Code point (or UTF-16 code unit) to classify.
 * @returns `true` when `ch` is within 48–57 (`0`–`9`), otherwise `false`.
 */
function isDigit(ch: number): boolean {
  return ch >= 48 && ch <= 57 // 0-9
}
|
||||||
|
|
||||||
|
/**
 * Reads the character that starts `pos` UTF-16 code units ahead of the current
 * stream position, combining a surrogate pair into a single code point.
 *
 * @param input - Token input stream; only `peek` is used, so nothing is consumed.
 * @param pos - Look-ahead offset, in UTF-16 code units.
 * @returns The combined code point (above 0xFFFF) when a high surrogate is
 *   immediately followed by a low surrogate; otherwise the value returned by
 *   `input.peek(pos)` unchanged. An unpaired surrogate is therefore returned as-is.
 */
function getFullCodePoint(input: InputStream, pos: number): number {
  const ch = input.peek(pos)

  // A high surrogate (0xD800-0xDBFF) is only half of a character.
  if (ch >= 0xd800 && ch <= 0xdbff) {
    const low = input.peek(pos + 1)

    // Combine only when the next unit really is a low surrogate (0xDC00-0xDFFF).
    if (low >= 0xdc00 && low <= 0xdfff) {
      return 0x10000 + ((ch & 0x3ff) << 10) + (low & 0x3ff)
    }
  }

  return ch // single code unit (or an unpaired surrogate)
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a code point falls in one of the ranges this tokenizer
 * treats as "emoji".
 *
 * The ranges are deliberately broad: besides pictographic blocks they include
 * variation selectors and combining marks for symbols, so those code points
 * are accepted wherever an emoji is accepted.
 *
 * @param ch - Full Unicode code point (surrogate pairs already combined).
 * @returns `true` when `ch` lies inside any listed range, otherwise `false`.
 */
function isEmoji(ch: number): boolean {
  return (
    // Emoticons
    (ch >= 0x1f600 && ch <= 0x1f64f) ||
    // Miscellaneous Symbols and Pictographs
    (ch >= 0x1f300 && ch <= 0x1f5ff) ||
    // Transport and Map Symbols
    (ch >= 0x1f680 && ch <= 0x1f6ff) ||
    // Regional Indicator Symbols (flags); also inside the 0x1f018-0x1f270 range
    // below, kept separately so the intent stays visible
    (ch >= 0x1f1e6 && ch <= 0x1f1ff) ||
    // Miscellaneous Symbols (hearts, stars, weather)
    (ch >= 0x2600 && ch <= 0x26ff) ||
    // Dingbats (scissors, pencils, etc)
    (ch >= 0x2700 && ch <= 0x27bf) ||
    // Supplemental Symbols and Pictographs
    (ch >= 0x1f900 && ch <= 0x1f9ff) ||
    // Symbols and Pictographs Extended-A
    (ch >= 0x1fa70 && ch <= 0x1faff) ||
    // Game tiles, playing cards, and enclosed alphanumeric / ideographic symbols
    (ch >= 0x1f018 && ch <= 0x1f270) ||
    // Variation Selectors (used to request emoji presentation)
    (ch >= 0xfe00 && ch <= 0xfe0f) ||
    // Technical symbols, control pictures and OCR characters
    (ch >= 0x238c && ch <= 0x2454) ||
    // Combining Diacritical Marks for Symbols
    (ch >= 0x20d0 && ch <= 0x20ff)
  )
}
|
||||||
|
|
||||||
|
/**
 * External tokenizer that recognises `Identifier` tokens.
 *
 * An identifier starts with an ASCII lowercase letter or an emoji (as defined
 * by `isEmoji`) and continues with any run of lowercase letters, digits,
 * hyphens or emoji. Uppercase letters and underscores are not identifier
 * characters. When the first character cannot start an identifier the
 * callback returns without accepting a token.
 */
export const identifierTokenizer = new ExternalTokenizer((input: InputStream) => {
  const first = getFullCodePoint(input, 0)

  if (!isLowercaseLetter(first) && !isEmoji(first)) return

  // Length is tracked in UTF-16 code units: astral code points occupy two.
  let pos = first > 0xffff ? 2 : 1

  // Look ahead (without consuming) for as long as identifier characters follow.
  while (true) {
    const nextCh = getFullCodePoint(input, pos)
    const isIdentifierPart =
      isLowercaseLetter(nextCh) || isDigit(nextCh) || nextCh === 45 /* - */ || isEmoji(nextCh)

    if (!isIdentifierPart) break

    pos += nextCh > 0xffff ? 2 : 1
  }

  input.advance(pos) // consume the whole identifier at once
  input.acceptToken(Identifier)
})
|
||||||
|
|
@ -1,25 +1,34 @@
|
||||||
import { beforeAll, expect } from 'bun:test'
|
import { expect } from 'bun:test'
|
||||||
import { Tree, TreeCursor } from '@lezer/common'
|
import { Tree, TreeCursor } from '@lezer/common'
|
||||||
import grammarFile from './shrimp.grammar'
|
import { parser } from './parser/shrimp.ts'
|
||||||
import { parser } from './shrimp.ts'
|
|
||||||
import { $ } from 'bun'
|
import { $ } from 'bun'
|
||||||
|
|
||||||
// Regenerate the parser if the grammar file is newer than the generated parser
|
const regenerateParser = async () => {
|
||||||
// This makes --watch work without needing to manually regenerate the parser
|
let generate = true
|
||||||
export const regenerateParser = async () => {
|
try {
|
||||||
const grammarStat = await Bun.file('src/parser/shrimp.grammar').stat()
|
const grammarStat = await Bun.file('./src/parser/shrimp.grammar').stat()
|
||||||
const jsStat = await Bun.file('src/parser/shrimp.ts').stat()
|
const tokenizerStat = await Bun.file('./src/parser/tokenizers.ts').stat()
|
||||||
|
const parserStat = await Bun.file('./src/parser/shrimp.ts').stat()
|
||||||
|
|
||||||
if (grammarStat.mtime <= jsStat.mtime) return
|
if (grammarStat.mtime <= parserStat.mtime && tokenizerStat.mtime <= parserStat.mtime) {
|
||||||
|
generate = false
|
||||||
console.log(`Regenerating parser from ${grammarFile}...`)
|
}
|
||||||
await $`bun generate-parser `
|
} catch (e) {
|
||||||
|
console.error('Error checking or regenerating parser:', e)
|
||||||
|
} finally {
|
||||||
|
if (generate) {
|
||||||
|
await $`bun generate-parser`
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
await regenerateParser()
|
||||||
|
|
||||||
// Type declaration for TypeScript
|
// Type declaration for TypeScript
|
||||||
declare module 'bun:test' {
|
declare module 'bun:test' {
|
||||||
interface Matchers<T> {
|
interface Matchers<T> {
|
||||||
toMatchTree(expected: string): T
|
toMatchTree(expected: string): T
|
||||||
|
toFailParse(): T
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -46,6 +55,45 @@ expect.extend({
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
/**
 * Matcher that passes when parsing `received` produces a tree containing at
 * least one error node.
 *
 * Fails when `received` is not a string, when the input parses without any
 * error node (the rendered tree is included in the message), or when the
 * parser throws.
 */
toFailParse(received: unknown) {
  if (typeof received !== 'string') {
    return {
      message: () => 'toFailParse can only be used with string values',
      pass: false,
    }
  }

  try {
    const tree = parser.parse(received)
    let hasErrors = false

    tree.iterate({
      enter(n) {
        if (n.type.isError) {
          hasErrors = true
          return false // no need to descend into an error node
        }
      },
    })

    if (hasErrors) {
      return {
        message: () => `Expected input to fail parsing, and it did.`,
        pass: true,
      }
    }

    const actual = treeToString(tree, received)
    return {
      message: () => `Expected input to fail parsing, but it parsed successfully:\n${actual}`,
      pass: false,
    }
  } catch (error) {
    // The caught value is unknown; narrow it instead of asserting it is an Error.
    const reason = error instanceof Error ? error.message : String(error)
    return {
      message: () => `Parsing threw an error: ${reason}`,
      pass: false,
    }
  }
},
|
||||||
})
|
})
|
||||||
|
|
||||||
const treeToString = (tree: Tree, input: string): string => {
|
const treeToString = (tree: Tree, input: string): string => {
|
||||||
Loading…
Reference in New Issue
Block a user