141 lines
5.8 KiB
TypeScript
141 lines
5.8 KiB
TypeScript
import { escapeHTML } from "bun"
|
|
|
|
/**
 * Category assigned to each lexed slice of source text.
 *
 * `tokenize` produces every member except that regex literals are reported
 * as "string"; "unknown" is used for single characters no pattern matched.
 */
export type TokenType =
  | "string"
  | "number"
  | "keyword"
  | "boolean"
  | "null"
  | "undefined"
  | "comment"
  | "identifier"
  | "punctuation"
  | "whitespace"
  | "unknown"
|
|
|
|
/** One lexed slice of the input. */
export type Token = {
  /** Category of the slice. */
  type: TokenType
  /** Exact source text of the slice. */
  value: string
  /** Index into the source string where the slice begins. */
  start: number
  /** Index just past the slice (`start + value.length` as produced by `tokenize`). */
  end: number
}
|
|
/** Result of `tokenize`: a tagged wrapper around the flat token list. */
export type Program = {
  /** Constant tag identifying this node. */
  type: "Program"
  /** Tokens in source order. */
  tokens: Token[]
}
|
|
|
|
/**
 * Lexer patterns, one per token shape.
 *
 * Every pattern is anchored with `^` because `tokenize` runs it against the
 * remainder of the input (`src.slice(i)`), so a match always starts at the
 * current position. None of them carry the `g`/`y` flags, so they hold no
 * `lastIndex` state between calls.
 */
const RE = {
  // regex literal: /.../flags (handles escapes and [...] classes; still simple)
  regex: /^\/(?![/*])(?:\\.|\[(?:\\.|[^\]\\])*\]|[^\\/\n\r])+\/[a-zA-Z]*/,

  // comments
  lineComment: /^\/\/[^\n\r]*/,
  blockComment: /^\/\*[\s\S]*?\*\//,

  // strings: single-quoted, double-quoted, and backtick (template bodies are
  // not parsed, so `${...}` is simply part of the string)
  sng: /^'(?:\\.|[^'\\])*'/,
  dbl: /^"(?:\\.|[^"\\])*"/,
  bkt: /^`(?:\\.|[^`\\])*`/,

  // numbers: hex / binary / octal / decimal, each with optional `_` digit
  // separators and a trailing BigInt `n`; decimals may instead carry a
  // fraction and/or exponent
  number: /^(?:0[xX][0-9a-fA-F]+(?:_[0-9a-fA-F]+)*n?|0[bB][01]+(?:_[01]+)*n?|0[oO][0-7]+(?:_[0-7]+)*n?|\d+(?:_\d+)*(?:n|(?:\.\d+(?:_\d+)*)?(?:[eE][+\-]?\d+(?:_\d+)*)?))/,

  // literals. The trailing lookahead is used instead of `\b` because `\b`
  // sees `$` as a boundary and would split an identifier such as `true$`.
  boolNullUndef: /^(?:true|false|null|undefined)(?![A-Za-z0-9_$])/,

  // keywords (same identifier-aware boundary as above)
  keywords: /^(?:async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|function|if|import|in|instanceof|let|new|return|super|switch|this|throw|try|typeof|var|void|while|with|yield|as|implements|interface|package|private|protected|public|readonly|abstract|declare|type|from|of)(?![A-Za-z0-9_$])/,

  // identifier / punct / whitespace
  ident: /^[A-Za-z_$][A-Za-z0-9_$]*/,
  // A run of operator characters. `/` is only consumed when it does not open
  // a comment (`//` or `/*`); otherwise `;// note` would be eaten as one
  // punctuation run and the comment text lexed as identifiers.
  punct: /^(?:[()[\]{}.,;:?~!%^&*+\-=|<>]|\/(?![/*]))+/,
  ws: /^\s+/,
}
|
|
|
|
/**
 * Names of built-in types. `tokenToHTML` colours an identifier the same way
 * as a capitalised name when its text appears in this list.
 */
const types: readonly string[] = [
  "string",
  "number",
  "boolean",
  "any",
  "void",
]
|
|
|
|
/**
 * Render `code` as a string of HTML.
 *
 * The result is a fixed `<style>` element (class-based colour rules for
 * `.string`, `.number`, `.keyword` and `.comment`) followed by the markup
 * that `tokenToHTML` produces for each token, in source order.
 *
 * @param code Source text to highlight.
 * @returns The `<style>` element immediately followed by the token markup.
 */
export function highlight(code: string): string {
  const styleTag = `<style> .string { color: #C62828; } .number { color: #C4A000; } .keyword { color: #7C3AED; } .comment { color: #E91E63; } </style>`

  let markup = ""
  for (const token of tokenize(code).tokens) {
    markup += tokenToHTML(token)
  }

  return styleTag + markup
}
|
|
|
|
/**
 * Highlight `code` and wrap it in a `<div>` whose inline style is
 * `white-space: pre;`, so the original spacing and line breaks are kept
 * when the markup is displayed.
 *
 * @param code Source text to highlight.
 * @returns An object whose `html` property holds the wrapped markup.
 */
export function highlightToHTML(code: string): { html: string } {
  const html = "<div style='white-space: pre;'>" + highlight(code) + "</div>"
  return { html }
}
|
|
|
|
/**
 * Split `src` into a flat list of tokens.
 *
 * Every loop iteration emits exactly one token and advances by that token's
 * length, so the tokens are contiguous: concatenating their `value`s
 * reproduces `src`. Patterns are tried in a fixed order (comment / regex,
 * string, number, literal, keyword, identifier, punctuation, whitespace);
 * a character that nothing matches becomes a one-character "unknown" token.
 *
 * A `/` is only tried as a regex literal where an expression can start.
 * After an identifier, number, string, literal, `this`/`super`, `)` or `]`
 * it is left to the punctuation pattern, so `a / b / c` is lexed as two
 * divisions rather than the regex `/ b /`.
 *
 * @param src Source text to lex.
 * @returns A `Program` whose `tokens` are in source order.
 */
export function tokenize(src: string): Program {
  const tokens: Token[] = []
  let i = 0
  // Most recent token that is neither whitespace nor a comment.
  let lastSignificant: Token | undefined

  // Match `re` at the current position; patterns are `^`-anchored, so they
  // are run against the remaining input.
  const eat = (re: RegExp): string | null => {
    const m = re.exec(src.slice(i))
    return m ? m[0] : null
  }

  // Record a token starting at the current position and step past it.
  const emit = (type: TokenType, value: string): void => {
    const token: Token = { type, value, start: i, end: i + value.length }
    tokens.push(token)
    if (type !== "whitespace" && type !== "comment") lastSignificant = token
    i += value.length
  }

  // True when a `/` here may begin a regex literal rather than a division.
  const regexAllowed = (): boolean => {
    const prev = lastSignificant
    if (!prev) return true
    switch (prev.type) {
      case "identifier":
      case "number":
      case "string":
      case "boolean":
      case "null":
      case "undefined":
        return false
      case "keyword":
        // `this` and `super` are values; other keywords precede expressions.
        return prev.value !== "this" && prev.value !== "super"
      case "punctuation":
        // A closing paren/bracket ends an operand, so `/` divides it.
        return !/[)\]]$/.test(prev.value)
      default:
        return true
    }
  }

  while (i < src.length) {
    let v: string | null

    // If current char is '/', disambiguate comment / regex / operator upfront
    if (src[i] === "/") {
      const next = src[i + 1]
      if (next === "/") {
        if ((v = eat(RE.lineComment))) { emit("comment", v); continue }
      } else if (next === "*") {
        // An unterminated block comment does not match and falls through.
        if ((v = eat(RE.blockComment))) { emit("comment", v); continue }
      } else if (regexAllowed() && (v = eat(RE.regex))) {
        // Regex literals are reported as "string" to keep the category set small.
        emit("string", v); continue
      }
    }

    // Strings
    if ((v = eat(RE.sng) || eat(RE.dbl) || eat(RE.bkt))) { emit("string", v); continue }

    // Numbers
    if ((v = eat(RE.number))) { emit("number", v); continue }

    // true/false/null/undefined
    if ((v = eat(RE.boolNullUndef))) {
      const literalType: TokenType =
        v === "true" || v === "false" ? "boolean" : v === "null" ? "null" : "undefined"
      emit(literalType, v); continue
    }

    // Keywords
    if ((v = eat(RE.keywords))) { emit("keyword", v); continue }

    // Identifiers
    if ((v = eat(RE.ident))) { emit("identifier", v); continue }

    // Punctuation / operators
    if ((v = eat(RE.punct))) { emit("punctuation", v); continue }

    // Whitespace
    if ((v = eat(RE.ws))) { emit("whitespace", v); continue }

    // Fallback: nothing matched, so consume a single character.
    emit("unknown", src.charAt(i))
  }

  return { type: "Program", tokens }
}
|
|
|
|
/**
 * Render one token as HTML.
 *
 * Coloured categories are wrapped in a `<span>` whose inline style refers to
 * a CSS custom property (`--red`, `--yellow`, `--purple`, `--magenta`,
 * `--green`, `--blue`, `--cyan`); punctuation, whitespace and unknown
 * characters are emitted as bare text.
 *
 * The token text is escaped once, up front, for every category, so the
 * markup stays well-formed regardless of which characters a token holds.
 *
 * @param token Token to render.
 * @returns HTML for the token.
 */
function tokenToHTML(token: Token): string {
  const text = escapeHTML(token.value)

  switch (token.type) {
    case "string":
      return `<span style="color: var(--red)">${text}</span>`
    case "number":
      return `<span style="color: var(--yellow)">${text}</span>`
    case "keyword":
      return `<span style="color: var(--purple)">${text}</span>`
    case "comment":
      return `<span style="color: var(--magenta)">${text}</span>`
    case "null":
    case "undefined":
    case "boolean":
      return `<span style="color: var(--green)">${text}</span>`
    case "identifier": {
      // Capitalised names and the built-in type names share one colour.
      const typeLike = /^[A-Z]/.test(token.value) || types.includes(token.value)
      return typeLike
        ? `<span style="color: var(--blue)">${text}</span>`
        : `<span style="color: var(--cyan)">${text}</span>`
    }
    case "punctuation":
    case "whitespace":
    case "unknown":
      return text
  }
}
|
|
|