// simple-ts-highlighter.ts — regex-only, self-hostable

/** Category assigned to every lexeme produced by {@link tokenize}. */
export type TokenType =
  | "string"
  | "number"
  | "keyword"
  | "boolean"
  | "null"
  | "undefined"
  | "comment"
  | "identifier"
  | "punctuation"
  | "whitespace"
  | "unknown"

/** One lexeme; `start`/`end` are UTF-16 offsets into the source, `end` exclusive. */
export type Token = { type: TokenType; value: string; start: number; end: number }

/** Result of {@link tokenize}: the flat, gap-free token list for the whole input. */
export type Program = { type: "Program"; tokens: Token[] }

// Every pattern is sticky (`y`): it is matched exactly at `lastIndex`, so the
// scanner never has to copy the remaining input with `slice`. None of the
// patterns can match the empty string, which guarantees forward progress.
const RE = {
  // regex literal: /.../flags (handles escapes and [...] classes; still simple)
  regex: /\/(?![/*])(?:\\.|\[(?:\\.|[^\]\\])*\]|[^\\/\n\r])+\/[a-zA-Z]*/y,
  // comments; an unterminated block comment runs to the end of the input
  lineComment: /\/\/[^\n\r]*/y,
  blockComment: /\/\*[\s\S]*?(?:\*\/|$)/y,
  // strings: quoted strings may not contain a raw newline, but a backslash
  // escapes any character (including a newline, i.e. a line continuation)
  sng: /'(?:\\[\s\S]|[^'\\\n\r])*'/y,
  dbl: /"(?:\\[\s\S]|[^"\\\n\r])*"/y,
  bkt: /`(?:\\[\s\S]|[^`\\])*`/y,
  // numbers: hex/binary/octal, BigInt suffix, `_` separators, decimals with
  // optional fraction/exponent, and leading-dot decimals such as `.5`
  number:
    /(?:0[xX][0-9a-fA-F]+(?:_[0-9a-fA-F]+)*n?|0[bB][01]+(?:_[01]+)*n?|0[oO][0-7]+(?:_[0-7]+)*n?|\d+(?:_\d+)*n|(?:\d+(?:_\d+)*(?:\.\d+(?:_\d+)*)?|\.\d+(?:_\d+)*)(?:[eE][+\-]?\d+(?:_\d+)*)?)/y,
  // a whole word; it is classified as literal / keyword / identifier afterwards
  ident: /[A-Za-z_$][A-Za-z0-9_$]*/y,
  // `/` never joins a run, so `//`, `/*` and regex openers are not swallowed
  punct: /\/=?|[()[\]{}.,;:?~!%^&*+\-=|<>]+/y,
  ws: /\s+/y,
}

// Words that get their own literal token type. A Map (not an object literal)
// so that identifiers like `constructor` do not hit Object.prototype.
const LITERALS = new Map<string, TokenType>([
  ["true", "boolean"],
  ["false", "boolean"],
  ["null", "null"],
  ["undefined", "undefined"],
])

const KEYWORDS = new Set<string>([
  "async", "await", "break", "case", "catch", "class", "const", "continue",
  "debugger", "default", "delete", "do", "else", "enum", "export", "extends",
  "finally", "for", "function", "if", "import", "in", "instanceof", "let",
  "new", "return", "super", "switch", "this", "throw", "try", "typeof", "var",
  "void", "while", "with", "yield", "as", "implements", "interface", "package",
  "private", "protected", "public", "readonly", "abstract", "declare", "type",
  "from", "of",
])

// Lower-case identifiers that are still rendered as type names.
// ("void" is tokenized as a keyword first, so it never reaches this list.)
const types = ["string", "number", "boolean", "any", "void"]

// Punctuation after which a `/` is read as division rather than a regex
// opener: closing `)` / `]`, postfix `++` / `--`, and `<` (so `</tag>` in
// TSX-like input is not read as a regex).
const NO_REGEX_AFTER_PUNCT = /(?:[)\]<]|\+\+|--)$/

// NOTE(review): the per-token markup appears to have been stripped from the
// copy of this file that was reviewed (every branch returned bare text). The
// wrappers below use `tok-<name>` class names as an assumption — confirm them
// against the stylesheet that consumes this output.
const CLASS_PREFIX = "tok-"

/**
 * Renders `code` as an HTML fragment in which each token is HTML-escaped and,
 * for highlighted categories, wrapped in a `<span>`.
 *
 * @param code Source text to highlight.
 * @returns The concatenated token markup; no outer wrapper element is added.
 */
export function highlight(code: string): string {
  return tokenize(code)
    .tokens.map(token => tokenToHTML(token))
    .join("")
}

/**
 * Splits `src` into a contiguous list of tokens. Every character of the input
 * belongs to exactly one token, so joining the token values reproduces `src`.
 * Regex literals are reported as "string" tokens.
 *
 * @param src Source text to scan.
 * @returns A `Program` holding the tokens in source order.
 */
export function tokenize(src: string): Program {
  const tokens: Token[] = []
  // Last token that is neither whitespace nor a comment; it decides whether a
  // following `/` may open a regex literal.
  let prev: Token | undefined
  let pos = 0

  while (pos < src.length) {
    const [type, value] = scanAt(src, pos, prev)
    const token: Token = { type, value, start: pos, end: pos + value.length }
    tokens.push(token)
    if (type !== "whitespace" && type !== "comment") prev = token
    pos = token.end
  }

  return { type: "Program", tokens }
}

/** Matches `re` (which must be sticky) exactly at `pos`; returns the lexeme or null. */
function matchAt(re: RegExp, src: string, pos: number): string | null {
  re.lastIndex = pos
  const m = re.exec(src)
  return m ? m[0] : null
}

/** Whether a `/` following `prev` can start a regex literal (as opposed to division). */
function regexMayStart(prev: Token | undefined): boolean {
  if (prev === undefined) return true
  switch (prev.type) {
    case "punctuation":
      return !NO_REGEX_AFTER_PUNCT.test(prev.value)
    case "keyword":
      // `this / 2` and `super / 2` are divisions; `return /re/` is not.
      return prev.value !== "this" && prev.value !== "super"
    default:
      // identifiers, numbers, strings and literals end an expression
      return false
  }
}

/** Classifies a matched word as a literal, a keyword or a plain identifier. */
function classifyWord(word: string): TokenType {
  return LITERALS.get(word) ?? (KEYWORDS.has(word) ? "keyword" : "identifier")
}

/** Scans the single token that starts at `pos`; always consumes at least one character. */
function scanAt(src: string, pos: number, prev: Token | undefined): [TokenType, string] {
  // A leading '/' is ambiguous: comment, regex literal or division operator.
  if (src.charAt(pos) === "/") {
    const next = src.charAt(pos + 1)
    if (next === "/" || next === "*") {
      const comment = matchAt(next === "/" ? RE.lineComment : RE.blockComment, src, pos)
      if (comment !== null) return ["comment", comment]
    } else if (regexMayStart(prev)) {
      // Treat regex literal as a "string" for the minimal category set
      const literal = matchAt(RE.regex, src, pos)
      if (literal !== null) return ["string", literal]
    }
  }

  const str = matchAt(RE.sng, src, pos) ?? matchAt(RE.dbl, src, pos) ?? matchAt(RE.bkt, src, pos)
  if (str !== null) return ["string", str]

  const num = matchAt(RE.number, src, pos)
  if (num !== null) return ["number", num]

  // Match the whole word first and classify it afterwards, so that `true$`
  // or `in$` stay single identifiers instead of being split at the `$`.
  const word = matchAt(RE.ident, src, pos)
  if (word !== null) return [classifyWord(word), word]

  const punct = matchAt(RE.punct, src, pos)
  if (punct !== null) return ["punctuation", punct]

  const ws = matchAt(RE.ws, src, pos)
  if (ws !== null) return ["whitespace", ws]

  // Fallback: emit one code unit so the scanner always advances.
  return ["unknown", src.charAt(pos)]
}

/** Wraps already-escaped `text` in a span carrying the class for `name`. */
function span(name: string, text: string): string {
  return `<span class="${CLASS_PREFIX}${name}">${text}</span>`
}

/**
 * Converts one token to HTML. The value is always escaped; punctuation,
 * whitespace and unknown characters are emitted without a wrapper.
 */
function tokenToHTML(token: Token): string {
  const text = escapeHtml(token.value)
  switch (token.type) {
    case "string":
    case "number":
    case "keyword":
    case "comment":
      return span(token.type, text)
    case "null":
    case "undefined":
    case "boolean":
      return span("literal", text)
    case "identifier": {
      // Capitalised names and a few built-in type names are shown as types.
      const typeLike = /^[A-Z]/.test(token.value) || types.includes(token.value)
      return span(typeLike ? "type" : "identifier", text)
    }
    case "punctuation":
    case "whitespace":
    case "unknown":
      return text
    default: {
      // Compile-time exhaustiveness check: fails to type-check if a new
      // TokenType is added without a case above.
      const unhandled: never = token.type
      return unhandled
    }
  }
}

const HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["&", "&amp;"],
  ["<", "&lt;"],
  [">", "&gt;"],
  ['"', "&quot;"],
  ["'", "&#39;"],
])

/**
 * Escapes the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so
 * that `str` can be placed in element content or a quoted attribute value.
 *
 * @param str Raw text.
 * @returns `str` with those characters replaced by entities, in a single pass.
 */
export function escapeHtml(str: string): string {
  return str.replace(/[&<>"']/g, ch => HTML_ENTITIES.get(ch) ?? ch)
}