// simple-ts-highlighter.ts — regex-only, self-hostable
/** Every category the tokenizer can assign to a lexeme. */
export type TokenType =
  | "string"
  | "number"
  | "keyword"
  | "boolean"
  | "null"
  | "undefined"
  | "comment"
  | "identifier"
  | "punctuation"
  | "whitespace"
  | "unknown"

/**
 * A single lexeme cut out of the source text.
 * `start` is the offset of its first character in the input and `end` is
 * `start + value.length` (i.e. exclusive).
 */
export type Token = {
  type: TokenType
  value: string
  start: number
  end: number
}

/** Result of tokenizing: the token list wrapped in a tagged root object. */
export type Program = {
  type: "Program"
  tokens: Token[]
}
/**
 * Lexeme patterns, one per token family.
 *
 * Every pattern is anchored with `^`, so it only matches at the very start of
 * the string it is run against (the tokenizer runs them on the remaining,
 * not-yet-consumed source text).
 */
const RE = {
  // regex literal: /.../flags (handles escapes and [...] classes; still simple)
  regex: /^\/(?![/*])(?:\\.|\[(?:\\.|[^\]\\])*\]|[^\\/\n\r])+\/[a-zA-Z]*/,

  // comments
  lineComment: /^\/\/[^\n\r]*/,
  blockComment: /^\/\*[\s\S]*?\*\//,

  // strings: single-quoted, double-quoted, back-tick (no ${} nesting awareness)
  sng: /^'(?:\\.|[^'\\])*'/,
  dbl: /^"(?:\\.|[^"\\])*"/,
  bkt: /^`(?:\\.|[^`\\])*`/,

  // numbers: hex / binary / octal / decimal, with optional `_` separators
  // between digits and an optional BigInt `n` suffix on integers. The suffix is
  // only taken when no identifier character follows, so `10name` still lexes
  // as `10` + `name`.
  number: /^(?:0[xX][0-9a-fA-F](?:_?[0-9a-fA-F])*(?:n(?![\w$]))?|0[bB][01](?:_?[01])*(?:n(?![\w$]))?|0[oO][0-7](?:_?[0-7])*(?:n(?![\w$]))?|\d(?:_?\d)*(?:n(?![\w$])|(?:\.\d(?:_?\d)*)?(?:[eE][+\-]?\d(?:_?\d)*)?))/,

  // literals. The trailing lookahead (instead of `\b`) rejects a match that is
  // really the prefix of a longer identifier such as `true$x` — `\b` treats
  // `$` and non-ASCII letters as a word boundary.
  boolNullUndef: /^(?:true|false|null|undefined)(?![\p{ID_Continue}$\u200C\u200D])/u,

  // keywords (same identifier-aware boundary as above)
  keywords: /^(?:async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|function|if|import|in|instanceof|let|new|return|super|switch|this|throw|try|typeof|var|void|while|with|yield|as|implements|interface|package|private|protected|public|readonly|abstract|declare|type|from|of)(?![\p{ID_Continue}$\u200C\u200D])/u,

  // identifier: Unicode-aware start/continue classes, plus `$` and `_`
  ident: /^[\p{ID_Start}$_][\p{ID_Continue}$\u200C\u200D]*/u,

  // punctuation / operators (a run of one or more operator characters)
  punct: /^[()[\]{}.,;:?~!%^&*+\-=/|<>]+/,

  // whitespace
  ws: /^\s+/,
}
// Lower-case names that the identifier branch of tokenToHTML checks for in
// addition to identifiers starting with an upper-case letter.
const types: string[] = [
  "string",
  "number",
  "boolean",
  "any",
  "void",
]
/**
 * Tokenizes `code` and concatenates the HTML rendering of every token.
 *
 * @param code Source text to highlight.
 * @returns The rendered tokens joined in source order.
 */
export function highlight(code: string): string {
  const { tokens } = tokenize(code)
  let html = ``
  for (const token of tokens) {
    html += tokenToHTML(token)
  }
  return html
}
/**
 * Splits `src` into a flat list of tokens that covers the input without gaps:
 * each token starts where the previous one ended, and `end - start` always
 * equals `value.length`.
 *
 * Regex literals are reported with type "string". Text that matches no
 * pattern is emitted one code point at a time as "unknown".
 *
 * @param src Source text to tokenize.
 * @returns A `Program` wrapping the tokens in source order.
 */
export function tokenize(src: string): Program {
  const tokens: Token[] = []
  // Most recent token that is neither whitespace nor a comment; it decides
  // whether a following "/" may open a regex literal.
  let prev: Token | undefined
  let i = 0
  while (i < src.length) {
    // Slice once per token rather than once per pattern attempt.
    const [type, value] = scanToken(src.slice(i), prev)
    const token: Token = { type, value, start: i, end: i + value.length }
    tokens.push(token)
    if (type !== "whitespace" && type !== "comment") prev = token
    i += value.length
  }
  return { type: "Program", tokens }
}

/** Classifies the lexeme at the start of `rest` (non-empty); always returns a non-empty value. */
function scanToken(rest: string, prev: Token | undefined): [TokenType, string] {
  const eat = (re: RegExp): string | null => re.exec(rest)?.[0] ?? null
  let v: string | null

  // If current char is '/', disambiguate comment / regex / operator upfront
  if (rest[0] === "/") {
    if (rest[1] === "/") {
      if ((v = eat(RE.lineComment))) return ["comment", v]
    } else if (rest[1] === "*") {
      // An unterminated block comment does not match and falls through to punctuation.
      if ((v = eat(RE.blockComment))) return ["comment", v]
    } else if (slashStartsRegex(prev) && (v = eat(RE.regex))) {
      // Regex literals share the "string" category of the minimal type set
      return ["string", v]
    }
  }

  // Strings
  if ((v = eat(RE.sng) || eat(RE.dbl) || eat(RE.bkt))) return ["string", v]

  // Numbers
  if ((v = eat(RE.number))) return ["number", v]

  // true/false/null/undefined
  if ((v = eat(RE.boolNullUndef))) {
    const kind: TokenType =
      v === "true" || v === "false" ? "boolean" : v === "null" ? "null" : "undefined"
    return [kind, v]
  }

  // Keywords
  if ((v = eat(RE.keywords))) return ["keyword", v]

  // Identifiers
  if ((v = eat(RE.ident))) return ["identifier", v]

  // Punctuation / operators
  if ((v = eat(RE.punct))) {
    // Stop the run before any later "/" so that it starts its own token and
    // goes through the comment/regex check above. Without this, `;// note`
    // or `=/re/` would be swallowed whole as punctuation.
    const slash = v.indexOf("/", 1)
    return ["punctuation", slash === -1 ? v : v.slice(0, slash)]
  }

  // Whitespace
  if ((v = eat(RE.ws))) return ["whitespace", v]

  // Fallback: take one whole code point so a surrogate pair is never split
  // across two tokens.
  const width = (rest.codePointAt(0) ?? 0) > 0xffff ? 2 : 1
  return ["unknown", rest.slice(0, width)]
}

/**
 * Decides whether a "/" following `prev` can open a regex literal.
 * After a value (identifier, literal, `this`/`super`, or a closing bracket)
 * a "/" is the division operator. A run ending in "<" is also excluded so
 * that markup-style `</` is not mistaken for a regex.
 */
function slashStartsRegex(prev: Token | undefined): boolean {
  if (prev === undefined) return true
  switch (prev.type) {
    case "keyword":
      return prev.value !== "this" && prev.value !== "super"
    case "punctuation":
      return !/[)\]}<]$/.test(prev.value)
    default:
      return false
  }
}
/**
 * Renders a single token as output text.
 *
 * String, comment, punctuation, whitespace and unknown tokens are passed
 * through `escapeHtml`; number, keyword, literal and identifier values are
 * emitted as-is.
 *
 * @param token Token to render.
 * @returns The text to append to the highlighted output.
 */
function tokenToHTML(token: Token): string {
  const text = token.value
  switch (token.type) {
    case "string":
    case "comment":
    case "whitespace":
    case "unknown":
      return `${escapeHtml(text)}`

    case "punctuation":
      return escapeHtml(text)

    case "number":
    case "keyword":
    case "null":
    case "undefined":
    case "boolean":
      return `${text}`

    case "identifier": {
      // Capitalised names and the entries of `types` are singled out here.
      // NOTE(review): both outcomes currently produce the same text —
      // presumably they were meant to differ; confirm.
      const typeLike = /^[A-Z]/.test(text) || types.includes(text)
      return typeLike ? `${text}` : `${text}`
    }
  }
}
/**
 * Escapes the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so
 * that `str` can be embedded in HTML text or in a quoted attribute value.
 *
 * @param str Raw text.
 * @returns `str` with each special character replaced by its entity.
 */
export function escapeHtml(str: string): string {
  return str
    // "&" must be handled first, otherwise the ampersands introduced by the
    // replacements below would themselves be escaped again.
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;")
}