toes/src/server/apps.ts

865 lines
24 KiB
TypeScript

import type { App as SharedApp, AppState } from '@types'
import type { ToesEvent, ToesEventInput, ToesEventType } from '../shared/events'
import type { Subprocess } from 'bun'
import { DEFAULT_EMOJI } from '@types'
import { buildAppUrl, toSubdomain } from '@urls'
import { appendFileSync, existsSync, mkdirSync, readdirSync, readFileSync, realpathSync, renameSync, symlinkSync, unlinkSync, writeFileSync } from 'fs'
import { hostname } from 'os'
import { join, resolve } from 'path'
import { loadAppEnv } from '../tools/env'
import { publishApp, unpublishAll, unpublishApp } from './mdns'
import { closeAllTunnels, closeTunnel, openTunnelIfEnabled, renameTunnelConfig, unshareApp } from './tunnels'
import { appLog, hostLog, setApps } from './tui'
export type { AppState } from '@types'
// Directory containing one sub-directory per app. Taken from APPS_DIR when set,
// otherwise the absolute form of <DATA_DIR or cwd>/apps.
export const APPS_DIR = process.env.APPS_DIR ?? resolve(join(process.env.DATA_DIR ?? '.', 'apps'))
// Directory passed to loadAppEnv() and exported to child apps as TOES_DIR.
// Taken from TOES_DIR when set, otherwise <DATA_DIR or cwd>/toes (not resolved to absolute).
export const TOES_DIR = process.env.TOES_DIR ?? join(process.env.DATA_DIR ?? '.', 'toes')
// Host part of the default URL: <hostname>.local in production, localhost otherwise.
const defaultHost = process.env.NODE_ENV === 'production' ? `${hostname()}.local` : 'localhost'
// Base URL handed to apps; PORT falls back to 3000 when unset or empty.
export const TOES_URL = process.env.TOES_URL ?? `http://${defaultHost}:${process.env.PORT || 3000}`
// Consecutive failed /ok probes after which a running app is killed.
const HEALTH_CHECK_FAILURES_BEFORE_RESTART = 3
// Milliseconds between health probes of a running app.
const HEALTH_CHECK_INTERVAL = 30000
// Milliseconds before a single health probe is aborted.
const HEALTH_CHECK_TIMEOUT = 5000
// Log files whose date is older than this many days are deleted by rotateLogs().
const LOG_RETENTION_DAYS = 7
// Maximum number of log entries kept in memory per app.
const MAX_LOGS = 100
// Upper bound (inclusive) of the port pool.
const MAX_PORT = 3100
// Restart attempts allowed before an app is put into the 'error' state.
const MAX_RESTART_ATTEMPTS = 5
// Lower bound (inclusive) of the port pool.
const MIN_PORT = 3001
// Restart back-off in milliseconds, indexed by the number of previous attempts.
const RESTART_DELAYS = [1000, 2000, 4000, 8000, 16000, 32000]
// Milliseconds to wait after kill() before escalating to SIGKILL.
const SHUTDOWN_TIMEOUT = 10000
// Milliseconds an app must have been running for its restart counter to reset.
const STABLE_RUN_TIME = 60000
// Milliseconds an app may stay in 'starting' before its process is killed.
const STARTUP_TIMEOUT = 30000
// App name -> port most recently assigned by getPort().
const _appPorts = new Map<string, number>()
// Registry of known apps, keyed by app directory name.
const _apps = new Map<string, App>()
// Ports currently free to hand out.
const _availablePorts: number[] = []
// Subscribers notified by emit().
const _eventListeners = new Set<(event: ToesEvent) => void>()
// Subscribers notified by update().
const _listeners = new Set<() => void>()
// Set once gracefulShutdown() has begun; suppresses restarts and 'app:stop' events.
let _shuttingDown = false
/**
 * Server-side view of an app: the shared app record plus process-management
 * bookkeeping that this module reads and writes.
 */
export type App = SharedApp & {
// Number of health probes that have failed in a row since the last success/start.
consecutiveHealthFailures?: number
// Interval handle created by startHealthChecks().
healthCheckTimer?: Timer
// Date.now() at the moment the most recent restart was scheduled.
lastRestartTime?: number
// True when the stop was requested deliberately; blocks auto-restart.
manuallyStopped?: boolean
// Handle of the spawned `bun run toes` process, if any.
proc?: Subprocess
// Restarts scheduled so far; reset on explicit start or after a stable run.
restartAttempts?: number
// Timeout handle that escalates a slow shutdown to SIGKILL.
shutdownTimer?: Timer
// Timeout handle that kills a process stuck in 'starting'.
startupTimer?: Timer
}
// Result of loadApp(): the parsed package.json (or {} when it could not be
// read/parsed) plus an error message when the directory is not a valid app.
type LoadResult = { pkg: any, error?: string }
/** Returns every registered app, ordered alphabetically by name. */
export function allApps(): App[] {
  const apps = Array.from(_apps.values())
  apps.sort((left, right) => left.name.localeCompare(right.name))
  return apps
}
/** Looks up a registered app by its directory name. */
export function getApp(dir: string): App | undefined {
  return _apps.get(dir)
}
/**
 * Resolves a subdomain to an app: first as an exact registry key, then by
 * comparing against the subdomain form of each app's name.
 */
export function getAppBySubdomain(subdomain: string): App | undefined {
  const exact = _apps.get(subdomain)
  if (exact !== undefined) return exact
  return allApps().find(candidate => toSubdomain(candidate.name) === subdomain)
}
/** Starts every app directory that currently loads as a valid app. */
export function runApps(): void {
  const validDirs = allAppDirs().filter(dir => isApp(dir))
  for (const dir of validDirs) {
    startApp(dir)
  }
}
/** Returns the registered apps whose state is 'running', sorted by name. */
export function runningApps(): App[] {
  return allApps().filter(app => app.state === 'running')
}
/**
 * Records a log line for a registered app: in memory (capped at MAX_LOGS),
 * in today's log file, and then notifies change listeners.
 * Unknown app names are ignored.
 */
export function appendLog(appName: string, text: string, streamType: 'stdout' | 'stderr' = 'stdout') {
  const target = _apps.get(appName)
  if (target === undefined) return

  info(target, text)
  writeLogLine(appName, streamType, text)

  // Keep only the newest MAX_LOGS entries in memory
  const buffered = target.logs ?? []
  target.logs = buffered.slice(-MAX_LOGS)
  update()
}
/**
 * Lists the dates for which an app has a log file, newest first.
 *
 * @param appName App directory name.
 * @returns File names from the app's log directory with the `.log` extension
 *          removed, sorted descending. Empty when the directory does not exist.
 */
export function getLogDates(appName: string): string[] {
  const dir = logDir(appName)
  if (!existsSync(dir)) return []

  const extension = '.log'
  return readdirSync(dir)
    .filter(f => f.endsWith(extension))
    // Strip only the trailing extension; String.replace would remove the
    // first '.log' occurrence anywhere in the name.
    .map(f => f.slice(0, -extension.length))
    .sort()
    .reverse()
}
/**
 * Reads the non-empty lines of an app's log file.
 *
 * @param appName App directory name.
 * @param date Log date (file name without `.log`); defaults to today.
 * @param tail When positive, only the last `tail` lines are returned.
 * @returns The lines, or an empty array when the file does not exist.
 */
// NOTE(review): `date` is joined into the path as-is — confirm callers validate it.
export function readLogs(appName: string, date?: string, tail?: number): string[] {
  const path = logFile(appName, date ?? formatLogDate())
  if (!existsSync(path)) return []

  const entries = readFileSync(path, 'utf-8')
    .split('\n')
    .filter(line => line.length > 0)

  const wantsTail = tail !== undefined && tail > 0
  return wantsTail ? entries.slice(-tail) : entries
}
/**
 * Boots the app manager. The steps run in this fixed order: kill leftover
 * processes, fill the port pool, install signal handlers, delete expired
 * logs, create missing `current` symlinks, register the apps found on disk,
 * and finally start them.
 */
export async function initApps() {
// Must finish before ports are handed out, so stale listeners are gone
await killStaleProcesses()
initPortPool()
setupShutdownHandlers()
rotateLogs()
// Symlinks are created before discovery because discovery only sees
// directories that contain `current`
createAppSymlinks()
discoverApps()
runApps()
}
/** Delivers an event, stamped with the current time, to every event listener. */
export function emit(event: ToesEventInput) {
  for (const listener of _eventListeners) {
    // Cast: ToesEventInput is DistributiveOmit<ToesEvent, 'time'>, so adding
    // `time` back yields a ToesEvent. TypeScript cannot prove that because
    // object spreads do not distribute over unions.
    const stamped = { ...event, time: Date.now() } as ToesEvent
    listener(stamped)
  }
}
/**
 * Subscribes to registry changes (fired by update()).
 * @returns A function that removes the subscription.
 */
export function onChange(cb: () => void) {
  _listeners.add(cb)
  const unsubscribe = () => _listeners.delete(cb)
  return unsubscribe
}
/**
 * Subscribes to events published through emit().
 * @returns A function that removes the subscription.
 */
export function onEvent(cb: (event: ToesEvent) => void) {
  _eventListeners.add(cb)
  const unsubscribe = () => _eventListeners.delete(cb)
  return unsubscribe
}
/**
 * Unregisters an app: withdraws its mDNS record and share, cancels its
 * timers, kills its process, returns its port to the pool, and emits
 * `app:delete`. Unknown names are ignored.
 *
 * @param dir App directory name.
 */
export function removeApp(dir: string) {
  const app = _apps.get(dir)
  if (!app) return

  unpublishApp(dir)
  unshareApp(dir)

  // Clear all timers
  clearTimers(app)

  // Kill whatever process is attached, whatever the state: an app that is
  // still 'starting' or 'stopping' also owns a live process. Flag the stop
  // as deliberate so the exit handler does not schedule a restart for an
  // app that is no longer registered.
  if (app.proc) {
    app.manuallyStopped = true
    app.proc.kill()
  }

  // Release port if assigned
  if (app.port) {
    releasePort(app.port)
  }

  _apps.delete(dir)
  update()
  emit({ type: 'app:delete', app: dir })
}
/**
 * Adds a newly found app directory to the registry, emits `app:create`, and
 * launches it right away when its package.json is valid. A directory that is
 * already registered is left untouched.
 */
export function registerApp(dir: string) {
  if (_apps.has(dir)) return // Already registered

  const loaded = loadApp(dir)
  const manifest = loaded.pkg.toes
  const state: AppState = loaded.error ? 'invalid' : 'stopped'

  _apps.set(dir, {
    name: dir,
    state,
    icon: manifest?.icon ?? DEFAULT_EMOJI,
    error: loaded.error,
    tool: manifest?.tool,
  })
  update()
  emit({ type: 'app:create', app: dir })

  if (loaded.error) return
  runApp(dir, getPort(dir))
}
/**
 * Renames an app: moves its directory under APPS_DIR, carries over its port
 * mapping and tunnel config, re-keys the registry, and emits `app:delete`
 * for the old name followed by `app:create` for the new one.
 *
 * @param oldName Current app directory name.
 * @param newName Desired name; must start with a lowercase letter and contain
 *                only lowercase letters, digits and hyphens.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, error }`.
 */
export async function renameApp(oldName: string, newName: string): Promise<{ ok: boolean, error?: string }> {
const app = _apps.get(oldName)
if (!app) return { ok: false, error: 'App not found' }
if (_apps.has(newName)) return { ok: false, error: 'An app with that name already exists' }
if (!/^[a-z][a-z0-9-]*$/.test(newName)) {
return { ok: false, error: 'Name must start with a letter and contain only lowercase letters, numbers, and hyphens' }
}
const oldPath = join(APPS_DIR, oldName)
const newPath = join(APPS_DIR, newName)
// Stop the app and wait for process to fully exit so the port is freed
// NOTE(review): only the 'running' state is handled here; an app that is
// 'starting' or 'stopping' keeps its process while the directory is moved —
// confirm this is intended.
const wasRunning = app.state === 'running'
if (wasRunning) {
// Capture the handle before kill() so the exit can still be awaited
const proc = app.proc
clearTimers(app)
app.proc?.kill()
if (proc) await proc.exited
}
try {
renameSync(oldPath, newPath)
} catch (e) {
return { ok: false, error: `Failed to rename directory: ${e instanceof Error ? e.message : String(e)}` }
}
// Transfer port mapping to new name
const oldPort = _appPorts.get(oldName)
if (oldPort !== undefined) {
_appPorts.delete(oldName)
_appPorts.set(newName, oldPort)
}
// Update the internal registry: the same App object is re-keyed under the
// new name with its stop/restart bookkeeping reset
_apps.delete(oldName)
app.name = newName
app.state = 'stopped'
app.manuallyStopped = false
app.restartAttempts = 0
_apps.set(newName, app)
renameTunnelConfig(oldName, newName)
update()
// Listeners see a rename as a delete followed by a create
emit({ type: 'app:delete', app: oldName })
emit({ type: 'app:create', app: newName })
// Restart if it was running
if (wasRunning) {
startApp(newName)
}
return { ok: true }
}
/**
 * Explicitly starts an app that is currently 'stopped', 'invalid' or 'error'.
 * Does nothing for unknown apps, apps in any other state, or directories that
 * do not load as a valid app.
 */
export function startApp(dir: string) {
  const app = _apps.get(dir)
  if (!app) return

  switch (app.state) {
    case 'stopped':
    case 'invalid':
    case 'error':
      break
    default:
      return
  }
  if (!isApp(dir)) return

  // An explicit start wipes the manual-stop flag, the back-off counter and
  // any previous error
  app.error = undefined
  app.restartAttempts = 0
  app.manuallyStopped = false

  runApp(dir, getPort(dir))
}
/**
 * Stops an app if it is running or starting, waits for it to reach the
 * 'stopped' state, then starts it again.
 *
 * @param dir App directory name. Unknown names are ignored.
 * @throws Error when the app has not reached 'stopped' within 10 seconds.
 */
export async function restartApp(dir: string): Promise<void> {
  const app = _apps.get(dir)
  if (!app) return

  if (app.state === 'running' || app.state === 'starting') {
    if (app.state === 'running') {
      stopApp(dir)
    } else if (app.proc) {
      // stopApp() ignores apps that are not 'running', so a 'starting' app
      // used to sit here until the timeout below threw. Kill its process
      // directly and flag the stop as deliberate so the exit handler does
      // not also schedule an automatic restart; startApp() clears the flag.
      // (If no process has been spawned yet there is nothing to kill and the
      // wait below behaves as before.)
      app.manuallyStopped = true
      app.proc.kill()
    }

    // Poll until stopped (with timeout)
    const maxWait = 10000 // 10 seconds
    const pollInterval = 100
    let waited = 0
    while (_apps.get(dir)?.state !== 'stopped' && waited < maxWait) {
      await new Promise(done => setTimeout(done, pollInterval))
      waited += pollInterval
    }
    if (_apps.get(dir)?.state !== 'stopped') {
      throw new Error(`App ${dir} failed to stop after ${maxWait}ms`)
    }
  }

  // Start the app
  startApp(dir)
}
/**
 * Requests a deliberate stop of a running app: marks it 'stopping', cancels
 * health probing, arms the SIGKILL escalation timer and signals the process.
 * Apps that are unknown or not 'running' are ignored.
 */
export function stopApp(dir: string) {
  const app = _apps.get(dir)
  if (app === undefined) return
  if (app.state !== 'running') return

  info(app, 'Stopping...')
  app.manuallyStopped = true
  app.state = 'stopping'
  update()

  // No more health probes while the process winds down
  const probeTimer = app.healthCheckTimer
  if (probeTimer) {
    clearInterval(probeTimer)
    app.healthCheckTimer = undefined
  }

  // Arm the escalation first, then ask the process to exit
  startShutdownTimeout(app)
  app.proc?.kill()
}
/**
 * Persists a new icon into the app's package.json (`toes.icon`) and mirrors
 * it on the registered app, if there is one.
 *
 * @throws Error with the loader's message when the app's package.json is invalid.
 */
export function updateAppIcon(dir: string, icon: string) {
  const loaded = loadApp(dir)
  if (loaded.error) throw new Error(loaded.error)

  const manifest = loaded.pkg
  if (manifest.toes == null) manifest.toes = {}
  manifest.toes.icon = icon
  saveApp(dir, manifest)

  const registered = _apps.get(dir)
  if (!registered) return
  registered.icon = icon
  update()
}
/** Cancels and forgets the startup, shutdown and health-check timers of an app. */
function clearTimers(app: App) {
  const { startupTimer, shutdownTimer, healthCheckTimer } = app

  if (startupTimer) {
    clearTimeout(startupTimer)
    app.startupTimer = undefined
  }

  if (shutdownTimer) {
    clearTimeout(shutdownTimer)
    app.shutdownTimer = undefined
  }

  if (healthCheckTimer) {
    clearInterval(healthCheckTimer)
    app.healthCheckTimer = undefined
  }
}
/** Formats a date as the first ten characters of its ISO string (UTC `YYYY-MM-DD`). */
function formatLogDate(date: Date = new Date()): string {
  const iso = date.toISOString()
  return iso.substring(0, 10)
}
/**
 * Sends a message to the TUI app log and, when the app has an in-memory log
 * buffer, appends it there too (parts joined with spaces).
 */
function info(app: App, ...msg: string[]) {
  appLog(app, ...msg)
  if (!app.logs) return
  app.logs.push({ time: Date.now(), text: msg.join(' ') })
}
/** Path of the directory holding an app's log files. */
function logDir(appName: string) {
  return join(APPS_DIR, appName, 'logs')
}
/** Path of an app's log file for the given date (defaults to today). */
function logFile(appName: string, date: string = formatLogDate()) {
  const fileName = `${date}.log`
  return join(logDir(appName), fileName)
}
/** True when the directory's current package.json loads without an error. */
function isApp(dir: string): boolean {
  const { error } = loadApp(dir)
  return !error
}
/** Pushes the current app list to the TUI and notifies every change listener. */
export function update() {
  setApps(allApps())
  for (const notify of _listeners) {
    notify()
  }
}
/** Sorted names of the directories in APPS_DIR that contain a `current` entry. */
function allAppDirs() {
  const names: string[] = []
  for (const entry of readdirSync(APPS_DIR, { withFileTypes: true })) {
    if (!entry.isDirectory()) continue
    if (!existsSync(join(APPS_DIR, entry.name, 'current'))) continue
    names.push(entry.name)
  }
  return names.sort()
}
/**
 * For every app directory without a usable `current` entry, points `current`
 * at the newest version directory (highest name in sort order) whose
 * package.json defines `scripts.toes`. Directories with no such version are
 * left alone.
 */
function createAppSymlinks() {
  for (const app of readdirSync(APPS_DIR, { withFileTypes: true })) {
    if (!app.isDirectory()) continue

    const appDir = join(APPS_DIR, app.name)
    const currentPath = join(appDir, 'current')
    if (existsSync(currentPath)) continue

    // Find valid version directories
    const versions = readdirSync(appDir, { withFileTypes: true })
      .filter(e => {
        if (!e.isDirectory()) return false
        const pkgPath = join(appDir, e.name, 'package.json')
        if (!existsSync(pkgPath)) return false
        try {
          const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'))
          return !!pkg.scripts?.toes
        } catch {
          return false
        }
      })
      .map(e => e.name)
      .sort()
      .reverse()

    const latest = versions[0]
    if (!latest) continue

    // existsSync() follows symlinks, so getting here can also mean `current`
    // is a dangling link. symlinkSync() would then fail with EEXIST and
    // abort startup, so remove any leftover link first.
    try {
      unlinkSync(currentPath)
    } catch {
      // Nothing to remove — the common case
    }
    symlinkSync(latest, currentPath)
  }
}
/**
 * Registers every app directory found on disk (valid ones as 'stopped',
 * broken ones as 'invalid') and then publishes the registry once.
 */
function discoverApps() {
  allAppDirs().forEach(dir => {
    const loaded = loadApp(dir)
    const manifest = loaded.pkg.toes
    const state: AppState = loaded.error ? 'invalid' : 'stopped'
    _apps.set(dir, {
      name: dir,
      state,
      icon: manifest?.icon ?? DEFAULT_EMOJI,
      error: loaded.error,
      tool: manifest?.tool,
    })
  })
  update()
}
/** Creates the app's log directory when it is missing and returns its path. */
function ensureLogDir(appName: string): string {
  const target = logDir(appName)
  const missing = !existsSync(target)
  if (missing) mkdirSync(target, { recursive: true })
  return target
}
/**
 * Takes a port out of the pool. An app gets back the port it was last
 * assigned when that port is still free; otherwise the lowest-positioned
 * free port is used and remembered for the app.
 *
 * @throws Error('No available ports') when the pool is empty.
 */
function getPort(appName?: string): number {
  // Prefer the port this app held previously, if nobody else took it
  const remembered = appName ? _appPorts.get(appName) : undefined
  if (remembered !== undefined) {
    const slot = _availablePorts.indexOf(remembered)
    if (slot !== -1) {
      _availablePorts.splice(slot, 1)
      return remembered
    }
    // Taken by another app — fall through to a fresh port
  }

  const next = _availablePorts.shift()
  if (next === undefined) {
    // Pool exhausted - this shouldn't happen with 100 ports
    throw new Error('No available ports')
  }

  if (appName) _appPorts.set(appName, next)
  return next
}
/**
 * Shuts the host down in response to a signal: withdraws mDNS records and
 * tunnels, asks every app process to exit, and terminates the host once they
 * are gone. Exits with 0 when everything stopped in time, or with 1 after
 * SIGKILLing the stragglers once SHUTDOWN_TIMEOUT has passed.
 * Repeated calls are ignored.
 *
 * @param signal Name of the signal that triggered the shutdown (for logging).
 */
async function gracefulShutdown(signal: string) {
  if (_shuttingDown) return
  _shuttingDown = true

  unpublishAll()
  closeAllTunnels()
  hostLog(`Received ${signal}, shutting down gracefully...`)

  // Track every app that still owns a process, not just those in the
  // 'running' state: apps that are 'starting' or 'stopping' have live
  // children too and would otherwise be orphaned when the host exits.
  // The exit handler clears `proc`, so this shrinks as processes die.
  const appsWithProcess = () => allApps().filter(a => a.proc !== undefined)

  const running = appsWithProcess()
  if (running.length === 0) {
    hostLog('No apps running, exiting.')
    process.exit(0)
  }

  hostLog(`Stopping ${running.length} app(s)...`)

  // Stop all apps that own a process
  for (const app of running) {
    app.manuallyStopped = true
    if (app.healthCheckTimer) {
      clearInterval(app.healthCheckTimer)
      app.healthCheckTimer = undefined
    }
    app.proc?.kill()
  }

  // Wait for all apps to exit with timeout
  const shutdownStart = Date.now()
  const checkInterval = setInterval(() => {
    const stillRunning = appsWithProcess()
    if (stillRunning.length === 0) {
      clearInterval(checkInterval)
      hostLog('All apps stopped, exiting.')
      process.exit(0)
    }

    // Check for timeout
    if (Date.now() - shutdownStart > SHUTDOWN_TIMEOUT) {
      clearInterval(checkInterval)
      hostLog(`Shutdown timeout, forcing ${stillRunning.length} app(s) to stop...`)
      for (const app of stillRunning) {
        if (app.proc) {
          app.proc.kill(9) // SIGKILL
        }
      }
      // Give a moment for SIGKILL to take effect
      setTimeout(() => {
        hostLog('Forced shutdown complete, exiting.')
        process.exit(1)
      }, 500)
    }
  }, 100)
}
/**
 * Counts one failed health probe. When the consecutive-failure limit is
 * reached, probing is cancelled and the process is killed without marking the
 * stop as manual, so the normal auto-restart path takes over.
 */
function handleHealthCheckFailure(app: App) {
  const failures = (app.consecutiveHealthFailures ?? 0) + 1
  app.consecutiveHealthFailures = failures
  info(app, `Health check failed (${failures}/${HEALTH_CHECK_FAILURES_BEFORE_RESTART})`)

  if (failures < HEALTH_CHECK_FAILURES_BEFORE_RESTART) return

  info(app, 'Too many health check failures, restarting...')

  // Stop probing before the process goes away
  if (app.healthCheckTimer) {
    clearInterval(app.healthCheckTimer)
    app.healthCheckTimer = undefined
  }

  // manuallyStopped is deliberately left alone - auto-restart should kick in
  app.proc?.kill()
}
/**
 * Kills processes left over from a previous host run: anything listening on
 * a TCP port from MIN_PORT - 1 through MAX_PORT, plus any process whose
 * command line matches "bun run toes". The current process is never
 * targeted.
 */
async function killStaleProcesses() {
  const pids = new Set<number>()

  // Runs a command that prints one pid per line and collects the results
  const collectPids = (command: string[]) => {
    const output = Bun.spawnSync(command).stdout.toString().trim()
    if (!output) return
    for (const pid of output.split('\n').map(Number)) {
      if (pid && pid !== process.pid) pids.add(pid)
    }
  }

  // Find processes listening on our port range. Restricting the match to TCP
  // sockets in the LISTEN state matters: a bare `-i :range` also reports
  // processes that merely hold a client connection to one of these ports,
  // and those would be SIGKILLed below.
  collectPids(['lsof', '-t', `-iTCP:${MIN_PORT - 1}-${MAX_PORT}`, '-sTCP:LISTEN'])

  // Find orphaned "bun run toes" child app processes
  collectPids(['pgrep', '-f', 'bun run toes'])

  if (pids.size === 0) return

  hostLog(`Found ${pids.size} stale process(es)`)
  for (const pid of pids) {
    try {
      process.kill(pid, 'SIGKILL')
      hostLog(`Killed stale process ${pid}`)
    } catch {
      // Process already gone
    }
  }
}
/** Resets the pool to every port from MIN_PORT through MAX_PORT, ascending. */
function initPortPool() {
  _availablePorts.length = 0
  const count = MAX_PORT - MIN_PORT + 1
  const ports = Array.from({ length: count }, (_, offset) => MIN_PORT + offset)
  _availablePorts.push(...ports)
}
/**
 * Moves an app into the 'running' state once startup succeeded: cancels the
 * startup timer, records the start time, notifies listeners, then publishes
 * the app, opens its tunnel if enabled, and begins health probing.
 */
function markAsRunning(app: App, port: number) {
  const pendingStartup = app.startupTimer
  if (pendingStartup) {
    clearTimeout(pendingStartup)
    app.startupTimer = undefined
  }

  app.started = Date.now()
  app.state = 'running'
  update()
  emit({ type: 'app:start', app: app.name })

  const { name } = app
  publishApp(name)
  openTunnelIfEnabled(name, port)
  startHealthChecks(app, port)
}
/**
 * Reads and validates `<APPS_DIR>/<dir>/current/package.json`.
 *
 * @returns The parsed manifest, with `error` set when the file cannot be
 *          read, is not valid JSON, or lacks `scripts.toes`. `pkg` is `{}`
 *          in the first two cases.
 */
function loadApp(dir: string): LoadResult {
  let raw: string
  try {
    raw = readFileSync(join(APPS_DIR, dir, 'current', 'package.json'), 'utf-8')
  } catch {
    return { pkg: {}, error: 'Missing package.json' }
  }

  try {
    const manifest = JSON.parse(raw)
    return manifest.scripts?.toes
      ? { pkg: manifest }
      : { pkg: manifest, error: 'Missing scripts.toes in package.json' }
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e)
    return { pkg: {}, error: `Invalid JSON in package.json: ${reason}` }
  }
}
/** Clears the restart counter when the app had been up for at least STABLE_RUN_TIME. */
function maybeResetBackoff(app: App) {
  if (!app.started) return
  const uptime = Date.now() - app.started
  if (uptime >= STABLE_RUN_TIME) {
    app.restartAttempts = 0
  }
}
/** Puts a port back into the pool (once) and keeps the pool in ascending order. */
function releasePort(port: number) {
  const alreadyFree = _availablePorts.includes(port)
  if (alreadyFree) return

  _availablePorts.push(port)
  // Sorted pool gives predictable allocation
  _availablePorts.sort((lower, higher) => lower - higher)
}
/**
 * Deletes per-app log files whose date (taken from the file name) is older
 * than LOG_RETENTION_DAYS. Files whose name does not parse as a date are
 * kept. A file that cannot be deleted is reported and skipped, so a single
 * bad file does not abort the rest of the rotation or the caller.
 */
function rotateLogs() {
  const extension = '.log'
  const cutoff = Date.now() - LOG_RETENTION_DAYS * 24 * 60 * 60 * 1000

  for (const appName of allAppDirs()) {
    const dir = logDir(appName)
    if (!existsSync(dir)) continue

    for (const file of readdirSync(dir)) {
      if (!file.endsWith(extension)) continue

      // Strip only the trailing extension to recover the date part
      const dateStr = file.slice(0, -extension.length)
      const fileDate = new Date(dateStr).getTime()
      // An unparseable name yields NaN, which never compares below the cutoff
      if (!(fileDate < cutoff)) continue

      try {
        unlinkSync(join(dir, file))
        hostLog(`Rotated old log: ${appName}/logs/${file}`)
      } catch (e) {
        const reason = e instanceof Error ? e.message : String(e)
        hostLog(`Failed to rotate log ${appName}/logs/${file}: ${reason}`)
      }
    }
  }
}
/**
 * Appends one line to the app's log file for today, formatted as
 * `[<ISO timestamp>] [<stream>] <text>`, creating the log directory if needed.
 */
function writeLogLine(appName: string, streamType: 'stdout' | 'stderr' | 'system', text: string) {
  ensureLogDir(appName)
  const stamp = new Date().toISOString()
  appendFileSync(logFile(appName), `[${stamp}] [${streamType}] ${text}\n`)
}
/**
 * Launches an app's `bun run toes` process on the given port and wires up
 * everything around it: the startup watchdog, dependency install, readiness
 * polling of `/ok`, log capture from stdout/stderr, and the exit handler that
 * cleans up and may schedule a restart.
 *
 * Returns without doing anything when the directory does not load as a valid
 * app or is not registered.
 *
 * @param dir App directory name (also the registry key).
 * @param port Port the app must listen on; exported to it as PORT.
 */
async function runApp(dir: string, port: number) {
  const { error } = loadApp(dir)
  if (error) return

  const app = _apps.get(dir)
  if (!app) return

  // Set state to starting
  app.state = 'starting'
  app.port = port
  app.logs = []
  app.consecutiveHealthFailures = 0
  update()

  // Start startup timeout
  app.startupTimer = setTimeout(() => {
    if (app.state === 'starting') {
      info(app, 'Startup timeout, killing process...')
      app.proc?.kill()
    }
  }, STARTUP_TIMEOUT)

  // Resolve symlink to actual timestamp directory
  const currentLink = join(APPS_DIR, dir, 'current')
  const cwd = realpathSync(currentLink)

  const needsInstall = !existsSync(join(cwd, 'node_modules'))
  if (needsInstall) info(app, 'Installing dependencies...')
  // The install runs on every launch; the message above is only shown when
  // node_modules is missing
  const install = Bun.spawn(['bun', 'install'], { cwd, stdout: 'pipe', stderr: 'pipe' })
  await install.exited

  info(app, `Starting on port ${port}...`)

  // Load env vars from TOES_DIR/env/
  const appEnv = loadAppEnv(dir, TOES_DIR)
  const dataDir = join(process.env.DATA_DIR ?? '.', 'toes', dir)
  mkdirSync(dataDir, { recursive: true })

  const proc = Bun.spawn(['bun', 'run', 'toes'], {
    cwd,
    env: { ...process.env, ...appEnv, PORT: String(port), NO_AUTOPORT: 'true', APP_URL: buildAppUrl(dir, TOES_URL), APPS_DIR, DATA_DIR: dataDir, TOES_DIR, TOES_URL },
    stdout: 'pipe',
    stderr: 'pipe',
  })
  app.proc = proc

  // Poll to verify app started - waits for /ok to respond 200
  const pollStartup = async () => {
    const pollInterval = 500
    while (app.state === 'starting' && app.proc === proc) {
      if (proc.exitCode !== null) {
        info(app, 'Process died during startup')
        return
      }

      const controller = new AbortController()
      const timeout = setTimeout(() => controller.abort(), 2000)
      try {
        const response = await fetch(`http://localhost:${port}/ok`, {
          signal: controller.signal,
        })

        // The app may have been stopped or replaced while the request was in
        // flight; a late answer must not flip it to 'running'
        if (app.state !== 'starting' || app.proc !== proc) return

        if (response.ok) {
          markAsRunning(app, port)
          return
        }

        // App responded but /ok returned error - mark as error and kill
        info(app, `/ok returned ${response.status}`)
        app.error = `Health check failed: /ok returned ${response.status}`
        app.proc?.kill()
        return
      } catch {
        // Connection failed - app not ready yet
      } finally {
        // Always drop the abort timer, including when fetch throws
        clearTimeout(timeout)
      }

      await new Promise(resolve => setTimeout(resolve, pollInterval))
    }
  }
  pollStartup()

  const streamOutput = async (stream: ReadableStream<Uint8Array> | null, streamType: 'stdout' | 'stderr') => {
    if (!stream) return
    const reader = stream.getReader()
    const decoder = new TextDecoder()

    // Records complete lines in memory and on disk
    const record = (rawLines: string[]) => {
      const lines = rawLines.map(l => l.trimEnd()).filter(Boolean)
      for (const text of lines) {
        // Skip health check logs (e.g., "200 GET http://localhost:3001/ok (0ms)")
        if (/\bGET\b.*\/ok\b/.test(text)) continue
        info(app, text)
        writeLogLine(dir, streamType, text)
        app.logs = (app.logs ?? []).slice(-MAX_LOGS)
      }
      if (lines.length) update()
    }

    // Chunks do not end on line (or even character) boundaries, so keep the
    // unfinished tail and decode in streaming mode; otherwise one log line
    // can be recorded as two and multi-byte characters can be corrupted.
    let pending = ''
    while (true) {
      const { done, value } = await reader.read()
      if (done) break
      pending += decoder.decode(value, { stream: true })
      const parts = pending.split('\n')
      pending = parts.pop() ?? ''
      record(parts)
    }

    // Flush whatever was left without a trailing newline
    pending += decoder.decode()
    record([pending])
  }
  streamOutput(proc.stdout, 'stdout')
  streamOutput(proc.stderr, 'stderr')

  // Handle process exit
  proc.exited.then(code => {
    // If the app has moved on (e.g. renamed and restarted), this is a
    // stale exit handler — don't touch current app state or ports
    if (app.proc && app.proc !== proc) return

    // Clear all timers
    clearTimers(app)

    // Check if app was stable before crashing (for backoff reset)
    maybeResetBackoff(app)

    if (code !== 0) {
      const msg = `Exited with code ${code}`
      app.logs?.push({ time: Date.now(), text: msg })
      writeLogLine(dir, 'system', msg)
    } else {
      app.logs?.push({ time: Date.now(), text: 'Stopped' })
      writeLogLine(dir, 'system', 'Stopped')
    }

    unpublishApp(dir)
    closeTunnel(dir)

    // Release port back to pool
    if (app.port) {
      releasePort(app.port)
    }

    // Reset to stopped state (or invalid if error or no longer valid)
    app.state = (isApp(dir) && !app.error) ? 'stopped' : 'invalid'
    app.proc = undefined
    app.port = undefined
    app.started = undefined
    update()
    if (!_shuttingDown) emit({ type: 'app:stop', app: dir })

    // Schedule restart if appropriate
    if (shouldAutoRestart(app, code)) {
      scheduleRestart(app, dir)
    }
  })
}
/** Writes the manifest back to `<APPS_DIR>/<dir>/current/package.json` as 2-space JSON plus a newline. */
function saveApp(dir: string, pkg: any) {
  const target = join(APPS_DIR, dir, 'current', 'package.json')
  const serialized = `${JSON.stringify(pkg, null, 2)}\n`
  writeFileSync(target, serialized)
}
/**
 * Queues an automatic restart using the back-off table, or gives up and puts
 * the app into the 'error' state once MAX_RESTART_ATTEMPTS has been reached.
 * The queued restart re-checks all conditions when it fires.
 */
function scheduleRestart(app: App, dir: string) {
  const attempts = app.restartAttempts ?? 0

  if (attempts >= MAX_RESTART_ATTEMPTS) {
    info(app, `Too many restart failures (${attempts}/${MAX_RESTART_ATTEMPTS}), giving up.`)
    app.state = 'error'
    app.error = `Crashed ${attempts} times, restart disabled`
    update()
    return
  }

  // Attempts beyond the table reuse its last entry
  const lastIndex = RESTART_DELAYS.length - 1
  const delay = RESTART_DELAYS[Math.min(attempts, lastIndex)]!

  app.restartAttempts = attempts + 1
  app.lastRestartTime = Date.now()
  info(app, `Scheduling restart in ${delay / 1000}s (attempt ${app.restartAttempts}/${MAX_RESTART_ATTEMPTS})...`)

  const attemptRestart = () => {
    // Things may have changed while waiting; re-check before restarting
    const blocked =
      _shuttingDown ||
      app.manuallyStopped ||
      app.state !== 'stopped' ||
      !isApp(dir)
    if (blocked) return

    info(app, 'Restarting...')
    runApp(dir, getPort(dir))
  }
  setTimeout(attemptRestart, delay)
}
/** Routes SIGTERM and SIGINT to gracefulShutdown(). */
function setupShutdownHandlers() {
  for (const signal of ['SIGTERM', 'SIGINT'] as const) {
    process.on(signal, () => gracefulShutdown(signal))
  }
}
/**
 * Decides whether an exited app should be restarted automatically. That is
 * the case only for a non-zero exit while the host is not shutting down, the
 * stop was not manual, and the app is neither 'invalid' nor 'error'.
 */
function shouldAutoRestart(app: App, exitCode: number | null): boolean {
  const blocked =
    _shuttingDown ||                 // host is going down
    app.manuallyStopped ||           // the stop was requested
    app.state === 'invalid' ||       // app is no longer valid
    app.state === 'error' ||         // restart limit already hit
    exitCode === 0                   // clean exit, not a crash
  return !blocked
}
/**
 * Starts probing `http://localhost:<port>/ok` every HEALTH_CHECK_INTERVAL.
 * A successful probe resets the failure counter; a non-OK answer, a timeout
 * or a connection error counts as a failure. Probing cancels itself as soon
 * as the app is no longer 'running'.
 */
function startHealthChecks(app: App, port: number) {
  const probe = async () => {
    if (app.state !== 'running') {
      const ownTimer = app.healthCheckTimer
      if (ownTimer) {
        clearInterval(ownTimer)
        app.healthCheckTimer = undefined
      }
      return
    }

    try {
      const abort = new AbortController()
      const deadline = setTimeout(() => abort.abort(), HEALTH_CHECK_TIMEOUT)
      const reply = await fetch(`http://localhost:${port}/ok`, {
        signal: abort.signal,
      })
      clearTimeout(deadline)

      if (!reply.ok) {
        handleHealthCheckFailure(app)
      } else {
        // A healthy answer wipes the consecutive-failure count
        app.consecutiveHealthFailures = 0
      }
    } catch {
      handleHealthCheckFailure(app)
    }
  }

  app.healthCheckTimer = setInterval(probe, HEALTH_CHECK_INTERVAL)
}
/**
 * Arms a timer that sends SIGKILL when, SHUTDOWN_TIMEOUT later, the app
 * still has a process and is still 'stopping' or 'running'.
 */
function startShutdownTimeout(app: App) {
  const escalate = () => {
    const proc = app.proc
    if (!proc) return
    if (app.state !== 'stopping' && app.state !== 'running') return
    info(app, 'Shutdown timeout, sending SIGKILL...')
    proc.kill(9) // SIGKILL
  }
  app.shutdownTimer = setTimeout(escalate, SHUTDOWN_TIMEOUT)
}