From a396f740a5a28be399804104c75398c21d3050a0 Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Wed, 4 Feb 2026 13:35:38 -0800 Subject: [PATCH] health checks --- src/server/apps.ts | 131 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 117 insertions(+), 14 deletions(-) diff --git a/src/server/apps.ts b/src/server/apps.ts index c032be7..bb69e71 100644 --- a/src/server/apps.ts +++ b/src/server/apps.ts @@ -33,6 +33,7 @@ let _shuttingDown = false export type App = SharedApp & { consecutiveHealthFailures?: number healthCheckTimer?: Timer + isHttpApp?: boolean lastRestartTime?: number manuallyStopped?: boolean proc?: Subprocess @@ -184,11 +185,12 @@ export function renameApp(oldName: string, newName: string): { ok: boolean, erro export function startApp(dir: string) { const app = _apps.get(dir) - if (!app || app.state !== 'stopped') return + if (!app || (app.state !== 'stopped' && app.state !== 'invalid')) return if (!isApp(dir)) return - // Clear manually stopped flag when explicitly starting + // Clear flags when explicitly starting app.manuallyStopped = false + app.error = undefined runApp(dir, getPort(dir)) } @@ -448,6 +450,23 @@ function initPortPool() { } } +function markAsRunning(app: App, port: number, isHttpApp: boolean) { + if (app.startupTimer) { + clearTimeout(app.startupTimer) + app.startupTimer = undefined + } + app.state = 'running' + app.started = Date.now() + app.isHttpApp = isHttpApp + update() + + if (isHttpApp) { + startHealthChecks(app, port) + } else { + startProcessHealthChecks(app) + } +} + function loadApp(dir: string): LoadResult { try { const pkgPath = join(APPS_DIR, dir, 'current', 'package.json') @@ -582,19 +601,71 @@ async function runApp(dir: string, port: number) { stderr: 'pipe', }) - // Clear startup timer and set state to running - if (app.startupTimer) { - clearTimeout(app.startupTimer) - app.startupTimer = undefined + app.proc = proc + + // Check if process is alive using ps(1) - more reliable than Bun's API + const isProcessAlive = async (pid: number): Promise => { + try { + const ps = Bun.spawn(['ps', '-p', String(pid)], { stdout: 'pipe', stderr: 'pipe' }) + const code = await ps.exited + return code === 0 + } catch { + return false + } } - app.state = 'running' - app.proc = proc - app.started = Date.now() - update() + // Poll to verify app started - tries /ok for HTTP apps, falls back to survival check + const pollStartup = async () => { + const pollInterval = 500 + const survivalThreshold = 5000 // Consider non-HTTP apps running after 5s + const startTime = Date.now() + const pid = proc.pid - // Start health checks - startHealthChecks(app, port) + while (app.state === 'starting' && app.proc === proc) { + // First check if process is still alive + const alive = await isProcessAlive(pid) + if (!alive) { + info(app, 'Process died during startup') + // proc.exited handler will clean up + return + } + + // Try /ok endpoint for HTTP apps + try { + const controller = new AbortController() + const timeout = setTimeout(() => controller.abort(), 2000) + const response = await fetch(`http://localhost:${port}/ok`, { + signal: controller.signal, + }) + clearTimeout(timeout) + + if (response.ok) { + // HTTP app is running and healthy + markAsRunning(app, port, true) + return + } + + // App responded but /ok returned error - mark as error and kill + info(app, `/ok returned ${response.status}`) + app.error = `Health check failed: /ok returned ${response.status}` + app.proc?.kill() + return + } catch { + // Connection failed - app not ready yet or not an HTTP app + } + + // If process survived long enough, consider it running (non-HTTP app) + if (Date.now() - startTime >= survivalThreshold) { + info(app, 'No /ok endpoint, marking as running (process survived 5s)') + markAsRunning(app, port, false) + return + } + + await new Promise(resolve => setTimeout(resolve, pollInterval)) + } + } + + pollStartup() const streamOutput = async (stream: ReadableStream | null, streamType: 'stdout' | 'stderr') => { if (!stream) return @@ -639,8 +710,8 @@ async function runApp(dir: string, port: number) { releasePort(app.port) } - // Reset to stopped state (or invalid if no longer valid) - app.state = isApp(dir) ? 'stopped' : 'invalid' + // Reset to stopped state (or invalid if error or no longer valid) + app.state = (isApp(dir) && !app.error) ? 'stopped' : 'invalid' app.proc = undefined app.port = undefined app.started = undefined @@ -733,6 +804,38 @@ function startHealthChecks(app: App, port: number) { }, HEALTH_CHECK_INTERVAL) } +function startProcessHealthChecks(app: App) { + // For non-HTTP apps, just verify process is still alive using ps(1) + app.healthCheckTimer = setInterval(async () => { + if (app.state !== 'running') { + if (app.healthCheckTimer) { + clearInterval(app.healthCheckTimer) + app.healthCheckTimer = undefined + } + return + } + + const pid = app.proc?.pid + if (!pid) { + handleHealthCheckFailure(app) + return + } + + try { + const ps = Bun.spawn(['ps', '-p', String(pid)], { stdout: 'pipe', stderr: 'pipe' }) + const code = await ps.exited + if (code === 0) { + // Process is alive + app.consecutiveHealthFailures = 0 + } else { + handleHealthCheckFailure(app) + } + } catch { + handleHealthCheckFailure(app) + } + }, HEALTH_CHECK_INTERVAL) +} + function startShutdownTimeout(app: App) { app.shutdownTimer = setTimeout(() => { if (app.proc && (app.state === 'stopping' || app.state === 'running')) {