health checks

This commit is contained in:
Chris Wanstrath 2026-02-04 13:35:38 -08:00
parent 02fca1313c
commit a396f740a5

View File

@ -33,6 +33,7 @@ let _shuttingDown = false
export type App = SharedApp & {
consecutiveHealthFailures?: number
healthCheckTimer?: Timer
isHttpApp?: boolean
lastRestartTime?: number
manuallyStopped?: boolean
proc?: Subprocess
@ -184,11 +185,12 @@ export function renameApp(oldName: string, newName: string): { ok: boolean, erro
/**
 * Explicitly start the app registered under `dir`.
 *
 * Does nothing when the app is unknown, when it is in any state other than
 * 'stopped' or 'invalid', or when `isApp(dir)` reports that the directory is
 * not (or no longer) an app. Otherwise the per-app flags left over from a
 * previous run are reset and the app is launched on the port returned by
 * `getPort(dir)`.
 *
 * @param dir - Key of the app in the `_apps` registry.
 */
export function startApp(dir: string) {
  const app = _apps.get(dir)
  // 'invalid' must be startable too: an app whose previous run ended with an
  // error is parked in that state and needs a way to be retried.
  if (!app || (app.state !== 'stopped' && app.state !== 'invalid')) return
  if (!isApp(dir)) return

  // Clear flags when explicitly starting
  app.manuallyStopped = false
  app.error = undefined
  runApp(dir, getPort(dir))
}
@ -448,6 +450,23 @@ function initPortPool() {
}
}
/**
 * Transition an app into the 'running' state and begin monitoring it.
 *
 * Cancels any pending startup timer, records the start time and whether the
 * app is an HTTP app, calls `update()`, and then starts the matching kind of
 * health check.
 *
 * @param app - The app record to mutate.
 * @param port - Port handed to the HTTP health checks.
 * @param isHttpApp - True selects HTTP health checks; false selects
 *   process health checks.
 */
function markAsRunning(app: App, port: number, isHttpApp: boolean) {
  const pendingStartup = app.startupTimer
  if (pendingStartup) {
    clearTimeout(pendingStartup)
    app.startupTimer = undefined
  }

  app.isHttpApp = isHttpApp
  app.started = Date.now()
  app.state = 'running'
  update()

  // Non-HTTP apps get process health checks instead of HTTP ones.
  if (!isHttpApp) {
    startProcessHealthChecks(app)
    return
  }
  startHealthChecks(app, port)
}
function loadApp(dir: string): LoadResult {
try {
const pkgPath = join(APPS_DIR, dir, 'current', 'package.json')
@ -582,19 +601,71 @@ async function runApp(dir: string, port: number) {
stderr: 'pipe',
})
// Clear startup timer and set state to running
if (app.startupTimer) {
clearTimeout(app.startupTimer)
app.startupTimer = undefined
app.proc = proc
// Check if process is alive using ps(1) - more reliable than Bun's API
const isProcessAlive = async (pid: number): Promise<boolean> => {
try {
const ps = Bun.spawn(['ps', '-p', String(pid)], { stdout: 'pipe', stderr: 'pipe' })
const code = await ps.exited
return code === 0
} catch {
return false
}
}
app.state = 'running'
app.proc = proc
app.started = Date.now()
update()
// Poll to verify app started - tries /ok for HTTP apps, falls back to survival check
const pollStartup = async () => {
const pollInterval = 500
const survivalThreshold = 5000 // Consider non-HTTP apps running after 5s
const startTime = Date.now()
const pid = proc.pid
// Start health checks
startHealthChecks(app, port)
while (app.state === 'starting' && app.proc === proc) {
// First check if process is still alive
const alive = await isProcessAlive(pid)
if (!alive) {
info(app, 'Process died during startup')
// proc.exited handler will clean up
return
}
// Try /ok endpoint for HTTP apps
try {
const controller = new AbortController()
const timeout = setTimeout(() => controller.abort(), 2000)
const response = await fetch(`http://localhost:${port}/ok`, {
signal: controller.signal,
})
clearTimeout(timeout)
if (response.ok) {
// HTTP app is running and healthy
markAsRunning(app, port, true)
return
}
// App responded but /ok returned error - mark as error and kill
info(app, `/ok returned ${response.status}`)
app.error = `Health check failed: /ok returned ${response.status}`
app.proc?.kill()
return
} catch {
// Connection failed - app not ready yet or not an HTTP app
}
// If process survived long enough, consider it running (non-HTTP app)
if (Date.now() - startTime >= survivalThreshold) {
info(app, 'No /ok endpoint, marking as running (process survived 5s)')
markAsRunning(app, port, false)
return
}
await new Promise(resolve => setTimeout(resolve, pollInterval))
}
}
pollStartup()
const streamOutput = async (stream: ReadableStream<Uint8Array> | null, streamType: 'stdout' | 'stderr') => {
if (!stream) return
@ -639,8 +710,8 @@ async function runApp(dir: string, port: number) {
releasePort(app.port)
}
// Reset to stopped state (or invalid if no longer valid)
app.state = isApp(dir) ? 'stopped' : 'invalid'
// Reset to stopped state (or invalid if error or no longer valid)
app.state = (isApp(dir) && !app.error) ? 'stopped' : 'invalid'
app.proc = undefined
app.port = undefined
app.started = undefined
@ -733,6 +804,38 @@ function startHealthChecks(app: App, port: number) {
}, HEALTH_CHECK_INTERVAL)
}
/**
 * Begin periodic liveness checks for an app that does not serve HTTP.
 *
 * Every HEALTH_CHECK_INTERVAL the app's process id is probed with
 * `ps -p <pid>`. Exit code 0 resets `consecutiveHealthFailures`; a non-zero
 * exit code, a missing pid, or a failure to run `ps` is reported through
 * `handleHealthCheckFailure`. The interval cancels itself once the app is no
 * longer in the 'running' state.
 *
 * @param app - The app record whose `proc` is monitored; its
 *   `healthCheckTimer` is replaced by the new interval.
 */
function startProcessHealthChecks(app: App) {
  // Overwriting the handle without clearing it would leak the old interval.
  if (app.healthCheckTimer) {
    clearInterval(app.healthCheckTimer)
    app.healthCheckTimer = undefined
  }

  const timer = setInterval(async () => {
    if (app.state !== 'running') {
      // Cancel via this interval's own handle so a newer timer stored on the
      // app is never stopped by mistake.
      clearInterval(timer)
      if (app.healthCheckTimer === timer) {
        app.healthCheckTimer = undefined
      }
      return
    }

    const proc = app.proc
    const pid = proc?.pid
    if (!pid) {
      handleHealthCheckFailure(app)
      return
    }

    // Only the probe itself is guarded, so an exception thrown by the failure
    // handler cannot cause the failure to be reported twice.
    let alive = false
    try {
      const ps = Bun.spawn(['ps', '-p', String(pid)], { stdout: 'pipe', stderr: 'pipe' })
      alive = (await ps.exited) === 0
    } catch {
      alive = false
    }

    // The app may have been stopped or restarted while ps was running; a
    // verdict about the old process must not be applied to the new state.
    if (app.state !== 'running' || app.proc !== proc) return

    if (alive) {
      // Process is alive
      app.consecutiveHealthFailures = 0
    } else {
      handleHealthCheckFailure(app)
    }
  }, HEALTH_CHECK_INTERVAL)

  app.healthCheckTimer = timer
}
function startShutdownTimeout(app: App) {
app.shutdownTimer = setTimeout(() => {
if (app.proc && (app.state === 'stopping' || app.state === 'running')) {