health checks
This commit is contained in:
parent
02fca1313c
commit
a396f740a5
|
|
@ -33,6 +33,7 @@ let _shuttingDown = false
|
|||
export type App = SharedApp & {
|
||||
consecutiveHealthFailures?: number
|
||||
healthCheckTimer?: Timer
|
||||
isHttpApp?: boolean
|
||||
lastRestartTime?: number
|
||||
manuallyStopped?: boolean
|
||||
proc?: Subprocess
|
||||
|
|
@ -184,11 +185,12 @@ export function renameApp(oldName: string, newName: string): { ok: boolean, erro
|
|||
|
||||
// Explicitly starts the app registered under `dir`.
//
// Does nothing when the app is unknown, when it is in any state other than
// 'stopped' or 'invalid', or when `isApp(dir)` no longer holds.
export function startApp(dir: string) {
  const app = _apps.get(dir)

  // 'invalid' must be startable too: an app whose last run recorded an error
  // is parked in that state, and an explicit start is how it gets retried.
  if (!app || (app.state !== 'stopped' && app.state !== 'invalid')) return
  if (!isApp(dir)) return

  // Clear flags when explicitly starting
  app.manuallyStopped = false
  app.error = undefined

  runApp(dir, getPort(dir))
}
|
||||
|
||||
|
|
@ -448,6 +450,23 @@ function initPortPool() {
|
|||
}
|
||||
}
|
||||
|
||||
// Moves `app` into the 'running' state and begins periodic health checking.
//
// app       - the app record to update in place
// port      - port handed to the HTTP health checker
// isHttpApp - true to use the HTTP health checker, false to use the
//             process health checker
function markAsRunning(app: App, port: number, isHttpApp: boolean) {
  // Cancel any startup timer that is still pending for this app
  const pendingStartup = app.startupTimer
  if (pendingStartup) {
    clearTimeout(pendingStartup)
    app.startupTimer = undefined
  }

  // Record the new state before calling update()
  app.state = 'running'
  app.started = Date.now()
  app.isHttpApp = isHttpApp
  update()

  // Pick the health-check strategy that matches the kind of app
  if (!isHttpApp) {
    startProcessHealthChecks(app)
    return
  }
  startHealthChecks(app, port)
}
|
||||
|
||||
function loadApp(dir: string): LoadResult {
|
||||
try {
|
||||
const pkgPath = join(APPS_DIR, dir, 'current', 'package.json')
|
||||
|
|
@ -582,19 +601,71 @@ async function runApp(dir: string, port: number) {
|
|||
stderr: 'pipe',
|
||||
})
|
||||
|
||||
// Clear startup timer and set state to running
|
||||
if (app.startupTimer) {
|
||||
clearTimeout(app.startupTimer)
|
||||
app.startupTimer = undefined
|
||||
app.proc = proc
|
||||
|
||||
// Check if process is alive using ps(1) - more reliable than Bun's API
|
||||
// Resolves to true when `ps -p <pid>` exits with status 0, and to false when
// it exits with any other status or when spawning/waiting throws.
const isProcessAlive = async (pid: number): Promise<boolean> => {
  let exitCode: number
  try {
    const probe = Bun.spawn(['ps', '-p', String(pid)], { stdout: 'pipe', stderr: 'pipe' })
    exitCode = await probe.exited
  } catch {
    // Could not run or wait for ps; report the process as not alive
    return false
  }
  return exitCode === 0
}
|
||||
|
||||
app.state = 'running'
|
||||
app.proc = proc
|
||||
app.started = Date.now()
|
||||
update()
|
||||
// Poll to verify app started - tries /ok for HTTP apps, falls back to survival check
|
||||
// Polls a freshly spawned process until its startup outcome is known:
//   - the process is no longer alive          -> log and stop polling
//   - GET /ok answers with a 2xx status       -> mark running as an HTTP app
//   - GET /ok answers with any other status   -> record the error and kill the process
//   - no HTTP answer for `survivalThreshold`  -> mark running as a non-HTTP app
// Polling also stops, without touching the app, as soon as the app leaves the
// 'starting' state or is bound to a different process.
const pollStartup = async () => {
  const pollInterval = 500
  const survivalThreshold = 5000 // Consider non-HTTP apps running after 5s
  const probeTimeout = 2000
  const startTime = Date.now()
  const pid = proc.pid

  // True while this poller is still responsible for the app's startup.
  // Must be re-evaluated after every await, because the app can be stopped
  // or restarted while a probe is in flight.
  const stillStarting = () => app.state === 'starting' && app.proc === proc

  // One GET /ok attempt. Resolves to the response, or undefined when the
  // request failed or timed out. The abort timer is always cleared.
  const probeOk = async () => {
    const controller = new AbortController()
    const timeout = setTimeout(() => controller.abort(), probeTimeout)
    try {
      return await fetch(`http://localhost:${port}/ok`, {
        signal: controller.signal,
      })
    } catch {
      // Connection failed - app not ready yet or not an HTTP app
      return undefined
    } finally {
      clearTimeout(timeout)
    }
  }

  while (stillStarting()) {
    // First check if process is still alive
    const alive = await isProcessAlive(pid)
    if (!alive) {
      info(app, 'Process died during startup')
      // proc.exited handler will clean up
      return
    }
    if (!stillStarting()) return

    // Try /ok endpoint for HTTP apps
    const response = await probeOk()
    if (!stillStarting()) return

    if (response) {
      if (response.ok) {
        // HTTP app is running and healthy
        markAsRunning(app, port, true)
        return
      }

      // App responded but /ok returned error - mark as error and kill
      info(app, `/ok returned ${response.status}`)
      app.error = `Health check failed: /ok returned ${response.status}`
      proc.kill()
      return
    }

    // If process survived long enough, consider it running (non-HTTP app)
    if (Date.now() - startTime >= survivalThreshold) {
      info(app, 'No /ok endpoint, marking as running (process survived 5s)')
      markAsRunning(app, port, false)
      return
    }

    await new Promise(resolve => setTimeout(resolve, pollInterval))
  }
}
|
||||
|
||||
pollStartup()
|
||||
|
||||
const streamOutput = async (stream: ReadableStream<Uint8Array> | null, streamType: 'stdout' | 'stderr') => {
|
||||
if (!stream) return
|
||||
|
|
@ -639,8 +710,8 @@ async function runApp(dir: string, port: number) {
|
|||
releasePort(app.port)
|
||||
}
|
||||
|
||||
// Reset to stopped state (or invalid if no longer valid)
|
||||
app.state = isApp(dir) ? 'stopped' : 'invalid'
|
||||
// Reset to stopped state (or invalid if error or no longer valid)
|
||||
app.state = (isApp(dir) && !app.error) ? 'stopped' : 'invalid'
|
||||
app.proc = undefined
|
||||
app.port = undefined
|
||||
app.started = undefined
|
||||
|
|
@ -733,6 +804,38 @@ function startHealthChecks(app: App, port: number) {
|
|||
}, HEALTH_CHECK_INTERVAL)
|
||||
}
|
||||
|
||||
// Starts periodic liveness checks for a non-HTTP app.
//
// Every HEALTH_CHECK_INTERVAL the app's process id is looked up with ps(1).
// A zero exit status resets `consecutiveHealthFailures`; a missing pid, a
// non-zero exit status, or a failure to run ps is passed to
// `handleHealthCheckFailure`. The interval cancels itself once the app is no
// longer in the 'running' state.
function startProcessHealthChecks(app: App) {
  // Never leave an earlier interval running alongside the new one
  if (app.healthCheckTimer) {
    clearInterval(app.healthCheckTimer)
    app.healthCheckTimer = undefined
  }

  const timer = setInterval(async () => {
    if (app.state !== 'running') {
      // Cancel this interval by its own handle, and only clear the field if
      // it still refers to this interval rather than a newer one.
      clearInterval(timer)
      if (app.healthCheckTimer === timer) {
        app.healthCheckTimer = undefined
      }
      return
    }

    const proc = app.proc
    const pid = proc?.pid
    if (!pid) {
      handleHealthCheckFailure(app)
      return
    }

    let alive = false
    try {
      // Only the exit status matters, so the output is discarded instead of
      // being piped into streams nobody reads.
      const ps = Bun.spawn(['ps', '-p', String(pid)], { stdout: 'ignore', stderr: 'ignore' })
      alive = (await ps.exited) === 0
    } catch {
      alive = false
    }

    // The app may have been stopped or restarted while ps was running;
    // a result for the old process must not be counted against it.
    if (app.state !== 'running' || app.proc !== proc) return

    if (alive) {
      // Process is alive
      app.consecutiveHealthFailures = 0
    } else {
      handleHealthCheckFailure(app)
    }
  }, HEALTH_CHECK_INTERVAL)

  app.healthCheckTimer = timer
}
|
||||
|
||||
function startShutdownTimeout(app: App) {
|
||||
app.shutdownTimer = setTimeout(() => {
|
||||
if (app.proc && (app.state === 'stopping' || app.state === 'running')) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user