/**
 * Drift SDK Health Monitor
 *
 * Monitors for accountUnsubscribe errors that indicate WebSocket connection issues.
 * When detected, triggers container restart via flag file for watch-restart.sh
 */

import fs from 'fs'
import path from 'path'

/**
 * Sliding-window error counter for Drift SDK errors.
 *
 * Callers report errors through `recordError`. While monitoring is started, a
 * periodic check compares the number of errors recorded within the last
 * `errorWindow` milliseconds against `errorThreshold`; when the threshold is
 * reached a restart flag file is written and monitoring stops.
 */
class DriftHealthMonitor {
  /**
   * Recorded errors: unique key -> `Date.now()` timestamp of the error.
   * Only the number of entries and their timestamps are ever read back.
   */
  private readonly errorCounts = new Map<string, number>()
  private readonly errorWindow = 30000 // 30 second window
  private readonly errorThreshold = 50 // 50 errors in 30 seconds = problem
  /** How often (ms) the window is evaluated against the threshold. */
  private readonly checkIntervalMs = 10000
  /** Flag file written to request a restart. */
  private readonly restartFlagPath = '/tmp/trading-bot-restart.flag'
  /**
   * Monotonic counter appended to each key so that several errors recorded in
   * the same millisecond stay distinct entries instead of overwriting each other.
   */
  private errorSequence = 0
  private checkInterval: ReturnType<typeof setInterval> | null = null
  private isMonitoring = false

  /**
   * Start monitoring for Drift SDK errors.
   *
   * Schedules the periodic threshold check. Calling this while monitoring is
   * already active only logs a warning and does not schedule a second timer.
   */
  start(): void {
    if (this.isMonitoring) {
      console.log('⚠️ Drift health monitor already running')
      return
    }

    this.isMonitoring = true
    console.log('🏥 Drift health monitor started')
    console.log(` Threshold: ${this.errorThreshold} accountUnsubscribe errors in ${this.errorWindow / 1000}s`)

    // Check error counts every 10 seconds
    this.checkInterval = setInterval(() => {
      this.checkErrorThreshold()
    }, this.checkIntervalMs)
  }

  /**
   * Stop monitoring.
   *
   * Cancels the periodic check if one is scheduled. Already-recorded errors
   * are kept; they simply age out of the window on later reads.
   */
  stop(): void {
    if (this.checkInterval) {
      clearInterval(this.checkInterval)
      this.checkInterval = null
    }
    this.isMonitoring = false
    console.log('🏥 Drift health monitor stopped')
  }

  /**
   * Record an accountUnsubscribe error.
   *
   * @param errorType Label stored as part of the entry key. All types count
   *   towards the same threshold.
   */
  recordError(errorType: string = 'accountUnsubscribe'): void {
    const now = Date.now()
    // The sequence suffix keeps keys unique: a burst of errors within a single
    // millisecond must count as many errors, not collapse into one Map entry.
    const key = `${errorType}-${now}-${this.errorSequence++}`
    this.errorCounts.set(key, now)

    // Clean up old errors outside the window
    this.cleanupOldErrors()
  }

  /**
   * Remove errors older than the error window.
   *
   * An entry is dropped only when its timestamp is strictly older than
   * `now - errorWindow`; an entry exactly at the cutoff is still counted.
   */
  private cleanupOldErrors(): void {
    const cutoff = Date.now() - this.errorWindow

    for (const [key, timestamp] of this.errorCounts.entries()) {
      if (timestamp < cutoff) {
        this.errorCounts.delete(key)
      }
    }
  }

  /**
   * Check if the error threshold is exceeded and, if so, request a restart.
   *
   * Monitoring is stopped only after the restart flag was written
   * successfully. If writing the flag fails, the timer keeps running so the
   * request is retried on the next check instead of being silently lost.
   */
  private checkErrorThreshold(): void {
    this.cleanupOldErrors()

    const errorCount = this.errorCounts.size

    if (errorCount >= this.errorThreshold) {
      console.error(`🚨 CRITICAL: ${errorCount} Drift SDK errors in ${this.errorWindow / 1000}s (threshold: ${this.errorThreshold})`)
      console.error('🔄 Triggering container restart to clear WebSocket connection leak...')

      if (this.triggerRestart()) {
        // Stop monitoring to prevent multiple restart triggers
        this.stop()
      }
    }
  }

  /**
   * Trigger container restart via flag file.
   *
   * @returns `true` when the flag file was written, `false` when the write
   *   threw (the failure is logged, not rethrown).
   */
  private triggerRestart(): boolean {
    try {
      fs.writeFileSync(
        this.restartFlagPath,
        `Drift SDK health check failed: ${this.errorCounts.size} accountUnsubscribe errors\nTimestamp: ${new Date().toISOString()}\n`,
        'utf-8'
      )
      console.log(`✅ Restart flag created at ${this.restartFlagPath}`)
      console.log(' watch-restart.sh will restart container within 10 seconds')
      return true
    } catch (error) {
      console.error('❌ Failed to create restart flag:', error)
      return false
    }
  }

  /**
   * Get current error count.
   *
   * @returns Number of errors recorded within the current window (expired
   *   entries are purged first).
   */
  getErrorCount(): number {
    this.cleanupOldErrors()
    return this.errorCounts.size
  }

  /**
   * Get health status.
   *
   * @returns `healthy` is `true` while the in-window error count is below the
   *   threshold; `errorCount` and `threshold` are the values compared.
   */
  getHealthStatus(): { healthy: boolean; errorCount: number; threshold: number } {
    const errorCount = this.getErrorCount()
    return {
      healthy: errorCount < this.errorThreshold,
      errorCount,
      threshold: this.errorThreshold
    }
  }
}

// Singleton instance
let monitorInstance: DriftHealthMonitor | null = null

/**
 * Get the Drift health monitor singleton, creating it on first use.
 *
 * @returns The shared `DriftHealthMonitor` instance.
 */
export function getDriftHealthMonitor(): DriftHealthMonitor {
  if (!monitorInstance) {
    monitorInstance = new DriftHealthMonitor()
  }
  return monitorInstance
}

/**
 * Start Drift health monitoring on the singleton (created if necessary).
 */
export function startDriftHealthMonitoring(): void {
  const monitor = getDriftHealthMonitor()
  monitor.start()
}

/**
 * Stop Drift health monitoring. Does nothing if the singleton was never created.
 */
export function stopDriftHealthMonitoring(): void {
  if (monitorInstance) {
    monitorInstance.stop()
  }
}