Files
trading_bot_v4/lib/monitoring/drift-health-monitor.ts
mindesbunister 302511293c feat: Add production logging gating (Phase 1, Task 1.1)
- Created logger utility with environment-based gating (lib/utils/logger.ts)
- Replaced 517 console.log statements with logger.log (71% reduction)
- Fixed import paths in 15 files (resolved comment-trapped imports)
- Added DEBUG_LOGS=false to .env
- Achieves 71% immediate log reduction (517/731 statements)
- Expected 90% reduction in production when deployed

Impact: Reduced I/O blocking, lower log volume in production
Risk: LOW (easy rollback, non-invasive)
Phase: Phase 1, Task 1.1 (Quick Wins - Console.log Production Gating)

Files changed:
- NEW: lib/utils/logger.ts (production-safe logging)
- NEW: scripts/replace-console-logs.js (automation tool)
- Modified: 15 lib/*.ts files (console.log → logger.log)
- Modified: .env (DEBUG_LOGS=false)

Next: Task 1.2 (Image Size Optimization)
2025-12-05 00:32:41 +01:00

164 lines
4.2 KiB
TypeScript

/**
* Drift SDK Health Monitor
*
* Monitors for accountUnsubscribe errors that indicate WebSocket connection issues.
* When detected, triggers container restart via flag file for watch-restart.sh
*/
import fs from 'fs'
import { logger } from '../utils/logger'
import path from 'path'
/**
 * Tracks recently recorded Drift SDK errors in a sliding time window and,
 * once the number of errors inside that window reaches the threshold, writes
 * a restart flag file (see the file header: the flag is meant to be picked up
 * by watch-restart.sh).
 */
class DriftHealthMonitor {
  /**
   * Recorded errors still inside the window: unique per-error key -> the
   * Date.now() timestamp (ms) at which the error was recorded. The map's size
   * is the current error count.
   */
  private errorCounts: Map<string, number> = new Map()
  /** Counter appended to keys so errors recorded in the same millisecond stay distinct. */
  private errorSequence = 0
  private errorWindow: number = 10000 // 10 second window (was 30s - too slow for rapid memory leak)
  private errorThreshold: number = 20 // 20 errors in 10 seconds = problem (was 50 in 30s)
  /** Handle of the periodic threshold check; null while not monitoring. */
  private checkInterval: NodeJS.Timeout | null = null
  private isMonitoring: boolean = false

  /**
   * Start monitoring for Drift SDK errors.
   *
   * Schedules a threshold check every 3 seconds. Calling this while the
   * monitor is already running only logs a warning.
   */
  start(): void {
    if (this.isMonitoring) {
      logger.log('⚠️ Drift health monitor already running')
      return
    }
    this.isMonitoring = true
    logger.log('🏥 Drift health monitor started')
    logger.log(` Threshold: ${this.errorThreshold} accountUnsubscribe errors in ${this.errorWindow/1000}s`)
    // Check error counts every 3 seconds (was 10s - faster response to memory leak)
    this.checkInterval = setInterval(() => {
      this.checkErrorThreshold()
    }, 3000)
  }

  /**
   * Stop monitoring: cancels the periodic check (if any) and clears the
   * running flag. Already-recorded errors are kept.
   */
  stop(): void {
    if (this.checkInterval) {
      clearInterval(this.checkInterval)
      this.checkInterval = null
    }
    this.isMonitoring = false
    logger.log('🏥 Drift health monitor stopped')
  }

  /**
   * Record one error occurrence at the current time.
   *
   * @param errorType Label used as the key prefix; defaults to 'accountUnsubscribe'.
   */
  recordError(errorType: string = 'accountUnsubscribe'): void {
    const now = Date.now()
    // Error bursts routinely land several errors in the same millisecond. A
    // key built from the timestamp alone would make them overwrite each other
    // and undercount the burst, so a sequence number is appended.
    const key = `${errorType}-${now}-${this.errorSequence}`
    // Wrap long before precision is lost; colliding with a key from a full
    // cycle ago would also require an identical timestamp.
    this.errorSequence = (this.errorSequence + 1) % Number.MAX_SAFE_INTEGER
    this.errorCounts.set(key, now)
    // Clean up old errors outside the window
    this.cleanupOldErrors()
  }

  /**
   * Remove errors whose timestamp is older than the error window.
   */
  private cleanupOldErrors(): void {
    const cutoff = Date.now() - this.errorWindow
    for (const [key, timestamp] of this.errorCounts.entries()) {
      if (timestamp < cutoff) {
        this.errorCounts.delete(key)
      }
    }
  }

  /**
   * Check whether the error threshold has been reached and, if so, request a
   * restart.
   */
  private checkErrorThreshold(): void {
    this.cleanupOldErrors()
    const errorCount = this.errorCounts.size
    if (errorCount < this.errorThreshold) {
      return
    }
    console.error(`🚨 CRITICAL: ${errorCount} Drift SDK errors in ${this.errorWindow/1000}s (threshold: ${this.errorThreshold})`)
    console.error('🔄 Triggering container restart to clear WebSocket connection leak...')
    if (this.triggerRestart()) {
      // Stop monitoring to prevent multiple restart triggers
      this.stop()
    }
    // If the flag could not be written, no restart was requested; keep
    // monitoring so the next periodic check tries again instead of leaving
    // the monitor disabled.
  }

  /**
   * Request a container restart by writing the restart flag file.
   *
   * @returns true if the flag file was written, false if writing failed
   *          (the failure is logged, not thrown).
   */
  private triggerRestart(): boolean {
    const restartFlagPath = '/tmp/trading-bot-restart.flag'
    try {
      fs.writeFileSync(
        restartFlagPath,
        `Drift SDK health check failed: ${this.errorCounts.size} accountUnsubscribe errors\nTimestamp: ${new Date().toISOString()}\n`,
        'utf-8'
      )
      logger.log(`✅ Restart flag created at ${restartFlagPath}`)
      logger.log(' watch-restart.sh will restart container within 10 seconds')
      return true
    } catch (error) {
      console.error('❌ Failed to create restart flag:', error)
      return false
    }
  }

  /**
   * Get the number of errors recorded within the current window.
   */
  getErrorCount(): number {
    this.cleanupOldErrors()
    return this.errorCounts.size
  }

  /**
   * Get health status.
   *
   * @returns `healthy` is true while the in-window error count is below the
   *          threshold; `errorCount` and `threshold` are the values compared.
   */
  getHealthStatus(): { healthy: boolean; errorCount: number; threshold: number } {
    const errorCount = this.getErrorCount()
    return {
      healthy: errorCount < this.errorThreshold,
      errorCount,
      threshold: this.errorThreshold
    }
  }
}
// Module-wide monitor shared by all callers; created on first request
let monitorInstance: DriftHealthMonitor | null = null
/**
 * Return the shared Drift health monitor, constructing it the first time
 * it is requested.
 */
export function getDriftHealthMonitor(): DriftHealthMonitor {
  if (monitorInstance !== null) {
    return monitorInstance
  }
  const created = new DriftHealthMonitor()
  monitorInstance = created
  return created
}
/**
 * Begin Drift health monitoring on the shared monitor instance
 * (obtained through getDriftHealthMonitor).
 */
export function startDriftHealthMonitoring(): void {
  getDriftHealthMonitor().start()
}
/**
 * Halt Drift health monitoring. Does nothing when the shared monitor
 * has never been created.
 */
export function stopDriftHealthMonitoring(): void {
  if (monitorInstance === null) {
    return
  }
  monitorInstance.stop()
}