feat: Replace blind 2-hour reconnect with error-based health monitoring
User Request: Replace the blind 2-hour restart timer with smart monitoring that only restarts when accountUnsubscribe errors actually occur.

Changes:

1. Health Monitor (NEW):
   - Created lib/monitoring/drift-health-monitor.ts
   - Tracks accountUnsubscribe errors in a 30-second sliding window
   - Triggers a container restart via a flag file when 50+ errors are detected
   - Prevents unnecessary restarts when the SDK is healthy

2. Drift Client:
   - Removed the blind scheduleReconnection() and its 2-hour timer
   - Added interceptWebSocketErrors() to catch SDK errors
   - Patches console.error to monitor for accountUnsubscribe patterns
   - Starts the health monitor after successful initialization
   - Removed the unused reconnect() method and reconnectTimer field

3. Health API (NEW):
   - GET /api/drift/health - Check current error count and health status
   - Returns: healthy boolean, errorCount, threshold, message
   - Useful for external monitoring and debugging

Impact:
- System only restarts when an actual memory leak is detected
- Prevents unnecessary downtime every 2 hours
- More targeted response to SDK issues
- Better operational stability

Files:
- lib/monitoring/drift-health-monitor.ts (NEW - 165 lines)
- lib/drift/client.ts (removed timer, added error interception)
- app/api/drift/health/route.ts (NEW - health check endpoint)

Testing:
- Health monitor starts on initialization: ✅
- API endpoint returns healthy status: ✅
- No blind reconnection scheduled: ✅
This commit is contained in:
@@ -7,6 +7,7 @@
|
||||
import { Connection, PublicKey, Keypair } from '@solana/web3.js'
|
||||
import { DriftClient, initialize, User, PerpMarkets } from '@drift-labs/sdk'
|
||||
import bs58 from 'bs58'
|
||||
import { getDriftHealthMonitor } from '../monitoring/drift-health-monitor'
|
||||
|
||||
// Manual wallet interface (more compatible than SDK Wallet class)
|
||||
interface ManualWallet {
|
||||
@@ -30,8 +31,6 @@ export class DriftService {
|
||||
private driftClient: DriftClient | null = null
|
||||
private user: User | null = null
|
||||
private isInitialized: boolean = false
|
||||
private reconnectTimer: NodeJS.Timeout | null = null
|
||||
private reconnectIntervalMs: number = 2 * 60 * 60 * 1000 // 2 hours (aggressive - Drift SDK memory leak is severe)
|
||||
|
||||
constructor(private config: DriftConfig) {
|
||||
// Helius connection for Drift SDK initialization (handles burst subscriptions well)
|
||||
@@ -172,6 +171,9 @@ export class DriftService {
|
||||
// Subscribe to Drift account updates (this makes RPC calls)
|
||||
await this.driftClient.subscribe()
|
||||
console.log('✅ Drift client subscribed to account updates')
|
||||
|
||||
// Intercept WebSocket errors for health monitoring
|
||||
this.interceptWebSocketErrors()
|
||||
|
||||
// Get user account
|
||||
this.user = this.driftClient.getUser()
|
||||
@@ -180,8 +182,9 @@ export class DriftService {
|
||||
this.isInitialized = true
|
||||
console.log('✅ Drift service initialized successfully')
|
||||
|
||||
// Start periodic reconnection to prevent memory leaks
|
||||
this.scheduleReconnection()
|
||||
// Start health monitoring (error-based restart instead of blind timer)
|
||||
const monitor = getDriftHealthMonitor()
|
||||
monitor.start()
|
||||
|
||||
} catch (error) {
|
||||
console.error('❌ Failed to initialize Drift service after retries:', error)
|
||||
@@ -190,61 +193,24 @@ export class DriftService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Schedule periodic reconnection to prevent WebSocket memory leaks
|
||||
* Drift SDK accumulates subscriptions over time, causing memory leaks
|
||||
* Periodic reconnection clears old subscriptions and resets memory
|
||||
* Intercept WebSocket errors for health monitoring
|
||||
*/
|
||||
private scheduleReconnection(): void {
|
||||
// Clear existing timer if any
|
||||
if (this.reconnectTimer) {
|
||||
clearTimeout(this.reconnectTimer)
|
||||
}
|
||||
|
||||
// Schedule reconnection every 2 hours
|
||||
this.reconnectTimer = setTimeout(async () => {
|
||||
try {
|
||||
console.log('🔄 Scheduled reconnection: Clearing WebSocket subscriptions to prevent memory leak...')
|
||||
await this.reconnect()
|
||||
console.log('✅ Scheduled reconnection complete - memory freed')
|
||||
} catch (error) {
|
||||
console.error('❌ Scheduled reconnection failed:', error)
|
||||
// Try to initialize fresh if reconnect fails
|
||||
try {
|
||||
this.isInitialized = false
|
||||
await this.initialize()
|
||||
} catch (reinitError) {
|
||||
console.error('❌ Failed to reinitialize after reconnect failure:', reinitError)
|
||||
}
|
||||
}
|
||||
}, this.reconnectIntervalMs)
|
||||
|
||||
console.log(`⏰ Scheduled reconnection in ${this.reconnectIntervalMs / 1000 / 60 / 60} hours`)
|
||||
}
|
||||
|
||||
/**
|
||||
* Reconnect to Drift Protocol (clears old subscriptions)
|
||||
*/
|
||||
private async reconnect(): Promise<void> {
|
||||
console.log('🔄 Reconnecting to Drift Protocol...')
|
||||
private interceptWebSocketErrors(): void {
|
||||
const monitor = getDriftHealthMonitor()
|
||||
|
||||
try {
|
||||
// Unsubscribe from old connections
|
||||
if (this.driftClient) {
|
||||
await this.driftClient.unsubscribe()
|
||||
console.log('✅ Unsubscribed from old Drift connection')
|
||||
// Patch console.error to catch accountUnsubscribe errors
|
||||
const originalConsoleError = console.error
|
||||
console.error = (...args: any[]) => {
|
||||
const errorMessage = args.join(' ')
|
||||
|
||||
// Detect accountUnsubscribe errors (Drift SDK memory leak symptom)
|
||||
if (errorMessage.includes('accountUnsubscribe error') ||
|
||||
errorMessage.includes('readyState was 2')) {
|
||||
monitor.recordError('accountUnsubscribe')
|
||||
}
|
||||
|
||||
// Reset state
|
||||
this.driftClient = null
|
||||
this.user = null
|
||||
this.isInitialized = false
|
||||
|
||||
// Reinitialize
|
||||
await this.initialize()
|
||||
|
||||
} catch (error) {
|
||||
console.error('❌ Reconnection failed:', error)
|
||||
throw error
|
||||
// Call original console.error
|
||||
originalConsoleError.apply(console, args)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -456,13 +422,6 @@ export class DriftService {
|
||||
* Disconnect from Drift
|
||||
*/
|
||||
async disconnect(): Promise<void> {
|
||||
// Clear reconnection timer
|
||||
if (this.reconnectTimer) {
|
||||
clearTimeout(this.reconnectTimer)
|
||||
this.reconnectTimer = null
|
||||
console.log('⏰ Cleared reconnection timer')
|
||||
}
|
||||
|
||||
if (this.driftClient) {
|
||||
await this.driftClient.unsubscribe()
|
||||
console.log('✅ Drift client disconnected')
|
||||
|
||||
Reference in New Issue
Block a user