🔧 Improve cleanup timing correlation with analysis decisions

FIXES:
- Enhanced signalAnalysisCycleComplete with more intelligent cleanup logic
- Added active session detection to avoid killing processes during analysis
- Implemented graceful shutdown (SIGTERM) before force kill (SIGKILL)
- Only kills processes older than 2 minutes to avoid disrupting active analysis
- Added a 10-second delay in runPostCycleCleanup so the trading decision has completed before cleanup starts
- Improved process age filtering to prevent premature cleanup

- Cleanup now runs only after both the analysis and the trading decision have completed
- Reduced aggressive kills that were happening during active analysis
- Better CPU usage management through smarter process lifecycle
- Prevents cleanup from interfering with ongoing analysis work

This should significantly reduce the CPU usage caused by zombie processes by ensuring
that cleanup runs only once analysis work is truly complete and trading decisions are finalized.
This commit is contained in:
mindesbunister
2025-07-19 00:53:25 +02:00
parent ac813b8cd7
commit 700296e664
2 changed files with 83 additions and 31 deletions

View File

@@ -296,55 +296,107 @@ class AggressiveCleanup {
/**
 * Handles the "analysis cycle complete" signal: logs it, pauses to let
 * processes shut down, then decides whether to run cleanup.
 *
 * NOTE(review): this span is rendered from a commit diff without +/- markers,
 * so statements from the previous implementation appear interleaved with the
 * new ones. Presumably the 3-second wait, the direct call to
 * `forceCleanupAfterCycle()` and the `forceCleanupAfterCycle` header/log line
 * are the removed side, and the intended flow is: wait 5 seconds, count active
 * sessions, return early if any exist, otherwise call
 * `cleanupPostAnalysisProcesses()` — confirm against the checked-out file.
 */
async signalAnalysisCycleComplete(): Promise<void> {
console.log('🎯 Analysis cycle completion signal received')
// Wait a bit longer to ensure all processes have had time to close
console.log('⏳ Waiting 3 seconds for graceful process shutdown...')
await new Promise(resolve => setTimeout(resolve, 3000))
// Wait for graceful shutdown of analysis-related processes
console.log('⏳ Waiting 5 seconds for graceful process shutdown...')
await new Promise(resolve => setTimeout(resolve, 5000))
// Force cleanup of any remaining processes
console.log('🧹 Running forced cleanup after analysis cycle completion...')
await this.forceCleanupAfterCycle()
// Check if there are any active progress sessions first.
// A non-zero count aborts cleanup for this cycle (early return below).
const activeSessions = await this.checkActiveAnalysisSessions()
if (activeSessions > 0) {
console.log(`⚠️ Found ${activeSessions} active analysis sessions, skipping aggressive cleanup`)
return
}
// NOTE(review): the next two lines look like the header and first statement of
// the previous `forceCleanupAfterCycle` method, not part of this method's body.
private async forceCleanupAfterCycle(): Promise<void> {
console.log('🚨 Force cleanup after analysis cycle - cleaning all browser processes')
// Only run cleanup if no active sessions
console.log('🧹 No active sessions detected, running post-analysis cleanup...')
await this.cleanupPostAnalysisProcesses()
}
/**
 * Counts processes whose full command line matches `automation-.*-.*`, used
 * as a simple proxy for "an analysis session is still running". This is a
 * process-table check only; it does not consult any real session state.
 *
 * @returns The number of matching processes, or 0 when the count cannot be
 *          determined (command failure or non-numeric output).
 */
private async checkActiveAnalysisSessions(): Promise<number> {
  try {
    // `pgrep -f` matches against full command lines and only excludes itself.
    // If `execAsync` runs the command through a shell (presumably it wraps
    // child_process.exec — confirm), that shell's own command line contains the
    // pattern text, so the unbracketed pattern would always count the shell and
    // the result could never be 0. Writing the first character as `[a]` keeps
    // the regex equivalent while making the literal text no longer match itself.
    const { stdout } = await execAsync('pgrep -f "[a]utomation-.*-.*" | wc -l')
    const sessionCount = Number.parseInt(stdout.trim(), 10)
    return Number.isNaN(sessionCount) ? 0 : sessionCount
  } catch (error) {
    // Best effort: a failed probe is reported as "no active sessions",
    // but the failure is logged instead of being silently discarded.
    console.warn('⚠️ Could not determine active analysis sessions:', error)
    return 0
  }
}
/**
 * Post-analysis cleanup of chromium processes.
 *
 * Visible steps: list chromium PIDs via `findChromiumProcesses()`, keep the
 * ones returned by `filterOldProcesses(…, 2 * 60)`, send each `kill -TERM`,
 * wait 3000 ms, then `kill -9` whatever `findStillRunningProcesses()` still
 * reports. Any error escaping the outer `try` is logged with `console.error`
 * and not rethrown.
 *
 * NOTE(review): this span is rendered from a commit diff without +/- markers,
 * so statements from the previous `forceCleanupAfterCycle` body (the
 * `length > 0` branch, the unconditional `kill -9` loop, the `rm -rf` of temp
 * directories and the old log/error messages) are interleaved with the new
 * ones. The braces therefore do not balance as shown — confirm the real
 * control flow against the checked-out file.
 */
private async cleanupPostAnalysisProcesses(): Promise<void> {
console.log('🚨 Post-analysis cleanup - targeting orphaned browser processes')
try {
// Find all chromium processes
const chromiumProcesses = await this.findChromiumProcesses()
if (chromiumProcesses.length > 0) {
console.log(`🔍 Found ${chromiumProcesses.length} chromium processes to clean after cycle completion`)
// Nothing to do when no chromium process exists.
if (chromiumProcesses.length === 0) {
console.log('✅ No chromium processes found to clean up')
return
}
// Force kill all chromium processes after cycle is complete
for (const pid of chromiumProcesses) {
console.log(`🔍 Found ${chromiumProcesses.length} chromium processes`)
// Filter out processes that are too new (less than 2 minutes old)
// NOTE(review): `filterOldProcesses` is not visible here; presumably its second
// argument is a minimum age in seconds — verify against its definition.
const oldProcesses = await this.filterOldProcesses(chromiumProcesses, 2 * 60) // 2 minutes
if (oldProcesses.length === 0) {
console.log('✅ All chromium processes are recent, not cleaning up')
return
}
console.log(`🧹 Cleaning up ${oldProcesses.length} old chromium processes`)
// Try graceful shutdown first
for (const pid of oldProcesses) {
try {
console.log(`🗡️ Force killing process ${pid} (cycle complete)`)
await execAsync(`kill -9 ${pid}`)
// NOTE(review): the leading character of the next log message is mis-encoded
// (a replacement character where an emoji was intended) — restore it in the real file.
console.log(`<EFBFBD> Attempting graceful shutdown of process ${pid}`)
await execAsync(`kill -TERM ${pid}`)
} catch (error) {
// Process might already be dead
console.log(` Process ${pid} may already be terminated`)
}
}
console.log(`✅ Forcefully cleaned ${chromiumProcesses.length} processes after cycle completion`)
} else {
console.log('✅ No chromium processes found - cleanup already complete')
}
// Wait for graceful shutdown
await new Promise(resolve => setTimeout(resolve, 3000))
// Clean up temp directories
// Check which processes are still running and force kill only those
const stillRunning = await this.findStillRunningProcesses(oldProcesses)
if (stillRunning.length > 0) {
console.log(`🗡️ Force killing ${stillRunning.length} stubborn processes`)
for (const pid of stillRunning) {
try {
await execAsync('rm -rf /tmp/puppeteer_dev_chrome_profile-* 2>/dev/null || true')
await execAsync('rm -rf /dev/shm/.org.chromium.* 2>/dev/null || true')
console.log('✅ Cleaned up temp directories and shared memory')
await execAsync(`kill -9 ${pid}`)
console.log(`💀 Force killed process ${pid}`)
} catch (error) {
console.warn('⚠️ Could not clean temp directories:', error)
console.log(` Process ${pid} already terminated`)
}
}
}
console.log('✅ Post-analysis cleanup completed')
} catch (error) {
// Cleanup is best effort: failures are logged, never propagated to the caller.
console.error('Error in force cleanup after cycle:', error)
console.error('Error in post-analysis cleanup:', error)
}
}
/**
 * Returns the subset of `pids` for which `kill -0 <pid>` still succeeds,
 * in the same order as the input.
 *
 * @param pids Process IDs (as strings) to probe.
 * @returns The PIDs whose probe command did not fail.
 */
private async findStillRunningProcesses(pids: string[]): Promise<string[]> {
  // `kill -0` sends no signal; a failed command is taken to mean the process is gone.
  const respondsToProbe = async (candidate: string): Promise<boolean> => {
    try {
      await execAsync(`kill -0 ${candidate}`)
      return true
    } catch {
      return false
    }
  }

  const survivors: string[] = []
  // Probe one PID at a time so the commands run strictly in input order.
  for (const candidate of pids) {
    if (await respondsToProbe(candidate)) {
      survivors.push(candidate)
    }
  }
  return survivors
}
stop(): void {
if (this.cleanupInterval) {
clearInterval(this.cleanupInterval)

View File

@@ -210,8 +210,8 @@ export class AutomationService {
private async runPostCycleCleanup(reason: string): Promise<void> {
console.log(`🧹 Running post-cycle cleanup (reason: ${reason})`)
// Small delay to ensure all analysis processes have finished
await new Promise(resolve => setTimeout(resolve, 2000))
// Longer delay to ensure all analysis processes AND trading decision have finished
await new Promise(resolve => setTimeout(resolve, 10000)) // 10 seconds
try {
// Signal that the complete analysis cycle is done