Files
trading_bot_v4/app/api/cluster/status/route.ts
mindesbunister cc56b72df2 fix: Database-first cluster status detection + Stop button clarification
CRITICAL FIX (Nov 30, 2025):
- Dashboard showed 'idle' despite 22+ worker processes running
- Root cause: SSH-based worker detection timing out
- Solution: Check database for running chunks FIRST

Changes:
1. app/api/cluster/status/route.ts:
   - Query exploration database before SSH detection
   - If running chunks exist, mark workers 'active' even if SSH fails
   - Override worker status: 'offline' → 'active' when chunks running
   - Log: '✅ Cluster status: ACTIVE (database shows running chunks)'
   - Database is source of truth, SSH only for supplementary metrics

2. app/cluster/page.tsx:
   - Stop button ALREADY EXISTS (conditionally shown)
   - Shows Start when status='idle', Stop when status='active'
   - No code changes needed - fixed by status detection

Result:
- Dashboard now shows 'ACTIVE' with 2 workers (correct)
- Workers show 'active' status (was 'offline')
- Stop button automatically visible when cluster active
- System resilient to SSH timeouts/network issues

Verified:
- Container restarted: Nov 30 21:18 UTC
- API tested: Returns status='active', activeWorkers=2
- Logs confirm: Database-first logic working
- Workers confirmed running: 22+ processes on worker1, workers on worker2
2025-11-30 22:23:01 +01:00

284 lines
9.6 KiB
TypeScript

import { NextRequest, NextResponse } from 'next/server'
import { exec } from 'child_process'
import { promisify } from 'util'
import sqlite3 from 'sqlite3'
import { open, Database } from 'sqlite'
import path from 'path'
// Promise-returning wrapper around child_process.exec so shell commands can be awaited.
const execAsync = promisify(exec)
// Next.js route segment config: evaluate this route on every request rather than
// serving a cached/static response.
export const dynamic = 'force-dynamic'
/**
 * Snapshot of a single cluster worker as returned by the status endpoint.
 */
interface WorkerStatus {
  /** Logical worker label supplied by the caller (e.g. 'worker1'). */
  name: string
  /** Human-readable host label derived from the SSH command. */
  host: string
  /** CPU utilisation figure parsed from `top` output; 0 when it could not be read. */
  cpuUsage: number
  /** Text following "load average:" in `uptime` output, or 'N/A' when the probe failed. */
  loadAverage: string
  /** Number of processes whose `ps aux` line matches `distributed_worker`. */
  activeProcesses: number
  /** 'offline' when the SSH probe failed; otherwise 'active' or 'idle'. */
  status: 'active' | 'idle' | 'offline'
}
/**
 * One ranked strategy row in the shape consumed by generateRecommendation
 * and returned to the dashboard as `topStrategies`.
 */
interface ChunkResult {
  /** 1-based position in the result list. */
  rank: number
  /** Profit per $1,000 of capital (rendered as a dollar amount). */
  pnl_per_1k: number
  /** Win rate; the recommendation text multiplies it by 100 to render a percentage. */
  win_rate: number
  /** Number of trades behind this result. */
  trades: number
  /** Profit factor (rendered with an "x" suffix). */
  profit_factor: number
  /** Maximum drawdown; its absolute value is rendered as a dollar amount. */
  max_drawdown: number
  /** Parameter set that produced this result. */
  params: {
    flip_threshold: number
    ma_gap: number
    adx_min: number
    long_pos_max: number
    short_pos_min: number
  }
}
/**
 * Upper bound (in milliseconds) for a single SSH probe. Without a limit a hung
 * SSH session would keep the status request pending indefinitely.
 */
const SSH_PROBE_TIMEOUT_MS = 10000

/**
 * Probes one worker over SSH and summarises its load.
 *
 * Three remote commands are executed (CPU usage via `top`, load average via
 * `uptime`, and a count of `distributed_worker` processes via `ps`). They are
 * independent, so they run concurrently, each bounded by SSH_PROBE_TIMEOUT_MS.
 *
 * @param workerName Label copied into the returned `name` field.
 * @param sshCommand Shell prefix used to reach the worker (e.g. `ssh root@host`);
 *   the remote command is appended as one double-quoted argument.
 * @returns The worker's metrics. This function never rejects: if any probe
 *   fails or times out, an 'offline' placeholder with zeroed metrics is returned.
 */
async function getWorkerStatus(workerName: string, sshCommand: string): Promise<WorkerStatus> {
  // The host label is inferred from the IP address embedded in the SSH command.
  const hostLabel = sshCommand.includes('10.20.254.100') ? 'bd-host01' : 'pve-nu-monitor01'

  // Runs one remote shell snippet and resolves with its trimmed stdout.
  const probe = async (remoteCommand: string): Promise<string> => {
    const { stdout } = await execAsync(`${sshCommand} "${remoteCommand}"`, {
      timeout: SSH_PROBE_TIMEOUT_MS
    })
    return stdout.trim()
  }

  try {
    // `\\$` yields `\$` in the command line so the local shell passes a literal
    // `$8` / `$2` through to awk instead of expanding it.
    const [cpuOut, loadAverage, procOut] = await Promise.all([
      probe("top -bn1 | grep 'Cpu(s)' | awk '{print 100-\\$8}'"),
      probe("uptime | awk -F'load average:' '{print \\$2}'"),
      probe('ps aux | grep distributed_worker | grep -v grep | wc -l')
    ])

    // Unparseable output (NaN) falls back to 0.
    const cpuUsage = parseFloat(cpuOut) || 0
    const activeProcesses = parseInt(procOut, 10) || 0

    // A worker counts as active if it runs worker processes or shows >10% CPU.
    const status: WorkerStatus['status'] =
      activeProcesses > 0 || cpuUsage > 10 ? 'active' : 'idle'

    return {
      name: workerName,
      host: `${hostLabel} (32 cores)`,
      cpuUsage,
      loadAverage,
      activeProcesses,
      status
    }
  } catch (error: unknown) {
    // Best effort: an unreachable worker is reported as offline, but the reason
    // is logged so SSH problems remain diagnosable.
    const reason = error instanceof Error ? error.message : String(error)
    console.warn(`${workerName}: SSH probe failed, reporting offline (${reason})`)
    return {
      name: workerName,
      host: hostLabel,
      cpuUsage: 0,
      loadAverage: 'N/A',
      activeProcesses: 0,
      status: 'offline'
    }
  }
}
/**
 * Reads exploration progress from the SQLite database at
 * `<cwd>/cluster/exploration.db`.
 *
 * Collects the sum of `total_combos` over all chunks, per-status chunk counts,
 * the number of rows in `strategies`, and the ten best strategies (by
 * `pnl_per_1k`) that have at least 700 trades.
 *
 * @returns Aggregated progress data. This function never rejects: if the
 *   database cannot be opened or the chunk queries fail, an all-zero result
 *   with an empty strategy list is returned. A failing `strategies` query only
 *   leaves the strategy fields at their defaults.
 */
async function getExplorationData() {
  try {
    const dbPath = path.join(process.cwd(), 'cluster', 'exploration.db')
    const db = await open({
      filename: dbPath,
      driver: sqlite3.Database
    })

    try {
      // SUM() yields NULL for an empty table, hence the fallback to 0.
      const totalCombosRow = await db.get('SELECT SUM(total_combos) as total FROM chunks')
      const totalCombos = totalCombosRow?.total || 0

      // Let SQLite do the counting instead of loading every chunk row.
      const statusRows: Array<{ status: string | null; count: number }> = await db.all(
        'SELECT status, COUNT(*) as count FROM chunks GROUP BY status'
      )
      const countFor = (status: string): number =>
        statusRows.find(row => row.status === status)?.count ?? 0
      const chunkSummary = {
        total: statusRows.reduce((sum, row) => sum + row.count, 0),
        completed: countFor('completed'),
        running: countFor('running'),
        pending: countFor('pending')
      }

      // The strategies table may not exist yet; treat that as "nothing tested".
      let strategies: any[] = []
      let testedCombos = 0
      try {
        const strategiesCount = await db.get('SELECT COUNT(*) as count FROM strategies')
        testedCombos = strategiesCount?.count || 0
        strategies = await db.all(`
          SELECT * FROM strategies
          WHERE total_trades >= 700
          ORDER BY pnl_per_1k DESC
          LIMIT 10
        `)
      } catch (e: unknown) {
        const reason = e instanceof Error ? e.message : String(e)
        if (reason.includes('no such table')) {
          console.log('Strategies table not yet available')
        } else {
          // Still best effort, but do not disguise other failures as a missing table.
          console.error('Error querying strategies table:', e)
        }
      }

      const progress = totalCombos > 0 ? Math.round((testedCombos / totalCombos) * 100) : 0

      return {
        totalCombos,
        testedCombos,
        progress,
        chunks: chunkSummary,
        strategies
      }
    } finally {
      // Release the handle on every path; previously a failing query skipped
      // close() and leaked the connection.
      await db.close()
    }
  } catch (error) {
    console.error('Error reading exploration database:', error)
    return {
      totalCombos: 0,
      testedCombos: 0,
      progress: 0,
      chunks: { total: 0, completed: 0, running: 0, pending: 0 },
      strategies: []
    }
  }
}
// NOTE(review): this is a second declaration of ChunkResult with the same
// members as the one declared earlier in this file. TypeScript merges
// same-named interfaces, so it compiles, but the duplicate is redundant and
// can be removed; if both stay, they must be kept in sync.
interface ChunkResult {
rank: number
pnl_per_1k: number
win_rate: number
trades: number
profit_factor: number
max_drawdown: number
params: {
flip_threshold: number
ma_gap: number
adx_min: number
long_pos_max: number
short_pos_min: number
}
}
/**
 * Builds the markdown recommendation shown on the cluster dashboard.
 *
 * @param results Strategies ordered best-first; only the first entry is
 *   described in detail, the rest contribute to the average PnL.
 * @returns A placeholder message when `results` is empty; otherwise a summary
 *   of the top strategy, its parameters, and a suggested action.
 */
function generateRecommendation(results: ChunkResult[]): string {
  const [best] = results
  if (best === undefined) {
    return "Cluster is processing parameter combinations. Check back soon for optimization recommendations."
  }

  const avgPnL = results.reduce((sum, r) => sum + r.pnl_per_1k, 0) / results.length

  const sections = [
    `🎯 **Top Strategy Found:**\n\n`,
    `- **Expected Profit:** $${best.pnl_per_1k.toFixed(2)} per $1,000 capital\n`,
    `- **Win Rate:** ${(best.win_rate * 100).toFixed(1)}%\n`,
    `- **Profit Factor:** ${best.profit_factor.toFixed(2)}x\n`,
    `- **Max Drawdown:** $${Math.abs(best.max_drawdown).toFixed(2)}\n\n`,
    `📊 **Optimal Parameters:**\n`,
    `- Flip Threshold: ${best.params.flip_threshold}%\n`,
    `- MA Gap: ${best.params.ma_gap}\n`,
    `- Min ADX: ${best.params.adx_min}\n`,
    `- Long Max Position: ${best.params.long_pos_max}%\n`,
    `- Short Min Position: ${best.params.short_pos_min}%\n\n`
  ]

  // The "1.5x the average" comparison is only meaningful for a positive
  // average. With a zero or negative average, `best > avg * 1.5` holds for
  // losing strategies too (e.g. -5 > -7.5) and the percentage divides by a
  // non-positive number, so such inputs must not reach this branch.
  if (avgPnL > 0 && best.pnl_per_1k > avgPnL * 1.5) {
    const improvementPct = ((best.pnl_per_1k / avgPnL) * 100 - 100).toFixed(0)
    sections.push(
      `✅ **Action:** This strategy shows exceptional performance (${improvementPct}% better than average). Consider implementing these parameters in production.`
    )
  } else if (best.win_rate > 0.6) {
    sections.push(
      `✅ **Action:** Strong win rate detected. This configuration provides consistent results with good risk management.`
    )
  } else {
    sections.push(
      `⚠️ **Action:** Continue exploration. Current top performer needs more validation across different market conditions.`
    )
  }

  return sections.join('')
}
/** Core count reported for the whole cluster (two workers labelled "32 cores" each). */
const CLUSTER_TOTAL_CORES = 64

/**
 * GET handler: reports cluster, worker and exploration status as JSON.
 *
 * The exploration database is the source of truth for activity: if it lists
 * running chunks, the cluster is reported 'active' and any worker whose SSH
 * probe failed is shown as 'active' instead of 'offline'. SSH only supplies
 * supplementary metrics (CPU, load, process counts).
 *
 * @param request Incoming request (not inspected).
 * @returns 200 with the status payload, or 500 with an error description if
 *   building the payload throws.
 */
export async function GET(request: NextRequest) {
  try {
    // The database read and the two SSH probes are independent and each helper
    // handles its own failures, so they can safely run concurrently.
    const [explorationData, worker1Status, worker2Status] = await Promise.all([
      getExplorationData(),
      getWorkerStatus('worker1', 'ssh root@10.10.254.106'),
      getWorkerStatus('worker2', 'ssh root@10.10.254.106 "ssh root@10.20.254.100"')
    ])
    const hasRunningChunks = explorationData.chunks.running > 0

    // If the database shows running chunks but SSH reports a worker offline,
    // override it to active; this prevents a false "idle" when SSH times out.
    const workers = [worker1Status, worker2Status].map((w): WorkerStatus => {
      if (!hasRunningChunks || w.status !== 'offline') {
        return w
      }
      console.log(`${w.name}: Database shows running chunks - overriding SSH offline to active`)
      return {
        ...w,
        status: 'active',
        activeProcesses: w.activeProcesses || 1 // Assume at least 1 process if chunks running
      }
    })

    // Mean CPU usage across workers, in percent.
    const avgCpuUsage = workers.reduce((sum, w) => sum + w.cpuUsage, 0) / workers.length
    const totalProcesses = workers.reduce((sum, w) => sum + w.activeProcesses, 0)
    const activeWorkers = workers.filter(w => w.status === 'active').length

    // Database-first: running chunks mean "active" regardless of SSH results.
    let clusterStatus: 'active' | 'idle' = 'idle'
    if (hasRunningChunks) {
      clusterStatus = 'active'
      console.log('✅ Cluster status: ACTIVE (database shows running chunks)')
    } else if (activeWorkers > 0) {
      clusterStatus = 'active'
      console.log('✅ Cluster status: ACTIVE (SSH detected active workers)')
    } else {
      console.log('⏸️ Cluster status: IDLE (no running chunks or active workers)')
    }

    // Convert database rows to ChunkResult; missing/NULL columns become 0.
    const topStrategies: ChunkResult[] = explorationData.strategies.map((s: any, idx: number) => ({
      rank: idx + 1,
      pnl_per_1k: s.pnl_per_1k || 0,
      win_rate: s.win_rate || 0,
      trades: s.total_trades || 0,
      profit_factor: s.profit_factor || 0,
      max_drawdown: s.max_drawdown || 0,
      params: {
        flip_threshold: s.flip_threshold || 0,
        ma_gap: s.ma_gap || 0,
        adx_min: s.momentum_adx || 0,
        long_pos_max: s.momentum_long_pos || 0,
        short_pos_min: s.momentum_short_pos || 0
      }
    }))
    const recommendation = generateRecommendation(topStrategies)

    return NextResponse.json({
      cluster: {
        totalCores: CLUSTER_TOTAL_CORES,
        // Average CPU percentage scaled to the core count (percent * 64 / 100).
        activeCores: Math.round(avgCpuUsage * (CLUSTER_TOTAL_CORES / 100)),
        cpuUsage: avgCpuUsage,
        activeWorkers,
        totalWorkers: workers.length,
        workerProcesses: totalProcesses,
        status: clusterStatus // Use database-aware status
      },
      workers,
      exploration: {
        totalCombinations: explorationData.totalCombos,
        testedCombinations: explorationData.testedCombos,
        progress: explorationData.progress,
        chunks: explorationData.chunks
      },
      topStrategies: topStrategies.slice(0, 5),
      recommendation,
      lastUpdate: new Date().toISOString()
    }, { status: 200 })
  } catch (error) {
    console.error('Error fetching cluster status:', error)
    return NextResponse.json({
      error: 'Failed to fetch cluster status',
      details: error instanceof Error ? error.message : 'Unknown error'
    }, { status: 500 })
  }
}