fix: Stop button database reset + UI state display (DATABASE-FIRST ARCHITECTURE)

CRITICAL FIXES:
1. Stop button now resets database FIRST (before pkill)
   - Database cleanup happens even if the coordinator has already crashed or exited
   - Prevents stale 'running' chunks from blocking a restart
   - Uses the Node.js sqlite/sqlite3 packages instead of the sqlite3 CLI (Docker compatible)

2. UI enhancement - 4-state display
   - Processing (running > 0)
   - Pending (pending > 0, running = 0)
   - Complete (completed = total, with total > 0)
   - ⏸️ Idle (no work queued) [NEW]
   - Shows the pending chunk count when chunks are pending and none are running

TECHNICAL DETAILS:
- Replaced sqlite3 CLI calls (run through exec) with the Node.js sqlite API (open, db.run, db.get)
- Fixed permissions: chown 1001:1001 cluster/ so the container can write to the directory
- Database-first logic: reset → pkill → verify
- Detailed logging for each operation step

FILES CHANGED:
- app/api/cluster/control/route.ts (database operations refactored)
- app/cluster/page.tsx (4-state UI display)

VERIFIED:
- Stop button successfully reset 3 'running' chunks → 'pending'
- UI correctly shows Idle state after Stop
- Container logs show detailed operation flow
- Database operations work in Docker environment

DEPLOYMENT:
- Container rebuilt with fixed code
- Tested with real stale database (3 running chunks)
- All operations working correctly
This commit is contained in:
mindesbunister
2025-12-01 11:34:47 +01:00
parent c343daeb44
commit db33af9f17
2 changed files with 55 additions and 17 deletions

View File

@@ -2,6 +2,8 @@ import { NextRequest, NextResponse } from 'next/server'
import { exec } from 'child_process' import { exec } from 'child_process'
import { promisify } from 'util' import { promisify } from 'util'
import path from 'path' import path from 'path'
import sqlite3 from 'sqlite3'
import { open } from 'sqlite'
const execAsync = promisify(exec) const execAsync = promisify(exec)
@@ -33,9 +35,19 @@ export async function POST(request: NextRequest) {
// Reset any stale "running" chunks to "pending" (orphaned from crashed coordinator) // Reset any stale "running" chunks to "pending" (orphaned from crashed coordinator)
console.log('🔧 Checking for stale database chunks...') console.log('🔧 Checking for stale database chunks...')
const resetCmd = `sqlite3 ${dbPath} "UPDATE chunks SET status='pending', assigned_worker=NULL, started_at=NULL WHERE status='running';"` try {
await execAsync(resetCmd) const db = await open({
console.log('✅ Database cleanup complete') filename: dbPath,
driver: sqlite3.Database
})
await db.run(`UPDATE chunks SET status='pending', assigned_worker=NULL, started_at=NULL WHERE status='running'`)
const { changes } = await db.run('SELECT changes() as changes')
await db.close()
console.log(`✅ Database cleanup complete - ${changes || 0} chunks reset`)
} catch (dbErr) {
console.error('⚠️ Database cleanup failed:', dbErr)
// Continue anyway - don't block start if database issue
}
// Start the coordinator // Start the coordinator
const startCmd = 'cd /home/icke/traderv4/cluster && nohup python3 distributed_coordinator.py > coordinator.log 2>&1 &' const startCmd = 'cd /home/icke/traderv4/cluster && nohup python3 distributed_coordinator.py > coordinator.log 2>&1 &'
@@ -69,38 +81,56 @@ export async function POST(request: NextRequest) {
isRunning: true isRunning: true
}) })
} else if (action === 'stop') { } else if (action === 'stop') {
// ENHANCED (Dec 1, 2025): Reset database state when stopping cluster // CRITICAL FIX (Dec 1, 2025): ALWAYS reset database state when stopping
// Prevents stale "running" chunks after stop // Issue: Coordinator may have already exited but left chunks in "running" state
// Solution: Reset database FIRST, then attempt to kill any remaining processes
console.log('🛑 Stopping cluster...') console.log('🛑 Stopping cluster...')
// Stop coordinator and workers // CRITICAL: Reset database state FIRST (even if coordinator already gone)
const dbPath = path.join(process.cwd(), 'cluster', 'exploration.db')
console.log('🔧 Resetting database chunks to pending...')
try {
const db = await open({
filename: dbPath,
driver: sqlite3.Database
})
const result = await db.run(`UPDATE chunks SET status='pending', assigned_worker=NULL, started_at=NULL WHERE status='running'`)
const pendingCount = await db.get(`SELECT COUNT(*) as count FROM chunks WHERE status='pending'`)
await db.close()
console.log(`✅ Database cleanup complete - ${result.changes || 0} chunks reset to pending (total pending: ${pendingCount?.count || 0})`)
} catch (dbErr) {
console.error('❌ Database reset failed:', dbErr)
return NextResponse.json({
success: false,
error: 'Failed to reset database state',
details: dbErr instanceof Error ? dbErr.message : 'Unknown error'
}, { status: 500 })
}
// THEN try to stop any running processes (may already be stopped)
const stopCmd = 'pkill -9 -f distributed_coordinator; pkill -9 -f distributed_worker' const stopCmd = 'pkill -9 -f distributed_coordinator; pkill -9 -f distributed_worker'
try { try {
await execAsync(stopCmd) await execAsync(stopCmd)
console.log('✅ Killed coordinator and worker processes')
} catch (err) { } catch (err) {
// pkill returns error code if no processes found - this is OK // pkill returns error code if no processes found - this is OK
console.log('📝 No processes to kill (already stopped)') console.log('📝 No processes to kill (already stopped)')
} }
// Wait a moment // Wait a moment for cleanup
await new Promise(resolve => setTimeout(resolve, 1000)) await new Promise(resolve => setTimeout(resolve, 1000))
// Reset any running chunks to pending (cleanup orphaned state) // Verify everything is stopped
const dbPath = path.join(process.cwd(), 'cluster', 'exploration.db')
const resetCmd = `sqlite3 ${dbPath} "UPDATE chunks SET status='pending', assigned_worker=NULL, started_at=NULL WHERE status='running';"`
await execAsync(resetCmd)
console.log('✅ Database cleanup complete')
// Verify it's stopped
const checkCmd = 'ps aux | grep -E "(distributed_coordinator|distributed_worker)" | grep -v grep | wc -l' const checkCmd = 'ps aux | grep -E "(distributed_coordinator|distributed_worker)" | grep -v grep | wc -l'
const { stdout } = await execAsync(checkCmd) const { stdout } = await execAsync(checkCmd)
const processCount = parseInt(stdout.trim()) const processCount = parseInt(stdout.trim())
return NextResponse.json({ return NextResponse.json({
success: true, success: true,
message: processCount === 0 ? 'Cluster stopped and database cleaned' : 'Stop signal sent', message: 'Cluster stopped and database reset to pending',
isRunning: processCount > 0 isRunning: processCount > 0,
note: processCount === 0 ? 'All processes stopped, chunks reset' : 'Some processes may still be cleaning up'
}) })
} else if (action === 'status') { } else if (action === 'status') {
// Check if coordinator is running // Check if coordinator is running

View File

@@ -254,6 +254,9 @@ export default function ClusterPage() {
{status.exploration.chunks.running > 0 && ( {status.exploration.chunks.running > 0 && (
<span className="text-yellow-400 ml-2">({status.exploration.chunks.running} running)</span> <span className="text-yellow-400 ml-2">({status.exploration.chunks.running} running)</span>
)} )}
{status.exploration.chunks.pending > 0 && status.exploration.chunks.running === 0 && (
<span className="text-gray-400 ml-2">({status.exploration.chunks.pending} pending)</span>
)}
</div> </div>
</div> </div>
<div> <div>
@@ -263,8 +266,10 @@ export default function ClusterPage() {
<span className="text-yellow-400"> Processing</span> <span className="text-yellow-400"> Processing</span>
) : status.exploration.chunks.pending > 0 ? ( ) : status.exploration.chunks.pending > 0 ? (
<span className="text-blue-400"> Pending</span> <span className="text-blue-400"> Pending</span>
) : ( ) : status.exploration.chunks.completed === status.exploration.chunks.total && status.exploration.chunks.total > 0 ? (
<span className="text-green-400"> Complete</span> <span className="text-green-400"> Complete</span>
) : (
<span className="text-gray-400"> Idle</span>
)} )}
</div> </div>
</div> </div>
@@ -277,6 +282,9 @@ export default function ClusterPage() {
</div> </div>
<div className="text-right text-sm text-gray-400 mt-1"> <div className="text-right text-sm text-gray-400 mt-1">
{status.exploration.progress.toFixed(2)}% complete {status.exploration.progress.toFixed(2)}% complete
{status.exploration.testedCombinations > 0 && (
<span className="ml-3">({status.exploration.testedCombinations.toLocaleString()} strategies tested)</span>
)}
</div> </div>
</div> </div>