// trading_bot_v4/app/api/cluster/control/route.ts

import { NextRequest, NextResponse } from 'next/server'
import { exec } from 'child_process'
import { promisify } from 'util'
import path from 'path'
import sqlite3 from 'sqlite3'
import { open } from 'sqlite'

// Promise-returning wrapper around child_process.exec so shell commands can be awaited.
const execAsync = promisify(exec)

// Next.js route segment option: request dynamic (per-request) handling for this route.
export const dynamic = 'force-dynamic'

/**
 * Runs a `ps ... | wc -l` style shell pipeline and returns the count it prints.
 *
 * @param countCmd Shell pipeline whose stdout is a single integer.
 * @returns The parsed count; 0 when the output cannot be parsed as a number.
 */
async function countProcesses(countCmd: string): Promise<number> {
  const { stdout } = await execAsync(countCmd)
  const count = Number.parseInt(stdout.trim(), 10)
  return Number.isNaN(count) ? 0 : count
}

/**
 * Resets every chunk whose status is 'running' back to 'pending' and clears its
 * worker assignment and start time.
 *
 * The connection is closed in `finally`, so a failing statement does not leak
 * the SQLite handle.
 *
 * @param dbPath Path of the SQLite database file.
 * @param countPending When true, also queries the total number of pending chunks
 *   after the reset.
 * @returns `reset`: rows changed by the UPDATE; `pending`: total pending chunks
 *   (0 when `countPending` is false or the query yields no row).
 * @throws Whatever `open`, `run`, `get` or `close` reject with.
 */
async function resetRunningChunks(
  dbPath: string,
  countPending = false
): Promise<{ reset: number; pending: number }> {
  const db = await open({
    filename: dbPath,
    driver: sqlite3.Database
  })
  try {
    // The UPDATE's own result carries the affected-row count; a follow-up
    // "SELECT changes()" issued through run() does not return rows.
    const result = await db.run(`UPDATE chunks SET status='pending', assigned_worker=NULL, started_at=NULL WHERE status='running'`)
    let pending = 0
    if (countPending) {
      const row = await db.get(`SELECT COUNT(*) as count FROM chunks WHERE status='pending'`)
      pending = row?.count || 0
    }
    return { reset: result.changes ?? 0, pending }
  } finally {
    await db.close()
  }
}

/**
 * Cluster control endpoint. Expects a JSON body `{ action }` where `action` is
 * one of:
 *
 * - `"start"`: refuses with 400 if a coordinator process is already running;
 *   otherwise resets stale 'running' chunks (best effort), launches
 *   `distributed_coordinator.py` in the background, waits 2 s and verifies the
 *   process exists (500 with the log tail if it does not).
 * - `"stop"`: resets 'running' chunks first (500 if that fails), then force-kills
 *   coordinator and worker processes, waits 1 s and reports whether any remain.
 * - `"status"`: reports whether a coordinator process is running.
 *
 * Any other action yields 400; any unexpected error (including an unparsable
 * request body) yields 500 with the error message in `details`.
 *
 * NOTE(review): no authentication/authorization check is visible in this
 * handler although it spawns and kills processes — confirm it is protected
 * elsewhere (e.g. middleware).
 *
 * @param request Incoming request carrying the JSON body.
 * @returns A JSON response with at least `success` and, depending on the
 *   branch, `isRunning`, `message`, `error`, `details` or `note`.
 */
export async function POST(request: NextRequest) {
  try {
    const { action } = await request.json()

    if (action === 'start') {
      // The database may still list chunks as "running" after a coordinator
      // crash; they are reset to "pending" before a new exploration starts.
      // NOTE(review): the database path is derived from process.cwd() while the
      // coordinator is launched from a hard-coded /home/icke/traderv4/cluster —
      // confirm both refer to the same directory in deployment.
      const dbPath = path.join(process.cwd(), 'cluster', 'exploration.db')

      // First check if coordinator is already running
      const checkCmd = 'ps aux | grep distributed_coordinator.py | grep -v grep | wc -l'
      const alreadyRunning = (await countProcesses(checkCmd)) > 0

      if (alreadyRunning) {
        return NextResponse.json({
          success: false,
          error: 'Coordinator is already running',
          message: 'Use Stop button first if you want to restart'
        }, { status: 400 })
      }

      // Reset any stale "running" chunks to "pending" (orphaned from crashed coordinator)
      console.log('🔧 Checking for stale database chunks...')
      try {
        const { reset } = await resetRunningChunks(dbPath)
        console.log(`✅ Database cleanup complete - ${reset} chunks reset`)
      } catch (dbErr) {
        console.error('⚠️ Database cleanup failed:', dbErr)
        // Continue anyway - don't block start if database issue
      }

      // Start the coordinator (detached via nohup/&, output redirected to coordinator.log)
      const startCmd = 'cd /home/icke/traderv4/cluster && nohup python3 distributed_coordinator.py > coordinator.log 2>&1 &'
      await execAsync(startCmd)
      console.log('🚀 Coordinator start command issued')

      // Wait a moment for it to start
      await new Promise(resolve => setTimeout(resolve, 2000))

      // Verify it's running
      const isRunning = (await countProcesses(checkCmd)) > 0

      if (!isRunning) {
        // Check coordinator log for errors
        const logCmd = 'tail -20 /home/icke/traderv4/cluster/coordinator.log 2>/dev/null || echo "No log file"'
        const { stdout: logOutput } = await execAsync(logCmd)
        console.error('❌ Coordinator failed to start. Log:\n', logOutput)

        return NextResponse.json({
          success: false,
          error: 'Coordinator failed to start',
          details: logOutput,
          message: 'Check coordinator.log for details'
        }, { status: 500 })
      }

      return NextResponse.json({
        success: true,
        message: 'Coordinator started successfully',
        isRunning: true
      })
    } else if (action === 'stop') {
      // The coordinator may already have exited while leaving chunks in
      // "running" state, so the database is reset FIRST and processes are
      // killed afterwards.
      console.log('🛑 Stopping cluster...')

      const dbPath = path.join(process.cwd(), 'cluster', 'exploration.db')
      console.log('🔧 Resetting database chunks to pending...')
      try {
        const { reset, pending } = await resetRunningChunks(dbPath, true)
        console.log(`✅ Database cleanup complete - ${reset} chunks reset to pending (total pending: ${pending})`)
      } catch (dbErr) {
        console.error('❌ Database reset failed:', dbErr)
        return NextResponse.json({
          success: false,
          error: 'Failed to reset database state',
          details: dbErr instanceof Error ? dbErr.message : 'Unknown error'
        }, { status: 500 })
      }

      // THEN try to stop any running processes (may already be stopped)
      const stopCmd = 'pkill -9 -f distributed_coordinator; pkill -9 -f distributed_worker'
      try {
        await execAsync(stopCmd)
        console.log('✅ Killed coordinator and worker processes')
      } catch {
        // pkill returns error code if no processes found - this is OK
        console.log('📝 No processes to kill (already stopped)')
      }

      // Wait a moment for cleanup
      await new Promise(resolve => setTimeout(resolve, 1000))

      // Verify everything is stopped
      const checkCmd = 'ps aux | grep -E "(distributed_coordinator|distributed_worker)" | grep -v grep | wc -l'
      const processCount = await countProcesses(checkCmd)

      return NextResponse.json({
        success: true,
        message: 'Cluster stopped and database reset to pending',
        isRunning: processCount > 0,
        note: processCount === 0 ? 'All processes stopped, chunks reset' : 'Some processes may still be cleaning up'
      })
    } else if (action === 'status') {
      // Check if coordinator is running
      const checkCmd = 'ps aux | grep distributed_coordinator.py | grep -v grep | wc -l'
      const isRunning = (await countProcesses(checkCmd)) > 0

      return NextResponse.json({
        success: true,
        isRunning,
        message: isRunning ? 'Coordinator is running' : 'Coordinator is not running'
      })
    } else {
      return NextResponse.json({
        success: false,
        error: 'Invalid action. Use "start", "stop", or "status"'
      }, { status: 400 })
    }
  } catch (error) {
    console.error('Error controlling cluster:', error)
    return NextResponse.json({
      success: false,
      error: 'Failed to control cluster',
      details: error instanceof Error ? error.message : 'Unknown error'
    }, { status: 500 })
  }
}