feat: Add EPYC cluster distributed sweep with web UI

New Features:
- Distributed coordinator orchestrates 2x AMD EPYC 16-core servers
- 64 total cores processing 12M parameter combinations (70% CPU limit)
- Worker1 (pve-nu-monitor01): Direct SSH access at 10.10.254.106
- Worker2 (bd-host01): 2-hop SSH through worker1 (10.20.254.100)
- Web UI at /cluster shows real-time status and AI recommendations
- API endpoint /api/cluster/status serves cluster metrics
- Auto-refresh every 30s with top strategies and actionable insights

Files Added:
- cluster/distributed_coordinator.py (510 lines) - Main orchestrator
- cluster/distributed_worker.py (271 lines) - Worker1 script
- cluster/distributed_worker_bd_clean.py (275 lines) - Worker2 script
- cluster/monitor_bd_host01.sh - Monitoring script
- app/api/cluster/status/route.ts (274 lines) - API endpoint
- app/cluster/page.tsx (258 lines) - Web UI
- cluster/CLUSTER_SETUP.md - Complete setup and access documentation

Technical Details:
- SQLite database tracks chunk assignments
- 10,000 combinations per chunk (1,195 total chunks)
- Multiprocessing.Pool with 70% CPU limit (22 cores per EPYC)
- SSH/SCP for deployment and result collection
- Handles 2-hop SSH for bd-host01 access
- Results in CSV format with top strategies ranked

Access Documentation:
- Worker1: ssh root@10.10.254.106
- Worker2: ssh root@10.10.254.106 "ssh root@10.20.254.100"
- Web UI: http://localhost:3001/cluster
- See CLUSTER_SETUP.md for complete guide

Status: Deployed and operational
This commit is contained in:
mindesbunister
2025-11-30 13:02:18 +01:00
parent 2a8e04fe57
commit b77282b560
9 changed files with 2190 additions and 0 deletions

273
app/cluster/page.tsx Normal file
View File

@@ -0,0 +1,273 @@
'use client'
import { useEffect, useState } from 'react'
interface ClusterStatus {
cluster: {
totalCores: number
activeCores: number
cpuUsage: number
activeWorkers: number
totalWorkers: number
workerProcesses: number
status: string
}
workers: Array<{
name: string
host: string
cpuUsage: number
loadAverage: string
activeProcesses: number
status: string
}>
exploration: {
totalCombinations: number
combinationsPerChunk: number
totalChunks: number
chunksCompleted: number
currentChunk: string
progress: number
}
topStrategies: Array<{
rank: number
pnl_per_1k: number
win_rate: number
trades: number
profit_factor: number
max_drawdown: number
params: {
flip_threshold: number
ma_gap: number
adx_min: number
long_pos_max: number
short_pos_min: number
}
}>
recommendation: string
lastUpdate: string
}
export default function ClusterPage() {
const [status, setStatus] = useState<ClusterStatus | null>(null)
const [loading, setLoading] = useState(true)
const [error, setError] = useState<string | null>(null)
const fetchStatus = async () => {
try {
const res = await fetch('/api/cluster/status')
if (!res.ok) throw new Error('Failed to fetch')
const data = await res.json()
setStatus(data)
setError(null)
} catch (err: any) {
setError(err.message)
} finally {
setLoading(false)
}
}
useEffect(() => {
fetchStatus()
const interval = setInterval(fetchStatus, 30000) // Refresh every 30s
return () => clearInterval(interval)
}, [])
if (loading) {
return (
<div className="min-h-screen bg-gray-900 text-white p-8">
<div className="max-w-7xl mx-auto">
<h1 className="text-3xl font-bold mb-8">🖥 EPYC Cluster Status</h1>
<div className="text-gray-400">Loading cluster status...</div>
</div>
</div>
)
}
if (error) {
return (
<div className="min-h-screen bg-gray-900 text-white p-8">
<div className="max-w-7xl mx-auto">
<h1 className="text-3xl font-bold mb-8">🖥 EPYC Cluster Status</h1>
<div className="bg-red-900/20 border border-red-500 rounded p-4">
<p className="text-red-400">Error: {error}</p>
</div>
</div>
</div>
)
}
if (!status) return null
const getStatusColor = (statusStr: string) => {
if (statusStr === 'active') return 'text-green-400'
if (statusStr === 'idle') return 'text-yellow-400'
return 'text-red-400'
}
const getStatusBg = (statusStr: string) => {
if (statusStr === 'active') return 'bg-green-900/20 border-green-500'
if (statusStr === 'idle') return 'bg-yellow-900/20 border-yellow-500'
return 'bg-red-900/20 border-red-500'
}
return (
<div className="min-h-screen bg-gray-900 text-white p-8">
<div className="max-w-7xl mx-auto">
<div className="flex justify-between items-center mb-8">
<h1 className="text-3xl font-bold">🖥 EPYC Cluster Status</h1>
<button
onClick={fetchStatus}
className="px-4 py-2 bg-blue-600 hover:bg-blue-700 rounded text-sm"
>
🔄 Refresh
</button>
</div>
{/* Cluster Overview */}
<div className={`border rounded-lg p-6 mb-6 ${getStatusBg(status.cluster.status)}`}>
<h2 className="text-xl font-semibold mb-4">Cluster Overview</h2>
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
<div>
<div className="text-gray-400 text-sm">Status</div>
<div className={`text-2xl font-bold ${getStatusColor(status.cluster.status)}`}>
{status.cluster.status.toUpperCase()}
</div>
</div>
<div>
<div className="text-gray-400 text-sm">CPU Usage</div>
<div className="text-2xl font-bold">{status.cluster.cpuUsage.toFixed(1)}%</div>
</div>
<div>
<div className="text-gray-400 text-sm">Active Cores</div>
<div className="text-2xl font-bold">{status.cluster.activeCores} / {status.cluster.totalCores}</div>
</div>
<div>
<div className="text-gray-400 text-sm">Workers</div>
<div className="text-2xl font-bold">{status.cluster.activeWorkers} / {status.cluster.totalWorkers}</div>
</div>
</div>
</div>
{/* Worker Details */}
<div className="grid grid-cols-1 md:grid-cols-2 gap-6 mb-6">
{status.workers.map((worker) => (
<div key={worker.name} className={`border rounded-lg p-4 ${getStatusBg(worker.status)}`}>
<h3 className="font-semibold mb-2">{worker.name}</h3>
<div className="text-sm text-gray-400 mb-3">{worker.host}</div>
<div className="space-y-2">
<div className="flex justify-between">
<span className="text-gray-400">CPU:</span>
<span className="font-mono">{worker.cpuUsage.toFixed(1)}%</span>
</div>
<div className="flex justify-between">
<span className="text-gray-400">Load:</span>
<span className="font-mono">{worker.loadAverage}</span>
</div>
<div className="flex justify-between">
<span className="text-gray-400">Processes:</span>
<span className="font-mono">{worker.activeProcesses}</span>
</div>
</div>
</div>
))}
</div>
{/* Exploration Progress */}
<div className="border border-blue-500 bg-blue-900/20 rounded-lg p-6 mb-6">
<h2 className="text-xl font-semibold mb-4">📊 Parameter Exploration</h2>
<div className="grid grid-cols-2 md:grid-cols-3 gap-4 mb-4">
<div>
<div className="text-gray-400 text-sm">Total Space</div>
<div className="text-lg font-bold">{status.exploration.totalCombinations.toLocaleString()}</div>
</div>
<div>
<div className="text-gray-400 text-sm">Chunks Completed</div>
<div className="text-lg font-bold">{status.exploration.chunksCompleted} / {status.exploration.totalChunks}</div>
</div>
<div>
<div className="text-gray-400 text-sm">Current Chunk</div>
<div className="text-lg font-bold font-mono text-sm">{status.exploration.currentChunk}</div>
</div>
</div>
<div className="w-full bg-gray-700 rounded-full h-4">
<div
className="bg-blue-500 h-4 rounded-full transition-all"
style={{ width: `${status.exploration.progress * 100}%` }}
/>
</div>
<div className="text-right text-sm text-gray-400 mt-1">
{(status.exploration.progress * 100).toFixed(2)}% complete
</div>
</div>
{/* Recommendation */}
{status.recommendation && (
<div className="border border-purple-500 bg-purple-900/20 rounded-lg p-6 mb-6">
<h2 className="text-xl font-semibold mb-4">🎯 AI Recommendation</h2>
<div className="whitespace-pre-line text-gray-300 leading-relaxed">
{status.recommendation}
</div>
</div>
)}
{/* Top Strategies */}
{status.topStrategies.length > 0 && (
<div className="border border-gray-700 rounded-lg p-6">
<h2 className="text-xl font-semibold mb-4">🏆 Top Strategies</h2>
<div className="space-y-3">
{status.topStrategies.map((strategy) => (
<div key={strategy.rank} className="bg-gray-800 rounded p-4">
<div className="flex justify-between items-start mb-2">
<div className="text-lg font-semibold">#{strategy.rank}</div>
<div className="text-right">
<div className="text-2xl font-bold text-green-400">
${strategy.pnl_per_1k.toFixed(2)}
</div>
<div className="text-sm text-gray-400">per $1k</div>
</div>
</div>
<div className="grid grid-cols-2 md:grid-cols-4 gap-3 text-sm">
<div>
<span className="text-gray-400">Win Rate:</span>{' '}
<span className="font-semibold">{(strategy.win_rate * 100).toFixed(1)}%</span>
</div>
<div>
<span className="text-gray-400">Trades:</span>{' '}
<span className="font-semibold">{strategy.trades}</span>
</div>
<div>
<span className="text-gray-400">PF:</span>{' '}
<span className="font-semibold">{strategy.profit_factor.toFixed(2)}x</span>
</div>
<div>
<span className="text-gray-400">Max DD:</span>{' '}
<span className="font-semibold text-red-400">
${Math.abs(strategy.max_drawdown).toFixed(0)}
</span>
</div>
</div>
<details className="mt-3">
<summary className="cursor-pointer text-blue-400 text-sm hover:text-blue-300">
Show Parameters
</summary>
<div className="mt-2 grid grid-cols-2 md:grid-cols-3 gap-2 text-xs font-mono bg-gray-900 p-3 rounded">
<div>flip: {strategy.params.flip_threshold}</div>
<div>ma_gap: {strategy.params.ma_gap}</div>
<div>adx: {strategy.params.adx_min}</div>
<div>long_pos: {strategy.params.long_pos_max}</div>
<div>short_pos: {strategy.params.short_pos_min}</div>
</div>
</details>
</div>
))}
</div>
</div>
)}
<div className="mt-6 text-center text-sm text-gray-500">
Last updated: {new Date(status.lastUpdate).toLocaleString()}
</div>
</div>
</div>
)
}