Files
trading_bot_v4/cluster/setup_cluster.sh
mindesbunister 2a8e04fe57 feat: Continuous optimization cluster for 2 EPYC servers
- Master controller with job queue and result aggregation
- Worker scripts for parallel backtesting (22 workers per server)
- SQLite database for strategy ranking and performance tracking
- File-based job queue (simple, robust, survives crashes)
- Auto-setup script for both EPYC servers
- Status dashboard for monitoring progress
- Comprehensive deployment guide

Architecture:
- Master: Job generation, worker coordination, result collection
- Worker 1 (pve-nu-monitor01): AMD EPYC 7282, 22 parallel jobs
- Worker 2 (srv-bd-host01): AMD EPYC 7302, 22 parallel jobs
- Total capacity: ~49,000 backtests/day (44 cores @ 70%)

Initial focus: v9 parameter refinement (27 configurations)
Target: Find strategies >$100/1k P&L (current baseline $92/1k)

Files:
- cluster/master.py: Main controller (570 lines)
- cluster/worker.py: Worker execution script (220 lines)
- cluster/setup_cluster.sh: Automated deployment
- cluster/status.py: Real-time status dashboard
- cluster/README.md: Operational documentation
- cluster/DEPLOYMENT.md: Step-by-step deployment guide
2025-11-29 22:34:52 +01:00

100 lines
3.1 KiB
Bash
Executable File

#!/bin/bash
# Setup optimization cluster on both EPYC servers.
#
# Deploys the worker script, backtester modules, and OHLCV data to both
# worker hosts. Worker 2 is only reachable through Worker 1 (two-hop SSH).
#
# Fail fast: abort on any error, on use of an unset variable, and on
# failures anywhere in a pipeline (original only had -e).
set -euo pipefail

# Absolute directory containing this script, so relative paths to
# ../backtester work regardless of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "🚀 Setting up optimization cluster..."

# --- Configuration (readonly: never reassigned below) ------------------------
readonly WORKER1_HOST="root@10.10.254.106"
readonly WORKER2_HOP="$WORKER1_HOST"       # jump host used to reach Worker 2
readonly WORKER2_HOST="root@10.20.254.100" # only reachable via the hop
readonly WORKSPACE="/root/optimization-cluster"

# --- ANSI colors for status output (literal escape strings, expanded by
# --- `echo -e` at print time) ------------------------------------------------
readonly GREEN='\033[0;32m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # reset / no color
#######################################
# Provision a single worker host: workspace dirs, Python venv, backtester
# code, worker script, and OHLCV data.
#
# Globals:   SCRIPT_DIR, WORKSPACE, BLUE, GREEN, NC (read)
# Arguments: $1 - SSH target (user@host)
#            $2 - human-readable worker name (for log output)
#            $3 - optional jump host; when set, the worker is reached via
#                 local -> hop -> worker, and file transfers are staged on
#                 the hop first.
# Outputs:   progress messages to stdout
#
# BUGFIX vs original: with a hop, the second transfer leg (hop -> worker)
# was executed through the full two-hop SSH chain, i.e. ON THE WORKER —
# which made rsync/scp copy the worker's own (empty) directories onto
# themselves. The staged files on the hop were never forwarded. The second
# leg must run on the hop only, so we keep two command arrays: hop_cmd
# (local -> hop) and ssh_cmd (local -> worker, possibly through the hop).
# Arrays also replace the original unquoted $SSH_CMD string, which relied
# on accidental word-splitting and leaked SSH_CMD into the global scope.
#######################################
setup_worker() {
  local HOST=$1
  local NAME=$2
  local VIA_HOP=${3:-}

  echo -e "\n${BLUE}Setting up $NAME ($HOST)...${NC}"

  # Command to run something on the worker (relayed through the hop if set),
  # and command to run something on the hop itself.
  local -a ssh_cmd=(ssh "$HOST")
  local -a hop_cmd=()
  if [ -n "$VIA_HOP" ]; then
    ssh_cmd=(ssh "$VIA_HOP" ssh "$HOST")
    hop_cmd=(ssh "$VIA_HOP")
  fi

  # Create workspace directory tree on the worker (brace expansion happens
  # in the remote shell).
  "${ssh_cmd[@]}" "mkdir -p $WORKSPACE/{jobs,results,data,backtester,logs}"

  # Install Python dependencies into a dedicated venv on the worker.
  echo " 📦 Installing Python packages..."
  "${ssh_cmd[@]}" "cd $WORKSPACE && python3 -m venv .venv"
  "${ssh_cmd[@]}" "cd $WORKSPACE && .venv/bin/pip install pandas numpy"

  # Copy backtester code. NOTE: scp output is suppressed for quiet logs,
  # but under set -e a failed transfer still aborts the script (the exit
  # status is not swallowed by the redirection).
  echo " 📁 Copying backtester modules..."
  if [ -n "$VIA_HOP" ]; then
    # Two-hop transfer: stage on the hop, then forward hop -> worker.
    scp -r "$SCRIPT_DIR/../backtester/"* "$VIA_HOP:$WORKSPACE/backtester/" > /dev/null 2>&1
    "${hop_cmd[@]}" "rsync -a $WORKSPACE/backtester/ $HOST:$WORKSPACE/backtester/"
  else
    scp -r "$SCRIPT_DIR/../backtester/"* "$HOST:$WORKSPACE/backtester/" > /dev/null 2>&1
  fi

  # Copy worker script (same staging scheme as above).
  echo " 📄 Installing worker script..."
  if [ -n "$VIA_HOP" ]; then
    scp "$SCRIPT_DIR/worker.py" "$VIA_HOP:$WORKSPACE/" > /dev/null 2>&1
    "${hop_cmd[@]}" "scp $WORKSPACE/worker.py $HOST:$WORKSPACE/"
  else
    scp "$SCRIPT_DIR/worker.py" "$HOST:$WORKSPACE/" > /dev/null 2>&1
  fi
  "${ssh_cmd[@]}" "chmod +x $WORKSPACE/worker.py"

  # Copy OHLCV data file if present locally; otherwise warn but continue,
  # so the rest of the setup still completes.
  echo " 📊 Copying OHLCV data..."
  if [ -f "$SCRIPT_DIR/../backtester/data/solusdt_5m.csv" ]; then
    if [ -n "$VIA_HOP" ]; then
      scp "$SCRIPT_DIR/../backtester/data/solusdt_5m.csv" "$VIA_HOP:$WORKSPACE/data/" > /dev/null 2>&1
      "${hop_cmd[@]}" "scp $WORKSPACE/data/solusdt_5m.csv $HOST:$WORKSPACE/data/"
    else
      scp "$SCRIPT_DIR/../backtester/data/solusdt_5m.csv" "$HOST:$WORKSPACE/data/" > /dev/null 2>&1
    fi
  else
    echo " ⚠️ Warning: solusdt_5m.csv not found, download manually"
  fi

  # Verify setup: list the workspace so the operator can eyeball the result.
  echo " ✅ Verifying installation..."
  "${ssh_cmd[@]}" "cd $WORKSPACE && ls -lah"

  echo -e "${GREEN}$NAME setup complete${NC}"
}
# --- Main deployment sequence -------------------------------------------
# Setup Worker 1 (direct connection)
setup_worker "$WORKER1_HOST" "Worker 1 (pve-nu-monitor01)"
# Setup Worker 2 (via Worker 1 hop)
# Worker 2 is not directly reachable from this machine; the third argument
# makes setup_worker relay all SSH/file traffic through Worker 1.
setup_worker "$WORKER2_HOST" "Worker 2 (srv-bd-host01)" "$WORKER1_HOST"
echo -e "\n${GREEN}🎉 Cluster setup complete!${NC}"
echo ""
# Operator cheat-sheet: start the master, watch the job queue depth, and
# query the top strategies from the results database.
echo "Next steps:"
echo " 1. Start master controller:"
echo " cd $SCRIPT_DIR && python3 master.py"
echo ""
echo " 2. Monitor cluster status:"
echo " watch -n 5 'ls -1 cluster/queue/*.json 2>/dev/null | wc -l'"
echo ""
echo " 3. View results:"
echo " sqlite3 cluster/strategies.db 'SELECT * FROM strategies ORDER BY pnl_per_1k DESC LIMIT 10'"