critical: Position Manager monitoring failure - 08 loss incident (Dec 8, 2025)
- Bug #73 recurrence: Position opened Dec 7 22:15 but PM never monitored
- Root cause: Container running OLD code from BEFORE Dec 7 fix (2:46 AM start < 2:46 AM commit)
- User lost 08 on unprotected SOL-PERP SHORT
- Fix: Rebuilt and restarted container with 3-layer safety system
- Status: VERIFIED deployed - all safety layers active
- Prevention: Container timestamp MUST be AFTER commit timestamp
This commit is contained in:
97
cluster/run_v11_full_sweep.sh
Executable file
97
cluster/run_v11_full_sweep.sh
Executable file
@@ -0,0 +1,97 @@
|
||||
#!/bin/bash
# Launch V11 Full Parameter Sweep on EPYC Cluster
#
# This first section enables strict mode and prints a summary of the sweep
# (parameter grid, workers, estimated duration) before any work is done.

# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Quoted delimiter: the banner is emitted literally, with no expansion.
cat <<'EOF'
================================================================
V11 FULL PARAMETER SWEEP - EXHAUSTIVE SEARCH
================================================================

Grid: 26,244 combinations
 - flip_threshold: 0.25, 0.3, 0.35, 0.4, 0.45, 0.5 (6)
 - adx_min: 0, 5, 10, 15, 20, 25 (6)
 - long_pos_max: 90, 95, 100 (3)
 - short_pos_min: 0, 5, 10 (3)
 - vol_min: 0.0, 0.5, 1.0 (3)
 - entry_buffer_atr: 0.0, 0.05, 0.10 (3)
 - rsi_long_min: 20, 25, 30 (3)
 - rsi_short_max: 70, 75, 80 (3)

Workers:
 - worker1: 24 cores (24/7)
 - worker2: 18 cores (7PM-6AM only)

Estimated Duration: 8-12 hours
================================================================
EOF
echo ""
# Run from the directory that contains this script so that every relative
# path below resolves the same way regardless of the caller's cwd.
cd "$(dirname "$0")"

# Pre-flight checks: confirm that every local file used later in this script
# exists BEFORE anything is copied to a worker, so a missing file cannot
# leave the workers with a partial deployment.

# Print a success line for an existing regular file, or report the missing
# path on stderr and abort the script.
#   $1 - path to check (relative to the script directory)
#   $2 - human-readable label used in the success message
require_file() {
  local path=$1
  local label=$2
  if [[ ! -f "$path" ]]; then
    echo "✗ Error: $path not found" >&2
    exit 1
  fi
  echo "✓ $label found"
}

# Check data file
require_file "data/solusdt_5m.csv" "Market data"

# Check worker script
require_file "v11_full_worker.py" "Worker script"

# Check the remaining files this script chmods, launches or copies
require_file "v11_full_coordinator.py" "Coordinator script"
require_file "../backtester/v11_moneyline_all_filters.py" "Backtester script"
# Make scripts executable (coordinator and worker in a single chmod call)
chmod +x v11_full_coordinator.py v11_full_worker.py
echo "✓ Scripts executable"

# Create results directory; -p makes this a no-op when it already exists
mkdir -p v11_results
echo "✓ Results directory ready"
# Deploy worker to machines
#
# worker1 receives the files directly from this host. worker2 is populated
# by running scp ON worker1 (an ssh hop), copying the files worker1 has just
# received.
#
# Hosts and remote directories are defined once here. Each can be overridden
# through the environment; the defaults are the previously hard-coded values.
worker1_host="${V11_WORKER1_HOST:-10.10.254.106}"
worker1_dir="${V11_WORKER1_DIR:-/home/comprehensive_sweep}"
worker2_host="${V11_WORKER2_HOST:-10.20.254.100}"
worker2_dir="${V11_WORKER2_DIR:-/home/backtest_dual/backtest}"

echo ""
echo "📦 Deploying worker script to EPYC cluster..."

# Worker 1
echo " → worker1 (${worker1_host})"
scp v11_full_worker.py "root@${worker1_host}:${worker1_dir}/"
scp ../backtester/v11_moneyline_all_filters.py "root@${worker1_host}:${worker1_dir}/backtester/"

# Worker 2 (via worker 1)
echo " → worker2 (${worker2_host}) via worker1"
ssh "root@${worker1_host}" "scp ${worker1_dir}/v11_full_worker.py root@${worker2_host}:${worker2_dir}/"
ssh "root@${worker1_host}" "scp ${worker1_dir}/backtester/v11_moneyline_all_filters.py root@${worker2_host}:${worker2_dir}/backtester/"

echo "✓ Workers deployed"
# Launch coordinator
#
# The coordinator runs detached (nohup + &), with stdout and stderr captured
# in coordinator_v11_full.log, so it keeps running after this script exits.
echo ""
echo "🚀 Starting full sweep coordinator..."
nohup python3 v11_full_coordinator.py > coordinator_v11_full.log 2>&1 &
COORDINATOR_PID=$!

# `set -e` does not cover background jobs, so a coordinator that dies at
# startup (missing interpreter, import error, ...) would otherwise still be
# reported as started. Give it a moment, then confirm the process exists.
sleep 2
if ! kill -0 "$COORDINATOR_PID" 2>/dev/null; then
  echo "✗ Error: coordinator exited during startup; see coordinator_v11_full.log" >&2
  exit 1
fi

echo "✓ Coordinator started (PID: $COORDINATOR_PID)"

# Monitoring cheat-sheet. Quoted heredoc delimiters keep the backslashes,
# double quotes and the * glob literal; only the kill line needs expansion,
# so it is printed with echo between the two heredocs.
cat <<'EOF'

================================================================
MONITORING
================================================================
Live log: tail -f coordinator_v11_full.log
Database: sqlite3 exploration.db
Results: cluster/v11_results/*_results.csv

Check status:
 sqlite3 exploration.db \
 "SELECT status, COUNT(*) FROM v11_full_chunks GROUP BY status"

Top results so far:
 sqlite3 exploration.db \
 "SELECT params, pnl FROM v11_full_strategies \
 ORDER BY pnl DESC LIMIT 10"

To stop sweep:
EOF
echo " kill $COORDINATOR_PID"
cat <<'EOF'

Telegram notifications enabled (start/complete/stop)
EOF
echo "================================================================"
Reference in New Issue
Block a user