Created comprehensive HA roadmap with 6 phases:
- Phase 1: Warm standby (CURRENT - manual failover)
- Phase 2: Database replication
- Phase 3: Health monitoring
- Phase 4: Reverse proxy + floating IP
- Phase 5: Automated failover
- Phase 6: Geographic redundancy

Includes:
- Decision gates based on capital and stability
- Cost-benefit analysis
- Scripts for healthcheck, failover, DB sync
- Recommendation to defer full HA until capital > $5k

Secondary server ready at 72.62.39.24 for emergency manual failover.

Related: User concern about system uptime, but full HA complexity is not justified at the current scale (~$600 capital). Revisit in Q1 2026.
91 lines
2.1 KiB
Bash
91 lines
2.1 KiB
Bash
#!/bin/bash
#
# Trading Bot Health Check Script
# Checks if trading bot is healthy and responding
#
# Usage: ./healthcheck.sh
# Exit codes: 0 = healthy, 1 = unhealthy
#
# Environment (all optional):
#   TRADING_BOT_HOST  host:port of the bot API      (default: localhost:3001)
#   MAX_FAILURES      default 3
#   CHECK_INTERVAL    default 10
#   ENV_FILE          file to read API_SECRET_KEY from
#                     (default: /home/icke/traderv4/.env)

set -eu

TRADING_BOT_HOST="${TRADING_BOT_HOST:-localhost:3001}"
# NOTE(review): MAX_FAILURES and CHECK_INTERVAL are not referenced anywhere
# else in this script as shown; kept so existing overrides remain harmless.
MAX_FAILURES="${MAX_FAILURES:-3}"
CHECK_INTERVAL="${CHECK_INTERVAL:-10}"
ENV_FILE="${ENV_FILE:-/home/icke/traderv4/.env}"

#######################################
# Export API_SECRET_KEY from a dotenv-style file.
# Only the API_SECRET_KEY= line is read; when several exist the last one
# wins. One layer of matching surrounding quotes is removed. A missing file
# or a file without the key is not an error and leaves the environment
# untouched.
# Arguments: $1 - path to the env file
# Returns:   0 always
#######################################
load_api_key() {
  local env_file=$1
  local line value

  [ -f "$env_file" ] || return 0

  # No pipefail is set, so a non-matching grep does not trip `set -e` here;
  # the empty result is handled explicitly below.
  line=$(grep '^API_SECRET_KEY=' "$env_file" 2>/dev/null | tail -n 1)

  # Never fall through to a bare `export`: with no arguments it prints every
  # exported variable to stdout.
  [ -n "$line" ] || return 0

  value=${line#API_SECRET_KEY=}
  value=${value%$'\r'} # tolerate CRLF line endings
  case "$value" in
    \"*\")
      value=${value#\"}
      value=${value%\"}
      ;;
    \'*\')
      value=${value#\'}
      value=${value%\'}
      ;;
  esac

  # Quoted assignment keeps values containing spaces intact.
  export API_SECRET_KEY="$value"
}

# Source API key from .env
load_api_key "$ENV_FILE"

#######################################
# Print a timestamped log line.
# Arguments: any number of words; they are joined with single spaces
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout (local time)
#######################################
log() {
  # A fixed printf format keeps the message text from ever being interpreted
  # as options or escape sequences.
  printf '[%s] %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$*"
}

#######################################
# Check if container is running.
# Inspects the container named exactly "trading-bot-v4". The previous
# `docker ps --filter name=... | grep` form matched substrings, so a sibling
# container such as "trading-bot-v4-postgres" could satisfy the check.
# Returns: 0 if the container's state is "running", 1 otherwise (including
#          when the container does not exist or docker is unavailable)
#######################################
check_container() {
  local state

  # stderr is discarded on purpose: "No such container" simply means unhealthy.
  state=$(docker container inspect --format '{{.State.Status}}' trading-bot-v4 2>/dev/null) \
    || return 1

  [ "$state" = "running" ]
}

#######################################
# Check if API is responding.
# Sends an authenticated GET to /api/drift/account-summary and looks for a
# `"success": true` field in the response body.
# Globals:   TRADING_BOT_HOST (read; falls back to localhost:3001)
#            API_SECRET_KEY   (read; bearer token)
# Outputs:   a one-line diagnostic on stderr when the API key is missing
# Returns:   0 if the request succeeded and reported success, 1 otherwise
#######################################
check_api() {
  local host="${TRADING_BOT_HOST:-localhost:3001}"
  local response

  # Under `set -u` an unset key would abort the whole script with an
  # "unbound variable" error; report it as an ordinary failed check instead.
  if [ -z "${API_SECRET_KEY:-}" ]; then
    printf 'check_api: API_SECRET_KEY is not set\n' >&2
    return 1
  fi

  # -f turns HTTP errors (>= 400) into a non-zero exit, -m 5 caps the request
  # at five seconds. stderr is dropped so that only the response body is
  # examined below.
  response=$(curl -s -f -m 5 \
    -H "Authorization: Bearer ${API_SECRET_KEY}" \
    "http://${host}/api/drift/account-summary" 2>/dev/null) || return 1

  # Accept both compact and pretty-printed JSON ("success":true / "success": true).
  grep -Eq '"success"[[:space:]]*:[[:space:]]*true' <<<"$response"
}

#######################################
# Check if Position Manager is monitoring (if positions exist).
# Passes when the last 50 container log lines show monitoring activity or an
# explicit "no positions" message, or when the container started within the
# startup grace period.
# Globals:   STARTUP_GRACE_SECONDS (read; optional, default 60)
# Returns:   0 if the Position Manager looks alive, 1 otherwise
#######################################
check_position_manager() {
  local grace="${STARTUP_GRACE_SECONDS:-60}"
  local logs started started_epoch now_epoch

  # Keep whatever output was produced even if `docker logs` itself fails.
  logs=$(docker logs --tail=50 trading-bot-v4 2>&1) || true

  # Monitoring activity among the last 50 log lines. This is bounded by line
  # count only; no time window is applied.
  if grep -q "🔍 Monitoring" <<<"$logs"; then
    return 0
  fi

  # If no monitoring logs but no positions open, that's OK
  if grep -q "No positions to monitor" <<<"$logs"; then
    return 0
  fi

  # If container just started, give it time. Compare the real age in seconds:
  # matching on the current UTC hour (as before) granted up to 59 minutes of
  # grace and none at all across an hour boundary.
  started=$(docker inspect trading-bot-v4 --format='{{.State.StartedAt}}' 2>/dev/null) \
    || return 1
  # Drop fractional seconds and normalise the UTC suffix before parsing.
  started=${started%Z}
  started=${started%%.*}
  started_epoch=$(date -u -d "${started}Z" +%s 2>/dev/null) || return 1
  now_epoch=$(date -u +%s)

  [ "$((now_epoch - started_epoch))" -lt "$grace" ]
}

#######################################
# Main health check.
# Runs the checks in order and terminates the script with the overall result.
# A container or API failure is fatal; a Position Manager failure only logs
# a warning.
# Outputs:   progress and result lines via log
# Exits:     0 = healthy, 1 = unhealthy
#######################################
main() {
  log "Starting health check..."

  if ! check_container; then
    log "❌ UNHEALTHY: Container not running"
    exit 1
  fi

  if ! check_api; then
    log "❌ UNHEALTHY: API not responding"
    exit 1
  fi

  if ! check_position_manager; then
    log "⚠️ WARNING: Position Manager may not be monitoring (check logs)"
    # Don't fail on this - API working is primary health indicator
  fi

  log "✅ HEALTHY: All checks passed"
  exit 0
}

# Run the health check; main terminates the script with the final exit code.
main "$@"