#!/bin/bash
#
# HA Failover Controller
# Monitors the primary server and activates the secondary on failure.
#
# IMPORTANT: Run this ONLY on the SECONDARY server.
# The primary should always be active unless it has failed.
#
# Error-handling note: this is a long-running controller, so a failed docker,
# ssh, curl or log write must never terminate it. Every external command that
# may fail is checked explicitly instead of being left to `set -e`.
#

set -euo pipefail

PRIMARY_HOST="root@192.168.1.100"  # Update with primary IP
# shellcheck disable=SC2034  # not referenced by this script; kept from the original config
SECONDARY_HOST="root@72.62.39.24"
CHECK_INTERVAL=15  # seconds between checks
MAX_FAILURES=3     # consecutive failures before failover

# shellcheck disable=SC2034  # not referenced by this script; kept from the original config
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="/home/icke/traderv4"
LOG_FILE="/var/log/trading-bot-ha.log"
CONTAINER_NAME="trading-bot-v4"

FAILURE_COUNT=0

#######################################
# Print a timestamped message to stdout and append it to LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $* - message text
# Returns:   always 0; a failed log write must not stop the controller
#######################################
log() {
  local line
  line="[$(date +'%Y-%m-%d %H:%M:%S')] $*"
  # tee still writes to stdout when the log file cannot be opened; its
  # non-zero status is deliberately ignored.
  printf '%s\n' "${line}" | tee -a "${LOG_FILE}" || true
}

#######################################
# Send a notification through the Telegram Bot API (best effort).
# Credentials are read from ${PROJECT_DIR}/.env; when that file is missing
# nothing is sent.
# Globals:   PROJECT_DIR (read)
# Arguments: $1 - message text
# Returns:   always 0; delivery problems are logged, never propagated
#######################################
telegram_notify() {
  local message="$1"
  local env_file="${PROJECT_DIR}/.env"

  [[ -f "${env_file}" ]] || return 0

  # Source .env inside a subshell so nothing it defines can overwrite the
  # controller's own variables (PROJECT_DIR, CHECK_INTERVAL, ...).
  if ! (
    set +u  # .env may reference variables that are not set
    # shellcheck disable=SC1090
    source "${env_file}"
    [[ -n "${TELEGRAM_BOT_TOKEN:-}" && -n "${TELEGRAM_CHAT_ID:-}" ]] || exit 1
    # --max-time keeps an unreachable network from stalling the failover that
    # follows this call; --data-urlencode encodes spaces/emoji in the text.
    curl -fsS -o /dev/null --max-time 10 -X POST \
      "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
      --data-urlencode "chat_id=${TELEGRAM_CHAT_ID}" \
      --data-urlencode "text=🚨 HA FAILOVER: ${message}" \
      --data-urlencode "parse_mode=HTML"
  ); then
    log "WARNING: Telegram notification could not be sent"
  fi
  return 0
}

#######################################
# Run ha-setup/healthcheck.sh on the primary over SSH.
# Globals:   PRIMARY_HOST, PROJECT_DIR (read)
# Returns:   0 if the remote healthcheck succeeded; non-zero if SSH could not
#            connect or the healthcheck failed
#######################################
check_primary_health() {
  local remote_cmd
  # %q quotes the path for the remote shell.
  printf -v remote_cmd 'cd %q && bash ha-setup/healthcheck.sh' "${PROJECT_DIR}"

  # ConnectTimeout only bounds connection setup; the ServerAlive options make
  # ssh give up when the primary stops responding after the connection is up,
  # so a frozen primary cannot hang the monitoring loop.
  ssh -o ConnectTimeout=5 -o BatchMode=yes \
    -o ServerAliveInterval=5 -o ServerAliveCountMax=2 \
    "${PRIMARY_HOST}" "${remote_cmd}" &>/dev/null
}

#######################################
# Check whether a running container whose name matches CONTAINER_NAME exists
# on this host.
# Globals:   CONTAINER_NAME (read)
# Returns:   0 if such a container is running, 1 otherwise (including when
#            `docker ps` itself fails)
#######################################
is_secondary_active() {
  local ids
  ids="$(docker ps -q --filter "name=${CONTAINER_NAME}" \
    --filter "status=running" 2>/dev/null)" || return 1
  [[ -n "${ids}" ]]
}

#######################################
# Start the trading-bot service on this (secondary) host and verify that the
# container is running 10 seconds later.
# Globals:   PROJECT_DIR, MAX_FAILURES (read)
# Returns:   0 if the container is running afterwards, 1 otherwise
#######################################
start_secondary() {
  log "🚀 Starting secondary (failover activation)..."

  # Subshell: a failed cd/compose is reported through the exit status and the
  # controller's working directory is left untouched.
  if (cd "${PROJECT_DIR}" && docker compose up -d trading-bot); then
    sleep 10  # give the container time to come up before verifying
    if is_secondary_active; then
      log "✅ Secondary activated successfully"
      telegram_notify "Secondary server activated (primary failed ${MAX_FAILURES} health checks)"
      return 0
    fi
  fi

  log "❌ Failed to start secondary"
  telegram_notify "⚠️ CRITICAL: Secondary failed to start after primary failure!"
  return 1
}

#######################################
# Stop the trading-bot service on this (secondary) host.
# Globals:   PROJECT_DIR (read)
# Returns:   0 if the container is no longer running, 1 otherwise
#######################################
stop_secondary() {
  log "🛑 Stopping secondary (primary recovered)..."

  # The result of `compose stop` is judged by the running check below, so its
  # own exit status is only guarded, not interpreted.
  (cd "${PROJECT_DIR}" && docker compose stop trading-bot) || true

  if ! is_secondary_active; then
    log "✅ Secondary stopped successfully"
    telegram_notify "Primary server recovered, secondary deactivated"
    return 0
  fi

  log "❌ Failed to stop secondary"
  return 1
}

#######################################
# Monitoring loop: every CHECK_INTERVAL seconds check the primary; after
# MAX_FAILURES consecutive failed checks start the secondary, and stop the
# secondary again as soon as the primary passes a check.
# Globals:   FAILURE_COUNT (written); PRIMARY_HOST, CHECK_INTERVAL,
#            MAX_FAILURES (read)
# Returns:   never returns
#######################################
main_loop() {
  log "🎯 HA Failover Controller started (Secondary mode)"
  log "Monitoring primary: ${PRIMARY_HOST}"
  log "Check interval: ${CHECK_INTERVAL}s, Max failures: ${MAX_FAILURES}"

  while true; do
    if check_primary_health; then
      # Primary is healthy
      if (( FAILURE_COUNT > 0 )); then
        log "✅ Primary recovered (was at ${FAILURE_COUNT} failures)"
        FAILURE_COUNT=0
      fi

      # If secondary is running, stop it (primary should be active)
      if is_secondary_active; then
        log "⚠️ Secondary is active but primary is healthy - stopping secondary"
        # Failure is already logged by stop_secondary; keep monitoring and
        # retry on the next iteration instead of exiting under `set -e`.
        stop_secondary || true
      fi
    else
      # Primary is unhealthy
      FAILURE_COUNT=$((FAILURE_COUNT + 1))
      log "❌ Primary health check failed (${FAILURE_COUNT}/${MAX_FAILURES})"

      if (( FAILURE_COUNT >= MAX_FAILURES )); then
        if ! is_secondary_active; then
          log "🚨 PRIMARY FAILED - Activating secondary..."
          telegram_notify "Primary server failed ${MAX_FAILURES} consecutive health checks. Activating secondary..."
          # A failed activation must not kill the controller: the next
          # iteration tries again.
          start_secondary || true
        else
          log "ℹ️ Secondary already active (primary still failing)"
        fi
      fi
    fi

    sleep "${CHECK_INTERVAL}"
  done
}

# Ensure running as root (needs docker access)
if (( EUID != 0 )); then
  log "❌ Must run as root (needs docker and SSH access)"
  exit 1
fi

main_loop