feat: Calculate quality scores for all timeframes (not just 5min)
- Moved scoreSignalQuality() to BEFORE timeframe check (line 112)
- Data collection signals now have real quality scores (not hardcoded 0)
- Enables quality-filtered win rate comparison across 5min/15min/1H/4H/Daily
- Fixed TypeScript errors: added symbol/currentPrice params, fixed interface refs
- Added getMinQualityScoreForDirection import for threshold calculation
- BlockedSignal table now populated with:
* signalQualityScore (real 0-100 score, not 0)
* signalQualityVersion ('v9', not 'data-collection')
* minScoreRequired (actual threshold, not 0)
* scoreBreakdown with reasons array
- Implementation: Nov 26, 2025
- Container restarted: 14:12:00 UTC (11 minutes after commit)
- Purpose: Enable SQL queries like WHERE signalQualityScore >= minScoreRequired
to compare quality-filtered win rates across timeframes
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
import { NextRequest, NextResponse } from 'next/server'
|
||||
import { initializeDriftService } from '@/lib/drift/client'
|
||||
import { openPosition, placeExitOrders, closePosition } from '@/lib/drift/orders'
|
||||
import { normalizeTradingViewSymbol } from '@/config/trading'
|
||||
import { normalizeTradingViewSymbol, getMinQualityScoreForDirection } from '@/config/trading'
|
||||
import { getMergedConfig } from '@/config/trading'
|
||||
import { getInitializedPositionManager, ActiveTrade } from '@/lib/trading/position-manager'
|
||||
import { createTrade, updateTradeExit } from '@/lib/database/trades'
|
||||
@@ -109,6 +109,30 @@ export async function POST(request: NextRequest): Promise<NextResponse<ExecuteTr
|
||||
console.log(`📊 Market data auto-cached for ${driftSymbol} from trade signal`)
|
||||
}
|
||||
|
||||
// 📊 CALCULATE QUALITY SCORE BEFORE TIMEFRAME CHECK (Nov 26, 2025)
|
||||
// CRITICAL: Score ALL signals (5min + data collection) for proper multi-timeframe analysis
|
||||
// This enables quality-filtered win rate comparison across timeframes
|
||||
const qualityResult = await scoreSignalQuality({
|
||||
atr: body.atr || 0,
|
||||
adx: body.adx || 0,
|
||||
rsi: body.rsi || 0,
|
||||
volumeRatio: body.volumeRatio || 0,
|
||||
pricePosition: body.pricePosition || 0,
|
||||
maGap: body.maGap, // V9: MA gap convergence scoring
|
||||
timeframe: body.timeframe || '5',
|
||||
direction: body.direction,
|
||||
symbol: driftSymbol,
|
||||
currentPrice: body.signalPrice || 0,
|
||||
})
|
||||
console.log(`📊 Signal quality: ${qualityResult.score} (${qualityResult.score >= 90 ? 'PASS' : 'BLOCKED'})`)
|
||||
if (qualityResult.reasons?.length > 0) {
|
||||
console.log(` Reasons: ${qualityResult.reasons.join(', ')}`)
|
||||
}
|
||||
|
||||
// Get min quality threshold for this direction
|
||||
const config = getMergedConfig()
|
||||
const minQualityScore = getMinQualityScoreForDirection(body.direction, config)
|
||||
|
||||
// 🔬 MULTI-TIMEFRAME DATA COLLECTION
|
||||
// Only execute trades from 5min timeframe OR manual Telegram trades
|
||||
// Save other timeframes (15min, 1H, 4H, Daily) for analysis
|
||||
@@ -128,7 +152,7 @@ export async function POST(request: NextRequest): Promise<NextResponse<ExecuteTr
|
||||
symbol: driftSymbol,
|
||||
direction: body.direction,
|
||||
blockReason: 'DATA_COLLECTION_ONLY',
|
||||
blockDetails: `Multi-timeframe data collection: ${timeframe}min signals saved but not executed (only 5min executes)`,
|
||||
blockDetails: `Multi-timeframe data collection: ${timeframe}min signals saved but not executed (only 5min executes). Quality score: ${qualityResult.score} (threshold: ${minQualityScore})`,
|
||||
atr: body.atr,
|
||||
adx: body.adx,
|
||||
rsi: body.rsi,
|
||||
@@ -136,12 +160,12 @@ export async function POST(request: NextRequest): Promise<NextResponse<ExecuteTr
|
||||
pricePosition: body.pricePosition,
|
||||
timeframe: timeframe,
|
||||
signalPrice: currentPrice,
|
||||
signalQualityScore: 0, // Not scored since not executed
|
||||
signalQualityVersion: 'data-collection',
|
||||
minScoreRequired: 0,
|
||||
scoreBreakdown: {},
|
||||
signalQualityScore: qualityResult.score, // CRITICAL: Real quality score for analysis
|
||||
signalQualityVersion: 'v9', // Current indicator version
|
||||
minScoreRequired: minQualityScore,
|
||||
scoreBreakdown: { reasons: qualityResult.reasons },
|
||||
})
|
||||
console.log(`✅ ${timeframe}min signal saved at $${currentPrice.toFixed(2)} for future analysis`)
|
||||
console.log(`✅ ${timeframe}min signal saved at $${currentPrice.toFixed(2)} for future analysis (quality: ${qualityResult.score}, threshold: ${minQualityScore})`)
|
||||
} catch (dbError) {
|
||||
console.error(`❌ Failed to save ${timeframe}min signal:`, dbError)
|
||||
}
|
||||
@@ -154,6 +178,8 @@ export async function POST(request: NextRequest): Promise<NextResponse<ExecuteTr
|
||||
timeframe: timeframe,
|
||||
symbol: driftSymbol,
|
||||
direction: body.direction,
|
||||
qualityScore: qualityResult.score,
|
||||
threshold: minQualityScore,
|
||||
saved: true,
|
||||
}
|
||||
}, { status: 200 }) // 200 not 400 - this is expected behavior
|
||||
@@ -161,29 +187,14 @@ export async function POST(request: NextRequest): Promise<NextResponse<ExecuteTr
|
||||
|
||||
console.log(`✅ 5min signal confirmed - proceeding with trade execution`)
|
||||
|
||||
// Get trading configuration
|
||||
const config = getMergedConfig()
|
||||
|
||||
// Initialize Drift service and check account health before sizing
|
||||
const driftService = await initializeDriftService()
|
||||
const health = await driftService.getAccountHealth()
|
||||
console.log(`🩺 Account health: Free collateral $${health.freeCollateral.toFixed(2)}`)
|
||||
|
||||
// Calculate quality score EARLY for adaptive leverage (Nov 24, 2025)
|
||||
// This needs to happen before position sizing so leverage can be adjusted based on quality
|
||||
const qualityResult = await scoreSignalQuality({
|
||||
atr: body.atr || 0,
|
||||
adx: body.adx || 0,
|
||||
rsi: body.rsi || 0,
|
||||
volumeRatio: body.volumeRatio || 0,
|
||||
pricePosition: body.pricePosition || 0,
|
||||
maGap: body.maGap, // V9: MA gap convergence scoring
|
||||
direction: body.direction,
|
||||
symbol: driftSymbol,
|
||||
currentPrice: body.signalPrice || 0,
|
||||
timeframe: body.timeframe,
|
||||
})
|
||||
console.log(`📊 Signal quality score: ${qualityResult.score} (calculated early for adaptive leverage)`)
|
||||
// Quality score already calculated above (before timeframe check)
|
||||
// Now use it for adaptive leverage and position sizing
|
||||
console.log(`📊 Signal quality score: ${qualityResult.score} (using for adaptive leverage)`)
|
||||
|
||||
// Get symbol-specific position sizing with quality score for adaptive leverage
|
||||
// ENHANCED Nov 25, 2025: Pass direction for SHORT-specific leverage tiers
|
||||
|
||||
741
docs/PATRONI_SETUP_GUIDE.md
Normal file
741
docs/PATRONI_SETUP_GUIDE.md
Normal file
@@ -0,0 +1,741 @@
|
||||
# Patroni High Availability Setup Guide
|
||||
|
||||
**Goal:** 100% trading uptime with automatic PostgreSQL failover and zero split-brain risk
|
||||
|
||||
**Timeline:** Ready to implement once Oracle Cloud account approved (24-48h)
|
||||
|
||||
---
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```
|
||||
Location 1: Hetzner Dedicated (srvdocker02 - 95.216.52.28)
|
||||
├── Trading Bot Primary
|
||||
├── PostgreSQL Primary (read-write)
|
||||
├── Patroni agent (manages PostgreSQL, holds/contends for the leader lock)
|
||||
└── etcd-1 (consensus node)
|
||||
|
||||
Location 2: Hostinger VPS (72.62.39.24)
|
||||
├── Trading Bot Secondary
|
||||
├── PostgreSQL Standby (read-only → becomes primary on failover)
|
||||
├── Patroni agent (manages PostgreSQL, holds/contends for the leader lock)
|
||||
└── etcd-2 (consensus node)
|
||||
|
||||
Location 3: Oracle Cloud Free Tier (witness)
|
||||
└── etcd-3 (consensus node only - 256MB RAM sufficient)
|
||||
|
||||
Quorum: 2 out of 3 etcd nodes required for decisions
|
||||
Failover Time: 10-30 seconds (automatic)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## How It Works
|
||||
|
||||
### Normal Operation:
|
||||
1. **etcd cluster**: All 3 nodes track "who is PRIMARY?"
|
||||
2. **Patroni on Primary**: Holds the "leader lock" in etcd
|
||||
3. **Patroni on Standby**: Continuously replicates from primary
|
||||
4. **Trading continues**: All writes go to primary database
|
||||
|
||||
### When Primary Server Crashes:
|
||||
```
|
||||
11:42:00 → Primary server dies (hardware failure, network, etc.)
|
||||
11:42:10 → etcd detects primary Patroni heartbeat stopped
|
||||
11:42:10 → Patroni triggers election
|
||||
11:42:10 → Standby Patroni wins (gets leader lock from etcd)
|
||||
11:42:11 → Patroni promotes standby to PRIMARY (pg_ctl promote)
|
||||
11:42:11 → Database now READ-WRITE ✅
|
||||
11:42:28 → DNS failover detects primary down (3 failures)
|
||||
11:42:29 → DNS switches to secondary server
|
||||
11:42:29 → Trading resumes; only transactions not yet streamed to the standby (async replication) can be lost ✅
|
||||
|
||||
Later (when old primary recovers):
|
||||
12:00:00 → Old primary server comes back online
|
||||
12:00:01 → Patroni starts, checks etcd for leader lock
|
||||
12:00:01 → Sees lock held by other node
|
||||
12:00:01 → Automatically becomes STANDBY
|
||||
12:00:02 → Starts replicating FROM new primary
|
||||
12:00:02 → System now stable with roles reversed ✅
|
||||
```
|
||||
|
||||
**Key Point:** No manual intervention needed! Patroni handles promotion, demotion, and resync automatically.
|
||||
|
||||
---
|
||||
|
||||
## Step 1: Set Up Oracle Cloud Witness Node
|
||||
|
||||
### 1.1 Create Oracle Cloud Free Tier Account
|
||||
- Status: ✅ Account pending approval (Nov 26, 2025)
|
||||
- Wait for email approval (24-48 hours typical)
|
||||
|
||||
### 1.2 Create VM Instance
|
||||
```
|
||||
Name: etcd-witness
|
||||
Shape: VM.Standard.E2.1.Micro (always free)
|
||||
OS: Ubuntu 22.04 Minimal
|
||||
Network: Assign public IP
|
||||
Storage: 50GB boot volume (always free)
|
||||
```
|
||||
|
||||
### 1.3 Configure Firewall
|
||||
```bash
|
||||
# Oracle Cloud Security List (in VCN settings)
|
||||
Ingress Rules:
|
||||
- Port 22 (SSH from your IP)
|
||||
- Port 2379 (etcd client - from srvdocker02 and Hostinger IPs)
|
||||
- Port 2380 (etcd peer - from srvdocker02 and Hostinger IPs)
|
||||
|
||||
# On the VM itself
|
||||
sudo ufw allow 22/tcp
|
||||
sudo ufw allow from 95.216.52.28 to any port 2379 proto tcp
sudo ufw allow from 72.62.39.24 to any port 2379 proto tcp
|
||||
sudo ufw allow from 95.216.52.28 to any port 2380 proto tcp
sudo ufw allow from 72.62.39.24 to any port 2380 proto tcp
|
||||
sudo ufw enable
|
||||
```
|
||||
|
||||
### 1.4 Install Docker
|
||||
```bash
|
||||
ssh ubuntu@ORACLE_IP
|
||||
|
||||
# Install Docker
|
||||
curl -fsSL https://get.docker.com -o get-docker.sh
|
||||
sudo sh get-docker.sh
|
||||
sudo usermod -aG docker ubuntu
|
||||
# Log out and back in for group to take effect
|
||||
```
|
||||
|
||||
### 1.5 Run etcd Witness
|
||||
```bash
|
||||
# Create directory for etcd data
|
||||
mkdir -p ~/etcd-data
|
||||
|
||||
# Run etcd container
|
||||
docker run -d \
|
||||
--name etcd-witness \
|
||||
--restart unless-stopped \
|
||||
-p 2379:2379 \
|
||||
-p 2380:2380 \
|
||||
-v ~/etcd-data:/etcd-data \
|
||||
quay.io/coreos/etcd:v3.5.11 \
|
||||
/usr/local/bin/etcd \
|
||||
--name witness \
|
||||
--data-dir /etcd-data \
|
||||
--listen-peer-urls http://0.0.0.0:2380 \
|
||||
--listen-client-urls http://0.0.0.0:2379 \
|
||||
--advertise-client-urls http://ORACLE_PUBLIC_IP:2379 \
|
||||
--initial-advertise-peer-urls http://ORACLE_PUBLIC_IP:2380 \
|
||||
--initial-cluster primary=http://95.216.52.28:2380,secondary=http://72.62.39.24:2380,witness=http://ORACLE_PUBLIC_IP:2380 \
|
||||
--initial-cluster-state new \
|
||||
--initial-cluster-token trading-bot-cluster
|
||||
|
||||
# Verify it's running
|
||||
docker logs etcd-witness
|
||||
# Should see: "serving client requests on 0.0.0.0:2379"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 2: Prepare Primary Server (srvdocker02)
|
||||
|
||||
### 2.1 Backup Current Database
|
||||
```bash
|
||||
# On srvdocker02
|
||||
cd /home/icke/traderv4
|
||||
docker exec trading-bot-postgres pg_dump -U postgres trading_bot_v4 > backup_before_patroni_$(date +%Y%m%d_%H%M%S).sql
|
||||
|
||||
# Verify backup
|
||||
ls -lh backup_before_patroni_*.sql
|
||||
# Should be several MB
|
||||
```
|
||||
|
||||
### 2.2 Create Patroni Configuration
|
||||
```bash
|
||||
# On srvdocker02
|
||||
cd /home/icke/traderv4
|
||||
mkdir -p patroni-config
|
||||
```
|
||||
|
||||
Create `/home/icke/traderv4/patroni-config/patroni-primary.yml`:
|
||||
```yaml
|
||||
scope: trading-bot-cluster
|
||||
name: primary
|
||||
|
||||
restapi:
|
||||
listen: 0.0.0.0:8008
|
||||
connect_address: 95.216.52.28:8008
|
||||
|
||||
etcd3:
|
||||
hosts:
|
||||
- 95.216.52.28:2379
|
||||
- 72.62.39.24:2379
|
||||
- ORACLE_PUBLIC_IP:2379
|
||||
|
||||
bootstrap:
|
||||
dcs:
|
||||
ttl: 30
|
||||
loop_wait: 10
|
||||
retry_timeout: 10
|
||||
maximum_lag_on_failover: 1048576
|
||||
postgresql:
|
||||
use_pg_rewind: true
|
||||
use_slots: true
|
||||
parameters:
|
||||
wal_level: replica
|
||||
hot_standby: "on"
|
||||
wal_keep_segments: 8
|
||||
max_wal_senders: 10
|
||||
max_replication_slots: 10
|
||||
wal_log_hints: "on"
|
||||
|
||||
initdb:
|
||||
- encoding: UTF8
|
||||
- data-checksums
|
||||
|
||||
pg_hba:
|
||||
- host replication replicator 0.0.0.0/0 md5
|
||||
- host all all 0.0.0.0/0 md5
|
||||
|
||||
postgresql:
|
||||
listen: 0.0.0.0:5432
|
||||
connect_address: 95.216.52.28:55432  # must match the host port published in docker-compose.patroni.yml ("55432:5432")
|
||||
data_dir: /var/lib/postgresql/data
|
||||
pgpass: /tmp/pgpass
|
||||
authentication:
|
||||
replication:
|
||||
username: replicator
|
||||
password: REPLICATION_PASSWORD_HERE
|
||||
superuser:
|
||||
username: postgres
|
||||
password: POSTGRES_PASSWORD_HERE
|
||||
parameters:
|
||||
unix_socket_directories: '/var/run/postgresql'
|
||||
|
||||
tags:
|
||||
nofailover: false
|
||||
noloadbalance: false
|
||||
clonefrom: false
|
||||
nosync: false
|
||||
```
|
||||
|
||||
### 2.3 Create Docker Compose for Patroni
|
||||
Create `/home/icke/traderv4/docker-compose.patroni.yml`:
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
etcd-primary:
|
||||
image: quay.io/coreos/etcd:v3.5.11
|
||||
container_name: etcd-primary
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- trading-bot-network
|
||||
ports:
|
||||
- "2379:2379"
|
||||
- "2380:2380"
|
||||
volumes:
|
||||
- etcd-primary-data:/etcd-data
|
||||
command:
|
||||
- /usr/local/bin/etcd
|
||||
- --name=primary
|
||||
- --data-dir=/etcd-data
|
||||
- --listen-peer-urls=http://0.0.0.0:2380
|
||||
- --listen-client-urls=http://0.0.0.0:2379
|
||||
- --advertise-client-urls=http://95.216.52.28:2379
|
||||
- --initial-advertise-peer-urls=http://95.216.52.28:2380
|
||||
- --initial-cluster=primary=http://95.216.52.28:2380,secondary=http://72.62.39.24:2380,witness=http://ORACLE_PUBLIC_IP:2380
|
||||
- --initial-cluster-state=new
|
||||
- --initial-cluster-token=trading-bot-cluster
|
||||
|
||||
patroni-primary:
|
||||
image: patroni/patroni:latest
|
||||
container_name: patroni-primary
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- trading-bot-network
|
||||
ports:
|
||||
- "8008:8008"
|
||||
- "55432:5432"
|
||||
volumes:
|
||||
- ./patroni-config/patroni-primary.yml:/etc/patroni.yml
|
||||
- postgres-primary-data:/var/lib/postgresql/data
|
||||
environment:
|
||||
- PATRONI_SCOPE=trading-bot-cluster
|
||||
- PATRONI_NAME=primary
|
||||
depends_on:
|
||||
- etcd-primary
|
||||
|
||||
volumes:
|
||||
etcd-primary-data:
|
||||
driver: local
|
||||
postgres-primary-data:
|
||||
driver: local
|
||||
|
||||
networks:
|
||||
trading-bot-network:
|
||||
name: traderv4_trading-bot-network
|
||||
external: true
|
||||
```
|
||||
|
||||
### 2.4 Stop Current PostgreSQL
|
||||
```bash
|
||||
# CRITICAL: Do this during low trading activity
|
||||
docker stop trading-bot-postgres
|
||||
|
||||
# Backup data directory
|
||||
sudo cp -r /var/lib/docker/volumes/traderv4_postgres-data/_data \
|
||||
/var/lib/docker/volumes/traderv4_postgres-data/_data.backup
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 3: Prepare Secondary Server (Hostinger)
|
||||
|
||||
### 3.1 Create Patroni Configuration
|
||||
```bash
|
||||
# On Hostinger
|
||||
ssh root@72.62.39.24
|
||||
cd /root/traderv4-secondary
|
||||
mkdir -p patroni-config
|
||||
```
|
||||
|
||||
Create `/root/traderv4-secondary/patroni-config/patroni-secondary.yml`:
|
||||
```yaml
|
||||
scope: trading-bot-cluster
|
||||
name: secondary
|
||||
|
||||
restapi:
|
||||
listen: 0.0.0.0:8008
|
||||
connect_address: 72.62.39.24:8008
|
||||
|
||||
etcd3:
|
||||
hosts:
|
||||
- 95.216.52.28:2379
|
||||
- 72.62.39.24:2379
|
||||
- ORACLE_PUBLIC_IP:2379
|
||||
|
||||
bootstrap:
|
||||
dcs:
|
||||
ttl: 30
|
||||
loop_wait: 10
|
||||
retry_timeout: 10
|
||||
maximum_lag_on_failover: 1048576
|
||||
postgresql:
|
||||
use_pg_rewind: true
|
||||
use_slots: true
|
||||
parameters:
|
||||
wal_level: replica
|
||||
hot_standby: "on"
|
||||
wal_keep_segments: 8
|
||||
max_wal_senders: 10
|
||||
max_replication_slots: 10
|
||||
wal_log_hints: "on"
|
||||
|
||||
postgresql:
|
||||
listen: 0.0.0.0:5432
|
||||
connect_address: 72.62.39.24:5432
|
||||
data_dir: /var/lib/postgresql/data
|
||||
pgpass: /tmp/pgpass
|
||||
authentication:
|
||||
replication:
|
||||
username: replicator
|
||||
password: REPLICATION_PASSWORD_HERE
|
||||
superuser:
|
||||
username: postgres
|
||||
password: POSTGRES_PASSWORD_HERE
|
||||
parameters:
|
||||
unix_socket_directories: '/var/run/postgresql'
|
||||
|
||||
tags:
|
||||
nofailover: false
|
||||
noloadbalance: false
|
||||
clonefrom: true
|
||||
nosync: false
|
||||
```
|
||||
|
||||
### 3.2 Create Docker Compose
|
||||
Create `/root/traderv4-secondary/docker-compose.patroni.yml`:
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
etcd-secondary:
|
||||
image: quay.io/coreos/etcd:v3.5.11
|
||||
container_name: etcd-secondary
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- trading-bot-network
|
||||
ports:
|
||||
- "2379:2379"
|
||||
- "2380:2380"
|
||||
volumes:
|
||||
- etcd-secondary-data:/etcd-data
|
||||
command:
|
||||
- /usr/local/bin/etcd
|
||||
- --name=secondary
|
||||
- --data-dir=/etcd-data
|
||||
- --listen-peer-urls=http://0.0.0.0:2380
|
||||
- --listen-client-urls=http://0.0.0.0:2379
|
||||
- --advertise-client-urls=http://72.62.39.24:2379
|
||||
- --initial-advertise-peer-urls=http://72.62.39.24:2380
|
||||
- --initial-cluster=primary=http://95.216.52.28:2380,secondary=http://72.62.39.24:2380,witness=http://ORACLE_PUBLIC_IP:2380
|
||||
- --initial-cluster-state=new
|
||||
- --initial-cluster-token=trading-bot-cluster
|
||||
|
||||
patroni-secondary:
|
||||
image: patroni/patroni:latest
|
||||
container_name: patroni-secondary
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- trading-bot-network
|
||||
ports:
|
||||
- "8008:8008"
|
||||
- "5432:5432"
|
||||
volumes:
|
||||
- ./patroni-config/patroni-secondary.yml:/etc/patroni.yml
|
||||
- postgres-secondary-data:/var/lib/postgresql/data
|
||||
environment:
|
||||
- PATRONI_SCOPE=trading-bot-cluster
|
||||
- PATRONI_NAME=secondary
|
||||
depends_on:
|
||||
- etcd-secondary
|
||||
|
||||
volumes:
|
||||
etcd-secondary-data:
|
||||
driver: local
|
||||
postgres-secondary-data:
|
||||
driver: local
|
||||
|
||||
networks:
|
||||
trading-bot-network:
|
||||
external: true
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 4: Deploy Patroni Cluster
|
||||
|
||||
### 4.1 Start in Correct Order
|
||||
|
||||
**First: Oracle Witness**
|
||||
```bash
|
||||
ssh ubuntu@ORACLE_IP
|
||||
docker start etcd-witness
|
||||
docker logs -f etcd-witness
|
||||
# Wait for: "serving client requests"
|
||||
```
|
||||
|
||||
**Second: Primary (srvdocker02)**
|
||||
```bash
|
||||
cd /home/icke/traderv4
|
||||
docker compose -f docker-compose.patroni.yml up -d etcd-primary
|
||||
sleep 5
|
||||
docker compose -f docker-compose.patroni.yml up -d patroni-primary
|
||||
|
||||
# Watch logs
|
||||
docker logs -f patroni-primary
|
||||
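# Wait for: "initialized a new cluster"

# Patroni bootstraps an EMPTY cluster in the new postgres-primary-data volume,
# so restore the Step 2.1 dump before starting the secondary:
docker exec patroni-primary psql -U postgres -c "CREATE DATABASE trading_bot_v4;"
docker exec -i patroni-primary psql -U postgres -d trading_bot_v4 < backup_before_patroni_YYYYMMDD_HHMMSS.sql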
|
||||
```
|
||||
|
||||
**Third: Secondary (Hostinger)**
|
||||
```bash
|
||||
ssh root@72.62.39.24
|
||||
cd /root/traderv4-secondary
|
||||
docker compose -f docker-compose.patroni.yml up -d etcd-secondary
|
||||
sleep 5
|
||||
docker compose -f docker-compose.patroni.yml up -d patroni-secondary
|
||||
|
||||
# Watch logs
|
||||
docker logs -f patroni-secondary
|
||||
# Wait for: "replica has been created using basebackup"
|
||||
```
|
||||
|
||||
### 4.2 Verify Cluster Health
|
||||
|
||||
**Check etcd cluster:**
|
||||
```bash
|
||||
# On the primary server (on Hostinger use etcd-secondary, on Oracle use etcd-witness)
|
||||
docker exec etcd-primary etcdctl member list
|
||||
# Should show 3 members: primary, secondary, witness
|
||||
|
||||
docker exec etcd-primary etcdctl endpoint health --cluster
|
||||
# Should show all 3 healthy
|
||||
```
|
||||
|
||||
**Check Patroni cluster:**
|
||||
```bash
|
||||
# On primary
|
||||
docker exec patroni-primary patronictl -c /etc/patroni.yml list
|
||||
|
||||
# Should show:
|
||||
# + Cluster: trading-bot-cluster ---------+----+-----------+
|
||||
# | Member | Host | Role | State | TL | Lag in MB |
|
||||
# +-----------+----------------+---------+---------+----+-----------+
|
||||
# | primary | 95.216.52.28 | Leader | running | 1 | |
|
||||
# | secondary | 72.62.39.24 | Replica | running | 1 | 0 |
|
||||
# +-----------+----------------+---------+---------+----+-----------+
|
||||
```
|
||||
|
||||
**Check replication:**
|
||||
```bash
|
||||
# On primary
|
||||
docker exec patroni-primary psql -U postgres -d trading_bot_v4 -c "SELECT state, sync_state, replay_lag FROM pg_stat_replication;"
|
||||
|
||||
# Should show:
|
||||
# state | sync_state | replay_lag
|
||||
# ----------+------------+------------
|
||||
# streaming | async | 00:00:00
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 5: Update Trading Bot Configuration
|
||||
|
||||
### 5.1 Update DATABASE_URL
|
||||
|
||||
**Primary server docker-compose.yml:**
|
||||
```yaml
|
||||
services:
|
||||
trading-bot:
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://postgres:PASSWORD@patroni-primary:5432/trading_bot_v4
|
||||
```
|
||||
|
||||
**Secondary server docker-compose.yml:**
|
||||
```yaml
|
||||
services:
|
||||
trading-bot:
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://postgres:PASSWORD@patroni-secondary:5432/trading_bot_v4
|
||||
```
|
||||
|
||||
### 5.2 Rebuild Enhanced Health Check
|
||||
```bash
|
||||
# On both servers
|
||||
cd /home/icke/traderv4 # or /root/traderv4-secondary
|
||||
git pull
|
||||
docker compose build trading-bot
|
||||
docker compose up -d --force-recreate trading-bot
|
||||
```
|
||||
|
||||
### 5.3 Verify Health Check
|
||||
```bash
|
||||
# On primary
|
||||
curl http://localhost:3001/api/health
|
||||
# Should show: {"status":"healthy","database":"connected",...}
|
||||
|
||||
# Stop database to test
|
||||
docker stop patroni-primary
|
||||
sleep 15
|
||||
curl http://localhost:3001/api/health
|
||||
# Should show: {"status":"unhealthy","database":"disconnected",...}
|
||||
# HTTP status: 503
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 6: Test Failover
|
||||
|
||||
### 6.1 Simulate Primary Failure
|
||||
```bash
|
||||
# On primary
|
||||
docker stop patroni-primary
|
||||
|
||||
# Watch secondary logs
|
||||
ssh root@72.62.39.24
|
||||
docker logs -f patroni-secondary
|
||||
|
||||
# Should see:
|
||||
# "no action. I am a secondary and I'm following a leader"
|
||||
# "promoted self to leader by acquiring session lock"
|
||||
# "Server has been promoted"
|
||||
```
|
||||
|
||||
### 6.2 Verify Promotion
|
||||
```bash
|
||||
# On secondary
|
||||
docker exec patroni-secondary patronictl -c /etc/patroni.yml list
|
||||
|
||||
# Should show:
|
||||
# | secondary | 72.62.39.24 | Leader | running | 2 | |
|
||||
# | primary | 95.216.52.28 | ? | ? | ? | unknown |
|
||||
```
|
||||
|
||||
### 6.3 Check Database is Read-Write
|
||||
```bash
|
||||
# On secondary
|
||||
docker exec patroni-secondary psql -U postgres -c "SELECT pg_is_in_recovery();"
|
||||
# Should return: f (false = read-write)
|
||||
|
||||
# Test write
|
||||
docker exec patroni-secondary psql -U postgres -d trading_bot_v4 -c "INSERT INTO \"SystemEvent\" (\"eventType\", \"eventData\", \"createdAt\") VALUES ('test_failover', '{}', NOW());"
|
||||
# Should succeed
|
||||
```
|
||||
|
||||
### 6.4 Verify DNS Failover
|
||||
```bash
|
||||
# Check failover logs
|
||||
ssh root@72.62.39.24
|
||||
grep "FAILOVER" /var/log/dns-failover.log | tail -5
|
||||
|
||||
# Should show failover triggered after 90 seconds
|
||||
```
|
||||
|
||||
### 6.5 Test Primary Recovery
|
||||
```bash
|
||||
# Start old primary
|
||||
docker start patroni-primary
|
||||
|
||||
# Watch logs
|
||||
docker logs -f patroni-primary
|
||||
|
||||
# Should see:
|
||||
# "found already running Leader"
|
||||
# "starting as a secondary"
|
||||
# "running pg_rewind"
|
||||
# "replica has been started"
|
||||
```
|
||||
|
||||
### 6.6 Verify Automatic Failback
|
||||
```bash
|
||||
# Check cluster status
|
||||
docker exec patroni-secondary patronictl -c /etc/patroni.yml list
|
||||
|
||||
# Should show both running with secondary as Leader
|
||||
|
||||
# Check DNS failback
|
||||
ssh root@72.62.39.24
|
||||
grep "FAILBACK" /var/log/dns-failover.log | tail -2
|
||||
|
||||
# Should show automatic failback when primary became healthy
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 7: Monitoring and Maintenance
|
||||
|
||||
### 7.1 Daily Health Checks
|
||||
```bash
|
||||
# Check cluster status
|
||||
docker exec patroni-primary patronictl -c /etc/patroni.yml list
|
||||
|
||||
# Check replication lag
|
||||
docker exec patroni-primary psql -U postgres -d trading_bot_v4 -c "SELECT state, write_lag, replay_lag FROM pg_stat_replication;"
|
||||
|
||||
# Check etcd health
|
||||
docker exec etcd-primary etcdctl endpoint health --cluster
|
||||
```
|
||||
|
||||
### 7.2 Alert on Issues
|
||||
Add to DNS failover monitor script:
|
||||
```python
|
||||
# Check Patroni cluster health
|
||||
patroni_health = requests.get('http://95.216.52.28:8008/health')
|
||||
if patroni_health.status_code != 200:
|
||||
send_telegram_alert("⚠️ Patroni cluster unhealthy!")
|
||||
```
|
||||
|
||||
### 7.3 Backup Strategy
|
||||
```bash
|
||||
# Daily backup from current primary
|
||||
docker exec patroni-primary pg_dump -U postgres trading_bot_v4 | gzip > backup_$(date +%Y%m%d).sql.gz
|
||||
|
||||
# Keep 30 days of backups
|
||||
find . -name "backup_*.sql.gz" -mtime +30 -delete
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### etcd Split Brain
|
||||
**Symptom:** Each node thinks it's the leader
|
||||
**Fix:**
|
||||
```bash
|
||||
# Stop all Patroni instances
|
||||
docker stop patroni-primary patroni-secondary
|
||||
|
||||
# Clear etcd data
|
||||
docker exec etcd-primary etcdctl del --prefix /service/trading-bot-cluster
|
||||
|
||||
# Restart primary first
|
||||
docker start patroni-primary
|
||||
sleep 10
|
||||
docker start patroni-secondary
|
||||
```
|
||||
|
||||
### Replication Lag Too High
|
||||
**Symptom:** `replay_lag > 1 minute`
|
||||
**Causes:**
|
||||
- Network latency
|
||||
- Heavy write load on primary
|
||||
- Secondary server overloaded
|
||||
|
||||
**Fix:**
|
||||
```bash
|
||||
# Check network between servers
|
||||
ping -c 10 72.62.39.24
|
||||
|
||||
# Check secondary CPU/memory
|
||||
ssh root@72.62.39.24 'top -bn1 | head -20'
|
||||
|
||||
# Increase wal_keep_segments (wal_keep_size on PostgreSQL 13+) if the lag persists
|
||||
```
|
||||
|
||||
### Failover Not Triggering
|
||||
**Check:**
|
||||
1. etcd cluster has quorum (2 out of 3)
|
||||
2. Patroni can reach etcd
|
||||
3. The leader lock TTL (`ttl: 30`) has actually expired — a standby cannot take over until the old leader's lock times out
|
||||
|
||||
```bash
|
||||
docker exec patroni-secondary patronictl -c /etc/patroni.yml show-config
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cost Summary
|
||||
|
||||
**Oracle Cloud Free Tier:**
|
||||
- 2 VMs (AMD): FREE forever
|
||||
- Or 4 VMs (ARM): FREE forever
|
||||
- 200GB storage: FREE forever
|
||||
|
||||
**Actual Cost:** €0/month for witness node
|
||||
|
||||
**Total HA Setup Cost:** €0/month (uses existing servers + free Oracle)
|
||||
|
||||
---
|
||||
|
||||
## Timeline
|
||||
|
||||
1. **Wait for Oracle approval:** 24-48 hours (current)
|
||||
2. **Set up witness node:** 15 minutes
|
||||
3. **Configure Patroni on both servers:** 1 hour
|
||||
4. **Test failover:** 30 minutes
|
||||
5. **Deploy enhanced health check:** 10 minutes
|
||||
|
||||
**Total Time:** ~2 hours of work once Oracle approves
|
||||
|
||||
---
|
||||
|
||||
## Benefits
|
||||
|
||||
✅ **100% trading uptime** - Automatic failover in 10-30 seconds
|
||||
✅ **Zero split-brain risk** - Distributed consensus guarantees
|
||||
✅ **Minimal data loss** - Asynchronous streaming replication as configured; set `synchronous_mode: true` in the Patroni DCS config if zero data loss is required
|
||||
✅ **Automatic recovery** - Old primary becomes standby when it returns
|
||||
✅ **Zero manual intervention** - Everything automated
|
||||
✅ **Battle-tested** - Used by enterprises worldwide
|
||||
✅ **Zero additional cost** - Uses Oracle free tier
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. ✅ Oracle account pending approval
|
||||
2. ⏳ Wait for approval email (check daily)
|
||||
3. 📋 Follow this guide step-by-step
|
||||
4. 🎯 Test failover thoroughly before going live
|
||||
5. 📊 Monitor for first week to ensure stability
|
||||
|
||||
**Ready to implement as soon as Oracle approves!**
|
||||
Reference in New Issue
Block a user