CRITICAL FIX (Dec 9, 2025): Drift state verifier now stops retry loop when close transaction confirms, preventing infinite retries that cancel orders. Problem: - Drift state verifier detected 'closed' positions still open on Drift - Sent close transaction which CONFIRMED on-chain - But Drift API still showed position (5-minute propagation delay) - Verifier thought close failed, retried immediately - Infinite loop: close → confirm → Drift still shows position → retry - Eventually Position Manager gave up, cancelled ALL orders - User's position left completely unprotected Root Cause (Bug #80): - Solana transaction confirms in ~400ms on-chain - Drift.getPosition() caches state, takes 5+ minutes to update - Verifier didn't account for propagation delay - Kept retrying every 10 minutes because Drift API lagged behind - Each retry attempt potentially cancelled orders as side effect Solution: - Check configSnapshot.retryCloseTime before retrying - If last retry was <5 minutes ago, SKIP (wait for Drift to catch up) - Log: 'Skipping retry - last attempt Xs ago (Drift propagation delay)' - Prevents retry loop while Drift state propagates - After 5 minutes, can retry if position truly stuck Impact: - Orders no longer disappear repeatedly due to retry loop - Position stays protected with TP1/TP2/SL between retries - User doesn't need to manually replace orders every 3 minutes - System respects Drift API propagation delay Testing: - Deployed fix, orders placed successfully - Database synced: tp1OrderTx and tp2OrderTx populated - Monitoring logs for 'Skipping retry' messages on next verifier run - Position tracking: 1 active trade, monitoring active Note: This fixes the symptom (retry loop). Root cause is Drift SDK caching getPosition() results. Real fix would be to query on-chain state directly or increase cache TTL. Files changed: - lib/monitoring/drift-state-verifier.ts (added 5-minute skip window)
345 lines
11 KiB
TypeScript
345 lines
11 KiB
TypeScript
/**
|
||
* Drift State Verifier Service
|
||
*
|
||
* Double-checks that positions marked as closed in our database
|
||
* are actually closed on Drift Protocol. If mismatches found,
|
||
* attempts to close the position again.
|
||
*
|
||
* Background: Drift occasionally confirms close transactions but
|
||
* doesn't actually close the position (state propagation delay or
|
||
* partial fill issues). This service detects and fixes those cases.
|
||
*
|
||
* Created: Dec 7, 2025
|
||
*/
|
||
|
||
import { getDriftService } from '../drift/client'
|
||
import { getPrismaClient } from '../database/trades'
|
||
import { closePosition } from '../drift/orders'
|
||
import { sendTelegramMessage } from '../notifications/telegram'
|
||
|
||
export interface DriftStateMismatch {
|
||
tradeId: string
|
||
symbol: string
|
||
expectedState: 'closed' | 'open'
|
||
actualState: 'closed' | 'open'
|
||
driftSize: number
|
||
dbExitReason: string | null
|
||
timeSinceExit: number // milliseconds
|
||
}
|
||
|
||
class DriftStateVerifier {
|
||
private isRunning: boolean = false
|
||
private checkIntervalMs: number = 10 * 60 * 1000 // 10 minutes
|
||
private intervalId: NodeJS.Timeout | null = null
|
||
|
||
/**
|
||
* Start the periodic verification service
|
||
*/
|
||
start(): void {
|
||
if (this.isRunning) {
|
||
console.log('🔍 Drift state verifier already running')
|
||
return
|
||
}
|
||
|
||
console.log('🔍 Starting Drift state verifier (checks every 10 minutes)')
|
||
this.isRunning = true
|
||
|
||
// Run first check after 2 minutes (allow time for initial startup)
|
||
setTimeout(() => {
|
||
this.runVerification().catch(err => {
|
||
console.error('❌ Error in initial Drift state verification:', err)
|
||
})
|
||
}, 2 * 60 * 1000)
|
||
|
||
// Then run every 10 minutes
|
||
this.intervalId = setInterval(() => {
|
||
this.runVerification().catch(err => {
|
||
console.error('❌ Error in Drift state verification:', err)
|
||
})
|
||
}, this.checkIntervalMs)
|
||
}
|
||
|
||
/**
|
||
* Stop the periodic verification service
|
||
*/
|
||
stop(): void {
|
||
if (this.intervalId) {
|
||
clearInterval(this.intervalId)
|
||
this.intervalId = null
|
||
}
|
||
this.isRunning = false
|
||
console.log('🔍 Drift state verifier stopped')
|
||
}
|
||
|
||
/**
|
||
* Run verification check once (can be called manually)
|
||
*/
|
||
async runVerification(): Promise<DriftStateMismatch[]> {
|
||
console.log('🔍 Running Drift state verification...')
|
||
|
||
const mismatches: DriftStateMismatch[] = []
|
||
|
||
try {
|
||
const driftService = await getDriftService()
|
||
const prisma = getPrismaClient()
|
||
|
||
// Check 1: Find trades marked as closed in last 24 hours
|
||
// These should definitely not exist on Drift anymore
|
||
const recentlyClosedTrades = await prisma.trade.findMany({
|
||
where: {
|
||
exitReason: { not: null },
|
||
exitTime: {
|
||
gte: new Date(Date.now() - 24 * 60 * 60 * 1000)
|
||
}
|
||
},
|
||
select: {
|
||
id: true,
|
||
positionId: true,
|
||
symbol: true,
|
||
exitReason: true,
|
||
exitTime: true,
|
||
},
|
||
})
|
||
|
||
console.log(` Checking ${recentlyClosedTrades.length} recently closed trades...`)
|
||
|
||
for (const trade of recentlyClosedTrades) {
|
||
try {
|
||
// Extract market index from symbol (SOL-PERP → 0, ETH-PERP → 1, etc.)
|
||
const marketIndex = this.getMarketIndex(trade.symbol)
|
||
if (marketIndex === null) continue
|
||
|
||
// Query Drift for position
|
||
const driftPosition = await driftService.getPosition(marketIndex)
|
||
|
||
if (driftPosition && Math.abs(driftPosition.size) >= 0.01) {
|
||
// MISMATCH: DB says closed, Drift says open
|
||
const timeSinceExit = Date.now() - new Date(trade.exitTime!).getTime()
|
||
|
||
mismatches.push({
|
||
tradeId: trade.id,
|
||
symbol: trade.symbol,
|
||
expectedState: 'closed',
|
||
actualState: 'open',
|
||
driftSize: Math.abs(driftPosition.size),
|
||
dbExitReason: trade.exitReason,
|
||
timeSinceExit,
|
||
})
|
||
|
||
console.error(`🚨 MISMATCH DETECTED: ${trade.symbol}`)
|
||
console.error(` DB: Closed ${(timeSinceExit / 60000).toFixed(1)}min ago (${trade.exitReason})`)
|
||
console.error(` Drift: Still open with size ${driftPosition.size}`)
|
||
}
|
||
} catch (err) {
|
||
console.error(` Error checking ${trade.symbol}:`, err)
|
||
}
|
||
}
|
||
|
||
// Check 2: Find trades marked as open but actually closed on Drift
|
||
// (Less critical but worth detecting)
|
||
const openTrades = await prisma.trade.findMany({
|
||
where: {
|
||
exitReason: null,
|
||
},
|
||
select: {
|
||
id: true,
|
||
positionId: true,
|
||
symbol: true,
|
||
createdAt: true,
|
||
},
|
||
})
|
||
|
||
console.log(` Checking ${openTrades.length} open trades...`)
|
||
|
||
for (const trade of openTrades) {
|
||
try {
|
||
const marketIndex = this.getMarketIndex(trade.symbol)
|
||
if (marketIndex === null) continue
|
||
|
||
const driftPosition = await driftService.getPosition(marketIndex)
|
||
|
||
if (!driftPosition || Math.abs(driftPosition.size) < 0.01) {
|
||
// MISMATCH: DB says open, Drift says closed
|
||
const timeSinceExit = Date.now() - new Date(trade.createdAt).getTime()
|
||
|
||
mismatches.push({
|
||
tradeId: trade.id,
|
||
symbol: trade.symbol,
|
||
expectedState: 'open',
|
||
actualState: 'closed',
|
||
driftSize: 0,
|
||
dbExitReason: null,
|
||
timeSinceExit,
|
||
})
|
||
|
||
console.error(`🚨 MISMATCH DETECTED: ${trade.symbol}`)
|
||
console.error(` DB: Open since ${(timeSinceExit / 60000).toFixed(1)}min ago`)
|
||
console.error(` Drift: Position closed (size 0)`)
|
||
}
|
||
} catch (err) {
|
||
console.error(` Error checking ${trade.symbol}:`, err)
|
||
}
|
||
}
|
||
|
||
if (mismatches.length === 0) {
|
||
console.log(' ✅ No mismatches found - DB and Drift states match')
|
||
} else {
|
||
console.error(` ❌ Found ${mismatches.length} mismatches!`)
|
||
await this.handleMismatches(mismatches)
|
||
}
|
||
|
||
} catch (error) {
|
||
console.error('❌ Error running Drift state verification:', error)
|
||
}
|
||
|
||
return mismatches
|
||
}
|
||
|
||
/**
|
||
* Handle detected mismatches
|
||
*/
|
||
private async handleMismatches(mismatches: DriftStateMismatch[]): Promise<void> {
|
||
for (const mismatch of mismatches) {
|
||
if (mismatch.expectedState === 'closed' && mismatch.actualState === 'open') {
|
||
// CRITICAL: Position should be closed but is still open on Drift
|
||
await this.retryClose(mismatch)
|
||
} else if (mismatch.expectedState === 'open' && mismatch.actualState === 'closed') {
|
||
// Position closed externally - this is handled by Position Manager's ghost detection
|
||
console.log(` ℹ️ ${mismatch.symbol}: Ghost position (will be cleaned by Position Manager)`)
|
||
}
|
||
}
|
||
|
||
// Send Telegram alert
|
||
await this.sendMismatchAlert(mismatches)
|
||
}
|
||
|
||
/**
|
||
* Retry closing a position that should be closed but isn't
|
||
* CRITICAL FIX (Dec 9, 2025): Stop retry loop if close transaction confirms
|
||
*/
|
||
private async retryClose(mismatch: DriftStateMismatch): Promise<void> {
|
||
console.log(`🔄 Retrying close for ${mismatch.symbol}...`)
|
||
|
||
try {
|
||
// CRITICAL: Check if this trade already has a close attempt in progress
|
||
// If we recently tried to close (within 5 minutes), SKIP to avoid retry loop
|
||
const prisma = getPrismaClient()
|
||
const trade = await prisma.trade.findUnique({
|
||
where: { id: mismatch.tradeId },
|
||
select: {
|
||
exitOrderTx: true,
|
||
exitReason: true,
|
||
configSnapshot: true
|
||
}
|
||
})
|
||
|
||
if (trade?.configSnapshot) {
|
||
const snapshot = trade.configSnapshot as any
|
||
const lastRetryTime = snapshot.retryCloseTime ? new Date(snapshot.retryCloseTime) : null
|
||
|
||
if (lastRetryTime) {
|
||
const timeSinceRetry = Date.now() - lastRetryTime.getTime()
|
||
|
||
// If we retried within last 5 minutes, SKIP (Drift propagation delay)
|
||
if (timeSinceRetry < 5 * 60 * 1000) {
|
||
console.log(` ⏳ Skipping retry - last attempt ${(timeSinceRetry / 1000).toFixed(0)}s ago (Drift propagation delay)`)
|
||
return
|
||
}
|
||
}
|
||
}
|
||
|
||
const result = await closePosition({
|
||
symbol: mismatch.symbol,
|
||
percentToClose: 100,
|
||
slippageTolerance: 0.05 // 5% slippage tolerance for market order
|
||
})
|
||
|
||
if (result.success) {
|
||
console.log(` ✅ Close transaction confirmed: ${result.transactionSignature}`)
|
||
console.log(` P&L: $${result.realizedPnL?.toFixed(2) || 0}`)
|
||
console.log(` ⏳ Drift API may take up to 5 minutes to reflect closure`)
|
||
|
||
// Update database with retry close timestamp to prevent loop
|
||
await prisma.trade.update({
|
||
where: { id: mismatch.tradeId },
|
||
data: {
|
||
exitOrderTx: result.transactionSignature || 'RETRY_CLOSE',
|
||
realizedPnL: result.realizedPnL || 0,
|
||
configSnapshot: {
|
||
...trade?.configSnapshot as any,
|
||
retryCloseAttempted: true,
|
||
retryCloseTime: new Date().toISOString(),
|
||
}
|
||
}
|
||
})
|
||
} else {
|
||
console.error(` ❌ Failed to close ${mismatch.symbol}: ${result.error}`)
|
||
}
|
||
} catch (error) {
|
||
console.error(` ❌ Error retrying close for ${mismatch.symbol}:`, error)
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Send Telegram alert about mismatches
|
||
*/
|
||
private async sendMismatchAlert(mismatches: DriftStateMismatch[]): Promise<void> {
|
||
const criticalMismatches = mismatches.filter(m =>
|
||
m.expectedState === 'closed' && m.actualState === 'open'
|
||
)
|
||
|
||
if (criticalMismatches.length === 0) return
|
||
|
||
const message = `
|
||
🚨 DRIFT STATE MISMATCH ALERT
|
||
|
||
Found ${criticalMismatches.length} position(s) that should be closed but are still open on Drift:
|
||
|
||
${criticalMismatches.map(m => `
|
||
📊 ${m.symbol}
|
||
DB Status: Closed (${m.dbExitReason})
|
||
Drift Status: Open (${m.driftSize.toFixed(2)} tokens)
|
||
Time since exit: ${(m.timeSinceExit / 60000).toFixed(1)} minutes
|
||
|
||
⚠️ Retry close attempted automatically
|
||
`).join('\n')}
|
||
|
||
This indicates Drift Protocol state propagation issues.
|
||
Check Drift UI to verify actual position status.
|
||
`.trim()
|
||
|
||
try {
|
||
await sendTelegramMessage(message)
|
||
} catch (error) {
|
||
console.error('Failed to send Telegram alert:', error)
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Get Drift market index from symbol
|
||
*/
|
||
private getMarketIndex(symbol: string): number | null {
|
||
const marketMap: Record<string, number> = {
|
||
'SOL-PERP': 0,
|
||
'BTC-PERP': 1,
|
||
'ETH-PERP': 2,
|
||
}
|
||
return marketMap[symbol] ?? null
|
||
}
|
||
}
|
||
|
||
// Singleton instance
|
||
let verifierInstance: DriftStateVerifier | null = null
|
||
|
||
export function getDriftStateVerifier(): DriftStateVerifier {
|
||
if (!verifierInstance) {
|
||
verifierInstance = new DriftStateVerifier()
|
||
}
|
||
return verifierInstance
|
||
}
|
||
|
||
export function startDriftStateVerifier(): void {
|
||
const verifier = getDriftStateVerifier()
|
||
verifier.start()
|
||
}
|