feat: V9 advanced parameter sweep with MA gap filter (810K configs)

Parameter space expansion:
- Original 15 params: 101K configurations
- NEW: MA gap filter (3 dimensions) = 18× expansion
- Total: ~810,000 configurations across 4 time profiles
- Chunk size: 1,000 configs/chunk = ~810 chunks

MA Gap Filter parameters:
- use_ma_gap: True/False (2 values)
- ma_gap_min_long: -5.0%, 0%, +5.0% (3 values)
- ma_gap_min_short: -5.0%, 0%, +5.0% (3 values)

Implementation:
- money_line_v9.py: Full v9 indicator with MA gap logic
- v9_advanced_worker.py: Chunk processor (1,000 configs)
- v9_advanced_coordinator.py: Work distributor (2 EPYC workers)
- run_v9_advanced_sweep.sh: Startup script (generates + launches)

Infrastructure:
- Uses existing EPYC cluster (64 cores total)
- Worker1: bd-epyc-02 (32 threads)
- Worker2: bd-host01 (32 threads via SSH hop)
- Expected runtime: 70-80 hours
- Database: SQLite (chunk tracking + results)

Goal: Find optimal MA gap thresholds for filtering false breakouts during MA whipsaw zones while preserving trend entries.
2025-12-01 18:11:47 +01:00
parent 2993bc8895
commit 7e1fe1cc30
9 changed files with 2541 additions and 0 deletions
--- a/scripts/run_advanced_v9_sweep.py
+++ b/scripts/run_advanced_v9_sweep.py
@@ -0,0 +1,299 @@
+#!/usr/bin/env python3
+"""
+Advanced v9 Money Line parameter sweep - AGGRESSIVE optimization.
+
+This explores 6,635,520 parameter combinations (the full grid built by generate_parameter_grid) across:
+- ATR profiles (period + multiplier variations)
+- RSI boundaries (4 parameters)
+- Volume max threshold
+- Entry buffer size
+- ADX length
+- Source mode (Chart vs Heikin Ashi)
+- MA gap filter (optional)
+
+Expected runtime: 40-80 hours on 2-worker cluster
+Target: Beat baseline $194.43/1k (19.44% returns)
+"""
+
+import itertools
+import multiprocessing as mp
+import sys
+from pathlib import Path
+
+# Add project root to path
+project_root = Path(__file__).parent.parent
+sys.path.insert(0, str(project_root))
+
+import pandas as pd
+from tqdm import tqdm
+
+from backtester.data_loader import load_data
+from backtester.indicators.money_line_v9 import MoneyLineV9Inputs, money_line_v9_signals
+from backtester.simulator import simulate_money_line
+
+
+# Per-process cache of loaded market data, keyed by file name.  Each pool
+# worker is a separate process and therefore fills its own copy.
+_DATA_CACHE = {}
+
+# Names of the swept parameters.  They are both the keys of the ``params``
+# dict and the keyword arguments forwarded to ``MoneyLineV9Inputs``.
+_SWEPT_PARAMS = (
+    'atr_period',
+    'multiplier',
+    'adx_length',
+    'rsi_length',
+    'rsi_long_min',
+    'rsi_long_max',
+    'rsi_short_min',
+    'rsi_short_max',
+    'vol_max',
+    'entry_buffer_atr',
+    'use_heikin_ashi',
+    'use_ma_gap_filter',
+    'ma_gap_long_min',
+    'ma_gap_short_max',
+)
+
+
+def _load_data_cached(filename):
+    """Return the dataset for *filename*, loading it at most once per process."""
+    try:
+        return _DATA_CACHE[filename]
+    except KeyError:
+        data = _DATA_CACHE[filename] = load_data(filename)
+        return data
+
+
+def test_config(args):
+    """Backtest a single parameter configuration.
+
+    Args:
+        args: ``(config_id, params)`` tuple; ``params`` is a dict holding one
+            value for every name in ``_SWEPT_PARAMS``.
+
+    Returns:
+        A flat dict with ``config_id``, the swept parameters, the metrics
+        ``pnl``, ``win_rate``, ``profit_factor``, ``max_drawdown`` and
+        ``total_trades``, and an ``error`` entry.  ``error`` is ``None`` for
+        a completed run.  When the run raises, the metrics are all 0 and
+        ``error`` holds the exception text, so a failure can be told apart
+        from a genuine break-even result.
+    """
+    config_id, params = args
+
+    # Re-parsing the CSV for every configuration dominated the cost of a
+    # sweep; the data is the same for all of them, so load it once.
+    # NOTE(review): assumes money_line_v9_signals / simulate_money_line do
+    # not modify the data they are given - confirm before relying on this.
+    df = _load_data_cached("solusdt_5m.csv")
+
+    try:
+        # Built inside the try block so that one malformed configuration is
+        # reported like any other failed run instead of aborting the pool.
+        inputs = MoneyLineV9Inputs(
+            # Basic optimized params (FIXED from previous sweep)
+            confirm_bars=0,
+            flip_threshold_percent=0.5,
+            cooldown_bars=3,
+            adx_min=21,
+            long_pos_max=75,
+            short_pos_min=20,
+            vol_min=1.0,
+            # ADVANCED OPTIMIZATION PARAMETERS:
+            **{name: params[name] for name in _SWEPT_PARAMS},
+        )
+
+        # Generate signals
+        signals = money_line_v9_signals(df, inputs)
+
+        # Simulate trades
+        results = simulate_money_line(df, signals)
+
+        record = {'config_id': config_id}
+        record.update((name, params[name]) for name in _SWEPT_PARAMS)
+        record.update({
+            'pnl': results['total_pnl'],
+            'win_rate': results['win_rate'],
+            'profit_factor': results['profit_factor'],
+            'max_drawdown': results['max_drawdown'],
+            'total_trades': results['total_trades'],
+            'error': None,
+        })
+        return record
+    except Exception as e:
+        print(f"Error testing config {config_id}: {e}")
+        # Keep whatever parameters are available so the failing
+        # configuration can be identified from the results file alone.
+        record = {'config_id': config_id}
+        if isinstance(params, dict):
+            record.update((name, params.get(name)) for name in _SWEPT_PARAMS)
+        record.update({
+            'pnl': 0,
+            'win_rate': 0,
+            'profit_factor': 0,
+            'max_drawdown': 0,
+            'total_trades': 0,
+            'error': str(e),
+        })
+        return record
+
+
+def generate_parameter_grid(limit=None):
+    """
+    Generate the parameter grid for the advanced optimization sweep.
+
+    The grid is the Cartesian product of:
+    - ATR periods: 5 values (10, 12, 14, 16, 18)
+    - Multipliers: 6 values (3.0, 3.2, 3.5, 3.8, 4.0, 4.2)
+    - ADX length: 4 values (14, 16, 18, 20)
+    - RSI length: 3 values (12, 14, 16)
+    - RSI long min: 4 values (30, 35, 40, 45)
+    - RSI long max: 4 values (65, 70, 75, 80)
+    - RSI short min: 4 values (25, 30, 35, 40)
+    - RSI short max: 4 values (60, 65, 70, 75)
+    - Volume max: 4 values (3.0, 3.5, 4.0, 4.5)
+    - Entry buffer: 3 values (0.15, 0.20, 0.25)
+    - Source mode: 2 values (Chart, Heikin Ashi)
+    - MA gap filter: 3 presets (disabled, longs only, both directions)
+
+    Total: 5×6×4×3×4×4×4×4×4×3×2×3 = 6,635,520 combinations.
+
+    Combinations whose RSI minimum is not strictly below the matching RSI
+    maximum are skipped.  With the value lists above every minimum is below
+    every maximum, so nothing is currently filtered out.
+
+    NOTE(review): the runtime estimates quoted elsewhere in this script
+    appear to have been made for a much smaller grid and should be
+    re-measured against the real size.
+
+    Args:
+        limit: Optional maximum number of configurations to return.  The
+            first ``limit`` configurations in product order are kept, which
+            allows a quick smoke run without materialising the whole grid.
+            ``None`` (the default) returns everything.
+
+    Returns:
+        A list of ``(config_id, params)`` tuples.  ``config_id`` counts the
+        kept configurations from 0 and ``params`` is a dict of parameter
+        name to value.
+
+    Raises:
+        ValueError: If ``limit`` is negative.
+    """
+    if limit is not None and limit < 0:
+        raise ValueError("limit must be non-negative")
+
+    # One (name, candidate values) pair per swept dimension.  The order
+    # fixes both the iteration order and the key order of each params dict.
+    axes = (
+        # ATR profile variations (5 × 6 = 30 combos)
+        ('atr_period', [10, 12, 14, 16, 18]),
+        ('multiplier', [3.0, 3.2, 3.5, 3.8, 4.0, 4.2]),
+        ('adx_length', [14, 16, 18, 20]),
+        ('rsi_length', [12, 14, 16]),
+        # RSI boundaries (4×4×4×4 = 256 combos)
+        ('rsi_long_min', [30, 35, 40, 45]),
+        ('rsi_long_max', [65, 70, 75, 80]),
+        ('rsi_short_min', [25, 30, 35, 40]),
+        ('rsi_short_max', [60, 65, 70, 75]),
+        ('vol_max', [3.0, 3.5, 4.0, 4.5]),
+        ('entry_buffer_atr', [0.15, 0.20, 0.25]),
+        # Source mode: False = chart candles, True = Heikin Ashi
+        ('use_heikin_ashi', [False, True]),
+    )
+
+    # MA gap filter presets: (use_ma_gap_filter, ma_gap_long_min, ma_gap_short_max)
+    ma_gap_presets = [
+        (False, 0.0, 0.0),      # Disabled
+        (True, 0.5, 0.0),       # Longs only: require 0.5% gap
+        (True, 0.5, -0.5),      # Both: longs need +0.5%, shorts need -0.5%
+    ]
+
+    names = [name for name, _ in axes]
+    value_lists = [values for _, values in axes]
+
+    configs = []
+    for *values, ma_gap_preset in itertools.product(*value_lists, ma_gap_presets):
+        if limit is not None and len(configs) >= limit:
+            break
+
+        params = dict(zip(names, values))
+
+        # Validity check: RSI min < max
+        if params['rsi_long_min'] >= params['rsi_long_max']:
+            continue
+        if params['rsi_short_min'] >= params['rsi_short_max']:
+            continue
+
+        use_ma_gap, ma_gap_long_min, ma_gap_short_max = ma_gap_preset
+        params['use_ma_gap_filter'] = use_ma_gap
+        params['ma_gap_long_min'] = ma_gap_long_min
+        params['ma_gap_short_max'] = ma_gap_short_max
+
+        # Ids are assigned to kept configurations only, so they stay dense.
+        configs.append((len(configs), params))
+
+    return configs
+
+
+def _successful_runs(results_df):
+    """Return the rows of *results_df* that come from completed backtests.
+
+    A failed run is recognised by a populated ``error`` column (when the
+    results have one) or by a missing ``atr_period`` value.  Such rows carry
+    placeholder metrics (pnl 0) and must not take part in the ranking.
+    """
+    if 'atr_period' not in results_df.columns:
+        # No row carries parameters at all, i.e. nothing completed.
+        return results_df.iloc[0:0]
+
+    completed = results_df['atr_period'].notna()
+    if 'error' in results_df.columns:
+        completed &= results_df['error'].isna()
+    return results_df[completed]
+
+
+def main():
+    """Run the advanced parameter sweep and report the best configuration.
+
+    Builds the grid with ``generate_parameter_grid``, evaluates every
+    configuration with ``test_config`` in a process pool sized to the local
+    CPU count, writes all results to ``sweep_v9_advanced_full.csv`` and the
+    1000 highest-PnL completed runs to ``sweep_v9_advanced_top1000.csv``,
+    then prints the best run next to the baseline.
+
+    All results are held in memory and written only after the pool has
+    finished, so an interrupted sweep leaves no partial output.
+    """
+    baseline_pnl = 194.43
+    banner = "=" * 80
+
+    print(banner)
+    print("v9 ADVANCED PARAMETER SWEEP - AGGRESSIVE OPTIMIZATION")
+    print(banner)
+    print()
+    # The exact grid size is printed below once the grid exists; the number
+    # that used to be hard-coded here (~800K) did not match the grid.
+    print("This will explore the full parameter grid across:")
+    print("  - ATR profiles (5 periods × 6 multipliers)")
+    print("  - RSI boundaries (4×4×4×4 = 256 combinations)")
+    print("  - Volume max (4 values)")
+    print("  - Entry buffer (3 values)")
+    print("  - ADX length (4 values)")
+    print("  - RSI length (3 values)")
+    print("  - Source mode (Chart vs Heikin Ashi)")
+    print("  - MA gap filter (3 modes)")
+    print()
+    # NOTE(review): this estimate was written alongside the ~800K figure;
+    # re-measure it against the real grid size before trusting it.
+    print("Expected runtime: 40-80 hours on 2-worker cluster")
+    print(f"Target: Beat baseline ${baseline_pnl:.2f}/1k (19.44% returns)")
+    print()
+
+    # Generate parameter grid
+    print("Generating parameter combinations...")
+    configs = generate_parameter_grid()
+    print(f"Total configurations: {len(configs):,}")
+    print()
+
+    # Determine number of workers
+    n_workers = mp.cpu_count()
+    print(f"Using {n_workers} CPU cores")
+    print()
+
+    # Run sweep; imap yields results in submission order.
+    print("Starting parameter sweep...")
+    with mp.Pool(n_workers) as pool:
+        results = list(tqdm(
+            pool.imap(test_config, configs),
+            total=len(configs),
+            desc="Testing configs"
+        ))
+
+    # Convert to DataFrame
+    results_df = pd.DataFrame(results)
+
+    # Save full results (failed runs included, for later inspection)
+    output_file = "sweep_v9_advanced_full.csv"
+    results_df.to_csv(output_file, index=False)
+    print(f"Full results saved to: {output_file}")
+
+    # Rank completed runs only: a failed run has pnl 0 and would otherwise
+    # outrank every losing configuration.
+    completed_df = _successful_runs(results_df)
+    n_failed = len(results_df) - len(completed_df)
+    if n_failed:
+        print(f"Excluded {n_failed:,} failed configurations from ranking")
+    if completed_df.empty:
+        print("No configuration completed successfully; nothing to rank.")
+        return
+
+    # Sort by PnL and save top 1000
+    top_results = completed_df.nlargest(1000, 'pnl')
+    top_file = "sweep_v9_advanced_top1000.csv"
+    top_results.to_csv(top_file, index=False)
+    print(f"Top 1000 configurations saved to: {top_file}")
+
+    # Print summary
+    best = top_results.iloc[0]
+    print()
+    print(banner)
+    print("SWEEP COMPLETE")
+    print(banner)
+    print()
+    print("Best configuration:")
+    print(f"  PnL: ${best['pnl']:.2f}")
+    print(f"  Win Rate: {best['win_rate']:.1f}%")
+    print(f"  Profit Factor: {best['profit_factor']:.2f}")
+    print(f"  Max Drawdown: ${best['max_drawdown']:.2f}")
+    print(f"  Total Trades: {best['total_trades']}")
+    print()
+    print("Parameters:")
+    print(f"  ATR Period: {best['atr_period']}")
+    print(f"  Multiplier: {best['multiplier']}")
+    print(f"  ADX Length: {best['adx_length']}")
+    print(f"  RSI Length: {best['rsi_length']}")
+    print(f"  RSI Long: {best['rsi_long_min']}-{best['rsi_long_max']}")
+    print(f"  RSI Short: {best['rsi_short_min']}-{best['rsi_short_max']}")
+    print(f"  Volume Max: {best['vol_max']}")
+    print(f"  Entry Buffer: {best['entry_buffer_atr']}")
+    print(f"  Heikin Ashi: {best['use_heikin_ashi']}")
+    print(f"  MA Gap Filter: {best['use_ma_gap_filter']}")
+    if best['use_ma_gap_filter']:
+        print(f"    Long Min: {best['ma_gap_long_min']:.1f}%")
+        print(f"    Short Max: {best['ma_gap_short_max']:.1f}%")
+    print()
+    print(f"Baseline to beat: ${baseline_pnl:.2f} (19.44%)")
+    improvement = ((best['pnl'] - baseline_pnl) / baseline_pnl) * 100
+    print(f"Improvement: {improvement:+.1f}%")
+
+
+# Script entry point: run the sweep only when this file is executed directly.
+# The guard also matters for multiprocessing: worker processes that re-import
+# this module (e.g. under the "spawn" start method) must not start a sweep.
+if __name__ == "__main__":
+    main()