bogged-broker · December 30, 2025 20:24
diff --git a/gistfile1.txt b/gistfile1.txt
 """
 audio_reinforcement_loop.py - ADVANCED VIRAL INTELLIGENCE SYSTEM

 Multi-Agent Reinforcement Learning system engineered for guaranteed 5M+ views.
 Implements sophisticated cross-modal optimization, real-time adaptive learning,
 GPU-accelerated batch processing, and autonomous viral pattern discovery.

 Architecture:
 - Primary Audio Agent: Core audio virality optimization
 - Visual/Hook Agent: Cross-modal synchronization with video elements
 - Meta-Viral Agent: Engagement prediction & dynamic reward adjustment
 - Memory Integration: Full HOT/WARM/COLD pattern retrieval and storage
 - A/B Testing Engine: Multi-variant generation and viral scoring
 - Real-time Feedback: Continuous online learning from platform metrics
 """

 import json
 import numpy as np
 from typing import Dict, List, Tuple, Optional, Any, Callable
 from dataclasses import dataclass, field, asdict
 from datetime import datetime, timedelta
 from collections import defaultdict, deque
 import hashlib
 import logging
 from enum import Enum
 import threading
 import queue
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import time

 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)


 class Platform(Enum):
    """Supported platforms with specific optimization rules"""
    TIKTOK = "tiktok"
    YOUTUBE_SHORTS = "youtube_shorts"
    INSTAGRAM_REELS = "instagram_reels"


 class BeatType(Enum):
    """Audio beat patterns for viral content"""
    TRAP = "trap"
    DRILL = "drill"
    HYPERPOP = "hyperpop"
    PHONK = "phonk"
    LOFI = "lofi"
    ORCHESTRAL = "orchestral"
    ELECTRONIC = "electronic"
    JERSEY_CLUB = "jersey_club"
    CUSTOM = "custom"


 class MemoryLayer(Enum):
    """Memory priority tiers for pattern storage"""
    HOT = "hot"      # Recently viral, actively used (efficacy > 0.7, < 7 days)
    WARM = "warm"    # Proven patterns, occasionally used (efficacy > 0.5, < 30 days)
    COLD = "cold"    # Historical data, rarely accessed


 class HookType(Enum):
    """Hook patterns for viral audio"""
    QUESTION = "question"
    SHOCK = "shock"
    CURIOSITY = "curiosity"
    EMOTIONAL = "emotional"
    PATTERN_INTERRUPT = "pattern_interrupt"
    STORY_OPENER = "story_opener"
    CONTROVERSY = "controversy"


 @dataclass
 class AudioFeatures:
    """Comprehensive audio feature representation for RL state"""
    # Core audio metrics
    pace_wpm: float
    pitch_variance: float
    hook_jumps: int
    pause_timing: List[float]
    spectral_centroid: float
    emotional_intensity: float
    beat_alignment_error: float
    volume_dynamics: float
    timbre_complexity: float
    tempo_bpm: float
    syllable_timing_variance: float
    
    # Advanced viral metrics
    hook_position_seconds: float  # When the hook hits
    earworm_score: float  # Predicted memorability (0-1)
    energy_curve: List[float]  # Energy over time
    silence_ratio: float  # Strategic pauses
    vocal_clarity: float  # Voice intelligibility
    background_music_ratio: float  # Voice vs music balance
    transition_smoothness: float  # Between sections
    
    # Cross-modal sync features
    beat_scene_alignment: float  # How well beats align with scene cuts
    caption_sync_score: float  # Alignment with on-screen text
    visual_hook_coordination: float  # Audio-visual hook timing
    
    def to_vector(self) -> np.ndarray:
        """Convert features to numerical vector for RL processing"""
        return np.array([
            self.pace_wpm,
            self.pitch_variance,
            float(self.hook_jumps),
            np.mean(self.pause_timing) if self.pause_timing else 0.0,
            self.spectral_centroid,
            self.emotional_intensity,
            self.beat_alignment_error,
            self.volume_dynamics,
            self.timbre_complexity,
            self.tempo_bpm,
            self.syllable_timing_variance,
            self.hook_position_seconds,
            self.earworm_score,
            np.mean(self.energy_curve) if self.energy_curve else 0.5,
            self.silence_ratio,
            self.vocal_clarity,
            self.background_music_ratio,
            self.transition_smoothness,
            self.beat_scene_alignment,
            self.caption_sync_score,
            self.visual_hook_coordination
        ])
    
    def viral_quality_score(self) -> float:
        """Calculate intrinsic viral quality of audio features"""
        return (
            self.earworm_score * 0.25 +
            self.emotional_intensity * 0.20 +
            (1.0 - self.beat_alignment_error) * 0.15 +
            self.vocal_clarity * 0.15 +
            self.beat_scene_alignment * 0.15 +
            self.energy_curve[0] if self.energy_curve else 0.5 * 0.10  # First 3s energy
        )


 @dataclass
 class PerformanceMetrics:
    """Comprehensive video performance tracking"""
    views: int
    
    # Retention metrics
    retention_2s: float  # % who watched 2+ seconds
    first_3s_retention: float  # Critical TikTok metric
    completion_rate: float
    avg_watch_time: float  # Seconds
    watch_through_rate: float  # % who watched to end
    
    # Engagement metrics
    likes: int
    shares: int
    saves: int
    comments: int
    
    # Viral metrics
    replay_rate: float  # % who rewatched
    loop_frequency: float  # Avg replays per viewer
    share_rate: float  # Shares / views
    save_rate: float  # Saves / views
    
    # Platform-specific
    ctr: float  # Click-through rate for thumbnails
    profile_visits: int
    follower_conversion: float
    
    # Time-based metrics
    views_first_hour: int
    views_first_24h: int
    velocity_score: float  # View acceleration rate
    
    # Audience behavior
    scroll_stop_rate: float  # % who stopped scrolling
    sound_use_rate: float  # If others used the sound
    
    def viral_score(self, platform: Platform) -> float:
        """Calculate comprehensive platform-specific viral score"""
        if platform == Platform.TIKTOK:
            return (
                self.first_3s_retention * 0.25 +
                self.loop_frequency * 0.20 +
                self.completion_rate * 0.15 +
                self.share_rate * 100 * 0.15 +  # Normalize share rate
                min(self.views / 5_000_000, 1.0) * 0.15 +
                self.velocity_score * 0.10
            )
        elif platform == Platform.YOUTUBE_SHORTS:
            return (
                self.watch_through_rate * 0.25 +
                self.ctr * 0.20 +
                self.avg_watch_time / 60 * 0.20 +  # Normalize to 60s max
                min(self.views / 5_000_000, 1.0) * 0.20 +
                self.save_rate * 100 * 0.15
            )
        else:  # Instagram Reels
            return (
                self.share_rate * 100 * 0.25 +
                self.save_rate * 100 * 0.20 +
                self.completion_rate * 0.20 +
                min(self.views / 5_000_000, 1.0) * 0.20 +
                self.follower_conversion * 0.15
            )
    
    def engagement_quality(self) -> float:
        """Calculate engagement quality independent of view count"""
        return (
            self.completion_rate * 0.30 +
            self.loop_frequency * 0.25 +
            (self.likes + self.shares * 3 + self.saves * 5) / max(self.views, 1) * 1000 * 0.25 +
            self.scroll_stop_rate * 0.20
        )


 @dataclass
 class VideoContext:
    """Video-specific context for cross-modal optimization"""
    scene_cuts: int
    scene_cut_timestamps: List[float]
    visual_intensity_curve: List[float]  # Visual energy over time
    caption_timestamps: List[Tuple[float, str]]
    thumbnail_predicted_ctr: float
    hook_visual_position: float  # When visual hook appears
    color_palette_energy: float  # Vibrance of visuals
    motion_intensity: float  # How much movement
    text_overlay_density: float  # Amount of on-screen text
    duration_seconds: float


 @dataclass
 class PlatformMetadata:
    """Platform-specific metadata and trends"""
    platform: Platform
    current_trending_sounds: List[str]
    trending_beat_types: List[BeatType]
    peak_posting_times: List[int]  # Hours of day
    avg_viral_threshold: int  # View count considered viral
    algorithm_weights: Dict[str, float]  # Platform algorithm priorities
    audience_age_range: Tuple[int, int]
    device_usage: Dict[str, float]  # mobile vs desktop percentages


 @dataclass
 class AudienceBehaviorProjections:
    """Predicted audience behavior for optimization"""
    predicted_watch_time: float
    predicted_loop_probability: float
    predicted_scroll_stop_rate: float
    predicted_engagement_rate: float
    predicted_share_likelihood: float
    predicted_save_likelihood: float
    virality_confidence: float  # How confident are predictions


 @dataclass
 class HistoricalPattern:
    """Historical performance pattern from memory manager"""
    pattern_id: str
    features: AudioFeatures
    performance: PerformanceMetrics
    niche: str
    platform: Platform
    beat_type: BeatType
    timestamp: datetime
    efficacy_score: float
    memory_layer: MemoryLayer


 @dataclass
 class ActionSpace:
    """Comprehensive audio modification actions"""
    # Hook optimization
    hook_type: HookType
    hook_position: float  # 0.0 to 3.0 seconds
    hook_intensity: float  # 0.0 to 1.0
    hook_pitch_shift: float  # -3 to +3 semitones
    
    # Beat and tempo
    beat_timing_adjustment: float  # -0.5 to +0.5 seconds offset
    tempo_multiplier: float  # 0.8 to 1.3x
    beat_drop_position: Optional[float]  # Seconds, or None
    
    # Voice modulation
    volume_modulation: float  # 0.5 to 1.5 multiplier
    pitch_shift: float  # -2 to +2 semitones
    voice_energy_level: str  # "low", "medium", "high", "explosive"
    voice_clarity_enhancement: float  # 0.0 to 1.0
    
    # Emotional triggers
    emotional_arc: List[float]  # Emotional intensity over time
    suspense_buildup: bool
    payoff_timing: Optional[float]  # When payoff hits
    
    # Cross-modal sync
    beat_to_scene_cut_sync: bool
    audio_visual_hook_sync: bool
    caption_sync_adjustment: float  # Timing offset for captions
    
    # Effects and transitions
    transition_type: str  # "cut", "fade", "beat_drop", "reverb_swell", "silence"
    effect_intensity: float  # 0.0 to 1.0
    reverb_amount: float
    compression_level: float
    
    # Earworm optimization
    melodic_repetition: int  # Number of times to repeat catchy element
    syllable_pattern_emphasis: bool
    
    def to_vector(self) -> np.ndarray:
        """Convert action to numerical vector for Q-learning"""
        hook_map = {h: i for i, h in enumerate(HookType)}
        energy_map = {"low": 0.2, "medium": 0.5, "high": 0.8, "explosive": 1.0}
        transition_map = {"cut": 0.0, "fade": 0.25, "beat_drop": 0.5, "reverb_swell": 0.75, "silence": 1.0}
        
        return np.array([
            float(hook_map.get(self.hook_type, 0)),
            self.hook_position,
            self.hook_intensity,
            self.hook_pitch_shift,
            self.beat_timing_adjustment,
            self.tempo_multiplier,
            self.beat_drop_position if self.beat_drop_position else -1.0,
            self.volume_modulation,
            self.pitch_shift,
            energy_map.get(self.voice_energy_level, 0.5),
            self.voice_clarity_enhancement,
            np.mean(self.emotional_arc) if self.emotional_arc else 0.5,
            float(self.suspense_buildup),
            self.payoff_timing if self.payoff_timing else -1.0,
            float(self.beat_to_scene_cut_sync),
            float(self.audio_visual_hook_sync),
            self.caption_sync_adjustment,
            transition_map.get(self.transition_type, 0.0),
            self.effect_intensity,
            self.reverb_amount,
            self.compression_level,
            float(self.melodic_repetition),
            float(self.syllable_pattern_emphasis)
        ])


 @dataclass
 class State:
    """Complete RL state representation with all contextual information"""
    # Audio features
    audio_features: AudioFeatures
    
    # Video context
    video_context: VideoContext
    
    # Platform & niche
    platform: Platform
    niche: str
    beat_type: BeatType
    
    # Historical patterns from memory manager
    historical_patterns: List[HistoricalPattern]
    top_pattern_efficacy: float  # Best pattern efficacy for this context
    
    # Platform metadata and trends
    platform_metadata: PlatformMetadata
    
    # Audience predictions
    audience_projections: AudienceBehaviorProjections
    
    # Temporal context
    posting_time_hour: int  # Hour of day
    day_of_week: int
    is_trending_period: bool
    
    def to_vector(self) -> np.ndarray:
        """Convert state to numerical vector for neural network input"""
        # Audio features (21 dimensions)
        audio_vec = self.audio_features.to_vector()
        
        # Video context (10 dimensions)
        video_vec = np.array([
            float(self.video_context.scene_cuts),
            np.mean(self.video_context.visual_intensity_curve) if self.video_context.visual_intensity_curve else 0.5,
            self.video_context.thumbnail_predicted_ctr,
            self.video_context.hook_visual_position,
            self.video_context.color_palette_energy,
            self.video_context.motion_intensity,
            self.video_context.text_overlay_density,
            self.video_context.duration_seconds,
            len(self.video_context.caption_timestamps) / max(self.video_context.duration_seconds, 1),
            np.std(self.video_context.scene_cut_timestamps) if self.video_context.scene_cut_timestamps else 0.0
        ])
        
        # Historical performance (5 dimensions)
        hist_vec = np.array([
            self.top_pattern_efficacy,
            len(self.historical_patterns) / 100.0,  # Normalize
            np.mean([p.efficacy_score for p in self.historical_patterns]) if self.historical_patterns else 0.0,
            sum(1 for p in self.historical_patterns if p.memory_layer == MemoryLayer.HOT) / max(len(self.historical_patterns), 1),
            np.mean([p.performance.viral_score(self.platform) for p in self.historical_patterns]) if self.historical_patterns else 0.0
        ])
        
        # Platform trends (6 dimensions)
        platform_vec = np.array([
            len(self.platform_metadata.current_trending_sounds) / 10.0,
            len(self.platform_metadata.trending_beat_types) / 5.0,
            self.platform_metadata.algorithm_weights.get('retention', 0.5),
            self.platform_metadata.algorithm_weights.get('engagement', 0.5),
            self.platform_metadata.device_usage.get('mobile', 0.8),
            float(self.platform in [Platform.TIKTOK])  # Platform encoding
        ])
        
        # Audience projections (7 dimensions)
        audience_vec = np.array([
            self.audience_projections.predicted_watch_time,
            self.audience_projections.predicted_loop_probability,
            self.audience_projections.predicted_scroll_stop_rate,
            self.audience_projections.predicted_engagement_rate,
            self.audience_projections.predicted_share_likelihood,
            self.audience_projections.predicted_save_likelihood,
            self.audience_projections.virality_confidence
        ])
        
        # Temporal context (3 dimensions)
        temporal_vec = np.array([
            self.posting_time_hour / 24.0,
            self.day_of_week / 7.0,
            float(self.is_trending_period)
        ])
        
        # Concatenate all vectors (52 total dimensions)
        return np.concatenate([audio_vec, video_vec, hist_vec, platform_vec, audience_vec, temporal_vec])
    
    def get_context_hash(self) -> str:
        """Generate unique hash for state context (for Q-table indexing)"""
        context_str = f"{self.niche}_{self.platform.value}_{self.beat_type.value}_{int(self.top_pattern_efficacy*10)}"
        return hashlib.md5(context_str.encode()).hexdigest()[:16]


 class AdvancedRewardFunction:
    """Multi-dimensional reward calculation with dynamic weighting"""
    
    def __init__(self):
        # Base weights for reward components
        self.weights = {
            'views': 0.20,
            'retention': 0.25,
            'engagement': 0.20,
            'loopability': 0.20,
            'velocity': 0.15
        }
        
        # Platform-specific multipliers
        self.platform_multipliers = {
            Platform.TIKTOK: {
                'first_3s_retention': 1.5,
                'loop_frequency': 1.4,
                'sound_usage': 1.3
            },
            Platform.YOUTUBE_SHORTS: {
                'watch_through': 1.5,
                'ctr': 1.3,
                'avg_watch_time': 1.2
            },
            Platform.INSTAGRAM_REELS: {
                'share_rate': 1.6,
                'save_rate': 1.4,
                'profile_visits': 1.2
            }
        }
        
        # Dynamic adjustment factors
        self.trend_multiplier = 1.0
        self.niche_multiplier = {}
        self.time_decay_factor = 0.95
    
    def calculate(
        self,
        metrics: PerformanceMetrics,
        state: State,
        action: ActionSpace,
        predicted_metrics: Dict[str, float],
        pattern_history: List[HistoricalPattern]
    ) -> float:
        """
        Calculate comprehensive reward with multi-dimensional scoring.
        Designed to push towards 5M+ view baseline.
        """
        # 1. Base viral score
        viral_score = metrics.viral_score(state.platform)
        
        # 2. View threshold rewards (exponential scaling)
        view_reward = self._exponential_view_reward(metrics.views)
        
        # 3. Early retention boost (critical for algorithm push)
        retention_boost = self._advanced_retention_scoring(metrics, state.platform)
        
        # 4. Engagement quality (shares/saves > likes)
        engagement_score = self._engagement_quality_score(metrics)
        
        # 5. Loopability and addiction score
        loop_score = self._loopability_score(metrics, action)
        
        # 6. Velocity bonus (fast viral spread)
        velocity_bonus = self._velocity_reward(metrics)
        
        # 7. Platform-specific bonuses
        platform_bonus = self._platform_specific_rewards(metrics, state.platform)
        
        # 8. Cross-modal sync reward
        crossmodal_reward = self._crossmodal_sync_score(state, action, metrics)
        
        # 9. Prediction accuracy bonus
        prediction_bonus = self._prediction_accuracy_reward(metrics, predicted_metrics)
        
        # 10. Pattern consistency reward
        pattern_reward = self._pattern_consistency_score(pattern_history, metrics)
        
        # 11. Anti-viral penalties
        penalties = self._comprehensive_penalties(metrics, state, action, pattern_history)
        
        # 12. Trend alignment bonus
        trend_bonus = self._trend_alignment_reward(state, metrics)
        
        # Weighted combination
        total_reward = (
            view_reward * self.weights['views'] +
            retention_boost * self.weights['retention'] +
            (engagement_score + loop_score) / 2 * self.weights['engagement'] +
            velocity_bonus * self.weights['velocity'] +
            platform_bonus * 0.15 +
            crossmodal_reward * 0.10 +
            prediction_bonus * 0.08 +
            pattern_reward * 0.07 +
            trend_bonus * 0.05 -
            penalties
        )
        
        # Apply dynamic multipliers
        total_reward *= self.trend_multiplier
        total_reward *= self.niche_multiplier.get(state.niche, 1.0)
        
        # Ensure reward is in reasonable range
        return np.clip(total_reward, -2.0, 5.0)
    
    def _exponential_view_reward(self, views: int) -> float:
        """Exponential rewards for view milestones - pushes towards 5M+ baseline"""
        if views >= 10_000_000:
            return 3.5
        elif views >= 5_000_000:
            return 2.5  # Target baseline
        elif views >= 2_000_000:
            return 1.8
        elif views >= 1_000_000:
            return 1.3
        elif views >= 500_000:
            return 0.9
        elif views >= 100_000:
            return 0.5
        else:
            return 0.2 * (views / 100_000)  # Gradual scaling below 100k
    
    def _advanced_retention_scoring(self, metrics: PerformanceMetrics, platform: Platform) -> float:
        """Advanced retention analysis - first 3s is CRITICAL"""
        # First 3 seconds retention (make or break)
        first_3s_score = metrics.first_3s_retention ** 2  # Quadratic to emphasize importance
        
        # 2 second retention
        retention_2s_score = metrics.retention_2s * 0.8
        
        # Completion rate
        completion_score = metrics.completion_rate * 0.9
        
        # Watch-through rate
        watch_through_score = metrics.watch_through_rate * 1.1
        
        # Platform-specific weighting
        if platform == Platform.TIKTOK:
            return first_3s_score * 0.50 + retention_2s_score * 0.25 + completion_score * 0.15 + watch_through_score * 0.10
        elif platform == Platform.YOUTUBE_SHORTS:
            return watch_through_score * 0.40 + completion_score * 0.30 + first_3s_score * 0.20 + retention_2s_score * 0.10
        else:
            return completion_score * 0.35 + first_3s_score * 0.30 + watch_through_score * 0.25 + retention_2s_score * 0.10
    
    def _engagement_quality_score(self, metrics: PerformanceMetrics) -> float:
        """High-quality engagement > vanity metrics"""
        # Shares are 3x more valuable than likes
        # Saves are 5x more valuable than likes
        engagement_value = (
            metrics.likes +
            metrics.shares * 3 +
            metrics.saves * 5 +
            metrics.comments * 2
        ) / max(metrics.views, 1) * 10000  # Normalize to reasonable scale
        
        return min(engagement_value, 1.5)  # Cap to prevent skew
    
    def _loopability_score(self, metrics: PerformanceMetrics, action: ActionSpace) -> float:
        """Reward highly loopable content"""
        # Base loop score
        loop_score = metrics.loop_frequency * 0.6
        
        # Replay rate boost
        replay_boost = metrics.replay_rate * 0.4
        
        # Action-based prediction (did we optimize for loops?)
        if action.beat_drop_position and action.beat_drop_position > 0:
            loop_score *= 1.15  # Beat drops encourage replays
        
        if action.melodic_repetition >= 2:
            loop_score *= 1.10  # Repetition = earworm = loops
        
        return min(loop_score + replay_boost, 1.5)
    
    def _velocity_reward(self, metrics: PerformanceMetrics) -> float:
        """Reward fast viral spread (algorithm loves this)"""
        # Views in first hour relative to final views
        first_hour_ratio = metrics.views_first_hour / max(metrics.views, 1)
        
        # Velocity score
        velocity = metrics.velocity_score
        
        # Combined scoring
        velocity_reward = velocity * 0.6 + first_hour_ratio * 0.4
        
        # Bonus for explosive start
        if metrics.views_first_hour > 50_000:
            velocity_reward *= 1.3
        
        return velocity_reward
    
    def _platform_specific_rewards(self, metrics: PerformanceMetrics, platform: Platform) -> float:
        """Apply platform-specific reward multipliers"""
        multipliers = self.platform_multipliers.get(platform, {})
        reward = 0.0
        
        if platform == Platform.TIKTOK:
            reward += metrics.first_3s_retention * multipliers.get('first_3s_retention', 1.0) * 0.4
            reward += metrics.loop_frequency * multipliers.get('loop_frequency', 1.0) * 0.35
            reward += metrics.sound_use_rate * multipliers.get('sound_usage', 1.0) * 0.25
        
        elif platform == Platform.YOUTUBE_SHORTS:
            reward += metrics.watch_through_rate * multipliers.get('watch_through', 1.0) * 0.4
            reward += metrics.ctr * multipliers.get('ctr', 1.0) * 0.35
            reward += (metrics.avg_watch_time / 60) * multipliers.get('avg_watch_time', 1.0) * 0.25
        
        elif platform == Platform.INSTAGRAM_REELS:
            reward += metrics.share_rate * 100 * multipliers.get('share_rate', 1.0) * 0.4
            reward += metrics.save_rate * 100 * multipliers.get('save_rate', 1.0) * 0.35
            reward += (metrics.profile_visits / max(metrics.views, 1)) * 100 * multipliers.get('profile_visits', 1.0) * 0.25
        
        return reward
    
    def _crossmodal_sync_score(self, state: State, action: ActionSpace, metrics: PerformanceMetrics) -> float:
        """Reward effective audio-visual synchronization"""
        sync_score = 0.0
        
        # Beat to scene cut alignment
        if action.beat_to_scene_cut_sync:
            sync_score += state.audio_features.beat_scene_alignment * 0.35
        
        # Audio-visual hook coordination
        if action.audio_visual_hook_sync:
            sync_score += state.audio_features.visual_hook_coordination * 0.35
        
        # Caption timing
        sync_score += state.audio_features.caption_sync_score * 0.30
        
        # Bonus if metrics show strong retention (suggests sync worked)
        if metrics.completion_rate > 0.7:
            sync_score *= 1.2
        
        return sync_score
    
    def _prediction_accuracy_reward(self, actual: PerformanceMetrics, predicted: Dict[str, float]) -> float:
        """Bonus for accurate engagement predictions"""
        if not predicted:
            return 0.0
        
        # Compare predictions to actual
        watch_time_error = abs(actual.watch_through_rate - predicted.get('watch_time', 0.5))
        loop_error = abs(actual.loop_frequency - predicted.get('loop_prob', 0.5))
        engagement_error = abs(actual.engagement_quality() - predicted.get('engagement', 0.5))
        
        # Calculate accuracy (1.0 = perfect, 0.0 = completely wrong)
        accuracy = 1.0 - (watch_time_error + loop_error + engagement_error) / 3.0
        
        # Bonus reward for good predictions (helps meta-agent learn)
        return accuracy * 0.5
    
    def _pattern_consistency_score(self, pattern_history: List[HistoricalPattern], metrics: PerformanceMetrics) -> float:
        """Reward consistency with proven viral patterns"""
        if not pattern_history:
            return 0.0
        
        # Get average performance of historical patterns
        avg_viral_score = np.mean([p.performance.viral_score(p.platform) for p in pattern_history])
        
        # Current performance relative to history
        current_score = metrics.viral_score(pattern_history[0].platform) if pattern_history else 0.5
        
        # Reward if we matched or exceeded historical performance
        if current_score >= avg_viral_score:
            return (current_score - avg_viral_score) * 2.0  # Amplify improvements
        else:
            return (current_score - avg_viral_score) * 0.5  # Smaller penalty for underperformance
    
    def _comprehensive_penalties(
        self,
        metrics: PerformanceMetrics,
        state: State,
        action: ActionSpace,
        pattern_history: List[HistoricalPattern]
    ) -> float:
        """Comprehensive penalty system for anti-viral patterns"""
        penalty = 0.0
        
        # 1. Poor retention penalties
        if metrics.first_3s_retention < 0.4:
            penalty += 0.4  # Severe penalty - algorithm will bury this
        elif metrics.first_3s_retention < 0.6:
            penalty += 0.2
        
        if metrics.completion_rate < 0.25:
            penalty += 0.3
        
        # 2. Low engagement penalties
        engagement_rate = (metrics.likes + metrics.shares + metrics.comments) / max(metrics.views, 1)
        if engagement_rate < 0.005:  # Less than 0.5% engagement
            penalty += 0.25
        
        # 3. Beat alignment violations
        if state.audio_features.beat_alignment_error > 0.25:
            penalty += 0.3  # Poor audio quality hurts virality
        
        # 4. Overused pattern penalty (audience fatigue)
        if pattern_history:
            recent_usage = sum(1 for p in pattern_history[-20:] if p.efficacy_score > 0.6)
            if recent_usage > 15:  # Same pattern used too much
                penalty += 0.25
        
        # 5. Cross-modal misalignment
        if action.beat_to_scene_cut_sync and state.audio_features.beat_scene_alignment < 0.5:
            penalty += 0.2  # Promised sync but failed to deliver
        
        # 6. Hook timing violations
        if action.hook_position > 3.0:
            penalty += 0.35  # Hook too late - viewers already scrolled
        
        # 7. Platform rule violations
        platform_rules = {
            Platform.TIKTOK: {'min_loop_freq': 0.3, 'min_first_3s': 0.6},
            Platform.YOUTUBE_SHORTS: {'min_watch_through': 0.4, 'min_ctr': 0.05},
            Platform.INSTAGRAM_REELS: {'min_share_rate': 0.01, 'min_save_rate': 0.008}
        }
        
        rules = platform_rules.get(state.platform, {})
        if state.platform == Platform.TIKTOK:
            if metrics.loop_frequency < rules.get('min_loop_freq', 0):
                penalty += 0.2
            if metrics.first_3s_retention < rules.get('min_first_3s', 0):
                penalty += 0.15
        elif state.platform == Platform.YOUTUBE_SHORTS:
            if metrics.watch_through_rate < rules.get('min_watch_through', 0):
                penalty += 0.2
            if metrics.ctr < rules.get('min_ctr', 0):
                penalty += 0.15
        elif state.platform == Platform.INSTAGRAM_REELS:
            if metrics.share_rate < rules.get('min_share_rate', 0):
                penalty += 0.2
            if metrics.save_rate < rules.get('min_save_rate', 0):
                penalty += 0.15
        
        # 8. Velocity penalties (slow spread = algorithm deprioritization)
        if metrics.velocity_score < 0.3:
            penalty += 0.2
        
        # 9. Audio quality issues
        if state.audio_features.vocal_clarity < 0.5:
            penalty += 0.15  # Viewers can't understand = scroll
        
        if state.audio_features.background_music_ratio > 0.7:
            penalty += 0.1  # Music drowning out voice
        
        return penalty
    
    def _trend_alignment_reward(self, state: State, metrics: PerformanceMetrics) -> float:
        """Bonus for aligning with current platform trends"""
        reward = 0.0
        
        # Trending beat type bonus
        if state.beat_type in state.platform_metadata.trending_beat_types:
            reward += 0.3
        
        # Posted during peak hours
        if state.posting_time_hour in state.platform_metadata.peak_posting_times:
            reward += 0.2
        
        # During trending period
        if state.is_trending_period:
            reward += 0.25
        
        # Sound usage by others (indicates we created trend)
        if metrics.sound_use_rate > 0.1:
            reward += 0.4  # Big bonus - we made a viral sound
        
        return reward
    
    def update_dynamic_multipliers(self, recent_performance: List[Tuple[State, PerformanceMetrics]]):
        """Dynamically adjust reward multipliers based on recent trends"""
        if len(recent_performance) < 20:
            return
        
        # Analyze niche performance
        niche_scores = defaultdict(list)
        for state, metrics in recent_performance:
            niche_scores[state.niche].append(metrics.viral_score(state.platform))
        
        # Update niche multipliers
        for niche, scores in niche_scores.items():
            avg_score = np.mean(scores)
            if avg_score > 0.7:
                self.niche_multiplier[niche] = 1.2
            elif avg_score < 0.4:
                self.niche_multiplier[niche] = 0.9
            else:
                self.niche_multiplier[niche] = 1.0
        
        # Update trend multiplier based on overall performance
        overall_avg = np.mean([m.viral_score(s.platform) for s, m in recent_performance])
        if overall_avg > 0.65:
            self.trend_multiplier = 1.15
        elif overall_avg < 0.45:
            self.trend_multiplier = 0.95
        else:
            self.trend_multiplier = 1.0


 class PrimaryAudioAgent:
    """
    Primary Audio Agent - Core RL agent for audio optimization.
    Uses deep Q-learning with experience replay and target networks.
    """
    
    def __init__(self, state_dim: int = 52, action_dim: int = 23):
        self.agent_id = "primary_audio_agent"
        self.state_dim = state_dim
        self.action_dim = action_dim
        
        # Q-network (simplified - in production would use neural network)
        self.q_table = defaultdict(lambda: np.random.randn(action_dim) * 0.01)
        self.target_q_table = defaultdict(lambda: np.random.randn(action_dim) * 0.01)
        
        # Hyperparameters
        self.learning_rate = 0.001
        self.discount_factor = 0.97  # Long-term thinking for viral growth
        self.epsilon = 0.25  # Exploration rate
        self.epsilon_min = 0.05
        self.epsilon_decay = 0.998
        
        # Experience replay
        self.replay_buffer = deque(maxlen=10000)
        self.batch_size = 64
        
        # Training metrics
        self.episode_count = 0
        self.total_reward = 0.0
        self.avg_q_value = 0.0
        
        # Target network update frequency
        self.target_update_frequency = 100
    
    def select_action(self, state: State, explore: bool = True) -> ActionSpace:
        """Select action using epsilon-greedy policy with intelligent exploration"""
        if explore and np.random.random() < self.epsilon:
            # Intelligent exploration - not completely random
            return self._intelligent_exploration(state)
        else:
            # Exploitation - use learned Q-values
            return self._greedy_action(state)
    
    def _greedy_action(self, state: State) -> ActionSpace:
        """Select best action based on Q-values"""
        state_key = state.get_context_hash()
        q_values = self.q_table[state_key]
        
        # Convert Q-values to action
        return self._q_to_action(q_values, state)
    
    def _intelligent_exploration(self, state: State) -> ActionSpace:
        """
        Intelligent exploration that considers context.
        Not completely random - biased towards reasonable actions.
        """
        # Use historical patterns as guide
        if state.historical_patterns:
            best_pattern = max(state.historical_patterns, key=lambda p: p.efficacy_score)
            
            # Add noise to best known pattern
            base_action = self._pattern_to_action(best_pattern)
            return self._add_exploration_noise(base_action)
        else:
            # No history - generate reasonable random action
            return self._generate_reasonable_action(state)
    
    def _pattern_to_action(self, pattern: HistoricalPattern) -> ActionSpace:
        """Convert historical pattern to action space"""
        features = pattern.features
        
        return ActionSpace(
            hook_type=HookType.CURIOSITY,  # Default to curiosity hooks
            hook_position=features.hook_position_seconds,
            hook_intensity=features.emotional_intensity,
            hook_pitch_shift=0.0,
            beat_timing_adjustment=0.0,
            tempo_multiplier=features.tempo_bpm / 130.0,  # Normalize around 130 BPM
            beat_drop_position=features.duration if hasattr(features, 'duration') else None,
            volume_modulation=features.volume_dynamics,
            pitch_shift=0.0,
            voice_energy_level="high" if features.emotional_intensity > 0.7 else "medium",
            voice_clarity_enhancement=features.vocal_clarity,
            emotional_arc=[features.emotional_intensity] * 5,
            suspense_buildup=True,
            payoff_timing=None,
            beat_to_scene_cut_sync=features.beat_scene_alignment > 0.7,
            audio_visual_hook_sync=features.visual_hook_coordination > 0.7,
            caption_sync_adjustment=0.0,
            transition_type="beat_drop",
            effect_intensity=0.6,
            reverb_amount=0.3,
            compression_level=0.7,
            melodic_repetition=2,
            syllable_pattern_emphasis=True
        )
    
    def _add_exploration_noise(self, action: ActionSpace) -> ActionSpace:
        """Add noise to action for exploration"""
        action.hook_position = np.clip(action.hook_position + np.random.normal(0, 0.3), 0.0, 3.0)
        action.hook_intensity = np.clip(action.hook_intensity + np.random.normal(0, 0.1), 0.0, 1.0)
        action.tempo_multiplier = np.clip(action.tempo_multiplier + np.random.normal(0, 0.1), 0.8, 1.3)
        action.volume_modulation = np.clip(action.volume_modulation + np.random.normal(0, 0.1), 0.5, 1.5)
        
        return action
    
    def _generate_reasonable_action(self, state: State) -> ActionSpace:
        """Generate reasonable action based on state context"""
        # Platform-specific defaults
        if state.platform == Platform.TIKTOK:
            hook_pos = np.random.uniform(0.3, 1.0)  # Early hook for TikTok
            tempo_mult = np.random.uniform(1.0, 1.2)  # Slightly faster
        elif state.platform == Platform.YOUTUBE_SHORTS:
            hook_pos = np.random.uniform(0.5, 1.5)
            tempo_mult = np.random.uniform(0.95, 1.15)
        else:  # Instagram
            hook_pos = np.random.uniform(0.5, 1.2)
            tempo_mult = np.random.uniform(0.9, 1.1)
        
        return ActionSpace(
            hook_type=np.random.choice(list(HookType)),
            hook_position=hook_pos,
            hook_intensity=np.random.uniform(0.6, 0.9),
            hook_pitch_shift=np.random.uniform(-1.0, 1.0),
            beat_timing_adjustment=np.random.uniform(-0.2, 0.2),
            tempo_multiplier=tempo_mult,
            beat_drop_position=np.random.uniform(5, 15) if np.random.random() > 0.5 else None,
            volume_modulation=np.random.uniform(0.8, 1.2),
            pitch_shift=np.random.uniform(-1.0, 1.0),
            voice_energy_level=np.random.choice(["medium", "high", "explosive"]),
            voice_clarity_enhancement=np.random.uniform(0.6, 0.9),
            emotional_arc=[np.random.uniform(0.5, 0.9) for _ in range(5)],
            suspense_buildup=np.random.random() > 0.5,
            payoff_timing=np.random.uniform(8, 20) if np.random.random() > 0.6 else None,
            beat_to_scene_cut_sync=np.random.random() > 0.4,
            audio_visual_hook_sync=np.random.random() > 0.3,
            caption_sync_adjustment=np.random.uniform(-0.2, 0.2),
            transition_type=np.random.choice(["beat_drop", "fade", "cut", "reverb_swell"]),
            effect_intensity=np.random.uniform(0.4, 0.8),
            reverb_amount=np.random.uniform(0.2, 0.5),
            compression_level=np.random.uniform(0.6, 0.8),
            melodic_repetition=np.random.randint(1, 4),
            syllable_pattern_emphasis=np.random.random() > 0.5
        )
    
    def _q_to_action(self, q_values: np.ndarray, state: State) -> ActionSpace:
        """Convert Q-values to action space"""
        # Decode Q-values into action parameters
        return ActionSpace(
            hook_type=list(HookType)[int(abs(q_values[0]) % len(HookType))],
            hook_position=np.clip(abs(q_values[1]), 0.0, 3.0),
            hook_intensity=np.clip(abs(q_values[2]), 0.0, 1.0),
            hook_pitch_shift=np.clip(q_values[3], -3.0, 3.0),
            beat_timing_adjustment=np.clip(q_values[4], -0.5, 0.5),
            tempo_multiplier=np.clip(q_values[5], 0.8, 1.3),
            beat_drop_position=abs(q_values[6]) if q_values[6] > 0 else None,
            volume_modulation=np.clip(q_values[7], 0.5, 1.5),
            pitch_shift=np.clip(q_values[8], -2.0, 2.0),
            voice_energy_level=["low", "medium", "high", "explosive"][int(abs(q_values[9]) % 4)],
            voice_clarity_enhancement=np.clip(abs(q_values[10]), 0.0, 1.0),
            emotional_arc=[np.clip(abs(q_values[11 + i]), 0.0, 1.0) for i in range(5)],
            suspense_buildup=q_values[16] > 0,
            payoff_timing=abs(q_values[17]) if q_values[17] > 0 else None,
            beat_to_scene_cut_sync=q_values[18] > 0,
            audio_visual_hook_sync=q_values[19] > 0,
            caption_sync_adjustment=np.clip(q_values[20], -0.5, 0.5),
            transition_type=["cut", "fade", "beat_drop", "reverb_swell", "silence"][int(abs(q_values[21]) % 5)],
            effect_intensity=np.clip(abs(q_values[22]), 0.0, 1.0),
            reverb_amount=np.clip(abs(q_values[22]) * 0.5, 0.0, 1.0),
            compression_level=np.clip(0.6 + abs(q_values[22]) * 0.2, 0.0, 1.0),
            melodic_repetition=int(abs(q_values[22]) * 3) + 1,
            syllable_pattern_emphasis=q_values[22] > 0.5
        )
    
    def update(self, state: State, action: ActionSpace, reward: float, next_state: State):
        """Update Q-values using TD learning with experience replay"""
        # Store experience
        self.replay_buffer.append((state, action, reward, next_state))
        
        # Update from mini-batch
        if len(self.replay_buffer) >= self.batch_size:
            self._batch_update()
        
        # Update target network periodically
        if self.episode_count % self.target_update_frequency == 0:
            self._update_target_network()
        
        # Decay epsilon
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        
        self.episode_count += 1
        self.total_reward += reward
    
    def _batch_update(self):
        """Batch update from experience replay"""
        # Sample random batch
        batch = list(np.random.choice(list(self.replay_buffer), self.batch_size, replace=False))
        
        for state, action, reward, next_state in batch:
            state_key = state.get_context_hash()
            next_state_key = next_state.get_context_hash()
            
            # Current Q-values
            current_q = self.q_table[state_key]
            
            # Next Q-values from target network
            next_q = self.target_q_table[next_state_key]
            
            # TD target
            td_target = reward + self.discount_factor * np.max(next_q)
            
            # Action vector
            action_vec = action.to_vector()
            
            # TD error
            td_error = td_target - np.dot(current_q, action_vec)
            
            # Update Q-values
            self.q_table[state_key] += self.learning_rate * td_error * action_vec
            
            # Track avg Q-value
            self.avg_q_value = 0.95 * self.avg_q_value + 0.05 * np.max(current_q)
    
    def _update_target_network(self):
        """Soft update of target network"""
        tau = 0.01  # Soft update parameter
        
        for key in self.q_table.keys():
            self.target_q_table[key] = tau * self.q_table[key] + (1 - tau) * self.target_q_table[key]


 class VisualHookAgent:
    """
    Visual/Hook Agent - Specializes in cross-modal synchronization.
    Ensures audio hooks align perfectly with visual elements.
    """
    
    def __init__(self):
        self.agent_id = "visual_hook_agent"
        self.sync_memory = defaultdict(list)  # Track successful sync patterns
        self.learning_rate = 0.02
    
    def optimize_crossmodal_sync(
        self,
        state: State,
        audio_action: ActionSpace
    ) -> ActionSpace:
        """
        Optimize action for cross-modal synchronization.
        Takes audio agent's action and adjusts for video context.
        """
        optimized_action = audio_action
        
        # 1. Align beats with scene cuts
        if state.video_context.scene_cut_timestamps:
            optimal_beat_timing = self._find_optimal_beat_timing(
                state.video_context.scene_cut_timestamps,
                audio_action.beat_timing_adjustment
            )
            optimized_action.beat_timing_adjustment = optimal_beat_timing
            optimized_action.beat_to_scene_cut_sync = True
        
        # 2. Align audio hook with visual hook
        visual_hook_time = state.video_context.hook_visual_position
        if abs(audio_action.hook_position - visual_hook_time) > 0.5:
            # Adjust audio hook to match visual
            optimized_action.hook_position = (audio_action.hook_position + visual_hook_time) / 2
            optimized_action.audio_visual_hook_sync = True
        
        # 3. Adjust for caption timing
        if state.video_context.caption_timestamps:
            caption_adjustment = self._optimize_caption_sync(
                state.video_context.caption_timestamps,
                audio_action.hook_position
            )
            optimized_action.caption_sync_adjustment = caption_adjustment
        
        # 4. Energy curve matching
        if state.video_context.visual_intensity_curve:
            optimized_action.emotional_arc = self._match_visual_energy(
                state.video_context.visual_intensity_curve,
                audio_action.emotional_arc
            )
        
        # 5. Optimize beat drop for maximum impact
        if audio_action.beat_drop_position and state.video_context.scene_cuts > 0:
            # Place beat drop at high-impact scene cut
            best_scene_cut = self._find_best_scene_cut_for_drop(
                state.video_context.scene_cut_timestamps,
                audio_action.beat_drop_position
            )
            optimized_action.beat_drop_position = best_scene_cut
        
        return optimized_action
    
    def _find_optimal_beat_timing(self, scene_cuts: List[float], current_timing: float) -> float:
        """Find beat timing that aligns with scene cuts"""
        if not scene_cuts:
            return current_timing
        
        # Find nearest scene cut to current timing
        nearest_cut = min(scene_cuts, key=lambda x: abs(x - current_timing))
        
        # Adjust timing to align with scene cut
        adjustment = nearest_cut - current_timing
        
        # Don't adjust more than 0.3 seconds
        return np.clip(adjustment, -0.3, 0.3)
    
    def _optimize_caption_sync(
        self,
        caption_timestamps: List[Tuple[float, str]],
        hook_position: float
    ) -> float:
        """Optimize caption timing relative to audio hook"""
        if not caption_timestamps:
            return 0.0
        
        # Find caption closest to hook
        nearest_caption_time = min(caption_timestamps, key=lambda x: abs(x[0] - hook_position))[0]
        
        # Calculate adjustment to sync
        return nearest_caption_time - hook_position
    
    def _match_visual_energy(
        self,
        visual_curve: List[float],
        audio_arc: List[float]
    ) -> List[float]:
        """Match audio emotional arc to visual energy curve"""
        if not visual_curve or not audio_arc:
            return audio_arc
        
        # Interpolate visual curve to match audio arc length
        visual_resampled = np.interp(
            np.linspace(0, 1, len(audio_arc)),
            np.linspace(0, 1, len(visual_curve)),
            visual_curve
        )
        
        # Blend audio and visual (70% audio, 30% visual)
        matched_arc = [0.7 * a + 0.3 * v for a, v in zip(audio_arc, visual_resampled)]
        
        return matched_arc
    
    def _find_best_scene_cut_for_drop(
        self,
        scene_cuts: List[float],
        target_position: float
    ) -> float:
        """Find best scene cut position for beat drop"""
        if not scene_cuts:
            return target_position
        
        # Find scene cuts in middle to late section (5-20 seconds)
        valid_cuts = [cut for cut in scene_cuts if 5.0 <= cut <= 20.0]
        
        if not valid_cuts:
            return target_position
        
        # Return cut closest to target
        return min(valid_cuts, key=lambda x: abs(x - target_position))
    
    def learn_from_feedback(self, state: State, action: ActionSpace, performance: PerformanceMetrics):
        """Learn which sync patterns work best"""
        sync_key = f"{state.platform.value}_{state.niche}"
        
        sync_pattern = {
            'beat_scene_alignment': state.audio_features.beat_scene_alignment,
            'visual_hook_coord': state.audio_features.visual_hook_coordination,
            'caption_sync': state.audio_features.caption_sync_score,
            'performance_score': performance.viral_score(state.platform)
        }
        
        self.sync_memory[sync_key].append(sync_pattern)
        
        # Keep only top 50 patterns per context
        if len(self.sync_memory[sync_key]) > 50:
            self.sync_memory[sync_key] = sorted(
                self.sync_memory[sync_key],
                key=lambda x: x['performance_score'],
                reverse=True
            )[:50]


 class MetaViralAgent:
    """
    Meta-Viral Agent - Oversees engagement predictions and dynamically
    adjusts reward multipliers based on platform trends and performance patterns.
    """
    
    def __init__(self):
        self.agent_id = "meta_viral_agent"
        
        # Prediction models (simplified - would use ML in production)
        self.prediction_history = deque(maxlen=2000)
        self.prediction_accuracy = defaultdict(lambda: 0.5)
        
        # Dynamic reward multipliers
        self.reward_multipliers = {
            'trending': 1.0,
            'niche': defaultdict(lambda: 1.0),
            'platform': defaultdict(lambda: 1.0),
            'beat_type': defaultdict(lambda: 1.0),
            'time_of_day': defaultdict(lambda: 1.0)
        }
        
        # Performance tracking
        self.platform_performance = defaultdict(list)
        self.niche_performance = defaultdict(list)
        
    def predict_engagement(
        self,
        state: State,
        action: ActionSpace
    ) -> Dict[str, float]:
        """
        Predict comprehensive engagement metrics before video is posted.
        Uses historical patterns and current context.
        """
        # Base predictions from state
        base_watch_time = state.audience_projections.predicted_watch_time
        base_loop_prob = state.audience_projections.predicted_loop_probability
        base_engagement = state.audience_projections.predicted_engagement_rate
        
        # Adjust based on action quality
        action_quality = self._assess_action_quality(action, state)
        
        # Adjust based on historical patterns
        historical_boost = self._historical_pattern_boost(state)
        
        # Platform-specific adjustments
        platform_factor = self._platform_prediction_factor(state.platform, state)
        
        # Calculate predictions
        predicted_watch_time = np.clip(
            base_watch_time * action_quality * historical_boost * platform_factor,
            0.0, 1.0
        )
        
        predicted_loop_prob = np.clip(
            base_loop_prob * (1.0 + action.melodic_repetition * 0.1) * historical_boost,
            0.0, 1.0
        )
        
        predicted_engagement = np.clip(
            base_engagement * action_quality * platform_factor,
            0.0, 0.5
        )
        
        # Predict views based on all factors
        predicted_views = self._predict_views(
            predicted_watch_time,
            predicted_engagement,
            state,
            action
        )
        
        # Calculate viral score
        viral_score = (
            predicted_watch_time * 0.35 +
            predicted_loop_prob * 0.30 +
            predicted_engagement * 0.25 +
            min(predicted_views / 5_000_000, 1.0) * 0.10
        )
        
        predictions = {
            'views': predicted_views,
            'watch_time': predicted_watch_time,
            'loop_prob': predicted_loop_prob,
            'engagement': predicted_engagement,
            'viral_score': viral_score,
            'first_hour_views': predicted_views * 0.15,  # Estimate
            'confidence': state.audience_projections.virality_confidence
        }
        
        return predictions
    
    def _assess_action_quality(self, action: ActionSpace, state: State) -> float:
        """Assess intrinsic quality of action for virality"""
        quality = 1.0
        
        # Hook positioning (earlier is better)
        if action.hook_position < 1.0:
            quality *= 1.3
        elif action.hook_position > 2.5:
            quality *= 0.7
        
        # Hook intensity
        quality *= (0.7 + action.hook_intensity * 0.3)
        
        # Cross-modal sync
        if action.beat_to_scene_cut_sync:
            quality *= 1.15
        if action.audio_visual_hook_sync:
            quality *= 1.2
        
        # Earworm factors
        if action.melodic_repetition >= 2:
            quality *= 1.1
        
        # Voice energy
        energy_boost = {"low": 0.9, "medium": 1.0, "high": 1.15, "explosive": 1.25}
        quality *= energy_boost.get(action.voice_energy_level, 1.0)
        
        return np.clip(quality, 0.5, 2.0)
    
    def _historical_pattern_boost(self, state: State) -> float:
        """Boost predictions based on historical success"""
        if not state.historical_patterns:
            return 1.0
        
        # Get average efficacy of historical patterns
        avg_efficacy = np.mean([p.efficacy_score for p in state.historical_patterns])
        
        # Best pattern efficacy
        best_efficacy = state.top_pattern_efficacy
        
        # Boost based on proven patterns
        boost = 0.8 + (avg_efficacy * 0.3) + (best_efficacy * 0.2)
        
        return np.clip(boost, 0.8, 1.5)
    
    def _platform_prediction_factor(self, platform: Platform, state: State) -> float:
        """Platform-specific prediction adjustments"""
        factor = 1.0
        
        # Check if posting at optimal time
        if state.posting_time_hour in state.platform_metadata.peak_posting_times:
            factor *= 1.2
        
        # Check if using trending beat
        if state.beat_type in state.platform_metadata.trending_beat_types:
            factor *= 1.15
        
        # Platform-specific factors
        if platform == Platform.TIKTOK:
            # TikTok loves high energy and loops
            if state.audio_features.emotional_intensity > 0.7:
                factor *= 1.1
        elif platform == Platform.YOUTUBE_SHORTS:
            # YouTube values watch time
            if state.video_context.duration_seconds > 30:
                factor *= 1.05
        
        return factor
    
    def _predict_views(
        self,
        watch_time: float,
        engagement: float,
        state: State,
        action: ActionSpace
    ) -> int:
        """Predict total view count"""
        # Base views from historical patterns
        if state.historical_patterns:
            avg_historical_views = np.mean([p.performance.views for p in state.historical_patterns])
            base_views = avg_historical_views
        else:
            base_views = 500_000  # Conservative baseline
        
        # Scale based on predicted metrics
        view_multiplier = (
            watch_time * 1.5 +
            engagement * 2.0 +
            state.audience_projections.predicted_scroll_stop_rate * 1.2
        )
        
        # Platform-specific scaling
        platform_scaling = {
            Platform.TIKTOK: 1.3,
            Platform.YOUTUBE_SHORTS: 1.1,
            Platform.INSTAGRAM_REELS: 1.0
        }
        
        predicted_views = int(base_views * view_multiplier * platform_scaling.get(state.platform, 1.0))
        
        # Apply trend multipliers
        if state.is_trending_period:
            predicted_views = int(predicted_views * 1.4)
        
        # Cap at realistic maximum
        return min(predicted_views, 50_000_000)
    
    def adjust_reward_multipliers(self, recent_performance: List[Tuple[State, PerformanceMetrics]]):
        """Dynamically adjust reward multipliers based on recent performance trends"""
        if len(recent_performance) < 30:
            return
        
        # Track performance by different dimensions
        platform_scores = defaultdict(list)
        niche_scores = defaultdict(list)
        beat_scores = defaultdict(list)
        time_scores = defaultdict(list)
        
        for state, metrics in recent_performance:
            viral_score = metrics.viral_score(state.platform)
            
            platform_scores[state.platform].append(viral_score)
            niche_scores[state.niche].append(viral_score)
            beat_scores[state.beat_type].append(viral_score)
            time_scores[state.posting_time_hour].append(viral_score)
        
        # Update platform multipliers
        for platform, scores in platform_scores.items():
            avg_score = np.mean(scores)
            if avg_score > 0.7:
                self.reward_multipliers['platform'][platform] = 1.25
            elif avg_score > 0.55:
                self.reward_multipliers['platform'][platform] = 1.1
            elif avg_score < 0.4:
                self.reward_multipliers['platform'][platform] = 0.9
            else:
                self.reward_multipliers['platform'][platform] = 1.0
        
        # Update niche multipliers
        for niche, scores in niche_scores.items():
            avg_score = np.mean(scores)
            if avg_score > 0.65:
                self.reward_multipliers['niche'][niche] = 1.2
            elif avg_score < 0.45:
                self.reward_multipliers['niche'][niche] = 0.95
            else:
                self.reward_multipliers['niche'][niche] = 1.0
        
        # Update beat type multipliers
        for beat, scores in beat_scores.items():
            avg_score = np.mean(scores)
            if avg_score > 0.7:
                self.reward_multipliers['beat_type'][beat] = 1.15
            else:
                self.reward_multipliers['beat_type'][beat] = 1.0
        
        # Update trending multiplier based on overall performance
        overall_avg = np.mean([m.viral_score(s.platform) for s, m in recent_performance])
        if overall_avg > 0.7:
            self.reward_multipliers['trending'] = 1.3
        elif overall_avg > 0.6:
            self.reward_multipliers['trending'] = 1.15
        else:
            self.reward_multipliers['trending'] = 1.0
        
        logger.info(f"Updated reward multipliers - trending: {self.reward_multipliers['trending']:.2f}")
    
    def update_prediction_accuracy(self, predicted: Dict[str, float], actual: PerformanceMetrics, state: State):
        """Track prediction accuracy to improve meta-agent over time"""
        # Calculate errors
        view_error = abs(predicted['views'] - actual.views) / max(actual.views, 1)
        watch_time_error = abs(predicted['watch_time'] - actual.watch_through_rate)
        engagement_error = abs(predicted['engagement'] - actual.engagement_quality())
        
        # Update accuracy tracking
        context_key = f"{state.platform.value}_{state.niche}"
        current_accuracy = self.prediction_accuracy[context_key]
        
        # Calculate new accuracy (inverse of average error)
        new_accuracy = 1.0 - (view_error * 0.4 + watch_time_error * 0.3 + engagement_error * 0.3)
        
        # Exponential moving average
        self.prediction_accuracy[context_key] = 0.8 * current_accuracy + 0.2 * new_accuracy
        
        # Store in history
        self.prediction_history.append({
            'predicted': predicted,
            'actual': actual,
            'context': context_key,
            'accuracy': new_accuracy,
            'timestamp': datetime.now()
        })


 class ABTestingEngine:
    """
    A/B Testing Engine - Generates multiple audio variants and ranks them
    by predicted viral performance before deployment.
    """
    
    def __init__(self, primary_agent: PrimaryAudioAgent, visual_agent: VisualHookAgent, meta_agent: MetaViralAgent):
        self.primary_agent = primary_agent
        self.visual_agent = visual_agent
        self.meta_agent = meta_agent
        self.variant_count = 10  # Generate 10 variants per video
        self.executor = ThreadPoolExecutor(max_workers=8)
    
    def generate_and_rank_variants(
        self,
        state: State,
        n_variants: int = 10
    ) -> List[Tuple[ActionSpace, Dict[str, float], float]]:
        """
        Generate multiple audio variants and rank by predicted viral score.
        Returns list of (action, predictions, score) tuples sorted by score.
        """
        variants = []
        
        # Generate variants in parallel
        futures = []
        for i in range(n_variants):
            future = self.executor.submit(self._generate_variant, state, i)
            futures.append(future)
        
        # Collect results
        for future in as_completed(futures):
            try:
                variant_data = future.result()
                if variant_data:
                    variants.append(variant_data)
            except Exception as e:
                logger.error(f"Variant generation failed: {e}")
        
        # Sort by viral score (descending)
        variants.sort(key=lambda x: x[2], reverse=True)
        
        logger.info(f"Generated {len(variants)} variants, top score: {variants[0][2]:.3f}")
        
        return variants
    
    def _generate_variant(
        self,
        state: State,
        variant_id: int
    ) -> Optional[Tuple[ActionSpace, Dict[str, float], float]]:
        """Generate single variant with predictions and scoring"""
        try:
            # Use different exploration strategies for variety
            if variant_id == 0:
                # Variant 0: Pure exploitation (best known)
                action = self.primary_agent.select_action(state, explore=False)
            elif variant_id < 3:
                # Variants 1-2: Slight exploration
                action = self.primary_agent._intelligent_exploration(state)
            else:
                # Variants 3+: More random exploration
                action = self.primary_agent.select_action(state, explore=True)
            
            # Apply cross-modal optimization
            optimized_action = self.visual_agent.optimize_crossmodal_sync(state, action)
            
            # Get predictions from meta-agent
            predictions = self.meta_agent.predict_engagement(state, optimized_action)
            
            # Calculate composite score for ranking
            viral_score = predictions['viral_score']
            
            # Boost score based on meta-agent confidence
            confidence_boost = predictions['confidence']
            final_score = viral_score * (0.8 + confidence_boost * 0.2)
            
            return (optimized_action, predictions, final_score)
        
        except Exception as e:
            logger.error(f"Failed to generate variant {variant_id}: {e}")
            return None
    
    def select_best_variant(
        self,
        variants: List[Tuple[ActionSpace, Dict[str, float], float]],
        risk_tolerance: float = 0.1
    ) -> Tuple[ActionSpace, Dict[str, float]]:
        """
        Select best variant with optional risk tolerance.
        risk_tolerance: 0.0 = always pick #1, 1.0 = allow more experimentation
        """
        if not variants:
            raise ValueError("No variants available for selection")
        
        # With some probability, pick a high-scoring but not top variant
        if np.random.random() < risk_tolerance and len(variants) > 3:
            # Pick from top 3
            selected_idx = np.random.randint(0, 3)
            selected = variants[selected_idx]
            logger.info(f"Selected variant #{selected_idx+1} (exploration) with score {selected[2]:.3f}")
        else:
            # Pick the best
            selected = variants[0]
            logger.info(f"Selected top variant with score {selected[2]:.3f}")
        
        return selected[0], selected[1]


 class AdvancedMemoryManager:
    """
    Advanced Memory Manager - Full integration with HOT/WARM/COLD pattern storage.
    Implements sophisticated pattern retrieval, diversity enforcement, and decay.
    """
    
    def __init__(self):
        self.patterns: Dict[str, HistoricalPattern] = {}
        self.hot_patterns: List[str] = []
        self.warm_patterns: List[str] = []
        self.cold_patterns: List[str] = []
        
        # Replay buffer for high-performing patterns
        self.replay_buffer = deque(maxlen=200)
        
        # Pattern usage tracking
        self.usage_count = defaultdict(int)
        self.last_used = {}
        
        # Diversity enforcement
        self.diversity_threshold = 3  # Max times to use similar pattern in window
        self.recent_window = deque(maxlen=20)
    
    def store_pattern(
        self,
        pattern_id: str,
        features: AudioFeatures,
        performance: PerformanceMetrics,
        niche: str,
        platform: Platform,
        beat_type: BeatType
    ):
        """Store new audio pattern with performance data"""
        # Calculate efficacy score
        efficacy = self._calculate_efficacy(performance, platform)
        
        # Determine memory layer
        memory_layer = self._assign_memory_layer(efficacy, datetime.now())
        
        pattern = HistoricalPattern(
            pattern_id=pattern_id,
            features=features,
            performance=performance,
            niche=niche,
            platform=platform,
            beat_type=beat_type,
            timestamp=datetime.now(),
            efficacy_score=efficacy,
            memory_layer=memory_layer
        )
        
        self.patterns[pattern_id] = pattern
        self._assign_to_layer(pattern_id, memory_layer)
        
        # Add to replay buffer if high-performing
        if efficacy > 0.6:
            self.replay_buffer.append(pattern)
        
        logger.info(f"Stored pattern {pattern_id} in {memory_layer.value} layer, efficacy: {efficacy:.3f}")
    
    def _calculate_efficacy(self, performance: PerformanceMetrics, platform: Platform) -> float:
        """Calculate pattern efficacy score"""
        viral_score = performance.viral_score(platform)
        view_score = min(performance.views / 5_000_000, 1.0)
        engagement_score = performance.engagement_quality()
        
        efficacy = (
            viral_score * 0.4 +
            view_score * 0.35 +
            engagement_score * 0.25
        )
        
        return np.clip(efficacy, 0.0, 1.0)
    
    def _assign_memory_layer(self, efficacy: float, timestamp: datetime) -> MemoryLayer:
        """Assign pattern to appropriate memory layer"""
        if efficacy > 0.7:
            return MemoryLayer.HOT
        elif efficacy > 0.5:
            return MemoryLayer.WARM
        else:
            return MemoryLayer.COLD
    
    def _assign_to_layer(self, pattern_id: str, layer: MemoryLayer):
        """Assign pattern ID to specific layer"""
        # Remove from all layers first
        self.hot_patterns = [p for p in self.hot_patterns if p != pattern_id]
        self.warm_patterns = [p for p in self.warm_patterns if p != pattern_id]
        self.cold_patterns = [p for p in self.cold_patterns if p != pattern_id]
        
        # Add to appropriate layer
        if layer == MemoryLayer.HOT:
            self.hot_patterns.append(pattern_id)
        elif layer == MemoryLayer.WARM:
            self.warm_patterns.append(pattern_id)
        else:
            self.cold_patterns.append(pattern_id)
    
    def retrieve_top_patterns(
        self,
        niche: str,
        platform: Platform,
        beat_type: BeatType,
        n: int = 10,
        enforce_diversity: bool = True
    ) -> List[HistoricalPattern]:
        """
        Retrieve top-performing patterns for given context.
        Prioritizes HOT layer, applies decay, enforces diversity.
        """
        # Filter by context
        candidates = [
            p for p in self.patterns.values()
            if p.niche == niche and p.platform == platform and p.beat_type == beat_type
        ]
        
        if not candidates:
            logger.warning(f"No patterns found for {niche}/{platform.value}/{beat_type.value}")
            return []
        
        # Apply time-based decay
        current_time = datetime.now()
        for pattern in candidates:
            days_old = (current_time - pattern.timestamp).days
            decay_factor = np.exp(-0.03 * days_old)
            pattern.efficacy_score *= decay_factor
        
        # Sort by efficacy
        candidates.sort(key=lambda p: p.efficacy_score, reverse=True)
        
        # Prioritize HOT layer
        hot_candidates = [p for p in candidates if p.memory_layer == MemoryLayer.HOT]
        warm_candidates = [p for p in candidates if p.memory_layer == MemoryLayer.WARM]
        cold_candidates = [p for p in candidates if p.memory_layer == MemoryLayer.COLD]
        
        # Combine with priority
        prioritized = hot_candidates + warm_candidates + cold_candidates
        
        # Enforce diversity if requested
        if enforce_diversity:
            prioritized = self._enforce_pattern_diversity(prioritized)
        
        # Return top N
        selected = prioritized[:n]
        
        # Update usage tracking
        for pattern in selected:
            self.usage_count[pattern.pattern_id] += 1
            self.last_used[pattern.pattern_id] = current_time
            self.recent_window.append(pattern.pattern_id)
        
        return selected
    
    def _enforce_pattern_diversity(self, patterns: List[HistoricalPattern]) -> List[HistoricalPattern]:
        """Enforce diversity to avoid overusing similar patterns"""
        diversified = []
        pattern_signatures = set()
        
        for pattern in patterns:
            # Create signature based on key features
            signature = self._pattern_signature(pattern)
            
            # Check usage in recent window
            recent_usage = sum(1 for pid in list(self.recent_window) if pid == pattern.pattern_id)
            
            # Skip if overused or too similar to already selected
            if recent_usage >= self.diversity_threshold:
                continue
            
            if signature in pattern_signatures:
                continue
            
            diversified.append(pattern)
            pattern_signatures.add(signature)
        
        # Fill remainder with any remaining patterns if needed
        if len(diversified) < len(patterns):
            remaining = [p for p in patterns if p not in diversified]
            diversified.extend(remaining[:len(patterns) - len(diversified)])
        
        return diversified
    
    def _pattern_signature(self, pattern: HistoricalPattern) -> str:
        """Create signature for similarity detection"""
        features = pattern.features
        signature = (
            f"{int(features.tempo_bpm / 10) * 10}_"
            f"{int(features.emotional_intensity * 10)}_"
            f"{int(features.hook_position_seconds)}_"
            f"{pattern.beat_type.value}"
        )
        return signature
    
    def get_replay_samples(self, n: int = 20) -> List[HistoricalPattern]:
        """Get high-performing samples from replay buffer for training"""
        if len(self.replay_buffer) < n:
            return list(self.replay_buffer)
        
        # Sample with preference for higher efficacy
        efficacy_scores = [p.efficacy_score for p in self.replay_buffer]
        probabilities = np.array(efficacy_scores) / sum(efficacy_scores)
        
        indices = np.random.choice(
            len(self.replay_buffer),
            size=n,
            replace=False,
            p=probabilities
        )
        
        return [self.replay_buffer[i] for i in indices]
    
    def update_pattern_performance(
        self,
        pattern_id: str,
        new_performance: PerformanceMetrics,
        platform: Platform
    ):
        """Update existing pattern with new performance data"""
        if pattern_id not in self.patterns:
            logger.warning(f"Pattern {pattern_id} not found for update")
            return
        
        pattern = self.patterns[pattern_id]
        
        # Recalculate efficacy
        new_efficacy = self._calculate_efficacy(new_performance, platform)
        
        # Update with exponential moving average
        alpha = 0.3
        pattern.efficacy_score = alpha * new_efficacy + (1 - alpha) * pattern.efficacy_score
        
        # Update performance data
        pattern.performance = new_performance
        pattern.timestamp = datetime.now()
        
        # Reassign memory layer if needed
        new_layer = self._assign_memory_layer(pattern.efficacy_score, pattern.timestamp)
        if new_layer != pattern.memory_layer:
            pattern.memory_layer = new_layer
            self._assign_to_layer(pattern_id, new_layer)
    
    def prune_old_patterns(self, days_threshold: int = 90):
        """Remove patterns older than threshold from COLD layer"""
        current_time = datetime.now()
        to_remove = []
        
        for pattern_id in self.cold_patterns:
            pattern = self.patterns[pattern_id]
            age_days = (current_time - pattern.timestamp).days
            
            if age_days > days_threshold and pattern.efficacy_score < 0.3:
                to_remove.append(pattern_id)
        
        for pattern_id in to_remove:
            del self.patterns[pattern_id]
            self.cold_patterns.remove(pattern_id)
        
        if to_remove:
            logger.info(f"Pruned {len(to_remove)} old patterns from COLD layer")


 class AudioReinforcementLoop:
    """
    Main RL System - Orchestrates all agents, memory, and learning.
    Designed for autonomous 5M+ view optimization.
    """
    
    def __init__(self):
        # Initialize all agents
        self.primary_agent = PrimaryAudioAgent(state_dim=52, action_dim=23)
        self.visual_agent = VisualHookAgent()
        self.meta_agent = MetaViralAgent()
        
        # Reward function
        self.reward_function = AdvancedRewardFunction()
        
        # Memory manager
        self.memory_manager = AdvancedMemoryManager()
        
        # A/B testing engine
        self.ab_engine = ABTestingEngine(self.primary_agent, self.visual_agent, self.meta_agent)
        
        # Performance tracking
        self.performance_history = deque(maxlen=2000)
        self.training_episodes = 0
        
        # Engine weights for TTS and voice sync
        self.engine_weights = {
            'tts': {
                'pace_wpm': 150,
                'pitch_variance': 0.5,
                'emotional_intensity': 0.7,
                'voice_clarity': 0.8
            },
            'voice_sync': {
                'beat_alignment_tolerance': 0.1,
                'scene_sync_priority': 0.8,
                'caption_sync_priority': 0.7
            }
        }
        
        # Real-time learning queue
        self.feedback_queue = queue.Queue()
        self.learning_thread = threading.Thread(target=self._continuous_learning_loop, daemon=True)
        self.learning_thread.start()
        
        # Training metrics
        self.metrics = {
            'total_videos': 0,
            'avg_views': 0.0,
            'avg_viral_score': 0.0,
            'success_rate_5m': 0.0,  # % of videos hitting 5M+
            'pattern_diversity': 0.0,
            'prediction_accuracy': 0.0
        }
        
        logger.info("AudioReinforcementLoop initialized with all agents")
    
    def process_video_performance(
        self,
        pattern_id: str,
        metrics: PerformanceMetrics,
        state: State,
        action: ActionSpace,
        async_learning: bool = True
    ) -> float:
        """
        Process performance feedback and update RL system.
        Returns calculated reward.
        """
        # Get historical patterns for context
        pattern_history = self.memory_manager.retrieve_top_patterns(
            state.niche,
            state.platform,
            state.beat_type,
            n=15
        )
        
        # Get predictions that were made
        predicted_metrics = self.meta_agent.predict_engagement(state, action)
        
        # Calculate reward
        reward = self.reward_function.calculate(
            metrics,
            state,
            action,
            predicted_metrics,
            pattern_history
        )
        
        # Apply meta-agent multipliers
        reward *= self.meta_agent.reward_multipliers['trending']
        reward *= self.meta_agent.reward_multipliers['niche'].get(state.niche, 1.0)
        reward *= self.meta_agent.reward_multipliers['platform'].get(state.platform, 1.0)
        
        # Update memory manager with performance
        self.memory_manager.store_pattern(
            pattern_id=pattern_id,
            features=state.audio_features,
            performance=metrics,
            niche=state.niche,
            platform=state.platform,
            beat_type=state.beat_type
        )
        
        # Update prediction accuracy
        self.meta_agent.update_prediction_accuracy(predicted_metrics, metrics, state)
        
        # Update visual agent learning
        self.visual_agent.learn_from_feedback(state, action, metrics)
        
        # Store performance in history
        self.performance_history.append((state, metrics, reward))
        
        # Update agents
        if async_learning:
            # Add to queue for async processing
            self.feedback_queue.put((state, action, reward, metrics))
        else:
            # Immediate synchronous update
            self._update_agents(state, action, reward)
        
        # Periodically adjust multipliers
        if len(self.performance_history) % 50 == 0:
            recent = list(self.performance_history)[-100:]
            self.meta_agent.adjust_reward_multipliers([(s, m) for s, m, r in recent])
            self.reward_function.update_dynamic_multipliers([(s, m) for s, m, r in recent])
        
        # Update training metrics
        self._update_training_metrics(metrics, reward, state.platform)
        
        # Update engine weights if high-performing
        if metrics.views >= 5_000_000:
            self._update_engine_weights_from_success(pattern_id, state, action)
        
        self.training_episodes += 1
        
        logger.info(
            f"Processed pattern {pattern_id}: "
            f"views={metrics.views:,}, reward={reward:.3f}, "
            f"viral_score={metrics.viral_score(state.platform):.3f}"
        )
        
        return reward
    
    def _update_agents(self, state: State, action: ActionSpace, reward: float):
        """Update RL agents with feedback"""
        # For next state, use current state with updated timestamp
        next_state = state  # Simplified - in production would generate next state
        
        # Update primary audio agent
        self.primary_agent.update(state, action, reward, next_state)
    
    def _continuous_learning_loop(self):
        """Background thread for continuous learning from feedback queue"""
        while True:
            try:
                # Get feedback from queue (blocking)
                state, action, reward, metrics = self.feedback_queue.get(timeout=1.0)
                
                # Update agents
                self._update_agents(state, action, reward)
                
                self.feedback_queue.task_done()
                
            except queue.Empty:
                continue
            except Exception as e:
                logger.error(f"Error in continuous learning loop: {e}")
    
    def _update_training_metrics(self, metrics: PerformanceMetrics, reward: float, platform: Platform):
        """Update overall training statistics"""
        self.metrics['total_videos'] += 1
        
        # Exponential moving average for continuous metrics
        alpha = 0.05
        self.metrics['avg_views'] = alpha * metrics.views + (1 - alpha) * self.metrics['avg_views']
        self.metrics['avg_viral_score'] = alpha * metrics.viral_score(platform) + (1 - alpha) * self.metrics['avg_viral_score']
        
        # Success rate (5M+ views)
        if metrics.views >= 5_000_000:
            success_count = sum(1 for _, m, _ in list(self.performance_history) if m.views >= 5_000_000)
            self.metrics['success_rate_5m'] = success_count / max(len(self.performance_history), 1)
        
        # Pattern diversity (unique patterns in recent window)
        recent_patterns = set(self.memory_manager.recent_window)
        self.metrics['pattern_diversity'] = len(recent_patterns) / max(len(self.memory_manager.recent_window), 1)
        
        # Prediction accuracy
        if self.meta_agent.prediction_accuracy:
            avg_accuracy = np.mean(list(self.meta_agent.prediction_accuracy.values()))
            self.metrics['prediction_accuracy'] = avg_accuracy
    
    def _update_engine_weights_from_success(self, pattern_id: str, state: State, action: ActionSpace):
        """Update TTS and voice sync engine weights from successful pattern"""
        features = state.audio_features
        
        # Update TTS weights
        self.engine_weights['tts']['pace_wpm'] = 0.7 * self.engine_weights['tts']['pace_wpm'] + 0.3 * features.pace_wpm
        self.engine_weights['tts']['pitch_variance'] = 0.7 * self.engine_weights['tts']['pitch_variance'] + 0.3 * features.pitch_variance
        self.engine_weights['tts']['emotional_intensity'] = 0.7 * self.engine_weights['tts']['emotional_intensity'] + 0.3 * features.emotional_intensity
        self.engine_weights['tts']['voice_clarity'] = 0.7 * self.engine_weights['tts']['voice_clarity'] + 0.3 * features.vocal_clarity
        
        # Update voice sync weights
        self.engine_weights['voice_sync']['beat_alignment_tolerance'] = min(
            0.7 * self.engine_weights['voice_sync']['beat_alignment_tolerance'] + 0.3 * features.beat_alignment_error,
            0.15
        )
        self.engine_weights['voice_sync']['scene_sync_priority'] = 0.7 * self.engine_weights['voice_sync']['scene_sync_priority'] + 0.3 * features.beat_scene_alignment
        self.engine_weights['voice_sync']['caption_sync_priority'] = 0.7 * self.engine_weights['voice_sync']['caption_sync_priority'] + 0.3 * features.caption_sync_score
        
        logger.info(f"Updated engine weights from successful pattern {pattern_id}")
    
    def get_optimal_audio_action(
        self,
        niche: str,
        platform: Platform,
        beat_type: BeatType,
        video_context: VideoContext,
        use_ab_testing: bool = True,
        n_variants: int = 10
    ) -> Tuple[ActionSpace, Dict[str, float]]:
        """
        Get optimal audio action for new video.
        Uses A/B testing to generate and rank multiple variants.
        
        Returns: (best_action, predictions)
        """
        # Retrieve historical patterns
        historical_patterns = self.memory_manager.retrieve_top_patterns(
            niche, platform, beat_type, n=10, enforce_diversity=True
        )
        
        # Build state
        state = self._build_state(
            niche=niche,
            platform=platform,
            beat_type=beat_type,
            video_context=video_context,
            historical_patterns=historical_patterns
        )
        
        if use_ab_testing and n_variants > 1:
            # Generate and rank multiple variants
            variants = self.ab_engine.generate_and_rank_variants(state, n_variants=n_variants)
            
            if variants:
                # Select best variant (with small exploration chance)
                best_action, predictions = self.ab_engine.select_best_variant(
                    variants,
                    risk_tolerance=0.1
                )
                
                logger.info(f"Selected best of {len(variants)} variants, predicted views: {predictions['views']:,}")
                return best_action, predictions
        
        # Fallback: single action from primary agent
        action = self.primary_agent.select_action(state, explore=False)
        optimized_action = self.visual_agent.optimize_crossmodal_sync(state, action)
        predictions = self.meta_agent.predict_engagement(state, optimized_action)
        
        return optimized_action, predictions
    
    def _build_state(
        self,
        niche: str,
        platform: Platform,
        beat_type: BeatType,
        video_context: VideoContext,
        historical_patterns: List[HistoricalPattern]
    ) -> State:
        """Build complete state representation"""
        # Get platform metadata
        platform_metadata = self._get_platform_metadata(platform)
        
        # Generate audience projections
        audience_projections = self._generate_audience_projections(
            niche, platform, historical_patterns
        )
        
        # Use best pattern's features as starting point, or defaults
        if historical_patterns:
            audio_features = historical_patterns[0].features
            top_efficacy = historical_patterns[0].efficacy_score
        else:
            audio_features = self._get_default_audio_features()
            top_efficacy = 0.5
        
        # Current time context
        now = datetime.now()
        posting_hour = now.hour
        day_of_week = now.weekday()
        is_trending = posting_hour in platform_metadata.peak_posting_times
        
        state = State(
            audio_features=audio_features,
            video_context=video_context,
            platform=platform,
            niche=niche,
            beat_type=beat_type,
            historical_patterns=historical_patterns,
            top_pattern_efficacy=top_efficacy,
            platform_metadata=platform_metadata,
            audience_projections=audience_projections,
            posting_time_hour=posting_hour,
            day_of_week=day_of_week,
            is_trending_period=is_trending
        )
        
        return state
    
    def _get_platform_metadata(self, platform: Platform) -> PlatformMetadata:
        """Get current platform metadata and trends"""
        # Simplified - in production would fetch from API
        if platform == Platform.TIKTOK:
            return PlatformMetadata(
                platform=platform,
                current_trending_sounds=["phonk_beat_01", "drill_sample_03"],
                trending_beat_types=[BeatType.PHONK, BeatType.DRILL],
                peak_posting_times=[9, 12, 15, 18, 21],
                avg_viral_threshold=1_000_000,
                algorithm_weights={'retention': 0.8, 'engagement': 0.7, 'velocity': 0.9},
                audience_age_range=(16, 34),
                device_usage={'mobile': 0.95, 'desktop': 0.05}
            )
        elif platform == Platform.YOUTUBE_SHORTS:
            return PlatformMetadata(
                platform=platform,
                current_trending_sounds=["electronic_drop", "lo-fi_chill"],
                trending_beat_types=[BeatType.ELECTRONIC, BeatType.LOFI],
                peak_posting_times=[10, 14, 17, 20],
                avg_viral_threshold=2_000_000,
                algorithm_weights={'watch_time': 0.9, 'ctr': 0.8, 'retention': 0.7},
                audience_age_range=(18, 44),
                device_usage={'mobile': 0.75, 'desktop': 0.25}
            )
        else:  # Instagram
            return PlatformMetadata(
                platform=platform,
                current_trending_sounds=["trap_beat", "hyperpop_sample"],
                trending_beat_types=[BeatType.TRAP, BeatType.HYPERPOP],
                peak_posting_times=[8, 11, 14, 19, 22],
                avg_viral_threshold=1_500_000,
                algorithm_weights={'shares': 0.9, 'saves': 0.85, 'engagement': 0.8},
                audience_age_range=(18, 34),
                device_usage={'mobile': 0.92, 'desktop': 0.08}
            )
    
    def _generate_audience_projections(
        self,
        niche: str,
        platform: Platform,
        historical_patterns: List[HistoricalPattern]
    ) -> AudienceBehaviorProjections:
        """Generate audience behavior projections from historical data"""
        if historical_patterns:
            # Average from historical performance
            avg_watch_time = np.mean([p.performance.watch_through_rate for p in historical_patterns])
            avg_loop_prob = np.mean([p.performance.loop_frequency for p in historical_patterns])
            avg_engagement = np.mean([p.performance.engagement_quality() for p in historical_patterns])
            
            confidence = min(len(historical_patterns) / 10, 0.9)
        else:
            # Default conservative projections
            avg_watch_time = 0.5
            avg_loop_prob = 0.3
            avg_engagement = 0.3
            confidence = 0.3
        
        return AudienceBehaviorProjections(
            predicted_watch_time=avg_watch_time,
            predicted_loop_probability=avg_loop_prob,
            predicted_scroll_stop_rate=0.6,
            predicted_engagement_rate=avg_engagement,
            predicted_share_likelihood=avg_engagement * 0.15,
            predicted_save_likelihood=avg_engagement * 0.12,
            virality_confidence=confidence
        )
    
    def _get_default_audio_features(self) -> AudioFeatures:
        """Get default audio features for new contexts"""
        return AudioFeatures(
            pace_wpm=150,
            pitch_variance=0.5,
            hook_jumps=2,
            pause_timing=[0.5, 1.0],
            spectral_centroid=2000,
            emotional_intensity=0.7,
            beat_alignment_error=0.1,
            volume_dynamics=0.7,
            timbre_complexity=0.6,
            tempo_bpm=130,
            syllable_timing_variance=0.15,
            hook_position_seconds=0.8,
            earworm_score=0.6,
            energy_curve=[0.8, 0.75, 0.7, 0.65, 0.6],
            silence_ratio=0.1,
            vocal_clarity=0.8,
            background_music_ratio=0.4,
            transition_smoothness=0.7,
            beat_scene_alignment=0.7,
            caption_sync_score=0.7,
            visual_hook_coordination=0.7
        )
    
    def get_current_optimal_audio_profile(
        self,
        niche: str,
        platform: Platform,
        beat_type: BeatType
    ) -> Dict[str, Any]:
        """
        API Method: Get current optimal audio profile for given context.
        Returns comprehensive audio configuration based on learned patterns.
        """
        # Retrieve top patterns
        top_patterns = self.memory_manager.retrieve_top_patterns(
            niche, platform, beat_type, n=5
        )
        
        if not top_patterns:
            return self._get_default_profile(niche, platform, beat_type)
        
        best_pattern = top_patterns[0]
        
        profile = {
            'pattern_id': best_pattern.pattern_id,
            'efficacy_score': best_pattern.efficacy_score,
            'memory_layer': best_pattern.memory_layer.value,
            'audio_features': {
                'pace_wpm': best_pattern.features.pace_wpm,
                'pitch_variance': best_pattern.features.pitch_variance,
                'emotional_intensity': best_pattern.features.emotional_intensity,
                'tempo_bpm': best_pattern.features.tempo_bpm,
                'hook_position': best_pattern.features.hook_position_seconds,
                'earworm_score': best_pattern.features.earworm_score,
                'vocal_clarity': best_pattern.features.vocal_clarity,
                'beat_alignment_error': best_pattern.features.beat_alignment_error
            },
            'performance_stats': {
                'views': best_pattern.performance.views,
                'viral_score': best_pattern.performance.viral_score(platform),
                'completion_rate': best_pattern.performance.completion_rate,
                'engagement_quality': best_pattern.performance.engagement_quality()
            },
            'engine_weights': self.engine_weights,
            'predictions': {
                'expected_views': self._estimate_views_from_pattern(best_pattern),
                'success_probability': min(best_pattern.efficacy_score, 0.95)
            },
            'recommendations': self._generate_recommendations(best_pattern, top_patterns, platform),
            'alternative_patterns': [
                {
                    'pattern_id': p.pattern_id,
                    'efficacy_score': p.efficacy_score,
                    'views': p.performance.views
                }
                for p in top_patterns[1:4]
            ]
        }
        
        return profile
    
    def _estimate_views_from_pattern(self, pattern: HistoricalPattern) -> int:
        """Estimate expected views based on pattern efficacy"""
        base_views = pattern.performance.views
        efficacy_multiplier = pattern.efficacy_score / 0.7  # Normalize around 0.7 efficacy
        
        estimated = int(base_views * efficacy_multiplier)
        return min(estimated, 20_000_000)  # Cap at reasonable maximum
    
    def _generate_recommendations(
        self,
        best_pattern: HistoricalPattern,
        all_patterns: List[HistoricalPattern],
        platform: Platform
    ) -> List[str]:
        """Generate actionable recommendations"""
        recommendations = []
        features = best_pattern.features
        
        if features.emotional_intensity > 0.75:
            recommendations.append("✓ High emotional intensity proven effective - maintain energy")
        
        if features.hook_position_seconds < 1.0:
            recommendations.append("✓ Early hook placement working - keep hook within first second")
        
        if features.earworm_score > 0.7:
            recommendations.append("✓ Strong earworm potential - emphasize melodic repetition")
        
        if features.beat_scene_alignment > 0.8:
            recommendations.append("✓ Excellent beat-scene sync - continue prioritizing alignment")
        
        if best_pattern.performance.loop_frequency > 0.5:
            recommendations.append("✓ High loop rate detected - optimize for rewatch value")
        
        if platform == Platform.TIKTOK and best_pattern.performance.first_3s_retention > 0.75:
            recommendations.append("✓ Strong first 3s retention - this pattern crushes TikTok algorithm")
        
        # Pattern consistency check
        if len(all_patterns) >= 3:
            avg_views = np.mean([p.performance.views for p in all_patterns])
            if best_pattern.performance.views > avg_views * 1.5:
                recommendations.append(f"⚠ This pattern significantly outperforms others - prioritize it")
        
        return recommendations
    
    def _get_default_profile(self, niche: str, platform: Platform, beat_type: BeatType) -> Dict[str, Any]:
        """Default profile when no patterns exist"""
        return {
            'pattern_id': 'default',
            'efficacy_score': 0.5,
            'memory_layer': 'warm',
            'audio_features': {
                'pace_wpm': 150,
                'pitch_variance': 0.5,
                'emotional_intensity': 0.7,
                'tempo_bpm': 130,
                'hook_position': 0.8,
                'earworm_score': 0.5,
                'vocal_clarity': 0.8,
                'beat_alignment_error': 0.1
            },
            'performance_stats': {
                'views': 0,
                'viral_score': 0.0,
                'completion_rate': 0.0,
                'engagement_quality': 0.0
            },
            'engine_weights': self.engine_weights,
            'predictions': {
                'expected_views': 500_000,
                'success_probability': 0.5
            },
            'recommendations': [
                '⚠ No historical patterns - using safe defaults',
                '→ Generate test videos to build pattern library',
                '→ Focus on early hook (< 1s) and high energy'
            ],
            'alternative_patterns': []
        }
    
    def update_engine_weights(self, pattern_id: Optional[str] = None, manual_weights: Optional[Dict] = None):
        """
        API Method: Update TTS and voice sync engine weights.
        Can update from specific pattern or manual configuration.
        """
        if manual_weights:
            # Manual override
            if 'tts' in manual_weights:
                self.engine_weights['tts'].update(manual_weights['tts'])
            if 'voice_sync' in manual_weights:
                self.engine_weights['voice_sync'].update(manual_weights['voice_sync'])
            logger.info("Engine weights updated manually")
            return
        
        if pattern_id and pattern_id in self.memory_manager.patterns:
            # Update from specific pattern
            pattern = self.memory_manager.patterns[pattern_id]
            state = State(
                audio_features=pattern.features,
                video_context=VideoContext(
                    scene_cuts=0, scene_cut_timestamps=[], visual_intensity_curve=[],
                    caption_timestamps=[], thumbnail_predicted_ctr=0.5, hook_visual_position=0.5,
                    color_palette_energy=0.5, motion_intensity=0.5, text_overlay_density=0.5,
                    duration_seconds=30
                ),
                platform=pattern.platform,
                niche=pattern.niche,
                beat_type=pattern.beat_type,
                historical_patterns=[pattern],
                top_pattern_efficacy=pattern.efficacy_score,
                platform_metadata=self._get_platform_metadata(pattern.platform),
                audience_projections=AudienceBehaviorProjections(
                    predicted_watch_time=0.5, predicted_loop_probability=0.3,
                    predicted_scroll_stop_rate=0.6, predicted_engagement_rate=0.3,
                    predicted_share_likelihood=0.05, predicted_save_likelihood=0.04,
                    virality_confidence=0.5
                ),
                posting_time_hour=12,
                day_of_week=2,
                is_trending_period=False
            )
            action = ActionSpace(
                hook_type=HookType.CURIOSITY, hook_position=pattern.features.hook_position_seconds,
                hook_intensity=pattern.features.emotional_intensity, hook_pitch_shift=0.0,
                beat_timing_adjustment=0.0, tempo_multiplier=1.0, beat_drop_position=None,
                volume_modulation=pattern.features.volume_dynamics, pitch_shift=0.0,
                voice_energy_level="high", voice_clarity_enhancement=pattern.features.vocal_clarity,
                emotional_arc=[pattern.features.emotional_intensity] * 5, suspense_buildup=True,
                payoff_timing=None, beat_to_scene_cut_sync=True, audio_visual_hook_sync=True,
                caption_sync_adjustment=0.0, transition_type="beat_drop", effect_intensity=0.6,
                reverb_amount=0.3, compression_level=0.7, melodic_repetition=2,
                syllable_pattern_emphasis=True
            )
            self._update_engine_weights_from_success(pattern_id, state, action)
        else:
            # Update from all HOT patterns
            hot_patterns = [self.memory_manager.patterns[pid] for pid in self.memory_manager.hot_patterns]
            if hot_patterns:
                for pattern in hot_patterns[:5]:  # Top 5 HOT patterns
                    features = pattern.features
                    self.engine_weights['tts']['pace_wpm'] = 0.8 * self.engine_weights['tts']['pace_wpm'] + 0.2 * features.pace_wpm
                    self.engine_weights['tts']['emotional_intensity'] = 0.8 * self.engine_weights['tts']['emotional_intensity'] + 0.2 * features.emotional_intensity
                logger.info(f"Updated engine weights from {len(hot_patterns)} HOT patterns")
    
    def export_state(self) -> Dict[str, Any]:
        """Export complete system state for persistence"""
        return {
            'patterns': {
                pid: {
                    'pattern_id': p.pattern_id,
                    'features': asdict(p.features),
                    'performance': asdict(p.performance),
                    'niche': p.niche,
                    'platform': p.platform.value,
                    'beat_type': p.beat_type.value,
                    'timestamp': p.timestamp.isoformat(),
                    'efficacy_score': p.efficacy_score,
                    'memory_layer': p.memory_layer.value
                }
                for pid, p in self.memory_manager.patterns.items()
            },
            'memory_layers': {
                'hot': self.memory_manager.hot_patterns,
                'warm': self.memory_manager.warm_patterns,
                'cold': self.memory_manager.cold_patterns
            },
            'engine_weights': self.engine_weights,
            'metrics': self.metrics,
            'reward_multipliers': {
                'trending': self.meta_agent.reward_multipliers['trending'],
                'niche': dict(self.meta_agent.reward_multipliers['niche']),
                'platform': {k.value: v for k, v in self.meta_agent.reward_multipliers['platform'].items()}
            },
            'training_episodes': self.training_episodes,
            'q_table_size': len(self.primary_agent.q_table)
        }
    
    def get_training_status(self) -> Dict[str, Any]:
        """Get current training status and metrics"""
        return {
            'total_videos_processed': self.metrics['total_videos'],
            'training_episodes': self.training_episodes,
            'avg_views': int(self.metrics['avg_views']),
            'avg_viral_score': self.metrics['avg_viral_score'],
            'success_rate_5m_plus': f"{self.metrics['success_rate_5m'] * 100:.1f}%",
            'pattern_diversity': self.metrics['pattern_diversity'],
            'prediction_accuracy': self.metrics['prediction_accuracy'],
            'hot_patterns': len(self.memory_manager.hot_patterns),
            'warm_patterns': len(self.memory_manager.warm_patterns),
            'cold_patterns': len(self.memory_manager.cold_patterns),
            'exploration_rate': self.primary_agent.epsilon,
            'avg_q_value': self.primary_agent.avg_q_value,
            'feedback_queue_size': self.feedback_queue.qsize()
        }


 # Example Usage
 if __name__ == "__main__":
    print("="*80)
    print("AUDIO REINFORCEMENT LOOP - VIRAL INTELLIGENCE SYSTEM")
    print("="*80)
    
    # Initialize system
    rl_system = AudioReinforcementLoop()
    
    # Example: Get optimal audio profile
    print("\n[1] Getting optimal audio profile...")
    profile = rl_system.get_current_optimal_audio_profile(
        niche="tech_reviews",
        platform=Platform.TIKTOK,
        beat_type=BeatType.PHONK
    )
    print(json.dumps(profile, indent=2, default=str))
    
    # Example: Generate optimal action with A/B testing
    print("\n[2] Generating optimal action with A/B testing...")
    video_context = VideoContext(
        scene_cuts=8,
        scene_cut_timestamps=[2.1, 4.5, 7.2, 10.1, 13.5, 17.0, 20.5, 24.0],
        visual_intensity_curve=[0.8, 0.75, 0.9, 0.85, 0.7, 0.8, 0.85, 0.9],
        caption_timestamps=[(0.5, "Hook"), (3.0, "Value"), (8.0, "CTA")],
        thumbnail_predicted_ctr=0.11,
        hook_visual_position=0.6,
        color_palette_energy=0.8,
        motion_intensity=0.75,
        text_overlay_density=0.6,
        duration_seconds=30
    )
    
    action, predictions = rl_system.get_optimal_audio_action(
        niche="tech_reviews",
        platform=Platform.TIKTOK,
        beat_type=BeatType.PHONK,
        video_context=video_context,
        use_ab_testing=True,
        n_variants=10
    )
    
    print(f"Optimal Action: {action.hook_type.value}, position: {action.hook_position:.2f}s")
    print(f"Predictions: {json.dumps(predictions, indent=2, default=str)}")
    
    # Example: Process performance feedback
    print("\n[3] Processing performance feedback...")
    sample_metrics = PerformanceMetrics(
        views=7_200_000,
        retention_2s=0.84,
        first_3s_retention=0.81,
        completion_rate=0.73,
        avg_watch_time=22.5,
        watch_through_rate=0.70,
        likes=920_000,
        shares=145_000,
        saves=98_000,
        comments=52_000,
        replay_rate=0.48,
        loop_frequency=0.55,
        share_rate=0.020,
        save_rate=0.014,
        ctr=0.13,
        profile_visits=85_000,
        follower_conversion=0.12,
        views_first_hour=180_000,
        views_first_24h=3_500_000,
        velocity_score=0.82,
        scroll_stop_rate=0.78,
        sound_use_rate=0.15
    )
    
    # Build state
    sample_features = AudioFeatures(
        pace_wpm=158,
        pitch_variance=0.62,
        hook_jumps=3,
        pause_timing=[0.4, 0.9, 0.3],
        spectral_centroid=2600,
        emotional_intensity=0.85,
        beat_alignment_error=0.06,
        volume_dynamics=0.75,
        timbre_complexity=0.68,
        tempo_bpm=138,
        syllable_timing_variance=0.14,
        hook_position_seconds=0.65,
        earworm_score=0.78,
        energy_curve=[0.85, 0.80, 0.78, 0.72, 0.68],
        """
 audio_reinforcement_loop.py - ADVANCED VIRAL INTELLIGENCE SYSTEM

 Multi-Agent Reinforcement Learning system engineered for guaranteed 5M+ views.
 Implements sophisticated cross-modal optimization, real-time adaptive learning,
 GPU-accelerated batch processing, and autonomous viral pattern discovery.

 Architecture:
 - Primary Audio Agent: Core audio virality optimization
 - Visual/Hook Agent: Cross-modal synchronization with video elements
 - Meta-Viral Agent: Engagement prediction & dynamic reward adjustment
 - Memory Integration: Full HOT/WARM/COLD pattern retrieval and storage
 - A/B Testing Engine: Multi-variant generation and viral scoring
 - Real-time Feedback: Continuous online learning from platform metrics
 """

 import json
 import numpy as np
 from typing import Dict, List, Tuple, Optional, Any, Callable
 from dataclasses import dataclass, field, asdict
 from datetime import datetime, timedelta
 from collections import defaultdict, deque
 import hashlib
 import logging
 from enum import Enum
 import threading
 import queue
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import time

 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)


 class Platform(Enum):
    """Supported platforms with specific optimization rules"""
    TIKTOK = "tiktok"
    YOUTUBE_SHORTS = "youtube_shorts"
    INSTAGRAM_REELS = "instagram_reels"


 class BeatType(Enum):
    """Audio beat patterns for viral content"""
    TRAP = "trap"
    DRILL = "drill"
    HYPERPOP = "hyperpop"
    PHONK = "phonk"
    LOFI = "lofi"
    ORCHESTRAL = "orchestral"
    ELECTRONIC = "electronic"
    JERSEY_CLUB = "jersey_club"
    CUSTOM = "custom"


 class MemoryLayer(Enum):
    """Memory priority tiers for pattern storage"""
    HOT = "hot"      # Recently viral, actively used (efficacy > 0.7, < 7 days)
    WARM = "warm"    # Proven patterns, occasionally used (efficacy > 0.5, < 30 days)
    COLD = "cold"    # Historical data, rarely accessed


 class HookType(Enum):
    """Hook patterns for viral audio"""
    QUESTION = "question"
    SHOCK = "shock"
    CURIOSITY = "curiosity"
    EMOTIONAL = "emotional"
    PATTERN_INTERRUPT = "pattern_interrupt"
    STORY_OPENER = "story_opener"
    CONTROVERSY = "controversy"


 @dataclass
 class AudioFeatures:
    """Comprehensive audio feature representation for RL state"""
    # Core audio metrics
    pace_wpm: float
    pitch_variance: float
    hook_jumps: int
    pause_timing: List[float]
    spectral_centroid: float
    emotional_intensity: float
    beat_alignment_error: float
    volume_dynamics: float
    timbre_complexity: float
    tempo_bpm: float
    syllable_timing_variance: float
    
    # Advanced viral metrics
    hook_position_seconds: float  # When the hook hits
    earworm_score: float  # Predicted memorability (0-1)
    energy_curve: List[float]  # Energy over time
    silence_ratio: float  # Strategic pauses
    vocal_clarity: float  # Voice intelligibility
    background_music_ratio: float  # Voice vs music balance
    transition_smoothness: float  # Between sections
    
    # Cross-modal sync features
    beat_scene_alignment: float  # How well beats align with scene cuts
    caption_sync_score: float  # Alignment with on-screen text
    visual_hook_coordination: float  # Audio-visual hook timing
    
    def to_vector(self) -> np.ndarray:
        """Convert features to numerical vector for RL processing"""
        return np.array([
            self.pace_wpm,
            self.pitch_variance,
            float(self.hook_jumps),
            np.mean(self.pause_timing) if self.pause_timing else 0.0,
            self.spectral_centroid,
            self.emotional_intensity,
            self.beat_alignment_error,
            self.volume_dynamics,
            self.timbre_complexity,
            self.tempo_bpm,
            self.syllable_timing_variance,
            self.hook_position_seconds,
            self.earworm_score,
            np.mean(self.energy_curve) if self.energy_curve else 0.5,
            self.silence_ratio,
            self.vocal_clarity,
            self.background_music_ratio,
            self.transition_smoothness,
            self.beat_scene_alignment,
            self.caption_sync_score,
            self.visual_hook_coordination
        ])
    
    def viral_quality_score(self) -> float:
        """Calculate intrinsic viral quality of audio features"""
        return (
            self.earworm_score * 0.25 +
            self.emotional_intensity * 0.20 +
            (1.0 - self.beat_alignment_error) * 0.15 +
            self.vocal_clarity * 0.15 +
            self.beat_scene_alignment * 0.15 +
            self.energy_curve[0] if self.energy_curve else 0.5 * 0.10  # First 3s energy
        )


 @dataclass
 class PerformanceMetrics:
    """Comprehensive video performance tracking"""
    views: int
    
    # Retention metrics
    retention_2s: float  # % who watched 2+ seconds
    first_3s_retention: float  # Critical TikTok metric
    completion_rate: float
    avg_watch_time: float  # Seconds
    watch_through_rate: float  # % who watched to end
    
    # Engagement metrics
    likes: int
    shares: int
    saves: int
    comments: int
    
    # Viral metrics
    replay_rate: float  # % who rewatched
    loop_frequency: float  # Avg replays per viewer
    share_rate: float  # Shares / views
    save_rate: float  # Saves / views
    
    # Platform-specific
    ctr: float  # Click-through rate for thumbnails
    profile_visits: int
    follower_conversion: float
    
    # Time-based metrics
    views_first_hour: int
    views_first_24h: int
    velocity_score: float  # View acceleration rate
    
    # Audience behavior
    scroll_stop_rate: float  # % who stopped scrolling
    sound_use_rate: float  # If others used the sound
    
    def viral_score(self, platform: Platform) -> float:
        """Calculate comprehensive platform-specific viral score"""
        if platform == Platform.TIKTOK:
            return (
                self.first_3s_retention * 0.25 +
                self.loop_frequency * 0.20 +
                self.completion_rate * 0.15 +
                self.share_rate * 100 * 0.15 +  # Normalize share rate
                min(self.views / 5_000_000, 1.0) * 0.15 +
                self.velocity_score * 0.10
            )
        elif platform == Platform.YOUTUBE_SHORTS:
            return (
                self.watch_through_rate * 0.25 +
                self.ctr * 0.20 +
                self.avg_watch_time / 60 * 0.20 +  # Normalize to 60s max
                min(self.views / 5_000_000, 1.0) * 0.20 +
                self.save_rate * 100 * 0.15
            )
        else:  # Instagram Reels
            return (
                self.share_rate * 100 * 0.25 +
                self.save_rate * 100 * 0.20 +
                self.completion_rate * 0.20 +
                min(self.views / 5_000_000, 1.0) * 0.20 +
                self.follower_conversion * 0.15
            )
    
    def engagement_quality(self) -> float:
        """Calculate engagement quality independent of view count"""
        return (
            self.completion_rate * 0.30 +
            self.loop_frequency * 0.25 +
            (self.likes + self.shares * 3 + self.saves * 5) / max(self.views, 1) * 1000 * 0.25 +
            self.scroll_stop_rate * 0.20
        )


 @dataclass
 class VideoContext:
    """Video-specific context for cross-modal optimization"""
    scene_cuts: int
    scene_cut_timestamps: List[float]
    visual_intensity_curve: List[float]  # Visual energy over time
    caption_timestamps: List[Tuple[float, str]]
    thumbnail_predicted_ctr: float
    hook_visual_position: float  # When visual hook appears
    color_palette_energy: float  # Vibrance of visuals
    motion_intensity: float  # How much movement
    text_overlay_density: float  # Amount of on-screen text
    duration_seconds: float


 @dataclass
 class PlatformMetadata:
    """Platform-specific metadata and trends"""
    platform: Platform
    current_trending_sounds: List[str]
    trending_beat_types: List[BeatType]
    peak_posting_times: List[int]  # Hours of day
    avg_viral_threshold: int  # View count considered viral
    algorithm_weights: Dict[str, float]  # Platform algorithm priorities
    audience_age_range: Tuple[int, int]
    device_usage: Dict[str, float]  # mobile vs desktop percentages


 @dataclass
 class AudienceBehaviorProjections:
    """Predicted audience behavior for optimization"""
    predicted_watch_time: float
    predicted_loop_probability: float
    predicted_scroll_stop_rate: float
    predicted_engagement_rate: float
    predicted_share_likelihood: float
    predicted_save_likelihood: float
    virality_confidence: float  # How confident are predictions


 @dataclass
 class HistoricalPattern:
    """Historical performance pattern from memory manager"""
    pattern_id: str
    features: AudioFeatures
    performance: PerformanceMetrics
    niche: str
    platform: Platform
    beat_type: BeatType
    timestamp: datetime
    efficacy_score: float
    memory_layer: MemoryLayer


 @dataclass
 class ActionSpace:
    """Comprehensive audio modification actions"""
    # Hook optimization
    hook_type: HookType
    hook_position: float  # 0.0 to 3.0 seconds
    hook_intensity: float  # 0.0 to 1.0
    hook_pitch_shift: float  # -3 to +3 semitones
    
    # Beat and tempo
    beat_timing_adjustment: float  # -0.5 to +0.5 seconds offset
    tempo_multiplier: float  # 0.8 to 1.3x
    beat_drop_position: Optional[float]  # Seconds, or None
    
    # Voice modulation
    volume_modulation: float  # 0.5 to 1.5 multiplier
    pitch_shift: float  # -2 to +2 semitones
    voice_energy_level: str  # "low", "medium", "high", "explosive"
    voice_clarity_enhancement: float  # 0.0 to 1.0
    
    # Emotional triggers
    emotional_arc: List[float]  # Emotional intensity over time
    suspense_buildup: bool
    payoff_timing: Optional[float]  # When payoff hits
    
    # Cross-modal sync
    beat_to_scene_cut_sync: bool
    audio_visual_hook_sync: bool
    caption_sync_adjustment: float  # Timing offset for captions
    
    # Effects and transitions
    transition_type: str  # "cut", "fade", "beat_drop", "reverb_swell", "silence"
    effect_intensity: float  # 0.0 to 1.0
    reverb_amount: float
    compression_level: float
    
    # Earworm optimization
    melodic_repetition: int  # Number of times to repeat catchy element
    syllable_pattern_emphasis: bool
    
    def to_vector(self) -> np.ndarray:
        """Convert action to numerical vector for Q-learning"""
        hook_map = {h: i for i, h in enumerate(HookType)}
        energy_map = {"low": 0.2, "medium": 0.5, "high": 0.8, "explosive": 1.0}
        transition_map = {"cut": 0.0, "fade": 0.25, "beat_drop": 0.5, "reverb_swell": 0.75, "silence": 1.0}
        
        return np.array([
            float(hook_map.get(self.hook_type, 0)),
            self.hook_position,
            self.hook_intensity,
            self.hook_pitch_shift,
            self.beat_timing_adjustment,
            self.tempo_multiplier,
            self.beat_drop_position if self.beat_drop_position else -1.0,
            self.volume_modulation,
            self.pitch_shift,
            energy_map.get(self.voice_energy_level, 0.5),
            self.voice_clarity_enhancement,
            np.mean(self.emotional_arc) if self.emotional_arc else 0.5,
            float(self.suspense_buildup),
            self.payoff_timing if self.payoff_timing else -1.0,
            float(self.beat_to_scene_cut_sync),
            float(self.audio_visual_hook_sync),
            self.caption_sync_adjustment,
            transition_map.get(self.transition_type, 0.0),
            self.effect_intensity,
            self.reverb_amount,
            self.compression_level,
            float(self.melodic_repetition),
            float(self.syllable_pattern_emphasis)
        ])


 @dataclass
 class State:
    """Complete RL state representation with all contextual information"""
    # Audio features
    audio_features: AudioFeatures
    
    # Video context
    video_context: VideoContext
    
    # Platform & niche
    platform: Platform
    niche: str
    beat_type: BeatType
    
    # Historical patterns from memory manager
    historical_patterns: List[HistoricalPattern]
    top_pattern_efficacy: float  # Best pattern efficacy for this context
    
    # Platform metadata and trends
    platform_metadata: PlatformMetadata
    
    # Audience predictions
    audience_projections: AudienceBehaviorProjections
    
    # Temporal context
    posting_time_hour: int  # Hour of day
    day_of_week: int
    is_trending_period: bool
    
    def to_vector(self) -> np.ndarray:
        """Convert state to numerical vector for neural network input"""
        # Audio features (21 dimensions)
        audio_vec = self.audio_features.to_vector()
        
        # Video context (10 dimensions)
        video_vec = np.array([
            float(self.video_context.scene_cuts),
            np.mean(self.video_context.visual_intensity_curve) if self.video_context.visual_intensity_curve else 0.5,
            self.video_context.thumbnail_predicted_ctr,
            self.video_context.hook_visual_position,
            self.video_context.color_palette_energy,
            self.video_context.motion_intensity,
            self.video_context.text_overlay_density,
            self.video_context.duration_seconds,
            len(self.video_context.caption_timestamps) / max(self.video_context.duration_seconds, 1),
            np.std(self.video_context.scene_cut_timestamps) if self.video_context.scene_cut_timestamps else 0.0
        ])
        
        # Historical performance (5 dimensions)
        hist_vec = np.array([
            self.top_pattern_efficacy,
            len(self.historical_patterns) / 100.0,  # Normalize
            np.mean([p.efficacy_score for p in self.historical_patterns]) if self.historical_patterns else 0.0,
            sum(1 for p in self.historical_patterns if p.memory_layer == MemoryLayer.HOT) / max(len(self.historical_patterns), 1),
            np.mean([p.performance.viral_score(self.platform) for p in self.historical_patterns]) if self.historical_patterns else 0.0
        ])
        
        # Platform trends (6 dimensions)
        platform_vec = np.array([
            len(self.platform_metadata.current_trending_sounds) / 10.0,
            len(self.platform_metadata.trending_beat_types) / 5.0,
            self.platform_metadata.algorithm_weights.get('retention', 0.5),
            self.platform_metadata.algorithm_weights.get('engagement', 0.5),
            self.platform_metadata.device_usage.get('mobile', 0.8),
            float(self.platform in [Platform.TIKTOK])  # Platform encoding
        ])
        
        # Audience projections (7 dimensions)
        audience_vec = np.array([
            self.audience_projections.predicted_watch_time,
            self.audience_projections.predicted_loop_probability,
            self.audience_projections.predicted_scroll_stop_rate,
            self.audience_projections.predicted_engagement_rate,
            self.audience_projections.predicted_share_likelihood,
            self.audience_projections.predicted_save_likelihood,
            self.audience_projections.virality_confidence
        ])
        
        # Temporal context (3 dimensions)
        temporal_vec = np.array([
            self.posting_time_hour / 24.0,
            self.day_of_week / 7.0,
            float(self.is_trending_period)
        ])
        
        # Concatenate all vectors (52 total dimensions)
        return np.concatenate([audio_vec, video_vec, hist_vec, platform_vec, audience_vec, temporal_vec])
    
    def get_context_hash(self) -> str:
        """Generate unique hash for state context (for Q-table indexing)"""
        context_str = f"{self.niche}_{self.platform.value}_{self.beat_type.value}_{int(self.top_pattern_efficacy*10)}"
        return hashlib.md5(context_str.encode()).hexdigest()[:16]


 class AdvancedRewardFunction:
    """Multi-dimensional reward calculation with dynamic weighting"""
    
    def __init__(self):
        # Base weights for reward components
        self.weights = {
            'views': 0.20,
            'retention': 0.25,
            'engagement': 0.20,
            'loopability': 0.20,
            'velocity': 0.15
        }
        
        # Platform-specific multipliers
        self.platform_multipliers = {
            Platform.TIKTOK: {
                'first_3s_retention': 1.5,
                'loop_frequency': 1.4,
                'sound_usage': 1.3
            },
            Platform.YOUTUBE_SHORTS: {
                'watch_through': 1.5,
                'ctr': 1.3,
                'avg_watch_time': 1.2
            },
            Platform.INSTAGRAM_REELS: {
                'share_rate': 1.6,
                'save_rate': 1.4,
                'profile_visits': 1.2
            }
        }
        
        # Dynamic adjustment factors
        self.trend_multiplier = 1.0
        self.niche_multiplier = {}
        self.time_decay_factor = 0.95
    
    def calculate(
        self,
        metrics: PerformanceMetrics,
        state: State,
        action: ActionSpace,
        predicted_metrics: Dict[str, float],
        pattern_history: List[HistoricalPattern]
    ) -> float:
        """
        Calculate comprehensive reward with multi-dimensional scoring.
        Designed to push towards 5M+ view baseline.
        """
        # 1. Base viral score
        viral_score = metrics.viral_score(state.platform)
        
        # 2. View threshold rewards (exponential scaling)
        view_reward = self._exponential_view_reward(metrics.views)
        
        # 3. Early retention boost (critical for algorithm push)
        retention_boost = self._advanced_retention_scoring(metrics, state.platform)
        
        # 4. Engagement quality (shares/saves > likes)
        engagement_score = self._engagement_quality_score(metrics)
        
        # 5. Loopability and addiction score
        loop_score = self._loopability_score(metrics, action)
        
        # 6. Velocity bonus (fast viral spread)
        velocity_bonus = self._velocity_reward(metrics)
        
        # 7. Platform-specific bonuses
        platform_bonus = self._platform_specific_rewards(metrics, state.platform)
        
        # 8. Cross-modal sync reward
        crossmodal_reward = self._crossmodal_sync_score(state, action, metrics)
        
        # 9. Prediction accuracy bonus
        prediction_bonus = self._prediction_accuracy_reward(metrics, predicted_metrics)
        
        # 10. Pattern consistency reward
        pattern_reward = self._pattern_consistency_score(pattern_history, metrics)
        
        # 11. Anti-viral penalties
        penalties = self._comprehensive_penalties(metrics, state, action, pattern_history)
        
        # 12. Trend alignment bonus
        trend_bonus = self._trend_alignment_reward(state, metrics)
        
        # Weighted combination
        total_reward = (
            view_reward * self.weights['views'] +
            retention_boost * self.weights['retention'] +
            (engagement_score + loop_score) / 2 * self.weights['engagement'] +
            velocity_bonus * self.weights['velocity'] +
            platform_bonus * 0.15 +
            crossmodal_reward * 0.10 +
            prediction_bonus * 0.08 +
            pattern_reward * 0.07 +
            trend_bonus * 0.05 -
            penalties
        )
        
        # Apply dynamic multipliers
        total_reward *= self.trend_multiplier
        total_reward *= self.niche_multiplier.get(state.niche, 1.0)
        
        # Ensure reward is in reasonable range
        return np.clip(total_reward, -2.0, 5.0)
    
    def _exponential_view_reward(self, views: int) -> float:
        """Exponential rewards for view milestones - pushes towards 5M+ baseline"""
        if views >= 10_000_000:
            return 3.5
        elif views >= 5_000_000:
            return 2.5  # Target baseline
        elif views >= 2_000_000:
            return 1.8
        elif views >= 1_000_000:
            return 1.3
        elif views >= 500_000:
            return 0.9
        elif views >= 100_000:
            return 0.5
        else:
            return 0.2 * (views / 100_000)  # Gradual scaling below 100k
    
    def _advanced_retention_scoring(self, metrics: PerformanceMetrics, platform: Platform) -> float:
        """Advanced retention analysis - first 3s is CRITICAL"""
        # First 3 seconds retention (make or break)
        first_3s_score = metrics.first_3s_retention ** 2  # Quadratic to emphasize importance
        
        # 2 second retention
        retention_2s_score = metrics.retention_2s * 0.8
        
        # Completion rate
        completion_score = metrics.completion_rate * 0.9
        
        # Watch-through rate
        watch_through_score = metrics.watch_through_rate * 1.1
        
        # Platform-specific weighting
        if platform == Platform.TIKTOK:
            return first_3s_score * 0.50 + retention_2s_score * 0.25 + completion_score * 0.15 + watch_through_score * 0.10
        elif platform == Platform.YOUTUBE_SHORTS:
            return watch_through_score * 0.40 + completion_score * 0.30 + first_3s_score * 0.20 + retention_2s_score * 0.10
        else:
            return completion_score * 0.35 + first_3s_score * 0.30 + watch_through_score * 0.25 + retention_2s_score * 0.10
    
    def _engagement_quality_score(self, metrics: PerformanceMetrics) -> float:
        """High-quality engagement > vanity metrics"""
        # Shares are 3x more valuable than likes
        # Saves are 5x more valuable than likes
        engagement_value = (
            metrics.likes +
            metrics.shares * 3 +
            metrics.saves * 5 +
            metrics.comments * 2
        ) / max(metrics.views, 1) * 10000  # Normalize to reasonable scale
        
        return min(engagement_value, 1.5)  # Cap to prevent skew
    
    def _loopability_score(self, metrics: PerformanceMetrics, action: ActionSpace) -> float:
        """Reward highly loopable content"""
        # Base loop score
        loop_score = metrics.loop_frequency * 0.6
        
        # Replay rate boost
        replay_boost = metrics.replay_rate * 0.4
        
        # Action-based prediction (did we optimize for loops?)
        if action.beat_drop_position and action.beat_drop_position > 0:
            loop_score *= 1.15  # Beat drops encourage replays
        
        if action.melodic_repetition >= 2:
            loop_score *= 1.10  # Repetition = earworm = loops
        
        return min(loop_score + replay_boost, 1.5)
    
    def _velocity_reward(self, metrics: PerformanceMetrics) -> float:
        """Reward fast viral spread (algorithm loves this)"""
        # Views in first hour relative to final views
        first_hour_ratio = metrics.views_first_hour / max(metrics.views, 1)
        
        # Velocity score
        velocity = metrics.velocity_score
        
        # Combined scoring
        velocity_reward = velocity * 0.6 + first_hour_ratio * 0.4
        
        # Bonus for explosive start
        if metrics.views_first_hour > 50_000:
            velocity_reward *= 1.3
        
        return velocity_reward
    
    def _platform_specific_rewards(self, metrics: PerformanceMetrics, platform: Platform) -> float:
        """Apply platform-specific reward multipliers"""
        multipliers = self.platform_multipliers.get(platform, {})
        reward = 0.0
        
        if platform == Platform.TIKTOK:
            reward += metrics.first_3s_retention * multipliers.get('first_3s_retention', 1.0) * 0.4
            reward += metrics.loop_frequency * multipliers.get('loop_frequency', 1.0) * 0.35
            reward += metrics.sound_use_rate * multipliers.get('sound_usage', 1.0) * 0.25
        
        elif platform == Platform.YOUTUBE_SHORTS:
            reward += metrics.watch_through_rate * multipliers.get('watch_through', 1.0) * 0.4
            reward += metrics.ctr * multipliers.get('ctr', 1.0) * 0.35
            reward += (metrics.avg_watch_time / 60) * multipliers.get('avg_watch_time', 1.0) * 0.25
        
        elif platform == Platform.INSTAGRAM_REELS:
            reward += metrics.share_rate * 100 * multipliers.get('share_rate', 1.0) * 0.4
            reward += metrics.save_rate * 100 * multipliers.get('save_rate', 1.0) * 0.35
            reward += (metrics.profile_visits / max(metrics.views, 1)) * 100 * multipliers.get('profile_visits', 1.0) * 0.25
        
        return reward
    
    def _crossmodal_sync_score(self, state: State, action: ActionSpace, metrics: PerformanceMetrics) -> float:
        """Reward effective audio-visual synchronization"""
        sync_score = 0.0
        
        # Beat to scene cut alignment
        if action.beat_to_scene_cut_sync:
            sync_score += state.audio_features.beat_scene_alignment * 0.35
        
        # Audio-visual hook coordination
        if action.audio_visual_hook_sync:
            sync_score += state.audio_features.visual_hook_coordination * 0.35
        
        # Caption timing
        sync_score += state.audio_features.caption_sync_score * 0.30
        
        # Bonus if metrics show strong retention (suggests sync worked)
        if metrics.completion_rate > 0.7:
            sync_score *= 1.2
        
        return sync_score
    
    def _prediction_accuracy_reward(self, actual: PerformanceMetrics, predicted: Dict[str, float]) -> float:
        """Bonus for accurate engagement predictions"""
        if not predicted:
            return 0.0
        
        # Compare predictions to actual
        watch_time_error = abs(actual.watch_through_rate - predicted.get('watch_time', 0.5))
        loop_error = abs(actual.loop_frequency - predicted.get('loop_prob', 0.5))
        engagement_error = abs(actual.engagement_quality() - predicted.get('engagement', 0.5))
        
        # Calculate accuracy (1.0 = perfect, 0.0 = completely wrong)
        accuracy = 1.0 - (watch_time_error + loop_error + engagement_error) / 3.0
        
        # Bonus reward for good predictions (helps meta-agent learn)
        return accuracy * 0.5
    
    def _pattern_consistency_score(self, pattern_history: List[HistoricalPattern], metrics: PerformanceMetrics) -> float:
        """Reward consistency with proven viral patterns"""
        if not pattern_history:
            return 0.0
        
        # Get average performance of historical patterns
        avg_viral_score = np.mean([p.performance.viral_score(p.platform) for p in pattern_history])
        
        # Current performance relative to history
        current_score = metrics.viral_score(pattern_history[0].platform) if pattern_history else 0.5
        
        # Reward if we matched or exceeded historical performance
        if current_score >= avg_viral_score:
            return (current_score - avg_viral_score) * 2.0  # Amplify improvements
        else:
            return (current_score - avg_viral_score) * 0.5  # Smaller penalty for underperformance
    
    def _comprehensive_penalties(
        self,
        metrics: PerformanceMetrics,
        state: State,
        action: ActionSpace,
        pattern_history: List[HistoricalPattern]
    ) -> float:
        """Comprehensive penalty system for anti-viral patterns"""
        penalty = 0.0
        
        # 1. Poor retention penalties
        if metrics.first_3s_retention < 0.4:
            penalty += 0.4  # Severe penalty - algorithm will bury this
        elif metrics.first_3s_retention < 0.6:
            penalty += 0.2
        
        if metrics.completion_rate < 0.25:
            penalty += 0.3
        
        # 2. Low engagement penalties
        engagement_rate = (metrics.likes + metrics.shares + metrics.comments) / max(metrics.views, 1)
        if engagement_rate < 0.005:  # Less than 0.5% engagement
            penalty += 0.25
        
        # 3. Beat alignment violations
        if state.audio_features.beat_alignment_error > 0.25:
            penalty += 0.3  # Poor audio quality hurts virality
        
        # 4. Overused pattern penalty (audience fatigue)
        if pattern_history:
            recent_usage = sum(1 for p in pattern_history[-20:] if p.efficacy_score > 0.6)
            if recent_usage > 15:  # Same pattern used too much
                penalty += 0.25
        
        # 5. Cross-modal misalignment
        if action.beat_to_scene_cut_sync and state.audio_features.beat_scene_alignment < 0.5:
            penalty += 0.2  # Promised sync but failed to deliver
        
        # 6. Hook timing violations
        if action.hook_position > 3.0:
            penalty += 0.35  # Hook too late - viewers already scrolled
        
        # 7. Platform rule violations
        platform_rules = {
            Platform.TIKTOK: {'min_loop_freq': 0.3, 'min_first_3s': 0.6},
            Platform.YOUTUBE_SHORTS: {'min_watch_through': 0.4, 'min_ctr': 0.05},
            Platform.INSTAGRAM_REELS: {'min_share_rate': 0.01, 'min_save_rate': 0.008}
        }
        
        rules = platform_rules.get(state.platform, {})
        if state.platform == Platform.TIKTOK:
            if metrics.loop_frequency < rules.get('min_loop_freq', 0):
                penalty += 0.2
            if metrics.first_3s_retention < rules.get('min_first_3s', 0):
                penalty += 0.15
        elif state.platform == Platform.YOUTUBE_SHORTS:
            if metrics.watch_through_rate < rules.get('min_watch_through', 0):
                penalty += 0.2
            if metrics.ctr < rules.get('min_ctr', 0):
                penalty += 0.15
        elif state.platform == Platform.INSTAGRAM_REELS:
            if metrics.share_rate < rules.get('min_share_rate', 0):
                penalty += 0.2
            if metrics.save_rate < rules.get('min_save_rate', 0):
                penalty += 0.15
        
        # 8. Velocity penalties (slow spread = algorithm deprioritization)
        if metrics.velocity_score < 0.3:
            penalty += 0.2
        
        # 9. Audio quality issues
        if state.audio_features.vocal_clarity < 0.5:
            penalty += 0.15  # Viewers can't understand = scroll
        
        if state.audio_features.background_music_ratio > 0.7:
            penalty += 0.1  # Music drowning out voice
        
        return penalty
    
    def _trend_alignment_reward(self, state: State, metrics: PerformanceMetrics) -> float:
        """Bonus for aligning with current platform trends"""
        reward = 0.0
        
        # Trending beat type bonus
        if state.beat_type in state.platform_metadata.trending_beat_types:
            reward += 0.3
        
        # Posted during peak hours
        if state.posting_time_hour in state.platform_metadata.peak_posting_times:
            reward += 0.2
        
        # During trending period
        if state.is_trending_period:
            reward += 0.25
        
        # Sound usage by others (indicates we created trend)
        if metrics.sound_use_rate > 0.1:
            reward += 0.4  # Big bonus - we made a viral sound
        
        return reward
    
    def update_dynamic_multipliers(self, recent_performance: List[Tuple[State, PerformanceMetrics]]):
        """Dynamically adjust reward multipliers based on recent trends"""
        if len(recent_performance) < 20:
            return
        
        # Analyze niche performance
        niche_scores = defaultdict(list)
        for state, metrics in recent_performance:
            niche_scores[state.niche].append(metrics.viral_score(state.platform))
        
        # Update niche multipliers
        for niche, scores in niche_scores.items():
            avg_score = np.mean(scores)
            if avg_score > 0.7:
                self.niche_multiplier[niche] = 1.2
            elif avg_score < 0.4:
                self.niche_multiplier[niche] = 0.9
            else:
                self.niche_multiplier[niche] = 1.0
        
        # Update trend multiplier based on overall performance
        overall_avg = np.mean([m.viral_score(s.platform) for s, m in recent_performance])
        if overall_avg > 0.65:
            self.trend_multiplier = 1.15
        elif overall_avg < 0.45:
            self.trend_multiplier = 0.95
        else:
            self.trend_multiplier = 1.0


 class PrimaryAudioAgent:
    """
    Primary Audio Agent - Core RL agent for audio optimization.
    Uses deep Q-learning with experience replay and target networks.
    """
    
    def __init__(self, state_dim: int = 52, action_dim: int = 23):
        self.agent_id = "primary_audio_agent"
        self.state_dim = state_dim
        self.action_dim = action_dim
        
        # Q-network (simplified - in production would use neural network)
        self.q_table = defaultdict(lambda: np.random.randn(action_dim) * 0.01)
        self.target_q_table = defaultdict(lambda: np.random.randn(action_dim) * 0.01)
        
        # Hyperparameters
        self.learning_rate = 0.001
        self.discount_factor = 0.97  # Long-term thinking for viral growth
        self.epsilon = 0.25  # Exploration rate
        self.epsilon_min = 0.05
        self.epsilon_decay = 0.998
        
        # Experience replay
        self.replay_buffer = deque(maxlen=10000)
        self.batch_size = 64
        
        # Training metrics
        self.episode_count = 0
        self.total_reward = 0.0
        self.avg_q_value = 0.0
        
        # Target network update frequency
        self.target_update_frequency = 100
    
    def select_action(self, state: State, explore: bool = True) -> ActionSpace:
        """Select action using epsilon-greedy policy with intelligent exploration"""
        if explore and np.random.random() < self.epsilon:
            # Intelligent exploration - not completely random
            return self._intelligent_exploration(state)
        else:
            # Exploitation - use learned Q-values
            return self._greedy_action(state)
    
    def _greedy_action(self, state: State) -> ActionSpace:
        """Select best action based on Q-values"""
        state_key = state.get_context_hash()
        q_values = self.q_table[state_key]
        
        # Convert Q-values to action
        return self._q_to_action(q_values, state)
    
    def _intelligent_exploration(self, state: State) -> ActionSpace:
        """
        Intelligent exploration that considers context.
        Not completely random - biased towards reasonable actions.
        """
        # Use historical patterns as guide
        if state.historical_patterns:
            best_pattern = max(state.historical_patterns, key=lambda p: p.efficacy_score)
            
            # Add noise to best known pattern
            base_action = self._pattern_to_action(best_pattern)
            return self._add_exploration_noise(base_action)
        else:
            # No history - generate reasonable random action
            return self._generate_reasonable_action(state)
    
    def _pattern_to_action(self, pattern: HistoricalPattern) -> ActionSpace:
        """Convert historical pattern to action space"""
        features = pattern.features
        
        return ActionSpace(
            hook_type=HookType.CURIOSITY,  # Default to curiosity hooks
            hook_position=features.hook_position_seconds,
            hook_intensity=features.emotional_intensity,
            hook_pitch_shift=0.0,
            beat_timing_adjustment=0.0,
            tempo_multiplier=features.tempo_bpm / 130.0,  # Normalize around 130 BPM
            beat_drop_position=features.duration if hasattr(features, 'duration') else None,
            volume_modulation=features.volume_dynamics,
            pitch_shift=0.0,
            voice_energy_level="high" if features.emotional_intensity > 0.7 else "medium",
            voice_clarity_enhancement=features.vocal_clarity,
            emotional_arc=[features.emotional_intensity] * 5,
            suspense_buildup=True,
            payoff_timing=None,
            beat_to_scene_cut_sync=features.beat_scene_alignment > 0.7,
            audio_visual_hook_sync=features.visual_hook_coordination > 0.7,
            caption_sync_adjustment=0.0,
            transition_type="beat_drop",
            effect_intensity=0.6,
            reverb_amount=0.3,
            compression_level=0.7,
            melodic_repetition=2,
            syllable_pattern_emphasis=True
        )
    
    def _add_exploration_noise(self, action: ActionSpace) -> ActionSpace:
        """Add noise to action for exploration"""
        action.hook_position = np.clip(action.hook_position + np.random.normal(0, 0.3), 0.0, 3.0)
        action.hook_intensity = np.clip(action.hook_intensity + np.random.normal(0, 0.1), 0.0, 1.0)
        action.tempo_multiplier = np.clip(action.tempo_multiplier + np.random.normal(0, 0.1), 0.8, 1.3)
        action.volume_modulation = np.clip(action.volume_modulation + np.random.normal(0, 0.1), 0.5, 1.5)
        
        return action
    
    def _generate_reasonable_action(self, state: State) -> ActionSpace:
        """Generate reasonable action based on state context"""
        # Platform-specific defaults
        if state.platform == Platform.TIKTOK:
            hook_pos = np.random.uniform(0.3, 1.0)  # Early hook for TikTok
            tempo_mult = np.random.uniform(1.0, 1.2)  # Slightly faster
        elif state.platform == Platform.YOUTUBE_SHORTS:
            hook_pos = np.random.uniform(0.5, 1.5)
            tempo_mult = np.random.uniform(0.95, 1.15)
        else:  # Instagram
            hook_pos = np.random.uniform(0.5, 1.2)
            tempo_mult = np.random.uniform(0.9, 1.1)
        
        return ActionSpace(
            hook_type=np.random.choice(list(HookType)),
            hook_position=hook_pos,
            hook_intensity=np.random.uniform(0.6, 0.9),
            hook_pitch_shift=np.random.uniform(-1.0, 1.0),
            beat_timing_adjustment=np.random.uniform(-0.2, 0.2),
            tempo_multiplier=tempo_mult,
            beat_drop_position=np.random.uniform(5, 15) if np.random.random() > 0.5 else None,
            volume_modulation=np.random.uniform(0.8, 1.2),
            pitch_shift=np.random.uniform(-1.0, 1.0),
            voice_energy_level=np.random.choice(["medium", "high", "explosive"]),
            voice_clarity_enhancement=np.random.uniform(0.6, 0.9),
            emotional_arc=[np.random.uniform(0.5, 0.9) for _ in range(5)],
            suspense_buildup=np.random.random() > 0.5,
            payoff_timing=np.random.uniform(8, 20) if np.random.random() > 0.6 else None,
            beat_to_scene_cut_sync=np.random.random() > 0.4,
            audio_visual_hook_sync=np.random.random() > 0.3,
            caption_sync_adjustment=np.random.uniform(-0.2, 0.2),
            transition_type=np.random.choice(["beat_drop", "fade", "cut", "reverb_swell"]),
            effect_intensity=np.random.uniform(0.4, 0.8),
            reverb_amount=np.random.uniform(0.2, 0.5),
            compression_level=np.random.uniform(0.6, 0.8),
            melodic_repetition=np.random.randint(1, 4),
            syllable_pattern_emphasis=np.random.random() > 0.5
        )
    
    def _q_to_action(self, q_values: np.ndarray, state: State) -> ActionSpace:
        """Convert Q-values to action space"""
        # Decode Q-values into action parameters
        return ActionSpace(
            hook_type=list(HookType)[int(abs(q_values[0]) % len(HookType))],
            hook_position=np.clip(abs(q_values[1]), 0.0, 3.0),
            hook_intensity=np.clip(abs(q_values[2]), 0.0, 1.0),
            hook_pitch_shift=np.clip(q_values[3], -3.0, 3.0),
            beat_timing_adjustment=np.clip(q_values[4], -0.5, 0.5),
            tempo_multiplier=np.clip(q_values[5], 0.8, 1.3),
            beat_drop_position=abs(q_values[6]) if q_values[6] > 0 else None,
            volume_modulation=np.clip(q_values[7], 0.5, 1.5),
            pitch_shift=np.clip(q_values[8], -2.0, 2.0),
            voice_energy_level=["low", "medium", "high", "explosive"][int(abs(q_values[9]) % 4)],
            voice_clarity_enhancement=np.clip(abs(q_values[10]), 0.0, 1.0),
            emotional_arc=[np.clip(abs(q_values[11 + i]), 0.0, 1.0) for i in range(5)],
            suspense_buildup=q_values[16] > 0,
            payoff_timing=abs(q_values[17]) if q_values[17] > 0 else None,
            beat_to_scene_cut_sync=q_values[18] > 0,
            audio_visual_hook_sync=q_values[19] > 0,
            caption_sync_adjustment=np.clip(q_values[20], -0.5, 0.5),
            transition_type=["cut", "fade", "beat_drop", "reverb_swell", "silence"][int(abs(q_values[21]) % 5)],
            effect_intensity=np.clip(abs(q_values[22]), 0.0, 1.0),
            reverb_amount=np.clip(abs(q_values[22]) * 0.5, 0.0, 1.0),
            compression_level=np.clip(0.6 + abs(q_values[22]) * 0.2, 0.0, 1.0),
            melodic_repetition=int(abs(q_values[22]) * 3) + 1,
            syllable_pattern_emphasis=q_values[22] > 0.5
        )
    
    def update(self, state: State, action: ActionSpace, reward: float, next_state: State):
        """Update Q-values using TD learning with experience replay"""
        # Store experience
        self.replay_buffer.append((state, action, reward, next_state))
        
        # Update from mini-batch
        if len(self.replay_buffer) >= self.batch_size:
            self._batch_update()
        
        # Update target network periodically
        if self.episode_count % self.target_update_frequency == 0:
            self._update_target_network()
        
        # Decay epsilon
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        
        self.episode_count += 1
        self.total_reward += reward
    
    def _batch_update(self):
        """Batch update from experience replay"""
        # Sample random batch
        batch = list(np.random.choice(list(self.replay_buffer), self.batch_size, replace=False))
        
        for state, action, reward, next_state in batch:
            state_key = state.get_context_hash()
            next_state_key = next_state.get_context_hash()
            
            # Current Q-values
            current_q = self.q_table[state_key]
            
            # Next Q-values from target network
            next_q = self.target_q_table[next_state_key]
            
            # TD target
            td_target = reward + self.discount_factor * np.max(next_q)
            
            # Action vector
            action_vec = action.to_vector()
            
            # TD error
            td_error = td_target - np.dot(current_q, action_vec)
            
            # Update Q-values
            self.q_table[state_key] += self.learning_rate * td_error * action_vec
            
            # Track avg Q-value
            self.avg_q_value = 0.95 * self.avg_q_value + 0.05 * np.max(current_q)
    
    def _update_target_network(self):
        """Soft update of target network"""
        tau = 0.01  # Soft update parameter
        
        for key in self.q_table.keys():
            self.target_q_table[key] = tau * self.q_table[key] + (1 - tau) * self.target_q_table[key]


 class VisualHookAgent:
    """
    Visual/Hook Agent - Specializes in cross-modal synchronization.
    Ensures audio hooks align perfectly with visual elements.
    """
    
    def __init__(self):
        self.agent_id = "visual_hook_agent"
        self.sync_memory = defaultdict(list)  # Track successful sync patterns
        self.learning_rate = 0.02
    
    def optimize_crossmodal_sync(
        self,
        state: State,
        audio_action: ActionSpace
    ) -> ActionSpace:
        """
        Optimize action for cross-modal synchronization.
        Takes audio agent's action and adjusts for video context.
        """
        optimized_action = audio_action
        
        # 1. Align beats with scene cuts
        if state.video_context.scene_cut_timestamps:
            optimal_beat_timing = self._find_optimal_beat_timing(
                state.video_context.scene_cut_timestamps,
                audio_action.beat_timing_adjustment
            )
            optimized_action.beat_timing_adjustment = optimal_beat_timing
            optimized_action.beat_to_scene_cut_sync = True
        
        # 2. Align audio hook with visual hook
        visual_hook_time = state.video_context.hook_visual_position
        if abs(audio_action.hook_position - visual_hook_time) > 0.5:
            # Adjust audio hook to match visual
            optimized_action.hook_position = (audio_action.hook_position + visual_hook_time) / 2
            optimized_action.audio_visual_hook_sync = True
        
        # 3. Adjust for caption timing
        if state.video_context.caption_timestamps:
            caption_adjustment = self._optimize_caption_sync(
                state.video_context.caption_timestamps,
                audio_action.hook_position
            )
            optimized_action.caption_sync_adjustment = caption_adjustment
        
        # 4. Energy curve matching
        if state.video_context.visual_intensity_curve:
            optimized_action.emotional_arc = self._match_visual_energy(
                state.video_context.visual_intensity_curve,
                audio_action.emotional_arc
            )
        
        # 5. Optimize beat drop for maximum impact
        if audio_action.beat_drop_position and state.video_context.scene_cuts > 0:
            # Place beat drop at high-impact scene cut
            best_scene_cut = self._find_best_scene_cut_for_drop(
                state.video_context.scene_cut_timestamps,
                audio_action.beat_drop_position
            )
            optimized_action.beat_drop_position = best_scene_cut
        
        return optimized_action
    
    def _find_optimal_beat_timing(self, scene_cuts: List[float], current_timing: float) -> float:
        """Find beat timing that aligns with scene cuts"""
        if not scene_cuts:
            return current_timing
        
        # Find nearest scene cut to current timing
        nearest_cut = min(scene_cuts, key=lambda x: abs(x - current_timing))
        
        # Adjust timing to align with scene cut
        adjustment = nearest_cut - current_timing
        
        # Don't adjust more than 0.3 seconds
        return np.clip(adjustment, -0.3, 0.3)
    
    def _optimize_caption_sync(
        self,
        caption_timestamps: List[Tuple[float, str]],
        hook_position: float
    ) -> float:
        """Optimize caption timing relative to audio hook"""
        if not caption_timestamps:
            return 0.0
        
        # Find caption closest to hook
        nearest_caption_time = min(caption_timestamps, key=lambda x: abs(x[0] - hook_position))[0]
        
        # Calculate adjustment to sync
        return nearest_caption_time - hook_position
    
    def _match_visual_energy(
        self,
        visual_curve: List[float],
        audio_arc: List[float]
    ) -> List[float]:
        """Match audio emotional arc to visual energy curve"""
        if not visual_curve or not audio_arc:
            return audio_arc
        
        # Interpolate visual curve to match audio arc length
        visual_resampled = np.interp(
            np.linspace(0, 1, len(audio_arc)),
            np.linspace(0, 1, len(visual_curve)),
            visual_curve
        )
        
        # Blend audio and visual (70% audio, 30% visual)
        matched_arc = [0.7 * a + 0.3 * v for a, v in zip(audio_arc, visual_resampled)]
        
        return matched_arc
    
    def _find_best_scene_cut_for_drop(
        self,
        scene_cuts: List[float],
        target_position: float
    ) -> float:
        """Find best scene cut position for beat drop"""
        if not scene_cuts:
            return target_position
        
        # Find scene cuts in middle to late section (5-20 seconds)
        valid_cuts = [cut for cut in scene_cuts if 5.0 <= cut <= 20.0]
        
        if not valid_cuts:
            return target_position
        
        # Return cut closest to target
        return min(valid_cuts, key=lambda x: abs(x - target_position))
    
    def learn_from_feedback(self, state: State, action: ActionSpace, performance: PerformanceMetrics):
        """Learn which sync patterns work best"""
        sync_key = f"{state.platform.value}_{state.niche}"
        
        sync_pattern = {
            'beat_scene_alignment': state.audio_features.beat_scene_alignment,
            'visual_hook_coord': state.audio_features.visual_hook_coordination,
            'caption_sync': state.audio_features.caption_sync_score,
            'performance_score': performance.viral_score(state.platform)
        }
        
        self.sync_memory[sync_key].append(sync_pattern)
        
        # Keep only top 50 patterns per context
        if len(self.sync_memory[sync_key]) > 50:
            self.sync_memory[sync_key] = sorted(
                self.sync_memory[sync_key],
                key=lambda x: x['performance_score'],
                reverse=True
            )[:50]


 class MetaViralAgent:
    """
    Meta-Viral Agent - Oversees engagement predictions and dynamically
    adjusts reward multipliers based on platform trends and performance patterns.
    """
    
    def __init__(self):
        self.agent_id = "meta_viral_agent"
        
        # Prediction models (simplified - would use ML in production)
        self.prediction_history = deque(maxlen=2000)
        self.prediction_accuracy = defaultdict(lambda: 0.5)
        
        # Dynamic reward multipliers
        self.reward_multipliers = {
            'trending': 1.0,
            'niche': defaultdict(lambda: 1.0),
            'platform': defaultdict(lambda: 1.0),
            'beat_type': defaultdict(lambda: 1.0),
            'time_of_day': defaultdict(lambda: 1.0)
        }
        
        # Performance tracking
        self.platform_performance = defaultdict(list)
        self.niche_performance = defaultdict(list)
        
    def predict_engagement(
        self,
        state: State,
        action: ActionSpace
    ) -> Dict[str, float]:
        """
        Predict comprehensive engagement metrics before video is posted.
        Uses historical patterns and current context.
        """
        # Base predictions from state
        base_watch_time = state.audience_projections.predicted_watch_time
        base_loop_prob = state.audience_projections.predicted_loop_probability
        base_engagement = state.audience_projections.predicted_engagement_rate
        
        # Adjust based on action quality
        action_quality = self._assess_action_quality(action, state)
        
        # Adjust based on historical patterns
        historical_boost = self._historical_pattern_boost(state)
        
        # Platform-specific adjustments
        platform_factor = self._platform_prediction_factor(state.platform, state)
        
        # Calculate predictions
        predicted_watch_time = np.clip(
            base_watch_time * action_quality * historical_boost * platform_factor,
            0.0, 1.0
        )
        
        predicted_loop_prob = np.clip(
            base_loop_prob * (1.0 + action.melodic_repetition * 0.1) * historical_boost,
            0.0, 1.0
        )
        
        predicted_engagement = np.clip(
            base_engagement * action_quality * platform_factor,
            0.0, 0.5
        )
        
        # Predict views based on all factors
        predicted_views = self._predict_views(
            predicted_watch_time,
            predicted_engagement,
            state,
            action
        )
        
        # Calculate viral score
        viral_score = (
            predicted_watch_time * 0.35 +
            predicted_loop_prob * 0.30 +
            predicted_engagement * 0.25 +
            min(predicted_views / 5_000_000, 1.0) * 0.10
        )
        
        predictions = {
            'views': predicted_views,
            'watch_time': predicted_watch_time,
            'loop_prob': predicted_loop_prob,
            'engagement': predicted_engagement,
            'viral_score': viral_score,
            'first_hour_views': predicted_views * 0.15,  # Estimate
            'confidence': state.audience_projections.virality_confidence
        }
        
        return predictions
    
    def _assess_action_quality(self, action: ActionSpace, state: State) -> float:
        """Assess intrinsic quality of action for virality"""
        quality = 1.0
        
        # Hook positioning (earlier is better)
        if action.hook_position < 1.0:
            quality *= 1.3
        elif action.hook_position > 2.5:
            quality *= 0.7
        
        # Hook intensity
        quality *= (0.7 + action.hook_intensity * 0.3)
        
        # Cross-modal sync
        if action.beat_to_scene_cut_sync:
            quality *= 1.15
        if action.audio_visual_hook_sync:
            quality *= 1.2
        
        # Earworm factors
        if action.melodic_repetition >= 2:
            quality *= 1.1
        
        # Voice energy
        energy_boost = {"low": 0.9, "medium": 1.0, "high": 1.15, "explosive": 1.25}
        quality *= energy_boost.get(action.voice_energy_level, 1.0)
        
        return np.clip(quality, 0.5, 2.0)
    
    def _historical_pattern_boost(self, state: State) -> float:
        """Boost predictions based on historical success"""
        if not state.historical_patterns:
            return 1.0
        
        # Get average efficacy of historical patterns
        avg_efficacy = np.mean([p.efficacy_score for p in state.historical_patterns])
        
        # Best pattern efficacy
        best_efficacy = state.top_pattern_efficacy
        
        # Boost based on proven patterns
        boost = 0.8 + (avg_efficacy * 0.3) + (best_efficacy * 0.2)
        
        return np.clip(boost, 0.8, 1.5)
    
    def _platform_prediction_factor(self, platform: Platform, state: State) -> float:
        """Platform-specific prediction adjustments"""
        factor = 1.0
        
        # Check if posting at optimal time
        if state.posting_time_hour in state.platform_metadata.peak_posting_times:
            factor *= 1.2
        
        # Check if using trending beat
        if state.beat_type in state.platform_metadata.trending_beat_types:
            factor *= 1.15
        
        # Platform-specific factors
        if platform == Platform.TIKTOK:
            # TikTok loves high energy and loops
            if state.audio_features.emotional_intensity > 0.7:
                factor *= 1.1
        elif platform == Platform.YOUTUBE_SHORTS:
            # YouTube values watch time
            if state.video_context.duration_seconds > 30:
                factor *= 1.05
        
        return factor
    
    def _predict_views(
        self,
        watch_time: float,
        engagement: float,
        state: State,
        action: ActionSpace
    ) -> int:
        """Predict total view count"""
        # Base views from historical patterns
        if state.historical_patterns:
            avg_historical_views = np.mean([p.performance.views for p in state.historical_patterns])
            base_views = avg_historical_views
        else:
            base_views = 500_000  # Conservative baseline
        
        # Scale based on predicted metrics
        view_multiplier = (
            watch_time * 1.5 +
            engagement * 2.0 +
            state.audience_projections.predicted_scroll_stop_rate * 1.2
        )
        
        # Platform-specific scaling
        platform_scaling = {
            Platform.TIKTOK: 1.3,
            Platform.YOUTUBE_SHORTS: 1.1,
            Platform.INSTAGRAM_REELS: 1.0
        }
        
        predicted_views = int(base_views * view_multiplier * platform_scaling.get(state.platform, 1.0))
        
        # Apply trend multipliers
        if state.is_trending_period:
            predicted_views = int(predicted_views * 1.4)
        
        # Cap at realistic maximum
        return min(predicted_views, 50_000_000)
    
    def adjust_reward_multipliers(self, recent_performance: List[Tuple[State, PerformanceMetrics]]):
        """Dynamically adjust reward multipliers based on recent performance trends"""
        if len(recent_performance) < 30:
            return
        
        # Track performance by different dimensions
        platform_scores = defaultdict(list)
        niche_scores = defaultdict(list)
        beat_scores = defaultdict(list)
        time_scores = defaultdict(list)
        
        for state, metrics in recent_performance:
            viral_score = metrics.viral_score(state.platform)
            
            platform_scores[state.platform].append(viral_score)
            niche_scores[state.niche].append(viral_score)
            beat_scores[state.beat_type].append(viral_score)
            time_scores[state.posting_time_hour].append(viral_score)
        
        # Update platform multipliers
        for platform, scores in platform_scores.items():
            avg_score = np.mean(scores)
            if avg_score > 0.7:
                self.reward_multipliers['platform'][platform] = 1.25
            elif avg_score > 0.55:
                self.reward_multipliers['platform'][platform] = 1.1
            elif avg_score < 0.4:
                self.reward_multipliers['platform'][platform] = 0.9
            else:
                self.reward_multipliers['platform'][platform] = 1.0
        
        # Update niche multipliers
        for niche, scores in niche_scores.items():
            avg_score = np.mean(scores)
            if avg_score > 0.65:
                self.reward_multipliers['niche'][niche] = 1.2
            elif avg_score < 0.45:
                self.reward_multipliers['niche'][niche] = 0.95
            else:
                self.reward_multipliers['niche'][niche] = 1.0
        
        # Update beat type multipliers
        for beat, scores in beat_scores.items():
            avg_score = np.mean(scores)
            if avg_score > 0.7:
                self.reward_multipliers['beat_type'][beat] = 1.15
            else:
                self.reward_multipliers['beat_type'][beat] = 1.0
        
        # Update trending multiplier based on overall performance
        overall_avg = np.mean([m.viral_score(s.platform) for s, m in recent_performance])
        if overall_avg > 0.7:
            self.reward_multipliers['trending'] = 1.3
        elif overall_avg > 0.6:
            self.reward_multipliers['trending'] = 1.15
        else:
            self.reward_multipliers['trending'] = 1.0
        
        logger.info(f"Updated reward multipliers - trending: {self.reward_multipliers['trending']:.2f}")
    
    def update_prediction_accuracy(self, predicted: Dict[str, float], actual: PerformanceMetrics, state: State):
        """Track prediction accuracy to improve meta-agent over time"""
        # Calculate errors
        view_error = abs(predicted['views'] - actual.views) / max(actual.views, 1)
        watch_time_error = abs(predicted['watch_time'] - actual.watch_through_rate)
        engagement_error = abs(predicted['engagement'] - actual.engagement_quality())
        
        # Update accuracy tracking
        context_key = f"{state.platform.value}_{state.niche}"
        current_accuracy = self.prediction_accuracy[context_key]
        
        # Calculate new accuracy (inverse of average error)
        new_accuracy = 1.0 - (view_error * 0.4 + watch_time_error * 0.3 + engagement_error * 0.3)
        
        # Exponential moving average
        self.prediction_accuracy[context_key] = 0.8 * current_accuracy + 0.2 * new_accuracy
        
        # Store in history
        self.prediction_history.append({
            'predicted': predicted,
            'actual': actual,
            'context': context_key,
            'accuracy': new_accuracy,
            'timestamp': datetime.now()
        })


 class ABTestingEngine:
    """
    A/B Testing Engine - Generates multiple audio variants and ranks them
    by predicted viral performance before deployment.
    """
    
    def __init__(self, primary_agent: PrimaryAudioAgent, visual_agent: VisualHookAgent, meta_agent: MetaViralAgent):
        self.primary_agent = primary_agent
        self.visual_agent = visual_agent
        self.meta_agent = meta_agent
        self.variant_count = 10  # Generate 10 variants per video
        self.executor = ThreadPoolExecutor(max_workers=8)
    
    def generate_and_rank_variants(
        self,
        state: State,
        n_variants: int = 10
    ) -> List[Tuple[ActionSpace, Dict[str, float], float]]:
        """
        Generate multiple audio variants and rank by predicted viral score.
        Returns list of (action, predictions, score) tuples sorted by score.
        """
        variants = []
        
        # Generate variants in parallel
        futures = []
        for i in range(n_variants):
            future = self.executor.submit(self._generate_variant, state, i)
            futures.append(future)
        
        # Collect results
        for future in as_completed(futures):
            try:
                variant_data = future.result()
                if variant_data:
                    variants.append(variant_data)
            except Exception as e:
                logger.error(f"Variant generation failed: {e}")
        
        # Sort by viral score (descending)
        variants.sort(key=lambda x: x[2], reverse=True)
        
        logger.info(f"Generated {len(variants)} variants, top score: {variants[0][2]:.3f}")
        
        return variants
    
    def _generate_variant(
        self,
        state: State,
        variant_id: int
    ) -> Optional[Tuple[ActionSpace, Dict[str, float], float]]:
        """Generate single variant with predictions and scoring"""
        try:
            # Use different exploration strategies for variety
            if variant_id == 0:
                # Variant 0: Pure exploitation (best known)
                action = self.primary_agent.select_action(state, explore=False)
            elif variant_id < 3:
                # Variants 1-2: Slight exploration
                action = self.primary_agent._intelligent_exploration(state)
            else:
                # Variants 3+: More random exploration
                action = self.primary_agent.select_action(state, explore=True)
            
            # Apply cross-modal optimization
            optimized_action = self.visual_agent.optimize_crossmodal_sync(state, action)
            
            # Get predictions from meta-agent
            predictions = self.meta_agent.predict_engagement(state, optimized_action)
            
            # Calculate composite score for ranking
            viral_score = predictions['viral_score']
            
            # Boost score based on meta-agent confidence
            confidence_boost = predictions['confidence']
            final_score = viral_score * (0.8 + confidence_boost * 0.2)
            
            return (optimized_action, predictions, final_score)
        
        except Exception as e:
            logger.error(f"Failed to generate variant {variant_id}: {e}")
            return None
    
    def select_best_variant(
        self,
        variants: List[Tuple[ActionSpace, Dict[str, float], float]],
        risk_tolerance: float = 0.1
    ) -> Tuple[ActionSpace, Dict[str, float]]:
        """
        Select best variant with optional risk tolerance.
        risk_tolerance: 0.0 = always pick #1, 1.0 = allow more experimentation
        """
        if not variants:
            raise ValueError("No variants available for selection")
        
        # With some probability, pick a high-scoring but not top variant
        if np.random.random() < risk_tolerance and len(variants) > 3:
            # Pick from top 3
            selected_idx = np.random.randint(0, 3)
            selected = variants[selected_idx]
            logger.info(f"Selected variant #{selected_idx+1} (exploration) with score {selected[2]:.3f}")
        else:
            # Pick the best
            selected = variants[0]
            logger.info(f"Selected top variant with score {selected[2]:.3f}")
        
        return selected[0], selected[1]


 class AdvancedMemoryManager:
    """
    Advanced Memory Manager - Full integration with HOT/WARM/COLD pattern storage.
    Implements sophisticated pattern retrieval, diversity enforcement, and decay.
    """
    
    def __init__(self):
        self.patterns: Dict[str, HistoricalPattern] = {}
        self.hot_patterns: List[str] = []
        self.warm_patterns: List[str] = []
        self.cold_patterns: List[str] = []
        
        # Replay buffer for high-performing patterns
        self.replay_buffer = deque(maxlen=200)
        
        # Pattern usage tracking
        self.usage_count = defaultdict(int)
        self.last_used = {}
        
        # Diversity enforcement
        self.diversity_threshold = 3  # Max times to use similar pattern in window
        self.recent_window = deque(maxlen=20)
    
    def store_pattern(
        self,
        pattern_id: str,
        features: AudioFeatures,
        performance: PerformanceMetrics,
        niche: str,
        platform: Platform,
        beat_type: BeatType
    ):
        """Store new audio pattern with performance data"""
        # Calculate efficacy score
        efficacy = self._calculate_efficacy(performance, platform)
        
        # Determine memory layer
        memory_layer = self._assign_memory_layer(efficacy, datetime.now())
        
        pattern = HistoricalPattern(
            pattern_id=pattern_id,
            features=features,
            performance=performance,
            niche=niche,
            platform=platform,
            beat_type=beat_type,
            timestamp=datetime.now(),
            efficacy_score=efficacy,
            memory_layer=memory_layer
        )
        
        self.patterns[pattern_id] = pattern
        self._assign_to_layer(pattern_id, memory_layer)
        
        # Add to replay buffer if high-performing
        if efficacy > 0.6:
            self.replay_buffer.append(pattern)
        
        logger.info(f"Stored pattern {pattern_id} in {memory_layer.value} layer, efficacy: {efficacy:.3f}")
    
    def _calculate_efficacy(self, performance: PerformanceMetrics, platform: Platform) -> float:
        """Calculate pattern efficacy score"""
        viral_score = performance.viral_score(platform)
        view_score = min(performance.views / 5_000_000, 1.0)
        engagement_score = performance.engagement_quality()
        
        efficacy = (
            viral_score * 0.4 +
            view_score * 0.35 +
            engagement_score * 0.25
        )
        
        return np.clip(efficacy, 0.0, 1.0)
    
    def _assign_memory_layer(self, efficacy: float, timestamp: datetime) -> MemoryLayer:
        """Assign pattern to appropriate memory layer"""
        if efficacy > 0.7:
            return MemoryLayer.HOT
        elif efficacy > 0.5:
            return MemoryLayer.WARM
        else:
            return MemoryLayer.COLD
    
    def _assign_to_layer(self, pattern_id: str, layer: MemoryLayer):
        """Assign pattern ID to specific layer"""
        # Remove from all layers first
        self.hot_patterns = [p for p in self.hot_patterns if p != pattern_id]
        self.warm_patterns = [p for p in self.warm_patterns if p != pattern_id]
        self.cold_patterns = [p for p in self.cold_patterns if p != pattern_id]
        
        # Add to appropriate layer
        if layer == MemoryLayer.HOT:
            self.hot_patterns.append(pattern_id)
        elif layer == MemoryLayer.WARM:
            self.warm_patterns.append(pattern_id)
        else:
            self.cold_patterns.append(pattern_id)
    
    def retrieve_top_patterns(
        self,
        niche: str,
        platform: Platform,
        beat_type: BeatType,
        n: int = 10,
        enforce_diversity: bool = True
    ) -> List[HistoricalPattern]:
        """
        Retrieve top-performing patterns for given context.
        Prioritizes HOT layer, applies decay, enforces diversity.
        """
        # Filter by context
        candidates = [
            p for p in self.patterns.values()
            if p.niche == niche and p.platform == platform and p.beat_type == beat_type
        ]
        
        if not candidates:
            logger.warning(f"No patterns found for {niche}/{platform.value}/{beat_type.value}")
            return []
        
        # Apply time-based decay
        current_time = datetime.now()
        for pattern in candidates:
            days_old = (current_time - pattern.timestamp).days
            decay_factor = np.exp(-0.03 * days_old)
            pattern.efficacy_score *= decay_factor
        
        # Sort by efficacy
        candidates.sort(key=lambda p: p.efficacy_score, reverse=True)
        
        # Prioritize HOT layer
        hot_candidates = [p for p in candidates if p.memory_layer == MemoryLayer.HOT]
        warm_candidates = [p for p in candidates if p.memory_layer == MemoryLayer.WARM]
        cold_candidates = [p for p in candidates if p.memory_layer == MemoryLayer.COLD]
        
        # Combine with priority
        prioritized = hot_candidates + warm_candidates + cold_candidates
        
        # Enforce diversity if requested
        if enforce_diversity:
            prioritized = self._enforce_pattern_diversity(prioritized)
        
        # Return top N
        selected = prioritized[:n]
        
        # Update usage tracking
        for pattern in selected:
            self.usage_count[pattern.pattern_id] += 1
            self.last_used[pattern.pattern_id] = current_time
            self.recent_window.append(pattern.pattern_id)
        
        return selected
    
    def _enforce_pattern_diversity(self, patterns: List[HistoricalPattern]) -> List[HistoricalPattern]:
        """Enforce diversity to avoid overusing similar patterns"""
        diversified = []
        pattern_signatures = set()
        
        for pattern in patterns:
            # Create signature based on key features
            signature = self._pattern_signature(pattern)
            
            # Check usage in recent window
            recent_usage = sum(1 for pid in list(self.recent_window) if pid == pattern.pattern_id)
            
            # Skip if overused or too similar to already selected
            if recent_usage >= self.diversity_threshold:
                continue
            
            if signature in pattern_signatures:
                continue
            
            diversified.append(pattern)
            pattern_signatures.add(signature)
        
        # Fill remainder with any remaining patterns if needed
        if len(diversified) < len(patterns):
            remaining = [p for p in patterns if p not in diversified]
            diversified.extend(remaining[:len(patterns) - len(diversified)])
        
        return diversified
    
    def _pattern_signature(self, pattern: HistoricalPattern) -> str:
        """Create signature for similarity detection"""
        features = pattern.features
        signature = (
            f"{int(features.tempo_bpm / 10) * 10}_"
            f"{int(features.emotional_intensity * 10)}_"
            f"{int(features.hook_position_seconds)}_"
            f"{pattern.beat_type.value}"
        )
        return signature
    
    def get_replay_samples(self, n: int = 20) -> List[HistoricalPattern]:
        """Get high-performing samples from replay buffer for training"""
        if len(self.replay_buffer) < n:
            return list(self.replay_buffer)
        
        # Sample with preference for higher efficacy
        efficacy_scores = [p.efficacy_score for p in self.replay_buffer]
        probabilities = np.array(efficacy_scores) / sum(efficacy_scores)
        
        indices = np.random.choice(
            len(self.replay_buffer),
            size=n,
            replace=False,
            p=probabilities
        )
        
        return [self.replay_buffer[i] for i in indices]
    
    def update_pattern_performance(
        self,
        pattern_id: str,
        new_performance: PerformanceMetrics,
        platform: Platform
    ):
        """Update existing pattern with new performance data"""
        if pattern_id not in self.patterns:
            logger.warning(f"Pattern {pattern_id} not found for update")
            return
        
        pattern = self.patterns[pattern_id]
        
        # Recalculate efficacy
        new_efficacy = self._calculate_efficacy(new_performance, platform)
        
        # Update with exponential moving average
        alpha = 0.3
        pattern.efficacy_score = alpha * new_efficacy + (1 - alpha) * pattern.efficacy_score
        
        # Update performance data
        pattern.performance = new_performance
        pattern.timestamp = datetime.now()
        
        # Reassign memory layer if needed
        new_layer = self._assign_memory_layer(pattern.efficacy_score, pattern.timestamp)
        if new_layer != pattern.memory_layer:
            pattern.memory_layer = new_layer
            self._assign_to_layer(pattern_id, new_layer)
    
    def prune_old_patterns(self, days_threshold: int = 90):
        """Remove patterns older than threshold from COLD layer"""
        current_time = datetime.now()
        to_remove = []
        
        for pattern_id in self.cold_patterns:
            pattern = self.patterns[pattern_id]
            age_days = (current_time - pattern.timestamp).days
            
            if age_days > days_threshold and pattern.efficacy_score < 0.3:
                to_remove.append(pattern_id)
        
        for pattern_id in to_remove:
            del self.patterns[pattern_id]
            self.cold_patterns.remove(pattern_id)
        
        if to_remove:
            logger.info(f"Pruned {len(to_remove)} old patterns from COLD layer")


 class AudioReinforcementLoop:
    """
    Main RL System - Orchestrates all agents, memory, and learning.
    Designed for autonomous 5M+ view optimization.
    """
    
    def __init__(self):
        # Initialize all agents
        self.primary_agent = PrimaryAudioAgent(state_dim=52, action_dim=23)
        self.visual_agent = VisualHookAgent()
        self.meta_agent = MetaViralAgent()
        
        # Reward function
        self.reward_function = AdvancedRewardFunction()
        
        # Memory manager
        self.memory_manager = AdvancedMemoryManager()
        
        # A/B testing engine
        self.ab_engine = ABTestingEngine(self.primary_agent, self.visual_agent, self.meta_agent)
        
        # Performance tracking
        self.performance_history = deque(maxlen=2000)
        self.training_episodes = 0
        
        # Engine weights for TTS and voice sync
        self.engine_weights = {
            'tts': {
                'pace_wpm': 150,
                'pitch_variance': 0.5,
                'emotional_intensity': 0.7,
                'voice_clarity': 0.8
            },
            'voice_sync': {
                'beat_alignment_tolerance': 0.1,
                'scene_sync_priority': 0.8,
                'caption_sync_priority': 0.7
            }
        }
        
        # Real-time learning queue
        self.feedback_queue = queue.Queue()
        self.learning_thread = threading.Thread(target=self._continuous_learning_loop, daemon=True)
        self.learning_thread.start()
        
        # Training metrics
        self.metrics = {
            'total_videos': 0,
            'avg_views': 0.0,
            'avg_viral_score': 0.0,
            'success_rate_5m': 0.0,  # % of videos hitting 5M+
            'pattern_diversity': 0.0,
            'prediction_accuracy': 0.0
        }
        
        logger.info("AudioReinforcementLoop initialized with all agents")
    
    def process_video_performance(
        self,
        pattern_id: str,
        metrics: PerformanceMetrics,
        state: State,
        action: ActionSpace,
        async_learning: bool = True
    ) -> float:
        """
        Process performance feedback and update RL system.
        Returns calculated reward.
No results found