"""
audio_reinforcement_loop.py
Multi-Agent Reinforcement Learning system for audio virality optimization.
Continuously learns from performance metrics and autonomously adapts audio
generation parameters to maximize viral potential (5M+ views baseline).
Architecture:
- Primary Audio Agent: Optimizes core audio virality patterns
- Visual/Hook Agent: Aligns audio with visual elements
- Meta-Viral Agent: Oversees engagement predictions and reward multipliers
- Real-time feedback integration with platform-specific tuning
"""
import json
import numpy as np
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass, field, asdict
from datetime import datetime, timedelta
from collections import defaultdict, deque
import hashlib
import random
import logging
from enum import Enum
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class Platform(Enum):
"""Supported platforms with specific optimization rules"""
TIKTOK = "tiktok"
YOUTUBE_SHORTS = "youtube_shorts"
INSTAGRAM_REELS = "instagram_reels"
class BeatType(Enum):
"""Audio beat patterns"""
TRAP = "trap"
DRILL = "drill"
HYPERPOP = "hyperpop"
PHONK = "phonk"
LOFI = "lofi"
ORCHESTRAL = "orchestral"
ELECTRONIC = "electronic"
CUSTOM = "custom"
class MemoryLayer(Enum):
"""Memory priority tiers"""
HOT = "hot" # Recently viral, actively used
WARM = "warm" # Proven patterns, occasionally used
COLD = "cold" # Historical data, rarely accessed
@dataclass
class AudioFeatures:
"""Comprehensive audio feature representation"""
pace_wpm: float
pitch_variance: float
hook_jumps: int
pause_timing: List[float]
spectral_centroid: float
emotional_intensity: float
beat_alignment_error: float
volume_dynamics: float
timbre_complexity: float
tempo_bpm: float
syllable_timing_variance: float
def to_vector(self) -> np.ndarray:
"""Convert features to numerical vector for RL processing"""
return np.array([
self.pace_wpm,
self.pitch_variance,
float(self.hook_jumps),
np.mean(self.pause_timing) if self.pause_timing else 0.0,
self.spectral_centroid,
self.emotional_intensity,
self.beat_alignment_error,
self.volume_dynamics,
self.timbre_complexity,
self.tempo_bpm,
self.syllable_timing_variance
])
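
# The raw feature vector mixes very different scales (pace_wpm around 150,
# spectral_centroid in the thousands, most other fields in [0, 1]), which lets
# the large-magnitude dimensions dominate dot-product-based value estimates.
# A minimal min-max scaling sketch follows; the reference ranges below are
# assumptions for illustration, not values taken from this module.
def normalize_feature_vector(vec: np.ndarray) -> np.ndarray:
    """Scale an AudioFeatures vector into roughly [0, 1] per dimension."""
    lows = np.array([80.0, 0.0, 0.0, 0.0, 500.0, 0.0, 0.0, 0.0, 0.0, 60.0, 0.0])
    highs = np.array([220.0, 1.0, 10.0, 3.0, 8000.0, 1.0, 1.0, 1.0, 1.0, 200.0, 1.0])
    return np.clip((vec - lows) / (highs - lows), 0.0, 1.0)
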
@dataclass
class PerformanceMetrics:
"""Video performance tracking"""
views: int
retention_2s: float # % who watched 2+ seconds
completion_rate: float
replay_rate: float
shares: int
saves: int
comments: int
likes: int
watch_through_rate: float
loop_frequency: float
first_3s_retention: float
ctr: float # Click-through rate for thumbnails
def viral_score(self, platform: Platform) -> float:
"""Calculate platform-specific viral score"""
if platform == Platform.TIKTOK:
# TikTok prioritizes loopability and first 3s retention
return (
self.loop_frequency * 0.35 +
self.first_3s_retention * 0.30 +
self.completion_rate * 0.20 +
min(self.views / 5_000_000, 1.0) * 0.15
)
elif platform == Platform.YOUTUBE_SHORTS:
# YouTube prioritizes watch-through and CTR
return (
self.watch_through_rate * 0.35 +
self.ctr * 0.25 +
self.completion_rate * 0.20 +
min(self.views / 5_000_000, 1.0) * 0.20
)
else: # Instagram Reels
return (
self.shares / max(self.views, 1) * 0.30 +
self.saves / max(self.views, 1) * 0.25 +
self.completion_rate * 0.25 +
min(self.views / 5_000_000, 1.0) * 0.20
)
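
# Illustrative helper: the same metrics object scores differently per platform
# because each branch of viral_score() weights different signals (loopability
# and early retention on TikTok, watch-through and CTR on Shorts, shares and
# saves on Reels). Useful as a quick sanity check when tuning the weights.
def compare_viral_scores(metrics: "PerformanceMetrics") -> Dict[str, float]:
    """Return one metrics object's viral score under every platform."""
    return {p.value: metrics.viral_score(p) for p in Platform}
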
@dataclass
class AudioPattern:
"""Identified viral audio pattern"""
pattern_id: str
features: AudioFeatures
niche: str
platform: Platform
beat_type: BeatType
voice_style: str
language: str
music_track: Optional[str]
trending_beat: bool
# Performance tracking
times_used: int = 0
total_views: int = 0
avg_viral_score: float = 0.0
last_used: Optional[datetime] = None
# Memory management
memory_layer: MemoryLayer = MemoryLayer.WARM
decay_factor: float = 1.0
efficacy_score: float = 0.0
def update_performance(self, metrics: PerformanceMetrics, platform: Platform):
"""Update pattern performance with new data"""
self.times_used += 1
self.total_views += metrics.views
viral_score = metrics.viral_score(platform)
# Exponential moving average for viral score
alpha = 0.3 # Learning rate
self.avg_viral_score = (
alpha * viral_score + (1 - alpha) * self.avg_viral_score
if self.avg_viral_score > 0 else viral_score
)
self.last_used = datetime.now()
self.efficacy_score = self._calculate_efficacy()
self._update_memory_layer()
def _calculate_efficacy(self) -> float:
"""Calculate pattern efficacy score"""
view_score = min(self.total_views / (5_000_000 * self.times_used), 1.0)
consistency_score = self.avg_viral_score
recency_bonus = 1.2 if self.trending_beat else 1.0
usage_factor = min(self.times_used / 10, 1.0) # Proven patterns score higher
return (view_score * 0.4 + consistency_score * 0.4 + usage_factor * 0.2) * recency_bonus
def _update_memory_layer(self):
"""Dynamically assign memory layer based on performance and recency"""
days_since_use = (datetime.now() - self.last_used).days if self.last_used else 999
if self.efficacy_score > 0.7 and days_since_use < 7:
self.memory_layer = MemoryLayer.HOT
elif self.efficacy_score > 0.5 and days_since_use < 30:
self.memory_layer = MemoryLayer.WARM
else:
self.memory_layer = MemoryLayer.COLD
def apply_decay(self):
"""Apply time-based decay to pattern weights"""
days_since_use = (datetime.now() - self.last_used).days if self.last_used else 999
self.decay_factor = np.exp(-0.05 * days_since_use) # Exponential decay
@dataclass
class ActionSpace:
"""Available audio modification actions"""
hook_selection: int # Index of hook variant to use
beat_timing_adjustment: float # -0.5 to +0.5 seconds
volume_modulation: float # 0.5 to 1.5 multiplier
pitch_shift: float # -2 to +2 semitones
voice_modulation: str # "energetic", "calm", "dramatic", etc.
transition_type: str # "cut", "fade", "beat_drop", etc.
effect_intensity: float # 0.0 to 1.0
def to_vector(self) -> np.ndarray:
"""Convert action to numerical vector"""
voice_map = {"energetic": 1.0, "calm": 0.3, "dramatic": 0.7, "neutral": 0.5}
transition_map = {"cut": 1.0, "fade": 0.5, "beat_drop": 0.8, "none": 0.0}
return np.array([
float(self.hook_selection),
self.beat_timing_adjustment,
self.volume_modulation,
self.pitch_shift,
voice_map.get(self.voice_modulation, 0.5),
transition_map.get(self.transition_type, 0.5),
self.effect_intensity
])
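
# Note: ActionSpace.to_vector() is a one-way encoding -- voice_map sends both
# "calm" (0.3) and unknown styles (fallback 0.5) to scalars that cannot be
# uniquely decoded, which is why AudioAgent._q_values_to_action rebuilds
# actions by index rather than inverting this map. Illustrative check:
def action_encoding_demo() -> None:
    a = ActionSpace(0, 0.0, 1.0, 0.0, "calm", "fade", 0.5)
    assert a.to_vector()[4] == 0.3  # category encoded, but the map is lossy
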
@dataclass
class State:
"""Complete RL state representation"""
audio_features: AudioFeatures
video_context: Dict[str, Any]
platform: Platform
niche: str
beat_type: BeatType
historical_patterns: List[AudioPattern]
viewer_projections: Dict[str, float]
platform_trends: Dict[str, float]
def to_vector(self) -> np.ndarray:
"""Convert state to numerical vector for neural network input"""
audio_vec = self.audio_features.to_vector()
# Video context features
scene_cuts = self.video_context.get('scene_cuts', 0)
predicted_ctr = self.video_context.get('predicted_ctr', 0.5)
hook_position = self.video_context.get('hook_position', 0.0)
# Viewer projections
predicted_watch_time = self.viewer_projections.get('watch_time', 0.5)
loop_probability = self.viewer_projections.get('loop_prob', 0.5)
# Platform trends
trending_score = self.platform_trends.get('trending_score', 0.5)
context_vec = np.array([
float(scene_cuts),
predicted_ctr,
hook_position,
predicted_watch_time,
loop_probability,
trending_score
])
return np.concatenate([audio_vec, context_vec])
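
# Dimensionality invariant: the state vector is the 11-dim AudioFeatures
# vector concatenated with 6 context scalars, 17 dimensions in total. This
# matters because AudioAgent discretizes the full vector to build Q-table
# keys. A small illustrative guard:
STATE_VECTOR_DIM = 11 + 6

def assert_state_dim(state: "State") -> None:
    assert state.to_vector().shape == (STATE_VECTOR_DIM,)
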
class RewardFunction:
"""Dynamic reward calculation for RL agents"""
def __init__(self):
self.weights = {
'views': 0.25,
'retention': 0.25,
'engagement': 0.25,
'loopability': 0.25
}
self.platform_multipliers = {
Platform.TIKTOK: {'loopability': 1.5, 'first_3s': 1.3},
Platform.YOUTUBE_SHORTS: {'watch_through': 1.4, 'ctr': 1.3},
Platform.INSTAGRAM_REELS: {'shares': 1.5, 'saves': 1.4}
}
def calculate(
self,
metrics: PerformanceMetrics,
platform: Platform,
predicted_metrics: Dict[str, float],
pattern_history: List[AudioPattern]
) -> float:
"""Calculate reward with dynamic weighting and early boost"""
# Base viral score
viral_score = metrics.viral_score(platform)
# View threshold reward
view_reward = self._view_threshold_reward(metrics.views)
# Early retention boost
retention_boost = self._early_retention_boost(metrics.retention_2s, metrics.first_3s_retention)
# Platform-specific multipliers
platform_bonus = self._platform_specific_bonus(metrics, platform)
# Anti-viral penalties
penalties = self._calculate_penalties(metrics, pattern_history)
# Prediction accuracy bonus
prediction_bonus = self._prediction_accuracy(metrics, predicted_metrics)
total_reward = (
viral_score * self.weights['views'] * view_reward +
retention_boost * self.weights['retention'] +
platform_bonus * self.weights['engagement'] +
prediction_bonus * self.weights['loopability'] -
penalties
)
return max(total_reward, 0.0)
def _view_threshold_reward(self, views: int) -> float:
"""Reward based on view milestones"""
if views >= 10_000_000:
return 2.0
elif views >= 5_000_000:
return 1.5
elif views >= 1_000_000:
return 1.2
elif views >= 500_000:
return 1.0
else:
return 0.5
def _early_retention_boost(self, retention_2s: float, retention_3s: float) -> float:
"""Bonus for strong early retention"""
if retention_3s > 0.8:
return 1.5
elif retention_2s > 0.7:
return 1.2
else:
return 1.0
def _platform_specific_bonus(self, metrics: PerformanceMetrics, platform: Platform) -> float:
"""Apply platform-specific engagement multipliers"""
multipliers = self.platform_multipliers.get(platform, {})
bonus = 0.0
if platform == Platform.TIKTOK:
bonus += metrics.loop_frequency * multipliers.get('loopability', 1.0)
bonus += metrics.first_3s_retention * multipliers.get('first_3s', 1.0)
elif platform == Platform.YOUTUBE_SHORTS:
bonus += metrics.watch_through_rate * multipliers.get('watch_through', 1.0)
bonus += metrics.ctr * multipliers.get('ctr', 1.0)
return bonus
def _calculate_penalties(self, metrics: PerformanceMetrics, pattern_history: List[AudioPattern]) -> float:
"""Penalize anti-viral patterns"""
penalty = 0.0
# Low completion penalty
if metrics.completion_rate < 0.3:
penalty += 0.3
# Overused pattern penalty (audience fatigue)
if pattern_history:
recent_usage = sum(1 for p in pattern_history[-10:] if p.times_used > 5)
if recent_usage > 7:
penalty += 0.2
# Poor engagement penalty
engagement_rate = (metrics.likes + metrics.shares + metrics.comments) / max(metrics.views, 1)
if engagement_rate < 0.01:
penalty += 0.15
return penalty
def _prediction_accuracy(self, actual: PerformanceMetrics, predicted: Dict[str, float]) -> float:
"""Bonus for accurate predictions"""
if not predicted:
return 0.5
actual_watch = actual.watch_through_rate
predicted_watch = predicted.get('watch_time', 0.5)
accuracy = 1.0 - abs(actual_watch - predicted_watch)
return accuracy * 0.5
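
# The milestone tiers in _view_threshold_reward act as a coarse multiplier on
# the viral-score term: 0.5 below 500K views, then 1.0 / 1.2 / 1.5 / 2.0 at
# 500K / 1M / 5M / 10M. Illustrative printout of the schedule:
def print_view_reward_schedule() -> None:
    rf = RewardFunction()
    for views in (100_000, 500_000, 1_000_000, 5_000_000, 10_000_000):
        print(f"{views:>12,} views -> multiplier {rf._view_threshold_reward(views)}")
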
class AudioAgent:
"""Primary audio optimization agent"""
def __init__(self, agent_id: str):
self.agent_id = agent_id
self.q_table = defaultdict(lambda: np.random.randn(7) * 0.01) # 7 action dimensions
self.learning_rate = 0.01
self.discount_factor = 0.95
self.epsilon = 0.2 # Exploration rate
self.episode_count = 0
def select_action(self, state: State, explore: bool = True) -> ActionSpace:
"""Select audio modification action using epsilon-greedy policy"""
state_key = self._state_to_key(state)
if explore and np.random.random() < self.epsilon:
# Exploration: random action
return self._random_action()
else:
# Exploitation: best known action
q_values = self.q_table[state_key]
return self._q_values_to_action(q_values)
def update(self, state: State, action: ActionSpace, reward: float, next_state: State):
"""Update Q-values using TD learning"""
state_key = self._state_to_key(state)
next_state_key = self._state_to_key(next_state)
action_vec = action.to_vector()
current_q = self.q_table[state_key]
next_q = self.q_table[next_state_key]
        # TD update with linear action features: Q(s, a) ~= w_s . a, so the
        # per-state weights move by w_s += alpha * [r + gamma * max(w_s') - (w_s . a)] * a
td_target = reward + self.discount_factor * np.max(next_q)
td_error = td_target - np.dot(current_q, action_vec)
self.q_table[state_key] += self.learning_rate * td_error * action_vec
self.episode_count += 1
self._decay_epsilon()
def _state_to_key(self, state: State) -> str:
"""Convert state to hashable key"""
state_vec = state.to_vector()
# Discretize continuous values for Q-table indexing
discretized = tuple(np.round(state_vec, 2))
return hashlib.md5(str(discretized).encode()).hexdigest()[:16]
def _random_action(self) -> ActionSpace:
"""Generate random exploration action"""
return ActionSpace(
hook_selection=np.random.randint(0, 5),
beat_timing_adjustment=np.random.uniform(-0.5, 0.5),
volume_modulation=np.random.uniform(0.7, 1.3),
pitch_shift=np.random.uniform(-1.5, 1.5),
voice_modulation=np.random.choice(["energetic", "calm", "dramatic", "neutral"]),
transition_type=np.random.choice(["cut", "fade", "beat_drop", "none"]),
effect_intensity=np.random.uniform(0.2, 0.9)
)
def _q_values_to_action(self, q_values: np.ndarray) -> ActionSpace:
"""Convert Q-values to action space"""
return ActionSpace(
hook_selection=int(abs(q_values[0]) % 5),
beat_timing_adjustment=np.clip(q_values[1], -0.5, 0.5),
volume_modulation=np.clip(q_values[2], 0.5, 1.5),
pitch_shift=np.clip(q_values[3], -2.0, 2.0),
voice_modulation=["energetic", "calm", "dramatic", "neutral"][int(abs(q_values[4]) % 4)],
transition_type=["cut", "fade", "beat_drop", "none"][int(abs(q_values[5]) % 4)],
effect_intensity=np.clip(q_values[6], 0.0, 1.0)
)
def _decay_epsilon(self):
"""Gradually reduce exploration rate"""
self.epsilon = max(0.05, self.epsilon * 0.9995)
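
# With epsilon starting at 0.2 and multiplied by 0.9995 per update, the agent
# reaches its 0.05 exploration floor after about ln(0.05/0.2) / ln(0.9995),
# roughly 2,772 episodes. Illustrative calculation:
def episodes_until_epsilon_floor(start: float = 0.2, floor: float = 0.05,
                                 decay: float = 0.9995) -> int:
    return int(np.ceil(np.log(floor / start) / np.log(decay)))
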
class MetaViralAgent:
"""Meta-agent for overseeing engagement predictions and reward multipliers"""
def __init__(self):
self.prediction_history = deque(maxlen=1000)
self.reward_multipliers = {
'trending': 1.0,
'niche': 1.0,
'platform': 1.0
}
def predict_engagement(self, state: State, action: ActionSpace) -> Dict[str, float]:
"""Predict engagement metrics for state-action pair"""
# Simple prediction based on historical patterns
predictions = {
'views': self._predict_views(state, action),
'watch_time': self._predict_watch_time(state),
'loop_prob': self._predict_loop_probability(state, action),
'viral_score': self._predict_viral_score(state, action)
}
return predictions
def adjust_reward_multipliers(self, recent_performance: List[Tuple[State, PerformanceMetrics]]):
"""Dynamically adjust reward multipliers based on recent trends"""
if len(recent_performance) < 10:
return
        # Analyze trending patterns (keep state/metrics pairs together so the
        # platform is available when scoring)
        trending = [
            (s, p) for s, p in recent_performance
            if s.platform_trends.get('trending_score', 0) > 0.7
        ]
        if len(trending) >= 5:
            avg_trending_score = np.mean([p.viral_score(s.platform) for s, p in trending])
            self.reward_multipliers['trending'] = 1.0 + (avg_trending_score - 0.5) * 0.5
# Adjust platform multipliers
platform_scores = defaultdict(list)
for state, metrics in recent_performance:
platform_scores[state.platform].append(metrics.viral_score(state.platform))
for platform, scores in platform_scores.items():
avg_score = np.mean(scores)
self.reward_multipliers[platform.value] = 1.0 + (avg_score - 0.5) * 0.3
def _predict_views(self, state: State, action: ActionSpace) -> float:
"""Predict view count based on state and action"""
# Simplified prediction model
base_views = 1_000_000
feature_boost = state.audio_features.emotional_intensity * 0.5
platform_factor = 1.2 if state.platform == Platform.TIKTOK else 1.0
return base_views * (1 + feature_boost) * platform_factor
def _predict_watch_time(self, state: State) -> float:
"""Predict watch-through rate"""
return state.viewer_projections.get('watch_time', 0.5)
def _predict_loop_probability(self, state: State, action: ActionSpace) -> float:
"""Predict likelihood of video loops"""
hook_quality = 0.7 if action.hook_selection < 2 else 0.5
beat_alignment = 1.0 - state.audio_features.beat_alignment_error
return (hook_quality + beat_alignment) / 2.0
def _predict_viral_score(self, state: State, action: ActionSpace) -> float:
"""Overall viral potential prediction"""
predictions = {
'views': self._predict_views(state, action),
'watch_time': self._predict_watch_time(state),
'loop_prob': self._predict_loop_probability(state, action)
}
return (predictions['watch_time'] * 0.4 + predictions['loop_prob'] * 0.6)
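
# Illustrative usage of the prediction surface: predict_engagement() returns
# scalar forecasts that RewardFunction._prediction_accuracy later compares
# against realized metrics (currently only 'watch_time' feeds the bonus).
def preview_predictions(meta: "MetaViralAgent", state: "State",
                        action: "ActionSpace") -> None:
    for name, value in meta.predict_engagement(state, action).items():
        print(f"{name}: {value:,.3f}")
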
class AudioMemoryManager:
"""Manages audio patterns with HOT/WARM/COLD memory layers"""
def __init__(self):
self.patterns: Dict[str, AudioPattern] = {}
self.hot_patterns: List[str] = []
self.warm_patterns: List[str] = []
self.cold_patterns: List[str] = []
self.replay_buffer = deque(maxlen=100)
def store_pattern(self, pattern: AudioPattern):
"""Store audio pattern and assign to memory layer"""
self.patterns[pattern.pattern_id] = pattern
self._assign_to_layer(pattern)
def retrieve_top_patterns(
self,
niche: str,
platform: Platform,
beat_type: BeatType,
n: int = 5
) -> List[AudioPattern]:
"""Retrieve top-performing patterns for given context"""
# Filter by context
candidates = [
p for p in self.patterns.values()
if p.niche == niche and p.platform == platform and p.beat_type == beat_type
]
# Sort by efficacy score with decay applied
for pattern in candidates:
pattern.apply_decay()
candidates.sort(key=lambda p: p.efficacy_score * p.decay_factor, reverse=True)
# Prioritize HOT patterns
hot_candidates = [p for p in candidates if p.memory_layer == MemoryLayer.HOT]
warm_candidates = [p for p in candidates if p.memory_layer == MemoryLayer.WARM]
result = (hot_candidates + warm_candidates)[:n]
# Add to replay buffer
for pattern in result:
self.replay_buffer.append(pattern)
return result
def update_pattern_performance(
self,
pattern_id: str,
metrics: PerformanceMetrics,
platform: Platform
):
"""Update pattern with new performance data"""
if pattern_id in self.patterns:
pattern = self.patterns[pattern_id]
pattern.update_performance(metrics, platform)
self._assign_to_layer(pattern)
def enforce_diversity(self, recent_pattern_ids: List[str], threshold: int = 3):
"""Penalize overused patterns to maintain diversity"""
usage_count = defaultdict(int)
for pid in recent_pattern_ids[-10:]:
usage_count[pid] += 1
for pid, count in usage_count.items():
if count >= threshold and pid in self.patterns:
self.patterns[pid].efficacy_score *= 0.8 # Diversity penalty
def _assign_to_layer(self, pattern: AudioPattern):
"""Dynamically assign pattern to appropriate memory layer"""
pattern_id = pattern.pattern_id
# Remove from all layers first
self.hot_patterns = [p for p in self.hot_patterns if p != pattern_id]
self.warm_patterns = [p for p in self.warm_patterns if p != pattern_id]
self.cold_patterns = [p for p in self.cold_patterns if p != pattern_id]
# Assign to new layer
if pattern.memory_layer == MemoryLayer.HOT:
self.hot_patterns.append(pattern_id)
elif pattern.memory_layer == MemoryLayer.WARM:
self.warm_patterns.append(pattern_id)
else:
self.cold_patterns.append(pattern_id)
def get_replay_samples(self, n: int = 10) -> List[AudioPattern]:
"""Get samples from replay buffer for training"""
if len(self.replay_buffer) < n:
return list(self.replay_buffer)
        # random.sample avoids round-tripping pattern objects through a NumPy object array
        return random.sample(list(self.replay_buffer), n)
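
# Lifecycle summary (illustrative): patterns enter WARM by default, are
# promoted to HOT when efficacy exceeds 0.7 with use in the last 7 days, and
# sink to COLD as decay and inactivity accumulate (see _update_memory_layer).
def memory_layer_summary(manager: "AudioMemoryManager") -> Dict[str, int]:
    return {
        MemoryLayer.HOT.value: len(manager.hot_patterns),
        MemoryLayer.WARM.value: len(manager.warm_patterns),
        MemoryLayer.COLD.value: len(manager.cold_patterns),
    }
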
class AudioReinforcementLoop:
"""Main RL system orchestrating all agents and learning"""
def __init__(self):
self.audio_agent = AudioAgent("primary_audio")
self.meta_agent = MetaViralAgent()
self.reward_function = RewardFunction()
self.memory_manager = AudioMemoryManager()
# Performance tracking
self.performance_history = deque(maxlen=1000)
self.recent_states = deque(maxlen=100)
# Engine weights for TTS and voice sync
self.engine_weights = {
'tts': {},
'voice_sync': {}
}
# Platform-specific rules
self.platform_rules = self._initialize_platform_rules()
# Training metrics
self.training_metrics = {
'total_episodes': 0,
'avg_reward': 0.0,
'pattern_success_rate': 0.0,
'exploration_rate': 0.2
}
logger.info("AudioReinforcementLoop initialized")
def _initialize_platform_rules(self) -> Dict[Platform, Dict]:
"""Platform-specific audio constraints and preferences"""
return {
Platform.TIKTOK: {
'max_duration': 60,
'optimal_hook_position': 0.5, # seconds
'preferred_tempo_range': (120, 160),
'loop_importance': 0.9
},
Platform.YOUTUBE_SHORTS: {
'max_duration': 60,
'optimal_hook_position': 1.0,
'preferred_tempo_range': (100, 150),
'watch_through_importance': 0.9
},
Platform.INSTAGRAM_REELS: {
'max_duration': 90,
'optimal_hook_position': 0.8,
'preferred_tempo_range': (110, 145),
'share_importance': 0.85
}
}
def process_video_performance(
self,
pattern_id: str,
metrics: PerformanceMetrics,
state: State,
action: ActionSpace
) -> float:
"""
Process performance feedback and update RL components.
Returns the calculated reward.
"""
# Update pattern in memory
self.memory_manager.update_pattern_performance(
pattern_id,
metrics,
state.platform
)
# Get historical patterns for context
pattern_history = self.memory_manager.retrieve_top_patterns(
state.niche,
state.platform,
state.beat_type,
n=10
)
# Calculate reward
predicted_metrics = self.meta_agent.predict_engagement(state, action)
reward = self.reward_function.calculate(
metrics,
state.platform,
predicted_metrics,
pattern_history
)
# Adjust reward with meta-agent multipliers
reward *= self.meta_agent.reward_multipliers.get('trending', 1.0)
reward *= self.meta_agent.reward_multipliers.get(state.platform.value, 1.0)
# Check for platform rule violations
penalties = self._check_platform_violations(state, action, metrics)
reward -= penalties
# Store in performance history
self.performance_history.append((state, metrics))
self.recent_states.append(state)
# Update audio agent
if len(self.recent_states) > 1:
next_state = self.recent_states[-1]
prev_state = self.recent_states[-2]
self.audio_agent.update(prev_state, action, reward, next_state)
        # Periodically adjust meta-agent multipliers. Key off the episode count:
        # performance_history is a bounded deque, so len() saturates at maxlen
        # and would otherwise trigger this on every call once the buffer fills.
        if self.training_metrics['total_episodes'] % 50 == 0:
            self.meta_agent.adjust_reward_multipliers(list(self.performance_history))
# Update training metrics
self._update_training_metrics(reward)
# Check if pattern should be reinforced or penalized
if metrics.views >= 5_000_000:
self._reinforce_pattern(pattern_id, state, action)
elif metrics.views < 100_000 and metrics.completion_rate < 0.3:
self._penalize_pattern(pattern_id)
logger.info(f"Processed performance for pattern {pattern_id}: reward={reward:.3f}, views={metrics.views}")
return reward
def _check_platform_violations(self, state: State, action: ActionSpace, metrics: PerformanceMetrics) -> float:
"""Check for platform rule violations and return penalty"""
penalty = 0.0
rules = self.platform_rules.get(state.platform, {})
# Check tempo range
tempo_range = rules.get('preferred_tempo_range', (0, 999))
if not (tempo_range[0] <= state.audio_features.tempo_bpm <= tempo_range[1]):
penalty += 0.15
# Check beat alignment
if state.audio_features.beat_alignment_error > 0.2:
penalty += 0.2
# Platform-specific checks
if state.platform == Platform.TIKTOK:
if metrics.loop_frequency < 0.3:
penalty += 0.15
if metrics.first_3s_retention < 0.5:
penalty += 0.2
return penalty
def _reinforce_pattern(self, pattern_id: str, state: State, action: ActionSpace):
"""Reinforce winning patterns by boosting their weights"""
if pattern_id in self.memory_manager.patterns:
pattern = self.memory_manager.patterns[pattern_id]
pattern.efficacy_score = min(pattern.efficacy_score * 1.2, 1.0)
pattern.memory_layer = MemoryLayer.HOT
self.memory_manager._assign_to_layer(pattern)
# Update engine weights
self._update_engine_weights_from_pattern(pattern, boost=True)
def _penalize_pattern(self, pattern_id: str):
"""Penalize underperforming patterns"""
if pattern_id in self.memory_manager.patterns:
pattern = self.memory_manager.patterns[pattern_id]
pattern.efficacy_score *= 0.7
if pattern.efficacy_score < 0.3:
pattern.memory_layer = MemoryLayer.COLD
self.memory_manager._assign_to_layer(pattern)
def _update_training_metrics(self, reward: float):
"""Update overall training statistics"""
self.training_metrics['total_episodes'] += 1
alpha = 0.1
self.training_metrics['avg_reward'] = (
alpha * reward + (1 - alpha) * self.training_metrics['avg_reward']
)
self.training_metrics['exploration_rate'] = self.audio_agent.epsilon
# Calculate pattern success rate
if len(self.performance_history) > 0:
recent_successes = sum(
1 for _, m in list(self.performance_history)[-50:]
if m.views >= 1_000_000
)
self.training_metrics['pattern_success_rate'] = recent_successes / min(50, len(self.performance_history))
def get_optimal_audio_action(
self,
niche: str,
platform: Platform,
beat_type: BeatType,
video_context: Dict[str, Any],
explore: bool = False
) -> Tuple[ActionSpace, List[AudioPattern]]:
"""
Get optimal audio modifications for a new video.
Returns action to take and relevant historical patterns.
"""
# Retrieve top patterns from memory
top_patterns = self.memory_manager.retrieve_top_patterns(
niche, platform, beat_type, n=5
)
if not top_patterns:
logger.warning(f"No patterns found for niche={niche}, platform={platform}, beat={beat_type}")
# Return default safe action
return self._get_default_action(), []
# Construct state from best pattern
best_pattern = top_patterns[0]
state = State(
audio_features=best_pattern.features,
video_context=video_context,
platform=platform,
niche=niche,
beat_type=beat_type,
historical_patterns=top_patterns,
viewer_projections={'watch_time': 0.6, 'loop_prob': 0.5},
platform_trends={'trending_score': 0.5}
)
# Get action from audio agent
action = self.audio_agent.select_action(state, explore=explore)
# Get meta-agent predictions
predictions = self.meta_agent.predict_engagement(state, action)
logger.info(f"Generated optimal action for {niche}/{platform.value}/{beat_type.value}")
logger.info(f"Predicted viral score: {predictions['viral_score']:.3f}")
return action, top_patterns
def get_current_optimal_audio_profile(
self,
niche: str,
platform: Platform,
beat_type: BeatType
) -> Dict[str, Any]:
"""
API Method: Get current optimal audio profile for given context.
Returns comprehensive audio configuration.
"""
top_patterns = self.memory_manager.retrieve_top_patterns(
niche, platform, beat_type, n=3
)
if not top_patterns:
return self._get_default_profile(niche, platform, beat_type)
best_pattern = top_patterns[0]
profile = {
'pattern_id': best_pattern.pattern_id,
'efficacy_score': best_pattern.efficacy_score,
'memory_layer': best_pattern.memory_layer.value,
'audio_features': {
'pace_wpm': best_pattern.features.pace_wpm,
'pitch_variance': best_pattern.features.pitch_variance,
'emotional_intensity': best_pattern.features.emotional_intensity,
'tempo_bpm': best_pattern.features.tempo_bpm,
'beat_alignment_error': best_pattern.features.beat_alignment_error
},
'voice_style': best_pattern.voice_style,
'music_track': best_pattern.music_track,
'trending_beat': best_pattern.trending_beat,
'performance_stats': {
'times_used': best_pattern.times_used,
'total_views': best_pattern.total_views,
'avg_viral_score': best_pattern.avg_viral_score
},
'engine_weights': self.engine_weights.get(best_pattern.pattern_id, {}),
'recommendations': self._generate_recommendations(best_pattern, platform)
}
return profile
def update_engine_weights(
self,
pattern_id: Optional[str] = None,
manual_weights: Optional[Dict] = None
):
"""
API Method: Update TTS and voice sync engine weights.
Can update from a specific pattern or manual configuration.
"""
if manual_weights:
self.engine_weights.update(manual_weights)
logger.info("Updated engine weights manually")
return
if pattern_id and pattern_id in self.memory_manager.patterns:
pattern = self.memory_manager.patterns[pattern_id]
self._update_engine_weights_from_pattern(pattern, boost=True)
logger.info(f"Updated engine weights from pattern {pattern_id}")
else:
# Update from all HOT patterns
for pid in self.memory_manager.hot_patterns:
pattern = self.memory_manager.patterns[pid]
self._update_engine_weights_from_pattern(pattern, boost=True)
logger.info(f"Updated engine weights from {len(self.memory_manager.hot_patterns)} HOT patterns")
def _update_engine_weights_from_pattern(self, pattern: AudioPattern, boost: bool = False):
"""Extract and store engine weights from pattern"""
multiplier = 1.2 if boost else 1.0
weights = {
'pace_target': pattern.features.pace_wpm * multiplier,
'pitch_variance_target': pattern.features.pitch_variance,
'emotional_intensity': pattern.features.emotional_intensity * multiplier,
'tempo_bpm': pattern.features.tempo_bpm,
'voice_style': pattern.voice_style,
'beat_alignment_tolerance': max(0.05, pattern.features.beat_alignment_error - 0.05)
}
self.engine_weights[pattern.pattern_id] = weights
# Update global TTS weights
if 'tts' not in self.engine_weights:
self.engine_weights['tts'] = {}
# Weighted average with existing weights
alpha = 0.3
for key, value in weights.items():
if isinstance(value, (int, float)):
current = self.engine_weights['tts'].get(key, value)
self.engine_weights['tts'][key] = alpha * value + (1 - alpha) * current
def _generate_recommendations(self, pattern: AudioPattern, platform: Platform) -> List[str]:
"""Generate actionable recommendations based on pattern"""
recommendations = []
if pattern.features.emotional_intensity > 0.7:
recommendations.append("High emotional intensity working well - maintain energy level")
if pattern.features.beat_alignment_error < 0.1:
recommendations.append("Excellent beat alignment - keep tight synchronization")
if pattern.trending_beat:
recommendations.append("Currently using trending beat - capitalize on trend")
if pattern.features.pace_wpm > 160:
recommendations.append("Fast pace driving engagement - maintain rapid delivery")
return recommendations
def _get_default_action(self) -> ActionSpace:
"""Safe default action when no patterns available"""
return ActionSpace(
hook_selection=0,
beat_timing_adjustment=0.0,
volume_modulation=1.0,
pitch_shift=0.0,
voice_modulation="energetic",
transition_type="fade",
effect_intensity=0.5
)
def _get_default_profile(self, niche: str, platform: Platform, beat_type: BeatType) -> Dict[str, Any]:
"""Default profile when no patterns exist"""
return {
'pattern_id': 'default',
'efficacy_score': 0.5,
'memory_layer': 'warm',
'audio_features': {
'pace_wpm': 150,
'pitch_variance': 0.5,
'emotional_intensity': 0.6,
'tempo_bpm': 130,
'beat_alignment_error': 0.1
},
'voice_style': 'energetic',
'music_track': None,
'trending_beat': False,
'performance_stats': {
'times_used': 0,
'total_views': 0,
'avg_viral_score': 0.0
},
'engine_weights': {},
'recommendations': ['No patterns available - using safe defaults']
}
def train_from_batch(self, batch_data: List[Dict[str, Any]]):
"""
Batch training from historical data.
        Each item should contain: state, action, metrics, and optionally pattern_id
"""
logger.info(f"Starting batch training with {len(batch_data)} samples")
for item in batch_data:
state = item['state']
action = item['action']
metrics = item['metrics']
pattern_id = item.get('pattern_id', 'unknown')
# Process as if it's new performance data
self.process_video_performance(pattern_id, metrics, state, action)
# Enforce diversity after batch
recent_ids = [item.get('pattern_id', 'unknown') for item in batch_data]
self.memory_manager.enforce_diversity(recent_ids)
logger.info("Batch training complete")
def export_state(self) -> Dict[str, Any]:
"""Export complete system state for persistence"""
return {
'patterns': {pid: asdict(p) for pid, p in self.memory_manager.patterns.items()},
'hot_patterns': self.memory_manager.hot_patterns,
'warm_patterns': self.memory_manager.warm_patterns,
'cold_patterns': self.memory_manager.cold_patterns,
'engine_weights': self.engine_weights,
'training_metrics': self.training_metrics,
'q_table_size': len(self.audio_agent.q_table),
'reward_multipliers': self.meta_agent.reward_multipliers
}
def import_state(self, state_data: Dict[str, Any]):
"""Import previously exported state"""
# Reconstruct patterns
for pid, pattern_dict in state_data.get('patterns', {}).items():
# Convert dict back to objects
features = AudioFeatures(**pattern_dict['features'])
platform = Platform(pattern_dict['platform'])
beat_type = BeatType(pattern_dict['beat_type'])
memory_layer = MemoryLayer(pattern_dict['memory_layer'])
pattern = AudioPattern(
pattern_id=pid,
features=features,
niche=pattern_dict['niche'],
platform=platform,
beat_type=beat_type,
voice_style=pattern_dict['voice_style'],
language=pattern_dict['language'],
music_track=pattern_dict.get('music_track'),
trending_beat=pattern_dict['trending_beat'],
times_used=pattern_dict['times_used'],
total_views=pattern_dict['total_views'],
avg_viral_score=pattern_dict['avg_viral_score'],
memory_layer=memory_layer,
efficacy_score=pattern_dict['efficacy_score']
)
self.memory_manager.store_pattern(pattern)
# Restore weights and metrics
self.engine_weights = state_data.get('engine_weights', {})
self.training_metrics = state_data.get('training_metrics', self.training_metrics)
logger.info(f"Imported state with {len(self.memory_manager.patterns)} patterns")
# Example usage and integration points
if __name__ == "__main__":
# Initialize the RL system
rl_system = AudioReinforcementLoop()
# Example: Get optimal audio profile for new video
profile = rl_system.get_current_optimal_audio_profile(
niche="tech_reviews",
platform=Platform.TIKTOK,
beat_type=BeatType.PHONK
)
print(f"Optimal audio profile: {json.dumps(profile, indent=2, default=str)}")
# Example: Process performance feedback
sample_features = AudioFeatures(
pace_wpm=155,
pitch_variance=0.6,
hook_jumps=3,
pause_timing=[0.5, 1.0, 0.3],
spectral_centroid=2500,
emotional_intensity=0.8,
beat_alignment_error=0.08,
volume_dynamics=0.7,
timbre_complexity=0.65,
tempo_bpm=135,
syllable_timing_variance=0.15
)
sample_pattern = AudioPattern(
pattern_id="pattern_001",
features=sample_features,
niche="tech_reviews",
platform=Platform.TIKTOK,
beat_type=BeatType.PHONK,
voice_style="energetic",
language="en",
music_track="trending_phonk_01",
trending_beat=True
)
rl_system.memory_manager.store_pattern(sample_pattern)
sample_metrics = PerformanceMetrics(
views=6_500_000,
retention_2s=0.82,
completion_rate=0.71,
replay_rate=0.45,
shares=125_000,
saves=89_000,
comments=45_000,
likes=850_000,
watch_through_rate=0.68,
loop_frequency=0.52,
first_3s_retention=0.79,
ctr=0.12
)
sample_state = State(
audio_features=sample_features,
video_context={'scene_cuts': 8, 'predicted_ctr': 0.11, 'hook_position': 0.6},
platform=Platform.TIKTOK,
niche="tech_reviews",
beat_type=BeatType.PHONK,
historical_patterns=[sample_pattern],
viewer_projections={'watch_time': 0.65, 'loop_prob': 0.50},
platform_trends={'trending_score': 0.75}
)
sample_action = ActionSpace(
hook_selection=1,
beat_timing_adjustment=0.1,
volume_modulation=1.1,
pitch_shift=0.5,
voice_modulation="energetic",
transition_type="beat_drop",
effect_intensity=0.7
)
reward = rl_system.process_video_performance(
pattern_id="pattern_001",
metrics=sample_metrics,
state=sample_state,
action=sample_action
)
print(f"\nProcessed performance - Reward: {reward:.3f}")
print(f"Training metrics: {json.dumps(rl_system.training_metrics, indent=2)}")
# Update engine weights based on successful pattern
rl_system.update_engine_weights(pattern_id="pattern_001")
# Get optimal action for next video
next_action, relevant_patterns = rl_system.get_optimal_audio_action(
niche="tech_reviews",
platform=Platform.TIKTOK,
beat_type=BeatType.PHONK,
video_context={'scene_cuts': 7, 'predicted_ctr': 0.10, 'hook_position': 0.5}
)
print(f"\nNext optimal action: {asdict(next_action)}")
print(f"Based on {len(relevant_patterns)} relevant patterns")