Created
December 30, 2025 04:56
-
-
Save bogged-broker/c00400db1ffa2996cad58f38dd727218 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
audio_pattern_learner.py

Analyzes audio performance records to identify statistically significant patterns
that correlate with viral audio success. Provides actionable recommendations for
TTS and voice-sync engines.

Version 1: Heuristic/statistical analysis (no deep learning)
Future: Can be upgraded to an RL-based continuous learning system
"""
import json
import math
import statistics
from collections import defaultdict
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple
@dataclass
class AudioMetrics:
    """Raw audio feature measurements for a single piece of audio."""

    pace_wpm: float
    avg_pitch_hz: float
    pitch_variance: float
    pitch_jump_count: int  # Number of significant pitch changes
    pause_density: float  # Pauses per minute
    avg_pause_duration_ms: float
    beat_alignment_score: float  # 0-1, how well synced to music
    emphasis_peak_count: int  # Number of vocal emphasis points
@dataclass
class PerformanceMetrics:
    """Video performance outcomes."""

    completion_rate: float  # 0-1
    retention_curve: List[float]  # Retention at 10%, 20%, ..., 100%
    engagement_score: float  # Composite: likes, comments, shares
    viral_score: float  # Composite virality metric
@dataclass
class AudioProfile:
    """Recommended audio configuration for a niche/platform."""

    niche: str
    platform: str

    # Core recommendations
    target_pace_wpm: float
    pace_tolerance: float  # +/- range
    pitch_baseline_hz: float
    pitch_variance_target: float
    pitch_jump_frequency: str  # "low", "medium", "high"
    pause_density_target: float  # Pauses per minute
    pause_duration_ms: Tuple[float, float]  # (min, max)
    beat_alignment_importance: str  # "critical", "important", "optional"
    beat_alignment_threshold: float
    emphasis_strategy: str  # "sparse", "moderate", "frequent"
    emphasis_points_per_min: float

    # Supporting data
    confidence_score: float  # 0-1, based on sample size
    sample_size: int
    top_performers_analyzed: int

    # Explainability
    key_differentiators: List[str]  # What separates winners from losers
    anti_patterns: List[str]  # What to avoid

    def __post_init__(self) -> None:
        # JSON has no tuple type, so a profile rebuilt from a json.load()ed
        # dict arrives with a list here; normalise it back to the declared
        # (min, max) tuple so instances compare equal after a round-trip.
        self.pause_duration_ms = tuple(self.pause_duration_ms)
class AudioPatternLearner:
    """
    Analyzes audio performance data to extract viral patterns.

    Architecture:
    1. Data ingestion from audio_performance_store
    2. Statistical analysis: winners vs losers
    3. Pattern clustering by niche/platform
    4. Profile generation with explainability

    Future RL upgrade path:
    - Replace statistical thresholds with learned reward functions
    - Implement multi-armed bandit for A/B testing recommendations
    - Add temporal decay for concept drift (trends change over time)
    - Continuous learning loop with performance feedback
    """

    # Report feature name -> key inside a record's "audio_features" dict.
    # Insertion order defines the order of differentiators in a profile.
    _FEATURE_KEYS = {
        "pace": "pace_wpm",
        "pitch_variance": "pitch_variance",
        "pitch_jumps": "pitch_jump_count",
        "pause_density": "pause_density",
        "pause_duration": "avg_pause_duration_ms",
        "beat_alignment": "beat_alignment_score",
        "emphasis": "emphasis_peak_count",
    }

    def __init__(self, data_dir: str = "./audio_performance_data"):
        """Create the learner and make sure ``data_dir`` exists.

        Args:
            data_dir: Directory holding ``performance_records.jsonl`` and the
                ``audio_profiles.json`` cache.
        """
        self.data_dir = Path(data_dir)
        # parents=True so a nested data directory can be created in one go.
        self.data_dir.mkdir(parents=True, exist_ok=True)

        # Thresholds for winner/loser classification
        self.VIRAL_THRESHOLD = 0.75  # Top 25% are "winners"
        self.MIN_SAMPLE_SIZE = 10  # Minimum records for reliable analysis

        # Statistical significance thresholds
        self.SIGNIFICANCE_DELTA = 0.15  # 15% difference = significant

    def load_performance_records(self) -> List[Dict]:
        """Load all performance records from ``performance_records.jsonl``.

        Returns:
            One dict per non-blank line, or an empty list when the file does
            not exist.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        record_file = self.data_dir / "performance_records.jsonl"
        if not record_file.exists():
            return []

        records = []
        with open(record_file, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                # Blank lines (e.g. a trailing newline) are not records.
                if line:
                    records.append(json.loads(line))
        return records

    def _viral_threshold(self, all_scores: List[float]) -> float:
        """Return the score at the VIRAL_THRESHOLD percentile of ``all_scores``.

        ``all_scores`` must be non-empty.
        """
        sorted_scores = sorted(all_scores)
        idx = int(len(sorted_scores) * self.VIRAL_THRESHOLD)
        # Clamp so a threshold of 1.0 selects the highest score.
        return sorted_scores[min(idx, len(sorted_scores) - 1)]

    def classify_performance(self, viral_score: float, all_scores: List[float]) -> str:
        """Classify a record as winner/loser based on percentile.

        Returns:
            "unknown" when ``all_scores`` is empty, "winner" when
            ``viral_score`` is at or above the percentile threshold,
            otherwise "loser".
        """
        if not all_scores:
            return "unknown"
        threshold = self._viral_threshold(all_scores)
        return "winner" if viral_score >= threshold else "loser"

    def calculate_delta(self, winners: List[float], losers: List[float]) -> Dict:
        """
        Calculate statistical difference between winner and loser groups.

        Returns:
            Dict with keys ``significant``, ``delta`` (relative difference of
            the means, with the loser mean as baseline), ``winner_avg``,
            ``loser_avg``, ``winner_std`` and ``loser_std``. All numeric
            values are 0 and ``significant`` is False when either group is
            empty.
        """
        if not winners or not losers:
            return {
                "significant": False,
                "delta": 0,
                "winner_avg": 0,
                "loser_avg": 0,
                "winner_std": 0,
                "loser_std": 0,
            }

        winner_avg = statistics.mean(winners)
        loser_avg = statistics.mean(losers)

        # Avoid division by zero
        baseline = max(abs(loser_avg), 0.01)
        delta_pct = (winner_avg - loser_avg) / baseline

        return {
            "significant": abs(delta_pct) >= self.SIGNIFICANCE_DELTA,
            "delta": delta_pct,
            "winner_avg": winner_avg,
            "loser_avg": loser_avg,
            # stdev needs at least two samples.
            "winner_std": statistics.stdev(winners) if len(winners) > 1 else 0,
            "loser_std": statistics.stdev(losers) if len(losers) > 1 else 0,
        }

    def _collect_metrics(self, group: List[Dict]) -> Dict[str, List[float]]:
        """Gather each audio feature of ``group`` into a per-feature list."""
        metrics = defaultdict(list)
        for record in group:
            audio = record["audio_features"]
            for feature, key in self._FEATURE_KEYS.items():
                metrics[feature].append(audio[key])
        return metrics

    def analyze_niche_platform(self, records: List[Dict], niche: str, platform: str) -> Optional["AudioProfile"]:
        """
        Analyze records for specific niche/platform combination.

        Core algorithm:
        1. Filter records by niche/platform
        2. Classify into winners/losers by viral_score percentile
        3. Calculate deltas for each audio feature
        4. Identify significant differentiators
        5. Generate recommended profile

        Returns:
            The generated profile, or None when there are fewer than
            MIN_SAMPLE_SIZE matching records or when the records do not split
            into both a winner and a loser group.

        Raises:
            KeyError: If a matching record lacks ``performance.viral_score``
                or one of the expected ``audio_features`` keys.
        """
        # Missing niche/platform default to "unknown", the same default
        # learn_all_patterns uses when it enumerates combinations.
        filtered = [
            r for r in records
            if r.get("niche", "unknown") == niche
            and r.get("platform", "unknown") == platform
        ]
        if not filtered or len(filtered) < self.MIN_SAMPLE_SIZE:
            return None  # Insufficient data

        # Compute the percentile threshold once for the whole group.
        viral_scores = [r["performance"]["viral_score"] for r in filtered]
        threshold = self._viral_threshold(viral_scores)

        winners = []
        losers = []
        for record in filtered:
            is_winner = record["performance"]["viral_score"] >= threshold
            (winners if is_winner else losers).append(record)

        if not winners or not losers:
            return None  # Need both groups for comparison

        winner_metrics = self._collect_metrics(winners)
        loser_metrics = self._collect_metrics(losers)

        # Calculate deltas for each feature
        deltas = {
            feature: self.calculate_delta(winner_metrics[feature], loser_metrics[feature])
            for feature in self._FEATURE_KEYS
        }

        # Identify key differentiators (features with significant deltas)
        key_differentiators = []
        anti_patterns = []
        for feature, delta_info in deltas.items():
            if not delta_info["significant"]:
                continue
            winners_higher = delta_info["delta"] > 0
            direction = "higher" if winners_higher else "lower"
            key_differentiators.append(
                f"{feature}: {direction} by {abs(delta_info['delta'])*100:.1f}% "
                f"({delta_info['winner_avg']:.2f} vs {delta_info['loser_avg']:.2f})"
            )
            # Anti-patterns are opposite of winning patterns
            opposite_direction = "lower" if winners_higher else "higher"
            anti_patterns.append(f"Avoid {opposite_direction} {feature}")

        return self._build_audio_profile(
            niche=niche,
            platform=platform,
            winner_metrics=winner_metrics,
            deltas=deltas,
            key_differentiators=key_differentiators,
            anti_patterns=anti_patterns,
            sample_size=len(filtered),
            top_performers=len(winners),
        )

    def _build_audio_profile(self, niche: str, platform: str,
                             winner_metrics: Dict, deltas: Dict,
                             key_differentiators: List[str],
                             anti_patterns: List[str],
                             sample_size: int,
                             top_performers: int) -> "AudioProfile":
        """Construct recommended audio profile from analysis.

        Args:
            winner_metrics: Feature name -> list of winner values; every list
                must be non-empty.
            deltas: Feature name -> result of ``calculate_delta``.
            sample_size: Number of records analysed; drives the confidence.
            top_performers: Number of records classified as winners.
        """
        # Target pace: use winner average with tolerance
        pace_values = winner_metrics["pace"]
        target_pace = statistics.mean(pace_values)
        pace_std = statistics.stdev(pace_values) if len(pace_values) > 1 else 10
        pace_tolerance = min(pace_std, 20)  # Cap tolerance at 20 WPM

        # Pitch recommendations
        pitch_baseline = 150.0  # Placeholder, would need pitch data
        pitch_variance_target = statistics.mean(winner_metrics["pitch_variance"])

        # Pitch jump frequency classification
        avg_jumps = statistics.mean(winner_metrics["pitch_jumps"])
        if avg_jumps < 5:
            pitch_jump_freq = "low"
        elif avg_jumps < 15:
            pitch_jump_freq = "medium"
        else:
            pitch_jump_freq = "high"

        # Pause strategy
        pause_density_target = statistics.mean(winner_metrics["pause_density"])
        pause_durations = winner_metrics["pause_duration"]
        pause_duration_range = (min(pause_durations), max(pause_durations))

        # Beat alignment importance
        beat_alignment_avg = statistics.mean(winner_metrics["beat_alignment"])
        beat_delta = deltas["beat_alignment"]
        if beat_delta["significant"] and beat_delta["delta"] > 0.2:
            beat_importance = "critical"
            beat_threshold = 0.8
        elif beat_alignment_avg > 0.6:
            beat_importance = "important"
            beat_threshold = 0.6
        else:
            beat_importance = "optional"
            beat_threshold = 0.4

        # Emphasis strategy
        # NOTE: this is the mean emphasis_peak_count of the winners; no
        # per-minute normalisation is applied before it is stored in
        # emphasis_points_per_min.
        emphasis_avg = statistics.mean(winner_metrics["emphasis"])
        if emphasis_avg < 3:
            emphasis_strategy = "sparse"
        elif emphasis_avg < 8:
            emphasis_strategy = "moderate"
        else:
            emphasis_strategy = "frequent"

        # Confidence based on sample size (logarithmic scale, 100 samples
        # saturate at 1.0). Clamp the input so a non-positive sample size
        # yields 0.0 instead of a math domain error.
        confidence = min(math.log10(max(sample_size, 1)) / math.log10(100), 1.0)

        return AudioProfile(
            niche=niche,
            platform=platform,
            target_pace_wpm=target_pace,
            pace_tolerance=pace_tolerance,
            pitch_baseline_hz=pitch_baseline,
            pitch_variance_target=pitch_variance_target,
            pitch_jump_frequency=pitch_jump_freq,
            pause_density_target=pause_density_target,
            pause_duration_ms=pause_duration_range,
            beat_alignment_importance=beat_importance,
            beat_alignment_threshold=beat_threshold,
            emphasis_strategy=emphasis_strategy,
            emphasis_points_per_min=emphasis_avg,
            confidence_score=confidence,
            sample_size=sample_size,
            top_performers_analyzed=top_performers,
            key_differentiators=key_differentiators,
            anti_patterns=anti_patterns,
        )

    def learn_all_patterns(self) -> Dict[str, "AudioProfile"]:
        """
        Analyze all available data and generate profiles for each niche/platform.

        The resulting profiles are also written to the on-disk cache.

        Returns:
            Dict mapping "niche:platform" -> AudioProfile; empty when there
            are no records.
        """
        records = self.load_performance_records()
        if not records:
            return {}

        # Unique niche/platform combinations, kept in first-seen order so the
        # saved cache file is deterministic for a given input file.
        combinations = dict.fromkeys(
            (record.get("niche", "unknown"), record.get("platform", "unknown"))
            for record in records
        )

        # Analyze each combination
        profiles = {}
        for niche, platform in combinations:
            profile = self.analyze_niche_platform(records, niche, platform)
            if profile:
                profiles[f"{niche}:{platform}"] = profile

        # Cache profiles to disk
        self._save_profiles(profiles)
        return profiles

    def _save_profiles(self, profiles: Dict[str, "AudioProfile"]):
        """Persist learned profiles to ``audio_profiles.json``."""
        profile_file = self.data_dir / "audio_profiles.json"
        serializable = {key: asdict(profile) for key, profile in profiles.items()}

        # Write to a sibling temp file and rename it into place so a crash
        # mid-write cannot leave a truncated cache behind.
        tmp_file = self.data_dir / "audio_profiles.json.tmp"
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(serializable, f, indent=2)
        tmp_file.replace(profile_file)

    def _load_profiles(self) -> Dict[str, "AudioProfile"]:
        """Load cached profiles from disk; empty dict when no cache exists."""
        profile_file = self.data_dir / "audio_profiles.json"
        if not profile_file.exists():
            return {}

        with open(profile_file, "r", encoding="utf-8") as f:
            data = json.load(f)

        profiles = {}
        for key, profile_dict in data.items():
            # JSON stores the (min, max) pause range as a list; restore the
            # tuple the dataclass declares.
            if "pause_duration_ms" in profile_dict:
                profile_dict["pause_duration_ms"] = tuple(profile_dict["pause_duration_ms"])
            profiles[key] = AudioProfile(**profile_dict)
        return profiles

    def get_recommended_audio_profile(self, niche: str, platform: str) -> Optional["AudioProfile"]:
        """
        API: Get recommended audio profile for specific niche/platform.

        Returns cached profile if available, otherwise triggers learning.
        Falls back to the highest-confidence profile for the same platform,
        then for the same niche, if the specific combination lacks data.
        Returns None when nothing matches.
        """
        key = f"{niche}:{platform}"

        # Try loading cached profiles
        cached = self._load_profiles()
        if key in cached:
            return cached[key]

        # Not cached, trigger learning
        all_profiles = self.learn_all_patterns()
        if key in all_profiles:
            return all_profiles[key]

        # Fallback: try platform-generic profile
        platform_profiles = [p for k, p in all_profiles.items() if k.endswith(f":{platform}")]
        if platform_profiles:
            # Return highest confidence profile for this platform
            return max(platform_profiles, key=lambda p: p.confidence_score)

        # Fallback: try niche-generic profile
        niche_profiles = [p for k, p in all_profiles.items() if k.startswith(f"{niche}:")]
        if niche_profiles:
            return max(niche_profiles, key=lambda p: p.confidence_score)

        return None  # No data available

    def explain_profile(self, profile: "AudioProfile") -> str:
        """
        Generate human-readable explanation of why this profile works.
        Useful for debugging and building intuition.
        """
        lines = [
            "",
            f"Audio Profile for {profile.niche} on {profile.platform}",
            "=" * 60,
            f"Sample Size: {profile.sample_size} videos analyzed",
            f"Top Performers: {profile.top_performers_analyzed}",
            f"Confidence: {profile.confidence_score*100:.1f}%",
            "CORE RECOMMENDATIONS:",
            "--------------------",
            f"Pace: {profile.target_pace_wpm:.1f} WPM (±{profile.pace_tolerance:.1f})",
            f"Pitch Variation: {profile.pitch_variance_target:.2f} Hz variance",
            f"Pitch Jumps: {profile.pitch_jump_frequency} frequency",
            f"Pauses: {profile.pause_density_target:.1f} per minute, "
            f"{profile.pause_duration_ms[0]:.0f}-{profile.pause_duration_ms[1]:.0f}ms each",
            f"Beat Alignment: {profile.beat_alignment_importance} "
            f"(threshold: {profile.beat_alignment_threshold:.2f})",
            f"Emphasis: {profile.emphasis_strategy} strategy, "
            f"~{profile.emphasis_points_per_min:.1f} per minute",
            "KEY DIFFERENTIATORS (what makes winners win):",
            "----------------------------------------------",
        ]
        lines.extend(f"• {diff}" for diff in profile.key_differentiators)
        lines.append("")
        lines.append("ANTI-PATTERNS (what to avoid):")
        lines.append("-------------------------------")
        lines.extend(f"• {anti}" for anti in profile.anti_patterns)
        return "\n".join(lines) + "\n"
# =============================================================================
# FUTURE RL UPGRADE PATHS
# =============================================================================

"""
Reinforcement Learning Integration (Future Version 2):

1. REWARD FUNCTION:
   - Replace fixed viral_score threshold with learned value function
   - Multi-objective: completion_rate, engagement, virality
   - Temporal credit assignment: which audio features at which timestamps drove retention

2. EXPLORATION/EXPLOITATION:
   - Multi-armed bandit for A/B testing profiles
   - Upper Confidence Bound (UCB) for balancing tried-and-true vs experimental
   - Thompson sampling for Bayesian optimization

3. CONTINUOUS LEARNING:
   - Online learning: update profiles after each video performance
   - Concept drift detection: identify when trends shift
   - Temporal decay: older data weighted less (trends change)
   - Catastrophic forgetting prevention: maintain ensemble of time-windowed models

4. CONTEXT-AWARE BANDITS:
   - Contextual features: time of day, season, current events
   - Personalization: audience demographics, viewing history
   - Transfer learning: leverage patterns across similar niches

5. POLICY GRADIENT METHODS:
   - Direct optimization of audio parameters
   - Differentiable audio synthesis pipeline
   - Actor-critic for continuous action spaces (pace, pitch, etc.)

6. ARCHITECTURE:
   - Replace AudioPatternLearner with RLAudioOptimizer
   - Add replay buffer for experience replay
   - Add policy network (actor) and value network (critic)
   - Add exploration noise (epsilon-greedy or entropy bonus)

Example RL pseudocode:

    class RLAudioOptimizer:
        def __init__(self):
            self.policy_net = AudioPolicyNetwork()  # Maps (niche, platform) -> audio params
            self.value_net = AudioValueNetwork()  # Estimates expected virality
            self.replay_buffer = ReplayBuffer()

        def get_audio_profile(self, niche, platform, explore=True):
            state = encode_state(niche, platform)
            if explore and random() < epsilon:
                return sample_random_profile()  # Exploration
            return self.policy_net(state)  # Exploitation

        def update(self, video_id, audio_profile, performance_metrics):
            # Store experience
            self.replay_buffer.add(state, audio_profile, reward, next_state)
            # Sample batch and update networks
            batch = self.replay_buffer.sample()
            policy_loss = compute_policy_gradient(batch)
            value_loss = compute_td_error(batch)
            optimize(policy_loss + value_loss)

This enables true adaptive learning that improves over time rather than
fixed statistical analysis.
"""

# =============================================================================
# CLI FOR TESTING
# =============================================================================
def main() -> None:
    """Learn profiles from the default data directory and print a summary."""
    learner = AudioPatternLearner()

    print("Learning patterns from performance data...")
    profiles = learner.learn_all_patterns()

    print(f"\nLearned {len(profiles)} audio profiles:")
    for key, profile in profiles.items():
        print(f"\n{key}:")
        print(f" Pace: {profile.target_pace_wpm:.1f} WPM")
        print(f" Beat alignment: {profile.beat_alignment_importance}")
        print(f" Confidence: {profile.confidence_score*100:.1f}%")

    # Example: get specific recommendation
    print("\n" + "="*60)
    print("Example: Getting recommendation for 'tech_tips' on 'tiktok'")
    profile = learner.get_recommended_audio_profile("tech_tips", "tiktok")
    if profile:
        print(learner.explain_profile(profile))
    else:
        print("No profile available for this niche/platform combination.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment