Skip to content

Instantly share code, notes, and snippets.

@bogged-broker
Created December 30, 2025 04:56
Show Gist options
  • Select an option

  • Save bogged-broker/c00400db1ffa2996cad58f38dd727218 to your computer and use it in GitHub Desktop.

Select an option

Save bogged-broker/c00400db1ffa2996cad58f38dd727218 to your computer and use it in GitHub Desktop.
"""
audio_pattern_learner.py
Analyzes audio performance records to identify statistically significant patterns
that correlate with viral audio success. Provides actionable recommendations for
TTS and voice-sync engines.
Version 1: Heuristic/statistical analysis (no deep learning)
Future: Can be upgraded to RL-based continuous learning system
"""
import json
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass, asdict
from collections import defaultdict
import statistics
from pathlib import Path
@dataclass
class AudioMetrics:
    """Raw audio feature measurements for a single piece of audio.

    Field order is part of the interface (positional construction), so it
    must not be rearranged.
    """

    # Speaking pace in words per minute.
    pace_wpm: float
    # Average pitch in Hz.
    avg_pitch_hz: float
    # Variance of the pitch.
    pitch_variance: float
    # Number of significant pitch changes.
    pitch_jump_count: int
    # Pauses per minute.
    pause_density: float
    # Average pause length in milliseconds.
    avg_pause_duration_ms: float
    # 0-1, how well the voice is synced to the music.
    beat_alignment_score: float
    # Number of vocal emphasis points.
    emphasis_peak_count: int
@dataclass
class PerformanceMetrics:
    """Video performance outcomes associated with a piece of audio.

    Field order is part of the interface (positional construction), so it
    must not be rearranged.
    """

    # Fraction of viewers who finished the video, 0-1.
    completion_rate: float
    # Retention at 10%, 20%, ..., 100% of the video.
    retention_curve: List[float]
    # Composite of likes, comments and shares.
    engagement_score: float
    # Composite virality metric.
    viral_score: float
@dataclass
class AudioProfile:
    """Recommended audio configuration for a niche/platform.

    Field order is part of the interface (positional construction and
    ``AudioProfile(**dict)`` round-trips), so it must not be rearranged.
    """

    niche: str
    platform: str

    # --- Core recommendations ---
    target_pace_wpm: float
    # +/- range around target_pace_wpm.
    pace_tolerance: float
    pitch_baseline_hz: float
    pitch_variance_target: float
    # "low", "medium" or "high".
    pitch_jump_frequency: str
    # Pauses per minute.
    pause_density_target: float
    # (min, max) pause length in milliseconds.
    pause_duration_ms: Tuple[float, float]
    # "critical", "important" or "optional".
    beat_alignment_importance: str
    beat_alignment_threshold: float
    # "sparse", "moderate" or "frequent".
    emphasis_strategy: str
    emphasis_points_per_min: float

    # --- Supporting data ---
    # 0-1, based on sample size.
    confidence_score: float
    sample_size: int
    top_performers_analyzed: int

    # --- Explainability ---
    # What separates winners from losers.
    key_differentiators: List[str]
    # What to avoid.
    anti_patterns: List[str]

    def __post_init__(self):
        """Normalise values that lose their type in a JSON round-trip."""
        # JSON has no tuple type: a profile rebuilt from the on-disk cache
        # arrives with a list here. Convert it back so the field matches its
        # declared type and compares equal to a freshly built profile.
        if isinstance(self.pause_duration_ms, list):
            self.pause_duration_ms = tuple(self.pause_duration_ms)
class AudioPatternLearner:
    """
    Analyzes audio performance data to extract viral patterns.

    Architecture:
    1. Data ingestion from audio_performance_store
    2. Statistical analysis: winners vs losers
    3. Pattern clustering by niche/platform
    4. Profile generation with explainability

    Future RL upgrade path:
    - Replace statistical thresholds with learned reward functions
    - Implement multi-armed bandit for A/B testing recommendations
    - Add temporal decay for concept drift (trends change over time)
    - Continuous learning loop with performance feedback
    """

    # Thresholds for winner/loser classification.
    VIRAL_THRESHOLD = 0.75  # Top 25% are "winners"
    MIN_SAMPLE_SIZE = 10  # Minimum records for reliable analysis
    # Relative difference between group means that counts as significant.
    SIGNIFICANCE_DELTA = 0.15  # 15% difference = significant

    # Short feature name used in deltas/reports -> key read from a record's
    # "audio_features" dict. Insertion order fixes the order of the
    # differentiator and anti-pattern lists.
    _FEATURE_KEYS = {
        "pace": "pace_wpm",
        "pitch_variance": "pitch_variance",
        "pitch_jumps": "pitch_jump_count",
        "pause_density": "pause_density",
        "pause_duration": "avg_pause_duration_ms",
        "beat_alignment": "beat_alignment_score",
        "emphasis": "emphasis_peak_count",
    }

    def __init__(self, data_dir: str = "./audio_performance_data"):
        """Create the learner and make sure its data directory exists.

        Args:
            data_dir: Directory holding ``performance_records.jsonl`` and the
                ``audio_profiles.json`` cache.
        """
        self.data_dir = Path(data_dir)
        # parents=True so a nested data_dir does not fail on a fresh checkout.
        self.data_dir.mkdir(parents=True, exist_ok=True)

    def load_performance_records(self) -> List[Dict]:
        """Load all performance records from ``performance_records.jsonl``.

        Returns:
            One decoded JSON value per non-blank line, or an empty list when
            the file does not exist.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        record_file = self.data_dir / "performance_records.jsonl"
        if not record_file.exists():
            return []

        records = []
        with open(record_file, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                # A trailing or stray blank line is not a record; feeding it
                # to json.loads would abort the whole load.
                if not line:
                    continue
                records.append(json.loads(line))
        return records

    def _winner_threshold(self, all_scores: List[float]) -> float:
        """Return the viral score at the VIRAL_THRESHOLD percentile.

        ``all_scores`` must be non-empty.
        """
        ordered = sorted(all_scores)
        idx = min(int(len(ordered) * self.VIRAL_THRESHOLD), len(ordered) - 1)
        return ordered[idx]

    def classify_performance(self, viral_score: float, all_scores: List[float]) -> str:
        """Classify a record as winner/loser based on percentile.

        Returns:
            ``"unknown"`` when ``all_scores`` is empty, ``"winner"`` when
            ``viral_score`` reaches the percentile threshold, else ``"loser"``.
        """
        if not all_scores:
            return "unknown"
        threshold = self._winner_threshold(all_scores)
        return "winner" if viral_score >= threshold else "loser"

    def calculate_delta(self, winners: List[float], losers: List[float]) -> Dict:
        """
        Calculate statistical difference between winner and loser groups.

        Returns:
            Dict with keys ``significant``, ``delta`` (relative difference of
            the means, measured against the loser mean), ``winner_avg``,
            ``loser_avg``, ``winner_std`` and ``loser_std``. The same keys are
            present when either group is empty, with neutral values.
        """
        if not winners or not losers:
            return {
                "significant": False,
                "delta": 0,
                "winner_avg": 0,
                "loser_avg": 0,
                "winner_std": 0,
                "loser_std": 0,
            }

        winner_avg = statistics.mean(winners)
        loser_avg = statistics.mean(losers)
        # Avoid division by zero when the loser mean is (close to) zero.
        baseline = max(abs(loser_avg), 0.01)
        delta_pct = (winner_avg - loser_avg) / baseline
        return {
            "significant": abs(delta_pct) >= self.SIGNIFICANCE_DELTA,
            "delta": delta_pct,
            "winner_avg": winner_avg,
            "loser_avg": loser_avg,
            # stdev needs at least two data points.
            "winner_std": statistics.stdev(winners) if len(winners) > 1 else 0,
            "loser_std": statistics.stdev(losers) if len(losers) > 1 else 0,
        }

    def _collect_metrics(self, group: List[Dict]) -> Dict[str, List[float]]:
        """Gather each tracked audio feature of ``group`` into per-feature lists."""
        metrics = {feature: [] for feature in self._FEATURE_KEYS}
        for record in group:
            audio = record["audio_features"]
            for feature, source_key in self._FEATURE_KEYS.items():
                metrics[feature].append(audio[source_key])
        return metrics

    def analyze_niche_platform(self, records: List[Dict], niche: str, platform: str) -> Optional["AudioProfile"]:
        """
        Analyze records for specific niche/platform combination.

        Core algorithm:
        1. Filter records by niche/platform
        2. Classify into winners/losers by viral_score percentile
        3. Calculate deltas for each audio feature
        4. Identify significant differentiators
        5. Generate recommended profile

        Returns:
            The recommended profile, or ``None`` when there are fewer than
            MIN_SAMPLE_SIZE matching records or when the records do not split
            into both a winner and a loser group.

        Raises:
            KeyError: If a matching record lacks ``performance.viral_score``
                or one of the tracked ``audio_features`` keys.
        """
        # Records without a niche/platform are grouped under "unknown" by
        # learn_all_patterns; use the same default here so they are matched.
        filtered = [
            r for r in records
            if r.get("niche", "unknown") == niche
            and r.get("platform", "unknown") == platform
        ]
        if not filtered or len(filtered) < self.MIN_SAMPLE_SIZE:
            return None  # Insufficient data

        viral_scores = [r["performance"]["viral_score"] for r in filtered]
        # Compute the percentile cut-off once instead of re-sorting per record.
        threshold = self._winner_threshold(viral_scores)

        winners = []
        losers = []
        for record, score in zip(filtered, viral_scores):
            (winners if score >= threshold else losers).append(record)
        if not winners or not losers:
            return None  # Need both groups for comparison

        winner_metrics = self._collect_metrics(winners)
        loser_metrics = self._collect_metrics(losers)

        deltas = {
            feature: self.calculate_delta(winner_metrics[feature], loser_metrics[feature])
            for feature in self._FEATURE_KEYS
        }

        # Features with significant deltas become differentiators; the
        # opposite direction of each becomes an anti-pattern.
        key_differentiators = []
        anti_patterns = []
        for feature, delta_info in deltas.items():
            if not delta_info["significant"]:
                continue
            winners_higher = delta_info["delta"] > 0
            direction = "higher" if winners_higher else "lower"
            opposite_direction = "lower" if winners_higher else "higher"
            key_differentiators.append(
                f"{feature}: {direction} by {abs(delta_info['delta'])*100:.1f}% "
                f"({delta_info['winner_avg']:.2f} vs {delta_info['loser_avg']:.2f})"
            )
            anti_patterns.append(f"Avoid {opposite_direction} {feature}")

        return self._build_audio_profile(
            niche=niche,
            platform=platform,
            winner_metrics=winner_metrics,
            deltas=deltas,
            key_differentiators=key_differentiators,
            anti_patterns=anti_patterns,
            sample_size=len(filtered),
            top_performers=len(winners),
        )

    def _build_audio_profile(self, niche: str, platform: str,
                             winner_metrics: Dict, deltas: Dict,
                             key_differentiators: List[str],
                             anti_patterns: List[str],
                             sample_size: int,
                             top_performers: int) -> "AudioProfile":
        """Construct recommended audio profile from analysis.

        Args:
            winner_metrics: Per-feature value lists of the winner group; every
                list must be non-empty.
            deltas: Per-feature results of ``calculate_delta``.
            sample_size: Number of records analyzed for this niche/platform.
            top_performers: Number of records in the winner group.
        """
        import math

        # Target pace: winner average, tolerance = winner spread capped at 20 WPM.
        pace_values = winner_metrics["pace"]
        target_pace = statistics.mean(pace_values)
        pace_std = statistics.stdev(pace_values) if len(pace_values) > 1 else 10
        pace_tolerance = min(pace_std, 20)

        # NOTE(review): placeholder value; the analysis does not read any
        # pitch level from the records.
        pitch_baseline = 150.0
        pitch_variance_target = statistics.mean(winner_metrics["pitch_variance"])

        # Pitch jump frequency classification.
        avg_jumps = statistics.mean(winner_metrics["pitch_jumps"])
        if avg_jumps < 5:
            pitch_jump_freq = "low"
        elif avg_jumps < 15:
            pitch_jump_freq = "medium"
        else:
            pitch_jump_freq = "high"

        # Pause strategy: mean density, and the observed min/max duration.
        pause_density_target = statistics.mean(winner_metrics["pause_density"])
        pause_durations = winner_metrics["pause_duration"]
        pause_duration_range = (min(pause_durations), max(pause_durations))

        # Beat alignment importance.
        beat_alignment_avg = statistics.mean(winner_metrics["beat_alignment"])
        beat_delta = deltas["beat_alignment"]
        if beat_delta["significant"] and beat_delta["delta"] > 0.2:
            beat_importance = "critical"
            beat_threshold = 0.8
        elif beat_alignment_avg > 0.6:
            beat_importance = "important"
            beat_threshold = 0.6
        else:
            beat_importance = "optional"
            beat_threshold = 0.4

        # Emphasis strategy.
        # NOTE(review): this is the mean emphasis_peak_count per record; it is
        # stored as emphasis_points_per_min without normalising by duration.
        emphasis_avg = statistics.mean(winner_metrics["emphasis"])
        if emphasis_avg < 3:
            emphasis_strategy = "sparse"
        elif emphasis_avg < 8:
            emphasis_strategy = "moderate"
        else:
            emphasis_strategy = "frequent"

        # Confidence on a logarithmic scale: 1 sample -> 0, 100+ samples -> 1.
        # log10 is undefined for non-positive sizes, so treat those as 0.
        if sample_size < 1:
            confidence = 0.0
        else:
            confidence = min(math.log10(sample_size) / math.log10(100), 1.0)

        return AudioProfile(
            niche=niche,
            platform=platform,
            target_pace_wpm=target_pace,
            pace_tolerance=pace_tolerance,
            pitch_baseline_hz=pitch_baseline,
            pitch_variance_target=pitch_variance_target,
            pitch_jump_frequency=pitch_jump_freq,
            pause_density_target=pause_density_target,
            pause_duration_ms=pause_duration_range,
            beat_alignment_importance=beat_importance,
            beat_alignment_threshold=beat_threshold,
            emphasis_strategy=emphasis_strategy,
            emphasis_points_per_min=emphasis_avg,
            confidence_score=confidence,
            sample_size=sample_size,
            top_performers_analyzed=top_performers,
            key_differentiators=key_differentiators,
            anti_patterns=anti_patterns,
        )

    def learn_all_patterns(self) -> Dict[str, "AudioProfile"]:
        """
        Analyze all available data and generate profiles for each niche/platform.

        Returns dict mapping "niche:platform" -> AudioProfile. When at least
        one record exists, the result is also written to the profile cache.
        """
        records = self.load_performance_records()
        if not records:
            return {}

        # Identify unique niche/platform combinations.
        combinations = {
            (record.get("niche", "unknown"), record.get("platform", "unknown"))
            for record in records
        }

        profiles = {}
        # Sorted so the profile dict (and the JSON cache) has a stable order
        # across runs; str() keeps the sort safe for non-string values.
        for niche, platform in sorted(combinations, key=lambda c: (str(c[0]), str(c[1]))):
            profile = self.analyze_niche_platform(records, niche, platform)
            if profile:
                profiles[f"{niche}:{platform}"] = profile

        # Cache profiles to disk.
        self._save_profiles(profiles)
        return profiles

    def _save_profiles(self, profiles: Dict[str, "AudioProfile"]):
        """Persist learned profiles to ``audio_profiles.json``."""
        profile_file = self.data_dir / "audio_profiles.json"
        serializable = {key: asdict(profile) for key, profile in profiles.items()}
        with open(profile_file, "w", encoding="utf-8") as f:
            json.dump(serializable, f, indent=2)

    def _load_profiles(self) -> Dict[str, "AudioProfile"]:
        """Load cached profiles from ``audio_profiles.json``.

        Returns an empty dict when the cache is missing or unreadable as a
        JSON object, so callers fall through to re-learning.
        """
        profile_file = self.data_dir / "audio_profiles.json"
        if not profile_file.exists():
            return {}

        try:
            with open(profile_file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except json.JSONDecodeError:
            # A truncated or corrupt cache is only a cache miss.
            return {}
        if not isinstance(data, dict):
            return {}

        profiles = {}
        for key, profile_dict in data.items():
            # JSON stores the (min, max) tuple as a list; restore the tuple.
            pause_range = profile_dict.get("pause_duration_ms")
            if isinstance(pause_range, list):
                profile_dict["pause_duration_ms"] = tuple(pause_range)
            profiles[key] = AudioProfile(**profile_dict)
        return profiles

    def get_recommended_audio_profile(self, niche: str, platform: str) -> Optional["AudioProfile"]:
        """
        API: Get recommended audio profile for specific niche/platform.

        Returns cached profile if available, otherwise triggers learning.
        When the exact combination has no profile, falls back to the
        highest-confidence profile for the same platform, then for the same
        niche. Returns ``None`` when no profile is available.
        """
        key = f"{niche}:{platform}"

        # Try loading cached profiles.
        cached = self._load_profiles()
        if key in cached:
            return cached[key]

        # Not cached, trigger learning.
        learned = self.learn_all_patterns()
        if key in learned:
            return learned[key]

        # Match on the profile's own fields rather than on key prefix/suffix,
        # which would mis-match when a niche or platform contains ":".
        same_platform = [p for p in learned.values() if p.platform == platform]
        if same_platform:
            return max(same_platform, key=lambda p: p.confidence_score)

        same_niche = [p for p in learned.values() if p.niche == niche]
        if same_niche:
            return max(same_niche, key=lambda p: p.confidence_score)

        return None  # No data available

    def explain_profile(self, profile: "AudioProfile") -> str:
        """
        Generate human-readable explanation of why this profile works.

        Useful for debugging and building intuition.
        """
        lines = [
            "",
            f"Audio Profile for {profile.niche} on {profile.platform}",
            "=" * 60,
            f"Sample Size: {profile.sample_size} videos analyzed",
            f"Top Performers: {profile.top_performers_analyzed}",
            f"Confidence: {profile.confidence_score*100:.1f}%",
            "CORE RECOMMENDATIONS:",
            "--------------------",
            f"Pace: {profile.target_pace_wpm:.1f} WPM (±{profile.pace_tolerance:.1f})",
            f"Pitch Variation: {profile.pitch_variance_target:.2f} Hz variance",
            f"Pitch Jumps: {profile.pitch_jump_frequency} frequency",
            f"Pauses: {profile.pause_density_target:.1f} per minute, "
            f"{profile.pause_duration_ms[0]:.0f}-{profile.pause_duration_ms[1]:.0f}ms each",
            f"Beat Alignment: {profile.beat_alignment_importance} "
            f"(threshold: {profile.beat_alignment_threshold:.2f})",
            f"Emphasis: {profile.emphasis_strategy} strategy, "
            f"~{profile.emphasis_points_per_min:.1f} per minute",
            "KEY DIFFERENTIATORS (what makes winners win):",
            "----------------------------------------------",
        ]
        lines.extend(f"• {diff}" for diff in profile.key_differentiators)
        lines.append("")
        lines.append("ANTI-PATTERNS (what to avoid):")
        lines.append("-------------------------------")
        lines.extend(f"• {anti}" for anti in profile.anti_patterns)
        return "\n".join(lines) + "\n"
# =============================================================================
# FUTURE RL UPGRADE PATHS
# =============================================================================
"""
Reinforcement Learning Integration (Future Version 2):
1. REWARD FUNCTION:
- Replace fixed viral_score threshold with learned value function
- Multi-objective: completion_rate, engagement, virality
- Temporal credit assignment: which audio features at which timestamps drove retention
2. EXPLORATION/EXPLOITATION:
- Multi-armed bandit for A/B testing profiles
- Upper Confidence Bound (UCB) for balancing tried-and-true vs experimental
- Thompson sampling for Bayesian optimization
3. CONTINUOUS LEARNING:
- Online learning: update profiles after each video performance
- Concept drift detection: identify when trends shift
- Temporal decay: older data weighted less (trends change)
- Catastrophic forgetting prevention: maintain ensemble of time-windowed models
4. CONTEXT-AWARE BANDITS:
- Contextual features: time of day, season, current events
- Personalization: audience demographics, viewing history
- Transfer learning: leverage patterns across similar niches
5. POLICY GRADIENT METHODS:
- Direct optimization of audio parameters
- Differentiable audio synthesis pipeline
- Actor-critic for continuous action spaces (pace, pitch, etc.)
6. ARCHITECTURE:
- Replace AudioPatternLearner with RLAudioOptimizer
- Add replay buffer for experience replay
- Add policy network (actor) and value network (critic)
- Add exploration noise (epsilon-greedy or entropy bonus)
Example RL pseudocode:
    class RLAudioOptimizer:
        def __init__(self):
            self.policy_net = AudioPolicyNetwork() # Maps (niche, platform) -> audio params
            self.value_net = AudioValueNetwork() # Estimates expected virality
            self.replay_buffer = ReplayBuffer()

        def get_audio_profile(self, niche, platform, explore=True):
            state = encode_state(niche, platform)
            if explore and random() < epsilon:
                return sample_random_profile() # Exploration
            return self.policy_net(state) # Exploitation

        def update(self, video_id, audio_profile, performance_metrics):
            # Store experience
            self.replay_buffer.add(state, audio_profile, reward, next_state)
            # Sample batch and update networks
            batch = self.replay_buffer.sample()
            policy_loss = compute_policy_gradient(batch)
            value_loss = compute_td_error(batch)
            optimize(policy_loss + value_loss)
This enables true adaptive learning that improves over time rather than
fixed statistical analysis.
"""
# =============================================================================
# CLI FOR TESTING
# =============================================================================
def main() -> None:
    """Run a small CLI demo: learn all profiles, then explain one of them."""
    learner = AudioPatternLearner()
    print("Learning patterns from performance data...")
    profiles = learner.learn_all_patterns()
    print(f"\nLearned {len(profiles)} audio profiles:")
    for key, profile in profiles.items():
        print(f"\n{key}:")
        print(f" Pace: {profile.target_pace_wpm:.1f} WPM")
        print(f" Beat alignment: {profile.beat_alignment_importance}")
        print(f" Confidence: {profile.confidence_score*100:.1f}%")

    # Example: get specific recommendation
    print("\n" + "="*60)
    print("Example: Getting recommendation for 'tech_tips' on 'tiktok'")
    recommended = learner.get_recommended_audio_profile("tech_tips", "tiktok")
    if recommended:
        print(learner.explain_profile(recommended))
    else:
        print("No profile available for this niche/platform combination.")


# Keep the demo inside main() so its variables do not become module globals.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment