Skip to content

Instantly share code, notes, and snippets.

@alessaba
Created June 25, 2025 15:15
Show Gist options
  • Save alessaba/872ca49779fd14d4b357c313400004bb to your computer and use it in GitHub Desktop.
Save alessaba/872ca49779fd14d4b357c313400004bb to your computer and use it in GitHub Desktop.
Concatena tutti gli audio di un dataset video in un unico file, aggiungendo anche il nome del file nel file audio (usando il comando "say" di macOS)
#!/usr/bin/env python3
import os
import shutil
import subprocess
from concurrent.futures import ThreadPoolExecutor

from tqdm import tqdm
# === Configuration ===
# Only files in the current directory whose name ends with this suffix are processed.
VIDEO_EXT = ".mp4"

# Scratch directories: spoken file names, extracted soundtracks, and the
# "spoken name + soundtrack" clips built from the two.
TEMP_SPEECH = "temp_speech"
TEMP_AUDIO = "temp_audio"
TEMP_LABELED = "temp_labeled"

# Name of the final concatenated audio file.
OUTPUT_FILE = "output_labeled.m4a"

# Create the scratch directories up front; already-existing ones are accepted.
for scratch_dir in (TEMP_SPEECH, TEMP_AUDIO, TEMP_LABELED):
    os.makedirs(scratch_dir, exist_ok=True)

# Collect the videos of the current directory in sorted (lexicographic) order.
videos = [name for name in os.listdir('.') if name.endswith(VIDEO_EXT)]
videos.sort()
def process_video(f):
    """Create the spoken label and the extracted soundtrack for one video.

    Writes two files, where ``<base>`` is ``f`` without its extension:

    * ``TEMP_SPEECH/<base>.m4a`` -- ``<base>`` read aloud by the macOS ``say``
      command (voice "Samantha"), transcoded with ffmpeg.
    * ``TEMP_AUDIO/<base>.m4a`` -- the audio of ``f`` with the video dropped.

    Both are encoded as AAC, 128 kbit/s, stereo, 44.1 kHz.

    A command that exits with a non-zero status is reported through
    ``tqdm.write`` and the function carries on, so a single bad file neither
    aborts the other workers nor fails silently.

    Args:
        f: Path of the video file to process.

    Returns:
        None. The results are the files written to the temp directories.
    """
    base = os.path.splitext(f)[0]
    speech_path = os.path.join(TEMP_SPEECH, f"{base}.m4a")
    audio_path = os.path.join(TEMP_AUDIO, f"{base}.m4a")
    aiff_tmp = os.path.join(TEMP_SPEECH, f"{base}.aiff")
    # The tools' own output is discarded; only their exit status is inspected.
    quiet = {"stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL}

    # Generate spoken filename
    labeled_ok = False
    try:
        say_ok = subprocess.run(
            ['say', '-v', 'Samantha', base, '-o', aiff_tmp], **quiet
        ).returncode == 0
        # Transcode only when `say` succeeded; otherwise there is no usable
        # AIFF to feed to ffmpeg.
        if say_ok:
            labeled_ok = subprocess.run(
                ['ffmpeg', '-y', '-i', aiff_tmp, '-ar', '44100', '-c:a', 'aac',
                 '-b:a', '128k', '-ac', '2', speech_path],
                **quiet
            ).returncode == 0
    finally:
        # The AIFF is just an intermediate. It does not exist when `say`
        # failed, so a missing file must not raise here.
        try:
            os.remove(aiff_tmp)
        except FileNotFoundError:
            pass
    if not labeled_ok:
        # tqdm.write prints without garbling an active progress bar.
        tqdm.write(f"⚠️ Could not create the spoken label for {f}")

    # Extract audio
    extracted_ok = subprocess.run([
        'ffmpeg', '-y', '-i', f,
        '-vn', '-ac', '2', '-ar', '44100',
        '-c:a', 'aac', '-b:a', '128k',
        audio_path
    ], **quiet).returncode == 0
    if not extracted_ok:
        tqdm.write(f"⚠️ Could not extract the audio of {f}")
# === Parallel Processing ===
# Run process_video for every video on a thread pool (default worker count),
# showing one progress tick per finished video.
with ThreadPoolExecutor() as pool:
    pending = pool.map(process_video, videos)
    progress = tqdm(pending, total=len(videos), desc="Processing videos")
    # Drain the iterator: this advances the bar and surfaces any exception
    # raised inside a worker. The results themselves are not needed.
    for _ in progress:
        pass
# === Merge speech + audio ===
# For every video, join its spoken-name clip and its extracted soundtrack into
# one "labeled" clip (AAC, 128 kbit/s, stereo, 44.1 kHz).
# labeled_files keeps, in video order, only the clips that were actually
# created: listing a missing file later would make the final concatenation
# fail for the whole set.
labeled_files = []
for f in tqdm(videos, desc="Merging labeled clips"):
    base = os.path.splitext(f)[0]
    speech = os.path.join(TEMP_SPEECH, f"{base}.m4a")
    audio = os.path.join(TEMP_AUDIO, f"{base}.m4a")
    labeled = os.path.join(TEMP_LABELED, f"{base}.m4a")
    merge = subprocess.run([
        'ffmpeg', '-y',
        '-i', speech,
        '-i', audio,
        # Play input 0 (the spoken name) followed by input 1 (the soundtrack).
        '-filter_complex', '[0:a][1:a]concat=n=2:v=0:a=1[outa]',
        '-map', '[outa]',
        '-c:a', 'aac', '-b:a', '128k', '-ac', '2', '-ar', '44100',
        labeled
    ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    if merge.returncode == 0 and os.path.isfile(labeled):
        labeled_files.append(labeled)
    else:
        # tqdm.write prints without garbling the active progress bar.
        tqdm.write(f"⚠️ Skipping {f}: its label and audio could not be merged")
# === Final Concatenation ===
# Write the list file read by ffmpeg's concat demuxer: one `file '<path>'`
# line per labeled clip, in order.
list_path = "master_list.txt"
with open(list_path, "w", encoding="utf-8") as master:
    for f in labeled_files:
        # Inside a single-quoted string of the list file, a literal quote has
        # to be written as '\'' -- otherwise a name such as "Don't.m4a" would
        # end the string early and corrupt the list.
        escaped = os.path.abspath(f).replace("'", "'\\''")
        master.write(f"file '{escaped}'\n")

# '-safe 0' is needed because the list contains absolute paths.
concat_cmd = ['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', list_path]
succeeded = False
if labeled_files:
    # Try -c copy first, fallback to re-encoding if it fails
    result = subprocess.run(concat_cmd + ['-c', 'copy', OUTPUT_FILE],
                            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    if result.returncode != 0:
        print("🔁 Re-encoding output due to stream incompatibility...")
        result = subprocess.run(
            concat_cmd + ['-c:a', 'aac', '-b:a', '128k', '-ac', '2', OUTPUT_FILE],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    succeeded = result.returncode == 0

# Cleanup (always, whether or not the output could be written)
os.remove(list_path)
for temp_dir in (TEMP_AUDIO, TEMP_LABELED, TEMP_SPEECH):
    # Same effect as `rm -rf`, without spawning a shell; a directory that is
    # already gone is not an error.
    shutil.rmtree(temp_dir, ignore_errors=True)

# Report the real outcome instead of announcing success unconditionally.
# SystemExit with a message prints it to stderr and exits with status 1.
if succeeded:
    print(f"✅ Done! Final file: {OUTPUT_FILE}")
elif not labeled_files:
    raise SystemExit("❌ No labeled clips were produced; nothing to concatenate.")
else:
    raise SystemExit(f"❌ ffmpeg could not create {OUTPUT_FILE}.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment