Skip to content

Instantly share code, notes, and snippets.

@adimyth
Created September 23, 2024 18:49
Show Gist options
  • Save adimyth/5927b8071a5c8814695a53dd7a5f4a0b to your computer and use it in GitHub Desktop.
Save adimyth/5927b8071a5c8814695a53dd7a5f4a0b to your computer and use it in GitHub Desktop.
Audio Splitter - split audio into chunks based on silence using pydub
import os
import aiohttp
import asyncio
import tempfile
from pydub import AudioSegment
from pydub.silence import split_on_silence
def _export_chunks_on_silence(
    audio_path: str,
    output_dir: str,
    min_silence_len: int,
    silence_thresh: int,
    keep_silence: int,
) -> None:
    """Decode *audio_path*, split it on silence, and write numbered WAV chunks.

    Synchronous helper: everything in here is blocking (decoding, silence
    detection, file export), so the async entry point runs it in a worker
    thread instead of on the event loop.
    """
    audio = AudioSegment.from_file(audio_path)

    chunks = split_on_silence(
        audio,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
        keep_silence=keep_silence,
    )

    # Chunk files are numbered from 1: chunk_1.wav, chunk_2.wav, ...
    for index, chunk in enumerate(chunks, start=1):
        output_path = os.path.join(output_dir, f"chunk_{index}.wav")
        chunk.export(output_path, format="wav")
        print(f"Saved chunk {index} to {output_path}")

    print(f"Total chunks created: {len(chunks)}")


async def split_audio_on_silence_and_save(
    audio_url: str,
    output_dir: str,
    *,
    min_silence_len: int = 500,
    silence_thresh: int = -40,
    keep_silence: int = 300,
) -> None:
    """Download an audio file and save its silence-delimited chunks as WAV files.

    The audio at *audio_url* is streamed to a temporary file, split with
    pydub's ``split_on_silence``, and each chunk is exported to
    ``<output_dir>/chunk_<n>.wav`` (``n`` starting at 1). The temporary file
    is removed afterwards, whether or not processing succeeded.

    Args:
        audio_url: HTTP(S) URL of the audio file to download.
        output_dir: Directory for the chunk files; created if missing.
        min_silence_len: Minimum silence length, in milliseconds, that
            separates two chunks. Defaults to 500.
        silence_thresh: Level in dBFS below which audio counts as silent.
            Defaults to -40.
        keep_silence: Milliseconds of silence kept at the beginning and end
            of each chunk. Defaults to 300.

    Raises:
        Exception: If the server answers with a status other than 200.
    """
    os.makedirs(output_dir, exist_ok=True)

    download_chunk_size = 64 * 1024
    temp_file_path = None
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(audio_url) as response:
                if response.status != 200:
                    raise Exception(f"Failed to download audio: HTTP {response.status}")

                # Stream the body straight to disk rather than holding the
                # whole file in memory. The path is recorded before the first
                # write so the ``finally`` below removes the file even when
                # the download or the write fails part-way.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
                    temp_file_path = temp_file.name
                    async for data in response.content.iter_chunked(download_chunk_size):
                        temp_file.write(data)

        # Decoding, splitting and exporting are blocking calls; run them in
        # the default thread pool so other tasks on this loop keep running.
        # NOTE(review): if this task is cancelled while the worker is still
        # running, the temp file is unlinked underneath it — acceptable for
        # a best-effort cleanup, but confirm if cancellation is expected.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(
            None,
            _export_chunks_on_silence,
            temp_file_path,
            output_dir,
            min_silence_len,
            silence_thresh,
            keep_silence,
        )
    finally:
        # Clean up the temporary file (it only exists once the download began).
        if temp_file_path is not None:
            os.unlink(temp_file_path)
# Example usage
async def main():
    """Run the splitter on the hard-coded sample URL, writing to ``audio_chunks``."""
    await split_audio_on_silence_and_save(
        audio_url="https://cdn-aditya-dev.enparadigmtech.com/audio-files/hindi/hindi_sample4.mp3",
        output_dir="audio_chunks",
    )
if __name__ == "__main__":
    # Script entry point: start an event loop and run the example to completion.
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment