Created
September 23, 2024 18:49
-
-
Save adimyth/5927b8071a5c8814695a53dd7a5f4a0b to your computer and use it in GitHub Desktop.
Audio Splitter - split audio into chunks based on silence using pydub
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import aiohttp | |
import asyncio | |
import tempfile | |
from pydub import AudioSegment | |
from pydub.silence import split_on_silence | |
def _split_and_export(audio_path, output_dir, min_silence_len, silence_thresh, keep_silence):
    """Split the audio file at ``audio_path`` on silence and write WAV chunks.

    Synchronous helper: decoding, silence detection and export are all
    blocking pydub calls, so the async entry point runs this in an executor.
    Chunks are written to ``output_dir`` as ``chunk_1.wav``, ``chunk_2.wav``, ...
    """
    audio = AudioSegment.from_file(audio_path)
    chunks = split_on_silence(
        audio,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
        keep_silence=keep_silence,
    )
    for index, chunk in enumerate(chunks, start=1):
        output_path = os.path.join(output_dir, f"chunk_{index}.wav")
        # export() hands back the open output file; close it so we do not
        # hold one file descriptor per chunk until garbage collection.
        chunk.export(output_path, format="wav").close()
        print(f"Saved chunk {index} to {output_path}")
    print(f"Total chunks created: {len(chunks)}")


async def split_audio_on_silence_and_save(
    audio_url: str,
    output_dir: str,
    *,
    min_silence_len: int = 500,
    silence_thresh: int = -40,
    keep_silence: int = 300,
):
    """Download an audio file and save it as silence-delimited WAV chunks.

    Args:
        audio_url: HTTP(S) URL of the audio file to download.
        output_dir: Directory the chunks are written to; created if missing.
        min_silence_len: Minimum silence length, in milliseconds, that
            separates two chunks.
        silence_thresh: Level in dBFS below which audio counts as silence.
        keep_silence: Milliseconds of silence kept at the beginning and end
            of each chunk.

    Raises:
        RuntimeError: If the download does not return HTTP 200.
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Download the audio file
    async with aiohttp.ClientSession() as session:
        async with session.get(audio_url) as response:
            if response.status != 200:
                raise RuntimeError(f"Failed to download audio: HTTP {response.status}")
            audio_data = await response.read()

    # pydub loads from a path, so stage the bytes in a temporary file.
    # delete=False because the file is reopened by name after it is closed.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    temp_file_path = temp_file.name
    try:
        # Writing happens inside the try so a failed write still cleans up.
        with temp_file:
            temp_file.write(audio_data)

        # Run the blocking decode/split/export off the event loop.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(
            None,
            _split_and_export,
            temp_file_path,
            output_dir,
            min_silence_len,
            silence_thresh,
            keep_silence,
        )
    finally:
        # Clean up the temporary file
        os.unlink(temp_file_path)
# Example usage
async def main():
    """Split a sample recording into chunks under the ``audio_chunks`` directory."""
    await split_audio_on_silence_and_save(
        audio_url="https://cdn-aditya-dev.enparadigmtech.com/audio-files/hindi/hindi_sample4.mp3",
        output_dir="audio_chunks",
    )
if __name__ == "__main__":
    # Start the event loop only when run as a script, not when imported.
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment