Skip to content

Instantly share code, notes, and snippets.

@Ryu1845
Created August 16, 2024 08:04
Show Gist options
  • Save Ryu1845/d1319a7e0da3a291f6a837ff9ce51d70 to your computer and use it in GitHub Desktop.
Save Ryu1845/d1319a7e0da3a291f6a837ff9ce51d70 to your computer and use it in GitHub Desktop.
from typing import Callable
import numpy as np
from tqdm import tqdm
def wsola_chunked_processing(audio: np.ndarray, sr: int, chunk_size: int, hop_size: int, mod_func: Callable[[np.ndarray], np.ndarray]):
    """Apply ``mod_func`` to ``audio`` in overlapping chunks and stitch the results.

    Audio no longer than ``chunk_size`` is processed in a single call.
    Otherwise consecutive chunks overlap by ``hop_size`` samples; every chunk
    after the first is aligned against the tail of the output built so far by
    cross-correlation (searching offsets ``0..hop_size``) and blended in with
    a linear crossfade.  The result is finally peak-normalized so that its
    largest absolute sample is 1.

    Args:
        audio: Samples to process; indexed and sliced along its first axis.
        sr: Sample rate.  Not used by this function; kept for interface
            compatibility.
        chunk_size: Number of samples handed to ``mod_func`` per call.
        hop_size: Number of samples shared by consecutive chunks, which is
            also the crossfade length.
        mod_func: Callable applied to each chunk.  Its result is squeezed
            before stitching.  NOTE(review): the stitching presumably expects
            the processed chunk to be about as long as the input chunk --
            confirm for the functions actually passed in.

    Returns:
        The stitched, peak-normalized signal as a floating-point array.
        Non-floating results are converted to ``float64``.  An all-zero or
        empty result is returned without normalization.

    Raises:
        ValueError: If the audio has to be chunked and ``hop_size`` is not
            strictly between 0 and ``chunk_size``.
    """
    n_samples = len(audio)

    if chunk_size >= n_samples:
        # Process everything in one go.  np.array() copies, so the in-place
        # normalization below can never write into the caller's array (which
        # would happen if mod_func returned its input or a view of it).
        output = np.array(mod_func(audio)).squeeze()
    else:
        if not 0 < hop_size < chunk_size:
            # hop_size >= chunk_size would never advance `start`;
            # hop_size <= 0 breaks the tail slicing used for the crossfade.
            raise ValueError(
                "hop_size must satisfy 0 < hop_size < chunk_size, "
                f"got hop_size={hop_size}, chunk_size={chunk_size}"
            )

        stride = chunk_size - hop_size
        # Ceiling division: exact number of loop iterations, so the progress
        # bar reaches 100% instead of stopping one step short.
        total_chunks = -(-(n_samples - hop_size) // stride)
        # The fade-in ramp is identical for every chunk boundary.
        crossfade = np.linspace(0, 1, hop_size)

        output = np.array([], dtype=audio.dtype)
        start = 0
        with tqdm(total=total_chunks, desc="Processing audio chunks") as pbar:
            while start < n_samples - hop_size:
                end = min(start + chunk_size, n_samples)
                modified_chunk = np.asarray(mod_func(audio[start:end])).squeeze()
                if not np.issubdtype(modified_chunk.dtype, np.inexact):
                    # Integer samples would truncate the crossfade below.
                    modified_chunk = modified_chunk.astype(np.float64)

                if start == 0:
                    # The first chunk is taken as-is.
                    output = np.append(output, modified_chunk)
                else:
                    # Find the offset into the new chunk that best matches
                    # the tail of what has been produced so far.
                    overlap = output[-hop_size:]
                    correlation = np.correlate(
                        modified_chunk[:hop_size * 2], overlap, mode='valid'
                    )
                    best_offset = np.argmax(correlation)
                    # Fade the old tail out while fading the aligned new
                    # segment in.
                    aligned = modified_chunk[best_offset:best_offset + hop_size]
                    output[-hop_size:] = overlap * (1 - crossfade) + aligned * crossfade
                    # Append whatever follows the blended region.
                    output = np.append(output, modified_chunk[best_offset + hop_size:])

                # The next chunk re-reads the last hop_size input samples.
                start = end - hop_size
                pbar.update(1)

    # Peak-normalize.  Integer data cannot be divided in place, and a silent
    # or empty signal has no peak to divide by.
    if not np.issubdtype(output.dtype, np.inexact):
        output = output.astype(np.float64)
    peak = np.max(np.abs(output)) if output.size else 0.0
    if peak != 0:
        output /= peak
    return output
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment