Ryu1845 · August 16, 2024 08:04
diff --git a/chunked_audio_inference.py b/chunked_audio_inference.py
 from typing import Callable

 import numpy as np
 from tqdm import tqdm

 def wsola_chunked_processing(audio: np.ndarray, sr: int, chunk_size: int, hop_size: int, mod_func: Callable[[np.ndarray], np.ndarray]):
     """Apply ``mod_func`` to ``audio`` chunk by chunk and stitch the results.

     Consecutive chunks share ``hop_size`` input samples. Each processed chunk
     is aligned against the tail of the output built so far (WSOLA-style
     cross-correlation search over its first ``2 * hop_size`` samples) and
     blended in with a linear crossfade. The stitched signal is then scaled
     to a peak magnitude of 1.

     Args:
         audio: Input samples. Only ``len(audio)`` and slicing along the first
             axis are used; presumably 1-D mono audio -- TODO confirm.
         sr: Sample rate. Currently unused; kept for interface compatibility.
         chunk_size: Number of input samples handed to ``mod_func`` per call.
         hop_size: Number of samples shared by neighbouring chunks; also the
             crossfade length.
         mod_func: Callable applied to each chunk. Its result is squeezed and
             must contain at least ``hop_size`` samples when chunking is used.

     Returns:
         The processed signal, peak-normalised. Empty or all-zero results are
         returned without scaling. Integer results are returned as floats.

     Raises:
         ValueError: If chunking is required and ``hop_size`` is not strictly
             between 0 and ``chunk_size``, or if ``mod_func`` returns fewer
             than ``hop_size`` samples for a chunk.
     """
     if chunk_size >= len(audio):
         # Short input: one call covers everything, no stitching required.
         output = np.asarray(mod_func(audio).squeeze())
     else:
         if not 0 < hop_size < chunk_size:
             # hop_size == chunk_size used to divide by zero and larger values
             # moved `start` backwards; fail early with a clear message.
             raise ValueError(
                 "hop_size must satisfy 0 < hop_size < chunk_size, got "
                 f"hop_size={hop_size}, chunk_size={chunk_size}"
             )

         output = np.array([], dtype=audio.dtype)
         start = 0

         # Chunks advance by `stride`; the loop runs ceil((len - hop) / stride)
         # times, so use ceiling division to let the progress bar reach 100%.
         stride = chunk_size - hop_size
         total_chunks = -(-(len(audio) - hop_size) // stride)

         # The crossfade ramp does not depend on the chunk: build it once.
         fade_in = np.linspace(0, 1, hop_size)
         fade_out = 1 - fade_in

         with tqdm(total=total_chunks, desc="Processing audio chunks") as pbar:
             while start < len(audio) - hop_size:
                 end = min(start + chunk_size, len(audio))
                 modified_chunk = mod_func(audio[start:end]).squeeze()

                 if start == 0:
                     # The first chunk is taken as-is.
                     output = np.append(output, modified_chunk)
                     if not np.issubdtype(output.dtype, np.inexact):
                         # Blending into an integer buffer would truncate the
                         # crossfade, so accumulate in floating point.
                         output = output.astype(np.float64)
                 else:
                     if output.size < hop_size or modified_chunk.size < hop_size:
                         raise ValueError(
                             "mod_func returned fewer than hop_size samples; "
                             "cannot crossfade chunks"
                         )

                     # Slide the current output tail over the head of the new
                     # chunk and keep the offset where they match best.
                     overlap = output[-hop_size:]
                     correlation = np.correlate(modified_chunk[:hop_size * 2], overlap, mode='valid')
                     best_offset = int(np.argmax(correlation))

                     # Fade the old tail out while fading the aligned segment in.
                     aligned = modified_chunk[best_offset:best_offset + hop_size]
                     output[-hop_size:] = overlap * fade_out + aligned * fade_in

                     # Everything after the blended region is new material.
                     output = np.append(output, modified_chunk[best_offset + hop_size:])

                 # The next chunk re-reads the last hop_size input samples.
                 start = end - hop_size
                 pbar.update(1)

     if output.size == 0:
         return output

     peak = np.max(np.abs(output))
     if peak > 0:
         # Out-of-place division: `output` may be a view of the caller's input
         # (e.g. an identity mod_func), and integer arrays cannot be divided
         # in place.
         output = output / peak

     return output
	from typing import Callable

	import numpy as np
	from tqdm import tqdm

	def wsola_chunked_processing(audio: np.ndarray, sr: int, chunk_size: int, hop_size: int, mod_func: Callable[[np.ndarray], np.ndarray]):
	    """Apply ``mod_func`` to ``audio`` chunk by chunk and stitch the results.

	    Consecutive chunks share ``hop_size`` input samples. Each processed chunk
	    is aligned against the tail of the output built so far (WSOLA-style
	    cross-correlation search over its first ``2 * hop_size`` samples) and
	    blended in with a linear crossfade. The stitched signal is then scaled
	    to a peak magnitude of 1.

	    Args:
	        audio: Input samples. Only ``len(audio)`` and slicing along the first
	            axis are used; presumably 1-D mono audio -- TODO confirm.
	        sr: Sample rate. Currently unused; kept for interface compatibility.
	        chunk_size: Number of input samples handed to ``mod_func`` per call.
	        hop_size: Number of samples shared by neighbouring chunks; also the
	            crossfade length.
	        mod_func: Callable applied to each chunk. Its result is squeezed and
	            must contain at least ``hop_size`` samples when chunking is used.

	    Returns:
	        The processed signal, peak-normalised. Empty or all-zero results are
	        returned without scaling. Integer results are returned as floats.

	    Raises:
	        ValueError: If chunking is required and ``hop_size`` is not strictly
	            between 0 and ``chunk_size``, or if ``mod_func`` returns fewer
	            than ``hop_size`` samples for a chunk.
	    """
	    if chunk_size >= len(audio):
	        # Short input: one call covers everything, no stitching required.
	        output = np.asarray(mod_func(audio).squeeze())
	    else:
	        if not 0 < hop_size < chunk_size:
	            # hop_size == chunk_size used to divide by zero and larger values
	            # moved `start` backwards; fail early with a clear message.
	            raise ValueError(
	                "hop_size must satisfy 0 < hop_size < chunk_size, got "
	                f"hop_size={hop_size}, chunk_size={chunk_size}"
	            )

	        output = np.array([], dtype=audio.dtype)
	        start = 0

	        # Chunks advance by `stride`; the loop runs ceil((len - hop) / stride)
	        # times, so use ceiling division to let the progress bar reach 100%.
	        stride = chunk_size - hop_size
	        total_chunks = -(-(len(audio) - hop_size) // stride)

	        # The crossfade ramp does not depend on the chunk: build it once.
	        fade_in = np.linspace(0, 1, hop_size)
	        fade_out = 1 - fade_in

	        with tqdm(total=total_chunks, desc="Processing audio chunks") as pbar:
	            while start < len(audio) - hop_size:
	                end = min(start + chunk_size, len(audio))
	                modified_chunk = mod_func(audio[start:end]).squeeze()

	                if start == 0:
	                    # The first chunk is taken as-is.
	                    output = np.append(output, modified_chunk)
	                    if not np.issubdtype(output.dtype, np.inexact):
	                        # Blending into an integer buffer would truncate the
	                        # crossfade, so accumulate in floating point.
	                        output = output.astype(np.float64)
	                else:
	                    if output.size < hop_size or modified_chunk.size < hop_size:
	                        raise ValueError(
	                            "mod_func returned fewer than hop_size samples; "
	                            "cannot crossfade chunks"
	                        )

	                    # Slide the current output tail over the head of the new
	                    # chunk and keep the offset where they match best.
	                    overlap = output[-hop_size:]
	                    correlation = np.correlate(modified_chunk[:hop_size * 2], overlap, mode='valid')
	                    best_offset = int(np.argmax(correlation))

	                    # Fade the old tail out while fading the aligned segment in.
	                    aligned = modified_chunk[best_offset:best_offset + hop_size]
	                    output[-hop_size:] = overlap * fade_out + aligned * fade_in

	                    # Everything after the blended region is new material.
	                    output = np.append(output, modified_chunk[best_offset + hop_size:])

	                # The next chunk re-reads the last hop_size input samples.
	                start = end - hop_size
	                pbar.update(1)

	    if output.size == 0:
	        return output

	    peak = np.max(np.abs(output))
	    if peak > 0:
	        # Out-of-place division: `output` may be a view of the caller's input
	        # (e.g. an identity mod_func), and integer arrays cannot be divided
	        # in place.
	        output = output / peak

	    return output