-
-
Save Bentroen/4df9b8d5d052f9d14bc1a8531fe49994 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pydub import AudioSegment | |
import numpy as np | |
class Mixer:
    """Mix several pydub ``AudioSegment`` objects into one in a single pass.

    Sounds are queued with :meth:`overlay` (at an explicit position) or
    :meth:`append` (at the current end of the mix) and are only summed
    together when :meth:`to_audio_segment` is called.
    """

    def __init__(self):
        # Queued sounds as ``(position_ms, AudioSegment)`` pairs, in the
        # order they were added.
        self.parts = []

    def __len__(self):
        """Return the duration of the mix in whole milliseconds.

        An empty mixer has length 0, which also lets :meth:`append` be used
        on a freshly created mixer.
        """
        if not self.parts:
            return 0
        parts = self._sync()
        frame_rate = parts[0][1].frame_rate
        frame_count = max(offset + seg.frame_count() for offset, seg in parts)
        return int(1000.0 * frame_count / frame_rate)

    def overlay(self, sound, position=0):
        """Queue ``sound`` to start ``position`` milliseconds into the mix.

        Returns the mixer itself so calls can be chained.
        """
        self.parts.append((position, sound))
        return self

    def _sync(self):
        """Return ``[(frame_offset, segment), ...]`` for all queued sounds.

        The segments are passed through pydub's private
        ``AudioSegment._sync`` helper, which is expected to convert them to
        a common frame rate, channel count and sample width. Returns an
        empty list when nothing has been queued.
        """
        if not self.parts:
            return []
        positions, segs = zip(*self.parts)
        segs = AudioSegment.empty()._sync(*segs)
        # Offsets must use the frame rate *after* syncing: it may differ
        # from the first queued segment's original rate.
        frame_rate = segs[0].frame_rate
        offsets = [int(frame_rate * pos / 1000.0) for pos in positions]
        return list(zip(offsets, segs))

    def append(self, sound):
        """Queue ``sound`` to start where the current mix ends.

        Returns the mixer itself, mirroring :meth:`overlay`.
        """
        self.overlay(sound, position=len(self))
        return self

    def to_audio_segment(self):
        """Render the queued sounds into one normalized 32-bit segment.

        Returns ``AudioSegment.empty()`` when nothing has been queued.
        """
        parts = self._sync()
        if not parts:
            return AudioSegment.empty()

        template = parts[0][1]
        channels = template.channels
        frame_count = max(offset + seg.frame_count() for offset, seg in parts)
        sample_count = int(frame_count * channels)

        # Accumulate in 64 bits so that summing overlapping sounds cannot
        # wrap around, even when the inputs are already 32-bit.
        output = np.zeros(sample_count, dtype="int64")
        for offset, seg in parts:
            raw = seg.get_array_of_samples()
            # Read the samples with their real width instead of assuming
            # 16-bit audio.
            samples = np.frombuffer(raw, dtype=raw.typecode)
            start = offset * channels
            output[start:start + len(samples)] += samples

        # Scale down only if the sum no longer fits in the 32-bit output.
        int32_max = np.iinfo(np.int32).max
        peak = int(np.abs(output).max()) if output.size else 0
        if peak > int32_max:
            output = output * (int32_max / peak)
        mixed = output.astype("int32")

        # frame_width has to follow the new sample width, otherwise the
        # spawned segment keeps the frame width of the input segments.
        overrides = {"sample_width": 4, "frame_width": channels * 4}
        # The audio is then normalized to occupy the full "height" again
        return template._spawn(mixed.tobytes(), overrides=overrides).normalize(
            headroom=0.0
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Well, I think 64-bit output won't work well with pydub. One possibility would be to rescale the 32-bit input array to 16 bit (in which case it is also normalized automatically):
samples = np.frombuffer(seg.get_array_of_samples(), dtype="int32")
samples = np.int16(samples/np.max(np.abs(samples)) * 32767)
If normalizing isn't wanted, one could instead divide by the maximum 32-bit integer value:
samples = np.int16(samples/2147483647 * 32767)
16 bit int max value: 32767
32 bit int max value: 2147483647