Last active
May 12, 2021 03:16
-
-
Save knandersen/0f41fc132e584bc66958adca590aed12 to your computer and use it in GitHub Desktop.
Takes an input directory containing wave files and splices them together into a morphagene reel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
# -*- coding: utf-8 -*- | |
""" | |
USAGE: | |
morphagene_directory.py -d <inputdirectory> -o <outputfile>
Script will go through <inputdirectory> and splice all wave-files | |
together in alphabetic order and place a marker in between. | |
Requires all wave-files in directory to have the same number of channels and bit depth!
Does not require input file to be 48000Hz, but .WAV-files must be stereo. | |
See the Morphagene manual for naming conventions of output files: | |
http://www.makenoisemusic.com/content/manuals/morphagene-manual.pdf | |
# see http://stackoverflow.com/questions/15576798/create-32bit-float-wav-file-in-python | |
# see... http://blog.theroyweb.com/extracting-wav-file-header-information-using-a-python-script | |
# marker code from Joseph Basquin [https://gist.github.com/josephernest/3f22c5ed5dabf1815f16efa8fa53d476] | |
""" | |
import sys, getopt | |
import struct | |
import numpy as np | |
from scipy import interpolate | |
import gzip | |
import os | |
def float32_wav_file(file_name, sample_array, sample_rate,
                     markers=None, verbose=False):
    """Write audio as a 32-bit floating-point WAV file, optionally with cue markers.

    Parameters:
        file_name: path of the file to create (overwritten if it exists).
        sample_array: 2-D array shaped (channels, samples); values are stored
            as little-endian 32-bit floats.
        sample_rate: samples per second written to the header.
        markers: optional list of cue positions (sample indices), or a list of
            dicts like ``{'position': 100, 'label': 'marker1'}``. When given, a
            ``cue `` chunk and a ``LIST``/``adtl`` chunk with one ``labl``
            entry per marker are appended after the audio data.
        verbose: print progress messages when True.

    Raises:
        ValueError: if ``sample_array`` is not 2-D (from the shape unpacking).
    """
    samples = np.asarray(sample_array)
    (M, N) = samples.shape
    bytes_per_sample = 4  # 32-bit floats
    block_align = M * bytes_per_sample  # bytes per frame (one sample of every channel)
    byte_count = M * N * bytes_per_sample

    if verbose:
        print("packing...")
    # Interleave the channels frame by frame: transposing to (samples, channels)
    # and dumping in C order yields ch0[0], ch1[0], ch0[1], ch1[1], ...
    # see: http://soundfile.sapp.org/doc/WaveFormat/
    audio_bytes = samples.T.astype('<f4').tobytes()

    # Build the optional cue/label chunks first so that the RIFF size in the
    # header can account for them.
    trailer = b''
    if markers:  # != None and != []
        if isinstance(markers[0], dict):  # [{'position': 100, 'label': 'marker1'}, ...]
            labels = [m['label'] for m in markers]
            positions = [m['position'] for m in markers]
        else:
            labels = ['' for m in markers]
            positions = list(markers)

        cue_points = []
        for i, c in enumerate(positions):
            # id, position, chunk id 'data' (1635017060), chunk start,
            # block start, sample offset
            cue_points.append(struct.pack('<iiiiii', i + 1, int(c), 1635017060, 0, 0, int(c)))
        cue_body = struct.pack('<i', len(positions)) + b''.join(cue_points)

        label_chunks = []
        for i, lbl in enumerate(labels):
            if not isinstance(lbl, bytes):
                lbl = lbl.encode('utf-8')
            # Terminating NUL, plus one pad byte when needed so the chunk
            # occupies an even number of bytes; the pad is not counted in size.
            padded = lbl + (b'\x00' if len(lbl) % 2 == 1 else b'\x00\x00')
            size = len(lbl) + 1 + 4  # text + NUL + cue point id
            label_chunks.append(b'labl' + struct.pack('<ii', size, i + 1) + padded)
        lbls = b''.join(label_chunks)

        trailer = (b'cue ' + struct.pack('<i', len(cue_body)) + cue_body +
                   # https://web.archive.org/web/20141226210234/http://www.sonicspot.com/guide/wavefiles.html#list
                   b'LIST' + struct.pack('<i', len(lbls) + 4) + b'adtl' + lbls)

    header = struct.pack('<4sI4s4sIHHIIHH',
                         b'RIFF',
                         # everything after this field: 'WAVE' + fmt chunk +
                         # data chunk header (36 bytes) + audio + extra chunks
                         36 + byte_count + len(trailer),
                         b'WAVE', b'fmt ',
                         0x10,  # size of 'fmt ' header
                         3,  # format 3 = floating-point PCM
                         M,  # channels
                         int(sample_rate),  # samples / second
                         int(sample_rate) * block_align,  # bytes / second
                         block_align,  # block alignment
                         32)  # bits / sample
    header += struct.pack('<4sI', b'data', byte_count)

    if verbose:
        print("saving audio...")
    # The context manager closes the file even if a write fails.
    with open(file_name, 'wb') as fid:
        fid.write(header)
        fid.write(audio_bytes)
        if trailer:
            if verbose:
                print("saving cue markers...")
            fid.write(trailer)
def wav_file_read(filename, verbose=False):
    """Read a WAV file and return its audio as floating-point channel data.

    The RIFF chunk list is walked to locate the ``fmt `` and ``data`` chunks,
    so extra chunks before the audio are skipped and chunks after it (cue
    points, labels) are not decoded as samples.

    Parameters:
        filename: path of the WAV file to read.
        verbose: print the parsed header fields and decoding progress.

    Returns:
        Tuple ``(audio, SampleRate, NumChannels, BitsPerSample)``. ``audio``
        is a float64 array with one row per channel; for a single-channel
        file it is 1-D. Integer PCM is scaled by the largest positive value
        of its bit depth; float data is returned unscaled.

    Raises:
        ValueError: if the file is not RIFF/WAVE or lacks a usable ``fmt `` or
            ``data`` chunk.
    """
    with open(filename, 'rb') as fi:
        data = fi.read()

    if len(data) < 12 or data[0:4] != b'RIFF' or data[8:12] != b'WAVE':
        raise ValueError('%s is not a RIFF/WAVE file' % filename)
    ChunkSize = struct.unpack('<I', data[4:8])[0]

    # Walk the chunk list: 4-byte id, 4-byte little-endian size, payload,
    # and one pad byte when the size is odd.
    fmt_chunk = None
    waveData = None
    Subchunk1Size = 0
    SubChunk2Size = 0
    pos = 12
    while pos + 8 <= len(data):
        chunk_id = data[pos:pos + 4]
        chunk_size = struct.unpack('<I', data[pos + 4:pos + 8])[0]
        body = data[pos + 8:pos + 8 + chunk_size]
        if chunk_id == b'fmt ' and fmt_chunk is None:
            fmt_chunk = body
            Subchunk1Size = chunk_size
        elif chunk_id == b'data':
            waveData = body  # slicing clips this if the file is truncated
            SubChunk2Size = chunk_size
            break
        pos += 8 + chunk_size + (chunk_size & 1)

    if fmt_chunk is None or len(fmt_chunk) < 16:
        raise ValueError('%s has no valid fmt chunk before its data' % filename)
    if waveData is None:
        raise ValueError('%s has no data chunk' % filename)

    (AudioFormat, NumChannels, SampleRate,
     ByteRate, BlockAlign, BitsPerSample) = struct.unpack('<HHIIHH', fmt_chunk[:16])
    if AudioFormat == 0xFFFE and len(fmt_chunk) >= 26:
        # WAVE_FORMAT_EXTENSIBLE: the real format code is the first two bytes
        # of the SubFormat GUID, at offset 24 of the fmt payload.
        AudioFormat = struct.unpack('<H', fmt_chunk[24:26])[0]
    if NumChannels < 1:
        raise ValueError('%s declares %d channels' % (filename, NumChannels))

    if verbose:
        print("ChunkSize =%d\nSubchunk1Size =%d\nAudioFormat =%d\nNumChannels =%d\nSampleRate =%d\nByteRate =%d\nBlockAlign =%d\nBitsPerSample =%d\nA:%c, B:%c, C:%c, D:%c\nSubChunk2Size =%d" %
              (ChunkSize,
               Subchunk1Size,
               AudioFormat,
               NumChannels,
               SampleRate,
               ByteRate,
               BlockAlign,
               BitsPerSample,
               'd', 'a', 't', 'a',
               SubChunk2Size))

    # convert audio data to float based on sample format and bit depth
    is_float = (AudioFormat == 3)
    if BitsPerSample == 8:
        if verbose:
            print("Unpacking 8 bits on len(waveData)=%d" % len(waveData))
        d = np.frombuffer(waveData, dtype=np.uint8)
        # 8-bit WAV samples are unsigned with silence at 128.
        floatdata = (d.astype(np.float64) - 128.0) / 127.0
    elif BitsPerSample == 16:
        if verbose:
            print("Unpacking 16 bits on len(waveData)=%d" % len(waveData))
        n = len(waveData) // 2
        d = np.frombuffer(waveData[:n * 2], dtype='<i2')
        floatdata = d.astype(np.float64) / 32767.0
    elif BitsPerSample == 24:
        if verbose:
            print("Unpacking 24 bits on len(waveData)=%d" % len(waveData))
        n = len(waveData) // 3
        raw = np.frombuffer(waveData[:n * 3], dtype=np.uint8).reshape(n, 3).astype(np.int32)
        # Assemble the little-endian 3-byte samples, then sign-extend bit 23.
        d = raw[:, 0] | (raw[:, 1] << 8) | (raw[:, 2] << 16)
        d -= (d & 0x800000) << 1
        floatdata = d.astype(np.float64) / 8388607.0
    elif is_float and BitsPerSample == 64:
        if verbose:
            print("Unpacking 64 bits on len(waveData)=%d" % len(waveData))
        n = len(waveData) // 8
        floatdata = np.frombuffer(waveData[:n * 8], dtype='<f8').astype(np.float64)
    else:  # anything else will be considered 32 bits
        if verbose:
            print("Unpacking 32 bits on len(waveData)=%d" % len(waveData))
        n = len(waveData) // 4
        if is_float:
            # IEEE float samples are already normalised; do not rescale.
            floatdata = np.frombuffer(waveData[:n * 4], dtype='<f4').astype(np.float64)
        else:
            d = np.frombuffer(waveData[:n * 4], dtype='<i4')
            floatdata = d.astype(np.float64) / 2147483647.0

    # De-interleave into one row per channel, dropping an incomplete last frame.
    whole = (len(floatdata) // NumChannels) * NumChannels
    floatdata = floatdata[:whole]
    if NumChannels == 1:
        v = floatdata
    else:
        v = np.vstack([floatdata[i::NumChannels] for i in range(NumChannels)])
    return (v, SampleRate, NumChannels, BitsPerSample)
def change_samplerate_interp(old_audio,old_rate,new_rate):
    '''
    Resample audio to a new sample rate by linear interpolation.

    old_audio is indexed with samples along axis 0 (one column per channel);
    every channel is interpolated onto the new time grid. When the rates are
    equal the input object is returned untouched.
    Downsampling (old_rate > new_rate) may alias or lose data, since no
    filtering is applied.
    Modified from:
    https://stackoverflow.com/questions/33682490/how-to-read-a-wav-file-using-scipy-at-a-different-sampling-rate
    '''
    # Guard clause: nothing to do when the rates already agree.
    if old_rate == new_rate:
        print('Conversion not needed, old and new rates match')
        return old_audio

    sample_count = old_audio.shape[0]
    # total time spanned by the input
    span = sample_count / old_rate
    resampled_count = int(sample_count * new_rate / old_rate)

    # both grids cover the same span, only the number of points differs
    source_times = np.linspace(0, span, sample_count)
    target_times = np.linspace(0, span, resampled_count)

    # interp1d works along the last axis, hence the transposes in and out
    resample = interpolate.interp1d(source_times, old_audio.T)
    return resample(target_times).T
def main(argv):
    """Splice every .wav file of a directory into one Morphagene reel.

    Parses ``-d/--inputdirectory`` and ``-o/--outputfile`` from ``argv``,
    reads the directory's wave files in sorted order, resamples each one to
    48000 Hz when needed, joins them end to end and writes the result with
    ``float32_wav_file``, placing a cue marker at every boundary between two
    consecutive files.

    Parameters:
        argv: command-line arguments without the program name.

    Exits with status 2 on a usage error and 1 when the input cannot be
    processed (unreadable directory, no wave files, unreadable or mismatched
    wave file); ``-h`` prints the usage line and exits with status 0.
    """
    usage = 'morphagene_directory.py -d <inputdirectory> -o <outputfile>'
    inputdirectory = ''
    outputfile = ''
    try:
        opts, args = getopt.getopt(argv, "hd:o:", ["inputdirectory=", "outputfile="])
    except getopt.GetoptError:
        print('Error in usage, correct format:\n' + usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-d", "--inputdirectory"):
            inputdirectory = arg
        elif opt in ("-o", "--outputfile"):
            outputfile = arg
    # Both options are required; without them there is nothing to read or write.
    if not inputdirectory or not outputfile:
        print('Error in usage, correct format:\n' + usage)
        sys.exit(2)

    print('Input directory: %s' % inputdirectory)
    print('Output Morphagene reel: %s' % outputfile)

    ###########################################################################
    # Write single file, to Morphagene 32bit
    # WAV file at 48000hz sample rate.
    ###########################################################################
    morph_srate = 48000  # required samplerate for Morphagene

    # find .wav-files in inputdirectory (extension matched case-insensitively)
    try:
        entries = sorted(os.listdir(inputdirectory))
    except OSError as err:
        print('Cannot read input directory %s: %s' % (inputdirectory, err))
        sys.exit(1)
    wavfiles = [os.path.join(inputdirectory, entry)
                for entry in entries if entry.lower().endswith('.wav')]
    if not wavfiles:
        print('No .wav files found in %s' % inputdirectory)
        sys.exit(1)

    first_num_channels = None
    first_bits_per_sample = None
    splices = []
    for wavfile in wavfiles:
        # read pertinent info from audio file, exit if input wave file is broken.
        # Only the read is guarded, so the exit below is not swallowed.
        try:
            (array, sample_rate, num_channels, bits_per_sample) = wav_file_read(wavfile, verbose=False)
        except Exception:
            print('Input .wav file %s is poorly formatted, exiting' % wavfile)
            sys.exit(1)

        if first_num_channels is None:
            first_num_channels = num_channels
            first_bits_per_sample = bits_per_sample
        elif num_channels != first_num_channels or bits_per_sample != first_bits_per_sample:
            print('All wave files must have the same number of channels and bit depth')
            sys.exit(1)

        # wav_file_read returns a 1-D array for single-channel files; give it
        # the (channels, samples) layout the rest of this function relies on.
        array = np.atleast_2d(array)

        # check if input wav has a different rate than desired Morphagene rate,
        # and correct by interpolation
        if sample_rate != morph_srate:
            print("Correcting input sample rate %iHz to Morphagene rate %iHz" % (sample_rate, morph_srate))
            # perform interpolation on each channel, then transpose back
            array = change_samplerate_interp(array.T, float(sample_rate), float(morph_srate)).T
        splices.append(array)

    # Join once at the end instead of re-copying the growing reel per file.
    allwaves = np.concatenate(splices, axis=1)

    # One marker at each boundary between consecutive files; the end of the
    # last file gets no marker.
    markers = []
    position = 0
    for splice in splices[:-1]:
        position += splice.shape[1]
        markers.append(position)

    # write wav file with additional cue markers from labels
    float32_wav_file(outputfile, allwaves, morph_srate, markers=markers)
    print('Saved Morphagene reel with %i splices: %s' % (len(markers) + 1, outputfile))
# Script entry point: hand the command-line arguments (without the program
# name) to main() only when this file is executed directly.
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
Great idea @nicksort, I was actually considering that too!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Good stuff! I feel like you should convert this from gist to repo elsewhere. I have some ideas to propose when I get some time later.