-
-
Save ferrihydrite/e96dee177614898be250457cd592517a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2 | |
# -*- coding: utf-8 -*- | |
""" | |
Usage: | |
morphagene_onset.py -w <inputwavfile> -o <outputfile> | |
Use the Superflux onset detection algorithm with backtracking to generate | |
splice locations. | |
Use these splice locations with a converted WAV (to 32-bit float / 48000Hz) | |
to make Morphagene reels. | |
This method typically generates splices on each percussion hit of a sample, | |
so be careful to choose an appropriate length sample or quickly exceed the | |
limitations of the Morphagene [300 splices]. | |
""" | |
import librosa | |
import sys, getopt, os | |
import struct | |
import numpy as np | |
from scipy import interpolate | |
import warnings | |
import collections | |
class WavFileWarning(UserWarning):
    """Warning category used when a WAV file contains something unexpected.

    Passed to ``warnings.warn`` for unfamiliar format bytes and for chunks
    that are skipped instead of parsed.
    """
_ieee = False | |
def test_normalized(array): | |
''' | |
Determine if an array is entirely -1 < array[i,j] < 1, to see if array is | |
normalized | |
''' | |
return (array > -1).all() and (array < 1).all() | |
def read(file, readmarkers=False, readmarkerlabels=False,
         readmarkerslist=False, readloops=False, readpitch=False,
         normalized=False, forcestereo=False):
    """
    Return the sample rate (in samples/sec), data and bit depth of a WAV file.

    Parameters
    ----------
    file : str or file-like
        Input wav file: a filename, or any binary object with ``read``,
        ``seek`` and ``tell`` (real files and in-memory buffers both work).
        The object is always closed before returning.
    readmarkers, readmarkerlabels, readmarkerslist, readloops, readpitch : bool
        Each flag that is True appends one extra item to the returned tuple,
        in this order: cue positions, cue labels, list of
        ``{'position', 'label'}`` dicts (sorted by position), sampler loops
        as ``[start, end]`` pairs, pitch in Hz from the 'smpl' chunk.
    normalized : bool
        If True, integer samples are rescaled to float32 in [-1, 1).
        Float files and data already inside (-1, 1) are left untouched.
    forcestereo : bool
        If True, mono data is duplicated into two identical columns.

    Returns
    -------
    rate : int
        Sample rate of wav file
    data : np array
        Data read from wav file, shape (frames,) or (frames, channels)
    bits : int
        Bits per sample declared in the 'fmt ' chunk

    Raises
    ------
    ValueError
        If the RIFF/WAVE header is missing, or no 'fmt '/'data' chunk exists.

    Notes
    -----
    * The returned sample rate is a Python integer
    * The data is returned as a np array with a
      data-type determined from the file.
    """
    ################
    ## READ SUBFUNCTIONS
    ## assumes file pointer is immediately
    ## after the 'fmt ' id
    def _read_fmt_chunk(fid):
        global _ieee
        size, comp, noc, rate, sbytes, ba, bits = struct.unpack(
            '<ihHIIHH', fid.read(20))
        # Mirror the module flag for backward compatibility, but refresh it
        # on every fmt chunk so one float file cannot taint later reads.
        _ieee = (comp == 3)
        if (comp != 1 or size > 16) and comp != 3:
            warnings.warn("Unfamiliar format bytes", WavFileWarning)
        if size > 16:
            fid.read(size - 16)
        return size, comp, noc, rate, sbytes, ba, bits

    # assumes file pointer is immediately
    # after the 'data' id
    def _read_data_chunk(fid, noc, bits, is_float, normalized=False):
        size = struct.unpack('<i', fid.read(4))[0]
        if bits == 8 or bits == 24:
            dtype = 'u1'
            width = 1
        else:
            width = bits // 8
            if width < 1:
                raise ValueError("Unsupported bit depth: %d" % bits)
            dtype = '<i%d' % width
        if bits == 32 and is_float:
            dtype = '<f4'
        # Read through the file API (not np.fromfile) so in-memory buffers
        # work too; a negative size means "unknown", so read to the end.
        raw = fid.read(size) if size >= 0 else fid.read()
        usable = len(raw) - (len(raw) % width)
        # copy() gives a writable array that does not alias the byte string
        data = np.frombuffer(raw[:usable], dtype=dtype).copy()
        if bits == 24:
            # no native 24-bit type: widen each 3-byte sample to 4 bytes and
            # fill the top byte with the sign bit
            frames = len(data) // 3
            widened = np.empty((frames, 4), dtype='u1')
            widened[:, :3] = data[:frames * 3].reshape((frames, 3))
            widened[:, 3:] = (widened[:, 2:3] >> 7) * 255
            data = widened.view('<i4').reshape(widened.shape[:-1])
        if noc > 1:
            # de-interleave; drop a trailing partial frame of a truncated file
            data = data[:len(data) - (len(data) % noc)].reshape(-1, noc)
        if bool(size & 1):
            # if odd number of bytes, move 1 byte further (data chunk is word-aligned)
            fid.seek(1, 1)
        if normalized:
            if data.dtype.kind == 'f' or test_normalized(data):
                # float samples are already on the +/-1 scale, even when a
                # peak sits exactly on 1.0
                print('File already normalized, passing')
            elif bits == 16 or bits == 24 or bits == 32:
                data = np.float32(data) / float(2 ** (bits - 1))
            elif bits == 8:
                # unsigned 8-bit audio is centred on 128: scale, then shift
                data = np.float32(data) / 128.0 - 1.0
        return data

    def _skip_unknown_chunk(fid):
        size = struct.unpack('<i', fid.read(4))[0]
        if bool(size & 1):
            # if odd number of bytes, move 1 byte further (chunks are word-aligned)
            size += 1
        fid.seek(size, 1)

    def _read_riff_chunk(fid):
        if fid.read(4) != b'RIFF':
            raise ValueError("Not a WAV file.")
        fsize = struct.unpack('<I', fid.read(4))[0] + 8
        if fid.read(4) != b'WAVE':
            raise ValueError("Not a WAV file.")
        return fsize
    ##################

    fid = file if hasattr(file, 'read') else open(file, 'rb')
    try:
        fsize = _read_riff_chunk(fid)
        noc = 1
        bits = 8
        rate = None
        data = None
        is_float = False
        _markersdict = collections.defaultdict(
            lambda: {'position': -1, 'label': ''})
        loops = []
        pitch = 0.0
        while fid.tell() < fsize:
            # read the next chunk
            chunk_id = fid.read(4)
            if len(chunk_id) < 4:
                # file ends before the size announced in the RIFF header
                break
            if chunk_id == b'fmt ':
                size, comp, noc, rate, sbytes, ba, bits = _read_fmt_chunk(fid)
                is_float = (comp == 3)
            elif chunk_id == b'data':
                data = _read_data_chunk(fid, noc, bits, is_float, normalized)
            elif chunk_id == b'cue ':
                size, numcue = struct.unpack('<ii', fid.read(8))
                for _ in range(numcue):
                    cue_id, position, datachunkid, chunkstart, blockstart, \
                        sampleoffset = struct.unpack('<iiiiii', fid.read(24))
                    # keyed by id so labels can be matched to markers
                    _markersdict[cue_id]['position'] = position
            elif chunk_id == b'LIST':
                # only the header is consumed; the sub-chunks that follow are
                # picked up by later iterations of this loop
                size, list_type = struct.unpack('<ii', fid.read(8))
            elif chunk_id in [b'ICRD', b'IENG', b'ISFT', b'ISTJ']:
                # see http://www.pjb.com.au/midi/sfspec21.html#i5
                _skip_unknown_chunk(fid)
            elif chunk_id == b'labl':
                size, cue_id = struct.unpack('<ii', fid.read(8))
                # the size should be even, see WAV specification, e.g. 16=>16, 23=>24
                size = size + (size % 2)
                # remove the trailing null characters
                label = fid.read(size - 4).rstrip(b'\x00')
                if not isinstance(label, str):
                    # Python 3: hand back text, like the default '' label
                    label = label.decode('utf-8', 'replace')
                _markersdict[cue_id]['label'] = label
            elif chunk_id == b'smpl':
                size, manuf, prod, sampleperiod, midiunitynote, \
                    midipitchfraction, smptefmt, smpteoffs, numsampleloops, \
                    samplerdata = struct.unpack('<iiiiiIiiii', fid.read(40))
                cents = midipitchfraction * 1. / (2 ** 32 - 1)
                pitch = 440. * 2 ** ((midiunitynote + cents - 69.) / 12)
                for _ in range(numsampleloops):
                    cuepointid, loop_type, start, end, \
                        fraction, playcount = struct.unpack(
                            '<iiiiii', fid.read(24))
                    loops.append([start, end])
                # skip trailing sampler-specific bytes so the next chunk id
                # is read from the right offset
                leftover = size - 36 - 24 * numsampleloops
                if leftover > 0:
                    fid.seek(leftover + (size & 1), 1)
            else:
                warnings.warn(
                    "Chunk " + chunk_id.decode('latin-1') + " skipped",
                    WavFileWarning)
                _skip_unknown_chunk(fid)
    finally:
        fid.close()

    if rate is None or data is None:
        raise ValueError("WAV file has no 'fmt ' or 'data' chunk.")
    if data.ndim == 1 and forcestereo:
        data = np.column_stack((data, data))
    # sort by position
    _markerslist = sorted(_markersdict.values(), key=lambda m: m['position'])
    _cue = [m['position'] for m in _markerslist]
    _cuelabels = [m['label'] for m in _markerslist]
    return (rate, data, bits, ) \
        + ((_cue,) if readmarkers else ()) \
        + ((_cuelabels,) if readmarkerlabels else ()) \
        + ((_markerslist,) if readmarkerslist else ()) \
        + ((loops,) if readloops else ()) \
        + ((pitch,) if readpitch else ())
def float32_wav_file(file_name, sample_array, sample_rate, | |
markers=None, verbose=False): | |
(M,N)=sample_array.shape | |
#print "len sample_array=(%d,%d)" % (M,N) | |
byte_count = M * N * 4 # (len(sample_array)) * 4 # 32-bit floats | |
wav_file = "" | |
# write the header | |
wav_file += struct.pack('<ccccIccccccccIHHIIHH', | |
'R', 'I', 'F', 'F', | |
byte_count + 0x2c - 8, # header size | |
'W', 'A', 'V', 'E', 'f', 'm', 't', ' ', | |
0x10, # size of 'fmt ' header | |
3, # format 3 = floating-point PCM | |
M, # channels | |
sample_rate, # samples / second | |
sample_rate * 4, # bytes / second | |
4, # block alignment | |
32) # bits / sample | |
wav_file += struct.pack('<ccccI', | |
'd', 'a', 't', 'a', byte_count) | |
if verbose: | |
print("packing data...") | |
# flatten data in an alternating fashion | |
# see: http://soundfile.sapp.org/doc/WaveFormat/ | |
reordered_wav = [sample_array[k,j] for j in range(N) for k in range(M)] | |
wav_file += struct.pack('<%df' % len(reordered_wav), *reordered_wav) | |
if verbose: | |
print("saving audio...") | |
fid=open(file_name,'wb') | |
for value in wav_file: | |
fid.write(value) | |
if markers: # != None and != [] | |
if verbose: | |
print("saving cue markers...") | |
if isinstance(markers[0], dict):# then we have [{'position': 100, 'label': 'marker1'}, ...] | |
labels = [m['label'] for m in markers] | |
markers = [m['position'] for m in markers] | |
else: | |
labels = ['' for m in markers] | |
fid.write(b'cue ') | |
size = 4 + len(markers) * 24 | |
fid.write(struct.pack('<ii', size, len(markers))) | |
for i, c in enumerate(markers): | |
s = struct.pack('<iiiiii', i + 1, c, 1635017060, 0, 0, c)# 1635017060 is struct.unpack('<i',b'data') | |
fid.write(s) | |
lbls = '' | |
for i, lbl in enumerate(labels): | |
lbls += b'labl' | |
label = lbl + ('\x00' if len(lbl) % 2 == 1 else '\x00\x00') | |
size = len(lbl) + 1 + 4 # because \x00 | |
lbls += struct.pack('<ii', size, i + 1) | |
lbls += label | |
fid.write(b'LIST') | |
size = len(lbls) + 4 | |
fid.write(struct.pack('<i', size)) | |
fid.write(b'adtl')# https://web.archive.org/web/20141226210234/http://www.sonicspot.com/guide/wavefiles.html#list | |
fid.write(lbls) | |
fid.close() | |
def onset_splice_superflux(audiofile):
    '''
    Superflux onset detection method of Boeck and Widmer [2013], modified to
    use backtracking to get accurate splice location.

    The file is loaded through librosa at 44100 Hz and the detected onsets
    are returned as times in seconds.

    From:
    https://librosa.github.io/librosa/auto_examples/plot_superflux.html#sphx-glr-auto-examples-plot-superflux-py
    '''
    y, sr = librosa.load(audiofile, sr=44100)
    # Constants directly from paper
    n_fft = 1024
    hop_length = int(librosa.time_to_samples(1. / 200, sr=sr))
    lag = 2         # number of frames
    n_mels = 138    # number of bins
    fmin = 27.5     # lowest frequency
    fmax = 16000.   # highest frequency
    max_size = 3
    # Mel spectrogram; the audio is passed by keyword because newer librosa
    # releases no longer accept it positionally
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft,
                                       hop_length=hop_length,
                                       fmin=fmin,
                                       fmax=fmax,
                                       n_mels=n_mels)
    # Onset Strength Function
    odf_sf = librosa.onset.onset_strength(S=librosa.power_to_db(S, ref=np.max),
                                          sr=sr,
                                          hop_length=hop_length,
                                          lag=lag, max_size=max_size)
    # Onset locations in time, backtracked to the preceding energy minimum
    onset_sf = librosa.onset.onset_detect(onset_envelope=odf_sf,
                                          sr=sr,
                                          hop_length=hop_length,
                                          units='time',
                                          backtrack=True)
    return onset_sf
def change_samplerate_interp(old_audio, old_rate, new_rate):
    '''
    Change sample rate to new sample rate by simple (linear) interpolation.
    If old_rate > new_rate, there may be aliasing / data loss.

    Input should be in column format (frames along axis 0), as the
    interpolation is completed on each channel this way. When the rates
    already match, the input array itself is returned.

    Modified from:
    https://stackoverflow.com/questions/33682490/how-to-read-a-wav-file-using-scipy-at-a-different-sampling-rate
    '''
    if old_rate == new_rate:
        print('Conversion not needed, old and new rates match')
        return old_audio  # conversion not needed

    n_old = old_audio.shape[0]
    # Force true division: with integer rates Python 2 would truncate the
    # duration (often to 0) and collapse the whole time axis.
    duration = n_old / float(old_rate)
    n_new = int(n_old * float(new_rate) / float(old_rate))
    # time stamps of the old and new sample grids over the same duration
    time_old = np.linspace(0, duration, n_old)
    time_new = np.linspace(0, duration, n_new)
    # fit old_audio into new_audio length by interpolation
    interpolator = interpolate.interp1d(time_old, old_audio.T)
    return interpolator(time_new).T
def main(argv):
    """
    Command-line entry point: build a Morphagene reel from a WAV file.

    Reads the input WAV, converts it to 48000 Hz if needed, places one
    splice marker on each onset found by the Superflux detector, writes the
    result as a 32-bit float WAV with cue markers, and saves the onset times
    (seconds) to ``<input name>.txt``.

    Parameters
    ----------
    argv : list of str
        Command-line arguments without the program name:
        ``-w <inputwavfile> -o <outputfile>`` (``-h`` prints usage).

    Raises
    ------
    ValueError
        If the result exceeds the Morphagene limits
        [300 splices / 2.9 minutes].
    """
    usage = 'morphagene_onset.py -w <inputwavfile> -o <outputfile>'
    inputwavefile = ''
    outputfile = ''
    try:
        opts, args = getopt.getopt(argv, "hw:o:", ["wavfile=", "outputfile="])
    except getopt.GetoptError:
        print('Error in usage, correct format:\n' + usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Morphagene reel creation using Superflux onset detection:\n'
                  + usage)
            sys.exit()
        elif opt in ("-w", "--wavfile"):
            inputwavefile = arg
        elif opt in ("-o", "--outputfile"):
            outputfile = arg
    if not inputwavefile or not outputfile:
        # both options are required; fail here, not deep inside the audio code
        print('Error in usage, correct format:\n' + usage)
        sys.exit(2)
    print('Input wave file: %s' % inputwavefile)
    print('Output Morphagene reel: %s' % outputfile)

    ###########################################################################
    # Write single file, with splice locations using the Superflux onset
    # detection algorithm with backtracking for optimal splice location.
    ###########################################################################
    morph_srate = 48000  # required samplerate for Morphagene

    # read pertinent info from audio file, exit if input wave file is broken;
    # mono input is duplicated to two channels so the (channels, frames)
    # layout used below always holds
    try:
        (sample_rate, array, bits, ) = read(inputwavefile, normalized=True,
                                            forcestereo=True)
    except Exception:
        print('Input .wav file %s is poorly formatted, exiting' % inputwavefile)
        sys.exit(1)

    # generate time in seconds of splices using librosa
    librosa_sec = np.unique(onset_splice_superflux(inputwavefile))

    # check if input wav has a different rate than desired Morphagene rate,
    # and correct by interpolation
    if sample_rate != morph_srate:
        print("Correcting input sample rate %iHz to Morphagene rate %iHz"
              % (sample_rate, morph_srate))
        # perform interpolation on each channel, then transpose back
        array = change_samplerate_interp(array, float(sample_rate),
                                         float(morph_srate)).T
    else:
        array = array.T
    # Onset times are in seconds, so the marker positions in the output file
    # are simply time * output rate, whatever the input rate was.
    frame_labs = (librosa_sec * morph_srate).astype(int)
    frame_dict = [{'position': l, 'label': 'marker%i' % (i + 1)}
                  for i, l in enumerate(frame_labs)]

    minutes = array.shape[1] / float(morph_srate) / 60.
    if len(frame_dict) > 300 or minutes > 2.9:
        raise ValueError('Number of splices (%i) and/or audio' % len(frame_dict)
                         + ' length (%2.1f minutes)' % minutes
                         + ' exceed Morphagene limits [300 splices / 2.9 minutes]')

    # write wav file with additional cue markers from labels
    float32_wav_file(outputfile, array, morph_srate, markers=frame_dict)
    print('Saved Morphagene reel with %i splices: %s'
          % (len(frame_labs), outputfile))
    name = os.path.splitext(inputwavefile)[0]
    np.savetxt('%s.txt' % name, librosa_sec, fmt='%03.6f', delimiter='\t')
if __name__ == "__main__":
    # Run as a script: hand everything after the program name to main().
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
Hey, this is very cool, thanks for this! I have a suggestion that I'm currently enjoying in my local adaptation of this script:
def retain_n_splice_markers(onset_sf, splicecount):
    """
    Thin a list of onset times down to at most ``splicecount`` markers.

    The onsets are cut into ``splicecount`` consecutive groups whose sizes
    differ by at most one, and the first onset of each group is kept. The
    first marker is always forced to 0.0 so the reel starts at a splice.

    Parameters
    ----------
    onset_sf : array-like of float
        Onset times, in ascending order.
    splicecount : int
        Number of markers wanted. If it exceeds the number of onsets, one
        marker per onset is returned instead.

    Returns
    -------
    np array of float
        The retained marker times, starting with 0.0.

    Raises
    ------
    ValueError
        If ``splicecount`` is smaller than 1.
    """
    if splicecount < 1:
        raise ValueError('splicecount must be at least 1')
    onsets = np.asarray(onset_sf, dtype=float)
    if onsets.size == 0:
        return np.array([0.0])
    # array_split gives the first (len % n) groups one extra element, the
    # same partition as the divmod-based slicing it replaces; clamping the
    # group count avoids empty groups.
    groups = np.array_split(onsets, min(int(splicecount), onsets.size))
    markers = np.array([group[0] for group in groups])
    markers[0] = 0.0
    return markers
Then, on line 353 of the script (https://gist.github.com/ferrihydrite/e96dee177614898be250457cd592517a#file-morphagene_onset-py-L353), change the assignment to: librosa_sec = retain_n_splice_markers(np.unique(onset_splice_superflux(inputwavefile)), splicecount)
Finally, with splicecount exposed as a CLI argument, you can choose how many splices the reel will actually be cut into. If you specify 8, you'll get 8 splice markers, spaced as evenly as possible, chosen from the array returned by onset_splice_superflux, rather than the hundreds or thousands of possible splice markers at which librosa detects onsets.
It's working well for me 👍
Wow! That is a fantastic addition, and I've got it working for me (and I have a few more ideas as well).
I also found it very useful for making slice collections for the Octatrack, as your addition allows for auto-detected slices from 0-64 in number, very cool.
Will there be an update to this script by chance? It seems to have stopped working with librosa no matter what version I roll back to.
I'm currently in the process of converting this to Python3, and while there are a few issues with writing the labels, librosa 0.7.2 works for me. Keep your eye out for the new version soon!
Hey! Any news on the conversion to python3?
Will there be an update to this script by chance? It seems to have stopped working with librosa no matter what version I roll back to.
I'm currently in the process of converting this to Python3, and while there are a few issues with writing the labels, librosa 0.7.2 works for me. Keep your eye out for the new version soon!
Hey! Any news on the conversion to python3?
Hey! just got a new one up (check my other files). I've tested it on mac and windows and on several wav files and it seems to work.
Will there be an update to this script by chance? It seems to have stopped working with librosa no matter what version I roll back to.
I'm currently in the process of converting this to Python3, and while there are a few issues with writing the labels, librosa 0.7.2 works for me. Keep your eye out for the new version soon!
Hey! Any news on the conversion to python3?
Hey! just got a new one up (check my other files). I've tested it on mac and windows and on several wav files and it seems to work.
Amazing! Thanks so much, been looking forward to this for a while.
I'm currently in the process of converting this to Python3, and while there are a few issues with writing the labels, librosa 0.7.2 works for me. Keep your eye out for the new version soon!