ferrihydrite · July 1, 2024 07:03 · ghost · Mar 21, 2021
diff --git a/morphagene_onset.py b/morphagene_onset.py
 #!/usr/bin/env python2
 # -*- coding: utf-8 -*-
 """
 Usage:
    morphagene_onset.py -w <inputwavfile> -o <outputfile>
    
 Use the Superflux onset detection algorithm with backtracking to generate 
    splice locations.
 Use these splice locations with a converted WAV (to 32-bit float / 48000Hz) 
    to make Morphagene reels.
 This method typically generates a splice on each percussion hit of a sample,
    so choose a sample of appropriate length or you will quickly exceed the
    limitations of the Morphagene [300 splices].
 """
 import librosa
 import sys, getopt, os
 import struct
 import numpy as np
 from scipy import interpolate
 import warnings
 import collections

 class WavFileWarning(UserWarning):
     """Warning category used by the WAV reader for unfamiliar formats and skipped chunks."""

 # Module-level flag: set to True by read() when a 'fmt ' chunk declares
 # format code 3 (IEEE float), so that 32-bit samples are decoded as float32.
 _ieee = False

 def test_normalized(array):
    '''
    Determine if an array is entirely -1 < array[i,j] < 1, to see if array is
        normalized
    '''
    return (array > -1).all() and (array < 1).all()

 def read(file, readmarkers=False, readmarkerlabels=False,
          readmarkerslist=False, readloops=False, readpitch=False,
          normalized=False, forcestereo=False):
     """
     Return the sample rate (in samples/sec) and data from a WAV file,
     optionally followed by cue / label / loop / pitch metadata.

     Parameters
     ----------
     file : str or file object
         Filename, or a binary file object positioned at the RIFF header.
         It is closed after a successful parse; a file opened here is
         also closed when parsing fails.
     readmarkers : bool
         Append the list of cue positions, sorted by position.
     readmarkerlabels : bool
         Append the list of cue labels, in the same order.
     readmarkerslist : bool
         Append a list of {'position': ..., 'label': ...} dicts.
     readloops : bool
         Append the [start, end] pairs found in a 'smpl' chunk.
     readpitch : bool
         Append the pitch in Hz derived from a 'smpl' chunk (0.0 if absent).
     normalized : bool
         Scale integer PCM samples to float32 in the -1..1 range.  IEEE
         float samples are returned unchanged.
     forcestereo : bool
         Duplicate a single-channel signal into two identical columns.

     Returns
     -------
     rate : int
         Sample rate of wav file
     data : np array
         Samples, shape (frames,) for one channel, else (frames, channels)
     bits : int
         Bits per sample declared by the 'fmt ' chunk
     ...followed by the optional items requested above, in parameter order.

     Raises
     ------
     ValueError
         If the stream is not RIFF/WAVE or has no 'fmt ' or 'data' chunk.
     """
     # kept for backward compatibility: the module-level flag mirrors the
     # format of the most recently parsed 'fmt ' chunk
     global _ieee

     ################
     ## READ SUBFUNCTIONS
     def _read_fmt_chunk(fid):
         # assumes file pointer is immediately after the 'fmt ' id
         size, comp, noc, rate, sbytes, ba, bits = struct.unpack(
             '<ihHIIHH', fid.read(20))
         if comp != 1 or size > 16:
             if comp != 3:
                 warnings.warn("Unfamiliar format bytes", WavFileWarning)
             if size > 16:
                 # skip the extension bytes of a longer 'fmt ' chunk
                 fid.read(size - 16)
         return size, comp, noc, rate, sbytes, ba, bits

     def _read_data_chunk(fid, noc, bits, normalized=False, is_float=False):
         # assumes file pointer is immediately after the 'data' id
         size = struct.unpack('<i', fid.read(4))[0]
         if bits == 8 or bits == 24:
             # 8-bit is unsigned; 24-bit is assembled from raw bytes below
             dtype = 'u1'
             width = 1
         else:
             width = bits // 8
             if is_float and bits in (32, 64):
                 dtype = '<f%d' % width
             else:
                 dtype = '<i%d' % width
         # read through the file API (not np.fromfile) so that in-memory
         # streams work as well as real files
         raw = fid.read(size)
         raw = raw[:len(raw) - len(raw) % width]
         data = np.frombuffer(raw, dtype=dtype).copy()
         if bits == 24:
             # no native 24-bit type: widen each 3-byte sample to int32,
             # filling the top byte with the sign of the third byte
             a = np.empty((len(data) // 3, 4), dtype='u1')
             a[:, :3] = data.reshape((-1, 3))
             a[:, 3:] = (a[:, 2:3] >> 7) * 255
             data = a.view('<i4').reshape(a.shape[:-1])
         if noc > 1:
             # de-interleave into one column per channel
             data = data.reshape(-1, noc)
         if size & 1:
             # chunks are word-aligned: skip the pad byte
             fid.seek(1, 1)
         if normalized:
             if is_float:
                 # float samples are already on the -1..1 scale; dividing
                 # them by an integer full-scale value would silence them
                 print('File already normalized, passing')
             elif bits == 8:
                 # unsigned 8-bit: centre on zero, then scale
                 data = (np.float32(data) - 128.0) / 128.0
             elif bits in (16, 24, 32):
                 data = np.float32(data) / 2 ** (bits - 1)
         return data

     def _skip_unknown_chunk(fid):
         size = struct.unpack('<i', fid.read(4))[0]
         if size & 1:
             # chunks are word-aligned: include the pad byte
             size += 1
         fid.seek(size, 1)

     def _read_riff_chunk(fid):
         if fid.read(4) != b'RIFF':
             raise ValueError("Not a WAV file.")
         fsize = struct.unpack('<I', fid.read(4))[0] + 8
         if fid.read(4) != b'WAVE':
             raise ValueError("Not a WAV file.")
         return fsize
     ##################
     owns_fid = not hasattr(file, 'read')
     fid = open(file, 'rb') if owns_fid else file
     completed = False
     try:
         fsize = _read_riff_chunk(fid)
         noc = 1
         bits = 8
         rate = None
         data = None
         is_float = False
         # keyed by cue id so that labels can be matched to positions
         markers = collections.defaultdict(
             lambda: {'position': -1, 'label': ''})
         loops = []
         pitch = 0.0
         while fid.tell() < fsize:
             chunk_id = fid.read(4)
             if len(chunk_id) < 4:
                 # file is shorter than the RIFF header claims
                 break
             if chunk_id == b'fmt ':
                 size, comp, noc, rate, sbytes, ba, bits = \
                     _read_fmt_chunk(fid)
                 is_float = (comp == 3)
                 _ieee = is_float
             elif chunk_id == b'data':
                 data = _read_data_chunk(fid, noc, bits, normalized,
                                         is_float)
             elif chunk_id == b'cue ':
                 size, numcue = struct.unpack('<ii', fid.read(8))
                 for _ in range(numcue):
                     (cue_id, position, datachunkid, chunkstart, blockstart,
                      sampleoffset) = struct.unpack('<iiiiii', fid.read(24))
                     markers[cue_id]['position'] = position
             elif chunk_id == b'LIST':
                 # consume only the list header (size + list type); the
                 # sub-chunks are then handled by this same loop
                 struct.unpack('<ii', fid.read(8))
             elif chunk_id in (b'ICRD', b'IENG', b'ISFT', b'ISTJ'):
                 # see http://www.pjb.com.au/midi/sfspec21.html#i5
                 _skip_unknown_chunk(fid)
             elif chunk_id == b'labl':
                 size, cue_id = struct.unpack('<ii', fid.read(8))
                 # the size should be even, see WAV specification,
                 # e.g. 16=>16, 23=>24
                 size += size % 2
                 # remove the trailing null characters
                 label = fid.read(size - 4).rstrip(b'\x00')
                 if not isinstance(label, str):
                     # Python 3: hand back text, as Python 2 did
                     label = label.decode('latin-1')
                 markers[cue_id]['label'] = label
             elif chunk_id == b'smpl':
                 (size, manuf, prod, sampleperiod, midiunitynote,
                  midipitchfraction, smptefmt, smpteoffs, numsampleloops,
                  samplerdata) = struct.unpack('<iiiiiIiiii', fid.read(40))
                 cents = midipitchfraction * 1. / (2 ** 32 - 1)
                 pitch = 440. * 2 ** ((midiunitynote + cents - 69.) / 12)
                 for _ in range(numsampleloops):
                     (cuepointid, looptype, start, end, fraction,
                      playcount) = struct.unpack('<iiiiii', fid.read(24))
                     loops.append([start, end])
             else:
                 warnings.warn(
                     "Chunk " + chunk_id.decode('latin-1') + " skipped",
                     WavFileWarning)
                 _skip_unknown_chunk(fid)
         completed = True
     finally:
         if owns_fid or completed:
             fid.close()
     if rate is None or data is None:
         raise ValueError("WAV file has no 'fmt ' or 'data' chunk.")
     if data.ndim == 1 and forcestereo:
         data = np.column_stack((data, data))
     # sort by position
     markerslist = sorted(markers.values(), key=lambda m: m['position'])
     cue = [m['position'] for m in markerslist]
     cuelabels = [m['label'] for m in markerslist]
     return (rate, data, bits, ) \
         + ((cue,) if readmarkers else ()) \
         + ((cuelabels,) if readmarkerlabels else ()) \
         + ((markerslist,) if readmarkerslist else ()) \
         + ((loops,) if readloops else ()) \
         + ((pitch,) if readpitch else ())
        
 def float32_wav_file(file_name, sample_array, sample_rate,
                      markers=None, verbose=False):
     """
     Write sample_array as a 32-bit IEEE-float WAV file, optionally followed
     by 'cue ' and 'LIST'/'adtl' chunks describing markers.

     Parameters
     ----------
     file_name : str
         Path of the file to create (overwritten if present).
     sample_array : 2-D array
         Samples with shape (channels, frames).
     sample_rate : int
         Samples per second written to the header.
     markers : list, optional
         Either plain positions, or dicts with 'position' and 'label' keys
         (the kind is decided from the first element).  Cue ids are
         assigned 1, 2, ... in list order.
     verbose : bool
         Print progress messages.
     """
     (M, N) = sample_array.shape
     byte_count = M * N * 4  # 32-bit floats
     if verbose:
         print("packing data...")
     # interleave channels frame by frame: the C-order bytes of the
     # (frames, channels) transpose are exactly that layout
     # see: http://soundfile.sapp.org/doc/WaveFormat/
     audio = np.ascontiguousarray(
         np.asarray(sample_array).T, dtype='<f4').tobytes()

     # marker chunks are built first so the RIFF size can include them
     cue_chunk = b''
     list_chunk = b''
     if markers:    # != None and != []
         if isinstance(markers[0], dict):
             # then we have [{'position': 100, 'label': 'marker1'}, ...]
             labels = [m['label'] for m in markers]
             positions = [m['position'] for m in markers]
         else:
             labels = ['' for _ in markers]
             positions = list(markers)
         # 1635017060 is struct.unpack('<i', b'data')
         cue_points = b''.join(
             struct.pack('<iiiiii', i + 1, int(c), 1635017060, 0, 0, int(c))
             for i, c in enumerate(positions))
         cue_chunk = b'cue ' + struct.pack(
             '<ii', 4 + len(positions) * 24, len(positions)) + cue_points
         labl_parts = []
         for i, lbl in enumerate(labels):
             if not isinstance(lbl, bytes):
                 lbl = lbl.encode('latin-1', 'replace')
             # null-terminate, then pad so the chunk ends on an even byte
             padded = lbl + (b'\x00' if len(lbl) % 2 == 1 else b'\x00\x00')
             labl_parts.append(
                 b'labl'
                 + struct.pack('<ii', len(lbl) + 1 + 4, i + 1)  # +1: \x00
                 + padded)
         lbls = b''.join(labl_parts)
         # https://web.archive.org/web/20141226210234/http://www.sonicspot.com/guide/wavefiles.html#list
         list_chunk = (b'LIST' + struct.pack('<i', len(lbls) + 4)
                       + b'adtl' + lbls)

     header = (
         b'RIFF'
         # everything after this field: 0x2c-byte header minus the 8 bytes
         # of 'RIFF' + size, the audio, and any marker chunks
         + struct.pack('<I', 0x2c - 8 + byte_count
                       + len(cue_chunk) + len(list_chunk))
         + b'WAVE' + b'fmt '
         + struct.pack('<IHHIIHH',
                       0x10,                      # size of 'fmt ' header
                       3,                         # format 3 = floating-point PCM
                       M,                         # channels
                       int(sample_rate),          # samples / second
                       int(sample_rate) * 4 * M,  # bytes / second
                       4 * M,                     # block alignment
                       32)                        # bits / sample
         + b'data' + struct.pack('<I', byte_count))

     if verbose:
         print("saving audio...")
     with open(file_name, 'wb') as fid:
         fid.write(header)
         fid.write(audio)
         if markers:
             if verbose:
                 print("saving cue markers...")
             fid.write(cue_chunk)
             fid.write(list_chunk)

 def onset_splice_superflux(audiofile):
     '''
     Superflux onset detection method of Boeck and Widmer [2013], modified to
         use backtracking to get accurate splice location.
     Loads audiofile with librosa (resampled to 44100 Hz) and returns the
         detected onset locations as an array of times in seconds.
     From:
     https://librosa.github.io/librosa/auto_examples/plot_superflux.html#sphx-glr-auto-examples-plot-superflux-py
     '''
     y, sr = librosa.load(audiofile, sr=44100)
     # Constants directly from paper
     n_fft = 1024
     hop_length = int(librosa.time_to_samples(1./200, sr=sr))
     lag = 2 # number of frames
     n_mels = 138 # number of bins
     fmin = 27.5 # lowest frequency
     fmax = 16000. #highest frequency
     max_size = 3
     # Mel spectrogram; the signal is passed by keyword because newer
     #   librosa releases no longer accept it positionally
     S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft,
                                        hop_length=hop_length,
                                        fmin=fmin,
                                        fmax=fmax,
                                        n_mels=n_mels)
     # Onset Strength Function
     odf_sf = librosa.onset.onset_strength(S=librosa.power_to_db(S, ref=np.max),
                                           sr=sr,
                                           hop_length=hop_length,
                                           lag=lag, max_size=max_size)
     # Onset locations in time
     onset_sf = librosa.onset.onset_detect(onset_envelope=odf_sf,
                                           sr=sr,
                                           hop_length=hop_length,
                                           units='time',
                                           backtrack=True)
     return onset_sf

 def change_samplerate_interp(old_audio,old_rate,new_rate):
     '''
     Resample audio to new_rate by plain linear interpolation.
     Downsampling (old_rate > new_rate) may alias / lose data.
     Audio is expected one channel per column; interpolation runs along the
         first axis for every channel.
     Returns the input object unchanged when the two rates are equal.
     Modified from:
     https://stackoverflow.com/questions/33682490/how-to-read-a-wav-file-using-scipy-at-a-different-sampling-rate
     '''
     if old_rate == new_rate:
         print('Conversion not needed, old and new rates match')
         return old_audio # conversion not needed

     n_old = old_audio.shape[0]
     # duration of audio
     duration = n_old / old_rate
     n_new = int(n_old * new_rate / old_rate)

     # both time axes span the same duration, with different point counts
     time_old = np.linspace(0, duration, n_old)
     time_new = np.linspace(0, duration, n_new)

     # interp1d works along the last axis, hence the transposes
     resample = interpolate.interp1d(time_old, old_audio.T)
     return resample(time_new).T

 def main(argv):
     """
     Command-line entry point: build a Morphagene reel from a WAV file.

     Parses -w/--wavfile and -o/--outputfile from argv, detects onsets in
     the input, converts the audio to 48000 Hz, writes it as a 32-bit float
     WAV with one cue marker per onset, and saves the onset times (seconds)
     to '<input name>.txt'.

     Exits with status 2 on a usage error and 1 when the input cannot be
     read.  Raises ValueError when the result has more than 300 splices or
     is longer than 2.9 minutes.
     """
     usage = 'morphagene_onset.py -w <inputwavfile> -o <outputfile>'
     inputwavefile = ''
     outputfile = ''
     try:
         opts, args = getopt.getopt(argv,"hw:o:",["wavfile=","outputfile="])
     except getopt.GetoptError:
         print('Error in usage, correct format:\n' + usage)
         sys.exit(2)
     for opt, arg in opts:
         if opt == '-h':
             print('Morphagene reel creation using Superflux onset detection:\n'
                   + usage)
             sys.exit()
         elif opt in ("-w", "--wavfile"):
             inputwavefile = arg
         elif opt in ("-o", "--outputfile"):
             outputfile = arg
     if not inputwavefile or not outputfile:
         # both paths are required; fail here rather than deep inside the
         #   audio loading code
         print('Error in usage, correct format:\n' + usage)
         sys.exit(2)
     print('Input wave file: %s'%inputwavefile)
     print('Output Morphagene reel: %s'%outputfile)

     ###########################################################################
     # Write single file, with splice locations using the Superflux onset
     #   detection algorithm with backtracking for optimal splice location.
     ###########################################################################
     morph_srate = 48000 # required samplerate for Morphagene

     # generate labels and time in seconds of splices using librosa
     librosa_sec = np.unique(onset_splice_superflux(inputwavefile))

     # read pertinent info from audio file, exit if input wave file is broken.
     #   forcestereo gives mono input a second column, so the array is always
     #   2-D as the writer and the length check below require
     try:
         (sample_rate, array, bits, ) = read(inputwavefile, normalized=True,
                                             forcestereo=True)
     except Exception:
         print('Input .wav file %s is poorly formatted, exiting'%inputwavefile)
         sys.exit(1)

     # check if input wav has a different rate than desired Morphagene rate,
     #   and correct by interpolation
     if sample_rate != morph_srate:
         print("Correcting input sample rate %iHz to Morphagene rate %iHz"%(sample_rate,morph_srate))
         # perform interpolation on each channel, then transpose back
         array = change_samplerate_interp(array,float(sample_rate),float(morph_srate)).T
     else:
         array = array.T
     # onset times are in seconds, so the frame index at the output rate is
     #   time * morph_srate whether or not the audio was resampled
     frame_labs = (librosa_sec * morph_srate).astype(int)
     frame_dict = [{'position': int(l), 'label': 'marker%i'%(i+1)}
                   for i,l in enumerate(frame_labs)]
     minutes = (array.shape[1] / float(morph_srate)) / 60.
     if len(frame_dict) > 300 or minutes > 2.9:
         raise ValueError('Number of splices (%i) and/or audio'%len(frame_dict) + \
             ' length (%2.1f minutes)'%minutes + \
             ' exceed Morphagene limits [300 splices / 2.9 minutes]')
     # write wav file with additional cue markers from labels
     float32_wav_file(outputfile,array,morph_srate,markers=frame_dict)
     print('Saved Morphagene reel with %i splices: %s'%(len(frame_labs),outputfile))
     name = os.path.splitext(inputwavefile)[0]
     np.savetxt('%s.txt'%name,librosa_sec,fmt='%03.6f',delimiter='\t')
    
 # Run the command-line entry point only when executed as a script,
 # passing every argument after the program name.
 if __name__ == "__main__":
     main(sys.argv[1:])
	#!/usr/bin/env python2
	# -- coding: utf-8 --
	"""
	Usage:
	morphagene_onset.py -w <inputwavfile> -o <outputfile>

	Use the Superflux onset detection algorithm with backtracking to generate
	splice locations.
	Use these splice locations with a converted WAV (to 32-bit float / 48000Hz)
	to make Morphagene reels.
	This method typically generates a splice on each percussion hit of a sample,
	so choose a sample of appropriate length or you will quickly exceed the
	limitations of the Morphagene [300 splices].
	"""
	import librosa
	import sys, getopt, os
	import struct
	import numpy as np
	from scipy import interpolate
	import warnings
	import collections

	class WavFileWarning(UserWarning):
	    """Warning category used by the WAV reader for unfamiliar formats and skipped chunks."""

	# Module-level flag: set to True by read() when a 'fmt ' chunk declares
	# format code 3 (IEEE float), so that 32-bit samples are decoded as float32.
	_ieee = False

	def test_normalized(array):
	'''
	Determine if an array is entirely -1 < array[i,j] < 1, to see if array is
	normalized
	'''
	return (array > -1).all() and (array < 1).all()

	def read(file, readmarkers=False, readmarkerlabels=False,
	         readmarkerslist=False, readloops=False, readpitch=False,
	         normalized=False, forcestereo=False):
	    """
	    Return the sample rate (in samples/sec) and data from a WAV file,
	    optionally followed by cue / label / loop / pitch metadata.

	    Parameters
	    ----------
	    file : str or file object
	        Filename, or a binary file object positioned at the RIFF header.
	        It is closed after a successful parse; a file opened here is
	        also closed when parsing fails.
	    readmarkers : bool
	        Append the list of cue positions, sorted by position.
	    readmarkerlabels : bool
	        Append the list of cue labels, in the same order.
	    readmarkerslist : bool
	        Append a list of {'position': ..., 'label': ...} dicts.
	    readloops : bool
	        Append the [start, end] pairs found in a 'smpl' chunk.
	    readpitch : bool
	        Append the pitch in Hz derived from a 'smpl' chunk (0.0 if absent).
	    normalized : bool
	        Scale integer PCM samples to float32 in the -1..1 range.  IEEE
	        float samples are returned unchanged.
	    forcestereo : bool
	        Duplicate a single-channel signal into two identical columns.

	    Returns
	    -------
	    rate : int
	        Sample rate of wav file
	    data : np array
	        Samples, shape (frames,) for one channel, else (frames, channels)
	    bits : int
	        Bits per sample declared by the 'fmt ' chunk
	    ...followed by the optional items requested above, in parameter order.

	    Raises
	    ------
	    ValueError
	        If the stream is not RIFF/WAVE or has no 'fmt ' or 'data' chunk.
	    """
	    # kept for backward compatibility: the module-level flag mirrors the
	    # format of the most recently parsed 'fmt ' chunk
	    global _ieee

	    ################
	    ## READ SUBFUNCTIONS
	    def _read_fmt_chunk(fid):
	        # assumes file pointer is immediately after the 'fmt ' id
	        size, comp, noc, rate, sbytes, ba, bits = struct.unpack(
	            '<ihHIIHH', fid.read(20))
	        if comp != 1 or size > 16:
	            if comp != 3:
	                warnings.warn("Unfamiliar format bytes", WavFileWarning)
	            if size > 16:
	                # skip the extension bytes of a longer 'fmt ' chunk
	                fid.read(size - 16)
	        return size, comp, noc, rate, sbytes, ba, bits

	    def _read_data_chunk(fid, noc, bits, normalized=False, is_float=False):
	        # assumes file pointer is immediately after the 'data' id
	        size = struct.unpack('<i', fid.read(4))[0]
	        if bits == 8 or bits == 24:
	            # 8-bit is unsigned; 24-bit is assembled from raw bytes below
	            dtype = 'u1'
	            width = 1
	        else:
	            width = bits // 8
	            if is_float and bits in (32, 64):
	                dtype = '<f%d' % width
	            else:
	                dtype = '<i%d' % width
	        # read through the file API (not np.fromfile) so that in-memory
	        # streams work as well as real files
	        raw = fid.read(size)
	        raw = raw[:len(raw) - len(raw) % width]
	        data = np.frombuffer(raw, dtype=dtype).copy()
	        if bits == 24:
	            # no native 24-bit type: widen each 3-byte sample to int32,
	            # filling the top byte with the sign of the third byte
	            a = np.empty((len(data) // 3, 4), dtype='u1')
	            a[:, :3] = data.reshape((-1, 3))
	            a[:, 3:] = (a[:, 2:3] >> 7) * 255
	            data = a.view('<i4').reshape(a.shape[:-1])
	        if noc > 1:
	            # de-interleave into one column per channel
	            data = data.reshape(-1, noc)
	        if size & 1:
	            # chunks are word-aligned: skip the pad byte
	            fid.seek(1, 1)
	        if normalized:
	            if is_float:
	                # float samples are already on the -1..1 scale; dividing
	                # them by an integer full-scale value would silence them
	                print('File already normalized, passing')
	            elif bits == 8:
	                # unsigned 8-bit: centre on zero, then scale
	                data = (np.float32(data) - 128.0) / 128.0
	            elif bits in (16, 24, 32):
	                data = np.float32(data) / 2 ** (bits - 1)
	        return data

	    def _skip_unknown_chunk(fid):
	        size = struct.unpack('<i', fid.read(4))[0]
	        if size & 1:
	            # chunks are word-aligned: include the pad byte
	            size += 1
	        fid.seek(size, 1)

	    def _read_riff_chunk(fid):
	        if fid.read(4) != b'RIFF':
	            raise ValueError("Not a WAV file.")
	        fsize = struct.unpack('<I', fid.read(4))[0] + 8
	        if fid.read(4) != b'WAVE':
	            raise ValueError("Not a WAV file.")
	        return fsize
	    ##################
	    owns_fid = not hasattr(file, 'read')
	    fid = open(file, 'rb') if owns_fid else file
	    completed = False
	    try:
	        fsize = _read_riff_chunk(fid)
	        noc = 1
	        bits = 8
	        rate = None
	        data = None
	        is_float = False
	        # keyed by cue id so that labels can be matched to positions
	        markers = collections.defaultdict(
	            lambda: {'position': -1, 'label': ''})
	        loops = []
	        pitch = 0.0
	        while fid.tell() < fsize:
	            chunk_id = fid.read(4)
	            if len(chunk_id) < 4:
	                # file is shorter than the RIFF header claims
	                break
	            if chunk_id == b'fmt ':
	                size, comp, noc, rate, sbytes, ba, bits = \
	                    _read_fmt_chunk(fid)
	                is_float = (comp == 3)
	                _ieee = is_float
	            elif chunk_id == b'data':
	                data = _read_data_chunk(fid, noc, bits, normalized,
	                                        is_float)
	            elif chunk_id == b'cue ':
	                size, numcue = struct.unpack('<ii', fid.read(8))
	                for _ in range(numcue):
	                    (cue_id, position, datachunkid, chunkstart, blockstart,
	                     sampleoffset) = struct.unpack('<iiiiii', fid.read(24))
	                    markers[cue_id]['position'] = position
	            elif chunk_id == b'LIST':
	                # consume only the list header (size + list type); the
	                # sub-chunks are then handled by this same loop
	                struct.unpack('<ii', fid.read(8))
	            elif chunk_id in (b'ICRD', b'IENG', b'ISFT', b'ISTJ'):
	                # see http://www.pjb.com.au/midi/sfspec21.html#i5
	                _skip_unknown_chunk(fid)
	            elif chunk_id == b'labl':
	                size, cue_id = struct.unpack('<ii', fid.read(8))
	                # the size should be even, see WAV specification,
	                # e.g. 16=>16, 23=>24
	                size += size % 2
	                # remove the trailing null characters
	                label = fid.read(size - 4).rstrip(b'\x00')
	                if not isinstance(label, str):
	                    # Python 3: hand back text, as Python 2 did
	                    label = label.decode('latin-1')
	                markers[cue_id]['label'] = label
	            elif chunk_id == b'smpl':
	                (size, manuf, prod, sampleperiod, midiunitynote,
	                 midipitchfraction, smptefmt, smpteoffs, numsampleloops,
	                 samplerdata) = struct.unpack('<iiiiiIiiii', fid.read(40))
	                cents = midipitchfraction * 1. / (2 ** 32 - 1)
	                pitch = 440. * 2 ** ((midiunitynote + cents - 69.) / 12)
	                for _ in range(numsampleloops):
	                    (cuepointid, looptype, start, end, fraction,
	                     playcount) = struct.unpack('<iiiiii', fid.read(24))
	                    loops.append([start, end])
	            else:
	                warnings.warn(
	                    "Chunk " + chunk_id.decode('latin-1') + " skipped",
	                    WavFileWarning)
	                _skip_unknown_chunk(fid)
	        completed = True
	    finally:
	        if owns_fid or completed:
	            fid.close()
	    if rate is None or data is None:
	        raise ValueError("WAV file has no 'fmt ' or 'data' chunk.")
	    if data.ndim == 1 and forcestereo:
	        data = np.column_stack((data, data))
	    # sort by position
	    markerslist = sorted(markers.values(), key=lambda m: m['position'])
	    cue = [m['position'] for m in markerslist]
	    cuelabels = [m['label'] for m in markerslist]
	    return (rate, data, bits, ) \
	        + ((cue,) if readmarkers else ()) \
	        + ((cuelabels,) if readmarkerlabels else ()) \
	        + ((markerslist,) if readmarkerslist else ()) \
	        + ((loops,) if readloops else ()) \
	        + ((pitch,) if readpitch else ())

	def float32_wav_file(file_name, sample_array, sample_rate,
	                     markers=None, verbose=False):
	    """
	    Write sample_array as a 32-bit IEEE-float WAV file, optionally followed
	    by 'cue ' and 'LIST'/'adtl' chunks describing markers.

	    Parameters
	    ----------
	    file_name : str
	        Path of the file to create (overwritten if present).
	    sample_array : 2-D array
	        Samples with shape (channels, frames).
	    sample_rate : int
	        Samples per second written to the header.
	    markers : list, optional
	        Either plain positions, or dicts with 'position' and 'label' keys
	        (the kind is decided from the first element).  Cue ids are
	        assigned 1, 2, ... in list order.
	    verbose : bool
	        Print progress messages.
	    """
	    (M, N) = sample_array.shape
	    byte_count = M * N * 4  # 32-bit floats
	    if verbose:
	        print("packing data...")
	    # interleave channels frame by frame: the C-order bytes of the
	    # (frames, channels) transpose are exactly that layout
	    # see: http://soundfile.sapp.org/doc/WaveFormat/
	    audio = np.ascontiguousarray(
	        np.asarray(sample_array).T, dtype='<f4').tobytes()

	    # marker chunks are built first so the RIFF size can include them
	    cue_chunk = b''
	    list_chunk = b''
	    if markers:    # != None and != []
	        if isinstance(markers[0], dict):
	            # then we have [{'position': 100, 'label': 'marker1'}, ...]
	            labels = [m['label'] for m in markers]
	            positions = [m['position'] for m in markers]
	        else:
	            labels = ['' for _ in markers]
	            positions = list(markers)
	        # 1635017060 is struct.unpack('<i', b'data')
	        cue_points = b''.join(
	            struct.pack('<iiiiii', i + 1, int(c), 1635017060, 0, 0, int(c))
	            for i, c in enumerate(positions))
	        cue_chunk = b'cue ' + struct.pack(
	            '<ii', 4 + len(positions) * 24, len(positions)) + cue_points
	        labl_parts = []
	        for i, lbl in enumerate(labels):
	            if not isinstance(lbl, bytes):
	                lbl = lbl.encode('latin-1', 'replace')
	            # null-terminate, then pad so the chunk ends on an even byte
	            padded = lbl + (b'\x00' if len(lbl) % 2 == 1 else b'\x00\x00')
	            labl_parts.append(
	                b'labl'
	                + struct.pack('<ii', len(lbl) + 1 + 4, i + 1)  # +1: \x00
	                + padded)
	        lbls = b''.join(labl_parts)
	        # https://web.archive.org/web/20141226210234/http://www.sonicspot.com/guide/wavefiles.html#list
	        list_chunk = (b'LIST' + struct.pack('<i', len(lbls) + 4)
	                      + b'adtl' + lbls)

	    header = (
	        b'RIFF'
	        # everything after this field: 0x2c-byte header minus the 8 bytes
	        # of 'RIFF' + size, the audio, and any marker chunks
	        + struct.pack('<I', 0x2c - 8 + byte_count
	                      + len(cue_chunk) + len(list_chunk))
	        + b'WAVE' + b'fmt '
	        + struct.pack('<IHHIIHH',
	                      0x10,                      # size of 'fmt ' header
	                      3,                         # format 3 = floating-point PCM
	                      M,                         # channels
	                      int(sample_rate),          # samples / second
	                      int(sample_rate) * 4 * M,  # bytes / second
	                      4 * M,                     # block alignment
	                      32)                        # bits / sample
	        + b'data' + struct.pack('<I', byte_count))

	    if verbose:
	        print("saving audio...")
	    with open(file_name, 'wb') as fid:
	        fid.write(header)
	        fid.write(audio)
	        if markers:
	            if verbose:
	                print("saving cue markers...")
	            fid.write(cue_chunk)
	            fid.write(list_chunk)

	def onset_splice_superflux(audiofile):
	    '''
	    Superflux onset detection method of Boeck and Widmer [2013], modified to
	        use backtracking to get accurate splice location.
	    Loads audiofile with librosa (resampled to 44100 Hz) and returns the
	        detected onset locations as an array of times in seconds.
	    From:
	    https://librosa.github.io/librosa/auto_examples/plot_superflux.html#sphx-glr-auto-examples-plot-superflux-py
	    '''
	    y, sr = librosa.load(audiofile, sr=44100)
	    # Constants directly from paper
	    n_fft = 1024
	    hop_length = int(librosa.time_to_samples(1./200, sr=sr))
	    lag = 2 # number of frames
	    n_mels = 138 # number of bins
	    fmin = 27.5 # lowest frequency
	    fmax = 16000. #highest frequency
	    max_size = 3
	    # Mel spectrogram; the signal is passed by keyword because newer
	    #   librosa releases no longer accept it positionally
	    S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft,
	                                       hop_length=hop_length,
	                                       fmin=fmin,
	                                       fmax=fmax,
	                                       n_mels=n_mels)
	    # Onset Strength Function
	    odf_sf = librosa.onset.onset_strength(S=librosa.power_to_db(S, ref=np.max),
	                                          sr=sr,
	                                          hop_length=hop_length,
	                                          lag=lag, max_size=max_size)
	    # Onset locations in time
	    onset_sf = librosa.onset.onset_detect(onset_envelope=odf_sf,
	                                          sr=sr,
	                                          hop_length=hop_length,
	                                          units='time',
	                                          backtrack=True)
	    return onset_sf

	def change_samplerate_interp(old_audio,old_rate,new_rate):
	    '''
	    Resample audio to new_rate by plain linear interpolation.
	    Downsampling (old_rate > new_rate) may alias / lose data.
	    Audio is expected one channel per column; interpolation runs along the
	        first axis for every channel.
	    Returns the input object unchanged when the two rates are equal.
	    Modified from:
	    https://stackoverflow.com/questions/33682490/how-to-read-a-wav-file-using-scipy-at-a-different-sampling-rate
	    '''
	    if old_rate == new_rate:
	        print('Conversion not needed, old and new rates match')
	        return old_audio # conversion not needed

	    n_old = old_audio.shape[0]
	    # duration of audio
	    duration = n_old / old_rate
	    n_new = int(n_old * new_rate / old_rate)

	    # both time axes span the same duration, with different point counts
	    time_old = np.linspace(0, duration, n_old)
	    time_new = np.linspace(0, duration, n_new)

	    # interp1d works along the last axis, hence the transposes
	    resample = interpolate.interp1d(time_old, old_audio.T)
	    return resample(time_new).T

	def main(argv):
	    """
	    Command-line entry point: build a Morphagene reel from a WAV file.

	    Parses -w/--wavfile and -o/--outputfile from argv, detects onsets in
	    the input, converts the audio to 48000 Hz, writes it as a 32-bit float
	    WAV with one cue marker per onset, and saves the onset times (seconds)
	    to '<input name>.txt'.

	    Exits with status 2 on a usage error and 1 when the input cannot be
	    read.  Raises ValueError when the result has more than 300 splices or
	    is longer than 2.9 minutes.
	    """
	    usage = 'morphagene_onset.py -w <inputwavfile> -o <outputfile>'
	    inputwavefile = ''
	    outputfile = ''
	    try:
	        opts, args = getopt.getopt(argv,"hw:o:",["wavfile=","outputfile="])
	    except getopt.GetoptError:
	        print('Error in usage, correct format:\n' + usage)
	        sys.exit(2)
	    for opt, arg in opts:
	        if opt == '-h':
	            print('Morphagene reel creation using Superflux onset detection:\n'
	                  + usage)
	            sys.exit()
	        elif opt in ("-w", "--wavfile"):
	            inputwavefile = arg
	        elif opt in ("-o", "--outputfile"):
	            outputfile = arg
	    if not inputwavefile or not outputfile:
	        # both paths are required; fail here rather than deep inside the
	        #   audio loading code
	        print('Error in usage, correct format:\n' + usage)
	        sys.exit(2)
	    print('Input wave file: %s'%inputwavefile)
	    print('Output Morphagene reel: %s'%outputfile)

	    ###########################################################################
	    # Write single file, with splice locations using the Superflux onset
	    #   detection algorithm with backtracking for optimal splice location.
	    ###########################################################################
	    morph_srate = 48000 # required samplerate for Morphagene

	    # generate labels and time in seconds of splices using librosa
	    librosa_sec = np.unique(onset_splice_superflux(inputwavefile))

	    # read pertinent info from audio file, exit if input wave file is broken.
	    #   forcestereo gives mono input a second column, so the array is always
	    #   2-D as the writer and the length check below require
	    try:
	        (sample_rate, array, bits, ) = read(inputwavefile, normalized=True,
	                                            forcestereo=True)
	    except Exception:
	        print('Input .wav file %s is poorly formatted, exiting'%inputwavefile)
	        sys.exit(1)

	    # check if input wav has a different rate than desired Morphagene rate,
	    #   and correct by interpolation
	    if sample_rate != morph_srate:
	        print("Correcting input sample rate %iHz to Morphagene rate %iHz"%(sample_rate,morph_srate))
	        # perform interpolation on each channel, then transpose back
	        array = change_samplerate_interp(array,float(sample_rate),float(morph_srate)).T
	    else:
	        array = array.T
	    # onset times are in seconds, so the frame index at the output rate is
	    #   time * morph_srate whether or not the audio was resampled
	    frame_labs = (librosa_sec * morph_srate).astype(int)
	    frame_dict = [{'position': int(l), 'label': 'marker%i'%(i+1)}
	                  for i,l in enumerate(frame_labs)]
	    minutes = (array.shape[1] / float(morph_srate)) / 60.
	    if len(frame_dict) > 300 or minutes > 2.9:
	        raise ValueError('Number of splices (%i) and/or audio'%len(frame_dict) + \
	            ' length (%2.1f minutes)'%minutes + \
	            ' exceed Morphagene limits [300 splices / 2.9 minutes]')
	    # write wav file with additional cue markers from labels
	    float32_wav_file(outputfile,array,morph_srate,markers=frame_dict)
	    print('Saved Morphagene reel with %i splices: %s'%(len(frame_labs),outputfile))
	    name = os.path.splitext(inputwavefile)[0]
	    np.savetxt('%s.txt'%name,librosa_sec,fmt='%03.6f',delimiter='\t')

	# Run the command-line entry point only when executed as a script,
	# passing every argument after the program name.
	if __name__ == "__main__":
	main(sys.argv[1:])
No results found