Created
March 28, 2021 18:19
-
-
Save ferrihydrite/562ee8926e66e0c1af753f28f69b34ab to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
USAGE:
    morphagene_audacity3.py -w <inputwavfile> -l <inputlabels> -o <outputfile>

Used to convert Audacity labels in .txt form on .WAV files into a
single 32-bit float .WAV with CUE markers within the file, directly
compatible with the Make Noise Morphagene.

Does not require the input file to be 48000Hz, only that the Audacity label
file matches the .WAV file that generated it, and that the input .WAV is
stereo.

See the Morphagene manual for naming conventions of output files:
    http://www.makenoisemusic.com/content/manuals/morphagene-manual.pdf

# see http://stackoverflow.com/questions/15576798/create-32bit-float-wav-file-in-python
# see... http://blog.theroyweb.com/extracting-wav-file-header-information-using-a-python-script
# marker code from Joseph Basquin [https://gist.github.com/josephernest/3f22c5ed5dabf1815f16efa8fa53d476]

updated to Python3 by Phiala (March 2021)
"""
import sys, getopt | |
import struct | |
import numpy as np | |
from scipy import interpolate | |
import warnings | |
import io | |
from enum import IntEnum | |
# Names exported by a star-import of this module.
__all__ = [
    'WavFileWarning',
    'read',
    'write'
]
class WavFileWarning(UserWarning):
    """Warning category for recoverable problems found while reading a WAV."""
class WAVE_FORMAT(IntEnum):
    """
    WAVE form wFormatTag IDs

    Complete list is in mmreg.h in Windows 10 SDK.  ALAC and OPUS are the
    newest additions, in v10.0.14393 2016-07

    Members are plain integers (IntEnum), so a tag unpacked from a file can
    be compared against them directly.
    """
    UNKNOWN = 0x0000
    PCM = 0x0001
    ADPCM = 0x0002
    IEEE_FLOAT = 0x0003
    VSELP = 0x0004
    IBM_CVSD = 0x0005
    ALAW = 0x0006
    MULAW = 0x0007
    DTS = 0x0008
    DRM = 0x0009
    WMAVOICE9 = 0x000A
    WMAVOICE10 = 0x000B
    OKI_ADPCM = 0x0010
    DVI_ADPCM = 0x0011
    IMA_ADPCM = 0x0011  # Duplicate value: this name is an alias of DVI_ADPCM
    MEDIASPACE_ADPCM = 0x0012
    SIERRA_ADPCM = 0x0013
    G723_ADPCM = 0x0014
    DIGISTD = 0x0015
    DIGIFIX = 0x0016
    DIALOGIC_OKI_ADPCM = 0x0017
    MEDIAVISION_ADPCM = 0x0018
    CU_CODEC = 0x0019
    HP_DYN_VOICE = 0x001A
    YAMAHA_ADPCM = 0x0020
    SONARC = 0x0021
    DSPGROUP_TRUESPEECH = 0x0022
    ECHOSC1 = 0x0023
    AUDIOFILE_AF36 = 0x0024
    APTX = 0x0025
    AUDIOFILE_AF10 = 0x0026
    PROSODY_1612 = 0x0027
    LRC = 0x0028
    DOLBY_AC2 = 0x0030
    GSM610 = 0x0031
    MSNAUDIO = 0x0032
    ANTEX_ADPCME = 0x0033
    CONTROL_RES_VQLPC = 0x0034
    DIGIREAL = 0x0035
    DIGIADPCM = 0x0036
    CONTROL_RES_CR10 = 0x0037
    NMS_VBXADPCM = 0x0038
    CS_IMAADPCM = 0x0039
    ECHOSC3 = 0x003A
    ROCKWELL_ADPCM = 0x003B
    ROCKWELL_DIGITALK = 0x003C
    XEBEC = 0x003D
    G721_ADPCM = 0x0040
    G728_CELP = 0x0041
    MSG723 = 0x0042
    INTEL_G723_1 = 0x0043
    INTEL_G729 = 0x0044
    SHARP_G726 = 0x0045
    MPEG = 0x0050
    RT24 = 0x0052
    PAC = 0x0053
    MPEGLAYER3 = 0x0055
    LUCENT_G723 = 0x0059
    CIRRUS = 0x0060
    ESPCM = 0x0061
    VOXWARE = 0x0062
    CANOPUS_ATRAC = 0x0063
    G726_ADPCM = 0x0064
    G722_ADPCM = 0x0065
    DSAT = 0x0066
    DSAT_DISPLAY = 0x0067
    VOXWARE_BYTE_ALIGNED = 0x0069
    VOXWARE_AC8 = 0x0070
    VOXWARE_AC10 = 0x0071
    VOXWARE_AC16 = 0x0072
    VOXWARE_AC20 = 0x0073
    VOXWARE_RT24 = 0x0074
    VOXWARE_RT29 = 0x0075
    VOXWARE_RT29HW = 0x0076
    VOXWARE_VR12 = 0x0077
    VOXWARE_VR18 = 0x0078
    VOXWARE_TQ40 = 0x0079
    VOXWARE_SC3 = 0x007A
    VOXWARE_SC3_1 = 0x007B
    SOFTSOUND = 0x0080
    VOXWARE_TQ60 = 0x0081
    MSRT24 = 0x0082
    G729A = 0x0083
    MVI_MVI2 = 0x0084
    DF_G726 = 0x0085
    DF_GSM610 = 0x0086
    ISIAUDIO = 0x0088
    ONLIVE = 0x0089
    MULTITUDE_FT_SX20 = 0x008A
    INFOCOM_ITS_G721_ADPCM = 0x008B
    CONVEDIA_G729 = 0x008C
    CONGRUENCY = 0x008D
    SBC24 = 0x0091
    DOLBY_AC3_SPDIF = 0x0092
    MEDIASONIC_G723 = 0x0093
    PROSODY_8KBPS = 0x0094
    ZYXEL_ADPCM = 0x0097
    PHILIPS_LPCBB = 0x0098
    PACKED = 0x0099
    MALDEN_PHONYTALK = 0x00A0
    RACAL_RECORDER_GSM = 0x00A1
    RACAL_RECORDER_G720_A = 0x00A2
    RACAL_RECORDER_G723_1 = 0x00A3
    RACAL_RECORDER_TETRA_ACELP = 0x00A4
    NEC_AAC = 0x00B0
    RAW_AAC1 = 0x00FF
    RHETOREX_ADPCM = 0x0100
    IRAT = 0x0101
    VIVO_G723 = 0x0111
    VIVO_SIREN = 0x0112
    PHILIPS_CELP = 0x0120
    PHILIPS_GRUNDIG = 0x0121
    DIGITAL_G723 = 0x0123
    SANYO_LD_ADPCM = 0x0125
    SIPROLAB_ACEPLNET = 0x0130
    SIPROLAB_ACELP4800 = 0x0131
    SIPROLAB_ACELP8V3 = 0x0132
    SIPROLAB_G729 = 0x0133
    SIPROLAB_G729A = 0x0134
    SIPROLAB_KELVIN = 0x0135
    VOICEAGE_AMR = 0x0136
    G726ADPCM = 0x0140
    DICTAPHONE_CELP68 = 0x0141
    DICTAPHONE_CELP54 = 0x0142
    QUALCOMM_PUREVOICE = 0x0150
    QUALCOMM_HALFRATE = 0x0151
    TUBGSM = 0x0155
    MSAUDIO1 = 0x0160
    WMAUDIO2 = 0x0161
    WMAUDIO3 = 0x0162
    WMAUDIO_LOSSLESS = 0x0163
    WMASPDIF = 0x0164
    UNISYS_NAP_ADPCM = 0x0170
    UNISYS_NAP_ULAW = 0x0171
    UNISYS_NAP_ALAW = 0x0172
    UNISYS_NAP_16K = 0x0173
    SYCOM_ACM_SYC008 = 0x0174
    SYCOM_ACM_SYC701_G726L = 0x0175
    SYCOM_ACM_SYC701_CELP54 = 0x0176
    SYCOM_ACM_SYC701_CELP68 = 0x0177
    KNOWLEDGE_ADVENTURE_ADPCM = 0x0178
    FRAUNHOFER_IIS_MPEG2_AAC = 0x0180
    DTS_DS = 0x0190
    CREATIVE_ADPCM = 0x0200
    CREATIVE_FASTSPEECH8 = 0x0202
    CREATIVE_FASTSPEECH10 = 0x0203
    UHER_ADPCM = 0x0210
    ULEAD_DV_AUDIO = 0x0215
    ULEAD_DV_AUDIO_1 = 0x0216
    QUARTERDECK = 0x0220
    ILINK_VC = 0x0230
    RAW_SPORT = 0x0240
    ESST_AC3 = 0x0241
    GENERIC_PASSTHRU = 0x0249
    IPI_HSX = 0x0250
    IPI_RPELP = 0x0251
    CS2 = 0x0260
    SONY_SCX = 0x0270
    SONY_SCY = 0x0271
    SONY_ATRAC3 = 0x0272
    SONY_SPC = 0x0273
    TELUM_AUDIO = 0x0280
    TELUM_IA_AUDIO = 0x0281
    NORCOM_VOICE_SYSTEMS_ADPCM = 0x0285
    FM_TOWNS_SND = 0x0300
    MICRONAS = 0x0350
    MICRONAS_CELP833 = 0x0351
    BTV_DIGITAL = 0x0400
    INTEL_MUSIC_CODER = 0x0401
    INDEO_AUDIO = 0x0402
    QDESIGN_MUSIC = 0x0450
    ON2_VP7_AUDIO = 0x0500
    ON2_VP6_AUDIO = 0x0501
    VME_VMPCM = 0x0680
    TPC = 0x0681
    LIGHTWAVE_LOSSLESS = 0x08AE
    OLIGSM = 0x1000
    OLIADPCM = 0x1001
    OLICELP = 0x1002
    OLISBC = 0x1003
    OLIOPR = 0x1004
    LH_CODEC = 0x1100
    LH_CODEC_CELP = 0x1101
    LH_CODEC_SBC8 = 0x1102
    LH_CODEC_SBC12 = 0x1103
    LH_CODEC_SBC16 = 0x1104
    NORRIS = 0x1400
    ISIAUDIO_2 = 0x1401
    SOUNDSPACE_MUSICOMPRESS = 0x1500
    MPEG_ADTS_AAC = 0x1600
    MPEG_RAW_AAC = 0x1601
    MPEG_LOAS = 0x1602
    NOKIA_MPEG_ADTS_AAC = 0x1608
    NOKIA_MPEG_RAW_AAC = 0x1609
    VODAFONE_MPEG_ADTS_AAC = 0x160A
    VODAFONE_MPEG_RAW_AAC = 0x160B
    MPEG_HEAAC = 0x1610
    VOXWARE_RT24_SPEECH = 0x181C
    SONICFOUNDRY_LOSSLESS = 0x1971
    INNINGS_TELECOM_ADPCM = 0x1979
    LUCENT_SX8300P = 0x1C07
    LUCENT_SX5363S = 0x1C0C
    CUSEEME = 0x1F03
    NTCSOFT_ALF2CM_ACM = 0x1FC4
    DVM = 0x2000
    DTS2 = 0x2001
    MAKEAVIS = 0x3313
    DIVIO_MPEG4_AAC = 0x4143
    NOKIA_ADAPTIVE_MULTIRATE = 0x4201
    DIVIO_G726 = 0x4243
    LEAD_SPEECH = 0x434C
    LEAD_VORBIS = 0x564C
    WAVPACK_AUDIO = 0x5756
    OGG_VORBIS_MODE_1 = 0x674F
    OGG_VORBIS_MODE_2 = 0x6750
    OGG_VORBIS_MODE_3 = 0x6751
    OGG_VORBIS_MODE_1_PLUS = 0x676F
    OGG_VORBIS_MODE_2_PLUS = 0x6770
    OGG_VORBIS_MODE_3_PLUS = 0x6771
    ALAC = 0x6C61
    _3COM_NBX = 0x7000  # Can't have leading digit
    OPUS = 0x704F
    FAAD_AAC = 0x706D
    AMR_NB = 0x7361
    AMR_WB = 0x7362
    AMR_WP = 0x7363
    GSM_AMR_CBR = 0x7A21
    GSM_AMR_VBR_SID = 0x7A22
    COMVERSE_INFOSYS_G723_1 = 0xA100
    COMVERSE_INFOSYS_AVQSBC = 0xA101
    COMVERSE_INFOSYS_SBC = 0xA102
    SYMBOL_G729_A = 0xA103
    VOICEAGE_AMR_WB = 0xA104
    INGENIENT_G726 = 0xA105
    MPEG4_AAC = 0xA106
    ENCORE_G726 = 0xA107
    ZOLL_ASAO = 0xA108
    SPEEX_VOICE = 0xA109
    VIANIX_MASC = 0xA10A
    WM9_SPECTRUM_ANALYZER = 0xA10B
    WMF_SPECTRUM_ANAYZER = 0xA10C
    GSM_610 = 0xA10D
    GSM_620 = 0xA10E
    GSM_660 = 0xA10F
    GSM_690 = 0xA110
    GSM_ADAPTIVE_MULTIRATE_WB = 0xA111
    POLYCOM_G722 = 0xA112
    POLYCOM_G728 = 0xA113
    POLYCOM_G729_A = 0xA114
    POLYCOM_SIREN = 0xA115
    GLOBAL_IP_ILBC = 0xA116
    RADIOTIME_TIME_SHIFT_RADIO = 0xA117
    NICE_ACA = 0xA118
    NICE_ADPCM = 0xA119
    VOCORD_G721 = 0xA11A
    VOCORD_G726 = 0xA11B
    VOCORD_G722_1 = 0xA11C
    VOCORD_G728 = 0xA11D
    VOCORD_G729 = 0xA11E
    VOCORD_G729_A = 0xA11F
    VOCORD_G723_1 = 0xA120
    VOCORD_LBC = 0xA121
    NICE_G728 = 0xA122
    FRACE_TELECOM_G729 = 0xA123
    CODIAN = 0xA124
    FLAC = 0xF1AC
    EXTENSIBLE = 0xFFFE
    DEVELOPMENT = 0xFFFF
# Format tags this module can decode; _read_fmt_chunk() rejects any other
# tag through _raise_bad_format().
KNOWN_WAVE_FORMATS = {WAVE_FORMAT.PCM, WAVE_FORMAT.IEEE_FLOAT}
def _raise_bad_format(format_tag):
    """
    Raise ValueError describing an unsupported wave format tag.

    The tag is reported by its WAVE_FORMAT name when it is a known enum
    value, otherwise as a zero-padded hexadecimal number. The message also
    lists the names in KNOWN_WAVE_FORMATS.
    """
    try:
        tag_label = WAVE_FORMAT(format_tag).name
    except ValueError:
        # Not a member of the enum: fall back to the raw number.
        tag_label = f'{format_tag:#06x}'
    supported = ', '.join(known.name for known in KNOWN_WAVE_FORMATS)
    raise ValueError(f"Unknown wave file format: {tag_label}. Supported "
                     f"formats: {supported}")
def _read_fmt_chunk(fid, is_big_endian):
    """
    Parse the body of a 'fmt ' chunk and leave the file at the next chunk.

    Returns
    -------
    size : int
        size of format subchunk in bytes (minus 8 for "fmt " and itself)
    format_tag : int
        PCM, float, or compressed format
    channels : int
        number of channels
    fs : int
        sampling frequency in samples per second
    bytes_per_second : int
        overall byte rate for the file
    block_align : int
        bytes per sample, including all channels
    bit_depth : int
        bits per sample

    Raises
    ------
    ValueError
        If the chunk is shorter than 16 bytes, if an EXTENSIBLE header
        declares an extension shorter than 22 bytes, or if the resulting
        format tag is not in KNOWN_WAVE_FORMATS.

    Notes
    -----
    Assumes file pointer is immediately after the 'fmt ' id
    """
    endian = '>' if is_big_endian else '<'

    (size,) = struct.unpack(endian + 'I', fid.read(4))
    if size < 16:
        raise ValueError("Binary structure of wave file is not compliant")

    (format_tag, channels, fs, bytes_per_second, block_align,
     bit_depth) = struct.unpack(endian + 'HHIIHH', fid.read(16))
    consumed = 16

    if format_tag == WAVE_FORMAT.EXTENSIBLE and size >= (16 + 2):
        (ext_chunk_size,) = struct.unpack(endian + 'H', fid.read(2))
        consumed += 2
        if ext_chunk_size < 22:
            raise ValueError("Binary structure of wave file is not compliant")

        extension = fid.read(22)
        consumed += 22
        # Skip the first 2 + 4 bytes of the extension; the next 16 bytes are
        # the sub-format GUID.
        sub_format_guid = extension[2 + 4:2 + 4 + 16]
        # GUID template {XXXXXXXX-0000-0010-8000-00AA00389B71} (RFC-2361)
        # MS GUID byte order: first three groups are native byte order,
        # rest is Big Endian
        native_groups = (b'\x00\x00\x00\x10' if is_big_endian
                         else b'\x00\x00\x10\x00')
        guid_tail = native_groups + b'\x80\x00\x00\xAA\x00\x38\x9B\x71'
        if sub_format_guid.endswith(guid_tail):
            # The leading GUID group carries the real format tag.
            (format_tag,) = struct.unpack(endian + 'I', sub_format_guid[:4])

    if format_tag not in KNOWN_WAVE_FORMATS:
        _raise_bad_format(format_tag)

    # move file pointer to next chunk
    leftover = size - consumed
    if leftover > 0:
        fid.read(leftover)

    # fmt should always be 16, 18 or 40, but handle it just in case
    _handle_pad_byte(fid, size)

    return (size, format_tag, channels, fs, bytes_per_second, block_align,
            bit_depth)
def _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian,
                     block_align, mmap=False):
    """
    Read the body of a 'data' chunk into a NumPy array.

    Parameters
    ----------
    fid : file object
        Open file positioned just after the 'data' id.
    format_tag : int
        WAVE_FORMAT.PCM or WAVE_FORMAT.IEEE_FLOAT; anything else is rejected.
    channels : int
        Number of channels from the fmt chunk.
    bit_depth : int
        Bits per sample from the fmt chunk.
    is_big_endian : bool
        True to decode sizes and samples as big-endian.
    block_align : int
        Bytes per sample frame (all channels) from the fmt chunk.
    mmap : bool, optional
        If True, return a ``np.memmap`` over the file instead of reading it.

    Returns
    -------
    data : ndarray
        1-D when ``channels`` is 1, otherwise reshaped to ``(-1, channels)``.

    Raises
    ------
    ValueError
        For unsupported bit depths or format tags, or when ``mmap=True`` is
        requested for a container size other than 1, 2, 4 or 8 bytes.

    Notes
    -----
    Assumes file pointer is immediately after the 'data' id

    It's possible to not use all available bits in a container, or to store
    samples in a container bigger than necessary, so bytes_per_sample uses
    the actual reported container size (nBlockAlign / nChannels).  Real-world
    examples:

    Adobe Audition's "24-bit packed int (type 1, 20-bit)"

        nChannels = 2, nBlockAlign = 6, wBitsPerSample = 20

    http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples/AFsp/M1F1-int12-AFsp.wav
    is:

        nChannels = 2, nBlockAlign = 4, wBitsPerSample = 12

    http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Docs/multichaudP.pdf
    gives an example of:

        nChannels = 2, nBlockAlign = 8, wBitsPerSample = 20
    """
    if is_big_endian:
        fmt = '>'
    else:
        fmt = '<'

    # Size of the data subchunk in bytes
    size = struct.unpack(fmt+'I', fid.read(4))[0]

    # Number of bytes per sample (sample container size)
    bytes_per_sample = block_align // channels
    # Sample count over all channels; the per-channel split happens in the
    # final reshape.
    n_samples = size // bytes_per_sample

    if format_tag == WAVE_FORMAT.PCM:
        if 1 <= bit_depth <= 8:
            dtype = 'u1'  # WAV of 8-bit integer or less are unsigned
        elif bytes_per_sample in {3, 5, 6, 7}:
            # No compatible dtype.  Load as raw bytes for reshaping later.
            dtype = 'V1'
        elif bit_depth <= 64:
            # Remaining bit depths can map directly to signed numpy dtypes
            dtype = f'{fmt}i{bytes_per_sample}'
        else:
            raise ValueError("Unsupported bit depth: the WAV file "
                             f"has {bit_depth}-bit integer data.")
    elif format_tag == WAVE_FORMAT.IEEE_FLOAT:
        if bit_depth in {32, 64}:
            dtype = f'{fmt}f{bytes_per_sample}'
        else:
            raise ValueError("Unsupported bit depth: the WAV file "
                             f"has {bit_depth}-bit floating-point data.")
    else:
        _raise_bad_format(format_tag)

    start = fid.tell()
    if not mmap:
        try:
            # Raw-byte loads count bytes, typed loads count samples.
            count = size if dtype == 'V1' else n_samples
            data = np.fromfile(fid, dtype=dtype, count=count)
        except io.UnsupportedOperation:  # not a C-like file
            fid.seek(start, 0)  # just in case it seeked, though it shouldn't
            data = np.frombuffer(fid.read(size), dtype=dtype)

        if dtype == 'V1':
            # Rearrange raw bytes into smallest compatible numpy dtype
            dt = f'{fmt}i4' if bytes_per_sample == 3 else f'{fmt}i8'
            # One zero-filled row per sample, as wide as the target integer.
            a = np.zeros((len(data) // bytes_per_sample, np.dtype(dt).itemsize),
                         dtype='V1')
            # Copy the sample bytes into the most-significant end of each
            # container (first bytes for big-endian, last bytes for
            # little-endian); the remaining low-order bytes stay zero.
            if is_big_endian:
                a[:, :bytes_per_sample] = data.reshape((-1, bytes_per_sample))
            else:
                a[:, -bytes_per_sample:] = data.reshape((-1, bytes_per_sample))
            data = a.view(dt).reshape(a.shape[:-1])
    else:
        if bytes_per_sample in {1, 2, 4, 8}:
            start = fid.tell()
            # mode='c' is numpy's copy-on-write memmap mode.
            data = np.memmap(fid, dtype=dtype, mode='c', offset=start,
                             shape=(n_samples,))
            # Nothing was read through fid, so step over the chunk manually.
            fid.seek(start + size)
        else:
            raise ValueError("mmap=True not compatible with "
                             f"{bytes_per_sample}-byte container size.")

    _handle_pad_byte(fid, size)

    if channels > 1:
        data = data.reshape(-1, channels)
    return data
def _skip_unknown_chunk(fid, is_big_endian):
    """
    Step over a chunk whose 4-byte id has already been consumed.

    Reads the 4-byte chunk size in the requested byte order, seeks past that
    many bytes and then past the pad byte, if any.
    """
    size_format = '>I' if is_big_endian else '<I'
    raw_size = fid.read(4)
    # call unpack() and seek() only if we have really read data from file
    # otherwise empty read at the end of the file would trigger
    # unnecessary exception at unpack() call
    # in case data equals somehow to 0, there is no need for seek() anyway
    if not raw_size:
        return
    (size,) = struct.unpack(size_format, raw_size)
    fid.seek(size, 1)
    _handle_pad_byte(fid, size)
def _read_riff_chunk(fid):
    """
    Read the 12-byte RIFF/RIFX header.

    Returns
    -------
    file_size : int
        Size field from the header plus the 8 bytes of id and size.
    is_big_endian : bool
        True for a 'RIFX' signature, False for 'RIFF'.

    Raises
    ------
    ValueError
        If the signature is neither 'RIFF' nor 'RIFX', or the form type is
        not 'WAVE'.
    """
    signature = fid.read(4)  # File signature
    if signature not in (b'RIFF', b'RIFX'):
        # There are also .wav files with "FFIR" or "XFIR" signatures?
        raise ValueError(f"File format {repr(signature)} not understood. Only "
                         "'RIFF' and 'RIFX' supported.")
    is_big_endian = signature == b'RIFX'
    size_format = '>I' if is_big_endian else '<I'

    # Size of entire file
    (riff_size,) = struct.unpack(size_format, fid.read(4))
    file_size = riff_size + 8

    form_type = fid.read(4)
    if form_type != b'WAVE':
        raise ValueError(f"Not a WAV file. RIFF form type is {repr(form_type)}.")

    return file_size, is_big_endian
def _handle_pad_byte(fid, size):
    """Seek one byte forward in ``fid`` when ``size`` is odd."""
    # "If the chunk size is an odd number of bytes, a pad byte with value zero
    # is written after ckData." So we need to seek past this after each chunk.
    if size % 2 == 0:
        return
    fid.seek(1, 1)
def read(filename, mmap=False):
    """
    Open a WAV file.

    Return the sample rate (in samples/sec) and data from an LPCM WAV file.

    Parameters
    ----------
    filename : string or open file handle
        Input WAV file.
    mmap : bool, optional
        Whether to read data as memory-mapped (default: False).  Not compatible
        with some bit depths; see Notes.  Only to be used on real files.

        .. versionadded:: 0.12.0

    Returns
    -------
    rate : int
        Sample rate of WAV file.
    data : numpy array
        Data read from WAV file. Data-type is determined from the file;
        see Notes.  Data is 1-D for 1-channel WAV, or 2-D of shape
        (Nsamples, Nchannels) otherwise. If a file-like input without a
        C-like file descriptor (e.g., :class:`python:io.BytesIO`) is
        passed, this will not be writeable.

    Raises
    ------
    ValueError
        If the file is not a RIFF/RIFX WAVE file, ends before any data was
        read, has a data chunk before its fmt chunk, uses an unsupported
        format, or contains no data chunk at all.

    Notes
    -----
    Common data types: [1]_

    =====================  ===========  ===========  =============
         WAV format            Min          Max       NumPy dtype
    =====================  ===========  ===========  =============
    32-bit floating-point  -1.0         +1.0         float32
    32-bit integer PCM     -2147483648  +2147483647  int32
    24-bit integer PCM     -2147483648  +2147483392  int32
    16-bit integer PCM     -32768       +32767       int16
    8-bit integer PCM      0            255          uint8
    =====================  ===========  ===========  =============

    WAV files can specify arbitrary bit depth, and this function supports
    reading any integer PCM depth from 1 to 64 bits.  Data is returned in the
    smallest compatible numpy int type, in left-justified format.  8-bit and
    lower is unsigned, while 9-bit and higher is signed.

    For example, 24-bit data will be stored as int32, with the MSB of the
    24-bit data stored at the MSB of the int32, and typically the least
    significant byte is 0x00.  (However, if a file actually contains data past
    its specified bit depth, those bits will be read and output, too. [2]_)

    This bit justification and sign matches WAV's native internal format, which
    allows memory mapping of WAV files that use 1, 2, 4, or 8 bytes per sample
    (so 24-bit files cannot be memory-mapped, but 32-bit can).

    IEEE float PCM in 32- or 64-bit format is supported, with or without mmap.
    Values exceeding [-1, +1] are not clipped.

    Non-linear PCM (mu-law, A-law) is not supported.

    References
    ----------
    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
       Interface and Data Specifications 1.0", section "Data Format of the
       Samples", August 1991
       http://www.tactilemedia.com/info/MCI_Control_Info.html
    .. [2] Adobe Systems Incorporated, "Adobe Audition 3 User Guide", section
       "Audio file formats: 24-bit Packed Int (type 1, 20-bit)", 2007

    Examples
    --------
    >>> from os.path import dirname, join as pjoin
    >>> from scipy.io import wavfile
    >>> import scipy.io

    Get the filename for an example .wav file from the tests/data directory.

    >>> data_dir = pjoin(dirname(scipy.io.__file__), 'tests', 'data')
    >>> wav_fname = pjoin(data_dir, 'test-44100Hz-2ch-32bit-float-be.wav')

    Load the .wav file contents.

    >>> samplerate, data = wavfile.read(wav_fname)
    >>> print(f"number of channels = {data.shape[1]}")
    number of channels = 2
    >>> length = data.shape[0] / samplerate
    >>> print(f"length = {length}s")
    length = 0.01s

    Plot the waveform.

    >>> import matplotlib.pyplot as plt
    >>> import numpy as np
    >>> time = np.linspace(0., length, data.shape[0])
    >>> plt.plot(time, data[:, 0], label="Left channel")
    >>> plt.plot(time, data[:, 1], label="Right channel")
    >>> plt.legend()
    >>> plt.xlabel("Time [s]")
    >>> plt.ylabel("Amplitude")
    >>> plt.show()
    """
    if hasattr(filename, 'read'):
        fid = filename
        # Caller-supplied file objects are never memory-mapped.
        mmap = False
    else:
        fid = open(filename, 'rb')

    try:
        file_size, is_big_endian = _read_riff_chunk(fid)
        fmt_chunk_received = False
        data_chunk_received = False
        while fid.tell() < file_size:
            # read the next chunk
            chunk_id = fid.read(4)

            if not chunk_id:
                if data_chunk_received:
                    # End of file but data successfully read
                    warnings.warn(
                        "Reached EOF prematurely; finished at {:d} bytes, "
                        "expected {:d} bytes from header."
                        .format(fid.tell(), file_size),
                        WavFileWarning, stacklevel=2)
                    break
                else:
                    raise ValueError("Unexpected end of file.")
            elif len(chunk_id) < 4:
                msg = f"Incomplete chunk ID: {repr(chunk_id)}"
                # If we have the data, ignore the broken chunk
                if fmt_chunk_received and data_chunk_received:
                    warnings.warn(msg + ", ignoring it.", WavFileWarning,
                                  stacklevel=2)
                else:
                    raise ValueError(msg)

            if chunk_id == b'fmt ':
                fmt_chunk_received = True
                fmt_chunk = _read_fmt_chunk(fid, is_big_endian)
                format_tag, channels, fs = fmt_chunk[1:4]
                bit_depth = fmt_chunk[6]
                block_align = fmt_chunk[5]
            elif chunk_id == b'data':
                data_chunk_received = True
                if not fmt_chunk_received:
                    raise ValueError("No fmt chunk before data")
                data = _read_data_chunk(fid, format_tag, channels, bit_depth,
                                        is_big_endian, block_align, mmap)
            elif chunk_id in {b'fact', b'LIST', b'JUNK', b'Fake'}:
                # fact/LIST are not interpreted (someday LIST could be
                # handled properly); JUNK/Fake are alignment chunks.  All are
                # skipped without a warning.
                _skip_unknown_chunk(fid, is_big_endian)
            else:
                warnings.warn("Chunk (non-data) not understood, skipping it.",
                              WavFileWarning, stacklevel=2)
                _skip_unknown_chunk(fid, is_big_endian)

        # Without this check a file lacking a data chunk would fall through
        # to the return statement with `fs`/`data` unbound and fail with an
        # opaque UnboundLocalError.
        if not data_chunk_received:
            raise ValueError("No data chunk found in WAV file.")
    finally:
        if not hasattr(filename, 'read'):
            fid.close()
        else:
            # Rewind caller-owned file objects.
            fid.seek(0)

    return fs, data
def write(filename, rate, data, markers=False, verbose=True):
    """
    Write a NumPy array as a WAV file, optionally followed by cue markers.

    Parameters
    ----------
    filename : string or open file handle
        Output wav file.
    rate : int
        The sample rate (in samples/sec).
    data : ndarray
        A 1-D or 2-D NumPy array of either integer or float data-type.
    markers : sequence, optional
        Cue points to append after the audio data.  Either a sequence of
        sample positions, or a sequence of dicts of the form
        ``{'position': 100, 'label': 'marker1'}``.  Labels may be ``str``
        (encoded as ASCII) or ``bytes``.  When falsy (the default) no
        cue/label chunks are written.
    verbose : bool, optional
        If True (default), print a message when cue markers are saved.

    Raises
    ------
    ValueError
        If the data type is unsupported or the data exceeds the WAV size
        limit.

    Notes
    -----
    * Writes a simple uncompressed WAV file.
    * To write multiple-channels, use a 2-D array of shape
      (Nsamples, Nchannels).
    * The bits-per-sample and PCM/float will be determined by the data-type.
    * Markers are written as a 'cue ' chunk followed by a 'LIST'/'adtl' chunk
      holding one 'labl' sub-chunk per cue point.

    Common data types: [1]_

    =====================  ===========  ===========  =============
         WAV format            Min          Max       NumPy dtype
    =====================  ===========  ===========  =============
    32-bit floating-point  -1.0         +1.0         float32
    32-bit PCM             -2147483648  +2147483647  int32
    16-bit PCM             -32768       +32767       int16
    8-bit PCM              0            255          uint8
    =====================  ===========  ===========  =============

    Note that 8-bit PCM is unsigned.

    References
    ----------
    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
       Interface and Data Specifications 1.0", section "Data Format of the
       Samples", August 1991
       http://www.tactilemedia.com/info/MCI_Control_Info.html

    Examples
    --------
    Create a 100Hz sine wave, sampled at 44100Hz.
    Write to 16-bit PCM, Mono.

    >>> from scipy.io.wavfile import write
    >>> samplerate = 44100; fs = 100
    >>> t = np.linspace(0., 1., samplerate)
    >>> amplitude = np.iinfo(np.int16).max
    >>> data = amplitude * np.sin(2. * np.pi * fs * t)
    >>> write("example.wav", samplerate, data.astype(np.int16))
    """
    if hasattr(filename, 'write'):
        fid = filename
    else:
        fid = open(filename, 'wb')

    fs = rate

    try:
        dkind = data.dtype.kind
        if not (dkind == 'i' or dkind == 'f' or (dkind == 'u' and
                                                 data.dtype.itemsize == 1)):
            raise ValueError("Unsupported data type '%s'" % data.dtype)

        header_data = b''

        header_data += b'RIFF'
        # Placeholder for the RIFF size; patched once the file is complete.
        header_data += b'\x00\x00\x00\x00'
        header_data += b'WAVE'

        # fmt chunk
        header_data += b'fmt '
        if dkind == 'f':
            format_tag = WAVE_FORMAT.IEEE_FLOAT
        else:
            format_tag = WAVE_FORMAT.PCM
        if data.ndim == 1:
            channels = 1
        else:
            channels = data.shape[1]
        bit_depth = data.dtype.itemsize * 8
        bytes_per_second = fs*(bit_depth // 8)*channels
        block_align = channels * (bit_depth // 8)

        fmt_chunk_data = struct.pack('<HHIIHH', format_tag, channels, fs,
                                     bytes_per_second, block_align, bit_depth)
        if not (dkind == 'i' or dkind == 'u'):
            # add cbSize field for non-PCM files
            fmt_chunk_data += b'\x00\x00'

        header_data += struct.pack('<I', len(fmt_chunk_data))
        header_data += fmt_chunk_data

        # fact chunk (non-PCM files)
        if not (dkind == 'i' or dkind == 'u'):
            header_data += b'fact'
            header_data += struct.pack('<II', 4, data.shape[0])

        # check data size (needs to be immediately before the data chunk)
        if ((len(header_data)-4-4) + (4+4+data.nbytes)) > 0xFFFFFFFF:
            raise ValueError("Data exceeds wave file size limit")

        fid.write(header_data)

        # data chunk
        fid.write(b'data')
        fid.write(struct.pack('<I', data.nbytes))
        if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and
                                           sys.byteorder == 'big'):
            data = data.byteswap()
        _array_tofile(fid, data)

        # cue/marker chunk
        if markers:  # != None and != []
            if verbose:
                print("saving cue markers...")

            if isinstance(markers[0], dict):
                # then we have [{'position': 100, 'label': 'marker1'}, ...]
                labels = [m['label'] for m in markers]
                markers = [m['position'] for m in markers]
            else:
                labels = [b'' for m in markers]

            # RIFF chunks must start on an even offset: pad an odd-sized data
            # chunk before appending further chunks.
            if data.nbytes % 2:
                fid.write(b'\x00')

            fid.write(b'cue ')
            size = 4 + len(markers) * 24
            fid.write(struct.pack('<II', size, len(markers)))
            for cue_id, position in enumerate(markers, start=1):
                position = int(position)
                # Each 24-byte cue point packs: id, position, the b'data'
                # chunk id, two zero fields and the sample position again.
                fid.write(struct.pack('<Ii4sIIi', cue_id, position, b'data',
                                      0, 0, position))

            label_chunks = []
            for cue_id, lbl in enumerate(labels, start=1):
                # Accept both str and bytes labels (position-only markers use
                # empty bytes labels).
                if isinstance(lbl, str):
                    raw = lbl.encode('ascii')
                else:
                    raw = bytes(lbl)
                # NUL-terminate the text; a second NUL pads the sub-chunk to
                # an even length when needed.
                terminator = b'\x00' if len(raw) % 2 == 1 else b'\x00\x00'
                size = len(raw) + 1 + 4  # because \x00
                label_chunks.append(b'labl' +
                                    struct.pack('<II', size, cue_id) +
                                    raw + terminator)
            lbls = b''.join(label_chunks)

            fid.write(b'LIST')
            size = len(lbls) + 4
            fid.write(struct.pack('<I', size))
            fid.write(b'adtl')
            fid.write(lbls)

        # Determine file size and place it in correct
        # position at start of the file.
        size = fid.tell()
        fid.seek(4)
        fid.write(struct.pack('<I', size-8))

    finally:
        if not hasattr(filename, 'write'):
            fid.close()
        else:
            # Rewind caller-owned file objects.
            fid.seek(0)
def _array_tofile(fid, data):
    """Write the raw bytes of ``data`` to ``fid`` in C (row-major) order."""
    # ravel gives a c-contiguous buffer
    flat = data.ravel()
    as_bytes = flat.view('b')
    fid.write(as_bytes.data)
def test_normalized(array): | |
''' | |
Determine if an array is entirely -1 < array[i,j] < 1, to see if array is | |
normalized | |
''' | |
return (array > -1).all() and (array < 1).all() | |
def norm_to_32float(array):
    '''
    Convert a variety of audio types to float32 while normalizing if needed.

    Supported inputs (either byte order):

    * signed integers: divided by 2 ** (bits - 1)
    * uint8: shifted to center at 0, i.e. (value - 128) / 128
    * float32 / float64: returned as float32 unchanged when already
      normalized (see test_normalized), otherwise divided by 2 ** (bits - 1)

    Returns a float32 array of the same shape as the input.

    Raises ValueError for any other dtype.
    '''
    dtype = array.dtype
    bits = dtype.itemsize * 8
    full_scale = 2.0 ** (bits - 1)

    if dtype.kind == 'i':
        return array.astype(np.float32) / np.float32(full_scale)

    if dtype.kind == 'u' and dtype.itemsize == 1:
        # handle uint8 data by shifting to center at 0
        return (array.astype(np.float32) - np.float32(128)) / np.float32(128)

    if dtype.kind == 'f' and dtype.itemsize in (4, 8):
        if test_normalized(array):
            return array.astype(np.float32)  # only a cast is needed
        # Float data that is not normalized is scaled like integer data of
        # the same width.
        return (array / full_scale).astype(np.float32)

    raise ValueError("Unsupported audio data type '%s'" % dtype)
def load_audacity_labels(label_file):
    '''
    Load Audacity labels, ignoring the additional frequency range info lines,
    if labels were exported from a spectrogram.

    Only the first whitespace-separated field of each label line (the start
    time) is used.  Lines starting with a backslash and blank lines are
    skipped.

    Returns a 1-D float array of label start times (empty if there are none).
    '''
    starts = []
    # The label text is discarded, so undecodable characters must not abort
    # the load.
    with open(label_file, 'r', encoding='utf-8', errors='replace') as fi:
        for line in fi:
            fields = line.split()
            if not fields or fields[0].startswith('\\'):
                continue
            starts.append(float(fields[0]))
    return np.array(starts, dtype=float)
def change_samplerate_interp(old_audio,old_rate,new_rate):
    '''
    Resample audio to a new sample rate by simple interpolation.

    If old_rate > new_rate, there may be aliasing / data loss.

    Input should be in column format (samples along the first axis), as the
    interpolation is carried out on each channel this way.  When both rates
    are equal the input is returned as-is.

    Modified from:
    https://stackoverflow.com/questions/33682490/how-to-read-a-wav-file-using-scipy-at-a-different-sampling-rate
    '''
    if old_rate == new_rate:
        print('Conversion not needed, old and new rates match')
        return old_audio  # conversion not needed

    n_old = old_audio.shape[0]
    # duration of audio
    duration = n_old / old_rate
    n_new = int(n_old * new_rate / old_rate)

    # time axes of the old and the new audio
    time_old = np.linspace(0, duration, n_old)
    time_new = np.linspace(0, duration, n_new)

    # fit old_audio into new_audio length by interpolation
    resample = interpolate.interp1d(time_old, old_audio.T)
    return resample(time_new).T
def main(argv):
    '''
    Command-line entry point.

    Writes a single file, edited in Audacity with labels, to a Morphagene
    32bit WAV file at 48000hz sample rate, with one cue marker per label.

    argv is the argument list without the program name; it must supply
    -w/--wavfile, -l/--labelfile and -o/--outputfile.  Exits with status 2 on
    usage errors and status 1 if the input wave file cannot be read.
    '''
    usage = ('morphagene_audacity3.py -w <inputwavfile> -l <inputlabels> '
             '-o <outputfile>')
    inputwavefile = ''
    inputlabelfile = ''
    outputfile = ''
    try:
        opts, args = getopt.getopt(argv, "hw:l:o:",
                                   ["wavfile=", "labelfile=", "outputfile="])
    except getopt.GetoptError:
        print('Error in usage, correct format:\n' + usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-w", "--wavfile"):
            inputwavefile = arg
        elif opt in ("-l", "--labelfile"):
            inputlabelfile = arg
        elif opt in ("-o", "--outputfile"):
            outputfile = arg

    # All three paths are required; fail with the usage text instead of an
    # error from trying to open an empty filename.
    if not (inputwavefile and inputlabelfile and outputfile):
        print('Error in usage, correct format:\n' + usage)
        sys.exit(2)

    print('Input wave file: %s' % inputwavefile)
    print('Input label file: %s' % inputlabelfile)
    print('Output Morphagene reel: %s' % outputfile)

    morph_srate = 48000  # required samplerate for Morphagene

    # read label start times (in seconds) from the Audacity label file,
    # ignoring the label text
    audac_labs = load_audacity_labels(inputlabelfile)

    # read pertinent info from audio file, convert, exit if input wave file
    # is broken
    try:
        sample_rate, array = read(inputwavefile)
        array = norm_to_32float(array)
    except Exception as err:
        print('Input file %s is poorly formatted, exiting (%s)'
              % (inputwavefile, err))
        sys.exit(1)

    # check if input wav has a different rate than desired Morphagene rate,
    # and correct by interpolation
    if sample_rate != morph_srate:
        print("Correcting input sample rate %iHz to Morphagene rate %iHz"
              % (sample_rate, morph_srate))
        # read() returns (Nsamples, Nchannels), which is the column format
        # change_samplerate_interp() expects, so no transpose is needed.
        array = change_samplerate_interp(array, float(sample_rate),
                                         float(morph_srate))

    # convert labels in seconds to frames at the output rate (this already
    # accounts for any change in rate)
    frame_labs = (audac_labs * morph_srate).astype(np.int64)
    frame_dict = [{'position': int(pos), 'label': 'marker%i' % (i + 1)}
                  for i, pos in enumerate(frame_labs)]

    # write wav file with additional cue markers from labels
    write(outputfile, morph_srate, array.astype('float32'),
          markers=frame_dict)
    print('Saved Morphagene reel with %i splices: %s'
          % (len(frame_labs), outputfile))
# Run the converter only when executed as a script, passing the command-line
# arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment