雲夢 HudsonHuang

| Feature | nnAudio | torch.stft | kapre | torchaudio | tf.signal (or other tf ops) | torch-stft | librosa |
| --- | --- | --- | --- | --- | --- | --- | --- |
| Trainable | 1 | 0 | 1 | 0 | 0 | 1 | 0 |
| ModelConvert* | 1 | 0 | 1 | 0 | 0 | 1 | 0 |
| Speed(Need test**) | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Differentiable(Not sure**) | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Mel | 1 | 0 | 1 | 1 | 1 | 0 | 1 |
| MFCC | 0 | 0 | 0 | 1 | 1 | 0 | 1 |
| CQT | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| GPU support | 1 | 1 | 1 | 1 | 1 | 1 | 0 |

Using CUDA the correct way:

确定性卷积：（把所有操作的seed=0，以便重现，会变慢） torch.backends.cudnn.deterministic https://oldpan.me/archives/pytorch-conmon-problem-in-training

添加torch.cuda.get_device_name和torch.cuda.get_device_capability实现如下功能。例：

	>>> torch.cuda.get_device_name(0)
	'Quadro GP100'
	>>> torch.cuda.get_device_capability(0)
	(6, 0)

	import torch
	import math
	from torch.nn import functional as F
	from torch import nn


	class PositionalEncoding(nn.Module):
	def __init__(self, d_model, height, width, dropout=0.1):
	super().__init__()
	self.dropout = nn.Dropout(p=dropout)

	import PIL
	import torch
	import torchvision
	import numpy as np
	import matplotlib.pyplot as plt


	def get_image(path, imsize=-1):
	"""Load an image and resize to a specific size.

	## linux + HDD
	# soundfile r: 111.3170223236084
	# soundfile w: 21.47102665901184
	# librosa r: 27.82967972755432
	# librosa w: 24.776712656021118

	## windows + SSD
	# soundfile r: 12.918063640594482
	# soundfile w: 11.726674318313599
	# librosa r: 16.433390378952026

	"""Helper functions for working with audio files in NumPy."""
	"""some code borrowed from https://github.com/mgeier/python-audio/blob/master/audio-files/utility.py"""

	import numpy as np
	import contextlib
	import librosa
	import struct
	import soundfile

	def float_to_byte(sig):

	from scipy.fftpack import fft
	import numpy as np

	# Fixed (hard-coded, not randomly generated) complex test vector of length N = 4.
	a = np.array([11.+11.4j, 2.47+2.3j, 30.89+73.1j, 44.+16.3j])
	print(a)

	# Apply the FFT to `a` to transform it to the frequency domain.
	da = fft(a)

	# First-, second- and n-th-order derivatives via the FFT: https://math.mit.edu/~stevenj/fft-deriv.pdf
	# Derivative of the DFT: https://math.stackexchange.com/a/1658364/684858

	import tensorflow as tf
	import numpy as np
	import torch
	from torch.autograd import gradcheck, Variable

	# mag loss
	def mag(x):

	import numpy as np
	import tensorflow as tf

	def mixup_np(features, labels, alpha=0.1):
	# numpy version
	num_examples = features.shape[0]
	num_class = labels.shape[-1]
	mix = np.random.beta(alpha, alpha, size=[num_examples])
	features = np.swapaxes(features, 0, -1)
	features = features * mix + features[::-1] * (np.ones_like(mix) - mix)

	# Clone the Kaldi repository into ./kaldi, naming the remote "upstream".
	git clone https://github.com/kaldi-asr/kaldi.git kaldi --origin upstream
	# Build the contents of kaldi/tools using 4 parallel make jobs.
	cd kaldi/tools
	make -j 4
	# NOTE: If an error happens, run this command and apt-get install the necessary packages:
	# sh ./extras/check_dependencies.sh
	# Move to kaldi/src, configure with the --shared option, then generate
	# dependencies and build, each with 4 parallel make jobs.
	cd ../src/
	./configure --shared
	make depend -j 4
	make -j 4
	# NOTE(review): run.sh is invoked from kaldi/src here — confirm this is the
	# intended working directory for the script.
	./run.sh