| Feature | nnAudio | torch.stft | kapre | torchaudio | tf.signal (or other tf.* ops) | torch-stft | librosa |
|---|---|---|---|---|---|---|---|
| Trainable | 1 | 0 | 1 | 0 | 0 | 1 | 0 |
| ModelConvert* | 1 | 0 | 1 | 0 | 0 | 1 | 0 |
| Speed (needs testing**) | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Differentiable (not sure**) | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Mel | 1 | 0 | 1 | 1 | 1 | 0 | 1 |
| MFCC | 0 | 0 | 0 | 1 | 1 | 0 | 1 |
| CQT | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| GPU support | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import math | |
from torch.nn import functional as F | |
from torch import nn | |
class PositionalEncoding(nn.Module): | |
def __init__(self, d_model, height, width, dropout=0.1): | |
super().__init__() | |
self.dropout = nn.Dropout(p=dropout) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import PIL | |
import torch | |
import torchvision | |
import numpy as np | |
import matplotlib.pyplot as plt | |
def get_image(path, imsize=-1): | |
"""Load an image and resize to a cpecific size. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## linux + HDD | |
# soundfile r: 111.3170223236084 | |
# soundfile w: 21.47102665901184 | |
# librosa r: 27.82967972755432 | |
# librosa w: 24.776712656021118 | |
## windows + SSD | |
# soundfile r: 12.918063640594482 | |
# soundfile w: 11.726674318313599 | |
# librosa r: 16.433390378952026 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Helper functions for working with audio files in NumPy.""" | |
"""some code borrowed from https://github.com/mgeier/python-audio/blob/master/audio-files/utility.py""" | |
import numpy as np | |
import contextlib | |
import librosa | |
import struct | |
import soundfile | |
def float_to_byte(sig): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.fftpack import fft | |
import numpy as np | |
# random complex array of N = 4 | |
a = np.array([11.+11.4j, 2.47+2.3j, 30.89+73.1j, 44.+16.3j]) | |
print(a) | |
# do FFT to a to transform to frequency domain | |
da = fft(a) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# FFT 的 一阶,二阶和n阶导数:https://math.mit.edu/~stevenj/fft-deriv.pdf | |
# DFT的导数:https://math.stackexchange.com/a/1658364/684858 | |
import tensorflow as tf | |
import numpy as np | |
import torch | |
from torch.autograd import gradcheck, Variable | |
# mag loss | |
def mag(x): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import tensorflow as tf | |
def mixup_np(features, labels, alpha=0.1): | |
# numpy version | |
num_examples = features.shape[0] | |
num_class = labels.shape[-1] | |
mix = np.random.beta(alpha, alpha, size=[num_examples]) | |
features = np.swapaxes(features, 0, -1) | |
features = features * mix + features[::-1] * (np.ones_like(mix) - mix) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
git clone https://github.com/kaldi-asr/kaldi.git kaldi --origin upstream | |
cd kaldi/tools | |
make -j 4 | |
# NOTE: If an error occurs, run this command and apt-get install the necessary packages: | |
# sh ./extras/check_dependencies.sh | |
cd ../src/ | |
./configure --shared | |
make depend -j 4 | |
make -j 4 | |
./run.sh |
- Using CUDA the correct way:
-
确定性卷积:(把所有操作的seed=0,以便重现,会变慢) torch.backends.cudnn.deterministic https://oldpan.me/archives/pytorch-conmon-problem-in-training
添加torch.cuda.get_device_name和torch.cuda.get_device_capability实现如下功能。例:
torch.cuda.get_device_name(0) 'Quadro GP100' torch.cuda.get_device_capability(0) (6, 0)
NewerOlder