Last active
December 26, 2019 02:46
-
-
Save RicherMans/187b2d74c2bf8cd95e3d3fad5de43ec7 to your computer and use it in GitHub Desktop.
Moving average similar to kaldi's apply-cmvn-sliding in pytorch.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
class MovingAvgNorm(nn.Module):
    """
    Sliding-window mean/variance normalization, similar to kaldi's
    apply-cmvn-sliding.

    For every time step a window of ``kernel_size`` frames (reflect-padded
    at the utterance edges) is used to estimate a mean and std, and the
    frame is normalized as ``(x - mean) / (std + eps)``.

    Args:
        kernel_size: number of frames per sliding window.
        with_mean: if True, subtract the per-window mean.
        with_std: if True, divide by the per-window (unbiased) std.
    """
    def __init__(self, kernel_size=100, with_mean=True, with_std=True):
        super().__init__()
        self.kernel_size = kernel_size
        self.with_mean = with_mean
        self.with_std = with_std
        self.eps = 1e-12

    def forward(self, x):
        """Normalize ``x`` of shape (B, T, D); returns a tensor of the same shape."""
        assert x.ndim == 3, "Input needs to be tensor of shape B x T x D"
        n_batch, timedim, featdim = x.shape
        # Too small utterance: fall back to whole-utterance statistics.
        # Honor with_mean / with_std here too (the original applied both
        # unconditionally on this path, contradicting the constructor flags).
        if timedim < self.kernel_size:
            m = x.mean(1, keepdim=True) if self.with_mean else torch.zeros_like(x)
            v = x.std(1, keepdim=True) if self.with_std else torch.ones_like(x)
            return (x - m) / (v + self.eps)
        # left + right == kernel_size - 1 for even AND odd kernel sizes, so
        # unfold always yields exactly T windows. (The original's symmetric
        # pad (k//2, k//2 - 1) summed to k-2 for odd k, producing T-1
        # windows and a broadcast error.)
        left = self.kernel_size // 2
        right = self.kernel_size - 1 - left
        # B x D x (T+k-1) -> B x D x T x k -> B x T x D x k
        sliding_window = F.pad(
            x.transpose(1, 2), (left, right),
            mode='reflect').unfold(-1, self.kernel_size,
                                   1).permute(0, 2, 1, 3).contiguous()
        # Per-window statistics; reduce the window dim so the result
        # broadcasts back onto B x T x D.
        m = sliding_window.mean(-1) if self.with_mean else torch.zeros_like(x)
        v = sliding_window.std(-1) if self.with_std else torch.ones_like(x)
        return (x - m) / (v + self.eps)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
class MovingAvgNorm(nn.Module):
    """
    Sliding-window mean/variance normalization, similar to kaldi's
    apply-cmvn-sliding.

    The utterance is zero-padded at the edges and the statistics are
    corrected so that only real frames contribute: each window's mean
    divides by its number of valid frames, and the unbiased variance sums
    squared deviations over valid frames only.

    Args:
        kernel_size: number of frames per sliding window.
        with_mean: if True, subtract the per-window mean.
        with_std: if True, divide by the per-window (unbiased) std.
    """
    def __init__(self, kernel_size=100, with_mean=True, with_std=True):
        super().__init__()
        self.kernel_size = kernel_size
        self.with_mean = with_mean
        self.with_std = with_std
        self.eps = 1e-12

    def forward(self, x):
        """Normalize ``x`` of shape (B, T, D); returns the same shape.

        Statistics are computed under ``torch.no_grad()``, so no gradients
        flow through the mean/std estimates.
        """
        assert x.ndim == 3, "Input needs to be tensor of shape B x T x D"
        n_batch, timedim, featdim = x.shape
        with torch.no_grad():
            # Too small utterance: whole-utterance statistics instead.
            # Honor with_mean / with_std here too (the original applied
            # both unconditionally on this path).
            if timedim < self.kernel_size:
                m = x.mean(1, keepdim=True) if self.with_mean else torch.zeros_like(x)
                v = x.std(1, keepdim=True) if self.with_std else torch.ones_like(x)
                return (x - m) / (v + self.eps)
            # left + right == kernel_size - 1 for even AND odd kernel
            # sizes, so unfold yields exactly T windows. (The original's
            # symmetric pad broke for odd kernel sizes.)
            left = self.kernel_size // 2
            right = self.kernel_size - 1 - left
            # B x D x (T+k-1) -> B x D x T x k -> B x T x D x k
            windows = F.pad(x.transpose(1, 2), (left, right),
                            mode='constant').unfold(
                                -1, self.kernel_size,
                                1).permute(0, 2, 1, 3).contiguous()
            # Validity mask: 1 where a window slot holds a real frame,
            # 0 where it holds zero padding. Shape 1 x T x 1 x k.
            mask = F.pad(x.new_ones(1, 1, timedim),
                         (left, right)).unfold(-1, self.kernel_size,
                                               1).permute(0, 2, 1, 3)
            counts = mask.sum(-1)  # valid frames per window, 1 x T x 1
            if self.with_mean:
                # Padded slots are zero, so the plain sum already covers
                # only valid frames. (The original's hand-built denominator
                # list was off by one on the right edge — the right pad is
                # only k//2 - 1 frames, not k//2 — and had the wrong length
                # for odd kernel sizes.)
                m = windows.sum(-1) / counts
            else:
                m = torch.zeros_like(x)
            if self.with_std:
                # Unbiased variance over valid frames only; the mask
                # removes the (0 - m)^2 terms the zero padding would
                # otherwise add to edge windows. clamp guards the
                # counts == 1 edge case for tiny kernels.
                sq = ((windows - m.unsqueeze(-1))**2 * mask).sum(-1)
                v = torch.sqrt(sq / (counts - 1).clamp(min=1))
            else:
                v = torch.ones_like(x)
            return (x - m) / (v + self.eps)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment