@RicherMans
Last active December 26, 2019 02:46
Moving average normalization in PyTorch, similar to Kaldi's apply-cmvn-sliding.
import torch
import torch.nn as nn
import torch.nn.functional as F


class MovingAvgNorm(nn.Module):
    """
    Calculates multiple moving average estimates given a kernel_size.
    Similar to Kaldi's apply-cmvn-sliding.
    """

    def __init__(self, kernel_size=100, with_mean=True, with_std=True):
        super().__init__()
        self.kernel_size = kernel_size
        self.with_mean = with_mean
        self.with_std = with_std
        self.eps = 1e-12

    def forward(self, x):
        assert x.ndim == 3, "Input needs to be tensor of shape B x T x D"
        n_batch, timedim, featdim = x.shape
        # Utterance shorter than the window: normalize over the whole utterance
        if timedim < self.kernel_size:
            return (x - x.mean(1, keepdim=True)) / (x.std(1, keepdim=True) +
                                                    self.eps)
        else:
            # Reflect-pad so every frame gets a full window, then unfold into
            # overlapping windows of shape B x T x D x kernel_size
            sliding_window = F.pad(
                x.transpose(1, 2),
                (self.kernel_size // 2, self.kernel_size // 2 - 1),
                mode='reflect').unfold(-1, self.kernel_size,
                                       1).permute(0, 2, 1, 3).contiguous()
            # Mean estimate for each window
            m = sliding_window.mean(-1) if self.with_mean else torch.zeros_like(x)
            # Std estimate for each window
            v = sliding_window.std(-1) if self.with_std else torch.ones_like(x)
            return (x - m) / (v + self.eps)
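A quick usage sketch (illustrative; the batch size, frame count, and feature dimension below are arbitrary):

import torch

# Normalize a batch of 2 utterances, each 500 frames of 40-dim features
norm = MovingAvgNorm(kernel_size=100)
feats = torch.randn(2, 500, 40)
normed = norm(feats)
print(normed.shape)  # torch.Size([2, 500, 40])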
The gist was later revised: reflection padding is replaced by zero padding with exact per-frame denominators for the mean, and the whole computation runs under torch.no_grad():

import torch
import torch.nn as nn
import torch.nn.functional as F


class MovingAvgNorm(nn.Module):
    """
    Calculates multiple moving average estimates given a kernel_size.
    Similar to Kaldi's apply-cmvn-sliding.
    """

    def __init__(self, kernel_size=100, with_mean=True, with_std=True):
        super().__init__()
        self.kernel_size = kernel_size
        self.with_mean = with_mean
        self.with_std = with_std
        self.eps = 1e-12

    def forward(self, x):
        assert x.ndim == 3, "Input needs to be tensor of shape B x T x D"
        n_batch, timedim, featdim = x.shape
        with torch.no_grad():
            # Utterance shorter than the window: normalize over the whole utterance
            if timedim < self.kernel_size:
                return (x - x.mean(1, keepdim=True)) / (
                    x.std(1, keepdim=True) + self.eps)
            else:
                # Zero-pad so every frame gets a window, then unfold into
                # overlapping windows of shape B x T x D x kernel_size
                sliding_window = F.pad(
                    x.transpose(1, 2),
                    (self.kernel_size // 2, self.kernel_size // 2 - 1),
                    mode='constant').unfold(-1, self.kernel_size,
                                            1).permute(0, 2, 1, 3).contiguous()
                # Per-frame denominators: boundary windows contain zero
                # padding, so count only the real frames in each window
                non_affected_dims = timedim - self.kernel_size
                denom = list(range(self.kernel_size // 2, self.kernel_size)) + [
                    self.kernel_size
                ] * non_affected_dims + list(
                    range(self.kernel_size - 1, (self.kernel_size // 2) - 1, -1))
                denom = torch.tensor(denom, dtype=x.dtype,
                                     device=x.device).view(1, -1, 1)
                # Mean estimate for each window (exact even at the boundaries)
                m = sliding_window.sum(
                    -1) / denom if self.with_mean else torch.zeros_like(x)
                # Std estimate for each window; boundary windows still include
                # the zero padding, so the estimate there is approximate
                v = torch.sqrt(
                    torch.sum((sliding_window - m.unsqueeze(-1))**2, dim=-1) /
                    (self.kernel_size - 1)) if self.with_std else torch.ones_like(x)
                return (x - m) / (v + self.eps)
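A minimal sanity sketch for the revised version (assuming a small kernel_size=4 so the window is easy to slice by hand): for an interior frame t, the window spans frames [t - kernel_size//2, t + kernel_size//2), so subtracting a manually computed window mean should reproduce the module's output.

import torch

# With with_std=False the module only subtracts the per-frame window mean
norm = MovingAvgNorm(kernel_size=4, with_std=False)
x = torch.randn(1, 10, 3)
y = norm(x)
t = 5  # interior frame: its window covers frames 3..6
manual = x[0, t] - x[0, t - 2:t + 2].mean(0)
print(torch.allclose(y[0, t], manual, atol=1e-5))  # True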