Last active
December 14, 2021 07:07
-
-
Save ketanhdoshi/9bce0a7099e18a88246ae15c4b857eb2 to your computer and use it in GitHub Desktop.
Sound Classification Model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch.nn.functional as F | |
from torch.nn import init | |
# ---------------------------- | |
# Audio Classification Model | |
# ---------------------------- | |
class AudioClassifier(nn.Module):
    """Small convolutional classifier with a global-pooling linear head.

    The network is four convolution blocks (Conv2d -> ReLU -> BatchNorm2d,
    each convolution with stride 2), followed by adaptive average pooling
    down to a single value per channel and one linear layer producing the
    class scores.

    Args:
        in_channels: Number of channels of the input tensor. Defaults to 2,
            the value that was previously hard-coded.
        num_classes: Number of output scores. Defaults to 10, the value that
            was previously hard-coded.
    """

    # ----------------------------
    # Build the model architecture
    # ----------------------------
    def __init__(self, in_channels: int = 2, num_classes: int = 10):
        super().__init__()

        # The per-layer attributes (conv1, relu1, bn1, ...) are kept, and are
        # registered in the same order as before, so that state_dict keys of
        # existing checkpoints continue to match.

        # First Convolution Block with Relu and Batch Norm. Use Kaiming Initialization
        self.conv1, self.relu1, self.bn1 = self._conv_block(
            in_channels, 8, kernel_size=5, padding=2
        )

        # Second Convolution Block
        self.conv2, self.relu2, self.bn2 = self._conv_block(
            8, 16, kernel_size=3, padding=1
        )

        # Third Convolution Block
        self.conv3, self.relu3, self.bn3 = self._conv_block(
            16, 32, kernel_size=3, padding=1
        )

        # Fourth Convolution Block
        self.conv4, self.relu4, self.bn4 = self._conv_block(
            32, 64, kernel_size=3, padding=1
        )

        # Linear Classifier
        self.ap = nn.AdaptiveAvgPool2d(output_size=1)
        self.lin = nn.Linear(in_features=64, out_features=num_classes)

        # Wrap the Convolutional Blocks
        self.conv = nn.Sequential(
            self.conv1, self.relu1, self.bn1,
            self.conv2, self.relu2, self.bn2,
            self.conv3, self.relu3, self.bn3,
            self.conv4, self.relu4, self.bn4,
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, kernel_size, padding):
        """Build one stride-2 Conv2d -> ReLU -> BatchNorm2d triple.

        The convolution weight is re-initialised with Kaiming-normal
        (a=0.1) and its bias is set to zero.

        Returns:
            Tuple ``(conv, relu, bn)`` of freshly constructed modules.
        """
        conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=2,
            padding=padding,
        )
        # `a` is the negative-slope argument of kaiming_normal_; the value
        # 0.1 is carried over unchanged from the original initialisation.
        init.kaiming_normal_(conv.weight, a=0.1)
        init.zeros_(conv.bias)
        return conv, nn.ReLU(), nn.BatchNorm2d(out_channels)

    # ----------------------------
    # Forward pass computations
    # ----------------------------
    def forward(self, x):
        """Compute class scores for a batch of inputs.

        Args:
            x: Tensor of shape (batch, in_channels, height, width).

        Returns:
            Tensor of shape (batch, num_classes) holding the raw output of
            the linear layer (no softmax is applied here).
        """
        # Run the convolutional blocks
        x = self.conv(x)

        # Adaptive pool and flatten for input to linear layer
        x = self.ap(x)
        x = x.flatten(start_dim=1)

        # Linear layer
        x = self.lin(x)

        # Final output
        return x
# Pick the first CUDA device when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Instantiate the classifier and move its parameters to the chosen device
myModel = AudioClassifier().to(device)
# Inspect the device of the first parameter to confirm where the model lives
next(myModel.parameters()).device
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment