CS4243 PyTorch Snippets
import torch
import torch.nn as nn
import torch.nn.functional as F

"""
Creating tensors
"""
a = torch.rand(...)                     # returns a torch.Tensor with values sampled uniformly from [0, 1)
b = torch.LongTensor(10).random_(0, 2)  # 10-dim vector with integer values in {0, 1}
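An equivalent sketch with the modern constructor (a stylistic alternative, not from the original gist): torch.randint builds the same int64 vector directly.

b = torch.randint(0, 2, (10,))  # values drawn uniformly from {0, 1}, dtype torch.int64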
""" | |
Network template | |
""" | |
class Network(nn.Module): | |
def __init__(self): | |
super().__init__() | |
pass | |
def forward(self, x): | |
pass | |
""" | |
batch training loop | |
""" | |
for epoch in in range(EPOCHS): | |
num_batches = 0 | |
shuffled_indices=torch.randperm(60000) | |
running_loss = 0 | |
for i in range(0, DATASETSIZE, BATCHSIZE): | |
idx = shuffled_indices[count:count+bs] | |
idx = torch.LongTensor(DATASETSIZE).random_(BATCHSIZE) | |
minibatch_data = train_data[idx] | |
minibatch_labels = trian_labels[idx] | |
inputs = minibatch_data.view(bs, INPUTSIZE) | |
inputs.requires_grad_() | |
pred = model(inputs) | |
loss = criteron(pred, minibatch_labels) | |
running_loss = loss.detach().item() | |
num_batches += 1 | |
epoch_loss = running_loss / num_batches | |
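The loop above assumes model, criterion, and optimizer already exist. A minimal setup sketch with made-up MNIST-style hyperparameters (the layer sizes and learning rate here are illustrative, not from the original gist):

EPOCHS, DATASETSIZE, BATCHSIZE, INPUTSIZE = 10, 60000, 100, 784  # hypothetical values

model = nn.Sequential(               # any nn.Module works here
    nn.Linear(INPUTSIZE, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
)
criterion = nn.CrossEntropyLoss()    # expects raw scores, so no softmax in the model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)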
""" | |
Testing model | |
""" | |
def eval_on_test_set(model, test_data, test_label): | |
running_error=0 | |
num_batches=0 | |
for i in range(0,DATASETSIZE, BATCHSIZE): | |
inputs = test_data[i:i+BATCHSIZE].unsqueeze(dim=1) | |
minibatch_label = test_label[i:i+BATCHSIZE] | |
scores = model(inputs) | |
error = utils.get_error( scores , minibatch_label) | |
running_error += error.item() | |
num_batches+=1 | |
total_error = running_error/num_batches | |
print( 'error rate on test set =', total_error*100 ,'percent') | |
def get_accuracy(scores, labels):
    # use within the batched training loop to get the batch accuracy
    num_data = scores.size(0)
    predicted_labels = scores.argmax(dim=1)
    indicator = (predicted_labels == labels)
    num_correct = indicator.sum()
    accuracy = 100 * num_correct.float() / num_data
    return accuracy
""" | |
One-hot encoding | |
""" | |
def index_to_onehot(labels, num_classes=10): | |
""" | |
convert index label to one hot labels | |
Inputs: | |
labels: Integer Tensor of length N, e.g., [0, 1, 2, 4, 3] | |
num_classes: the number of classes, e.g., 5 | |
Output: | |
Tensor: onehot_labels of size [N, num_classes] | |
a matrix that contains one-hot label for each sample: | |
e.g., [ | |
[1, 0, 0, 0, 0], | |
[0, 1, 0, 0, 0], | |
[0, 0, 1, 0, 0], | |
[0, 0, 0, 0, 1], | |
[0, 0, 0, 1, 0] | |
] | |
""" | |
num_samples = len(labels) | |
onehot = torch.zeros(num_samples, num_classes) | |
onehot[torch.arange(num_samples), labels] = 1 | |
return onehot | |
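For integer label tensors, the built-in F.one_hot produces the same matrix; it returns an integer tensor, so cast to float before using it as a soft target:

labels = torch.tensor([0, 1, 2, 4, 3])
onehot = F.one_hot(labels, num_classes=5).float()  # same output as index_to_onehot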
""" | |
Soft-label CrossEntropy | |
Only when final layer does not contain Softmax | |
""" | |
score = net(x) | |
prob = torch.softmax(score, dim=-1) | |
loss = -(prob.log() * y).sum(dim=-1).mean() |
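A numerically safer variant of the same loss (a sketch, assuming y holds soft labels that sum to 1 along the last dimension): fusing the softmax and log via F.log_softmax avoids log(0) when a class probability underflows.

score = net(x)
logprob = F.log_softmax(score, dim=-1)    # log(softmax(score)) computed stably
loss = -(logprob * y).sum(dim=-1).mean()  # same value as above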
Cleaned up implementation of Fast R-CNN
class VanillaFastRCNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, object_size, classes, n_objects, im_size):
        super(VanillaFastRCNN, self).__init__()

        # metadata
        self.n_objects = n_objects
        self.offset = (object_size - 1) // 2
        self.object_size = object_size
        self.hidden_dim = hidden_dim
        self.im_size = im_size

        # backbone convnet
        self.conv1 = nn.Conv2d(input_dim, hidden_dim, kernel_size=5, stride=1, padding=2)   # same size
        self.conv2 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=5, stride=1, padding=2)  # same size
        self.conv3 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=5, stride=1, padding=2)  # same size
        self.activation = nn.functional.relu

        # per-region network, predicting bbox pixel anchor scores
        self.conv_boundingbox = nn.Conv2d(hidden_dim, 1, kernel_size=object_size, stride=1, padding=self.offset)  # activation map padded to detect object size

        # per-region network, predicting region class
        self.linear = nn.Linear(in_features=hidden_dim * object_size**2, out_features=classes)  # take object-size feature map and classify

    def forward(self, input_tensor, bounding_box_tensor, train_flag=True):
        # apply backbone convnet for feature extraction
        x = input_tensor
        x = self.activation(self.conv1(x))
        x = self.activation(self.conv2(x))
        x = self.activation(self.conv3(x))

        # predict bounding box anchors; squeeze only the channel dim so batch size 1 survives
        scores_boundingbox = self.conv_boundingbox(x).squeeze(dim=1)

        # predict classes for each given bounding box
        batches, c, h, w = input_tensor.shape
        offset = self.offset
        object_size = self.object_size

        if train_flag:
            boxes = []
            for b in range(batches):             # for each image in batch
                for k in range(self.n_objects):  # for n objects to be predicted
                    # cut out boxes of size object_size^2 around the ground-truth centres
                    horizontal_left = bounding_box_tensor[b, k, 0].long() - offset
                    vertical_down = bounding_box_tensor[b, k, 1].long() - offset
                    boxes.append(x[b, :, vertical_down:vertical_down + object_size, horizontal_left:horizontal_left + object_size])
            boxes = torch.stack(boxes, dim=0)  # (batch_size * n_objects) x hidden_dim x object_size x object_size
            boxes = boxes.view(-1, self.hidden_dim * object_size * object_size)  # reshape for classification with linear layer
            scores_boxes = self.linear(boxes)
        else:
            total_boxes = []
            for b in range(batches):
                # get top n_objects box centres by flattening this image's score map, then sorting
                flat_scores = scores_boundingbox[b].view(-1)  # to im_size * im_size
                _, idx_largest = torch.sort(flat_scores, descending=True)
                idx_largest = idx_largest[:self.n_objects]    # take top n_objects points as centres
                idx_y = idx_largest // self.im_size           # recover (y, x) coordinates
                idx_x = idx_largest - idx_y * self.im_size

                # cut out the region around each predicted centre and append it, as in training
                boxes = []
                for k in range(self.n_objects):
                    horizontal_left = idx_x[k] - offset
                    vertical_down = idx_y[k] - offset
                    boxes.append(x[b, :, vertical_down:vertical_down + object_size, horizontal_left:horizontal_left + object_size])
                boxes = torch.stack(boxes, dim=0)
                boxes = boxes.view(-1, self.hidden_dim * object_size * object_size)
                total_boxes.append(boxes)

            # classify for the whole batch
            total_boxes = torch.cat(total_boxes, dim=0)  # list to tensor
            scores_boxes = self.linear(total_boxes)

        return scores_boxes, scores_boundingbox
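A hypothetical shape check for the module above; every size here (channels, image size, object count) is made up for illustration:

model = VanillaFastRCNN(input_dim=1, hidden_dim=32, object_size=7, classes=10, n_objects=2, im_size=28)
images = torch.rand(4, 1, 28, 28)                   # batch of 4 grayscale 28x28 images
centres = torch.randint(3, 25, (4, 2, 2))           # (x, y) object centres, kept away from the border
scores_boxes, scores_bbox = model(images, centres)  # shapes [8, 10] and [4, 28, 28]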
Count network parameters
def display_num_param(net):
    nb_param = 0
    for param in net.parameters():
        nb_param += param.numel()
    print('There are {} ({:.2f} million) parameters in this neural network'.format(
        nb_param, nb_param / 1e6))
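The same count as a one-liner, handy for quick checks in a notebook:

nb_param = sum(p.numel() for p in net.parameters())
print('{} ({:.2f} million) parameters'.format(nb_param, nb_param / 1e6))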
Mask R-CNN architecture
class MaskRCNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, object_size, classes, n_objects, im_size, nb_pixel_classes):
        super(MaskRCNN, self).__init__()

        # metadata
        self.n_objects = n_objects
        self.offset = (object_size - 1) // 2
        self.object_size = object_size
        self.hidden_dim = hidden_dim
        self.im_size = im_size

        # downsampling convnet for the semantic-segmentation branch
        self.ss_conv1 = nn.Conv2d(input_dim, hidden_dim, (3, 3), padding=1, stride=2)   # input_dim x im_size x im_size --> hidden_dim x im_size/2 x im_size/2
        self.ss_conv2 = nn.Conv2d(hidden_dim, hidden_dim, (3, 3), padding=1, stride=2)  # hidden_dim x im_size/2 x im_size/2 --> hidden_dim x im_size/4 x im_size/4

        # upsampling convnet
        self.ss_trans_conv1 = nn.ConvTranspose2d(hidden_dim, hidden_dim, (4, 4), padding=1, stride=2)  # back to im_size/2
        self.ss_trans_conv2 = nn.ConvTranspose2d(hidden_dim, hidden_dim, (4, 4), padding=1, stride=2)  # back to im_size

        # per-pixel classification layer (nb_pixel_classes = number of segmentation classes)
        self.ss_classifier_head = nn.Conv2d(hidden_dim, nb_pixel_classes, (3, 3), padding=1, stride=1)  # hidden_dim x im_size x im_size --> nb_pixel_classes x im_size x im_size

        # backbone convnet
        self.conv1 = nn.Conv2d(input_dim, hidden_dim, kernel_size=5, stride=1, padding=2)   # same size
        self.conv2 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=5, stride=1, padding=2)  # same size
        self.conv3 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=5, stride=1, padding=2)  # same size
        self.activation = nn.functional.relu

        # per-region network, predicting bbox pixel anchor scores
        self.conv_boundingbox = nn.Conv2d(hidden_dim, 1, kernel_size=object_size, stride=1, padding=self.offset)  # activation map padded to detect object size

        # per-region network, predicting region class
        self.linear = nn.Linear(in_features=hidden_dim * object_size**2, out_features=classes)  # take object-size feature map and classify

    def forward(self, input_tensor, bounding_box_tensor, train_flag=True):
        # apply backbone convnet for feature extraction
        x = input_tensor
        x = self.activation(self.conv1(x))
        x = self.activation(self.conv2(x))
        x = self.activation(self.conv3(x))

        # predict bounding box anchors; squeeze only the channel dim so batch size 1 survives
        scores_boundingbox = self.conv_boundingbox(x).squeeze(dim=1)

        # predict classes for each given bounding box
        batches, c, h, w = input_tensor.shape
        offset = self.offset
        object_size = self.object_size

        if train_flag:
            boxes = []
            for b in range(batches):             # for each image in batch
                for k in range(self.n_objects):  # for n objects to be predicted
                    # cut out boxes of size object_size^2 around the ground-truth centres
                    horizontal_left = bounding_box_tensor[b, k, 0].long() - offset
                    vertical_down = bounding_box_tensor[b, k, 1].long() - offset
                    boxes.append(x[b, :, vertical_down:vertical_down + object_size, horizontal_left:horizontal_left + object_size])
            boxes = torch.stack(boxes, dim=0)  # (batch_size * n_objects) x hidden_dim x object_size x object_size
            boxes = boxes.view(-1, self.hidden_dim * object_size * object_size)  # reshape for classification with linear layer
            scores_boxes = self.linear(boxes)
        else:
            total_boxes = []
            for b in range(batches):
                # get top n_objects box centres by flattening this image's score map, then sorting
                flat_scores = scores_boundingbox[b].view(-1)  # to im_size * im_size
                _, idx_largest = torch.sort(flat_scores, descending=True)
                idx_largest = idx_largest[:self.n_objects]    # take top n_objects points as centres
                idx_y = idx_largest // self.im_size           # recover (y, x) coordinates
                idx_x = idx_largest - idx_y * self.im_size

                # cut out the region around each predicted centre and append it, as in training
                boxes = []
                for k in range(self.n_objects):
                    horizontal_left = idx_x[k] - offset
                    vertical_down = idx_y[k] - offset
                    boxes.append(x[b, :, vertical_down:vertical_down + object_size, horizontal_left:horizontal_left + object_size])
                boxes = torch.stack(boxes, dim=0)
                boxes = boxes.view(-1, self.hidden_dim * object_size * object_size)
                total_boxes.append(boxes)

            # classify for the whole batch
            total_boxes = torch.cat(total_boxes, dim=0)  # list to tensor
            scores_boxes = self.linear(total_boxes)

        # semantic-segmentation branch: downsample ...
        x = input_tensor
        x = torch.relu(self.ss_conv1(x))        # [batch_size, hidden_dim, im_size/2, im_size/2]
        x = torch.relu(self.ss_conv2(x))        # [batch_size, hidden_dim, im_size/4, im_size/4]

        # ... then upsample back to the input resolution
        x = torch.relu(self.ss_trans_conv1(x))  # [batch_size, hidden_dim, im_size/2, im_size/2]
        x = torch.relu(self.ss_trans_conv2(x))  # [batch_size, hidden_dim, im_size, im_size]

        # per-pixel classification layer
        scores_pixel_class = self.ss_classifier_head(x)  # [batch_size, nb_pixel_classes, im_size, im_size]

        return scores_boxes, scores_boundingbox, scores_pixel_class
Bilinear Interpolation
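A minimal sketch of bilinear interpolation on a [C, H, W] feature map at fractional (x, y) pixel coordinates, the lookup used when pooling region features at non-integer locations; the function name and border clamping are illustrative choices, not from the original gist:

def bilinear_sample(feature_map, x, y):
    # feature_map: [C, H, W]; (x, y) are fractional pixel coordinates inside the map
    C, H, W = feature_map.shape

    x0, y0 = int(x), int(y)                          # top-left integer corner
    x1, y1 = min(x0 + 1, W - 1), min(y0 + 1, H - 1)  # clamp at the border
    dx, dy = x - x0, y - y0                          # fractional offsets in [0, 1)

    # weighted average of the four surrounding pixels
    return ((1 - dx) * (1 - dy) * feature_map[:, y0, x0]
            + dx * (1 - dy) * feature_map[:, y0, x1]
            + (1 - dx) * dy * feature_map[:, y1, x0]
            + dx * dy * feature_map[:, y1, x1])

fmap = torch.rand(32, 28, 28)
v = bilinear_sample(fmap, 10.3, 5.7)  # [32] vector interpolated between 4 neighbours

For batched sampling, torch.nn.functional.grid_sample performs the same bilinear lookup with coordinates normalized to [-1, 1].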