CS4243 PyTorch Snippets
import torch
import torch.nn as nn
import torch.nn.functional as F

"""
Creating tensors
"""
a = torch.rand(...)                     # returns a torch.Tensor with values sampled uniformly from [0, 1)
b = torch.LongTensor(10).random_(0, 2)  # 10-dim vector with integer values in {0, 1}
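An equivalent sketch with the modern constructor (a stylistic alternative, not from the original gist): torch.randint builds the same int64 vector directly.

b = torch.randint(0, 2, (10,))  # values drawn uniformly from {0, 1}, dtype torch.int64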
""" | |
Network template | |
""" | |
class Network(nn.Module): | |
def __init__(self): | |
super().__init__() | |
pass | |
def forward(self, x): | |
pass | |
""" | |
batch training loop | |
""" | |
for epoch in in range(EPOCHS): | |
num_batches = 0 | |
shuffled_indices=torch.randperm(60000) | |
running_loss = 0 | |
for i in range(0, DATASETSIZE, BATCHSIZE): | |
idx = shuffled_indices[count:count+bs] | |
idx = torch.LongTensor(DATASETSIZE).random_(BATCHSIZE) | |
minibatch_data = train_data[idx] | |
minibatch_labels = trian_labels[idx] | |
inputs = minibatch_data.view(bs, INPUTSIZE) | |
inputs.requires_grad_() | |
pred = model(inputs) | |
loss = criteron(pred, minibatch_labels) | |
running_loss = loss.detach().item() | |
num_batches += 1 | |
epoch_loss = running_loss / num_batches | |
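The loop above assumes model, criterion, and optimizer already exist. A minimal setup sketch with made-up MNIST-style hyperparameters (the layer sizes and learning rate here are illustrative, not from the original gist):

EPOCHS, DATASETSIZE, BATCHSIZE, INPUTSIZE = 10, 60000, 100, 784  # hypothetical values

model = nn.Sequential(               # any nn.Module works here
    nn.Linear(INPUTSIZE, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
)
criterion = nn.CrossEntropyLoss()    # expects raw scores, so no softmax in the model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)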
""" | |
Testing model | |
""" | |
def eval_on_test_set(model, test_data, test_label): | |
running_error=0 | |
num_batches=0 | |
for i in range(0,DATASETSIZE, BATCHSIZE): | |
inputs = test_data[i:i+BATCHSIZE].unsqueeze(dim=1) | |
minibatch_label = test_label[i:i+BATCHSIZE] | |
scores = model(inputs) | |
error = utils.get_error( scores , minibatch_label) | |
running_error += error.item() | |
num_batches+=1 | |
total_error = running_error/num_batches | |
print( 'error rate on test set =', total_error*100 ,'percent') | |
def get_accuracy(scores, labels):
    # use within the batched training loop to get the batch accuracy
    num_data = scores.size(0)
    predicted_labels = scores.argmax(dim=1)
    indicator = (predicted_labels == labels)
    num_correct = indicator.sum()
    accuracy = 100 * num_correct.float() / num_data
    return accuracy
""" | |
One-hot encoding | |
""" | |
def index_to_onehot(labels, num_classes=10): | |
""" | |
convert index label to one hot labels | |
Inputs: | |
labels: Integer Tensor of length N, e.g., [0, 1, 2, 4, 3] | |
num_classes: the number of classes, e.g., 5 | |
Output: | |
Tensor: onehot_labels of size [N, num_classes] | |
a matrix that contains one-hot label for each sample: | |
e.g., [ | |
[1, 0, 0, 0, 0], | |
[0, 1, 0, 0, 0], | |
[0, 0, 1, 0, 0], | |
[0, 0, 0, 0, 1], | |
[0, 0, 0, 1, 0] | |
] | |
""" | |
num_samples = len(labels) | |
onehot = torch.zeros(num_samples, num_classes) | |
onehot[torch.arange(num_samples), labels] = 1 | |
return onehot | |
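For integer label tensors, the built-in F.one_hot produces the same matrix; it returns an integer tensor, so cast to float before using it as a soft target:

labels = torch.tensor([0, 1, 2, 4, 3])
onehot = F.one_hot(labels, num_classes=5).float()  # same output as index_to_onehot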
""" | |
Soft-label CrossEntropy | |
Only when final layer does not contain Softmax | |
""" | |
score = net(x) | |
prob = torch.softmax(score, dim=-1) | |
loss = -(prob.log() * y).sum(dim=-1).mean() |
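A numerically safer variant of the same loss (a sketch, assuming y holds soft labels that sum to 1 along the last dimension): fusing the softmax and log via F.log_softmax avoids log(0) when a class probability underflows.

score = net(x)
logprob = F.log_softmax(score, dim=-1)    # log(softmax(score)) computed stably
loss = -(logprob * y).sum(dim=-1).mean()  # same value as above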
Cleaned up implementation of Fast R-CNN
class VanillaFastRCNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, object_size, classes, n_objects, im_size):
        super(VanillaFastRCNN, self).__init__()

        # metadata
        self.n_objects = n_objects
        self.offset = (object_size - 1) // 2
        self.object_size = object_size
        self.hidden_dim = hidden_dim
        self.im_size = im_size

        # backbone convnet
        self.conv1 = nn.Conv2d(input_dim, hidden_dim, kernel_size=5, stride=1, padding=2)   # same size
        self.conv2 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=5, stride=1, padding=2)  # same size
        self.conv3 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=5, stride=1, padding=2)  # same size
        self.activation = nn.functional.relu

        # per-region network, predicting bbox pixel anchor scores
        self.conv_boundingbox = nn.Conv2d(hidden_dim, 1, kernel_size=object_size, stride=1, padding=self.offset)  # activation map padded to detect object size

        # per-region network, predicting region class
        self.linear = nn.Linear(in_features=hidden_dim * object_size**2, out_features=classes)  # take object-size feature map and classify

    def forward(self, input_tensor, bounding_box_tensor, train_flag=True):
        # apply backbone convnet for feature extraction
        x = input_tensor
        x = self.activation(self.conv1(x))
        x = self.activation(self.conv2(x))
        x = self.activation(self.conv3(x))

        # predict bounding box anchors; squeeze only the channel dim so batch size 1 survives
        scores_boundingbox = self.conv_boundingbox(x).squeeze(dim=1)

        # predict classes for each given bounding box
        batches, c, h, w = input_tensor.shape
        offset = self.offset
        object_size = self.object_size

        if train_flag:
            boxes = []
            for b in range(batches):             # for each image in batch
                for k in range(self.n_objects):  # for n objects to be predicted
                    # cut out boxes of size object_size^2 around the ground-truth centres
                    horizontal_left = bounding_box_tensor[b, k, 0].long() - offset
                    vertical_down = bounding_box_tensor[b, k, 1].long() - offset
                    boxes.append(x[b, :, vertical_down:vertical_down + object_size, horizontal_left:horizontal_left + object_size])
            boxes = torch.stack(boxes, dim=0)  # (batch_size * n_objects) x hidden_dim x object_size x object_size
            boxes = boxes.view(-1, self.hidden_dim * object_size * object_size)  # reshape for classification with linear layer
            scores_boxes = self.linear(boxes)
        else:
            total_boxes = []
            for b in range(batches):
                # get top n_objects box centres by flattening this image's score map, then sorting
                flat_scores = scores_boundingbox[b].view(-1)  # to im_size * im_size
                _, idx_largest = torch.sort(flat_scores, descending=True)
                idx_largest = idx_largest[:self.n_objects]    # take top n_objects points as centres
                idx_y = idx_largest // self.im_size           # recover (y, x) coordinates
                idx_x = idx_largest - idx_y * self.im_size

                # cut out the region around each predicted centre and append it, as in training
                boxes = []
                for k in range(self.n_objects):
                    horizontal_left = idx_x[k] - offset
                    vertical_down = idx_y[k] - offset
                    boxes.append(x[b, :, vertical_down:vertical_down + object_size, horizontal_left:horizontal_left + object_size])
                boxes = torch.stack(boxes, dim=0)
                boxes = boxes.view(-1, self.hidden_dim * object_size * object_size)
                total_boxes.append(boxes)

            # classify for the whole batch
            total_boxes = torch.cat(total_boxes, dim=0)  # list to tensor
            scores_boxes = self.linear(total_boxes)

        return scores_boxes, scores_boundingbox
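A hypothetical shape check for the module above; every size here (channels, image size, object count) is made up for illustration:

model = VanillaFastRCNN(input_dim=1, hidden_dim=32, object_size=7, classes=10, n_objects=2, im_size=28)
images = torch.rand(4, 1, 28, 28)                   # batch of 4 grayscale 28x28 images
centres = torch.randint(3, 25, (4, 2, 2))           # (x, y) object centres, kept away from the border
scores_boxes, scores_bbox = model(images, centres)  # shapes [8, 10] and [4, 28, 28]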
Count network parameters
def display_num_param(net):
    nb_param = 0
    for param in net.parameters():
        nb_param += param.numel()
    print('There are {} ({:.2f} million) parameters in this neural network'.format(
        nb_param, nb_param / 1e6))
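The same count as a one-liner, handy for quick checks in a notebook:

nb_param = sum(p.numel() for p in net.parameters())
print('{} ({:.2f} million) parameters'.format(nb_param, nb_param / 1e6))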
Mask R-CNN architecture
class MaskRCNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, object_size, classes, n_objects, im_size, nb_pixel_classes):
        super(MaskRCNN, self).__init__()

        # metadata
        self.n_objects = n_objects
        self.offset = (object_size - 1) // 2
        self.object_size = object_size
        self.hidden_dim = hidden_dim
        self.im_size = im_size

        # downsampling convnet for the semantic-segmentation branch
        self.ss_conv1 = nn.Conv2d(input_dim, hidden_dim, (3, 3), padding=1, stride=2)   # input_dim x im_size x im_size --> hidden_dim x im_size/2 x im_size/2
        self.ss_conv2 = nn.Conv2d(hidden_dim, hidden_dim, (3, 3), padding=1, stride=2)  # hidden_dim x im_size/2 x im_size/2 --> hidden_dim x im_size/4 x im_size/4

        # upsampling convnet
        self.ss_trans_conv1 = nn.ConvTranspose2d(hidden_dim, hidden_dim, (4, 4), padding=1, stride=2)  # back to im_size/2
        self.ss_trans_conv2 = nn.ConvTranspose2d(hidden_dim, hidden_dim, (4, 4), padding=1, stride=2)  # back to im_size

        # per-pixel classification layer (nb_pixel_classes = number of segmentation classes)
        self.ss_classifier_head = nn.Conv2d(hidden_dim, nb_pixel_classes, (3, 3), padding=1, stride=1)  # hidden_dim x im_size x im_size --> nb_pixel_classes x im_size x im_size

        # backbone convnet
        self.conv1 = nn.Conv2d(input_dim, hidden_dim, kernel_size=5, stride=1, padding=2)   # same size
        self.conv2 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=5, stride=1, padding=2)  # same size
        self.conv3 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=5, stride=1, padding=2)  # same size
        self.activation = nn.functional.relu

        # per-region network, predicting bbox pixel anchor scores
        self.conv_boundingbox = nn.Conv2d(hidden_dim, 1, kernel_size=object_size, stride=1, padding=self.offset)  # activation map padded to detect object size

        # per-region network, predicting region class
        self.linear = nn.Linear(in_features=hidden_dim * object_size**2, out_features=classes)  # take object-size feature map and classify

    def forward(self, input_tensor, bounding_box_tensor, train_flag=True):
        # apply backbone convnet for feature extraction
        x = input_tensor
        x = self.activation(self.conv1(x))
        x = self.activation(self.conv2(x))
        x = self.activation(self.conv3(x))

        # predict bounding box anchors; squeeze only the channel dim so batch size 1 survives
        scores_boundingbox = self.conv_boundingbox(x).squeeze(dim=1)

        # predict classes for each given bounding box
        batches, c, h, w = input_tensor.shape
        offset = self.offset
        object_size = self.object_size

        if train_flag:
            boxes = []
            for b in range(batches):             # for each image in batch
                for k in range(self.n_objects):  # for n objects to be predicted
                    # cut out boxes of size object_size^2 around the ground-truth centres
                    horizontal_left = bounding_box_tensor[b, k, 0].long() - offset
                    vertical_down = bounding_box_tensor[b, k, 1].long() - offset
                    boxes.append(x[b, :, vertical_down:vertical_down + object_size, horizontal_left:horizontal_left + object_size])
            boxes = torch.stack(boxes, dim=0)  # (batch_size * n_objects) x hidden_dim x object_size x object_size
            boxes = boxes.view(-1, self.hidden_dim * object_size * object_size)  # reshape for classification with linear layer
            scores_boxes = self.linear(boxes)
        else:
            total_boxes = []
            for b in range(batches):
                # get top n_objects box centres by flattening this image's score map, then sorting
                flat_scores = scores_boundingbox[b].view(-1)  # to im_size * im_size
                _, idx_largest = torch.sort(flat_scores, descending=True)
                idx_largest = idx_largest[:self.n_objects]    # take top n_objects points as centres
                idx_y = idx_largest // self.im_size           # recover (y, x) coordinates
                idx_x = idx_largest - idx_y * self.im_size

                # cut out the region around each predicted centre and append it, as in training
                boxes = []
                for k in range(self.n_objects):
                    horizontal_left = idx_x[k] - offset
                    vertical_down = idx_y[k] - offset
                    boxes.append(x[b, :, vertical_down:vertical_down + object_size, horizontal_left:horizontal_left + object_size])
                boxes = torch.stack(boxes, dim=0)
                boxes = boxes.view(-1, self.hidden_dim * object_size * object_size)
                total_boxes.append(boxes)

            # classify for the whole batch
            total_boxes = torch.cat(total_boxes, dim=0)  # list to tensor
            scores_boxes = self.linear(total_boxes)

        # semantic-segmentation branch: downsample ...
        x = input_tensor
        x = torch.relu(self.ss_conv1(x))        # [batch_size, hidden_dim, im_size/2, im_size/2]
        x = torch.relu(self.ss_conv2(x))        # [batch_size, hidden_dim, im_size/4, im_size/4]

        # ... then upsample back to the input resolution
        x = torch.relu(self.ss_trans_conv1(x))  # [batch_size, hidden_dim, im_size/2, im_size/2]
        x = torch.relu(self.ss_trans_conv2(x))  # [batch_size, hidden_dim, im_size, im_size]

        # per-pixel classification layer
        scores_pixel_class = self.ss_classifier_head(x)  # [batch_size, nb_pixel_classes, im_size, im_size]

        return scores_boxes, scores_boundingbox, scores_pixel_class
Bilinear Interpolation
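A minimal sketch of bilinear interpolation on a [C, H, W] feature map at fractional (x, y) pixel coordinates, the lookup used when pooling region features at non-integer locations; the function name and border clamping are illustrative choices, not from the original gist:

def bilinear_sample(feature_map, x, y):
    # feature_map: [C, H, W]; (x, y) are fractional pixel coordinates inside the map
    C, H, W = feature_map.shape

    x0, y0 = int(x), int(y)                          # top-left integer corner
    x1, y1 = min(x0 + 1, W - 1), min(y0 + 1, H - 1)  # clamp at the border
    dx, dy = x - x0, y - y0                          # fractional offsets in [0, 1)

    # weighted average of the four surrounding pixels
    return ((1 - dx) * (1 - dy) * feature_map[:, y0, x0]
            + dx * (1 - dy) * feature_map[:, y0, x1]
            + (1 - dx) * dy * feature_map[:, y1, x0]
            + dx * dy * feature_map[:, y1, x1])

fmap = torch.rand(32, 28, 28)
v = bilinear_sample(fmap, 10.3, 5.7)  # [32] vector interpolated between 4 neighbours

For batched sampling, torch.nn.functional.grid_sample performs the same bilinear lookup with coordinates normalized to [-1, 1].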