import torch
import torch.nn as nn

# Placeholder module for route/shortcut blocks; their actual logic lives in Darknet.forward
class DummyLayer(nn.Module):
    def __init__(self):
        super(DummyLayer, self).__init__()

# Holds the anchors used by a yolo detection block
class DetectionLayer(nn.Module):
    def __init__(self, anchors):
        super(DetectionLayer, self).__init__()
        self.anchors = anchors
# build the layer blocks from the cfg file and inspect the resulting module list
blocks = parse_cfg(cfgfile)
details, modules = model_initialization(blocks)
modules
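# For context, a minimal sketch (an assumption, not the gist's actual code) of how the
# placeholder layers above are typically registered when the cfg blocks are turned into
# an nn.ModuleList; the real parse_cfg / model_initialization are defined in earlier cells,
# and the anchors below are hard-coded purely for illustration.
def sketch_initialization(blocks):
    module_list = nn.ModuleList()
    for idx, block in enumerate(blocks[1:]):
        seq = nn.Sequential()
        if block["type"] in ("route", "shortcut"):
            # nothing to learn here; the routing/adding happens inside Darknet.forward
            seq.add_module("dummy_{}".format(idx), DummyLayer())
        elif block["type"] == "yolo":
            seq.add_module("detection_{}".format(idx),
                           DetectionLayer([(116, 90), (156, 198), (373, 326)]))
        # (convolutional/upsample handling omitted in this sketch)
        module_list.append(seq)
    return module_list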
class Darknet(nn.Module):
    def __init__(self, cfgfile):
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = model_initialization(self.blocks)

    def forward(self, x, CUDA=False):
        modules = self.blocks[1:]
        # We cache every layer's output for the route and shortcut layers
        outputs = {}
        write = 0
        for i, module in enumerate(modules):
            module_type = module["type"]
            if module_type == "convolutional" or module_type == "upsample":
                x = self.module_list[i](x)
                outputs[i] = x
            elif module_type == "route":
                layers = module["layers"]
                layers = [int(a) for a in layers]
                if len(layers) == 1:
                    # bring forward the output of an earlier layer (relative index)
                    x = outputs[i + layers[0]]
                if len(layers) > 1:
                    # concatenate two feature maps along the depth dimension
                    map1 = outputs[i + layers[0]]
                    map2 = outputs[layers[1]]
                    x = torch.cat((map1, map2), 1)
                outputs[i] = x
            elif module_type == "shortcut":
                # residual connection: add the previous output to the 'from' layer's output
                from_ = int(module["from"])
                x = outputs[i - 1] + outputs[i + from_]
                outputs[i] = x
            elif module_type == 'yolo':
                anchors = self.module_list[i][0].anchors
                # Get the input dimensions
                inp_dim = int(self.net_info["height"])
                # Get the number of classes
                num_classes = int(module["classes"])
                # Transform the raw feature map into detections
                x = x.data  # get the data at that point
                x = detection_preprocess(x, inp_dim, anchors, num_classes, CUDA)
                # collect detections from all three scales into one tensor
                if not write:
                    detections = x
                    write = 1
                else:
                    detections = torch.cat((detections, x), 1)
                outputs[i] = outputs[i - 1]
        return detections
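# A minimal usage sketch (assumptions: "yolov3.cfg" is a placeholder path, and
# parse_cfg / model_initialization come from the earlier cells of this gist).
model = Darknet("yolov3.cfg")
print(model.net_info["height"])   # input resolution read from the [net] block, e.g. 416
print(len(model.module_list))     # one module per block in the cfg
# Once detection_preprocess below is complete, model(torch.randn(1, 3, 416, 416)) would
# return detections of shape [1, 10647, 85] for the standard yolov3.cfg (3 scales x 3 anchors).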
def detection_preprocess(x, inp_dim, anchors, num_classes, CUDA=False):
    """
    Takes a 4D feature map, the input image dimension, the anchors and the number
    of classes, and flattens the map into one row of attributes per bounding box.
    """
    # x --> 4D feature map
    batch_size = x.size(0)
    grid_size = x.size(2)
    # factor by which the current feature map is reduced from the input
    stride = inp_dim // x.size(2)
    bbox_attrs = 5 + num_classes   # 5 + 80
    num_anchors = len(anchors)     # 3

    # e.g. detection input dimension [1, 255, 13, 13]
    prediction = x.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)           # 1x255x169
    prediction = prediction.transpose(1, 2).contiguous()                                   # 1x169x255
    prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)  # 1x507x85

    # the anchor dimensions are w.r.t. the original image; rescale them to the feature map
    anchors = [(a[0]/stride, a[1]/stride) for a in anchors]

    # Sigmoid the centre_X, centre_Y and object confidence
    prediction[:, :, 0] = torch.sigmoid(prediction[:, :, 0])
    prediction[:, :, 1] = torch.sigmoid(prediction[:, :, 1])
    prediction[:, :, 4] = torch.sigmoid(prediction[:, :, 4])
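    # Worked shape check at this point (e.g. the 13x13 scale with a 416x416 input):
    #   stride = 416 // 13 = 32, so an anchor of (116, 90) pixels becomes
    #   (116/32, 90/32) = (3.625, 2.8125) grid cells, and prediction holds
    #   13*13*3 = 507 rows of 85 attributes each, i.e. shape [1, 507, 85].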