This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-scale values handed to cells_to_bboxes as its third argument
# (presumably the strides of the three detection heads -- TODO confirm).
S = [8, 16, 32]

# NOTE(review): the original indentation was lost in extraction; the nesting
# below (everything inside the no_grad block) is a reconstruction -- confirm.
# Gradient tracking is disabled because this is a forward/inference pass only.
with torch.no_grad():
    out = model(img)
    # Convert the raw model output into boxes using the head's anchors.
    boxes = cells_to_bboxes(out, model.head.anchors, S, list_output=False, is_pred=True)
    # Prune the candidate boxes with non-max suppression.
    boxes = non_max_suppression(boxes, iou_threshold=0.6, threshold=.25, max_detections=300)
    # Plot the first image of the batch (axes permuted 1, 2, 0 and moved to CPU)
    # together with the first entry of the suppressed boxes.
    plot_image(img[0].permute(1, 2, 0).to("cpu"), boxes[0])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def cells_to_bboxes(predictions, anchors, strides): | |
num_out_layers = len(predictions) | |
grid = [torch.empty(0) for _ in range(num_out_layers)] # initialize | |
anchor_grid = [torch.empty(0) for _ in range(num_out_layers)] # initialize | |
all_bboxes = [] | |
for i in range(num_out_layers): | |
bs, naxs, ny, nx, _ = predictions[i].shape | |
stride = strides[i] | |
grid[i], anchor_grid[i] = make_grids(anchors, naxs, ny=ny, nx=nx, stride=stride, i=i) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class HEADS(nn.Module): | |
def __init__(self, nc=80, anchors=(), ch=()): # detection layer | |
super(HEADS, self).__init__() | |
self.nc = nc # number of classes | |
self.nl = len(anchors) # number of detection layers | |
self.naxs = len(anchors[0]) # number of anchors per scale | |
self.stride = [8, 16, 32] | |
# anchors are divided by the stride (anchors_for_head_1/8, anchors_for_head_1/16 etc.) | |
anchors_ = torch.tensor(anchors).float().view(self.nl, -1, 2) / torch.tensor(self.stride).repeat(6, 1).T.reshape(3, 3, 2) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def forward(self, x): | |
assert x.shape[2] % 32 == 0 and x.shape[3] % 32 == 0, "Width and Height aren't divisible by 32!" | |
backbone_connection = [] | |
neck_connection = [] | |
outputs = [] | |
for idx, layer in enumerate(self.backbone): | |
# takes the out of the 2nd and 3rd C3 block and stores it | |
x = layer(x) | |
if idx in [4, 6]: | |
backbone_connection.append(x) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
self.neck = nn.ModuleList() | |
self.neck += [ | |
CBL(in_channels=first_out*16, out_channels=first_out*8, kernel_size=1, stride=1, padding=0), | |
C3(in_channels=first_out*16, out_channels=first_out*8, width_multiple=0.25, depth=2, backbone=False), | |
CBL(in_channels=first_out*8, out_channels=first_out*4, kernel_size=1, stride=1, padding=0), | |
C3(in_channels=first_out*8, out_channels=first_out*4, width_multiple=0.25, depth=2, backbone=False), | |
CBL(in_channels=first_out*4, out_channels=first_out*4, kernel_size=3, stride=2, padding=1), | |
C3(in_channels=first_out*8, out_channels=first_out*8, width_multiple=0.5, depth=2, backbone=False), | |
CBL(in_channels=first_out*8, out_channels=first_out*8, kernel_size=3, stride=2, padding=1), | |
C3(in_channels=first_out*16, out_channels=first_out*16, width_multiple=0.5, depth=2, backbone=False) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
self.backbone += [ | |
CBL(in_channels=3, out_channels=first_out, kernel_size=6, stride=2, padding=2), | |
CBL(in_channels=first_out, out_channels=first_out*2, kernel_size=3, stride=2, padding=1), | |
C3(in_channels=first_out*2, out_channels=first_out*2, width_multiple=0.5, depth=2), | |
CBL(in_channels=first_out*2, out_channels=first_out*4, kernel_size=3, stride=2, padding=1), | |
C3(in_channels=first_out*4, out_channels=first_out*4, width_multiple=0.5, depth=4), | |
CBL(in_channels=first_out*4, out_channels=first_out*8, kernel_size=3, stride=2, padding=1), | |
C3(in_channels=first_out*8, out_channels=first_out*8, width_multiple=0.5, depth=6), | |
CBL(in_channels=first_out*8, out_channels=first_out*16, kernel_size=3, stride=2, padding=1), | |
C3(in_channels=first_out*16, out_channels=first_out*16, width_multiple=0.5, depth=2), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class SPPF(nn.Module): | |
def __init__(self, in_channels, out_channels): | |
super(SPPF, self).__init__() | |
c_ = int(in_channels//2) | |
self.c1 = CBL(in_channels, c_, 1, 1, 0) | |
self.pool = nn.MaxPool2d(kernel_size=5, stride=1, padding=2) | |
self.c_out = CBL(c_ * 4, out_channels, 1, 1, 0) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class C3(nn.Module): | |
""" | |
Parameters: | |
in_channels (int): number of channel of the input tensor | |
out_channels (int): number of channel of the output tensor | |
width_multiple (float): it controls the number of channels (and weights) | |
of all the convolutions beside the | |
first and last one. If closer to 0, | |
the simpler the model. If closer to 1, | |
the model becomes more complex |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Bottleneck(nn.Module): | |
""" | |
Parameters: | |
in_channels (int): number of channel of the input tensor | |
out_channels (int): number of channel of the output tensor | |
width_multiple (float): it controls the number of channels (and weights) | |
of all the convolutions beside the | |
first and last one. If closer to 0, | |
the simpler the model. If closer to 1, | |
the model becomes more complex |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class CBL(nn.Module): | |
def __init__(self, in_channels, out_channels, kernel_size, stride, padding): | |
super(CBL, self).__init__() | |
conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False) | |
bn = nn.BatchNorm2d(out_channels, eps=1e-3, momentum=0.03) | |
self.cbl = nn.Sequential( | |
conv, | |
bn, |