Skip to content

Instantly share code, notes, and snippets.

View AlessandroMondin's full-sized avatar

AlessandroMondin AlessandroMondin

  • AI engineer
  • Berlin
View GitHub Profile
# Run one inference pass, decode the raw head outputs into boxes, filter them
# with non-max suppression and plot the detections of the first image.
S = [8, 16, 32]  # strides handed to cells_to_bboxes

# The forward pass runs with autograd disabled.
with torch.no_grad():
    out = model(img)

# NOTE(review): this call passes list_output/is_pred, while the
# cells_to_bboxes definition visible in this file only takes
# (predictions, anchors, strides) -- confirm which version is targeted.
boxes = cells_to_bboxes(out, model.head.anchors, S, list_output=False, is_pred=True)
boxes = non_max_suppression(boxes, iou_threshold=0.6, threshold=.25, max_detections=300)
# presumably img is (batch, channels, H, W), so permute(1, 2, 0) yields a
# channels-last image for plotting -- TODO confirm against plot_image.
plot_image(img[0].permute(1, 2, 0).to("cpu"), boxes[0])
def cells_to_bboxes(predictions, anchors, strides):
    # Sets up one grid / anchor-grid pair per output layer via make_grids.
    # NOTE(review): only the beginning of this function is visible in this
    # excerpt; how all_bboxes is filled and what is returned is not shown.
    num_out_layers = len(predictions)
    grid = [torch.empty(0) for _ in range(num_out_layers)] # initialize
    anchor_grid = [torch.empty(0) for _ in range(num_out_layers)] # initialize
    all_bboxes = []
    for i in range(num_out_layers):
        # every prediction is unpacked as a 5-D tensor; presumably
        # (batch, anchors per scale, grid rows, grid cols, values) -- TODO confirm
        bs, naxs, ny, nx, _ = predictions[i].shape
        # strides[i] is the stride paired with output layer i
        stride = strides[i]
        grid[i], anchor_grid[i] = make_grids(anchors, naxs, ny=ny, nx=nx, stride=stride, i=i)
class HEADS(nn.Module):
    """Detection head set-up (only the start of ``__init__`` is visible here).

    Parameters:
        nc (int): number of classes
        anchors (sequence): one sequence of (w, h) anchor pairs per detection
            layer; must be non-empty because ``anchors[0]`` is inspected
        ch (sequence): accepted but not used in the visible part of the class
    """

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super().__init__()
        self.nc = nc  # number of classes
        self.nl = len(anchors)  # number of detection layers
        self.naxs = len(anchors[0])  # number of anchors per scale
        self.stride = [8, 16, 32]
        # anchors are divided by the stride (anchors_for_head_1/8, anchors_for_head_1/16 etc.)
        # Shaping the strides as (layers, 1, 1) lets broadcasting apply one
        # stride to every (w, h) pair of its layer, whatever the number of
        # anchors per scale (the previous hard-coded reshape(3, 3, 2) only
        # worked for exactly three anchors per scale).
        strides = torch.tensor(self.stride).view(-1, 1, 1)
        # NOTE(review): anchors_ is not used in the visible excerpt; presumably
        # it is consumed by code that follows in the full file.
        anchors_ = torch.tensor(anchors).float().view(self.nl, -1, 2) / strides
def forward(self, x):
    # Forward pass (excerpt): checks the input size, then feeds x through
    # self.backbone layer by layer while keeping two intermediate outputs.
    # NOTE(review): the rest of the method (use of neck_connection and
    # outputs, the return value) is not visible in this excerpt.
    # dims 2 and 3 of x must both be multiples of 32
    assert x.shape[2] % 32 == 0 and x.shape[3] % 32 == 0, "Width and Height aren't divisible by 32!"
    backbone_connection = []
    neck_connection = []
    outputs = []
    for idx, layer in enumerate(self.backbone):
        # takes the out of the 2nd and 3rd C3 block and stores it
        x = layer(x)
        # indices 4 and 6 are the positions of those blocks in self.backbone
        if idx in [4, 6]:
            backbone_connection.append(x)
# Neck: alternating CBL / C3 layers, all sized relative to first_out.
# NOTE(review): the list literal is cut off in this excerpt (no closing bracket).
self.neck = nn.ModuleList()
self.neck += [
    # 1x1 CBL halving the channels (16x -> 8x first_out)
    CBL(in_channels=first_out*16, out_channels=first_out*8, kernel_size=1, stride=1, padding=0),
    # C3 takes twice the channels the previous CBL emits; presumably its input
    # is concatenated with a stored backbone output -- TODO confirm in forward
    C3(in_channels=first_out*16, out_channels=first_out*8, width_multiple=0.25, depth=2, backbone=False),
    # 1x1 CBL halving the channels again (8x -> 4x first_out)
    CBL(in_channels=first_out*8, out_channels=first_out*4, kernel_size=1, stride=1, padding=0),
    C3(in_channels=first_out*8, out_channels=first_out*4, width_multiple=0.25, depth=2, backbone=False),
    # 3x3 CBL with stride 2
    CBL(in_channels=first_out*4, out_channels=first_out*4, kernel_size=3, stride=2, padding=1),
    C3(in_channels=first_out*8, out_channels=first_out*8, width_multiple=0.5, depth=2, backbone=False),
    # 3x3 CBL with stride 2
    CBL(in_channels=first_out*8, out_channels=first_out*8, kernel_size=3, stride=2, padding=1),
    C3(in_channels=first_out*16, out_channels=first_out*16, width_multiple=0.5, depth=2, backbone=False)
# Backbone: a 6x6 stride-2 stem followed by four (stride-2 CBL, C3) stages.
# Each stride-2 CBL after the stem doubles the channel count; every C3 keeps it.
# NOTE(review): the list literal is cut off in this excerpt (no closing bracket).
self.backbone += [
    # idx 0: stem, 3 input channels -> first_out
    CBL(in_channels=3, out_channels=first_out, kernel_size=6, stride=2, padding=2),
    # idx 1-2
    CBL(in_channels=first_out, out_channels=first_out*2, kernel_size=3, stride=2, padding=1),
    C3(in_channels=first_out*2, out_channels=first_out*2, width_multiple=0.5, depth=2),
    # idx 3-4
    CBL(in_channels=first_out*2, out_channels=first_out*4, kernel_size=3, stride=2, padding=1),
    C3(in_channels=first_out*4, out_channels=first_out*4, width_multiple=0.5, depth=4),
    # idx 5-6
    CBL(in_channels=first_out*4, out_channels=first_out*8, kernel_size=3, stride=2, padding=1),
    C3(in_channels=first_out*8, out_channels=first_out*8, width_multiple=0.5, depth=6),
    # idx 7-8
    CBL(in_channels=first_out*8, out_channels=first_out*16, kernel_size=3, stride=2, padding=1),
    C3(in_channels=first_out*16, out_channels=first_out*16, width_multiple=0.5, depth=2),
class SPPF(nn.Module):
    """Layer set-up of the SPPF block (only ``__init__`` is visible here).

    Parameters:
        in_channels (int): number of channels of the input tensor
        out_channels (int): number of channels of the output tensor
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # hidden width: half of the input channels
        c_ = int(in_channels // 2)
        # 1x1 CBL reducing the input to the hidden width
        self.c1 = CBL(in_channels=in_channels, out_channels=c_, kernel_size=1, stride=1, padding=0)
        # 5x5 max-pool with stride 1 and padding 2, so the spatial size is kept
        self.pool = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
        # 1x1 CBL taking four times the hidden width; presumably the input of
        # the pooling chain concatenated with three pooled maps -- TODO confirm
        # against the forward pass, which is not visible in this excerpt.
        self.c_out = CBL(in_channels=c_ * 4, out_channels=out_channels, kernel_size=1, stride=1, padding=0)
class C3(nn.Module):
"""
Parameters:
in_channels (int): number of channels of the input tensor
out_channels (int): number of channels of the output tensor
width_multiple (float): it controls the number of channels (and weights)
of all the convolutions besides the
first and last one. The closer to 0,
the simpler the model; the closer to 1,
the more complex the model becomes
class Bottleneck(nn.Module):
"""
Parameters:
in_channels (int): number of channels of the input tensor
out_channels (int): number of channels of the output tensor
width_multiple (float): it controls the number of channels (and weights)
of all the convolutions besides the
first and last one. The closer to 0,
the simpler the model; the closer to 1,
the more complex the model becomes
class CBL(nn.Module):
    # Convolution followed by batch normalisation, chained in an nn.Sequential.
    # NOTE(review): the Sequential call (and the rest of the class) is cut off
    # in this excerpt, so any further layers are not visible.
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super(CBL, self).__init__()
        # the convolution has no bias term; presumably because the batch norm
        # that follows supplies its own shift -- TODO confirm
        conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False)
        bn = nn.BatchNorm2d(out_channels, eps=1e-3, momentum=0.03)
        self.cbl = nn.Sequential(
            conv,
            bn,