Skip to content

Instantly share code, notes, and snippets.

@tamnguyenvan
Last active August 30, 2021 01:32
Show Gist options
  • Select an option

  • Save tamnguyenvan/cd6b058f51097526186ca7a01642326d to your computer and use it in GitHub Desktop.

Select an option

Save tamnguyenvan/cd6b058f51097526186ca7a01642326d to your computer and use it in GitHub Desktop.
detect with tiling
import argparse
import os
import platform
import shutil
import time
from pathlib import Path
import math
import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
import numpy as np
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages, letterbox
from utils.general import (
check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, plot_one_box, strip_optimizer)
from utils.torch_utils import select_device, load_classifier, time_synchronized
import numpy as np
def nms(dets, thresh):
    """Greedy non-maximum suppression over a set of scored boxes.

    Args:
        dets: 2-D NumPy array with one detection per row. Columns 0-3 are
            read as ``x1, y1, x2, y2`` and column 4 as the score; any
            further columns are carried through untouched.
        thresh: IoU threshold. A box is dropped when its IoU with an
            already selected, higher-scoring box is greater than this value.

    Returns:
        The selected rows of ``dets``, ordered by descending score.
    """
    left, top = dets[:, 0], dets[:, 1]
    right, bottom = dets[:, 2], dets[:, 3]

    # Box sizes use the inclusive-pixel convention (hence the +1).
    box_areas = (right - left + 1) * (bottom - top + 1)

    # Candidate indices, best score first.
    remaining = dets[:, 4].argsort()[::-1]
    selected = []
    while remaining.size > 0:
        best, others = remaining[0], remaining[1:]
        selected.append(best)

        # Intersection of the best box with every other remaining candidate.
        inter_w = np.maximum(
            0.0,
            np.minimum(right[best], right[others]) - np.maximum(left[best], left[others]) + 1,
        )
        inter_h = np.maximum(
            0.0,
            np.minimum(bottom[best], bottom[others]) - np.maximum(top[best], top[others]) + 1,
        )
        intersection = inter_w * inter_h
        iou = intersection / (box_areas[best] + box_areas[others] - intersection)

        # Only candidates that do not overlap too much survive this round.
        remaining = others[iou <= thresh]
    return dets[selected, :]
def detect(model, im0, imgsz, device, half, opt, save_img=False):
    """Run the model on a single image and return its detections.

    Args:
        model: Loaded detection model; invoked as ``model(img, augment=...)``.
        im0: Image as an H x W x C NumPy array. The channel axis is reversed
            before inference, so a BGR image (as produced by ``cv2.imread``)
            is presumably expected -- confirm against the model's training.
        imgsz: Target size handed to ``letterbox``.
        device: ``torch.device`` the input tensor is moved to.
        half: When true the input is converted to FP16, otherwise to FP32.
        opt: Options namespace providing ``augment``, ``conf_thres``,
            ``iou_thres``, ``classes`` and ``agnostic_nms``.
        save_img: Unused; kept so existing callers keep working.

    Returns:
        A list of ``(xyxy, conf, cls)`` tuples where ``xyxy`` is a list of
        four floats (rescaled to ``im0`` by ``scale_coords`` and rounded) and
        ``conf`` / ``cls`` are Python floats.
    """
    cudnn.benchmark = True

    # NOTE: no dummy warm-up forward pass here. This function is called once
    # per tile, so warming up on every call would roughly double the
    # inference work without changing the result.

    # Padded resize
    img = letterbox(im0, new_shape=imgsz)[0]

    # Convert: reverse the channel axis and go from HWC to CHW
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)

    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add the batch dimension

    # Inference
    pred = model(img, augment=opt.augment)[0]

    # Apply NMS
    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)

    # Process detections
    dets = []
    for det in pred:  # detections per image
        if det is None or not len(det):
            continue
        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
        # One bulk transfer to Python floats instead of one .item() per value.
        for *xyxy, conf, cls in det.tolist():
            dets.append((xyxy, conf, cls))
    return dets
def _tile_starts(length, tile_size, overlap):
    """Return the start offsets of the tiles covering one axis of ``length``."""
    step = tile_size - overlap
    if length < tile_size:
        count = 1
    else:
        count = math.ceil((length - overlap) / step)

    starts = []
    for index in range(count):
        start = index * step
        if start + tile_size > length:
            # Shift the last tile back so it ends exactly on the border
            # (or starts at 0 when the axis is shorter than one tile).
            start = max(0, length - tile_size)
        starts.append(start)
    return starts


def slice_images_wo_labels(image, tile_size, overlap_ratio=0.25):
    """Cut an image into overlapping square tiles.

    Tiles are produced row by row (top to bottom, left to right). Tiles that
    would run past the right/bottom border are shifted back inside the image,
    so every tile is ``tile_size`` wide/high unless the image itself is
    smaller than ``tile_size`` along that axis.

    Args:
        image: NumPy array whose first two axes are height and width. Both
            2-D (grayscale) and 3-D (H x W x C) arrays are accepted.
        tile_size: Side length of a tile in pixels.
        overlap_ratio: Fraction of ``tile_size`` shared by neighbouring tiles.

    Returns:
        A list of ``(x1, y1, tile_image)`` tuples, where ``(x1, y1)`` is the
        tile's top-left corner in ``image`` and ``tile_image`` is a copy of
        the pixels.

    Raises:
        ValueError: If the overlap leaves no positive stride between tiles
            (for example ``overlap_ratio >= 1`` or ``tile_size <= 0``).
    """
    height, width = image.shape[:2]
    overlap = int(overlap_ratio * tile_size)
    if tile_size - overlap <= 0:
        raise ValueError(
            'tile_size (%r) and overlap_ratio (%r) leave no positive stride between tiles'
            % (tile_size, overlap_ratio)
        )

    x_starts = _tile_starts(width, tile_size, overlap)
    y_starts = _tile_starts(height, tile_size, overlap)

    tiles = []
    for y1 in y_starts:
        y2 = min(y1 + tile_size, height)
        for x1 in x_starts:
            x2 = min(x1 + tile_size, width)
            # Slicing only the first two axes keeps any channel axis intact
            # and also works for 2-D images.
            tiles.append((x1, y1, image[y1:y2, x1:x2].copy()))
    return tiles
def main():
    """Detect objects in the image at ``opt.source`` using overlapping tiles.

    The image is cut into tiles, ``detect`` is run on each tile, the boxes
    are shifted back into full-image coordinates and duplicates from
    overlapping tiles are removed with a per-class ``nms``. Two files are
    written to the current working directory, named after the source stem:

    * ``<stem>_out.txt`` -- one ``class,cx,cy,w,h`` line per detection, with
      centre and size divided by the image width/height.
    * ``<stem>_out.jpg`` -- the source image with the boxes drawn on it.

    Reads its configuration from the module-level ``opt`` namespace.

    Raises:
        FileNotFoundError: If ``opt.source`` cannot be read as an image.
    """
    origin = cv2.imread(opt.source)
    if origin is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Could not read image: %s' % opt.source)
    image_height, image_width = origin.shape[:2]

    # Initialize
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(opt.img_size, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(100, 255) for _ in range(3)] for _ in range(len(names))]

    # Tile the image exactly as loaded (BGR). detect() reverses the channel
    # order itself, so flipping here as well would undo that conversion.
    tiles = slice_images_wo_labels(origin, imgsz, overlap_ratio=0.25)

    # Run every tile and move its boxes into full-image coordinates.
    preds = []
    for offset_x, offset_y, tile in tiles:
        for (x1, y1, x2, y2), conf, cls in detect(model, tile, imgsz, device, half, opt):
            preds.append([x1 + offset_x, y1 + offset_y, x2 + offset_x, y2 + offset_y, conf, cls])
    # reshape keeps the array 2-D even when nothing was detected.
    preds = np.array(preds, dtype=float).reshape(-1, 6)

    # Per-class NMS removes duplicates produced by overlapping tiles.
    if len(preds):
        preds = np.concatenate(
            [nms(preds[preds[:, 5] == class_id], opt.iou_thres) for class_id in np.unique(preds[:, 5])],
            0,
        )
    print(preds)

    # Convert corner boxes to normalized centre/size form for the text output.
    out_data = preds[:, :4] / np.array([image_width, image_height, image_width, image_height])
    out_class = preds[:, 5:6]
    out_centers = (out_data[:, :2] + out_data[:, 2:4]) / 2
    out_wh = out_data[:, 2:4] - out_data[:, :2]
    out_data = np.concatenate([out_class, out_centers, out_wh], axis=1)

    txt_path = Path(opt.source).stem + '_out.txt'
    with open(txt_path, 'wt') as f:
        for class_id, cx, cy, w, h in out_data:
            f.write('%d,%.4f,%.4f,%.4f,%.4f\n' % (class_id, cx, cy, w, h))
    print('Saved output text as:', txt_path)

    # Draw the merged detections on the original image.
    for *xyxy, conf, cls in preds.tolist():
        label = '%s %.2f' % (names[int(cls)], conf)
        plot_one_box(xyxy, origin, label=label, color=colors[int(cls)], line_thickness=2)

    save_path = Path(opt.source).stem + '_out.jpg'
    cv2.imwrite(save_path, origin)
    print('Saved output image as:', save_path)
# Script entry point: parse the command line into the module-level ``opt``
# namespace (read by main()) and run tiled detection with gradients disabled.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov4-p5.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='inference/images', help='source')  # main() reads this as a single image file
    parser.add_argument('--output', type=str, default='inference/output', help='output folder')  # output folder
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            # --weights is a list when given on the command line (nargs='+')
            # but a plain string when the default is used.
            weights = opt.weights if isinstance(opt.weights, list) else [opt.weights]
            for opt.weights in weights:
                # detect() needs a model and an image, so it cannot be called
                # bare; main() runs the full pipeline for the current weights.
                main()
                strip_optimizer(opt.weights)
        else:
            main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment