tamnguyenvan · August 30, 2021 01:32
diff --git a/detect_with_tiling.py b/detect_with_tiling.py
 import argparse
 import os
 import platform
 import shutil
 import time
 from pathlib import Path

 import math
 import cv2
 import torch
 import torch.backends.cudnn as cudnn
 from numpy import random
 import numpy as np

 from models.experimental import attempt_load
 from utils.datasets import LoadStreams, LoadImages, letterbox
 from utils.general import (
    check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, plot_one_box, strip_optimizer)
 from utils.torch_utils import select_device, load_classifier, time_synchronized


 import numpy as np

 def nms(dets, thresh):
    """Greedy non-maximum suppression over a 2-D array of detections.

    Columns 0-3 of ``dets`` are read as ``x1, y1, x2, y2`` and column 4 as
    the score; any further columns are carried through untouched. Widths and
    heights are measured with a ``+ 1`` term (``x2 - x1 + 1``).

    Returns the rows of ``dets`` that survive, highest score first.
    """
    left, top, right, bottom, conf = (dets[:, col] for col in range(5))
    box_area = (right - left + 1) * (bottom - top + 1)

    # Candidate row indices, best score first.
    remaining = conf.argsort()[::-1]
    selected = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        selected.append(best)

        # Intersection rectangle between the best box and every other candidate.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])
        overlap = (np.maximum(0.0, inter_right - inter_left + 1)
                   * np.maximum(0.0, inter_bottom - inter_top + 1))
        iou = overlap / (box_area[best] + box_area[rest] - overlap)

        # Only candidates that do not overlap the best box too much stay in play.
        remaining = rest[iou <= thresh]

    return dets[selected, :]


 def detect(model, im0, imgsz, device, half, opt, save_img=False):
    """Run the detector on one BGR image and return its detections.

    The image is letterboxed to ``imgsz``, flipped from BGR to RGB, scaled to
    the 0-1 range and passed through ``model``. The raw predictions are then
    filtered with ``non_max_suppression`` using the thresholds in ``opt`` and
    rescaled to ``im0`` pixel coordinates with ``scale_coords``.

    No warm-up forward pass is done here: this function is called once per
    tile, so a per-call warm-up would double the inference work.

    Args:
        model: Detection model, called as ``model(img, augment=...)``.
        im0: Image array in BGR channel order, indexed as (H, W, C).
        imgsz: Inference size handed to ``letterbox``.
        device: Torch device the input tensor is moved to.
        half: Feed the model fp16 input when true, fp32 otherwise.
        opt: Options providing ``augment``, ``conf_thres``, ``iou_thres``,
            ``classes`` and ``agnostic_nms``.
        save_img: Unused; kept so existing callers keep working.

    Returns:
        A list of ``(xyxy, conf, cls)`` tuples with plain Python floats, where
        ``xyxy`` is ``[x1, y1, x2, y2]`` in ``im0`` coordinates.
    """
    cudnn.benchmark = True

    # Padded resize, then HWC BGR -> CHW RGB in a contiguous buffer.
    img = letterbox(im0, new_shape=imgsz)[0]
    img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add the batch dimension

    # Inference followed by the model-level NMS
    pred = model(img, augment=opt.augment)[0]
    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)

    dets = []
    for det in pred:  # detections per image
        if det is None or not len(det):
            continue
        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
        # One bulk conversion to Python floats instead of an .item() call per scalar.
        for *xyxy, conf, cls in det.tolist():
            dets.append((xyxy, conf, cls))
    return dets


 def slice_images_wo_labels(image, tile_size, overlap_ratio=0.25):
    """Cut an image into overlapping square tiles.

    Tiles are laid out row by row on a grid whose step is ``tile_size`` minus
    the overlap (``int(overlap_ratio * tile_size)`` pixels). A tile that would
    run past the right or bottom edge is shifted back so it ends exactly on
    that edge. An image smaller than ``tile_size`` along an axis yields a
    single, smaller tile along that axis.

    Args:
        image: Array of shape (H, W) or (H, W, C).
        tile_size: Side length of a tile in pixels.
        overlap_ratio: Fraction of ``tile_size`` shared by neighbouring tiles.

    Returns:
        A list of ``(x1, y1, tile)`` tuples, where ``(x1, y1)`` is the
        top-left corner of the tile inside ``image`` and ``tile`` is a copy
        of those pixels.

    Raises:
        ValueError: If an axis needs more than one tile but the overlap
            leaves no positive step between neighbouring tiles.
    """
    height, width = image.shape[:2]
    overlap = int(overlap_ratio * tile_size)
    step = tile_size - overlap

    def tile_starts(extent):
        # Start offsets of the tiles along one axis of length ``extent``.
        if extent < tile_size:
            return [0]
        if step <= 0:
            raise ValueError(
                'overlap_ratio=%r leaves no positive step between tiles of size %r'
                % (overlap_ratio, tile_size))
        count = math.ceil((extent - overlap) / step)
        # Clamp so the last tile ends on the image edge instead of past it.
        return [min(k * step, extent - tile_size) for k in range(count)]

    column_starts = tile_starts(width)
    tiles = []
    for y1 in tile_starts(height):
        for x1 in column_starts:
            # Slicing only the first two axes keeps 2-D (grayscale) input working.
            tile_image = image[y1:y1 + tile_size, x1:x1 + tile_size].copy()
            tiles.append((x1, y1, tile_image))
    return tiles


 def main():
    """Detect objects in ``opt.source`` tile by tile and save the merged result.

    The image is cut into overlapping tiles of the checked inference size,
    each tile is passed to ``detect``, the boxes are shifted back into
    full-image coordinates and duplicates coming from overlapping tiles are
    removed per class with ``nms``. Two files are written into the current
    working directory: ``<stem>_out.txt`` with one ``class,cx,cy,w,h`` line
    per box (normalised by the image size) and ``<stem>_out.jpg`` with the
    boxes drawn on the image.

    Settings are read from the module-level ``opt`` namespace.

    Raises:
        FileNotFoundError: If ``opt.source`` cannot be read as an image.
    """
    origin = cv2.imread(opt.source)
    if origin is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError('Could not read image: %s' % opt.source)
    image_height, image_width = origin.shape[:2]

    # Initialize
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(opt.img_size, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Tile the image exactly as read (BGR): detect() performs the BGR -> RGB
    # flip itself, so flipping here as well would feed swapped channels.
    tiles = slice_images_wo_labels(origin, imgsz, overlap_ratio=0.25)

    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(100, 255) for _ in range(3)] for _ in range(len(names))]

    # Collect the detections of every tile in full-image coordinates.
    preds = []
    for offset_x, offset_y, tile in tiles:
        for (x1, y1, x2, y2), conf, cls in detect(model, tile, imgsz, device, half, opt):
            preds.append([x1 + offset_x, y1 + offset_y, x2 + offset_x, y2 + offset_y, conf, cls])

    # The reshape keeps a well-formed (0, 6) array when nothing was detected.
    preds = np.array(preds, dtype=np.float64).reshape(-1, 6)
    if len(preds):
        # Overlapping tiles report the same object twice; merge per class.
        preds = np.concatenate(
            [nms(preds[preds[:, 5] == class_id], opt.iou_thres) for class_id in np.unique(preds[:, 5])], 0)

    print(preds)
    out_data = preds[:, :4] / np.array([image_width, image_height, image_width, image_height])
    out_class = preds[:, 5:6]
    out_centers = (out_data[:, :2] + out_data[:, 2:4]) / 2
    out_wh = out_data[:, 2:4] - out_data[:, :2]
    out_data = np.concatenate([out_class, out_centers, out_wh], axis=1)
    txt_path = Path(opt.source).stem + '_out.txt'
    with open(txt_path, 'wt') as f:
        for class_id, cx, cy, w, h in out_data:
            f.write('%d,%.4f,%.4f,%.4f,%.4f\n' % (class_id, cx, cy, w, h))
    print('Saved output text as:', txt_path)

    # Draw the surviving boxes on the original image.
    for *xyxy, conf, cls in preds.tolist():
        label = '%s %.2f' % (names[int(cls)], conf)
        plot_one_box(xyxy, origin, label=label, color=colors[int(cls)], line_thickness=2)
    save_path = Path(opt.source).stem + '_out.jpg'
    cv2.imwrite(save_path, origin)
    print('Saved output image as:', save_path)


 if __name__ == '__main__':
    # Command-line entry point: parse the options into the module-level
    # ``opt`` namespace that main() reads, then run with autograd disabled.
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov4-p5.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='inference/images', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--output', type=str, default='inference/output', help='output folder')  # output folder
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            # --weights is a list when given on the command line but a plain
            # string when the default is used; handle both.
            weights = opt.weights if isinstance(opt.weights, (list, tuple)) else [opt.weights]
            # Run each requested checkpoint through main() (detect() cannot be
            # called without arguments), then strip its optimizer state.
            for opt.weights in weights:
                main()
                strip_optimizer(opt.weights)
        else:
            main()
	import argparse
	import os
	import platform
	import shutil
	import time
	from pathlib import Path

	import math
	import cv2
	import torch
	import torch.backends.cudnn as cudnn
	from numpy import random
	import numpy as np

	from models.experimental import attempt_load
	from utils.datasets import LoadStreams, LoadImages, letterbox
	from utils.general import (
	check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, plot_one_box, strip_optimizer)
	from utils.torch_utils import select_device, load_classifier, time_synchronized


	import numpy as np

	def nms(dets, thresh):
	    """Greedy non-maximum suppression over a 2-D array of detections.

	    Columns 0-3 of ``dets`` are read as ``x1, y1, x2, y2`` and column 4 as
	    the score; any further columns are carried through untouched. Widths and
	    heights are measured with a ``+ 1`` term (``x2 - x1 + 1``).

	    Returns the rows of ``dets`` that survive, highest score first.
	    """
	    left, top, right, bottom, conf = (dets[:, col] for col in range(5))
	    box_area = (right - left + 1) * (bottom - top + 1)

	    # Candidate row indices, best score first.
	    remaining = conf.argsort()[::-1]
	    selected = []
	    while remaining.size > 0:
	        best, rest = remaining[0], remaining[1:]
	        selected.append(best)

	        # Intersection rectangle between the best box and every other candidate.
	        inter_left = np.maximum(left[best], left[rest])
	        inter_top = np.maximum(top[best], top[rest])
	        inter_right = np.minimum(right[best], right[rest])
	        inter_bottom = np.minimum(bottom[best], bottom[rest])
	        overlap = (np.maximum(0.0, inter_right - inter_left + 1)
	                   * np.maximum(0.0, inter_bottom - inter_top + 1))
	        iou = overlap / (box_area[best] + box_area[rest] - overlap)

	        # Only candidates that do not overlap the best box too much stay in play.
	        remaining = rest[iou <= thresh]

	    return dets[selected, :]


	def detect(model, im0, imgsz, device, half, opt, save_img=False):
	    """Run the detector on one BGR image and return its detections.

	    The image is letterboxed to ``imgsz``, flipped from BGR to RGB, scaled to
	    the 0-1 range and passed through ``model``. The raw predictions are then
	    filtered with ``non_max_suppression`` using the thresholds in ``opt`` and
	    rescaled to ``im0`` pixel coordinates with ``scale_coords``.

	    No warm-up forward pass is done here: this function is called once per
	    tile, so a per-call warm-up would double the inference work.

	    Args:
	        model: Detection model, called as ``model(img, augment=...)``.
	        im0: Image array in BGR channel order, indexed as (H, W, C).
	        imgsz: Inference size handed to ``letterbox``.
	        device: Torch device the input tensor is moved to.
	        half: Feed the model fp16 input when true, fp32 otherwise.
	        opt: Options providing ``augment``, ``conf_thres``, ``iou_thres``,
	            ``classes`` and ``agnostic_nms``.
	        save_img: Unused; kept so existing callers keep working.

	    Returns:
	        A list of ``(xyxy, conf, cls)`` tuples with plain Python floats, where
	        ``xyxy`` is ``[x1, y1, x2, y2]`` in ``im0`` coordinates.
	    """
	    cudnn.benchmark = True

	    # Padded resize, then HWC BGR -> CHW RGB in a contiguous buffer.
	    img = letterbox(im0, new_shape=imgsz)[0]
	    img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

	    img = torch.from_numpy(img).to(device)
	    img = img.half() if half else img.float()  # uint8 to fp16/32
	    img /= 255.0  # 0 - 255 to 0.0 - 1.0
	    if img.ndimension() == 3:
	        img = img.unsqueeze(0)  # add the batch dimension

	    # Inference followed by the model-level NMS
	    pred = model(img, augment=opt.augment)[0]
	    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)

	    dets = []
	    for det in pred:  # detections per image
	        if det is None or not len(det):
	            continue
	        # Rescale boxes from img_size to im0 size
	        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
	        # One bulk conversion to Python floats instead of an .item() call per scalar.
	        for *xyxy, conf, cls in det.tolist():
	            dets.append((xyxy, conf, cls))
	    return dets


	def slice_images_wo_labels(image, tile_size, overlap_ratio=0.25):
	    """Cut an image into overlapping square tiles.

	    Tiles are laid out row by row on a grid whose step is ``tile_size`` minus
	    the overlap (``int(overlap_ratio * tile_size)`` pixels). A tile that would
	    run past the right or bottom edge is shifted back so it ends exactly on
	    that edge. An image smaller than ``tile_size`` along an axis yields a
	    single, smaller tile along that axis.

	    Args:
	        image: Array of shape (H, W) or (H, W, C).
	        tile_size: Side length of a tile in pixels.
	        overlap_ratio: Fraction of ``tile_size`` shared by neighbouring tiles.

	    Returns:
	        A list of ``(x1, y1, tile)`` tuples, where ``(x1, y1)`` is the
	        top-left corner of the tile inside ``image`` and ``tile`` is a copy
	        of those pixels.

	    Raises:
	        ValueError: If an axis needs more than one tile but the overlap
	            leaves no positive step between neighbouring tiles.
	    """
	    height, width = image.shape[:2]
	    overlap = int(overlap_ratio * tile_size)
	    step = tile_size - overlap

	    def tile_starts(extent):
	        # Start offsets of the tiles along one axis of length ``extent``.
	        if extent < tile_size:
	            return [0]
	        if step <= 0:
	            raise ValueError(
	                'overlap_ratio=%r leaves no positive step between tiles of size %r'
	                % (overlap_ratio, tile_size))
	        count = math.ceil((extent - overlap) / step)
	        # Clamp so the last tile ends on the image edge instead of past it.
	        return [min(k * step, extent - tile_size) for k in range(count)]

	    column_starts = tile_starts(width)
	    tiles = []
	    for y1 in tile_starts(height):
	        for x1 in column_starts:
	            # Slicing only the first two axes keeps 2-D (grayscale) input working.
	            tile_image = image[y1:y1 + tile_size, x1:x1 + tile_size].copy()
	            tiles.append((x1, y1, tile_image))
	    return tiles


	def main():
	    """Detect objects in ``opt.source`` tile by tile and save the merged result.

	    The image is cut into overlapping tiles of the checked inference size,
	    each tile is passed to ``detect``, the boxes are shifted back into
	    full-image coordinates and duplicates coming from overlapping tiles are
	    removed per class with ``nms``. Two files are written into the current
	    working directory: ``<stem>_out.txt`` with one ``class,cx,cy,w,h`` line
	    per box (normalised by the image size) and ``<stem>_out.jpg`` with the
	    boxes drawn on the image.

	    Settings are read from the module-level ``opt`` namespace.

	    Raises:
	        FileNotFoundError: If ``opt.source`` cannot be read as an image.
	    """
	    origin = cv2.imread(opt.source)
	    if origin is None:
	        # cv2.imread reports failure by returning None instead of raising.
	        raise FileNotFoundError('Could not read image: %s' % opt.source)
	    image_height, image_width = origin.shape[:2]

	    # Initialize
	    device = select_device(opt.device)
	    half = device.type != 'cpu'  # half precision only supported on CUDA

	    # Load model
	    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
	    imgsz = check_img_size(opt.img_size, s=model.stride.max())  # check img_size
	    if half:
	        model.half()  # to FP16

	    # Tile the image exactly as read (BGR): detect() performs the BGR -> RGB
	    # flip itself, so flipping here as well would feed swapped channels.
	    tiles = slice_images_wo_labels(origin, imgsz, overlap_ratio=0.25)

	    names = model.module.names if hasattr(model, 'module') else model.names
	    colors = [[random.randint(100, 255) for _ in range(3)] for _ in range(len(names))]

	    # Collect the detections of every tile in full-image coordinates.
	    preds = []
	    for offset_x, offset_y, tile in tiles:
	        for (x1, y1, x2, y2), conf, cls in detect(model, tile, imgsz, device, half, opt):
	            preds.append([x1 + offset_x, y1 + offset_y, x2 + offset_x, y2 + offset_y, conf, cls])

	    # The reshape keeps a well-formed (0, 6) array when nothing was detected.
	    preds = np.array(preds, dtype=np.float64).reshape(-1, 6)
	    if len(preds):
	        # Overlapping tiles report the same object twice; merge per class.
	        preds = np.concatenate(
	            [nms(preds[preds[:, 5] == class_id], opt.iou_thres) for class_id in np.unique(preds[:, 5])], 0)

	    print(preds)
	    out_data = preds[:, :4] / np.array([image_width, image_height, image_width, image_height])
	    out_class = preds[:, 5:6]
	    out_centers = (out_data[:, :2] + out_data[:, 2:4]) / 2
	    out_wh = out_data[:, 2:4] - out_data[:, :2]
	    out_data = np.concatenate([out_class, out_centers, out_wh], axis=1)
	    txt_path = Path(opt.source).stem + '_out.txt'
	    with open(txt_path, 'wt') as f:
	        for class_id, cx, cy, w, h in out_data:
	            f.write('%d,%.4f,%.4f,%.4f,%.4f\n' % (class_id, cx, cy, w, h))
	    print('Saved output text as:', txt_path)

	    # Draw the surviving boxes on the original image.
	    for *xyxy, conf, cls in preds.tolist():
	        label = '%s %.2f' % (names[int(cls)], conf)
	        plot_one_box(xyxy, origin, label=label, color=colors[int(cls)], line_thickness=2)
	    save_path = Path(opt.source).stem + '_out.jpg'
	    cv2.imwrite(save_path, origin)
	    print('Saved output image as:', save_path)


	if __name__ == '__main__':
	    # Command-line entry point: parse the options into the module-level
	    # ``opt`` namespace that main() reads, then run with autograd disabled.
	    parser = argparse.ArgumentParser()
	    parser.add_argument('--weights', nargs='+', type=str, default='yolov4-p5.pt', help='model.pt path(s)')
	    parser.add_argument('--source', type=str, default='inference/images', help='source')  # file/folder, 0 for webcam
	    parser.add_argument('--output', type=str, default='inference/output', help='output folder')  # output folder
	    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
	    parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
	    parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
	    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
	    parser.add_argument('--view-img', action='store_true', help='display results')
	    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
	    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
	    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
	    parser.add_argument('--augment', action='store_true', help='augmented inference')
	    parser.add_argument('--update', action='store_true', help='update all models')
	    opt = parser.parse_args()
	    print(opt)

	    with torch.no_grad():
	        if opt.update:  # update all models (to fix SourceChangeWarning)
	            # --weights is a list when given on the command line but a plain
	            # string when the default is used; handle both.
	            weights = opt.weights if isinstance(opt.weights, (list, tuple)) else [opt.weights]
	            # Run each requested checkpoint through main() (detect() cannot be
	            # called without arguments), then strip its optimizer state.
	            for opt.weights in weights:
	                main()
	                strip_optimizer(opt.weights)
	        else:
	            main()
No results found