Last active
August 30, 2021 01:32
-
-
Save tamnguyenvan/cd6b058f51097526186ca7a01642326d to your computer and use it in GitHub Desktop.
detect with tiling
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import argparse | |
| import os | |
| import platform | |
| import shutil | |
| import time | |
| from pathlib import Path | |
| import math | |
| import cv2 | |
| import torch | |
| import torch.backends.cudnn as cudnn | |
| from numpy import random | |
| import numpy as np | |
| from models.experimental import attempt_load | |
| from utils.datasets import LoadStreams, LoadImages, letterbox | |
| from utils.general import ( | |
| check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, plot_one_box, strip_optimizer) | |
| from utils.torch_utils import select_device, load_classifier, time_synchronized | |
| import numpy as np | |
def nms(dets, thresh):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Args:
        dets: array-like of shape (N, K) with K >= 5. Columns 0-3 are
            ``x1, y1, x2, y2`` and column 4 is the score; any further
            columns are carried through untouched.
        thresh: a box is discarded when its IoU with an already kept,
            higher-scoring box is strictly greater than this value.

    Returns:
        ``numpy.ndarray`` holding the kept rows of ``dets``, ordered by
        descending score. Empty input yields an empty 2-D array (with the
        input's column count when it is 2-D, otherwise 5 columns) instead
        of raising.
    """
    dets = np.asarray(dets)
    if dets.size == 0:
        # e.g. np.array([]) built from an empty detection list is 1-D and
        # cannot be column-indexed below.
        n_cols = dets.shape[1] if dets.ndim == 2 else 5
        return dets.reshape(0, n_cols)

    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    # The "+ 1" treats coordinates as inclusive pixel indices.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score

    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)

        # Intersection of the best box with every remaining candidate.
        inter_w = np.maximum(0.0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest]) + 1)
        inter_h = np.maximum(0.0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest]) + 1)
        inter = inter_w * inter_h
        iou = inter / (areas[best] + areas[rest] - inter)

        # Only candidates that do not overlap the best box too much survive.
        order = rest[iou <= thresh]

    return dets[keep, :]
def detect(model, im0, imgsz, device, half, opt, save_img=False):
    """Run the detector on one image and return its detections.

    Args:
        model: loaded detection model; called as ``model(img, augment=...)``.
        im0: H x W x 3 image array in BGR channel order (channels are
            reversed to RGB here before inference).
        imgsz: inference size passed to ``letterbox``.
        device: torch device the input tensor is moved to.
        half: when true the input is converted to fp16, otherwise fp32.
        opt: options object providing ``augment``, ``conf_thres``,
            ``iou_thres``, ``classes`` and ``agnostic_nms``.
        save_img: unused; kept so existing callers keep working.

    Returns:
        List of ``(xyxy, conf, cls)`` tuples where ``xyxy`` is a list of
        four floats in ``im0`` pixel coordinates and ``conf`` / ``cls`` are
        floats.
    """
    cudnn.benchmark = True  # let cuDNN auto-tune its convolution algorithms

    # NOTE: no dummy warm-up forward pass here. This function is called once
    # per tile, so warming up on every call doubled the inference cost.

    # Padded resize
    img = letterbox(im0, new_shape=imgsz)[0]

    # BGR to RGB, HWC to CHW
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)

    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add the batch dimension

    # Inference
    pred = model(img, augment=opt.augment)[0]

    # Apply NMS
    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                               classes=opt.classes, agnostic=opt.agnostic_nms)

    # Process detections
    dets = []
    for det in pred:  # detections per image
        if det is None or not len(det):
            continue
        # Rescale boxes from the letterboxed size back to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
        # One tolist() per tensor instead of one .item() call per scalar.
        for *xyxy, conf, cls in det.tolist():
            dets.append((xyxy, conf, cls))
    return dets
def slice_images_wo_labels(image, tile_size, overlap_ratio=0.25):
    """Cut an image into overlapping square tiles.

    Tiles are laid out row by row with a stride of
    ``tile_size - int(overlap_ratio * tile_size)``. A tile that would run
    past the right or bottom edge is shifted back so it ends exactly on the
    edge; an image smaller than ``tile_size`` along an axis yields a single
    tile covering that whole axis.

    Args:
        image: array whose first two axes are height and width (trailing
            axes, e.g. channels, are kept as they are).
        tile_size: side length of a tile in pixels.
        overlap_ratio: fraction of ``tile_size`` shared by neighbouring
            tiles.

    Returns:
        List of ``(x1, y1, tile)`` tuples where ``(x1, y1)`` is the tile's
        top-left corner in ``image`` and ``tile`` is a copy of the pixels.

    Raises:
        ValueError: if the resulting stride is not positive (for example
            ``overlap_ratio >= 1``), which previously caused a division by
            zero or silently produced no tiles.
    """
    height, width = image.shape[:2]
    overlap = int(overlap_ratio * tile_size)
    stride = tile_size - overlap
    if stride <= 0:
        raise ValueError(
            'tile_size=%r with overlap_ratio=%r leaves no positive stride'
            % (tile_size, overlap_ratio))

    def _tile_starts(extent):
        # Start offsets of the tiles along one axis of length `extent`.
        if extent < tile_size:
            count = 1
        else:
            count = math.ceil((extent - overlap) / stride)
        starts = []
        for k in range(count):
            start = k * stride
            if start + tile_size > extent:
                # Shift the last tile back so it ends on the image edge.
                start = max(0, extent - tile_size)
            starts.append(start)
        return starts

    x_starts = _tile_starts(width)
    y_starts = _tile_starts(height)

    tiles = []
    for y1 in y_starts:
        y2 = min(y1 + tile_size, height)
        for x1 in x_starts:
            x2 = min(x1 + tile_size, width)
            # Index only the first two axes so 2-D images work as well.
            tiles.append((x1, y1, image[y1:y2, x1:x2].copy()))
    return tiles
def main():
    """Detect objects in ``opt.source`` tile by tile and save the results.

    Reads the module-level ``opt`` namespace (``source``, ``device``,
    ``weights``, ``img_size``, ``iou_thres`` plus whatever ``detect`` needs).
    The image is split into overlapping tiles, every tile is run through
    the model, detections are shifted back into full-image coordinates and
    merged with a per-class NMS.

    Writes two files into the current working directory:
        ``<stem>_out.txt``: one ``class,cx,cy,w,h`` line per detection,
            coordinates normalised by the image width/height.
        ``<stem>_out.jpg``: the input image with the boxes drawn on it.

    Raises:
        FileNotFoundError: if ``opt.source`` cannot be read as an image.
    """
    origin = cv2.imread(opt.source)
    if origin is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Could not read image: %s' % opt.source)
    image_height, image_width = origin.shape[:2]

    # Initialize
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(opt.img_size, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(100, 255) for _ in range(3)] for _ in range(len(names))]

    # Tile the BGR image exactly as loaded: detect() performs the BGR->RGB
    # flip itself, so flipping here as well would feed the model BGR data.
    tiles = slice_images_wo_labels(origin, imgsz, overlap_ratio=0.25)

    # Collect detections from all tiles in full-image coordinates.
    preds = []
    for offset_x, offset_y, tile in tiles:
        for (x1, y1, x2, y2), conf, cls in detect(model, tile, imgsz, device, half, opt):
            preds.append([x1 + offset_x, y1 + offset_y,
                          x2 + offset_x, y2 + offset_y, conf, cls])

    if preds:
        preds = np.array(preds)
        # Objects in the overlap regions are reported by several tiles;
        # merge those duplicates class by class.
        preds = np.concatenate(
            [nms(preds[preds[:, 5] == class_id], opt.iou_thres)
             for class_id in np.unique(preds[:, 5])], 0)
    else:
        # Nothing detected: keep a well-shaped empty array so the output
        # files are still written (empty txt, unmodified image).
        preds = np.zeros((0, 6))
    print(preds)

    # Convert corner boxes to normalised centre/size rows.
    out_data = preds[:, :4] / np.array([image_width, image_height, image_width, image_height])
    out_class = preds[:, 5:6]
    out_centers = (out_data[:, :2] + out_data[:, 2:4]) / 2
    out_wh = out_data[:, 2:4] - out_data[:, :2]
    out_data = np.concatenate([out_class, out_centers, out_wh], axis=1)

    txt_path = Path(opt.source).stem + '_out.txt'
    with open(txt_path, 'wt') as f:
        for class_id, cx, cy, w, h in out_data:
            f.write('%d,%.4f,%.4f,%.4f,%.4f\n' % (class_id, cx, cy, w, h))
    print('Saved output text as:', txt_path)

    # Draw the merged boxes on the original image.
    for *xyxy, conf, cls in preds.tolist():
        label = '%s %.2f' % (names[int(cls)], conf)
        plot_one_box(xyxy, origin, label=label, color=colors[int(cls)], line_thickness=2)

    save_path = Path(opt.source).stem + '_out.jpg'
    cv2.imwrite(save_path, origin)
    print('Saved output image as:', save_path)
# Script entry point: parse the command line into the module-level `opt`
# namespace (read by main()) and run tiled detection without gradients.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov4-p5.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='inference/images', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--output', type=str, default='inference/output', help='output folder')  # output folder
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        main()
        if opt.update:  # update all models (to fix SourceChangeWarning)
            # The previous code called detect() with no arguments (a
            # guaranteed TypeError) on a placeholder weights list; strip the
            # optimizer from the weights that were actually supplied.
            # --weights is a plain string by default and a list when given.
            weight_paths = [opt.weights] if isinstance(opt.weights, str) else opt.weights
            for weights_path in weight_paths:
                strip_optimizer(weights_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment