Last active
February 12, 2022 09:40
-
-
Save tamnguyenvan/293b34e8a0e4620455da840bcac303fe to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import argparse | |
| import time | |
| import math | |
| from pathlib import Path | |
| import cv2 | |
| import torch | |
| import torch.backends.cudnn as cudnn | |
| from numpy import random | |
| import numpy as np | |
| from models.experimental import attempt_load | |
| from utils.datasets import LoadStreams, LoadImages, letterbox | |
| from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \ | |
| strip_optimizer, set_logging, increment_path | |
| from utils.plots import plot_one_box | |
| from utils.torch_utils import select_device, load_classifier, time_synchronized | |
def nms(dets, thresh):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Args:
        dets: 2-D NumPy array with one detection per row. Columns 0-3 are
            read as ``x1, y1, x2, y2`` and column 4 as the score; any extra
            columns are carried through unchanged.
        thresh: IoU limit. A candidate is discarded when its IoU with a
            kept, higher-scoring box is strictly greater than this value.

    Returns:
        The surviving rows of ``dets``, highest score first.
    """
    left, top, right, bottom, score = (dets[:, col] for col in range(5))
    # Widths and heights are measured as (max - min + 1).
    box_area = (right - left + 1) * (bottom - top + 1)

    candidates = score.argsort()[::-1]
    kept = []
    while candidates.size > 0:
        best, rest = candidates[0], candidates[1:]
        kept.append(best)

        # Intersection of the current best box with every remaining box.
        inter_w = np.maximum(
            0.0,
            np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1,
        )
        inter_h = np.maximum(
            0.0,
            np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1,
        )
        overlap = inter_w * inter_h
        iou = overlap / (box_area[best] + box_area[rest] - overlap)

        # Only boxes that do not overlap the best one too much stay in play.
        candidates = rest[iou <= thresh]

    return dets[kept, :]
def _tile_spans(length, tile_size, overlap):
    """Return the ``(start, stop)`` pixel range of every tile along one axis."""
    if length < tile_size:
        # The axis is shorter than a tile: a single truncated tile covers it.
        return [(0, length)]

    stride = tile_size - overlap
    if stride <= 0:
        raise ValueError(
            'overlap (%d px) must be smaller than tile_size (%d px)' % (overlap, tile_size)
        )

    spans = []
    for k in range(math.ceil((length - overlap) / stride)):
        start = k * stride
        stop = start + tile_size
        if stop > length:
            # Shift the last tile back so it stays full-sized and ends on the border.
            stop = length
            start = length - tile_size
        spans.append((start, stop))
    return spans


def slice_images_wo_labels(image, tile_size, overlap_ratio=0.25):
    """Cut an image into overlapping square tiles.

    Args:
        image: NumPy array whose first two axes are height and width. Both
            2-D (grayscale) and 3-D (with channels) arrays are accepted.
        tile_size: Side length of a tile in pixels.
        overlap_ratio: Fraction of ``tile_size`` shared by neighbouring tiles.

    Returns:
        A list of ``(x1, y1, tile)`` tuples in row-major order, where
        ``(x1, y1)`` is the tile's top-left corner in ``image`` and ``tile``
        is an independent copy of the pixels.

    Raises:
        ValueError: If the overlap leaves no positive stride along an axis
            that needs more than one tile.
    """
    height, width = image.shape[:2]
    overlap = int(overlap_ratio * tile_size)

    # Each axis is independent, so compute its spans once instead of per tile.
    x_spans = _tile_spans(width, tile_size, overlap)
    y_spans = _tile_spans(height, tile_size, overlap)

    return [
        (x1, y1, image[y1:y2, x1:x2].copy())
        for y1, y2 in y_spans
        for x1, x2 in x_spans
    ]
def detect(model, im0, imgsz, device, opt):
    """Run the detector on a single image and return boxes in ``im0`` pixels.

    Args:
        model: Detection model, called as ``model(img, augment=...)``; its
            first output is passed to ``non_max_suppression``.
        im0: Image array in height x width x channel layout. The channel
            axis is reversed before inference (BGR -> RGB).
        imgsz: Target size passed to ``letterbox`` as ``new_shape``.
        device: Torch device the input tensor is moved to.
        opt: Options object; ``augment``, ``conf_thres``, ``iou_thres``,
            ``classes`` and ``agnostic_nms`` are read from it.

    Returns:
        A list of ``[[x1, y1, x2, y2], conf, cls]`` entries holding plain
        Python floats, with box coordinates rescaled to ``im0``.
    """
    img = letterbox(im0, new_shape=imgsz, auto_size=64)[0]

    # HWC BGR -> CHW RGB, then scale 0-255 to 0.0-1.0 and add a batch axis.
    img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))
    img = torch.from_numpy(img).to(device).float()
    img /= 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    pred = model(img, augment=opt.augment)[0]
    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                               classes=opt.classes, agnostic=opt.agnostic_nms)

    dets = []
    for det in pred:  # one entry per image in the batch
        if det is None or not len(det):
            continue
        # Rescale boxes from the letterboxed size back to the im0 size.
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
        # A single tolist() converts the whole tensor instead of one
        # .item() call per scalar.
        for *xyxy, conf, cls in reversed(det.tolist()):
            dets.append([xyxy, conf, cls])
    return dets
def _to_label_rows(preds, width, height):
    """Turn ``[x1, y1, x2, y2, conf, cls]`` rows into ``[cls, cx, cy, w, h]`` relative to the image size."""
    # x coordinates are divided by the width and y coordinates by the height.
    boxes = preds[:, :4] / np.array([width, height, width, height])
    centers = (boxes[:, :2] + boxes[:, 2:4]) / 2
    sizes = boxes[:, 2:4] - boxes[:, :2]
    return np.concatenate([preds[:, 5:6], centers, sizes], axis=1)


def main(save_img=False):
    """Detect objects in one image tile by tile and save the results.

    The image at ``opt.source`` is split into overlapping tiles, each tile is
    run through the model, the per-tile boxes are shifted back into full-image
    coordinates and merged with a per-class NMS. Two files are written to the
    current working directory: ``<stem>_out.txt`` with one
    ``class,cx,cy,w,h`` line per box (values divided by the image width or
    height) and ``<stem>_out.jpg`` with the boxes drawn on the image.

    Settings are read from the module-level ``opt`` namespace.

    Args:
        save_img: Unused; kept so existing callers keep working.

    Raises:
        FileNotFoundError: If ``opt.source`` cannot be read as an image.
    """
    weights, save_txt, imgsz = opt.weights, opt.save_txt, opt.img_size

    # Directories (the run directory is still created as before, although the
    # outputs below are written next to the working directory).
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    origin = cv2.imread(opt.source)
    if origin is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError('Could not read an image from: %s' % opt.source)
    height, width = origin.shape[:2]

    # Run the detector per tile and move every box into full-image coordinates.
    preds = []
    for offset_x, offset_y, tile in slice_images_wo_labels(origin, imgsz, overlap_ratio=0.25):
        for (x1, y1, x2, y2), conf, cls in detect(model, tile, imgsz, device, opt):
            preds.append([x1 + offset_x, y1 + offset_y, x2 + offset_x, y2 + offset_y, conf, cls])
    # reshape keeps the array 2-D even when nothing was detected.
    preds = np.array(preds, dtype=float).reshape(-1, 6)

    # Objects in the overlap between tiles are detected twice; merge per class.
    kept = [nms(preds[preds[:, 5] == class_id], opt.iou_thres)
            for class_id in np.unique(preds[:, 5])]
    if kept:
        preds = np.concatenate(kept, 0)
    print(preds)

    txt_path = Path(opt.source).stem + '_out.txt'
    with open(txt_path, 'wt') as f:
        f.writelines('%d,%.4f,%.4f,%.4f,%.4f\n' % tuple(row)
                     for row in _to_label_rows(preds, width, height).tolist())
    print('Saved output text as:', txt_path)

    # Draw the merged boxes on the full image.
    for *xyxy, conf, cls in preds.tolist():
        label = '%s %.2f' % (names[int(cls)], conf)
        plot_one_box(xyxy, origin, label=label, color=colors[int(cls)], line_thickness=2)

    save_path = Path(opt.source).stem + '_out.jpg'
    cv2.imwrite(save_path, origin)
    print('Saved output image as:', save_path)
if __name__ == '__main__':
    # Command-line entry point: parse the options into the module-level
    # ``opt`` namespace that main() reads, then run inference without
    # gradient tracking.
    parser = argparse.ArgumentParser()

    # Model and input
    parser.add_argument('--weights', nargs='+', type=str, default='yolor-p6.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='inference/images', help='source')
    parser.add_argument('--img-size', type=int, default=1280, help='inference size (pixels)')

    # Detection thresholds and filtering
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')

    # Output location
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')

    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if not opt.update:
            main()
        else:
            # --update: run once per listed checkpoint and strip its optimizer
            # state afterwards (to fix SourceChangeWarning).
            for checkpoint in ['yolor-p6.pt', 'yolor-w6.pt', 'yolor-e6.pt', 'yolor-d6.pt']:
                opt.weights = checkpoint
                main()
                strip_optimizer(opt.weights)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment