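# Tiled (sliced) inference for YOLOR: slice a large image into overlapping
# tiles, run detection on each tile, shift the boxes back into full-image
# coordinates, merge duplicates with class-wise NMS, then save the result as
# a normalized text file and an annotated image.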
import argparse
import time
import math
from pathlib import Path
import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
import numpy as np
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages, letterbox
from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \
    strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized


def nms(dets, thresh):
    """Greedy NMS on boxes given as rows of [x1, y1, x2, y2, score, ...]."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current top-scoring box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)  # IoU
        # Keep only the boxes whose IoU with the current box is below the threshold
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]
    return dets[keep, :]
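
# Example (sketch, made-up boxes): the middle box overlaps the first with
# IoU ~= 100 / 142 ~= 0.70 > 0.5, so it is suppressed; the other two survive.
#   dets = np.array([[0., 0., 10., 10., 0.9],
#                    [1., 1., 11., 11., 0.8],
#                    [20., 20., 30., 30., 0.7]])
#   nms(dets, 0.5)  # -> keeps rows 0 and 2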


def slice_images_wo_labels(image, tile_size, overlap_ratio=0.25):
    """Slice an image into overlapping square tiles; returns (x_offset, y_offset, tile) triples."""
    height, width = image.shape[:2]
    overlap_w = overlap_h = int(overlap_ratio * tile_size)

    # Number of tiles along each axis, stepping by (tile_size - overlap)
    if width < tile_size:
        nx = 1
    else:
        nx = math.ceil((width - overlap_w) / (tile_size - overlap_w))
    if height < tile_size:
        ny = 1
    else:
        ny = math.ceil((height - overlap_h) / (tile_size - overlap_h))

    tiles = []
    for i in range(1, ny + 1):
        for j in range(1, nx + 1):
            x1 = (j - 1) * (tile_size - overlap_w)
            x2 = x1 + tile_size
            if x2 > width:
                # Clamp the last column of tiles to the image border
                x2 = width
                x1 = max(0, x2 - tile_size)
            y1 = (i - 1) * (tile_size - overlap_h)
            y2 = y1 + tile_size
            if y2 > height:
                # Clamp the last row of tiles to the image border
                y2 = height
                y1 = max(0, y2 - tile_size)
            tile_image = image[y1:y2, x1:x2, :].copy()
            tiles.append((x1, y1, tile_image))
    return tiles
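
# Example (sketch, made-up sizes): a 1920x1080 image with tile_size=1280 and
# overlap_ratio=0.25 gives a 320 px overlap and a 960 px stride, so
# nx = ceil((1920 - 320) / 960) = 2 and ny = 1: two 1280x1080 tiles with
# x-offsets 0 and 640 (the second tile is clamped to the right border).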


def detect(model, im0, imgsz, device, opt):
    """Run the model on one BGR image and return [[x1, y1, x2, y2], conf, cls] detections."""
    # Letterbox to the inference size, then convert BGR HWC -> RGB CHW
    img = letterbox(im0, new_shape=imgsz, auto_size=64)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)

    img = torch.from_numpy(img).to(device)
    img = img.float()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add batch dimension

    # Inference
    t1 = time_synchronized()
    pred = model(img, augment=opt.augment)[0]

    # Apply NMS
    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
    t2 = time_synchronized()

    # Process detections
    dets = []
    for i, det in enumerate(pred):  # detections per image
        if len(det):
            # Rescale boxes from img_size back to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                xyxy = [x.item() for x in xyxy]
                dets.append([xyxy, conf.item(), cls.item()])
    return dets
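
# Example (sketch): running one tile through detect() and unpacking the result.
#   dets = detect(model, tile_image, 1280, device, opt)
#   for (x1, y1, x2, y2), conf, cls in dets:
#       print(int(cls), conf, x1, y1, x2, y2)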


def main(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    # half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    # if half:
    #     model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Read the full-resolution image and slice it into overlapping tiles
    origin = cv2.imread(source)
    image = origin.copy()
    overlap_ratio = 0.25
    tiles = slice_images_wo_labels(image, imgsz, overlap_ratio=overlap_ratio)

    # Detect on each tile and shift the boxes back into full-image coordinates
    preds = []
    height, width = origin.shape[:2]
    for offset_x, offset_y, img in tiles:
        dets = detect(model, img, imgsz, device, opt)
        for xyxy, conf, cls in dets:
            x1, y1, x2, y2 = xyxy
            x1 += offset_x
            y1 += offset_y
            x2 += offset_x
            y2 += offset_y
            preds.append([x1, y1, x2, y2, conf, cls])

    preds = np.array(preds)
    if not len(preds):
        print('No detections in', source)
        return

    # Merge duplicate detections from overlapping tiles with class-wise NMS
    clss = np.unique(preds[:, 5])
    nms_preds = []
    for class_id in clss:
        pred = nms(preds[preds[:, 5] == class_id], opt.iou_thres)
        nms_preds.append(pred)
    preds = np.concatenate(nms_preds, 0)
    print(preds)

    # Convert to normalized YOLO-style rows (class, cx, cy, w, h):
    # x coordinates are divided by width, y coordinates by height
    out_data = preds[:, :4] / np.array([width, height, width, height])
    out_class = preds[:, 5:6]
    out_centers = (out_data[:, :2] + out_data[:, 2:4]) / 2
    out_wh = out_data[:, 2:4] - out_data[:, :2]
    out_data = np.concatenate([out_class, out_centers, out_wh], axis=1)

    txt_path = Path(source).stem + '_out.txt'
    with open(txt_path, 'wt') as f:
        for row in out_data:
            class_id, cx, cy, w, h = row
            f.write('%d,%.4f,%.4f,%.4f,%.4f\n' % (class_id, cx, cy, w, h))
    print('Saved output text as:', txt_path)

    # Draw the merged detections on the original image
    for det in preds:
        *xyxy, conf, cls = det.tolist()
        label = '%s %.2f' % (names[int(cls)], conf)
        plot_one_box(xyxy, origin, label=label, color=colors[int(cls)], line_thickness=2)
    save_path = Path(source).stem + '_out.jpg'
    cv2.imwrite(save_path, origin)
    print('Saved output image as:', save_path)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolor-p6.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='inference/images', help='source image path')
    parser.add_argument('--img-size', type=int, default=1280, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolor-p6.pt', 'yolor-w6.pt', 'yolor-e6.pt', 'yolor-d6.pt']:
                main()
                strip_optimizer(opt.weights)
        else:
            main()
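
# Example invocation (script and image names are illustrative):
#   python tiled_detect.py --source inference/images/large.jpg \
#       --weights yolor-p6.pt --img-size 1280 --conf-thres 0.25 --iou-thres 0.45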