Object Detection on a Public Live Camera Stream using YOLOv3
coco.names — the 80 COCO class labels the pretrained YOLOv3 model predicts, one per line:
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
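These 80 labels have to stay aligned with classes=80 in yolov3.cfg: each [yolo] head predicts 3 boxes per cell, and each box carries 4 coordinates, 1 objectness score, and 80 class scores, which is where filters=255 (3 × (5 + 80)) before every [yolo] layer comes from. A minimal sanity-check sketch, assuming the same YOLO/coco.names path the script uses:

# Sanity check: the label file, the detection-head filter count, and classes=80
# in yolov3.cfg all have to agree. The path below matches the one used in yolo.py.
labels = open('YOLO/coco.names').read().strip().split('\n')
num_classes = len(labels)                        # expected: 80
boxes_per_cell = 3                               # each [yolo] layer masks 3 anchors
filters_before_yolo = boxes_per_cell * (5 + num_classes)

print(num_classes, filters_before_yolo)          # 80 255
assert num_classes == 80
assert filters_before_yolo == 255                # matches filters=255 before each [yolo] layer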
yolo.py — captures the camera stream with OpenCV, runs YOLOv3 through the cv2.dnn module, draws labelled boxes, and records the annotated video:
# https://screenrec.com/share/sca8wgJKrU
import numpy as np
import time
import cv2
import sys
import os

# Default public camera stream; pass a different URL (or 0 for the local webcam)
# as the first command-line argument.
port = '8080'
ip = '80.32.125.254'
try:
    url = sys.argv[1]
except IndexError:
    url = f'http://{ip}:{port}/cgi-bin/faststream.jpg'
if url == '0':
    url = 0

video_capture = cv2.VideoCapture(url)

# Record the annotated stream to Recorded/<timestamp> yolo.mp4.
os.makedirs('Recorded', exist_ok=True)
file = time.strftime("%Y-%m-%d %H-%M-%S", time.gmtime())
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(f'Recorded/{file} yolo.mp4', fourcc, 20.0, (640, 480))

confidenceThreshold = 0.5
NMSThreshold = 0.3

modelWeights = 'YOLO/yolov3.weights'   # download the weights file (236 MB), link below
modelConfiguration = 'YOLO/yolov3.cfg'
labelsPath = 'YOLO/coco.names'
labels = open(labelsPath).read().strip().split('\n')

# One fixed random colour per class for drawing boxes.
np.random.seed(10)
COLORS = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
outputLayer = net.getLayerNames()
outputLayer = [outputLayer[i - 1] for i in net.getUnconnectedOutLayers()]

(W, H) = (None, None)
while True:
    ret, frame = video_capture.read()
    if not ret:  # stream ended or frame could not be read
        break
    if W is None or H is None:
        (H, W) = frame.shape[:2]

    # Forward pass of YOLOv3 on the current frame.
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layersOutputs = net.forward(outputLayer)

    boxes = []
    confidences = []
    classIDs = []
    for output in layersOutputs:
        for detection in output:
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]
            if confidence > confidenceThreshold:
                # Detections are centre x/y plus width/height, relative to the frame size.
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype('int')
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                classIDs.append(classID)

    # Non-maximum suppression to drop overlapping boxes.
    detectionNMS = cv2.dnn.NMSBoxes(boxes, confidences, confidenceThreshold, NMSThreshold)
    if len(detectionNMS) > 0:
        for i in detectionNMS.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            color = [int(c) for c in COLORS[classIDs[i]]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            text = '{}: {:.4f}'.format(labels[classIDs[i]], confidences[i])
            cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # The writer was opened at 640x480, so resize before writing.
    out.write(cv2.resize(frame, (640, 480)))
    cv2.imshow('YOLO - ESC to Quit', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break

video_capture.release()
out.release()
cv2.destroyAllWindows()

'''How to run file...
>>> python yolo.py http://212.26.235.210/mjpg/video.mjpg
>>> python yolo.py http://80.32.125.254:8080/cgi-bin/faststream.jpg
>>> python yolo.py http://212.147.38.3/mjpg/video.mjpg
>>> python yolo.py http://imvickykumar999:[email protected]:8080/video
>>> python yolo.py 0
'''
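Note that blobFromImage above resizes every frame to 416×416, while the yolov3.cfg below declares width=608 and height=608. OpenCV's DNN module will run the network at whatever blob size it is given, but accuracy and speed change with it. If you prefer to keep the script and the config in sync, a hypothetical helper like the following (not part of the gist) could read the input size from the [net] section instead of hard-coding it:

# Hypothetical helper: pull width/height out of the [net] section of a Darknet
# .cfg so the blob size passed to cv2.dnn.blobFromImage matches the config.
def net_input_size(cfg_path='YOLO/yolov3.cfg', default=(416, 416)):
    width = height = None
    with open(cfg_path) as f:
        for line in f:
            line = line.split('#')[0].strip()   # drop comments and whitespace
            if line.startswith('[') and width and height:
                break                            # left the [net] section
            if line.startswith('width='):
                width = int(line.split('=')[1])
            elif line.startswith('height='):
                height = int(line.split('=')[1])
    return (width, height) if width and height else default

print(net_input_size())   # (608, 608) with the config in this gist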
yolov3.cfg — the Darknet network definition loaded by cv2.dnn.readNetFromDarknet:
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=64
subdivisions=16
width=608
height=608
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 6,7,8
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
yolov3.weights — Internet shortcut to the pretrained COCO weights (about 236 MB), which the script expects at YOLO/yolov3.weights:
[InternetShortcut]
URL=https://pjreddie.com/media/files/yolov3.weights
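The gist only ships this shortcut, not the weights themselves. A minimal download sketch, assuming you want to fetch the file from the URL above into the folder layout yolo.py uses:

# One-off download of the pretrained weights into the layout yolo.py expects.
# The URL is the one from the shortcut above; the file is roughly 236 MB.
import os
import urllib.request

os.makedirs('YOLO', exist_ok=True)
url = 'https://pjreddie.com/media/files/yolov3.weights'
dest = 'YOLO/yolov3.weights'
if not os.path.exists(dest):
    urllib.request.urlretrieve(url, dest)
print('weights ready:', dest)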