Skip to content

Instantly share code, notes, and snippets.

@chrismatthieu
Created March 6, 2025 17:33
Show Gist options
  • Save chrismatthieu/16431e3f56d1f62c177392dcf99b7b06 to your computer and use it in GitHub Desktop.
Save chrismatthieu/16431e3f56d1f62c177392dcf99b7b06 to your computer and use it in GitHub Desktop.
RealSense D415 object detection and distance calculation
import pyrealsense2 as rs
import numpy as np
import cv2
# Load YOLOv3 model
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
# Initialize RealSense camera
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
pipeline.start(config)
try:
while True:
# Wait for a coherent pair of frames: depth and color
frames = pipeline.wait_for_frames()
color_frame = frames.get_color_frame()
depth_frame = frames.get_depth_frame()
if not color_frame or not depth_frame:
continue
# Convert images to numpy arrays
color_image = np.asanyarray(color_frame.get_data())
depth_image = np.asanyarray(depth_frame.get_data())
# Detect objects
blob = cv2.dnn.blobFromImage(color_image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
# Analyze detections
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5 and class_id == 0: # class_id == 0 is for 'person'
center_x = int(detection[0] * 640)
center_y = int(detection[1] * 480)
w = int(detection[2] * 640)
h = int(detection[3] * 480)
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
# Draw bounding boxes
for i in range(len(boxes)):
if i in indexes:
x, y, w, h = boxes[i]
label = str("Person")
distance = depth_image[center_y, center_x] * depth_frame.get_units()
cv2.rectangle(color_image, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.putText(color_image, f"{label} {distance:.2f}m", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
# Display the resulting frame
cv2.imshow('RealSense', color_image)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
finally:
# Stop streaming
pipeline.stop()
cv2.destroyAllWindows()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment