@maxpromer
Created November 16, 2025 17:22
Raspberry Pi Camera Module 3 object detection with SSD MobileNet (OpenCV DNN)
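# Live object detection on the Raspberry Pi Camera Module 3 using OpenCV's DNN
# module with an SSD MobileNet v1 model trained on COCO.
# Assumes the frozen graph (frozen_inference_graph.pb) and the matching
# ssd_mobilenet_v1_coco_2017_11_17.pbtxt config are in the working directory.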
from picamera2 import Picamera2
import cv2
import numpy as np
# Load the TensorFlow model (frozen graph + pbtxt)
model = cv2.dnn.readNetFromTensorflow(
"frozen_inference_graph.pb",
"ssd_mobilenet_v1_coco_2017_11_17.pbtxt"
)
# Initialize Picamera2
picam2 = Picamera2()
# Set format to 3-channel RGB to match model input
config = picam2.create_preview_configuration(main={"format": "RGB888"})
picam2.configure(config)
picam2.start()
# enable autofocus (continuous)
picam2.set_controls({"AfMode": 2}) # 2 = continuous autofocus
# COCO class labels (partial list, extend as needed)
class_names = {
1: "person", 2: "bicycle", 3: "car", 4: "motorcycle", 5: "airplane",
6: "bus", 7: "train", 8: "truck", 9: "boat", 10: "traffic light",
15: "cat", 16: "dog", 17: "horse", 18: "sheep", 19: "cow",
20: "elephant", 21: "bear", 22: "zebra", 23: "giraffe", 44: "bottle",
46: "wine glass", 47: "cup", 48: "fork", 49: "knife", 50: "spoon",
51: "bowl", 62: "chair", 63: "couch", 64: "potted plant", 67: "dining table"
}
try:
    while True:
        # Capture frame from camera
        frame = picam2.capture_array()

        # Picamera2's "RGB888" format in practice delivers BGR-ordered arrays,
        # which is what OpenCV expects, so no colour conversion is needed here.
        frame_bgr = frame

        # Prepare input blob for the network (model expects 300x300 RGB, hence swapRB=True)
        blob = cv2.dnn.blobFromImage(frame_bgr, size=(300, 300), swapRB=True, crop=False)
        model.setInput(blob)

        # Run inference
        detections = model.forward()
        h, w = frame_bgr.shape[:2]

        # Loop through detections and draw bounding boxes.
        # Each row is [batch_id, class_id, confidence, x1, y1, x2, y2],
        # with box coordinates normalised to 0..1.
        for detection in detections[0, 0]:
            confidence = detection[2]
            if confidence > 0.5:  # Only consider detections above 50% confidence
                class_id = int(detection[1])
                x1, y1, x2, y2 = (detection[3:7] * [w, h, w, h]).astype(int)
                label = class_names.get(class_id, f"ID:{class_id}")
                text = f"{label}: {confidence:.2f}"
                cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), (255, 0, 0), 2)
                cv2.putText(frame_bgr, text,
                            (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)

        # Display the resulting frame; press 'q' to quit
        cv2.imshow("Camera", frame_bgr)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Clean up windows and stop camera
    cv2.destroyAllWindows()
    picam2.stop()
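To run this you need python3-picamera2 and OpenCV (python3-opencv) installed on the Pi, plus the two model files referenced above: the frozen SSD MobileNet v1 COCO graph from the TensorFlow detection model zoo and the matching .pbtxt graph config for OpenCV's DNN module. A desktop session (or X forwarding) is needed for the cv2.imshow preview window; press q in that window to stop the script cleanly.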