Created
November 16, 2025 17:22
-
-
Save maxpromer/45f4f747644a554b7847a68a8279666b to your computer and use it in GitHub Desktop.
Raspberry Pi Camera Module 3 image classification with MobileNet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from picamera2 import Picamera2 | |
| import cv2 | |
| import numpy as np | |
# Load the pre-trained SSD MobileNet v1 (COCO, 2017-11-17) detector:
# a TensorFlow frozen graph plus the matching OpenCV text graph config.
GRAPH_FILE = "frozen_inference_graph.pb"
CONFIG_FILE = "ssd_mobilenet_v1_coco_2017_11_17.pbtxt"
model = cv2.dnn.readNetFromTensorflow(GRAPH_FILE, CONFIG_FILE)
# Bring up the camera. The preview stream is configured as 3-channel
# "RGB888" so each captured frame is an (H, W, 3) array the DNN
# preprocessing below can consume directly.
picam2 = Picamera2()
preview_cfg = picam2.create_preview_configuration(main={"format": "RGB888"})
picam2.configure(preview_cfg)
picam2.start()
# Camera Module 3 has a motorized lens: AfMode 2 = continuous autofocus.
picam2.set_controls({"AfMode": 2})
| # COCO class labels (partial list, extend as needed) | |
# COCO label map (subset): detection class id -> human-readable name.
# Ids missing here are rendered as "ID:<n>" by the drawing loop;
# extend with further COCO ids as needed.
class_names = {
    1: "person",
    2: "bicycle",
    3: "car",
    4: "motorcycle",
    5: "airplane",
    6: "bus",
    7: "train",
    8: "truck",
    9: "boat",
    10: "traffic light",
    15: "cat",
    16: "dog",
    17: "horse",
    18: "sheep",
    19: "cow",
    20: "elephant",
    21: "bear",
    22: "zebra",
    23: "giraffe",
    44: "bottle",
    46: "wine glass",
    47: "cup",
    48: "fork",
    49: "knife",
    50: "spoon",
    51: "bowl",
    62: "chair",
    63: "couch",
    64: "potted plant",
    67: "dining table",
}
# Minimum detection confidence required before a box is drawn.
CONF_THRESHOLD = 0.5

try:
    while True:
        # Grab the latest camera frame as an (H, W, 3) numpy array.
        frame = picam2.capture_array()
        # NOTE(review): no explicit cvtColor is done here — the frame is fed
        # straight to blobFromImage with swapRB=True, which performs the
        # channel-order swap the network expects. Confirm on-device that the
        # drawn (255, 0, 0) boxes render in the intended color.
        blob = cv2.dnn.blobFromImage(frame, size=(300, 300), swapRB=True, crop=False)
        model.setInput(blob)
        # Forward pass. Output shape is (1, 1, N, 7); each row is
        # [image_id, class_id, confidence, x1, y1, x2, y2] with the box
        # corners normalized to [0, 1].
        detections = model.forward()
        h, w = frame.shape[:2]

        for detection in detections[0, 0]:
            confidence = float(detection[2])
            if confidence <= CONF_THRESHOLD:
                continue
            class_id = int(detection[1])
            # Scale normalized corners to pixels and clamp to the frame:
            # the network can emit coordinates slightly outside [0, 1].
            x1, y1, x2, y2 = (detection[3:7] * [w, h, w, h]).astype(int)
            x1, x2 = max(0, x1), min(w - 1, x2)
            y1, y2 = max(0, y1), min(h - 1, y2)
            # Unknown ids fall back to a generic "ID:<n>" label.
            label = class_names.get(class_id, f"ID:{class_id}")
            text = f"{label}: {confidence:.2f}"
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(frame, text, (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)

        # Show the annotated frame; press 'q' to quit.
        cv2.imshow("Camera", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the GUI windows and stop the camera, even on error.
    cv2.destroyAllWindows()
    picam2.stop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment