@sssemil
Created October 22, 2023 17:46
Cam to depth stream
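The script below streams webcam frames through Intel's dpt-hybrid-midas depth model and shows the estimated depth map next to the live feed. It assumes roughly the following dependencies (package names inferred from the imports; versions unpinned):

pip install numpy torch opencv-python pillow transformers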
import numpy as np
import torch
import cv2
from PIL import Image
from transformers import DPTForDepthEstimation, DPTImageProcessor
# Use the GPU if one is available (the original gist hard-codes "cuda")
device = "cuda" if torch.cuda.is_available() else "cpu"
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas", low_cpu_mem_usage=True).to(device)
feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")
# Start capturing video from the first camera device
cap = cv2.VideoCapture(0)
while True:
    # Capture frame-by-frame; stop if the camera returns no frame
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the BGR OpenCV frame to an RGB PIL Image
    image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Prepare the image for the model
    inputs = feature_extractor(images=image, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Interpolate the prediction back to the original frame size
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )

    # Normalize the depth map to 0-255 for visualization
    output = prediction.squeeze().cpu().numpy()
    formatted = (output * 255 / np.max(output)).astype("uint8")
    depth_map = cv2.cvtColor(formatted, cv2.COLOR_GRAY2BGR)

    # Concatenate the original frame and the depth map side by side
    combined = np.hstack((frame, depth_map))

    # Display the concatenated frame
    cv2.imshow('Camera Input and Depth Estimation', combined)

    # Break the loop if the 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture and close all windows
cap.release()
cv2.destroyAllWindows()
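Optional tweak (a suggestion, not part of the original gist): a colormap often reads better than grayscale for depth. The line below is a drop-in replacement for the cv2.cvtColor(formatted, cv2.COLOR_GRAY2BGR) call above; cv2.applyColorMap takes a single-channel uint8 image and returns a BGR image, so the hstack with the original frame still lines up.

# Color-map the normalized depth instead of converting gray to BGR
depth_map = cv2.applyColorMap(formatted, cv2.COLORMAP_JET)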