@sssemil
Created October 22, 2023 17:46
Cam to depth stream
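The script below streams webcam frames through Intel's dpt-hybrid-midas depth model and shows the estimated depth map next to the live feed. It assumes roughly the following dependencies (package names inferred from the imports; versions unpinned):

pip install numpy torch opencv-python pillow transformers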
import numpy as np
import torch
import cv2
from PIL import Image
from transformers import DPTForDepthEstimation, DPTImageProcessor
# Use the GPU if one is available (the original gist hard-codes "cuda")
device = "cuda" if torch.cuda.is_available() else "cpu"
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas", low_cpu_mem_usage=True).to(device)
feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")
# Start capturing video from the first camera device
cap = cv2.VideoCapture(0)
while True:
    # Capture frame-by-frame; stop if the camera returns no frame
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the BGR OpenCV frame to an RGB PIL Image
    image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Prepare the image for the model
    inputs = feature_extractor(images=image, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Interpolate the prediction back to the original frame size
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )

    # Normalize the depth map to 0-255 for visualization
    output = prediction.squeeze().cpu().numpy()
    formatted = (output * 255 / np.max(output)).astype("uint8")
    depth_map = cv2.cvtColor(formatted, cv2.COLOR_GRAY2BGR)

    # Concatenate the original frame and the depth map side by side
    combined = np.hstack((frame, depth_map))

    # Display the concatenated frame
    cv2.imshow('Camera Input and Depth Estimation', combined)

    # Break the loop if the 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture and close all windows
cap.release()
cv2.destroyAllWindows()
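Optional tweak (a suggestion, not part of the original gist): a colormap often reads better than grayscale for depth. The line below is a drop-in replacement for the cv2.cvtColor(formatted, cv2.COLOR_GRAY2BGR) call above; cv2.applyColorMap takes a single-channel uint8 image and returns a BGR image, so the hstack with the original frame still lines up.

# Color-map the normalized depth instead of converting gray to BGR
depth_map = cv2.applyColorMap(formatted, cv2.COLORMAP_JET)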