Last active
February 22, 2022 14:49
-
-
Save alexlee-gk/b28fb962c9b2da586d1591bac8888f1f to your computer and use it in GitHub Desktop.
Get color and depth image as numpy array using Panda3d 1.10.0 and python 3.5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import time | |
import cv2 | |
from direct.showbase.ShowBase import ShowBase | |
from panda3d.core import FrameBufferProperties, WindowProperties | |
from panda3d.core import GraphicsPipe, GraphicsOutput | |
from panda3d.core import Texture | |
from panda3d.core import loadPrcFileData | |
# Panda3D engine configuration — must be applied before ShowBase is created:
# show the on-screen frame-rate meter, and disable vsync so rendering speed
# is not capped to the monitor's refresh rate (matters for the FPS benchmark).
loadPrcFileData('', 'show-frame-rate-meter true')
loadPrcFileData('', 'sync-video 0')
def show_rgbd_image(image, depth_image, window_name='Image window', delay=1, depth_offset=0.0, depth_scale=1.0):
    """Show a color image and a depth image side by side in one OpenCV window.

    A non-uint8 depth image is normalized to uint8 via
    ``(depth - depth_offset) / depth_scale`` clipped to [0, 1]; when either
    parameter is ``None`` it is derived from the depth image's own min/max.
    The single-channel depth is replicated to 3 channels (plus an alpha
    plane when the color image has 4 channels) so both can be concatenated
    horizontally.

    Returns True when the user pressed ESC or 'q' (an exit request),
    False otherwise.
    """
    if depth_image.dtype != np.uint8:
        # Derive normalization parameters from the data when not supplied.
        if depth_scale is None:
            depth_scale = depth_image.max() - depth_image.min()
        if depth_offset is None:
            depth_offset = depth_image.min()
        normalized = np.clip((depth_image - depth_offset) / depth_scale, 0.0, 1.0)
        depth_image = (255.0 * normalized).astype(np.uint8)
    # Replicate the single depth channel so it matches the color image's RGB.
    depth_image = np.tile(depth_image, (1, 1, 3))
    if image.shape[2] == 4:  # add alpha channel
        alpha = np.full(depth_image.shape[:2] + (1,), 255, dtype=np.uint8)
        depth_image = np.concatenate([depth_image, alpha], axis=-1)
    side_by_side = np.concatenate([image, depth_image], axis=1)
    # images = cv2.cvtColor(images, cv2.COLOR_RGB2BGR)  # not needed since image is already in BGR format
    cv2.imshow(window_name, side_by_side)
    key = cv2.waitKey(delay) & 255
    exit_request = key in (27, ord('q'))
    if exit_request:
        print("Pressed ESC or q, exiting")
    return exit_request
class MyApp(ShowBase):
    """Panda3D application that renders the sample environment and exposes
    the color and depth buffers of the camera as numpy arrays.

    A second, offscreen depth-only buffer (``self.depthBuffer``) shares the
    main window's GSG and renders the same scene through a camera parented
    to the main one, so the depth image always matches the color image's
    viewpoint.
    """

    def __init__(self):
        ShowBase.__init__(self)

        # Load the environment model and attach it to the scene graph.
        self.scene = self.loader.loadModel("models/environment")
        self.scene.reparentTo(self.render)
        # Apply scale and position transforms on the model.
        self.scene.setScale(0.25, 0.25, 0.25)
        self.scene.setPos(-8, 42, 0)

        # Display region of the default camera — needed to grab the color
        # image via getScreenshot() in get_camera_image().
        self.dr = self.camNode.getDisplayRegion(0)

        # Offscreen buffer used only for the depth image; sized to match the
        # main window so color and depth images line up pixel-for-pixel.
        winprops = WindowProperties.size(self.win.getXSize(), self.win.getYSize())
        fbprops = FrameBufferProperties()
        fbprops.setDepthBits(1)
        self.depthBuffer = self.graphicsEngine.makeOutput(
            self.pipe, "depth buffer", -2,
            fbprops, winprops,
            GraphicsPipe.BFRefuseWindow,
            self.win.getGsg(), self.win)
        self.depthTex = Texture()
        self.depthTex.setFormat(Texture.FDepthComponent)
        # RTMCopyRam copies the depth plane into RAM every frame so
        # get_camera_depth_image() can read it back without a GPU download call.
        self.depthBuffer.addRenderTexture(self.depthTex,
            GraphicsOutput.RTMCopyRam, GraphicsOutput.RTPDepth)
        # Share the main camera's lens so both images use identical projection.
        lens = self.cam.node().getLens()
        # the near and far clipping distances can be changed if desired
        # lens.setNear(5.0)
        # lens.setFar(500.0)
        # Fix: reference self.render explicitly instead of the bare `render`
        # name that ShowBase injects into builtins — same node, but no
        # reliance on the implicit global.
        self.depthCam = self.makeCamera(self.depthBuffer,
            lens=lens,
            scene=self.render)
        self.depthCam.reparentTo(self.cam)

        # TODO: Scene is rendered twice: once for rgb and once for depth image.
        # How can both images be obtained in one rendering pass?

    def get_camera_image(self, requested_format=None):
        """
        Returns the camera's image, which is of type uint8 and has values
        between 0 and 255.
        The 'requested_format' argument should specify in which order the
        components of the image must be. For example, valid format strings are
        "RGBA" and "BGRA". By default, Panda's internal format "BGRA" is used,
        in which case no data is copied over.
        """
        tex = self.dr.getScreenshot()
        if requested_format is None:
            data = tex.getRamImage()
        else:
            data = tex.getRamImageAs(requested_format)
        image = np.frombuffer(data, np.uint8)  # use data.get_data() instead of data in python 2
        image.shape = (tex.getYSize(), tex.getXSize(), tex.getNumComponents())
        # Panda stores images bottom-up; flip to the conventional top-down order.
        image = np.flipud(image)
        return image

    def get_camera_depth_image(self):
        """
        Returns the camera's depth image, which is of type float32 and has
        values between 0.0 and 1.0.
        """
        data = self.depthTex.getRamImage()
        depth_image = np.frombuffer(data, np.float32)
        depth_image.shape = (self.depthTex.getYSize(), self.depthTex.getXSize(), self.depthTex.getNumComponents())
        # Flip to top-down order to match get_camera_image().
        depth_image = np.flipud(depth_image)
        return depth_image
def main():
    """Orbit the camera around the scene for up to 1800 frames, showing the
    color and depth images each frame, then print the average FPS.

    Stops early when the user presses ESC or 'q' in the image window.
    """
    app = MyApp()

    frames = 1800
    radius = 20     # camera orbit radius
    step = 0.1      # degrees of rotation per frame
    rendered = 0    # frames actually rendered (may be < frames on early exit)
    start_time = time.time()
    for t in range(frames):
        angle_degrees = t * step
        angle_radians = angle_degrees * (np.pi / 180.0)
        # Circle the camera around the origin, always facing inward.
        app.cam.setPos(radius * np.sin(angle_radians), -radius * np.cos(angle_radians), 3)
        app.cam.setHpr(angle_degrees, 0, 0)
        app.graphicsEngine.renderFrame()
        image = app.get_camera_image()
        depth_image = app.get_camera_depth_image()
        rendered += 1
        # Fix: honor the exit request (ESC / 'q') that show_rgbd_image
        # returns — the original discarded it, so the user could not quit.
        if show_rgbd_image(image, depth_image):
            break
    end_time = time.time()
    # Fix: average over frames actually rendered, not the requested total,
    # so the FPS figure stays correct after an early exit.
    print("average FPS: {}".format(rendered / (end_time - start_time)))
# Script entry point: run the render/benchmark loop when executed directly.
if __name__ == '__main__':
    main()
This helped me very much! Can you tell how many meters the object is away just by reading the image? For example: Pixel value = 0.7 -> 5m
Is there any scaling or equation I can use?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Indeed, a big thank you. I would remove the loop in the main and put an app.run(), while also putting the retrieval of the images (last part of the loop) into a class method and add it as a task to the
taskMgr
. It is closer to the way one would normally run ShowBase
(run until closed by user). In addition, resizing is an issue. You need to reinitialize the depthBuffer
when a resize event is detected. Since this would annihilate the performance to the point you will probably be looking at 2-3 FPS, I would suggest detecting the moment when the resizing stops and then doing that. Of course, this would mean that no loading of image data can take place while resizing. As for the rendering twice — I'm new to Panda3D, but unless you are able to put both the Z values and the RGB data into the same structure, there is no way of not rendering twice. That said, you might be able to do something about accessing the graphics data if you use CUDA, which is also partially integrated with OpenCV. This (if possible) would require a lot of low-level stuff that is probably not really exposed in the Python API of Panda3D.