"""
ParallelDomain-4D: sanity-check that LiDAR points, projected into a chosen
camera, agree with the rendered ground-truth depth maps.
"""
import argparse
import json
from pathlib import Path

import numpy as np
import torch
import torch.nn.functional as F
from imageio.v3 import imread
from pyquaternion import Quaternion
def to_4x4(m):
    """Embed a (3, 3) matrix into the top-left block of a 4x4 identity."""
    m_ = torch.eye(4, dtype=m.dtype)
    m_[:3, :3] = m
    return m_
def invert_se3(T):
    """Invert a rigid SE(3) transform without a general matrix inverse.

    For T = [R | t], the inverse is [R^T | -R^T t].
    """
    R_ = T[:3, :3].transpose(0, 1)
    t = T[:3, 3]
    t_ = -torch.einsum("ij,j->i", R_, t)
    T_ = torch.eye(4, dtype=T.dtype)
    T_[:3, :3] = R_
    T_[:3, 3] = t_
    return T_
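# A minimal sanity check for invert_se3 (illustrative only; the rotation and
# translation below are made-up values): composing a transform with its
# inverse should give the identity.
def _check_invert_se3():
    T = torch.eye(4)
    T[:3, :3] = torch.tensor(
        Quaternion(axis=[0.0, 0.0, 1.0], degrees=30.0).rotation_matrix,
        dtype=torch.float32)
    T[:3, 3] = torch.tensor([1.0, 2.0, 3.0])
    assert torch.allclose(invert_se3(T) @ T, torch.eye(4), atol=1e-5)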
def transform_pts(T, pts):
    """Apply a homogeneous transform to a batch of points.

    Args:
        T (torch.Tensor): transformation matrix of shape (d, d).
        pts (torch.Tensor): input points of shape (n, d) or (n, d - 1);
            in the latter case a homogeneous coordinate of 1 is appended.
    """
    if pts.shape[-1] == (T.shape[-1] - 1):
        pts = F.pad(pts, (0, 1), value=1)
    pts = torch.einsum("...ji,...ni->...nj", T, pts)
    return pts[..., :3]
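# Illustrative check of the homogeneous-padding behaviour of transform_pts
# (made-up values): with an identity rotation and translation t, the output
# is simply pts + t.
def _check_transform_pts():
    T = torch.eye(4)
    T[:3, 3] = torch.tensor([0.5, -1.0, 2.0])
    pts = torch.rand(5, 3)
    assert torch.allclose(transform_pts(T, pts), pts + T[:3, 3])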
def inside_image(pts2d, image_size):
    """Return a boolean mask of 2-D points lying inside an (H, W) image."""
    H, W = image_size
    px, py = pts2d.unbind(-1)
    return (
        (0 <= px) & (px < W) &
        (0 <= py) & (py < H)
    )
def json_load(filename):
with open(filename, "r") as f:
data = json.load(f)
return data
def get_pardom_camera_matrices_torch(calibration):
    '''
    Adapted from convert_pcl_pardom.py.
    :return view_names: list of str with view names matching the camera matrix ordering.
    :return all_intrinsics: (V, 3, 3) tensor of float32.
    :return all_extrinsics: (V, 4, 4) tensor of float32.
    '''
# NOTE: Camera parameters do not vary over time in this dataset.
view_names = []
all_intrinsics = dict() # Maps view_name to (3, 3) tensor of float.
all_extrinsics = dict() # Maps view_name to (4, 4) tensor of float.
for (view_name, intrinsics_dict, extrinsics_dict) in zip(
calibration['names'], calibration['intrinsics'], calibration['extrinsics']):
if 'velodyne' in view_name.lower():
continue
# NOTE: Unlike Kubric, the intrinsics matrix is already unnormalized (pixel coordinates).
intrinsics_matrix = get_pardom_intrinsics_matrix(intrinsics_dict) # (3, 3) tensor of float.
extrinsics_matrix = get_pardom_extrinsics_matrix(extrinsics_dict) # (4, 4) tensor of float.
all_intrinsics[view_name] = intrinsics_matrix
all_extrinsics[view_name] = extrinsics_matrix
view_names.append(view_name)
view_names = sorted(view_names) # (V) list of str.
all_intrinsics = torch.stack([all_intrinsics[view_name] for view_name in view_names], dim=0)
# (V, 3, 3) tensor of float.
all_extrinsics = torch.stack([all_extrinsics[view_name] for view_name in view_names], dim=0)
# (V, 4, 4) tensor of float.
# For reference: view_names =
# ['camera0', 'camera1', 'camera10', 'camera11', 'camera12', 'camera13', 'camera14', 'camera15',
# 'camera2', 'camera3', 'camera4', 'camera5', 'camera6', 'camera7', 'camera8', 'camera9',
# 'yaw-0', 'yaw-60', 'yaw-neg-60']
return (view_names, all_intrinsics, all_extrinsics)
def get_pardom_intrinsics_matrix(intrinsics_dict):
'''
Adapted from convert_pcl_pardom.py.
'''
intrinsics_matrix = torch.tensor(
[[intrinsics_dict['fx'], 0.0, intrinsics_dict['cx']],
[0.0, intrinsics_dict['fy'], intrinsics_dict['cy']],
[0.0, 0.0, 1.0]], dtype=torch.float32)
return intrinsics_matrix
def get_pardom_extrinsics_matrix(extrinsics_dict):
'''
Adapted from convert_pcl_pardom.py.
'''
rot_q = extrinsics_dict['rotation']
rot_t = extrinsics_dict['translation']
rot_m = Quaternion(rot_q['qw'], rot_q['qx'], rot_q['qy'], rot_q['qz']).rotation_matrix
extrinsics_matrix = torch.eye(4, dtype=torch.float32)
extrinsics_matrix[0:3, 0:3] = torch.tensor(rot_m)
extrinsics_matrix[0:3, 3] = torch.tensor([rot_t['x'], rot_t['y'], rot_t['z']])
return extrinsics_matrix
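# For reference, a minimal sketch of the extrinsics dict this expects (field
# names taken from the accesses above; values are placeholders for the
# identity pose). Note that pyquaternion's Quaternion takes (w, x, y, z) order.
_EXAMPLE_EXTRINSICS = {
    'rotation': {'qw': 1.0, 'qx': 0.0, 'qy': 0.0, 'qz': 0.0},
    'translation': {'x': 0.0, 'y': 0.0, 'z': 0.0},
}
# get_pardom_extrinsics_matrix(_EXAMPLE_EXTRINSICS) returns the 4x4 identity.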
def find_first_element(seq_data, name):
    """Return the first record with the given datum name, or None."""
    for d in seq_data:
        if d["id"]["name"] == name:
            return d
    return None
def extract_sequence(seq_data, name):
    """Collect, in order, all records whose datum name matches `name`."""
    return [d for d in seq_data if d["id"]["name"] == name]
def handle_scene(opts, scene_id):
data_dir = Path(opts.p4d_dir) / "data"
camera_id = "yaw-0"
scene_dir = data_dir / scene_id
calib_file = next(scene_dir.joinpath("calibration").glob("*.json"))
calibration = json_load(calib_file)
view_names, all_intrinsics, all_extrinsics = (
get_pardom_camera_matrices_torch(calibration)
)
camera_index = view_names.index(camera_id)
K = all_intrinsics[camera_index]
metadata_file = next(scene_dir.glob("scene_*.json"))
metadata = json_load(metadata_file)
seq_data = metadata["data"]
camera_frames = extract_sequence(seq_data, camera_id)
pc_frames = extract_sequence(seq_data, "VelodyneVLS128_Center")
img_anno = camera_frames[0]["datum"]["image"]
H, W = img_anno["height"], img_anno["width"]
    for k, f in enumerate(camera_frames):
        # Camera-to-world pose for this frame.
        T = get_pardom_extrinsics_matrix(f["datum"]["image"]["pose"])
        image_file = f["datum"]["image"]["filename"]
        img = imread(scene_dir / image_file)
        # Load depth; annotation key "6" holds the depth map path for this dataset.
        anno = f["datum"]["image"]["annotations"]
        depth_file = anno["6"]
        depth_np = np.load(scene_dir / depth_file)["data"]
        depth = torch.from_numpy(depth_np)
        # Load the LiDAR sweep for the same frame index.
        pc_frame = pc_frames[k]
        # LiDAR-to-world transform.
        T_WC_lidar = get_pardom_extrinsics_matrix(pc_frame["datum"]["point_cloud"]["pose"])
        lidar_file = pc_frame["datum"]["point_cloud"]["filename"]
        lidar = np.load(scene_dir / lidar_file)["data"]
        XYZ_lidar = np.stack([lidar[c] for c in ("X", "Y", "Z")], axis=-1)
        # Make sure projected LiDAR points and the depth map agree.
        verify_frame(depth, T, K, T_WC_lidar, XYZ_lidar)
def verify_frame(depth, T_WC_cam, K, T_WC_lidar, XYZ):
    """Project LiDAR points into the camera and compare them to the depth map."""
    image_size = tuple(depth.shape)
    # World-to-camera transform for the chosen camera.
    T_CW_cam = invert_se3(T_WC_cam)
    # Full LiDAR-to-pixel projection: LiDAR -> world -> camera -> image plane.
    T_proj = to_4x4(K) @ T_CW_cam @ T_WC_lidar
XYZ = torch.from_numpy(XYZ)
uv, z = persp_project(transform_pts(T_proj, XYZ))
    # Round projected points to the nearest pixel.
    ji = uv.round().to(torch.int64)
    # Keep points that land inside the image and lie in front of the camera.
    mask = inside_image(ji, image_size) & (z[:, 0] > 0)
ji = ji[mask, :]
uv = uv[mask, :]
z = z[mask, 0]
    jj, ii = ji.unbind(-1)
    # Sample ground-truth depth at the projected pixels (row ii, column jj).
    z_depth = depth[ii, jj]
    # Discard very distant readings (e.g. sky).
    valid_depth = z_depth < 250
uv = uv[valid_depth, :]
z = z[valid_depth]
z_depth = z_depth[valid_depth]
    diff = (z - z_depth).abs().mean()
    print("mean discrepancy", diff.item())
def persp_project(xyz):
    """Perspective division: map (x, y, z) to ((x / z, y / z), z)."""
    z = xyz[:, 2:]
    uv = xyz[:, :2] / z
    return uv, z
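# Illustrative check of persp_project (made-up point): since verify_frame
# folds the intrinsics K into T_proj before calling it, persp_project only
# performs the final division by depth.
def _check_persp_project():
    uv, z = persp_project(torch.tensor([[4.0, 6.0, 2.0]]))
    assert torch.allclose(uv, torch.tensor([[2.0, 3.0]]))
    assert torch.allclose(z, torch.tensor([[2.0]]))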
def get_opts():
parser = argparse.ArgumentParser()
parser.add_argument("-s", "--split",
type=str,
default="train",
choices=["train", "val", "test"])
parser.add_argument("--p4d-dir",
default="/path/to/ParallelDomain-4D")
return parser.parse_args()
def main():
opts = get_opts()
scene_id = "scene_002130" # pedestrians
handle_scene(opts, scene_id)
if __name__ == "__main__":
main()
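# Example invocation (the script name and dataset path are placeholders;
# point --p4d-dir at your local copy of ParallelDomain-4D):
#   python paralleldomain_4d.py --split train --p4d-dir /data/ParallelDomain-4D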