# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Tools supporting the execution of COLMAP and preparation of COLMAP-based datasets for nerfstudio training.
"""

import json
from pathlib import Path
from typing import Any, Dict, Literal, Optional, Union

import appdirs
import cv2
import numpy as np
import requests
import torch
from packaging.version import Version
from rich.progress import track

# TODO(1480) use pycolmap instead of colmap_parsing_utils
# import pycolmap
from nerfstudio.data.utils.colmap_parsing_utils import (
    qvec2rotmat,
    read_cameras_binary,
    read_images_binary,
    read_points3D_binary,
    read_points3D_text,
)
from nerfstudio.process_data.process_data_utils import CameraModel
from nerfstudio.utils import colormaps
from nerfstudio.utils.rich_utils import CONSOLE, status
from nerfstudio.utils.scripts import run_command


def get_colmap_version(colmap_cmd: str, default_version: str = "3.8") -> Version:
    """Returns the version of COLMAP.

    This code assumes that colmap returns a version string of the form
    "COLMAP 3.8 ..." which may not be true for all versions of COLMAP.

    Args:
        colmap_cmd: Name or path of the COLMAP executable.
        default_version: Default version to return if COLMAP version can't be determined.
    Returns:
        The version of COLMAP.
    """
    output = run_command(f"{colmap_cmd} -h", verbose=False)
    assert output is not None
    for line in output.split("\n"):
        if line.startswith("COLMAP"):
            version = line.split(" ")[1]
            version = Version(version)
            return version
    CONSOLE.print(f"[bold red]Could not find COLMAP version. Using default {default_version}")
    return Version(default_version)
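
# Example (per the docstring's assumption about the help banner): if `colmap -h`
# prints a line beginning "COLMAP 3.8 ...", then get_colmap_version("colmap")
# parses the second token and returns Version("3.8"); otherwise it falls back to
# the given default.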


def get_vocab_tree() -> Path:
    """Return path to vocab tree. Downloads vocab tree if it doesn't exist.

    Returns:
        The path to the vocab tree.
    """
    vocab_tree_filename = Path(appdirs.user_data_dir("nerfstudio")) / "vocab_tree.fbow"

    if not vocab_tree_filename.exists():
        r = requests.get("https://demuc.de/colmap/vocab_tree_flickr100K_words32K.bin", stream=True)
        vocab_tree_filename.parent.mkdir(parents=True, exist_ok=True)
        with open(vocab_tree_filename, "wb") as f:
            total_length = r.headers.get("content-length")
            assert total_length is not None
            for chunk in track(
                r.iter_content(chunk_size=1024),
                total=int(total_length) / 1024 + 1,
                description="Downloading vocab tree...",
            ):
                if chunk:
                    f.write(chunk)
                    f.flush()
    return vocab_tree_filename
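
# Note (hedged): `appdirs.user_data_dir("nerfstudio")` resolves to the per-user data
# directory, typically ~/.local/share/nerfstudio on Linux, so the vocab tree is cached
# across runs and only downloaded on first use.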


def run_colmap(
    image_dir: Path,
    colmap_dir: Path,
    camera_model: CameraModel,
    camera_mask_path: Optional[Path] = None,
    gpu: bool = True,
    verbose: bool = False,
    matching_method: Literal["vocab_tree", "exhaustive", "sequential"] = "vocab_tree",
    refine_intrinsics: bool = True,
    colmap_cmd: str = "colmap",
) -> None:
    """Runs COLMAP on the images.

    Args:
        image_dir: Path to the directory containing the images.
        colmap_dir: Path to the output directory.
        camera_model: Camera model to use.
        camera_mask_path: Path to the camera mask.
        gpu: If True, use GPU.
        verbose: If True, logs the output of the command.
        matching_method: Matching method to use.
        refine_intrinsics: If True, refine intrinsics.
        colmap_cmd: Path to the COLMAP executable.
    """
    colmap_version = get_colmap_version(colmap_cmd)

    colmap_database_path = colmap_dir / "database.db"
    colmap_database_path.unlink(missing_ok=True)

    # Feature extraction
    feature_extractor_cmd = [
        f"{colmap_cmd} feature_extractor",
        f"--database_path {colmap_dir / 'database.db'}",
        f"--image_path {image_dir}",
        "--ImageReader.single_camera 1",
        f"--ImageReader.camera_model {camera_model.value}",
        f"--SiftExtraction.use_gpu {int(gpu)}",
    ]
    if camera_mask_path is not None:
        feature_extractor_cmd.append(f"--ImageReader.camera_mask_path {camera_mask_path}")
    feature_extractor_cmd = " ".join(feature_extractor_cmd)
    with status(msg="[bold yellow]Running COLMAP feature extractor...", spinner="moon", verbose=verbose):
        run_command(feature_extractor_cmd, verbose=verbose)
    CONSOLE.log("[bold green]:tada: Done extracting COLMAP features.")

    # Feature matching
    feature_matcher_cmd = [
        f"{colmap_cmd} {matching_method}_matcher",
        f"--database_path {colmap_dir / 'database.db'}",
        f"--SiftMatching.use_gpu {int(gpu)}",
    ]
    if matching_method == "vocab_tree":
        vocab_tree_filename = get_vocab_tree()
        feature_matcher_cmd.append(f'--VocabTreeMatching.vocab_tree_path "{vocab_tree_filename}"')
    feature_matcher_cmd = " ".join(feature_matcher_cmd)
    with status(msg="[bold yellow]Running COLMAP feature matcher...", spinner="runner", verbose=verbose):
        run_command(feature_matcher_cmd, verbose=verbose)
    CONSOLE.log("[bold green]:tada: Done matching COLMAP features.")

    # Bundle adjustment
    sparse_dir = colmap_dir / "sparse"
    sparse_dir.mkdir(parents=True, exist_ok=True)
    mapper_cmd = [
        f"{colmap_cmd} mapper",
        f"--database_path {colmap_dir / 'database.db'}",
        f"--image_path {image_dir}",
        f"--output_path {sparse_dir}",
    ]
    if colmap_version >= Version("3.7"):
        mapper_cmd.append("--Mapper.ba_global_function_tolerance=1e-6")

    mapper_cmd = " ".join(mapper_cmd)

    with status(
        msg="[bold yellow]Running COLMAP bundle adjustment... (This may take a while)",
        spinner="circle",
        verbose=verbose,
    ):
        run_command(mapper_cmd, verbose=verbose)
    CONSOLE.log("[bold green]:tada: Done COLMAP bundle adjustment.")

    if refine_intrinsics:
        with status(msg="[bold yellow]Refine intrinsics...", spinner="dqpb", verbose=verbose):
            bundle_adjuster_cmd = [
                f"{colmap_cmd} bundle_adjuster",
                f"--input_path {sparse_dir}/0",
                f"--output_path {sparse_dir}/0",
                "--BundleAdjustment.refine_principal_point 1",
            ]
            run_command(" ".join(bundle_adjuster_cmd), verbose=verbose)
        CONSOLE.log("[bold green]:tada: Done refining intrinsics.")
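
# Example usage (a minimal sketch; the paths below are hypothetical):
#
#     run_colmap(
#         image_dir=Path("data/scene/images"),
#         colmap_dir=Path("data/scene/colmap"),
#         camera_model=CameraModel.OPENCV,
#         matching_method="sequential",  # often a good choice for video frames
#     )
#
# On success, the sparse reconstruction is written to data/scene/colmap/sparse/0.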


def parse_colmap_camera_params(camera) -> Dict[str, Any]:
    """
    Parses all currently supported COLMAP cameras into the transforms.json metadata.

    Args:
        camera: COLMAP camera
    Returns:
        transforms.json metadata containing camera's intrinsics and distortion parameters
    """
    out: Dict[str, Any] = {
        "w": camera.width,
        "h": camera.height,
    }

    # Parameters match https://github.com/colmap/colmap/blob/dev/src/base/camera_models.h
    camera_params = camera.params
    if camera.model == "SIMPLE_PINHOLE":
        # f, cx, cy
        # du = 0
        # dv = 0
        out["fl_x"] = float(camera_params[0])
        out["fl_y"] = float(camera_params[0])
        out["cx"] = float(camera_params[1])
        out["cy"] = float(camera_params[2])
        out["k1"] = 0.0
        out["k2"] = 0.0
        out["p1"] = 0.0
        out["p2"] = 0.0
        camera_model = CameraModel.OPENCV
    elif camera.model == "PINHOLE":
        # fx, fy, cx, cy
        # du = 0
        # dv = 0
        out["fl_x"] = float(camera_params[0])
        out["fl_y"] = float(camera_params[1])
        out["cx"] = float(camera_params[2])
        out["cy"] = float(camera_params[3])
        out["k1"] = 0.0
        out["k2"] = 0.0
        out["p1"] = 0.0
        out["p2"] = 0.0
        camera_model = CameraModel.OPENCV
    elif camera.model == "SIMPLE_RADIAL":
        # f, cx, cy, k
        # r2 = u**2 + v**2
        # radial = k * r2
        # du = u * radial
        # dv = v * radial
        out["fl_x"] = float(camera_params[0])
        out["fl_y"] = float(camera_params[0])
        out["cx"] = float(camera_params[1])
        out["cy"] = float(camera_params[2])
        out["k1"] = float(camera_params[3])
        out["k2"] = 0.0
        out["p1"] = 0.0
        out["p2"] = 0.0
        camera_model = CameraModel.OPENCV
    elif camera.model == "RADIAL":
        # f, cx, cy, k1, k2
        # r2 = u**2 + v**2
        # radial = k1 * r2 + k2 * r2 ** 2
        # du = u * radial
        # dv = v * radial
        out["fl_x"] = float(camera_params[0])
        out["fl_y"] = float(camera_params[0])
        out["cx"] = float(camera_params[1])
        out["cy"] = float(camera_params[2])
        out["k1"] = float(camera_params[3])
        out["k2"] = float(camera_params[4])
        out["p1"] = 0.0
        out["p2"] = 0.0
        camera_model = CameraModel.OPENCV
    elif camera.model == "OPENCV":
        # fx, fy, cx, cy, k1, k2, p1, p2
        # uv = u * v
        # r2 = u**2 + v**2
        # radial = k1 * r2 + k2 * r2 ** 2
        # du = u * radial + 2 * p1 * u*v + p2 * (r2 + 2 * u**2)
        # dv = v * radial + 2 * p2 * u*v + p1 * (r2 + 2 * v**2)
        out["fl_x"] = float(camera_params[0])
        out["fl_y"] = float(camera_params[1])
        out["cx"] = float(camera_params[2])
        out["cy"] = float(camera_params[3])
        out["k1"] = float(camera_params[4])
        out["k2"] = float(camera_params[5])
        out["p1"] = float(camera_params[6])
        out["p2"] = float(camera_params[7])
        camera_model = CameraModel.OPENCV
    elif camera.model == "OPENCV_FISHEYE":
        # fx, fy, cx, cy, k1, k2, k3, k4
        # r = sqrt(u**2 + v**2)
        # if r > eps:
        #   theta = atan(r)
        #   theta2 = theta ** 2
        #   theta4 = theta2 ** 2
        #   theta6 = theta4 * theta2
        #   theta8 = theta4 ** 2
        #   thetad = theta * (1 + k1 * theta2 + k2 * theta4 + k3 * theta6 + k4 * theta8)
        #   du = u * thetad / r - u
        #   dv = v * thetad / r - v
        # else:
        #   du = dv = 0
        out["fl_x"] = float(camera_params[0])
        out["fl_y"] = float(camera_params[1])
        out["cx"] = float(camera_params[2])
        out["cy"] = float(camera_params[3])
        out["k1"] = float(camera_params[4])
        out["k2"] = float(camera_params[5])
        out["k3"] = float(camera_params[6])
        out["k4"] = float(camera_params[7])
        camera_model = CameraModel.OPENCV_FISHEYE
    elif camera.model == "FULL_OPENCV":
        # fx, fy, cx, cy, k1, k2, p1, p2, k3, k4, k5, k6
        # u2 = u ** 2
        # uv = u * v
        # v2 = v ** 2
        # r2 = u2 + v2
        # r4 = r2 * r2
        # r6 = r4 * r2
        # radial = (1 + k1 * r2 + k2 * r4 + k3 * r6) /
        #          (1 + k4 * r2 + k5 * r4 + k6 * r6)
        # du = u * radial + 2 * p1 * uv + p2 * (r2 + 2 * u2) - u
        # dv = v * radial + 2 * p2 * uv + p1 * (r2 + 2 * v2) - v
        # NOTE: the parameters are parsed for reference, but this model is not supported downstream.
        out["fl_x"] = float(camera_params[0])
        out["fl_y"] = float(camera_params[1])
        out["cx"] = float(camera_params[2])
        out["cy"] = float(camera_params[3])
        out["k1"] = float(camera_params[4])
        out["k2"] = float(camera_params[5])
        out["p1"] = float(camera_params[6])
        out["p2"] = float(camera_params[7])
        out["k3"] = float(camera_params[8])
        out["k4"] = float(camera_params[9])
        out["k5"] = float(camera_params[10])
        out["k6"] = float(camera_params[11])
        raise NotImplementedError(f"{camera.model} camera model is not supported yet!")
    elif camera.model == "FOV":
        # fx, fy, cx, cy, omega
        # NOTE: the parameters are parsed for reference, but this model is not supported downstream.
        out["fl_x"] = float(camera_params[0])
        out["fl_y"] = float(camera_params[1])
        out["cx"] = float(camera_params[2])
        out["cy"] = float(camera_params[3])
        out["omega"] = float(camera_params[4])
        raise NotImplementedError(f"{camera.model} camera model is not supported yet!")
    elif camera.model == "SIMPLE_RADIAL_FISHEYE":
        # f, cx, cy, k
        # r = sqrt(u ** 2 + v ** 2)
        # if r > eps:
        #   theta = atan(r)
        #   theta2 = theta ** 2
        #   thetad = theta * (1 + k * theta2)
        #   du = u * thetad / r - u
        #   dv = v * thetad / r - v
        # else:
        #   du = dv = 0
        out["fl_x"] = float(camera_params[0])
        out["fl_y"] = float(camera_params[0])
        out["cx"] = float(camera_params[1])
        out["cy"] = float(camera_params[2])
        out["k1"] = float(camera_params[3])
        out["k2"] = 0.0
        out["k3"] = 0.0
        out["k4"] = 0.0
        camera_model = CameraModel.OPENCV_FISHEYE
    elif camera.model == "RADIAL_FISHEYE":
        # f, cx, cy, k1, k2
        # r = sqrt(u ** 2 + v ** 2)
        # if r > eps:
        #   theta = atan(r)
        #   theta2 = theta ** 2
        #   theta4 = theta2 ** 2
        #   thetad = theta * (1 + k1 * theta2 + k2 * theta4)
        #   du = u * thetad / r - u
        #   dv = v * thetad / r - v
        # else:
        #   du = dv = 0
        out["fl_x"] = float(camera_params[0])
        out["fl_y"] = float(camera_params[0])
        out["cx"] = float(camera_params[1])
        out["cy"] = float(camera_params[2])
        out["k1"] = float(camera_params[3])
        out["k2"] = float(camera_params[4])
        out["k3"] = 0.0
        out["k4"] = 0.0
        camera_model = CameraModel.OPENCV_FISHEYE
    else:
        # THIN_PRISM_FISHEYE not supported!
        raise NotImplementedError(f"{camera.model} camera model is not supported yet!")

    out["camera_model"] = camera_model.value
    return out
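
# Example (derived from the branches above): a COLMAP camera with model "PINHOLE",
# width 1920, height 1080, and params [fx, fy, cx, cy] = [1000.0, 1000.0, 960.0, 540.0]
# yields
# {"w": 1920, "h": 1080, "fl_x": 1000.0, "fl_y": 1000.0, "cx": 960.0, "cy": 540.0,
#  "k1": 0.0, "k2": 0.0, "p1": 0.0, "p2": 0.0, "camera_model": "OPENCV"}.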


def colmap_to_json(
    recon_dir: Path,
    output_dir: Path,
    camera_mask_path: Optional[Path] = None,
    image_id_to_depth_path: Optional[Dict[int, Path]] = None,
    image_rename_map: Optional[Dict[str, str]] = None,
    ply_filename: str = "sparse_pc.ply",
    keep_original_world_coordinate: bool = False,
    use_single_camera_mode: bool = True,
) -> int:
    """Converts COLMAP's cameras.bin and images.bin to a JSON file.

    Args:
        recon_dir: Path to the reconstruction directory, e.g. "sparse/0"
        output_dir: Path to the output directory.
        camera_mask_path: Path to the camera mask.
        image_id_to_depth_path: When including sfm-based depth, embed these depth file paths in the exported json
        image_rename_map: Use these image names instead of the names embedded in the COLMAP db
        ply_filename: Name of the sparse point cloud file written alongside transforms.json.
        keep_original_world_coordinate: If True, no extra transform will be applied to the world coordinate.
            The COLMAP-optimized world frame often has the first camera's y axis pointing down, while the
            nerfstudio viewer expects the z axis to point up.
        use_single_camera_mode: If True and the reconstruction contains exactly one camera, write the shared
            intrinsics once at the top level; otherwise write per-frame camera parameters.
    Returns:
        The number of registered images.
    """

    # TODO(1480) use pycolmap
    # recon = pycolmap.Reconstruction(recon_dir)
    # cam_id_to_camera = recon.cameras
    # im_id_to_image = recon.images
    cam_id_to_camera = read_cameras_binary(recon_dir / "cameras.bin")
    im_id_to_image = read_images_binary(recon_dir / "images.bin")
    if set(cam_id_to_camera.keys()) != {1}:
        CONSOLE.print(f"[bold yellow]Warning: More than one camera is found in {recon_dir}")
        print(cam_id_to_camera)
        use_single_camera_mode = False  # fall back to per-frame camera parameters
        out = {}  # out = {"camera_model": parse_colmap_camera_params(cam_id_to_camera[1])["camera_model"]}
    else:  # one camera for all frames
        out = parse_colmap_camera_params(cam_id_to_camera[1])

    frames = []
    for im_id, im_data in im_id_to_image.items():
        # NB: COLMAP uses Eigen / scalar-first quaternions
        # * https://colmap.github.io/format.html
        # * https://github.com/colmap/colmap/blob/bf3e19140f491c3042bfd85b7192ef7d249808ec/src/base/pose.cc#L75
        # the `rotation_matrix()` handles that format for us.

        # TODO(1480) BEGIN use pycolmap API
        # rotation = im_data.rotation_matrix()
        rotation = qvec2rotmat(im_data.qvec)

        translation = im_data.tvec.reshape(3, 1)
        w2c = np.concatenate([rotation, translation], 1)
        w2c = np.concatenate([w2c, np.array([[0, 0, 0, 1]])], 0)
        c2w = np.linalg.inv(w2c)
        # Convert from COLMAP's camera coordinate system (OpenCV) to ours (OpenGL)
        c2w[0:3, 1:3] *= -1
        if not keep_original_world_coordinate:
            # Swap the y and z axes and flip the new z so that +z points up in the world frame.
            c2w = c2w[np.array([0, 2, 1, 3]), :]
            c2w[2, :] *= -1

        name = im_data.name
        if image_rename_map is not None:
            name = image_rename_map[name]
        name = Path(f"./images/{name}")

        frame = {
            "file_path": name.as_posix(),
            "transform_matrix": c2w.tolist(),
            "colmap_im_id": im_id,
        }
        if camera_mask_path is not None:
            frame["mask_path"] = camera_mask_path.relative_to(camera_mask_path.parent.parent).as_posix()
        if image_id_to_depth_path is not None:
            depth_path = image_id_to_depth_path[im_id]
            frame["depth_file_path"] = str(depth_path.relative_to(depth_path.parent.parent))

        if not use_single_camera_mode:  # add the camera parameters for this frame
            frame.update(parse_colmap_camera_params(cam_id_to_camera[im_data.camera_id]))

        frames.append(frame)

    out["frames"] = frames

    applied_transform = None
    if not keep_original_world_coordinate:
        applied_transform = np.eye(4)[:3, :]
        applied_transform = applied_transform[np.array([0, 2, 1]), :]
        applied_transform[2, :] *= -1
        out["applied_transform"] = applied_transform.tolist()

    # create ply from colmap
    assert ply_filename.endswith(".ply"), f"ply_filename: {ply_filename} does not end with '.ply'"
    create_ply_from_colmap(
        ply_filename,
        recon_dir,
        output_dir,
        torch.from_numpy(applied_transform).float() if applied_transform is not None else None,
    )
    out["ply_file_path"] = ply_filename

    with open(output_dir / "transforms.json", "w", encoding="utf-8") as f:
        json.dump(out, f, indent=4)

    return len(frames)
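
# Example usage (a minimal sketch; the paths below are hypothetical):
#
#     num_frames = colmap_to_json(
#         recon_dir=Path("data/scene/colmap/sparse/0"),
#         output_dir=Path("data/scene"),
#     )
#
# This writes data/scene/transforms.json and data/scene/sparse_pc.ply, and returns
# the number of registered images.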


def create_sfm_depth(
    recon_dir: Path,
    output_dir: Path,
    verbose: bool = True,
    depth_scale_to_integer_factor: float = 1000.0,
    min_depth: float = 0.001,
    max_depth: float = 10000,
    max_repoj_err: float = 2.5,
    min_n_visible: int = 2,
    include_depth_debug: bool = False,
    input_images_dir: Optional[Path] = None,
) -> Dict[int, Path]:
    """Converts COLMAP's points3d.bin to sparse depth map images encoded as
    16-bit "millimeter depth" PNGs.

    Notes:
      * This facility does NOT use COLMAP dense reconstruction; it creates depth
        maps from sparse SfM points here.
      * COLMAP does *not* reconstruct metric depth unless you give it calibrated
        (metric) intrinsics as input. Therefore, "depth" in this function has
        potentially ambiguous units.

    Args:
        recon_dir: Path to the reconstruction directory, e.g. "sparse/0"
        output_dir: Path to the output directory.
        verbose: If True, logs progress of depth image creation.
        depth_scale_to_integer_factor: Use this parameter to tune the conversion of
            raw depth measurements to integer depth values. This value should
            be equal to 1. / `depth_unit_scale_factor`, where
            `depth_unit_scale_factor` is the value you provide at training time.
            E.g. for millimeter depth, leave `depth_unit_scale_factor` at 1e-3
            and depth_scale_to_integer_factor at 1000.
        min_depth: Discard points closer than this to the camera.
        max_depth: Discard points farther than this from the camera.
        max_repoj_err: Discard points with reprojection error greater than this
            amount (in pixels).
        min_n_visible: Discard 3D points that have been triangulated with fewer
            than this many frames.
        include_depth_debug: Also include debug images showing depth overlaid
            upon RGB.
        input_images_dir: Directory holding the input RGB images; required when
            `include_depth_debug` is True.

    Returns:
        Depth file paths indexed by COLMAP image id
    """

    # TODO(1480) use pycolmap
    # recon = pycolmap.Reconstruction(recon_dir)
    # ptid_to_info = recon.points3D
    # cam_id_to_camera = recon.cameras
    # im_id_to_image = recon.images
    ptid_to_info = read_points3D_binary(recon_dir / "points3D.bin")
    cam_id_to_camera = read_cameras_binary(recon_dir / "cameras.bin")
    im_id_to_image = read_images_binary(recon_dir / "images.bin")

    # Only support first camera
    CAMERA_ID = 1
    W = cam_id_to_camera[CAMERA_ID].width
    H = cam_id_to_camera[CAMERA_ID].height

    if verbose:
        iter_images = track(
            im_id_to_image.items(), total=len(im_id_to_image.items()), description="Creating depth maps ..."
        )
    else:
        iter_images = iter(im_id_to_image.items())

    image_id_to_depth_path = {}
    for im_id, im_data in iter_images:
        # TODO(1480) BEGIN delete when abandoning colmap_parsing_utils
        pids = [pid for pid in im_data.point3D_ids if pid != -1]
        xyz_world = np.array([ptid_to_info[pid].xyz for pid in pids])
        rotation = qvec2rotmat(im_data.qvec)
        z = (rotation @ xyz_world.T)[-1] + im_data.tvec[-1]
        errors = np.array([ptid_to_info[pid].error for pid in pids])
        n_visible = np.array([len(ptid_to_info[pid].image_ids) for pid in pids])
        uv = np.array([im_data.xys[i] for i in range(len(im_data.xys)) if im_data.point3D_ids[i] != -1])
        # TODO(1480) END delete when abandoning colmap_parsing_utils

        # TODO(1480) BEGIN use pycolmap API
        # # Get only keypoints that have corresponding triangulated 3D points
        # p2ds = im_data.get_valid_points2D()
        # xyz_world = np.array([ptid_to_info[p2d.point3D_id].xyz for p2d in p2ds])
        # # COLMAP OpenCV convention: z is always positive
        # z = (im_data.rotation_matrix() @ xyz_world.T)[-1] + im_data.tvec[-1]
        # # Mean reprojection error in image space
        # errors = np.array([ptid_to_info[p2d.point3D_id].error for p2d in p2ds])
        # # Number of frames in which each point is visible
        # n_visible = np.array([ptid_to_info[p2d.point3D_id].track.length() for p2d in p2ds])
        # # Note: these are *unrectified* pixel coordinates that should match the original input
        # # no matter the camera model
        # uv = np.array([p2d.xy for p2d in p2ds])
        # TODO(1480) END use pycolmap API

        idx = np.where(
            (z >= min_depth)
            & (z <= max_depth)
            & (errors <= max_repoj_err)
            & (n_visible >= min_n_visible)
            & (uv[:, 0] >= 0)
            & (uv[:, 0] < W)
            & (uv[:, 1] >= 0)
            & (uv[:, 1] < H)
        )
        z = z[idx]
        uv = uv[idx]

        uu, vv = uv[:, 0].astype(int), uv[:, 1].astype(int)
        depth = np.zeros((H, W), dtype=np.float32)
        depth[vv, uu] = z

        # E.g. if `depth` is metric and in units of meters, and `depth_scale_to_integer_factor`
        # is 1000, then `depth_img` will be integer millimeters.
        depth_img = (depth_scale_to_integer_factor * depth).astype(np.uint16)

        out_name = str(im_data.name)
        depth_path = output_dir / out_name
        if depth_path.suffix == ".jpg":
            depth_path = depth_path.with_suffix(".png")
        cv2.imwrite(str(depth_path), depth_img)  # type: ignore

        image_id_to_depth_path[im_id] = depth_path

        if include_depth_debug:
            assert input_images_dir is not None, "Need explicit input_images_dir for debug images"
            assert input_images_dir.exists(), input_images_dir

            depth_flat = depth.flatten()[:, None]
            overlay = 255.0 * colormaps.apply_depth_colormap(torch.from_numpy(depth_flat)).numpy()
            overlay = overlay.reshape([H, W, 3])
            input_image_path = input_images_dir / im_data.name
            input_image = cv2.imread(str(input_image_path))  # type: ignore
            debug = 0.3 * input_image + 0.7 * overlay  # blend RGB with the depth colormap

            out_name = out_name + ".debug.jpg"
            output_path = output_dir / "debug_depth" / out_name
            output_path.parent.mkdir(parents=True, exist_ok=True)
            cv2.imwrite(str(output_path), debug.astype(np.uint8))  # type: ignore

    return image_id_to_depth_path
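
# Example usage (a minimal sketch; the paths below are hypothetical). The returned
# mapping can be passed straight to colmap_to_json so the depth paths are embedded
# in the exported transforms.json:
#
#     image_id_to_depth_path = create_sfm_depth(
#         recon_dir=Path("data/scene/colmap/sparse/0"),
#         output_dir=Path("data/scene/depths"),
#     )
#     colmap_to_json(
#         recon_dir=Path("data/scene/colmap/sparse/0"),
#         output_dir=Path("data/scene"),
#         image_id_to_depth_path=image_id_to_depth_path,
#     )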


def get_matching_summary(num_initial_frames: int, num_matched_frames: int) -> str:
    """Returns a summary of the matching results.

    Args:
        num_initial_frames: The number of initial frames.
        num_matched_frames: The number of matched frames.
    Returns:
        A summary of the matching results.
    """
    match_ratio = num_matched_frames / num_initial_frames
    if match_ratio == 1:
        return "[bold green]COLMAP found poses for all images, CONGRATS!"
    if match_ratio < 0.4:
        result = f"[bold red]COLMAP only found poses for {num_matched_frames / num_initial_frames * 100:.2f}%"
        result += " of the images. This is low.\nThis can be caused by a variety of reasons,"
        result += " such as poor scene coverage, blurry images, or large exposure changes."
        return result
    if match_ratio < 0.8:
        result = f"[bold yellow]COLMAP only found poses for {num_matched_frames / num_initial_frames * 100:.2f}%"
        result += " of the images.\nThis isn't great, but may be ok."
        result += "\nMissing poses can be caused by a variety of reasons, such as poor scene coverage, blurry images,"
        result += " or large exposure changes."
        return result
    return f"[bold green]COLMAP found poses for {num_matched_frames / num_initial_frames * 100:.2f}% of the images."


def create_ply_from_colmap(
    filename: str, recon_dir: Path, output_dir: Path, applied_transform: Union[torch.Tensor, None]
) -> None:
    """Writes a ply file from colmap.

    Args:
        filename: file name for .ply
        recon_dir: Directory to grab colmap points
        output_dir: Directory to output .ply
        applied_transform: Optional 3x4 transform applied to the points before writing,
            matching the transform applied to the camera poses.
    """
    if (recon_dir / "points3D.bin").exists():
        colmap_points = read_points3D_binary(recon_dir / "points3D.bin")
    elif (recon_dir / "points3D.txt").exists():
        colmap_points = read_points3D_text(recon_dir / "points3D.txt")
    else:
        raise ValueError(f"Could not find points3D.txt or points3D.bin in {recon_dir}")

    # Load point positions
    points3D = torch.from_numpy(np.array([p.xyz for p in colmap_points.values()], dtype=np.float32))
    if applied_transform is not None:
        assert applied_transform.shape == (3, 4)
        points3D = torch.einsum("ij,bj->bi", applied_transform[:3, :3], points3D) + applied_transform[:3, 3]

    # Load point colours
    points3D_rgb = torch.from_numpy(np.array([p.rgb for p in colmap_points.values()], dtype=np.uint8))

    # write ply
    with open(output_dir / filename, "w") as f:
        # Header
        f.write("ply\n")
        f.write("format ascii 1.0\n")
        f.write(f"element vertex {len(points3D)}\n")
        f.write("property float x\n")
        f.write("property float y\n")
        f.write("property float z\n")
        f.write("property uint8 red\n")
        f.write("property uint8 green\n")
        f.write("property uint8 blue\n")
        f.write("end_header\n")

        for coord, color in zip(points3D, points3D_rgb):
            x, y, z = coord
            r, g, b = color
            f.write(f"{x:8f} {y:8f} {z:8f} {r} {g} {b}\n")