Python implementation of depth filling from NYU Depth v2 toolbox
# Original Matlab code https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html
#
# Python port of depth filling code from the NYU toolbox.
# Speed needs to be improved.
#
# Uses the 'pypardiso' solver.
#
import scipy.sparse
import skimage.color
import numpy as np
from pypardiso import spsolve
from PIL import Image
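# Note: pypardiso wraps the Intel MKL PARDISO solver (easiest to install via
# conda); see the comments below for a pure-SciPy spsolve fallback.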

#
# fill_depth_colorization.m
# Preprocesses the Kinect depth image using a grayscale version of the
# RGB image as a weighting for the smoothing. This code is a slight
# adaptation of Anat Levin's colorization code:
#
# See: www.cs.huji.ac.il/~yweiss/Colorization/
#
# Args:
#   imgRgb        - HxWx3 matrix, the RGB image for the current frame. Values
#                   must be between 0 and 1.
#   imgDepthInput - HxW matrix, the depth image for the current frame in
#                   absolute (meters) space. Missing depth is marked by 0.
#   alpha         - a penalty value between 0 and 1 for the current depth values.
def fill_depth_colorization(imgRgb=None, imgDepthInput=None, alpha=1):
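    # Build a sparse linear system A*x = b over all pixels: off-diagonal
    # smoothness weights come from the grayscale image (following Levin
    # et al.'s colorization weighting), and pixels with a valid measured
    # depth are anchored by a diagonal data term scaled by alpha.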
    imgIsNoise = imgDepthInput == 0
    maxImgAbsDepth = np.max(imgDepthInput)
    imgDepth = imgDepthInput / maxImgAbsDepth
    imgDepth[imgDepth > 1] = 1
    (H, W) = imgDepth.shape
    numPix = H * W
    indsM = np.arange(numPix).reshape((W, H)).transpose()
    knownValMask = (imgIsNoise == False).astype(int)
    grayImg = skimage.color.rgb2gray(imgRgb)
    winRad = 1
    len_ = 0
    absImgNdx = 0
    len_window = (2 * winRad + 1) ** 2
    len_zeros = numPix * len_window

    cols = np.zeros(len_zeros) - 1
    rows = np.zeros(len_zeros) - 1
    vals = np.zeros(len_zeros) - 1
    gvals = np.zeros(len_window) - 1
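    # For each pixel, visit its (2*winRad + 1)^2 window, compute affinity
    # weights from grayscale similarity, normalize them, and store the
    # negated weights as the off-diagonal entries of that pixel's row;
    # the diagonal entry is set to 1.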
    for j in range(W):
        for i in range(H):
            nWin = 0
            for ii in range(max(0, i - winRad), min(i + winRad + 1, H)):
                for jj in range(max(0, j - winRad), min(j + winRad + 1, W)):
                    if ii == i and jj == j:
                        continue

                    rows[len_] = absImgNdx
                    cols[len_] = indsM[ii, jj]
                    gvals[nWin] = grayImg[ii, jj]

                    len_ = len_ + 1
                    nWin = nWin + 1

            curVal = grayImg[i, j]
            gvals[nWin] = curVal
            c_var = np.mean((gvals[:nWin + 1] - np.mean(gvals[:nWin + 1])) ** 2)

            csig = c_var * 0.6
            mgv = np.min((gvals[:nWin] - curVal) ** 2)
            if csig < -mgv / np.log(0.01):
                csig = -mgv / np.log(0.01)

            if csig < 2e-06:
                csig = 2e-06

            gvals[:nWin] = np.exp(-(gvals[:nWin] - curVal) ** 2 / csig)
            gvals[:nWin] = gvals[:nWin] / sum(gvals[:nWin])
            vals[len_ - nWin:len_] = -gvals[:nWin]

            # Now the self-reference (along the diagonal).
            rows[len_] = absImgNdx
            cols[len_] = absImgNdx
            vals[len_] = 1  # sum(gvals(1:nWin))

            len_ = len_ + 1
            absImgNdx = absImgNdx + 1
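    # Trim the preallocated triplet arrays to the entries actually used,
    # assemble the smoothness matrix A, and add the diagonal data-term
    # matrix G (alpha at pixels with a known depth, 0 elsewhere) before
    # solving the sparse system.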
    vals = vals[:len_]
    cols = cols[:len_]
    rows = rows[:len_]
    A = scipy.sparse.csr_matrix((vals, (rows, cols)), (numPix, numPix))

    rows = np.arange(0, numPix)
    cols = np.arange(0, numPix)
    vals = (knownValMask * alpha).transpose().reshape(numPix)
    G = scipy.sparse.csr_matrix((vals, (rows, cols)), (numPix, numPix))

    A = A + G
    b = np.multiply(vals.reshape(numPix), imgDepth.flatten('F'))

    # print('Solving system..')

    new_vals = spsolve(A, b)
    new_vals = np.reshape(new_vals, (H, W), 'F')

    # print('Done.')

    denoisedDepthImg = new_vals * maxImgAbsDepth

    output = denoisedDepthImg.reshape((H, W)).astype('float32')
    output = np.multiply(output, (1 - knownValMask)) + imgDepthInput

    return output
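For reference, a minimal usage sketch is shown below. The file names, the 1/1000 depth scaling, and the output rescaling are placeholder assumptions for illustration, not part of the gist:

if __name__ == '__main__':
    # Hypothetical inputs: adjust the paths and depth scaling to your data.
    rgb = np.asarray(Image.open('rgb.png'), dtype=np.float32) / 255.0        # HxWx3, values in [0, 1]
    depth = np.asarray(Image.open('depth.png'), dtype=np.float32) / 1000.0   # HxW, meters, 0 = missing
    filled = fill_depth_colorization(rgb, depth, alpha=1)
    # Rescale to 8 bits only for visualization/saving.
    Image.fromarray((filled / filled.max() * 255.0).astype(np.uint8)).save('depth_filled.png')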
@ialhashim
Please tell how you created 80 nodes for parallel running. It's taking 1 minute per image on a DGX-1. Please share any code regarding cluster creation.
By chance, do you know what the b vector is in the original colorization algorithm? It seems you are using the noisy depth image as the b vector.
Replace "from pypardiso import spsolve" with "from scipy.sparse.linalg import spsolve" if you're not using conda.
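A sketch of that swap as an import-time fallback (assuming you want to prefer pypardiso when it is available):

try:
    from pypardiso import spsolve              # fast Intel MKL PARDISO solver
except ImportError:
    from scipy.sparse.linalg import spsolve    # pure-SciPy fallback, typically slower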
Is there any way to improve the speed?
Yes, normalize RGB to values between 0.0 and 1.0 (from 0 to 255 range).
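A minimal example of that normalization, assuming imgRgb_uint8 is an 8-bit HxWx3 array (the variable name is just for illustration):

imgRgb = imgRgb_uint8.astype(np.float32) / 255.0  # map [0, 255] to [0.0, 1.0]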