Last active
September 10, 2020 10:37
-
-
Save berak/cd3fa149e8cd297597fc71427a576233 to your computer and use it in GitHub Desktop.
depth from single image / www
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Code running at https://berak.pythonanywhere.com/
Run get_model.sh to download the pretrained 'unet.onnx' model.
wsgi.py is the web server: it receives images of indoor scenes and sends back depth images.
up.html is the main web page.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Download the pretrained 'unet.onnx' model from Google Drive.
#
# Two requests are made: the first one only stores the cookies Google Drive
# sends into a cookie jar, the second one passes the value found in that jar
# as the 'confirm' parameter and saves the reply as ${filename}.

# stop at the first failing command or undefined variable
set -eu

fileid="14n3cTmaKGJHIhL0sBRaByuyPBDyntHZm"
filename="unet.onnx"
cookie="./cookie"

# do not leave the cookie jar behind, whether the download worked or not
trap 'rm -f "${cookie}"' EXIT

curl -c "${cookie}" -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null

# last field of the cookie jar line(s) mentioning "download"
confirm="$(awk '/download/ {print $NF}' "${cookie}")"

curl -Lb "${cookie}" "https://drive.google.com/uc?export=download&confirm=${confirm}&id=${fileid}" -o "${filename}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html> | |
<head> | |
<title> Depth from single image </title> | |
<style> | |
.main { | |
font-family: Arial, "MS Trebuchet", sans-serif; font-size: 12px; | |
border:0; | |
margin-top: 15px; margin-bottom: 15px; margin-right: 15px; margin-left: 15px; | |
} | |
p,.bordered { | |
margin-top: 5px; margin-bottom: 5px; margin-right: 5px; margin-left: 5px; | |
} | |
.bordered { | |
border-color:#777; | |
border-style:solid; | |
} | |
</style> | |
<script type="text/javascript"> | |
var ticks=0; | |
function postCanvasToURL(url) { // this is the actual workhorse
    // Post the content of the "output" canvas (base64 encoded png) to `url`
    // and draw the reply onto the "result" canvas.
    // Progress, download links and errors are shown in the "err" element.
    var status = document.getElementById("err");
    status.innerHTML = "... posting image";
    var type = "image/png";
    var data = document.getElementById("output").toDataURL(type);
    var xhr = new XMLHttpRequest();
    xhr.open('POST', url, true);
    xhr.setRequestHeader('Content-Type', "application/x-www-form-urlencoded");
    xhr.onreadystatechange = function(e) {
        // 4 == DONE, everything before is still in flight
        if (this.readyState !== 4) {
            return;
        }
        // only a successful reply carries an image, do not offer the
        // download links or try to render an error page
        if (this.status !== 200) {
            status.innerHTML = "<p> request failed (HTTP " + this.status + ")";
            return;
        }
        status.innerHTML = "<p> <a href='/depth.png'>16bit depth png</a>"
            + "<p> <a href='/depth.ply.gz'>zipped point cloud (ply.gz)</a><br>";
        render(this.response, "result");
    };
    // strip the data-URL prefix, only the base64 payload is sent
    data = data.replace('data:' + type + ';base64,', '');
    xhr.send(data);
}
function render(src, dst){
    // Load the image at `src` (URL or data URL) and draw it onto the canvas
    // with id `dst`. Images larger than 640x480 are shrunk to fit, keeping
    // the aspect ratio; the canvas takes the size of the drawn image.
    var image = new Image();
    image.onload = function(){
        var canvas = document.getElementById(dst);
        var MAX_W = 640;
        var MAX_H = 480;
        var w = image.width;
        var h = image.height;
        if (h > MAX_H || w > MAX_W) {
            // the smaller of the two ratios makes BOTH sides fit
            // (choosing by orientation alone lets e.g. 1000x900 overflow MAX_H)
            var scale = Math.min(MAX_W / w, MAX_H / h);
            w = Math.max(1, Math.round(w * scale));
            h = Math.max(1, Math.round(h * scale));
        }
        // setting the canvas size also clears it, no clearRect() needed
        canvas.width = w;
        canvas.height = h;
        canvas.getContext("2d").drawImage(image, 0, 0, w, h);
    };
    image.src = src;
}
function _load_image(src, dst){
    // Read the File `src` and render it onto the canvas with id `dst`.
    // A missing file or a file that is not an image is logged and ignored.
    if (!src) {
        // e.g. something else than a file was dropped, or the file dialog
        // was cancelled: files[0] is undefined then
        console.log("No file to load");
        return;
    }
    if(!src.type.match(/image.*/)){
        console.log("The dropped file is not an image: ", src.type);
        return;
    }
    var reader = new FileReader();
    reader.onload = function(e){
        // e.target.result is the data URL produced by readAsDataURL()
        render(e.target.result, dst);
    };
    reader.readAsDataURL(src);
}
function loadImageDrag(src){
    // A dropped file is shown on the "output" canvas.
    var canvasId = "output";
    _load_image(src, canvasId);
}
function loadImageFile(src){
    // Show the first file chosen in the "upload" input on the "output" canvas.
    // `src` is not used, the file is taken from the input element.
    var chosen = document.getElementById("upload").files[0];
    _load_image(chosen, "output");
}
</script> | |
</head> | |
<body class="main"> | |
<h2> Depth from a single image</h2> | |
<div id="droparea"> | |
<canvas id="output" width="320" height="240" class="bordered" title="drop an image here"></canvas> | |
<canvas id="result" width="320" height="240" class="bordered"></canvas> | |
<div> | |
<p>Drop files here, or</p> | |
<p> <input id="upload" type="file" multiple onchange="loadImageFile();"/></p> | |
<p> <input id="uploadbtn" type="button" value="Generate Depth Image" onclick="postCanvasToURL('/up');"/></p> | |
<p> <div id="err"></div></p> | |
</div> | |
</div> | |
<p><br><p> | |
<font color=#999> | |
<p><a href='https://github.com/karoly-hars/DE_resnet_unet_hyb'>DE_resnet_unet_hyb</a> trained on <a href="https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html">interior room images</a> | |
<p>running on opencv's dnn ;) | |
</font> | |
</body> | |
<script type="text/javascript"> | |
// Enable drag & drop of an image file onto the "droparea" element.
// Both listeners are registered for the capture phase (third argument true).
var target = document.getElementById("droparea");
// cancel the default dragover action so that the element accepts a drop
target.addEventListener("dragover", function(e){e.preventDefault();}, true);
target.addEventListener("drop", function(e){
    // suppress the browser's own handling of the dropped file
    e.preventDefault();
    // only the first dropped file is used
    loadImageDrag(e.dataTransfer.files[0]);
}, true);
</script> | |
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import os, base64 | |
import cv2, numpy as np | |
import math | |
import gzip | |
# size (in pixels) the input image is scaled / cropped to before it is fed to the network
HEIGHT = 256
WIDTH = 320
# per channel normalization used in process(): (pixel / 255 - mean) / std
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
# the model is loaded once, when the module is imported, from the working directory
net = cv2.dnn.readNet("unet.onnx")
# header of the ascii ply file produced by write_ply(), which fills in vert_num
ply_header = '''ply
format ascii 1.0
element vertex %(vert_num)d
property float x
property float y
property float z
property uchar red
property uchar green
property uchar blue
end_header
'''
def write_ply(img, depth, scale=1.0/255): # scale for 16bit !
    """Write the image as a gzipped ascii ply point cloud to "depth.ply.gz".

    One vertex is written per pixel of *img*: x and y are the pixel
    coordinates, z is ``scale * depth`` at that pixel and the colour is
    taken from the image.

    img   -- colour image indexed as [y, x, channel]; the channels 2, 1, 0
             are written as red, green, blue
    depth -- depth map, resized (cubic interpolation) to the size of img
    scale -- factor applied to every depth value to get z
    """
    h, w = img.shape[:2]
    # the interpolation has to be passed by keyword: the third positional
    # argument of cv2.resize() is the output array, not the interpolation
    depth = cv2.resize(depth, (w, h), interpolation=cv2.INTER_CUBIC)
    vertex_fmt = b'%f %f %f %d %d %d\n'
    # the with-block closes the file, no explicit close() needed
    with gzip.open("depth.ply.gz", 'wb') as f:
        f.write((ply_header % dict(vert_num=w * h)).encode('utf-8'))
        for y in range(h):
            # one write per image row instead of one per pixel
            f.write(b''.join(
                vertex_fmt % (x, y, scale * depth[y, x],
                              img[y, x, 2], img[y, x, 1], img[y, x, 0])
                for x in range(w)))
def depth_to_grayscale(depth, max_dist=10.0):
    """Transform a prediction into a 16-bit grayscale image.

    depth    -- prediction array with 3 axes; axis 0 is moved to the end
                before scaling (assumed to be the channel axis -- the
                caller passes ``res[0, :, :, :]``)
    max_dist -- depth that maps to the brightest value; larger depths are
                clipped to it, negative ones to 0

    Returns a uint16 image resized to WIDTH x HEIGHT with values in
    0 .. 255*255. The input array is not modified.
    """
    depth = np.transpose(depth, (1, 2, 0))
    # clip BEFORE the integer conversion: a negative value cast to uint16
    # would wrap around to a large one. np.clip() returns a new array, so
    # the caller's data stays untouched.
    depth = np.clip(depth, 0, max_dist) / max_dist
    # 255*255 (not 65535) is the full scale, write_ply() and process() divide by 255
    depth = np.array(depth * 255.0*255, dtype=np.uint16)
    #bgr_depth_img = cv2.cvtColor(depth, cv2.COLOR_GRAY2BGR)
    return cv2.resize(depth, (WIDTH, HEIGHT))
def scale_image(img, scale=None):
    """Resize an image by *scale* using nearest neighbour interpolation.

    Without a scale, the larger of WIDTH / image width and HEIGHT / image
    height is used. The new size is rounded up to whole pixels.
    """
    rows, cols = img.shape[0], img.shape[1]
    if scale is None:
        ratio_w = WIDTH / cols
        ratio_h = HEIGHT / rows
        scale = ratio_w if ratio_w > ratio_h else ratio_h
    # cv2 expects the size as (width, height)
    target = (math.ceil(cols * scale), math.ceil(rows * scale))
    return cv2.resize(img, target, interpolation=cv2.INTER_NEAREST)
def center_crop(img):
    """Cut the central HEIGHT x WIDTH window out of an image."""
    top = (img.shape[0] - HEIGHT) // 2
    left = (img.shape[1] - WIDTH) // 2
    return img[top:top + HEIGHT, left:left + WIDTH]
def process(org):
    """Predict the depth of the image *org*.

    Side effects: the 16-bit depth map is written to "depth.png" and the
    point cloud (via write_ply) to "depth.ply.gz".

    Returns the depth as an 8-bit image with the size of *org*.
    """
    in_height, in_width = org.shape[0], org.shape[1]

    # network input: scaled, center cropped, normalized
    img = center_crop(scale_image(org)).astype(np.float32) / 255.0
    # NOTE(review): mean / std look like the usual ImageNet statistics in RGB
    # order, while images decoded by OpenCV are BGR -- confirm the channel
    # order the model expects.
    img -= mean
    img /= std
    # move the last axis to the front and prepend a batch axis of size 1
    img = img.transpose(2, 0, 1)
    blob = img.reshape((1,) + img.shape)

    net.setInput(blob)
    prediction = net.forward()
    res = depth_to_grayscale(prediction[0, :, :, :])

    cv2.imwrite("depth.png", res)
    write_ply(org, res)

    # 16-bit (0 .. 255*255) down to 8-bit for display
    draw = (res / 255).astype(np.uint8)
    return cv2.resize(draw, (in_width, in_height))
def _read(fname):
    """Return the whole content of the file *fname* as bytes."""
    # the with-block closes the file even if read() raises
    with open(fname, "rb") as f:
        return f.read()
def application(environ, start_response):
    """WSGI entry point: serve the web page, the result files and the depth prediction.

    Routes:
      /              -- up.html
      /dn            -- my.png (8-bit result of the last upload)
      /depth.png     -- depth.png
      /depth.ply.gz  -- depth.ply.gz
      /up            -- the request body is a base64 encoded image (a
                        'data:image/png;base64,' prefix is removed if present);
                        the reply is the depth image as a
                        'data:image/png;base64,...' string

    The response body is always bytes, as WSGI requires. Unknown paths and
    files that do not exist (yet) give a 404, an empty or undecodable upload
    gives a 400.
    """
    # url -> (file to send, content type)
    static_files = {
        "/": ("up.html", "text/html"),
        "/dn": ("my.png", "image/png"),
        "/depth.png": ("depth.png", "image/png"),
        # the file is gzip compressed, not a zip archive
        "/depth.ply.gz": ("depth.ply.gz", "application/gzip"),
    }

    # CONTENT_LENGTH may be missing, empty or garbage: treat all as "no body"
    try:
        body_size = int(environ.get('CONTENT_LENGTH') or 0)
    except ValueError:
        body_size = 0
    request_body = environ['wsgi.input'].read(body_size) if body_size > 0 else b""

    retcode = '200 OK'
    url = environ.get('PATH_INFO', '')
    if url in static_files:
        fname, ct = static_files[url]
        try:
            resp = _read(fname)
        except OSError:
            # e.g. depth.png is requested before anything was uploaded
            retcode, ct, resp = '404 Not Found', 'text/plain', b"not found\r\n"
    elif url == "/up":
        ct = 'image/png'
        prefix = b'data:' + ct.encode('ascii') + b';base64,'
        try:
            data = base64.b64decode(request_body.replace(prefix, b""))
        except ValueError:
            # binascii.Error (a ValueError): the body is not valid base64
            data = b""
        # imdecode() returns None if the buffer does not hold a readable image
        img = cv2.imdecode(np.frombuffer(data, dtype=np.uint8), 1) if data else None
        if img is None:
            retcode, ct, resp = '400 Bad Request', 'text/plain', b"no image\r\n"
        else:
            img = process(img)
            cv2.imwrite("my.png", img)
            ok, enc = cv2.imencode(".png", img)
            if ok:
                # NOTE: the body is a data-URL *string*, the page uses it as
                # image source; the content type is kept as image/png
                resp = prefix + base64.b64encode(enc.tobytes())
            else:
                retcode, ct, resp = '500 Internal Server Error', 'text/plain', b"encoding failed\r\n"
    else:
        retcode, ct, resp = '404 Not Found', 'text/plain', b"not found\r\n"

    start_response(retcode, [('Content-Type', ct), ('Content-Length', str(len(resp)))])
    return [resp]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment