Created
January 26, 2017 05:42
-
-
Save liruoteng/771eecde0277e4c0b57c2a96ac0c2167 to your computer and use it in GitHub Desktop.
Convert image files and optical flow files into LMDB data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import caffe | |
import numpy as np | |
import flowlib as fl | |
from PIL import Image | |
def data2lmdb(imagefile1, imagefile2, labelfile):
    """Write one image pair and its flow label into two LMDB databases.

    The image pair is preprocessed by ``preprocess_image`` and the flow file
    by ``preprocess_label``. Each result is converted to a Caffe ``Datum``
    and stored under the key ``00000000`` in ``image-lmdb`` and
    ``label-lmdb`` respectively (paths relative to the working directory).

    :param imagefile1: path of the 1st image file
    :param imagefile2: path of the 2nd image file
    :param labelfile: path of the optical flow (label) file
    """
    # lmdb is used below but is not among the module-level imports.
    import lmdb

    # preprocessing (the caller's paths are used as given; they are no longer
    # overwritten with empty strings)
    img = preprocess_image(imagefile1, imagefile2)
    label = preprocess_label(labelfile)

    # Convert to data
    im_data = caffe.io.array_to_datum(img)
    label_data = caffe.io.array_to_datum(label)

    # Same key in both databases so image and label records line up.
    # Encoded to bytes because py-lmdb rejects text keys on Python 3.
    key = '{:08}'.format(0).encode('ascii')

    # write each datum, closing every environment once its write is done
    for db_path, datum in (('image-lmdb', im_data), ('label-lmdb', label_data)):
        db = lmdb.open(db_path, map_size=int(1e12))
        try:
            with db.begin(write=True) as txn:
                txn.put(key, datum.SerializeToString())
        finally:
            db.close()
def preprocess_image(imagefile1, imagefile2, crop_size=(32, 32)):
    """Load two images and stack them into one channel-first BGR array.

    Both images are decoded as 3-channel RGB (so grayscale, palette or RGBA
    files also work), flipped to BGR channel order, concatenated along the
    channel axis, transposed to (channels, height, width) and cropped from
    the top-left corner.

    :param imagefile1: path of the 1st image file
    :param imagefile2: path of the 2nd image file
    :param crop_size: (height, width) of the top-left crop; ``None`` keeps
        the full image. Defaults to the original 32 x 32 crop.
    :return: array with 6 channels on axis 0 (BGR of image 1, BGR of image 2)
    """
    frames = []
    for path in (imagefile1, imagefile2):
        # the context manager releases the file handle after decoding
        with Image.open(path) as handle:
            rgb = np.array(handle.convert('RGB'))
        # RGB to BGR for caffe
        frames.append(rgb[:, :, ::-1])

    # Concatenate along the channel axis, then convert to caffe blob layout
    blob = np.concatenate(frames, axis=2).transpose((2, 0, 1))

    # Crop - optional
    if crop_size is not None:
        crop_h, crop_w = crop_size
        blob = blob[:, 0:crop_h, 0:crop_w]
    return blob
def preprocess_label(labelfile, crop_size=(32, 32)):
    """Read a flow file and turn its horizontal component into a label map.

    :param labelfile: path of the optical flow file, read with
        ``fl.read_flow``
    :param crop_size: (height, width) of the top-left crop; ``None`` keeps
        the full flow field. Defaults to the original 32 x 32 crop.
    :return: the array produced by ``flow2label`` for the cropped
        horizontal flow (previously the result was computed but never
        returned, so callers always received ``None``)
    """
    # read flow file
    flow = fl.read_flow(labelfile)
    # take horizontal flow u only (copied, so the flow array is not aliased)
    u = np.array(flow[:, :, 0])
    # crop if necessary
    if crop_size is not None:
        crop_h, crop_w = crop_size
        u = u[0:crop_h, 0:crop_w]
    # quantize the displacements into integer labels
    return flow2label(u)
def flow2label(flow):
    """Quantize a single-component flow field into uint8 labels.

    Values whose magnitude exceeds 1e8 are treated as unknown/occluded and
    set to 0. The field is then floored and shifted by the absolute value of
    the floored minimum, cast to ``uint8`` and given a leading axis of
    length 1.

    The input array is not modified; the work is done on a copy.

    :param flow: array of flow values for one component
    :return: ``uint8`` array with shape ``(1,) + flow.shape``
    """
    # unknown flow, occlusion
    threshold = 1e8

    # copy first so zeroing unknown pixels does not leak into the caller's data
    flow = np.array(flow)
    flow[abs(flow) > threshold] = 0

    # the +/-999 sentinels only matter for fields lying entirely outside them
    max_flow = max(-999, np.max(flow))
    min_flow = min(999, np.min(flow))
    print('max_flow: ', max_flow)
    print('min_flow: ', min_flow)

    # NOTE(review): the shift is abs(floor(min)), which maps the minimum to 0
    # only when the minimum is <= 0 -- confirm this is intended for
    # all-positive fields. The uint8 cast also wraps if the range exceeds 255.
    label = np.floor(flow) + abs(np.floor(min_flow))
    label = label.astype(np.uint8)
    return np.expand_dims(label, axis=0)
def read_lmdb(database_file):
    """Read the first record of an LMDB database as a flat uint8 array.

    The value stored under key ``00000000`` is parsed as a Caffe ``Datum``
    and its ``data`` bytes are returned as a 1-D array.

    :param database_file: path of the LMDB database to open read-only
    :return: writable 1-D ``uint8`` array holding the datum's raw bytes
    """
    # lmdb is used below but is not among the module-level imports.
    import lmdb

    db = lmdb.open(database_file, readonly=True)
    try:
        with db.begin() as txn:
            # get the first key value
            # NOTE(review): per the py-lmdb docs, get() returns None for a
            # missing key, which makes ParseFromString below fail.
            raw_data = txn.get(b'00000000')
    finally:
        # release the environment even if the lookup fails
        db.close()

    datum = caffe.proto.caffe_pb2.Datum()
    datum.ParseFromString(raw_data)

    # convert the serialized bytes to actual data: a flat array of N bytes.
    # frombuffer replaces the deprecated binary mode of np.fromstring; the
    # copy keeps the result writable, as fromstring's was.
    content = np.frombuffer(datum.data, dtype=np.uint8).copy()
    return content
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import numpy as np | |
import matplotlib.pyplot as plt | |
# Calculate flow end point error
def flow_error(f1, f2):
    """
    Print the average end point error between two flow files and show both
    :param f1: path of the ground truth .flo file
    :param f2: path of the .flo file under evaluation
    :return: None
    """
    # Read flow files (the reader in this module is named read_flow)
    gt_flow = read_flow(f1)  # ground truth flow
    eva_flow = read_flow(f2)  # test flow

    # Calculate errors
    average_pe = flowAngErr(gt_flow[:, :, 0], gt_flow[:, :, 1], eva_flow[:, :, 0], eva_flow[:, :, 1])
    # single-argument print gives the same output on Python 2 and 3
    print("average end point error is: %s" % (average_pe,))

    # Visualize flow, one figure per flow field
    gt_img = visualize_flow(gt_flow)
    eva_img = visualize_flow(eva_flow)
    plt.figure(1)
    plt.imshow(gt_img)
    plt.figure(2)
    plt.imshow(eva_img)
    plt.show()
# show flow file visualization
def show_flow(filename):
    """
    Read a flow file and display its color-coded visualization
    :param filename: path of the .flo file to display
    :return: None
    """
    rendered = visualize_flow(read_flow(filename))
    plt.imshow(rendered)
    plt.show()
# Find indices of a matrix
def indices(a, func):
    """
    Collect the positions of the elements that satisfy a predicate
    :param a: iterable to scan
    :param func: callable applied to each element; truthy results are kept
    :return: list of positions (in iteration order) where func was truthy
    """
    return [position for position, element in enumerate(a) if func(element)]
# NOTE: .flo data is little-endian; the explicit '<' dtypes below make the
# reader independent of the host byte order.
def read_flow(filename):
    """
    Read a .flo optical flow file
    :param filename: path of the .flo file
    :return: float32 array of shape (h, w, 2), i.e. (rows, columns,
             channels), or None (after printing a message) when the file is
             not a valid .flo file
    """
    # the with-block closes the file even if a read raises
    with open(filename, 'rb') as f:
        magic = np.fromfile(f, '<f4', count=1)
        if magic.size != 1 or magic[0] != 202021.25:
            print('Magic number incorrect. Invalid .flo file')
            return None

        # width is stored first, then height; take plain ints so they can be
        # used as a count and in the format string
        dims = np.fromfile(f, '<i4', count=2)
        if dims.size != 2 or dims[0] < 0 or dims[1] < 0:
            print('Size header missing or negative. Invalid .flo file')
            return None
        w, h = int(dims[0]), int(dims[1])
        print("Reading %d x %d flo file" % (h, w))

        data2d = np.fromfile(f, '<f4', count=2 * w * h)

    # np.resize would silently repeat data to fill a short read; refuse instead
    if data2d.size != 2 * w * h:
        print('Flow data truncated. Invalid .flo file')
        return None

    # reshape data into 3D array (rows, columns, channels) in native byte order
    return data2d.reshape((h, w, 2)).astype(np.float32, copy=False)
# Calculate average end point error
def flowAngErr(tu, tv, u, v):
    """
    Calculate the mean end point error between two flow fields

    Despite its name, this function returns the average end point error; no
    angular error is computed. Only pixels whose ground truth flow is
    non-zero in at least one component contribute to the mean.

    :param tu: ground truth horizontal flow
    :param tv: ground truth vertical flow
    :param u: estimated horizontal flow
    :param v: estimated vertical flow
    :return: mean end point error over the selected pixels
    """
    smallflow = 0.0
    tu, tv, u, v = (np.asarray(component) for component in (tu, tv, u, v))

    # A plain boolean mask: wrapping it in a list (as before) is rejected as
    # an index by current NumPy.
    valid = (np.absolute(tu) > smallflow) | (np.absolute(tv) > smallflow)

    epe = np.sqrt((tu - u) ** 2 + (tv - v) ** 2)
    return np.mean(epe[valid])
def image_adjust(img, sz):
    """
    Adjust image to size
    :param img: array of image to be resized
    :param sz: tuple value (H,W) height x width
    :return: adjusted image

    Uses ``scipy.misc.imresize`` when the installed SciPy still provides it
    (it was removed in SciPy 1.3). Otherwise falls back to Pillow, following
    imresize's approach: non-uint8 input is rescaled to the 0..255 range and
    the image is resized with bilinear interpolation.
    """
    try:
        # legacy path, kept first so existing environments behave as before
        from scipy.misc import imresize
    except ImportError:
        imresize = None
    if imresize is not None:
        return imresize(img, size=sz)

    # imresize itself relied on Pillow, so this adds no new requirement
    from PIL import Image

    data = np.asarray(img)
    if data.dtype != np.uint8:
        # byte-scale: stretch [min, max] onto [0, 255]; a flat image maps to 0
        low, high = data.min(), data.max()
        span = (high - low) or 1
        data = (((data - low) * (255.0 / span)).clip(0, 255) + 0.5).astype(np.uint8)

    height, width = sz
    # Pillow takes (width, height), the reverse of the (H, W) convention here
    resized = Image.fromarray(data).resize((width, height), resample=Image.BILINEAR)
    return np.array(resized)
def visualize_flow(flow):
    """
    Render a flow field as a color image
    :param flow: array whose last axis holds the (u, v) flow components
    :return: uint8 color image from compute_color, with unknown-flow pixels
             set to 0. The input array is not modified.
    """
    UNKNOWN_FLOW_THRESH = 1e9

    # copies, so zeroing unknown pixels does not alter the caller's flow
    u = np.array(flow[:, :, 0])
    v = np.array(flow[:, :, 1])

    idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH)
    u[idxUnknow] = 0
    v[idxUnknow] = 0

    # the +/-999 sentinels only matter for fields lying entirely outside them
    maxu = max(-999., np.max(u))
    minu = min(999., np.min(u))
    maxv = max(-999., np.max(v))
    minv = min(999., np.min(v))

    # compute the largest magnitude before reporting it (it used to be
    # printed while still holding its initial value of -1)
    rad = np.sqrt(u ** 2 + v ** 2)
    maxrad = max(-1, np.max(rad))

    print("max flow: %.4f flow range: u = %.3f .. %.3f; v = %.3f .. %.3f\n" % (maxrad, minu, maxu, minv, maxv))

    # normalize by the largest magnitude; eps guards against division by zero
    u = u / (maxrad + np.finfo(float).eps)
    v = v / (maxrad + np.finfo(float).eps)

    img = compute_color(u, v)

    # black out pixels whose flow was unknown, across all three channels
    idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2)
    img[idx] = 0

    return np.uint8(img)
def compute_color(u, v):
    """
    Map flow components to colors using the color wheel
    :param u: 2-D array of horizontal flow
    :param v: 2-D array of vertical flow, same shape as u
    :return: (h, w, 3) float array of whole-number color values; pixels
             where u or v is NaN are 0. The inputs are not modified.
    """
    h, w = u.shape
    img = np.zeros([h, w, 3])

    # replace NaNs in fresh arrays rather than writing into the caller's data
    nanIdx = np.isnan(u) | np.isnan(v)
    u = np.where(nanIdx, 0, u)
    v = np.where(nanIdx, 0, v)

    colorwheel = make_color_wheel()
    ncols = np.size(colorwheel, 0)

    rad = np.sqrt(u ** 2 + v ** 2)

    # angle scaled to [-1, 1], then to a fractional 1-based wheel position
    a = np.arctan2(-v, -u) / np.pi
    fk = (a + 1) / 2 * (ncols - 1) + 1

    # the two neighbouring wheel entries (1-based) and the blend weight
    k0 = np.floor(fk).astype(int)
    k1 = k0 + 1
    k1[k1 == ncols + 1] = 1  # wrap past the last entry back to the first
    f = fk - k0

    # the same for every channel, so computed once outside the loop
    inside = rad <= 1
    outside = np.logical_not(inside)
    valid = 1 - nanIdx

    for i in range(0, np.size(colorwheel, 1)):
        tmp = colorwheel[:, i]
        col0 = tmp[k0 - 1] / 255
        col1 = tmp[k1 - 1] / 255
        col = (1 - f) * col0 + f * col1

        # within unit radius: smaller magnitudes fade towards white
        col[inside] = 1 - rad[inside] * (1 - col[inside])
        # beyond unit radius: darken
        col[outside] *= 0.75

        img[:, :, i] = np.uint8(np.floor(255 * col * valid))

    return img
def make_color_wheel():
    """
    Build the color wheel used for flow visualization
    :return: (55, 3) float array; each row is one color with channel values
             in 0..255
    """
    # One entry per wheel segment:
    # (length, channel held at 255, channel that ramps, ramp rises?)
    segments = (
        (15, 0, 1, True),    # RY
        (6, 1, 0, False),    # YG
        (4, 1, 2, True),     # GC
        (11, 2, 1, False),   # CB
        (13, 2, 0, True),    # BM
        (6, 0, 2, False),    # MR
    )

    total = sum(segment[0] for segment in segments)
    wheel = np.zeros([total, 3])

    start = 0
    for length, full_channel, ramp_channel, rising in segments:
        stop = start + length
        ramp = np.floor(255 * np.arange(0, length) / length)
        wheel[start:stop, full_channel] = 255
        wheel[start:stop, ramp_channel] = ramp if rising else 255 - ramp
        start = stop

    return wheel
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment