Naive incremental structure-from-motion reconstruction using OpenCV (SIFT features, essential-matrix initialization, PnP registration, triangulation) with Ceres bundle adjustment.
//http://ceres-solver.org/installation.html
//cmake -DEIGENSPARSE=ON ../ceres-solver-1.12.0
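//A possible build command (illustrative only -- assumes OpenCV 3.x built with the
//xfeatures2d contrib module, plus Ceres and glog discoverable by pkg-config; adjust
//names and paths to your own setup):
//  g++ -std=c++11 main.cpp -o sfm `pkg-config --cflags --libs opencv` -lceres -lglog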
#include <opencv2/xfeatures2d/nonfree.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/calib3d/calib3d.hpp>
#include <iostream>
#include <cfloat> //FLT_MAX
#include <ceres/ceres.h>
#include <ceres/rotation.h>
#include "tinydir.h"
using namespace cv;
using namespace std;
void extract_features(
    vector<string>& image_names,
    vector<vector<KeyPoint> >& key_points_for_all,
    vector<Mat>& descriptor_for_all,
    vector<vector<Vec3b> >& colors_for_all
    )
{
    key_points_for_all.clear();
    descriptor_for_all.clear();
    Mat image;
    //read each image, detect SIFT features, and store keypoints and descriptors
    Ptr<Feature2D> sift = xfeatures2d::SIFT::create(0, 3, 0.04, 10);
    for (auto it = image_names.begin(); it != image_names.end(); ++it)
    {
        image = imread(*it);
        if (image.empty()) continue;
        cout << "Extracting features: " << *it << endl;
        vector<KeyPoint> key_points;
        Mat descriptor;
        //detectAndCompute may occasionally fail to allocate memory on large images
        sift->detectAndCompute(image, noArray(), key_points, descriptor);
        //skip images with too few features
        if (key_points.size() <= 10) continue;
        key_points_for_all.push_back(key_points);
        descriptor_for_all.push_back(descriptor);
        cout << key_points.size() << endl;
        //sample the BGR color under each keypoint for later point-cloud coloring
        vector<Vec3b> colors(key_points.size());
        for (int i = 0; i < key_points.size(); ++i)
        {
            Point2f& p = key_points[i].pt;
            colors[i] = image.at<Vec3b>((int)p.y, (int)p.x);
        }
        colors_for_all.push_back(colors);
    }
}
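//match_features below applies Lowe's ratio test: for each query descriptor the two
//nearest training descriptors are retrieved, and a match is kept only if
//  d(best) < 0.6 * d(second best),
//i.e. the best match is clearly better than the runner-up. 0.6 is this file's
//threshold; Lowe's paper suggests ~0.8, so the filter here is stricter.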
void match_features(Mat& query, Mat& train, vector<DMatch>& matches)
{
    vector<vector<DMatch> > knn_matches;
    BFMatcher matcher(NORM_L2);
    matcher.knnMatch(query, train, knn_matches, 2);
    //find the minimum distance among matches that pass the ratio test
    float min_dist = FLT_MAX;
    for (int r = 0; r < knn_matches.size(); ++r)
    {
        //ratio test
        if (knn_matches[r][0].distance > 0.6*knn_matches[r][1].distance)
            continue;
        float dist = knn_matches[r][0].distance;
        if (dist < min_dist) min_dist = dist;
    }
    matches.clear();
    for (size_t r = 0; r < knn_matches.size(); ++r)
    {
        //discard matches that fail the ratio test or whose distance is too large
        if (
            knn_matches[r][0].distance > 0.6*knn_matches[r][1].distance ||
            knn_matches[r][0].distance > 5 * max(min_dist, 10.0f)
            )
            continue;
        //keep the match
        matches.push_back(knn_matches[r][0]);
    }
    cout << matches.size() << endl;
}
void match_features(vector<Mat>& descriptor_for_all, vector<vector<DMatch> >& matches_for_all)
{
    matches_for_all.clear();
    //n images: match consecutive pairs, i.e. 1&2, 2&3, 3&4, ...
    //(i + 1 < size() avoids unsigned underflow when the list is empty)
    for (size_t i = 0; i + 1 < descriptor_for_all.size(); ++i)
    {
        cout << "Matching images " << i << " - " << i + 1 << endl;
        vector<DMatch> matches;
        match_features(descriptor_for_all[i], descriptor_for_all[i + 1], matches);
        matches_for_all.push_back(matches);
    }
}
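//find_transform below estimates relative pose from the epipolar constraint
//  x2^T * E * x1 = 0,  with  E = [t]x * R,
//where x1, x2 are normalized image coordinates of a correspondence. findEssentialMat
//fits E with RANSAC; recoverPose decomposes E into its four (R, t) candidates and
//keeps the one that places the most triangulated points in front of both cameras.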
bool find_transform(Mat& K, vector<Point2f>& p1, vector<Point2f>& p2, Mat& R, Mat& T, Mat& mask)
{
    //read the focal length and principal point off the intrinsic matrix
    double focal_length = 0.5*(K.at<double>(0) + K.at<double>(4));
    Point2d principal_point(K.at<double>(2), K.at<double>(5));
    //estimate the essential matrix from the matched points with RANSAC
    Mat E = findEssentialMat(p1, p2, focal_length, principal_point, RANSAC, 0.999, 1.0, mask);
    if (E.empty()) return false;
    double feasible_count = countNonZero(mask);
    cout << (int)feasible_count << " -in- " << p1.size() << endl;
    //the RANSAC estimate is unreliable if fewer than ~60% of the matches are inliers
    if (feasible_count <= 15 || (feasible_count / p1.size()) < 0.6)
        return false;
    //decompose E into R and T
    int pass_count = recoverPose(E, p1, p2, R, T, focal_length, principal_point, mask);
    //most of the inlier points should end up in front of both cameras
    if (((double)pass_count) / feasible_count < 0.7)
        return false;
    return true;
}
void get_matched_points(
    vector<KeyPoint>& p1,
    vector<KeyPoint>& p2,
    const vector<DMatch>& matches,
    vector<Point2f>& out_p1,
    vector<Point2f>& out_p2
    )
{
    out_p1.clear();
    out_p2.clear();
    for (int i = 0; i < matches.size(); ++i)
    {
        out_p1.push_back(p1[matches[i].queryIdx].pt);
        out_p2.push_back(p2[matches[i].trainIdx].pt);
    }
}
void get_matched_colors(
    vector<Vec3b>& c1,
    vector<Vec3b>& c2,
    const vector<DMatch>& matches,
    vector<Vec3b>& out_c1,
    vector<Vec3b>& out_c2
    )
{
    out_c1.clear();
    out_c2.clear();
    for (int i = 0; i < matches.size(); ++i)
    {
        out_c1.push_back(c1[matches[i].queryIdx]);
        out_c2.push_back(c2[matches[i].trainIdx]);
    }
}
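//reconstruct below triangulates with the linear (DLT) method: each view contributes
//a projection matrix P = K * [R | t], and triangulatePoints solves for the
//homogeneous 4-vector X minimizing the algebraic error of p1 ~ P1*X, p2 ~ P2*X.
//The result is divided by its fourth component to recover Euclidean coordinates.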
void reconstruct(Mat& K, Mat& R1, Mat& T1, Mat& R2, Mat& T2, vector<Point2f>& p1, vector<Point2f>& p2, vector<Point3f>& structure)
{
    //build the two 3x4 camera matrices K[R|T]
    Mat proj1(3, 4, CV_32FC1);
    Mat proj2(3, 4, CV_32FC1);
    R1.convertTo(proj1(Range(0, 3), Range(0, 3)), CV_32FC1);
    T1.convertTo(proj1.col(3), CV_32FC1);
    R2.convertTo(proj2(Range(0, 3), Range(0, 3)), CV_32FC1);
    T2.convertTo(proj2.col(3), CV_32FC1);
    Mat fK;
    K.convertTo(fK, CV_32FC1);
    proj1 = fK*proj1;
    proj2 = fK*proj2;
    //triangulation: s is 4xN, one homogeneous point per column
    Mat s;
    triangulatePoints(proj1, proj2, p1, p2, s);
    structure.clear();
    structure.reserve(s.cols);
    for (int i = 0; i < s.cols; ++i)
    {
        Mat_<float> col = s.col(i);
        col /= col(3); //dehomogenize
        structure.push_back(Point3f(col(0), col(1), col(2)));
    }
}
void maskout_points(vector<Point2f>& p1, Mat& mask)
{
    //keep only the entries whose mask value is non-zero (RANSAC inliers)
    vector<Point2f> p1_copy = p1;
    p1.clear();
    for (int i = 0; i < mask.rows; ++i)
    {
        if (mask.at<uchar>(i) > 0)
            p1.push_back(p1_copy[i]);
    }
}
void maskout_colors(vector<Vec3b>& p1, Mat& mask)
{
    vector<Vec3b> p1_copy = p1;
    p1.clear();
    for (int i = 0; i < mask.rows; ++i)
    {
        if (mask.at<uchar>(i) > 0)
            p1.push_back(p1_copy[i]);
    }
}
void save_structure(string file_name, vector<Mat>& rotations, vector<Mat>& motions, vector<Point3f>& structure, vector<Vec3b>& colors)
{
    int n = (int)rotations.size();
    FileStorage fs(file_name, FileStorage::WRITE);
    //FileStorage node names must be valid identifiers, so no spaces
    fs << "CameraCount" << n;
    fs << "PointCount" << (int)structure.size();
    fs << "Rotations" << "[";
    for (int i = 0; i < n; ++i)
    {
        fs << rotations[i];
    }
    fs << "]";
    fs << "Motions" << "[";
    for (int i = 0; i < n; ++i)
    {
        fs << motions[i];
    }
    fs << "]";
    fs << "Points" << "[";
    for (size_t i = 0; i < structure.size(); ++i)
    {
        fs << structure[i];
    }
    fs << "]";
    fs << "Colors" << "[";
    for (size_t i = 0; i < colors.size(); ++i)
    {
        fs << colors[i];
    }
    fs << "]";
    fs.release();
}
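//Reading the file back is symmetric (a minimal sketch, assuming the node names
//written above; not part of the original pipeline):
//  FileStorage in("structure.yml", FileStorage::READ);
//  int camera_count;  in["CameraCount"] >> camera_count;
//  vector<Point3f> points;
//  for (FileNodeIterator it = in["Points"].begin(); it != in["Points"].end(); ++it)
//  {
//      Point3f p;  *it >> p;  points.push_back(p);
//  }
//  in.release();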
void get_objpoints_and_imgpoints(
    vector<DMatch>& matches,
    vector<int>& struct_indices,
    vector<Point3f>& structure,
    vector<KeyPoint>& key_points,
    vector<Point3f>& object_points,
    vector<Point2f>& image_points)
{
    //collect 3D-2D correspondences for PnP: known 3D points seen in the current
    //image, paired with their pixel locations in the next image
    object_points.clear();
    image_points.clear();
    for (int i = 0; i < matches.size(); ++i)
    {
        int query_idx = matches[i].queryIdx;
        int train_idx = matches[i].trainIdx;
        int struct_idx = struct_indices[query_idx];
        if (struct_idx < 0) continue; //feature not triangulated yet
        object_points.push_back(structure[struct_idx]);
        image_points.push_back(key_points[train_idx].pt);
    }
}
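//Index bookkeeping used by fusion_structure below: struct_indices[k] holds the id
//of the 3D point that feature k of the current image corresponds to, or -1 if that
//feature has not been triangulated yet. Merging a new pairwise reconstruction is
//then just index propagation: known points pass their id on to the matched feature
//in the next image; unknown ones are appended to the global structure.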
void fusion_structure(
    vector<DMatch>& matches,
    vector<int>& struct_indices,
    vector<int>& next_struct_indices,
    vector<Point3f>& structure,
    vector<Point3f>& next_structure,
    vector<Vec3b>& colors,
    vector<Vec3b>& next_colors
    )
{
    for (int i = 0; i < matches.size(); ++i)
    {
        int query_idx = matches[i].queryIdx;
        int train_idx = matches[i].trainIdx;
        int struct_idx = struct_indices[query_idx];
        if (struct_idx >= 0) //the point already exists: both matched features refer to the same 3D point
        {
            next_struct_indices[train_idx] = struct_idx;
            continue;
        }
        //the point is new: append it to the structure and record its index for both views
        structure.push_back(next_structure[i]);
        colors.push_back(next_colors[i]);
        struct_indices[query_idx] = next_struct_indices[train_idx] = (int)structure.size() - 1;
    }
}
void init_structure(
    Mat K,
    vector<vector<KeyPoint> >& key_points_for_all,
    vector<vector<Vec3b> >& colors_for_all,
    vector<vector<DMatch> >& matches_for_all,
    vector<Point3f>& structure,
    vector<vector<int> >& correspond_struct_idx,
    vector<Vec3b>& colors,
    vector<Mat>& rotations,
    vector<Mat>& motions
    )
{
    //compute R and T between the first two images
    vector<Point2f> p1, p2;
    vector<Vec3b> c2;
    Mat R, T;
    Mat mask; //non-zero entries mark inlier matches
    get_matched_points(key_points_for_all[0], key_points_for_all[1], matches_for_all[0], p1, p2);
    get_matched_colors(colors_for_all[0], colors_for_all[1], matches_for_all[0], colors, c2);
    if (!find_transform(K, p1, p2, R, T, mask))
        cout << "Pose estimation for the first two images failed" << endl;
    //reconstruct from the first two images
    maskout_points(p1, mask);
    maskout_points(p2, mask);
    maskout_colors(colors, mask);
    Mat R0 = Mat::eye(3, 3, CV_64FC1);
    Mat T0 = Mat::zeros(3, 1, CV_64FC1);
    reconstruct(K, R0, T0, R, T, p1, p2, structure);
    //save R and T
    rotations = { R0, R };
    motions = { T0, T };
    //initialize correspond_struct_idx to the same shape as key_points_for_all, filled with -1
    correspond_struct_idx.clear();
    correspond_struct_idx.resize(key_points_for_all.size());
    for (int i = 0; i < key_points_for_all.size(); ++i)
    {
        correspond_struct_idx[i].resize(key_points_for_all[i].size(), -1);
    }
    //record the structure index of every inlier feature in the first two images
    int idx = 0;
    vector<DMatch>& matches = matches_for_all[0];
    for (int i = 0; i < matches.size(); ++i)
    {
        if (mask.at<uchar>(i) == 0)
            continue;
        correspond_struct_idx[0][matches[i].queryIdx] = idx;
        correspond_struct_idx[1][matches[i].trainIdx] = idx;
        ++idx;
    }
}
void get_file_names(string dir_name, vector<string>& names)
{
    names.clear();
    tinydir_dir dir;
    tinydir_open(&dir, dir_name.c_str());
    while (dir.has_next)
    {
        tinydir_file file;
        tinydir_readfile(&dir, &file);
        if (!file.is_dir)
        {
            names.push_back(file.path);
            cout << "Found image: " << names.back() << endl;
        }
        tinydir_next(&dir);
    }
    tinydir_close(&dir);
}
/****************
Bundle adjustment
*****************/
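//Parameterization used below: each camera is a 6-vector (3 angle-axis rotation
//components followed by 3 translation components), the shared intrinsics are a
//4-vector (fx, fy, cx, cy), and each 3D point is a 3-vector. The residual is the
//2D reprojection error, hence the auto-diff cost block sizes <2, 4, 6, 3>.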
struct ReprojectCost
{
    cv::Point2d observation;
    ReprojectCost(const cv::Point2d& observation)
        : observation(observation)
    {
    }
    template <typename T>
    bool operator()(const T* const intrinsic, const T* const extrinsic, const T* const pos3d, T* residuals) const
    {
        const T* r = extrinsic;     //angle-axis rotation
        const T* t = &extrinsic[3]; //translation
        T pos_proj[3];
        ceres::AngleAxisRotatePoint(r, pos3d, pos_proj);
        //apply the camera translation
        pos_proj[0] += t[0];
        pos_proj[1] += t[1];
        pos_proj[2] += t[2];
        //perspective division
        const T x = pos_proj[0] / pos_proj[2];
        const T y = pos_proj[1] / pos_proj[2];
        const T fx = intrinsic[0];
        const T fy = intrinsic[1];
        const T cx = intrinsic[2];
        const T cy = intrinsic[3];
        //apply the intrinsics
        const T u = fx * x + cx;
        const T v = fy * y + cy;
        //reprojection error against the observed pixel
        residuals[0] = u - T(observation.x);
        residuals[1] = v - T(observation.y);
        return true;
    }
};
void bundle_adjustment(
    Mat& intrinsic,
    vector<Mat>& extrinsics,
    vector<vector<int>>& correspond_struct_idx,
    vector<vector<KeyPoint>>& key_points_for_all,
    vector<Point3d>& structure
    )
{
    ceres::Problem problem;
    //load extrinsics (rotations and translations)
    for (size_t i = 0; i < extrinsics.size(); ++i)
    {
        problem.AddParameterBlock(extrinsics[i].ptr<double>(), 6);
    }
    //fix the first camera to anchor the gauge
    problem.SetParameterBlockConstant(extrinsics[0].ptr<double>());
    //load intrinsics
    problem.AddParameterBlock(intrinsic.ptr<double>(), 4); // fx, fy, cx, cy
    //load points; the Huber loss makes bundle adjustment more robust to outliers
    ceres::LossFunction* loss_function = new ceres::HuberLoss(4);
    for (size_t img_idx = 0; img_idx < correspond_struct_idx.size(); ++img_idx)
    {
        vector<int>& point3d_ids = correspond_struct_idx[img_idx];
        vector<KeyPoint>& key_points = key_points_for_all[img_idx];
        for (size_t point_idx = 0; point_idx < point3d_ids.size(); ++point_idx)
        {
            int point3d_id = point3d_ids[point_idx];
            if (point3d_id < 0)
                continue;
            Point2d observed = key_points[point_idx].pt;
            ceres::CostFunction* cost_function = new ceres::AutoDiffCostFunction<ReprojectCost, 2, 4, 6, 3>(new ReprojectCost(observed));
            problem.AddResidualBlock(
                cost_function,
                loss_function,
                intrinsic.ptr<double>(),           // intrinsics
                extrinsics[img_idx].ptr<double>(), // view rotation and translation
                &(structure[point3d_id].x)         // point in 3D space
                );
        }
    }
    //solve the BA problem
    ceres::Solver::Options ceres_config_options;
    ceres_config_options.minimizer_progress_to_stdout = false;
    ceres_config_options.logging_type = ceres::SILENT;
    ceres_config_options.num_threads = 1;
    ceres_config_options.preconditioner_type = ceres::JACOBI;
    ceres_config_options.linear_solver_type = ceres::SPARSE_SCHUR;
    ceres_config_options.sparse_linear_algebra_library_type = ceres::EIGEN_SPARSE;
    ceres::Solver::Summary summary;
    ceres::Solve(ceres_config_options, &problem, &summary);
    if (!summary.IsSolutionUsable())
    {
        std::cout << "Bundle Adjustment failed." << std::endl;
    }
    else
    {
        //display statistics about the minimization
        std::cout << std::endl
            << "Bundle Adjustment statistics (approximated RMSE):\n"
            << " #views: " << extrinsics.size() << "\n"
            << " #residuals: " << summary.num_residuals << "\n"
            << " Initial RMSE: " << std::sqrt(summary.initial_cost / summary.num_residuals) << "\n"
            << " Final RMSE: " << std::sqrt(summary.final_cost / summary.num_residuals) << "\n"
            << " Time (s): " << summary.total_time_in_seconds << "\n"
            << std::endl;
    }
}
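//Overall pipeline in main: (1) list images, (2) extract SIFT features, (3) match
//consecutive pairs, (4) initialize structure from the first two views via the
//essential matrix, (5) register each further view with solvePnPRansac and
//triangulate new points, (6) refine everything with bundle adjustment, (7) save.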
int main()
{
    vector<string> img_names;
    get_file_names("images", img_names);
    //camera intrinsic matrix K
    Mat K(Matx33d(
        2759.48, 0, 1520.69,
        0, 2764.16, 1006.81,
        0, 0, 1));
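    //These hard-coded intrinsics appear to match the widely used Strecha benchmark
    //images (3072x2048); replace them with your own calibration for other input.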
    vector<vector<KeyPoint> > key_points_for_all;
    vector<Mat> descriptor_for_all;
    vector<vector<Vec3b> > colors_for_all;
    vector<vector<DMatch> > matches_for_all;
    //extract features from all images
    extract_features(img_names, key_points_for_all, descriptor_for_all, colors_for_all);
    //match features between consecutive images
    match_features(descriptor_for_all, matches_for_all);
    vector<Point3f> structure;
    vector<vector<int> > correspond_struct_idx; //structure index of the j-th feature in the i-th image
    vector<Vec3b> colors;
    vector<Mat> rotations;
    vector<Mat> motions;
    //initialize the structure (3D point cloud) from the first two views
    init_structure(
        K,
        key_points_for_all,
        colors_for_all,
        matches_for_all,
        structure,
        correspond_struct_idx,
        colors,
        rotations,
        motions
        );
    //incrementally register and reconstruct the remaining images
    for (int i = 1; i < matches_for_all.size(); ++i)
    {
        vector<Point3f> object_points;
        vector<Point2f> image_points;
        Mat r, R, T;
        //gather the known 3D points seen in the i-th image together with their
        //pixel locations in the (i+1)-th image
        get_objpoints_and_imgpoints(
            matches_for_all[i],
            correspond_struct_idx[i],
            structure,
            key_points_for_all[i + 1],
            object_points,
            image_points
            );
        //solve for r and T of the new view
        solvePnPRansac(object_points, image_points, K, noArray(), r, T);
        //convert the rotation vector r into a rotation matrix R
        Rodrigues(r, R);
        //save R and T
        rotations.push_back(R);
        motions.push_back(T);
        vector<Point2f> p1, p2;
        vector<Vec3b> c1, c2;
        get_matched_points(key_points_for_all[i], key_points_for_all[i + 1], matches_for_all[i], p1, p2);
        get_matched_colors(colors_for_all[i], colors_for_all[i + 1], matches_for_all[i], c1, c2);
        //triangulate against the previous view's R and T
        vector<Point3f> next_structure;
        reconstruct(K, rotations[i], motions[i], R, T, p1, p2, next_structure);
        //fuse the new reconstruction into the existing structure
        fusion_structure(
            matches_for_all[i],
            correspond_struct_idx[i],
            correspond_struct_idx[i + 1],
            structure,
            next_structure,
            colors,
            c1
            );
    }
    Mat intrinsic(Matx41d(K.at<double>(0, 0), K.at<double>(1, 1), K.at<double>(0, 2), K.at<double>(1, 2)));
    vector<Mat> extrinsics;
    //pack each camera pose into a 6x1 vector: angle-axis rotation on top, translation below
    for (size_t i = 0; i < rotations.size(); ++i)
    {
        Mat extrinsic(6, 1, CV_64FC1);
        Mat r;
        Rodrigues(rotations[i], r);
        r.copyTo(extrinsic.rowRange(0, 3));
        motions[i].copyTo(extrinsic.rowRange(3, 6));
        extrinsics.push_back(extrinsic);
    }
    //bundle adjustment works in double precision
    vector<Point3d> st(structure.begin(), structure.end());
    bundle_adjustment(intrinsic, extrinsics, correspond_struct_idx, key_points_for_all, st);
    //unpack the refined extrinsics back into rotations/motions so the saved poses
    //are consistent with the refined points
    for (size_t i = 0; i < extrinsics.size(); ++i)
    {
        Rodrigues(extrinsics[i].rowRange(0, 3), rotations[i]);
        extrinsics[i].rowRange(3, 6).copyTo(motions[i]);
    }
    //save the refined result
    vector<Point3f> st2(st.begin(), st.end());
    save_structure("./structure.yml", rotations, motions, st2, colors);
    return 0;
}