Last active
October 14, 2015 17:34
-
-
Save panmari/b9654d2f3e47ca7af825 to your computer and use it in GitHub Desktop.
Performance test of different ways to divide a 3-channel image by a single-channel weight image, channel-wise. See also http://stackoverflow.com/questions/33098797/divide-every-channel-of-image-by-weight-image-in-opencv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
OpenCV version: 3.0.0-dev | |
OpenCV VCS version: 3.0.0-556-g33f5ac5 | |
Build type: release | |
Parallel framework: pthreads | |
CPU features: mmx sse sse2 sse3 | |
OpenCL Platforms: | |
NVIDIA CUDA | |
dGPU: GeForce GTX TITAN (OpenCL 1.2 CUDA) | |
Current OpenCL device: | |
Type = dGPU | |
Name = GeForce GTX TITAN | |
Version = OpenCL 1.2 CUDA | |
Compute units = 14 | |
Max work group size = 1024 | |
Local memory size = 48 kB | |
Max memory allocation size = 1 GB 511 MB 848 kB | |
Double support = Yes | |
Host unified memory = No | |
Has AMD Blas = No | |
Has AMD Fft = No | |
Preferred vector width char = 1 | |
Preferred vector width short = 1 | |
Preferred vector width int = 1 | |
Preferred vector width long = 1 | |
Preferred vector width float = 1 | |
Preferred vector width double = 1 | |
Note: Google Test filter = performance_test* | |
[==========] Running 1 test from 1 test case. | |
[----------] Global test environment set-up. | |
[----------] 1 test from performance_test | |
[ RUN ] performance_test.performance_test_division_of_3d_by_1d | |
Size Method 1 Method 2 Method 3 Method 4 | |
[2 x 2] 0.051497 0.005057 0.001207 0.034242 | |
[10 x 10] 0.01181 0.002343 0.00148 0.006504 | |
[100 x 100] 0.173996 0.176055 0.071535 0.159307 | |
[1000 x 1000] 12.8465 17.618 7.50787 10.1962 | |
[2000 x 2000] 46.1933 70.5931 29.2439 28.4596 | |
[ OK ] performance_test.performance_test_division_of_3d_by_1d (477 ms) | |
[----------] 1 test from performance_test (477 ms total) | |
[----------] Global test environment tear-down | |
[==========] 1 test from 1 test case ran. (477 ms total) | |
[ PASSED ] 1 test. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
OpenCV version: 3.0.0-dev | |
OpenCV VCS version: 3.0.0-580-g0f1fdd8 | |
Build type: release | |
Parallel framework: ms-concurrency | |
CPU features: popcnt mmx sse sse2 sse3 ssse3 sse4.1 sse4.2 | |
OpenCL Platforms: | |
NVIDIA CUDA | |
dGPU: GeForce GTX 960 (OpenCL 1.2 CUDA) | |
Current OpenCL device: | |
Type = dGPU | |
Name = GeForce GTX 960 | |
Version = OpenCL 1.2 CUDA | |
Compute units = 8 | |
Max work group size = 1024 | |
Local memory size = 48 kB | |
Max memory allocation size = 512 MB | |
Double support = Yes | |
Host unified memory = No | |
Has AMD Blas = No | |
Has AMD Fft = No | |
Preferred vector width char = 1 | |
Preferred vector width short = 1 | |
Preferred vector width int = 1 | |
Preferred vector width long = 1 | |
Preferred vector width float = 1 | |
Preferred vector width double = 1 | |
[----------] 1 test from performance_test | |
[ RUN ] performance_test.performance_test_division_of_3d_by_1d | |
Size Method 1 Method 2 Method 3 Method 4 | |
[2 x 2] 0.0185935 0.00364579 0.00109374 0.00765615 | |
[10 x 10] 0.0109374 0.00218747 0.00145831 0.00546868 | |
[100 x 100] 0.172446 0.143644 0.0765615 0.10828 | |
[1000 x 1000] 17.9836 15.5434 9.18447 13.7391 | |
[2000 x 2000] 72.7801 69.2262 34.0422 56.9764 | |
[ OK ] performance_test.performance_test_division_of_3d_by_1d (686 ms) | |
[----------] 1 test from performance_test (687 ms total) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream>
#include <vector>

#include "opencv2/ts.hpp"
#include "opencv2/imgproc/imgproc.hpp"

using namespace std;
using namespace cv;
// Benchmarks four ways of dividing every channel of a 3-channel float image
// (CV_32FC3) by a single-channel float weight image (CV_32FC1).
//
// For each image size the test prints one row with the elapsed time of every
// method in milliseconds, then checks that all methods yield the same
// quotient image.
TEST(performance_test, performance_test_division_of_3d_by_1d) {
  const vector<Size> sizes{Size(2, 2), Size(10, 10), Size(100, 100), Size(1000, 1000), Size(2000, 2000)};

  // Milliseconds elapsed since an earlier getTickCount() reading. The tick
  // difference is taken on integers before converting to double.
  const auto elapsed_ms = [](int64 start_ticks) {
    return static_cast<double>(getTickCount() - start_ticks) * 1000. / getTickFrequency();
  };

  cout << "Size \t\tMethod 1 \tMethod 2 \tMethod 3\tMethod 4" << endl;

  for (const Size& sz : sizes) {
    // Random numerator image and strictly positive weights (>= 1), so no
    // division by zero can occur.
    Mat weighted_sum(sz, CV_32FC3);
    randu(weighted_sum, 0.0, 200.0);
    Mat weights(sz, CV_32FC1);
    randu(weights, 1.0, 10.0);

    // One private copy per method: methods 2 and 3 divide in place through
    // reshaped headers that share the pixel data of their input.
    Mat ws1 = weighted_sum.clone();
    Mat ws2 = weighted_sum.clone();
    Mat ws3 = weighted_sum.clone();
    Mat ws4 = weighted_sum.clone();

    // Method 1 @panmari: split into planes, divide each plane, merge back.
    const int64 tic1 = getTickCount();
    Mat rec1;
    vector<Mat> channels(3);
    split(ws1, channels);
    // Iterate by reference so the divided plane is the very element that
    // merge() reads, independent of whether divide() reallocates its output.
    for (Mat& chan : channels) {
      divide(chan, weights, chan);
    }
    merge(channels, rec1);
    const double toc1 = elapsed_ms(tic1);

    // Method 2 @Miki: view both images as a single row and divide per pixel.
    const int64 tic2 = getTickCount();
    Mat rec2 = ws2.reshape(3, 1);
    Mat ww = weights.reshape(1, 1);
    for (int i = 0; i < rec2.cols; ++i) {
      const float w = ww.at<float>(0, i);
      Vec3f* v = rec2.ptr<Vec3f>(0, i);
      v->val[0] /= w;
      v->val[1] /= w;
      v->val[2] /= w;
    }
    rec2 = rec2.reshape(3, ws2.rows);
    const double toc2 = elapsed_ms(tic2);

    // Method 3 @Miki (+ @Micka): as method 2, but walks raw pointers and
    // multiplies by the reciprocal instead of dividing three times.
    const int64 tic3 = getTickCount();
    Mat3f rec3 = ws3.reshape(3, 1);
    // Mat3f rec3 = ws3.reshape(3, 1).clone();  // Use this to avoid overwriting the input image.
    Mat1f ww3 = weights.reshape(1, 1);
    Vec3f* prec3 = rec3.ptr<Vec3f>(0);
    float* pww = ww3.ptr<float>(0);
    for (int i = 0; i < rec3.cols; ++i) {
      // The reciprocal is computed in double, as in the original benchmark;
      // the narrowing to float is now explicit.
      const float scale = static_cast<float>(1. / (*pww));
      (*prec3)[0] *= scale;
      (*prec3)[1] *= scale;
      (*prec3)[2] *= scale;
      ++prec3;
      ++pww;
    }
    rec3 = rec3.reshape(3, ws3.rows);
    const double toc3 = elapsed_ms(tic3);

    // Method 4 @Micka: replicate the weights to three channels, then do one
    // element-wise divide.
    const int64 tic4 = getTickCount();
    Mat3f rec4;
    Mat3f w3ch;
    cvtColor(weights, w3ch, COLOR_GRAY2BGR);
    divide(ws4, w3ch, rec4);
    const double toc4 = elapsed_ms(tic4);

    cout << sz << " \t" << toc1 << " \t" << toc2 << " \t" << toc3 << " \t" << toc4 << endl;

    // Check for equality of methods, using method 1 as the reference.
    Mat diff;
    absdiff(rec1, rec2, diff);
    EXPECT_EQ(0, countNonZero(diff.reshape(1)));

    // Method 3 multiplies by a reciprocal rather than dividing, so it is
    // compared with a tolerance of 1e-4 instead of exactly.
    absdiff(rec1, rec3, diff);
    threshold(diff, diff, 1e-4, 1, THRESH_BINARY);
    EXPECT_EQ(0, countNonZero(diff.reshape(1)));

    absdiff(rec1, rec4, diff);
    EXPECT_EQ(0, countNonZero(diff.reshape(1)));
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment