Skip to content

Instantly share code, notes, and snippets.

@ShigekiKarita
Created September 7, 2014 14:34
Show Gist options
  • Save ShigekiKarita/f9fd818e2980609da39e to your computer and use it in GitHub Desktop.
Save ShigekiKarita/f9fd818e2980609da39e to your computer and use it in GitHub Desktop.
// just FAIL
// Verifies max-pooling and unpooling (the backward pass) on a small 6x2
// fixture, comparing the plain implementation ("NoCL") against the OpenCL
// kernels "max_pool" / "back_max_pool". Four phases:
//   1. NoCL maxpool: check the argmax indices it returns.
//   2. CL maxpool:   check the kernel reproduces those indices.
//   3. NoCL unpool:  scatter the next-ply gradient back via the indices.
//   4. CL unpool:    check the kernel reproduces the NoCL result.
TEST(CNN, TotalPooling)
{
    const std::size_t pool = 2;
    const std::size_t stride = 2;
    const std::size_t input = 6; // NOTE: this means W_conv.rows(), output from convolution ply
    const std::size_t output = (input / stride - pool + 1) * stride; // no overlap
    const std::size_t batch = 2;
    ASSERT_EQ(4, output);

    // fold horizontally in stride, seek vertically in pool
    // (each column pair below is one stride-wide fold of the 6-element column)
    fmatrix in0(input, 1);
    in0 <<
        1, 2,
        3, 2,
        1, 4;
    fmatrix out0(output, 1);
    out0 <<
        3, 2,
        3, 4;
    fmatrix in1(input, 1);
    in1 <<
        2, 1,
        0, 1,
        3, 0;
    fmatrix out1(output, 1);
    out1 <<
        2, 1,
        3, 1;

    // Batch the two samples column-wise: one column per sample.
    fmatrix in_mat(input, batch);
    in_mat << in0, in1;
    fmatrix out_mat(output, batch);
    out_mat << out0, out1;

    // Flat (column-major, batch-offset) positions of each pooled maximum.
    std::vector<std::size_t> expected_indices =
        {2, 1, 2, 5,                                    // in0 -> out0
         0 + input, 1 + input, 4 + input, 3 + input};   // in1 -> out1

    // --- Phase 1: NoCL maxpool returns the argmax indices -------------------
    const auto result_nocl = maxpool(in_mat, pool, stride);
    for (std::size_t i = 0; i < expected_indices.size(); ++i)
    {
        ASSERT_EQ(expected_indices[i], result_nocl[i]);
    }

    // --- Phase 2: CL maxpool kernel, one work-item per (output, batch) ------
    cl::Buffer in_buf = copy_to_buffer(in_mat);
    std::vector<int> result_cl(output * batch);
    cl::Buffer out_buf = copy_to_buffer(result_cl); // don't care about elems
    cl::Kernel maxpool_cl = kernel_args("max_pool", in_buf, out_buf, input, output, batch, pool, stride);
    cl::NDRange global(output, batch);
    CLS::get_instance().queue.enqueueNDRangeKernel(maxpool_cl, cl::NullRange, global);
    copy_from_buffer(out_buf, result_cl);

    // NoCL vs. CL comparison
    for (std::size_t i = 0; i < result_cl.size(); ++i)
    {
        ASSERT_EQ(result_nocl[i], result_cl[i]);
    }

    // --- Phase 3: NoCL unpooling; regard out_mat as prop from next ply ------
    // Positions that were the pooled max receive the (summed) propagated
    // value; every other position stays zero. Row 2 of column 0 is picked
    // twice (indices {2, 2}), hence the accumulated 3+3.
    fmatrix expected_unpools(in_mat.rows(), in_mat.cols());
    expected_unpools <<
        0,   2,
        2,   1,
        3+3, 0,
        0,   1,
        0,   3,
        4,   0;
    const fmatrix unpools_nocl = unmaxpool(out_mat, result_nocl, input);
    for (std::size_t i = 0; i < unpools_nocl.rows(); ++i)
    {
        for (std::size_t j = 0; j < unpools_nocl.cols(); ++j)
        {
            ASSERT_EQ(expected_unpools(i, j), unpools_nocl(i, j));
        }
    }

    // --- Phase 4: CL unpooling kernel, reusing the indices in out_buf -------
    fmatrix unpools_cl = fmatrix::Zero(input, batch);
    cl::Buffer unpool_buf = copy_to_buffer(unpools_cl); // don't care about elems
    cl::Buffer prop_buf = copy_to_buffer(out_mat);
    cl::Kernel back_maxpool_cl = kernel_args("back_max_pool", prop_buf, unpool_buf, output, input, batch, out_buf);
    cl::NDRange global_unpool(output, batch);
    CLS::get_instance().queue.enqueueNDRangeKernel(back_maxpool_cl, cl::NullRange, global_unpool);
    copy_from_buffer(unpool_buf, unpools_cl);

    // EXPECT (not ASSERT) so every mismatching element is reported; the
    // parallel accumulation in back_max_pool is the suspected failure
    // (duplicate indices like {2, 2} race without an atomic add).
    for (std::size_t i = 0; i < unpools_cl.rows(); ++i)
    {
        for (std::size_t j = 0; j < unpools_cl.cols(); ++j)
        {
            EXPECT_EQ(expected_unpools(i, j), unpools_cl(i, j));
        }
    }
}
@ShigekiKarita
Copy link
Author

Now, I don't think the parallel increment in back_maxpool_cl works...

https://gist.github.com/ShigekiKarita/edcab9d3797ff7633b73#file-kernel-cl-L48

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment