Created
January 8, 2020 02:06
-
-
Save marty1885/082eae20950cb9a82271ca613e35e13e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <vector> | |
#include <algorithm> | |
#include <chrono> | |
#include <random> | |
#include <cstdint> | |
#include <cassert> | |
// Banchmarking library | |
#include <hayai/hayai.hpp> | |
// Testing parameters | |
constexpr int width = 512, height = 512; | |
constexpr int local_area = 32; | |
constexpr int stride = 1; | |
constexpr float local_density = 0.1; | |
constexpr int out_width = (width-local_area)/stride+1; | |
constexpr int out_height = (height-local_area)/stride+1; | |
std::vector<int> activation; | |
std::vector<uint8_t> output; | |
std::vector<std::pair<int, int>> local_values; | |
BENCHMARK(local_inhib, local_inhib_base, 1, 1) | |
{ | |
int num_active_bits = local_area*local_area*local_density; | |
// The baseline algorithm works like max pooling, a naive version | |
// Performance on my box is ~2.4 iterations/s | |
for(int y=0;y<height-local_area;y+=stride) { | |
for(int x=0;x<width-local_area;x+=stride) { | |
// Collect the nearby cell index and activations | |
for(int dy=0;dy<local_area;dy++) { | |
for(int dx=0;dx<local_area;dx++) { | |
int index = (y+dy)*width+x+dx; | |
local_values[dy*local_area+dx] = {index, activation[index]}; | |
} | |
} | |
int output_index = (y/stride)*out_width+(x/stride); | |
// Do a partial sort to put the top K values in the top K slots | |
std::nth_element(local_values.begin(), local_values.begin()+num_active_bits, local_values.end() | |
, [](const auto& a, const auto& b) {return a.second > b.second;}); | |
// If the current cell is in the top K slots | |
if(std::find_if(local_values.begin(), local_values.begin()+num_active_bits | |
, [&](const auto& a){ return a.first == (y*width+x); }) != local_values.begin()+num_active_bits) | |
output[output_index] = 1; | |
else | |
output[output_index] = 0; | |
} | |
} | |
} | |
// Optimization 3: Instrad of storing both index and activation. Since we are just dealing with current | |
// cell values. We don't need index anymore. But this only provide noise level | |
// improvments. | |
std::vector<int> local_activations; | |
BENCHMARK(local_inhib, local_inhib_optim, 1, 1) | |
{ | |
// Performance on my box is ~45.3 iterations/s | |
int num_active_bits = local_area*local_area*local_density; | |
for(int x=0;x<width-local_area;x+=stride) { | |
for(int y=0;y<height-local_area;y+=stride) { | |
int target_value = 0; | |
// Optimization 2: Reuse previouslly aquired vaules | |
int n = 0; | |
if(y == 0) { // If y == 0. i.e. First time in the column | |
target_value = activation[y*width+x]; | |
for(int dy=0;dy<local_area;dy++) { | |
for(int dx=0;dx<local_area;dx++) { | |
int index = (y+dy)*width+x+dx; | |
local_activations[dy*local_area+dx] = activation[index]; | |
// Optimization 5: Instrad of computing how many cells are more active then out current one | |
// after done with local_activations. We compute it on the fly. Reducing memory | |
// access. | |
if(activation[index] > target_value) | |
n += 1; | |
} | |
} | |
} | |
else { // Reuse old data when possible | |
int dy = (y-1)%local_area; | |
// Optimization 4: For some reason using std::copy is faster then a for loop | |
std::copy( | |
activation.begin() + (y+dy)*width+x, | |
activation.begin() + (y+dy)*width+x+local_area, | |
local_activations.begin() + dy*local_area | |
); | |
target_value = local_activations[dy*local_area]; | |
// Optimization 1: Instead of sorting and finding if the current cell is in the top-K cells | |
// we count how many cells has greater activations comapred to the current one. | |
n = std::count_if(local_activations.begin(), local_activations.end(), [&](int act){ return act > target_value; }); | |
} | |
int output_index = (y/stride)*out_width+(x/stride); | |
if(n < num_active_bits) | |
output[output_index] = 1; | |
else | |
output[output_index] = 0; | |
} | |
} | |
} | |
int main() | |
{ | |
//Initalize activation | |
activation = std::vector<int>(width*height); | |
std::mt19937 rng; | |
std::uniform_int_distribution<int> dist(0, 16); | |
std::generate(activation.begin(), activation.end(), [&](){ return bool(dist(rng)); }); | |
// Initalize buffers | |
output = std::vector<uint8_t>(out_width*out_height); | |
local_values.resize(local_area*local_area); | |
local_activations.resize(local_area*local_area); | |
hayai::ConsoleOutputter consoleOutputter; | |
hayai::Benchmarker::AddOutputter(consoleOutputter); | |
hayai::Benchmarker::RunAllTests(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment