Last active
December 19, 2015 18:18
-
-
Save jrk/5997111 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// compiled with clang++ "/run/shm/halideVwHhPM.cpp" -o "/run/shm/halideVwHhPM.bin" -I "/home/jansel/Halide/include" "/home/jansel/Halide/bin/libHalide.a" -ldl -lpthread -DAUTOTUNE_N="1024, 1024" -DAUTOTUNE_TRIALS=3
#include <Halide.h> | |
#include <stdio.h> | |
#include <sys/time.h> | |
// Default argument list passed to infer_input_bounds() and realize() when the
// build does not supply -DAUTOTUNE_N="...". Note that it expands to a
// comma-separated list of two values, not a single expression.
#ifndef AUTOTUNE_N
#define AUTOTUNE_N 1000, 1000
#endif

// Default number of timed realize() calls when the build does not supply
// -DAUTOTUNE_TRIALS=<n>.
#ifndef AUTOTUNE_TRIALS
#define AUTOTUNE_TRIALS 3
#endif
/**
 * Times a Halide Func and reports the fastest run.
 *
 * Calls compile_jit() and infer_input_bounds(AUTOTUNE_N) once, then calls
 * realize(AUTOTUNE_N) AUTOTUNE_TRIALS times, measuring each call with
 * gettimeofday(). The smallest elapsed time is printed to stdout as a
 * one-line JSON object, {"time": <seconds>}, and the process then terminates
 * with exit(0) -- this function does not return to its caller.
 *
 * @param func  The Func to compile and time.
 */
inline void _autotune_timing_stub(Halide::Func& func) {
    // Done once, outside the timed loop.
    func.compile_jit();
    func.infer_input_bounds(AUTOTUNE_N);

    timeval t1, t2;
    double rv = 0;
    for (int i = 0; i < AUTOTUNE_TRIALS; i++) {
        gettimeofday(&t1, NULL);
        func.realize(AUTOTUNE_N);
        gettimeofday(&t2, NULL);
        // Elapsed seconds: whole-second difference plus the (possibly
        // negative) microsecond difference scaled to seconds.
        double t = (t2.tv_sec - t1.tv_sec) + (t2.tv_usec - t1.tv_usec)/1000000.0;
        // Keep the minimum over all trials; the first trial seeds it.
        if (i == 0 || t < rv)
            rv = t;
    }
    printf("{\"time\": %.10f}\n", rv);
    exit(0);
}
#include <Halide.h> | |
using namespace Halide;

// Hook macros invoked from the pipeline source. In this file both expand to
// nothing, so a use such as `BASELINE_HOOK(blur_y);` is an empty statement.
// NOTE(review): presumably placeholders the autotuner redefines or replaces
// when generating variants -- confirm against the autotuner.
#define AUTOTUNE_HOOK(x)
#define BASELINE_HOOK(x)
int main(int argc, char **argv) { | |
ImageParam input(UInt(16), 2); | |
Func blur_x("blur_x"), blur_y("blur_y"); | |
Var x("x"), y("y"), xi("xi"), yi("yi"); | |
// The algorithm | |
blur_x(x, y) = (input(x, y) + input(x+1, y) + input(x+2, y))/3; | |
blur_y(x, y) = (blur_x(x, y) + blur_x(x, y+1) + blur_x(x, y+2))/3; | |
Halide::Var _y2, _y3, _x4, _x5, _y6, _y7; | |
blur_x .compute_at(blur_y, _y6) | |
.store_root() .reorder_storage(y, x) | |
/*.serial(x)*/ | |
.split(y, y, _y2, 4) .split(_y2, _y2, _y3, 16) .parallel(y) /*.serial(_y2)*/ /*.serial(_y3)*/ | |
.reorder(x, _y3, _y2, y) | |
; | |
blur_y .reorder_storage(x, y) | |
.split(x, x, _x4, 16) .split(_x4, _x4, _x5, 2) .parallel(x) .unroll(_x4) /*.serial(_x5)*/ | |
.split(y, y, _y6, 2) .split(_y6, _y6, _y7, 8) .parallel(y) .vectorize(_y6) /*.serial(_y7)*/ | |
.reorder(_y6, _y7, x, _x5, y) | |
; | |
_autotune_timing_stub(blur_y);; | |
// How to schedule it | |
blur_y.split(y, y, yi, 8).parallel(y).vectorize(x, 8); | |
blur_x.store_at(blur_y, y).compute_at(blur_y, yi).vectorize(x, 8); | |
BASELINE_HOOK(blur_y); | |
blur_y.compile_to_file("halide_blur", input); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment