When multiple Halide-compiled modules are linked together into a single app, they seem to share (and clobber) some global state that should be per-module. As a result, only the first module's compiled PTX is ever actually loaded, so attempts to look up kernels from subsequent modules fail because those kernels were never loaded.
Last active
August 29, 2015 13:56
-
-
Save jrk/9085020 to your computer and use it in GitHub Desktop.
Halide multi-pipeline CUDA initialization bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
hl | |
process | |
initialize_halide.o | |
visualize_halide.o | |
initialize_halide.h | |
visualize_halide.h |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Halide.h> | |
using namespace Halide; | |
int main (int argc, char const *argv[]) | |
{ | |
{ | |
Var x("x"), y("y"); | |
Func output("initialize"); | |
output(x,y) = cast<uint16_t>(25535); | |
Target target = get_target_from_environment(); | |
if(target.features & (Target::CUDA | Target::OpenCL)) { | |
output.compute_root().cuda_tile(x, y, 16, 16); | |
} else { | |
output.compute_root(); | |
} | |
output.compile_to_file("initialize_halide" ); | |
} | |
{ | |
ImageParam input(UInt(16), 2); | |
Var x("x"), y("y"), c("c"); | |
Func output("visualize"); | |
//output(x,y,c) = input(x,y); // don't even use input for now! | |
output(x,y,c) = select(c == 0, cast<uint16_t>(20000), | |
cast<uint16_t>(60000)); // just a constant | |
Target target = get_target_from_environment(); | |
if(target.features & (Target::CUDA | Target::OpenCL)) { | |
output.compute_root().cuda_tile(x, y, 16, 16 ); | |
} else { | |
output.compute_root(); | |
} | |
output.compile_to_file("visualize_halide", input ); | |
} | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Root of the Halide checkout/build; override on the command line or in the
# environment, e.g. `make HALIDE_DIR=/path/to/Halide`.
# $(HOME) is used instead of "~" because the shell only expands a tilde at the
# start of a word: "-I~/Halide/include/" would reach g++ unexpanded and the
# Halide header would not be found.
HALIDE_DIR ?= $(HOME)/Halide

# Build the generator program. BUILD_PREFIX is not set in this Makefile; it
# expands to nothing unless supplied by the environment or the command line.
hl: hl.cpp
	g++ -I$(HALIDE_DIR)/include/ hl.cpp $(HALIDE_DIR)/bin/$(BUILD_PREFIX)/libHalide.a -ldl -ohl

# Run the generator for the CUDA target with GPU debugging enabled. A single
# run writes the object files and headers for both pipelines
# (initialize_halide.* and visualize_halide.*).
visualize_halide.o: hl
	HL_TARGET=x86-64-osx-cuda-gpu_debug ./hl

# Link the test application against every generated object file in this
# directory (the *.o glob picks up both pipelines).
process: process.cpp visualize_halide.o
	g++ -I$(HALIDE_DIR)/apps/support/ *.o process.cpp -lpng -L/usr/local/cuda/lib -lcuda -oprocess
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "initialize_halide.h" | |
#include "visualize_halide.h" | |
#include <static_image.h> | |
#include <image_io.h> | |
int main (int argc, char const *argv[]) | |
{ | |
const int width = 256; | |
const int height = 256; | |
// For rendering an output | |
Image<uint16_t> render(width, height, 3); | |
// Initialize test | |
Image<uint16_t> test(width, height); | |
initialize_halide(test); // initialize with a constant grey value (result not used in visualize!) | |
// ^^^^^^^^ if I comment this out, then visualize_halide works! | |
visualize_halide(test, render); | |
//render.copy_to_host(); // does not matter if I use it or not | |
save(render, "visual.png"); // garbled output | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment