Last active
April 20, 2019 19:47
-
-
Save silverweed/6508f8ce26e4cbcc690f91d5f48010df to your computer and use it in GitHub Desktop.
Cuda + SFML test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the CUDA + SFML demo with nvcc.
CC = nvcc
# Host-compiler flags (warnings, debug info) are forwarded via --compiler-options.
CFLAGS = -std=c++11 --compiler-options -Wall --compiler-options -Wextra --compiler-options -ggdb
LDFLAGS = -lsfml-graphics -lsfml-window -lsfml-system -lcurand

# `all` does not name a real file; declaring it phony keeps a stray file
# called "all" from making the target look up to date.
.PHONY: all
all: test2

# Link an executable from the object file of the same name.
%: %.o
	$(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS)

# Compile one .cu file; every object also depends on myutils.hpp.
# The output is named explicitly so the object always lands at the target path.
%.o: %.cu myutils.hpp
	$(CC) $(CFLAGS) $< -c -o $@
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <iostream> | |
// MUST(call): run a CUDA runtime call and abort the program if it fails.
//
// `call` is evaluated exactly once and its status is stored, so the failing
// call is not executed a second time when the error string is looked up.
// On failure the source line and cudaGetErrorString() text are written to
// std::cerr and the process exits with status 1.
//
// The expansion is a plain braced block (not do/while) so that call sites
// written without a trailing semicolon keep compiling.
#define MUST(x) \
	{ \
		const auto must_status_ = (x); \
		if (must_status_ != cudaSuccess) { \
			std::cerr << "CUDA error at line " << __LINE__ << ": " << cudaGetErrorString(must_status_) << std::endl; \
			std::exit(1); \
		} \
	}
// MUST_CRND(call): run a cuRAND call and abort the program if it fails.
//
// `call` is evaluated exactly once (and parenthesized, so any expression is
// safe to pass). On failure the source line and the numeric status value are
// written to std::cerr and the process exits with status 1.
//
// The expansion is a plain braced block (not do/while) so that call sites
// written without a trailing semicolon keep compiling.
#define MUST_CRND(x) \
	{ \
		const auto must_crnd_status_ = (x); \
		if (must_crnd_status_ != CURAND_STATUS_SUCCESS) { \
			std::cerr << "CURAND error at line " << __LINE__ << " (status " << static_cast<int>(must_crnd_status_) << ")" << std::endl; \
			std::exit(1); \
		} \
	}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <SFML/Graphics.hpp> | |
#include <SFML/Window.hpp> | |
#include <curand.h> | |
#include "myutils.hpp" | |
#define WIDTH 1920 | |
#define HEIGHT 1080 | |
using u8 = unsigned char; | |
void float_to_u8(float *src, u8 *dst, size_t n); | |
sf::View keep_ratio(const sf::Event::SizeEvent& size, const sf::Vector2u& designedsize); | |
int main() { | |
/// Init CURAND | |
curandGenerator_t gen; | |
MUST_CRND(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT)) | |
/// Init SFML | |
sf::RenderWindow window(sf::VideoMode(WIDTH, HEIGHT), "Test CuSFML"); | |
window.setFramerateLimit(60); | |
sf::Texture tex; | |
tex.create(WIDTH, HEIGHT); | |
sf::Sprite sprite(tex); | |
const size_t N = WIDTH * HEIGHT * 3; | |
// Array of generated floats on device. This is filled by curand generator. | |
float *devData; | |
// Array to copy generated floats to on host. | |
float *hostData; | |
// Array of pixels to update the texture with. | |
// Note that we only generate RGB channels, but we still need to pass RGBA | |
// data to the SFML texture, so this array has 4 numbers per pixel, not 3. | |
u8 pixels[WIDTH * HEIGHT * 4]; | |
// Allocate host and device memory | |
hostData = static_cast<float*>(malloc(N * sizeof(float))); | |
MUST(cudaMalloc(&devData, N * sizeof(float))) | |
cudaEvent_t start, end; | |
MUST(cudaEventCreate(&start)) | |
MUST(cudaEventCreate(&end)) | |
// Main loop | |
while (window.isOpen()) { | |
// Event loop | |
sf::Event evt; | |
while (window.pollEvent(evt)) { | |
switch (evt.type) { | |
case sf::Event::Closed: | |
window.close(); | |
break; | |
case sf::Event::Resized: | |
window.setView(keep_ratio(evt.size, sf::Vector2u(WIDTH, HEIGHT))); | |
case sf::Event::KeyPressed: | |
switch (evt.key.code) { | |
case sf::Keyboard::Q: | |
window.close(); | |
break; | |
default: break; | |
} | |
default: break; | |
} | |
} | |
MUST(cudaEventRecord(start)) | |
// Generate random floats on GPU | |
MUST_CRND(curandGenerateUniform(gen, devData, N)) | |
// and copy them to host | |
MUST(cudaMemcpy(hostData, devData, N * sizeof(float), cudaMemcpyDeviceToHost)) | |
MUST(cudaEventRecord(end)) | |
MUST(cudaEventSynchronize(end)) | |
float ms; | |
MUST(cudaEventElapsedTime(&ms, start, end)) | |
std::clog << "CUDA took " << ms << " ms\n"; | |
// Convert generated floats to u8 | |
float_to_u8(hostData, pixels, N); | |
tex.update(pixels); | |
window.clear(); | |
window.draw(sprite); | |
window.display(); | |
} | |
// Tear down CURAND | |
MUST_CRND(curandDestroyGenerator(gen)) | |
} | |
void float_to_u8(float *src, u8 *dst, size_t n) { | |
for (size_t i = 0; i < n/3; ++i) { | |
#pragma unroll | |
for(size_t j = 0; j < 3; ++j) | |
dst[4*i + j] = static_cast<u8>(src[3*i+j] * 255); | |
dst[4*i + 3] = 255; | |
} | |
} | |
// Handle resizing | |
// Build a view covering the designed size whose viewport is shrunk and
// centred along one axis so the designed aspect ratio is preserved inside a
// window of the given size.
sf::View keep_ratio(const sf::Event::SizeEvent& size, const sf::Vector2u& designedsize) {
	// Per-axis ratio between the actual window and the designed size.
	const float scaleX = size.width / static_cast<float>(designedsize.x);
	const float scaleY = size.height / static_cast<float>(designedsize.y);

	// Start from a viewport filling the whole window.
	float left = 0.f, top = 0.f, width = 1.f, height = 1.f;
	if (scaleX > scaleY) {
		// Window is relatively wider: narrow the viewport and centre it horizontally.
		width = scaleY / scaleX;
		left = (1.f - width) / 2.f;
	} else if (scaleX < scaleY) {
		// Window is relatively taller: shorten the viewport and centre it vertically.
		height = scaleX / scaleY;
		top = (1.f - height) / 2.f;
	}

	sf::View view(sf::FloatRect(0, 0, designedsize.x , designedsize.y));
	view.setViewport(sf::FloatRect(left, top, width, height));
	return view;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <SFML/Graphics.hpp> | |
#include <SFML/Window.hpp> | |
#include "myutils.hpp" | |
#include <iomanip> | |
#include <cmath> | |
#define WIDTH 1920 | |
#define HEIGHT 1080 | |
using std::cerr; | |
using std::endl; | |
using u8 = unsigned char; | |
sf::View keep_ratio(const sf::Event::SizeEvent& size, const sf::Vector2u& designedsize); | |
// Kernel: write one RGBA pixel per thread.
//
// Launch layout: a 2D grid of 2D blocks. The image width and height are
// taken to be the total thread extent (gridDim * blockDim) in x and y, so the
// launch must cover the image exactly; there is no bounds check and every
// launched thread writes. `data` must hold 4 * W * H elements.
//
// Channels written per pixel:
//   0: always 0
//   1: 255 scaled by the thread's position inside its block
//   2: 255 * psi, with psi(r) = |cos(pulse * t - wavefreq * r)| and r the
//      distance of the pixel from the image centre
//   3: always 255
template<typename DataType>
__global__ void updatePixels(DataType *data, float pulse, float t, float wavefreq) {
	const auto W = gridDim.x * blockDim.x;
	const auto H = gridDim.y * blockDim.y;
	const auto idx = blockDim.x * blockIdx.x + threadIdx.x;
	const auto idy = blockDim.y * blockIdx.y + threadIdx.y;

	// Calculate psi(r) = |cos(pulse * t - wavefreq * r)|
	// All arithmetic stays in single precision (float literals, fabsf) to
	// avoid promoting to double on the device.
	const float x = static_cast<float>(idx) - 0.5f * static_cast<float>(W);
	const float y = static_cast<float>(idy) - 0.5f * static_cast<float>(H);
	const float r = sqrtf(x * x + y * y);
	const float psi = fabsf(cosf(pulse * t - wavefreq * r));

	const auto index = 4 * (W * idy + idx);
	data[index + 0] = 0;
	data[index + 1] = 255 * (float(threadIdx.x)/blockDim.x * float(threadIdx.y)/blockDim.y);
	data[index + 2] = 255 * psi;
	data[index + 3] = 255;
}
// Host version of the wave image: fills a W x H RGBA buffer.
//
//   data:     output buffer of at least 4 * W * H elements, laid out row by
//             row with 4 consecutive elements per pixel
//   pulse, t, wavefreq: parameters of psi(r) = |cos(pulse * t - wavefreq * r)|,
//             where r is the distance of the pixel from (W/2, H/2)
//   W, H:     image width and height in pixels; nothing is written when
//             either is <= 0
//
// Channels written per pixel: 0, 0, 255 * psi, 255.
template<typename DataType>
void updatePixelsCPU(DataType *data, float pulse, float t, float wavefreq, int W, int H) {
	const float halfW = 0.5f * static_cast<float>(W);
	const float halfH = 0.5f * static_cast<float>(H);
	const float phase = pulse * t;

	// Rows in the outer loop so the buffer is written in memory order.
	for (int idy = 0; idy < H; ++idy) {
		const float y = static_cast<float>(idy) - halfH;
		const size_t rowStart = static_cast<size_t>(W) * static_cast<size_t>(idy);
		for (int idx = 0; idx < W; ++idx) {
			// Calculate psi(r) = |cos(pulse * t - wavefreq * r)|
			const float x = static_cast<float>(idx) - halfW;
			const float r = std::sqrt(x * x + y * y);
			// std::fabs has a float overload; an unqualified abs() may bind
			// to the integer version and truncate psi to 0.
			const float psi = std::fabs(std::cos(phase - wavefreq * r));

			const size_t index = 4 * (rowStart + static_cast<size_t>(idx));
			data[index + 0] = 0;
			data[index + 1] = 0;
			data[index + 2] = static_cast<DataType>(255 * psi);
			data[index + 3] = 255;
		}
	}
}
// Demo entry point: every frame, render the wave image either with the
// updatePixels kernel (default) or with updatePixelsCPU, upload it to an SFML
// texture and draw it.
//
// Keys: Q quit, Add/Subtract change wavefreq by 20%, V toggle the 60 FPS
// limit, C toggle between the GPU and CPU paths.
// Returns 0 on normal exit, 1 if the texture cannot be created; CUDA
// failures terminate through MUST.
int main() {
	// Threads per block; one thread per pixel, so the image must be an exact
	// multiple of the block size in both dimensions.
	constexpr unsigned BLOCK_W = 32;
	constexpr unsigned BLOCK_H = 18;
	static_assert(WIDTH % BLOCK_W == 0 && HEIGHT % BLOCK_H == 0,
	              "WIDTH/HEIGHT must be multiples of the block size");

	/// Init SFML
	sf::RenderWindow window(sf::VideoMode(WIDTH, HEIGHT), "Test CudaSFML");
	window.setFramerateLimit(60);
	bool vsync = true;

	sf::Texture tex;
	if (!tex.create(WIDTH, HEIGHT)) {
		cerr << "Failed to create a " << WIDTH << "x" << HEIGHT << " texture" << endl;
		return 1;
	}
	sf::Sprite sprite(tex);

	// Number of u8 elements in one RGBA frame.
	const size_t N = WIDTH * HEIGHT * 4;
	// The buffers hold u8 values, so size them in u8 units.
	const size_t BYTES = N * sizeof(u8);

	// Array of generated pixels on device.
	u8 *devData = nullptr;
	// Array to copy generated pixels to on host.
	u8 *hostData = nullptr;

	// Allocate host and device memory
	MUST(cudaMallocHost(&hostData, BYTES))
	MUST(cudaMalloc(&devData, BYTES))

	dim3 blockSize(BLOCK_W, BLOCK_H);
	dim3 gridSize(WIDTH / blockSize.x, HEIGHT / blockSize.y);

	float t = 0; // time
	float pulse = 10;
	float wavefreq = 0.04;

	sf::Clock clock;
	float timeAcc = 0;
	float ms = 0;
	bool cpu = false;

	cudaEvent_t start, end;
	MUST(cudaEventCreate(&start))
	MUST(cudaEventCreate(&end))

	int cycles = 0;

	// Main loop
	while (window.isOpen()) {
		// Event loop
		sf::Event evt;
		while (window.pollEvent(evt)) {
			switch (evt.type) {
			case sf::Event::Closed:
				window.close();
				break;
			case sf::Event::Resized:
				window.setView(keep_ratio(evt.size, sf::Vector2u(WIDTH, HEIGHT)));
				// Do not fall through: for a resize event evt.size, not
				// evt.key, is the member that holds valid data.
				break;
			case sf::Event::KeyPressed:
				switch (evt.key.code) {
				case sf::Keyboard::Q:
					window.close();
					break;
				case sf::Keyboard::Add:
					wavefreq += wavefreq / 5.0;
					break;
				case sf::Keyboard::Subtract:
					wavefreq -= wavefreq / 5.0;
					break;
				case sf::Keyboard::V:
					vsync = !vsync;
					window.setFramerateLimit(vsync ? 60 : 0);
					break;
				case sf::Keyboard::C:
					cpu = !cpu;
					break;
				default:
					break;
				}
				break;
			default:
				break;
			}
		}

		if (!cpu) {
			MUST(cudaEventRecord(start))
			// Generate pixels on device
			updatePixels<<<gridSize, blockSize>>>(devData, pulse, t, wavefreq);
			// A launch does not return a status; fetch launch errors explicitly.
			MUST(cudaGetLastError())
			// and copy them to host
			MUST(cudaMemcpy(hostData, devData, BYTES, cudaMemcpyDeviceToHost))
			MUST(cudaEventRecord(end))
			MUST(cudaEventSynchronize(end))
			MUST(cudaEventElapsedTime(&ms, start, end))
		} else {
			updatePixelsCPU(hostData, pulse, t, wavefreq, WIDTH, HEIGHT);
		}

		tex.update(hostData);

		window.clear();
		window.draw(sprite);
		window.display();

		const auto delta = clock.restart().asSeconds();
		t += delta;
		timeAcc += delta;

		if (!cpu) {
			// GPU path: report an average over every 100 frames.
			if (++cycles == 100) {
				std::clog << "[GPU] " << std::setprecision(4) << std::setw(6) << 100.0 / timeAcc << " FPS ("
					<< std::setw(5) << 10 * timeAcc << " ms loop, " << std::setw(5) << ms << " ms CUDA)\n";
				cycles = 0;
				timeAcc = 0;
			}
		} else {
			// CPU path: report every frame.
			std::clog << "[CPU] " << std::setprecision(4) << std::setw(6) << 1.0 / timeAcc << " FPS ("
				<< std::setw(5) << 1000 * timeAcc << " ms loop)\n";
			cycles = 0;
			timeAcc = 0;
		}
	}

	MUST(cudaFreeHost(hostData))
	MUST(cudaFree(devData))
	MUST(cudaEventDestroy(end))
	MUST(cudaEventDestroy(start))
	return 0;
}
// Handle resizing | |
// Build a view covering the designed size whose viewport is shrunk and
// centred along one axis so the designed aspect ratio is preserved inside a
// window of the given size.
sf::View keep_ratio(const sf::Event::SizeEvent& size, const sf::Vector2u& designedsize) {
	// Per-axis ratio between the actual window and the designed size.
	const float ratioX = size.width / static_cast<float>(designedsize.x);
	const float ratioY = size.height / static_cast<float>(designedsize.y);

	// Start from a viewport filling the whole window.
	float left = 0.f, top = 0.f, width = 1.f, height = 1.f;
	if (ratioX > ratioY) {
		// Window is relatively wider: narrow the viewport and centre it horizontally.
		width = ratioY / ratioX;
		left = (1.f - width) / 2.f;
	} else if (ratioX < ratioY) {
		// Window is relatively taller: shorten the viewport and centre it vertically.
		height = ratioX / ratioY;
		top = (1.f - height) / 2.f;
	}

	sf::View view(sf::FloatRect(0, 0, designedsize.x , designedsize.y));
	view.setViewport(sf::FloatRect(left, top, width, height));
	return view;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Should we put these files in Visual Studio 2017 by creating a new Makefile project?
If yes, what should the Debug and Release configurations be?
I really want to combine CUDA and SFML in one program and run it. I have been trying to do that for two days, since integrating CUDA with Visual Studio 2017.
I am using CUDA 10.1
All I really want is to get a GUI for CUDA. Please help.