GPGPU for Vulkan
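A minimal Vulkan compute (GPGPU) example. sum.comp is a compute shader that doubles every element of a storage buffer (buffer_out[id] = buffer_in[id] + buffer_in[id]); vk.c is the C host program that creates the instance and device, sets up descriptor sets and a compute pipeline, and runs the kernel against buffers allocated in three different ways. The build commands are in the comment at the top of vk.c.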
// sum.comp
#version 450

layout (
    local_size_x = WORKGROUP_SIZE,
    local_size_y = 1,
    local_size_z = 1
) in;

layout (binding = 0) buffer buf_in  { int buffer_in[];  };
layout (binding = 1) buffer buf_out { int buffer_out[]; };

void main()
{
    if (gl_GlobalInvocationID.x >= ELT_COUNT)
        return;
    uint id = gl_GlobalInvocationID.x;
    buffer_out[id] = buffer_in[id] + buffer_in[id];
}
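Each invocation handles one element, indexed by gl_GlobalInvocationID.x, and doubles it into the output buffer; the early return guards any invocations dispatched past ELT_COUNT. The host program below (vk.c) creates the device, buffers, and pipeline, then runs this kernel three different ways.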
// gcc -o vk vk.c -DELT_COUNT=1024 -DWORKGROUP_SIZE=32 -lvulkan
// glslangValidator sum.comp -DWORKGROUP_SIZE=32 -DELT_COUNT=1024 -V -o sum.spv

#include <assert.h>
#include <fcntl.h>
#include <libgen.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <vulkan/vulkan.h>

#define BUFFER_COUNT 2
#define SHADER_NAME "sum.spv"
#define SHADER_ENTRY_POINT "main"
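/*
 * The host dispatches ELT_COUNT / WORKGROUP_SIZE workgroups (integer
 * division), so ELT_COUNT is assumed to be a multiple of WORKGROUP_SIZE:
 * with the build flags above, 1024 / 32 = 32 workgroups of 32 invocations.
 * The shader's bounds check protects against extra invocations, not
 * against the missing tail that an uneven split would leave behind.
 */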
struct vulkan_state {
    VkInstance instance;
    VkPhysicalDevice phys_device;
    VkDevice device;
    VkQueue queue;
    uint32_t queue_family_index;
    VkDescriptorPool descriptor_pool;
    VkCommandPool command_pool;
    VkDescriptorSetLayout descriptor_layout;
    VkDescriptorSet descriptor_set;
    VkPipelineLayout pipeline_layout;
    VkPipeline pipeline;
    VkShaderModule shader_module;
    uint8_t memory_is_cached; /* 1 when the memory type is HOST_VISIBLE but not HOST_COHERENT */
};

struct gpu_memory {
    void *buffer;             /* host mapping, NULL while unmapped */
    VkDeviceSize vk_size;
    VkDeviceMemory vk_memory;
    VkBuffer vk_buffer;
};
static const char* vkresult_to_string(VkResult res)
{
    switch (res) {
#define VK2STR(Value) case Value: return #Value
    VK2STR(VK_SUCCESS);
    VK2STR(VK_NOT_READY);
    VK2STR(VK_TIMEOUT);
    VK2STR(VK_EVENT_SET);
    VK2STR(VK_EVENT_RESET);
    VK2STR(VK_INCOMPLETE);
    VK2STR(VK_ERROR_OUT_OF_HOST_MEMORY);
    VK2STR(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    VK2STR(VK_ERROR_INITIALIZATION_FAILED);
    VK2STR(VK_ERROR_DEVICE_LOST);
    VK2STR(VK_ERROR_MEMORY_MAP_FAILED);
    VK2STR(VK_ERROR_LAYER_NOT_PRESENT);
    VK2STR(VK_ERROR_EXTENSION_NOT_PRESENT);
    VK2STR(VK_ERROR_FEATURE_NOT_PRESENT);
    VK2STR(VK_ERROR_INCOMPATIBLE_DRIVER);
    VK2STR(VK_ERROR_TOO_MANY_OBJECTS);
    VK2STR(VK_ERROR_FORMAT_NOT_SUPPORTED);
    VK2STR(VK_ERROR_FRAGMENTED_POOL);
    VK2STR(VK_ERROR_OUT_OF_POOL_MEMORY);
    VK2STR(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    VK2STR(VK_ERROR_SURFACE_LOST_KHR);
    VK2STR(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    VK2STR(VK_SUBOPTIMAL_KHR);
    VK2STR(VK_ERROR_OUT_OF_DATE_KHR);
    VK2STR(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    VK2STR(VK_ERROR_VALIDATION_FAILED_EXT);
    VK2STR(VK_ERROR_INVALID_SHADER_NV);
    VK2STR(VK_ERROR_FRAGMENTATION_EXT);
    VK2STR(VK_ERROR_NOT_PERMITTED_EXT);
    VK2STR(VK_RESULT_MAX_ENUM);
#undef VK2STR
    default:
        return "VK_UNKNOWN_RETURN_VALUE";
    }
}
static void check_vkresult(const char* fname, VkResult res)
{
    if (res == VK_SUCCESS) {
        fprintf(stderr, "\033[32m%s\033[0m\n", fname);
        return;
    }
    fprintf(stderr, "\033[31m%s = %s\033[0m\n", fname, vkresult_to_string(res));
    assert(0);
}

#define CALL_VK(Func, Param) check_vkresult(#Func, Func Param)
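/*
 * Example: CALL_VK(vkCreateInstance, (&info, NULL, &instance)) expands to
 *   check_vkresult("vkCreateInstance", vkCreateInstance(&info, NULL, &instance));
 * so every Vulkan call gets its name logged (green on success) and the
 * program aborts with a readable error string on failure.
 */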
static void dump_available_layers(void)
{
    uint32_t layer_count;
    CALL_VK(vkEnumerateInstanceLayerProperties, (&layer_count, NULL));
    if (layer_count == 0) {
        fprintf(stderr, "no layers available.\n");
        return;
    }
    VkLayerProperties *layers = malloc(sizeof(*layers) * layer_count);
    assert(layers);
    CALL_VK(vkEnumerateInstanceLayerProperties, (&layer_count, layers));
    fprintf(stderr, "layers:\n");
    for (uint32_t i = 0; i < layer_count; i++) {
        fprintf(stderr, "\t%s: %s\n", layers[i].layerName, layers[i].description);
    }
    free(layers);
}
static struct vulkan_state* create_state(void)
{
    struct vulkan_state *state = malloc(sizeof(*state));
    if (NULL == state) {
        abort();
    }
    memset(state, 0, sizeof(*state));

    struct VkApplicationInfo app_info = {
        VK_STRUCTURE_TYPE_APPLICATION_INFO,
        NULL,
        "vulkan compute",
        1,
        "vulkan engine",
        1,
        VK_MAKE_VERSION(1, 0, 0)
    };

    dump_available_layers();

    const char* validation_layers[] = {
        //"VK_LAYER_LUNARG_standard_validation",
    };

    struct VkInstanceCreateInfo info = {
        VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        NULL,
        0,
        &app_info,
        sizeof(validation_layers) / sizeof(*validation_layers),
        validation_layers,
        0,
        NULL
    };
    CALL_VK(vkCreateInstance, (&info, NULL, &state->instance));
    return state;
}
static void select_physical_device(struct vulkan_state *state)
{
    uint32_t device_count;
    VkPhysicalDevice *devices = NULL;

    CALL_VK(vkEnumeratePhysicalDevices, (state->instance, &device_count, NULL));
    if (device_count == 0) {
        abort();
    }
    devices = malloc(sizeof(*devices) * device_count);
    if (devices == NULL) {
        exit(1);
    }
    CALL_VK(vkEnumeratePhysicalDevices, (state->instance, &device_count, devices));

    uint32_t device_index = UINT_MAX;
    printf("%u available devices\n", device_count);
    for (uint32_t i = 0; i < device_count; i++) {
        VkPhysicalDeviceProperties props;
        vkGetPhysicalDeviceProperties(devices[i], &props);
        printf("\t[%u] - %s (v:0x%x, d:0x%x)\n",
               i, props.deviceName, props.vendorID, props.deviceID);
        device_index = i; /* keep the last enumerated device */
    }
    if (device_index == UINT_MAX) {
        fprintf(stderr, "Unable to find any Vulkan device. Aborting now.\n");
        abort();
    }
    printf("loading device id=%u\n", device_index);
    state->phys_device = devices[device_index];
    free(devices);
}
static VkDeviceQueueCreateInfo find_queue(struct vulkan_state *state)
{
    uint32_t count;
    VkQueueFamilyProperties *properties;

    vkGetPhysicalDeviceQueueFamilyProperties(state->phys_device, &count, NULL);
    if (count == 0) {
        abort();
    }
    properties = malloc(sizeof(*properties) * count);
    if (NULL == properties) {
        abort();
    }
    vkGetPhysicalDeviceQueueFamilyProperties(state->phys_device, &count, properties);

    uint32_t compute_queue_index = UINT32_MAX;
    for (uint32_t i = 0; i < count; i++) {
        /* bitwise AND to test the compute bit ('|' here would match any family) */
        if (properties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) {
            compute_queue_index = i;
            break;
        }
    }
    assert(compute_queue_index < UINT32_MAX);

    /* static: queue_info keeps a pointer to this after we return */
    static const float priorities[] = { 1.f };
    VkDeviceQueueCreateInfo queue_info = {
        VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
        NULL,
        0,
        compute_queue_index,
        1,
        priorities
    };
    state->queue_family_index = compute_queue_index;
    free(properties);
    return queue_info;
}
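/*
 * The first queue family advertising VK_QUEUE_COMPUTE_BIT is used, and a
 * single queue with priority 1.0 is requested from it; the assert above
 * fires if the device exposes no compute-capable family at all.
 */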
static void create_logical_device(struct vulkan_state *state)
{
    VkDeviceQueueCreateInfo queue_info = find_queue(state);
    struct VkDeviceCreateInfo info = {
        VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        NULL,
        0,
        1,
        &queue_info,
        0,
        NULL,
        0,
        NULL,
        NULL
    };
    CALL_VK(vkCreateDevice, (state->phys_device, &info, NULL, &state->device));
    vkGetDeviceQueue(state->device, queue_info.queueFamilyIndex, 0, &state->queue);
}
static void descriptor_set_layouts_create(struct vulkan_state *state, uint32_t count)
{
    VkDescriptorSetLayoutBinding *bindings = malloc(sizeof(*bindings) * count);
    assert(bindings);
    for (uint32_t i = 0; i < count; i++) {
        bindings[i].binding = i;
        bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
        bindings[i].descriptorCount = 1;
        bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
        bindings[i].pImmutableSamplers = NULL;
    }
    VkDescriptorSetLayoutCreateInfo info = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        NULL,
        0,
        count,
        bindings
    };
    CALL_VK(vkCreateDescriptorSetLayout,
            (state->device, &info, NULL, &state->descriptor_layout));
    free(bindings);
}

static void descriptor_pool_create(struct vulkan_state *state, uint32_t size)
{
    VkDescriptorPoolSize pool_size = {
        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        size
    };
    VkDescriptorPoolCreateInfo info = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        NULL,
        0 /* no flags */,
        size,
        1,
        &pool_size
    };
    CALL_VK(vkCreateDescriptorPool,
            (state->device, &info, NULL, &state->descriptor_pool));
}

static void command_pool_create(struct vulkan_state *state)
{
    VkCommandPoolCreateInfo pool_info = {
        VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        NULL,
        0,
        state->queue_family_index
    };
    CALL_VK(vkCreateCommandPool,
            (state->device, &pool_info, NULL, &state->command_pool));
}
static void descriptor_set_create(struct vulkan_state *state)
{
    VkDescriptorSetAllocateInfo alloc_info = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        NULL,
        state->descriptor_pool,
        1,
        &state->descriptor_layout
    };
    CALL_VK(vkAllocateDescriptorSets,
            (state->device, &alloc_info, &state->descriptor_set));
}

static void descriptor_set_bind(struct vulkan_state *state,
                                VkBuffer buffer,
                                VkDeviceSize size,
                                uint32_t binding)
{
    VkDescriptorBufferInfo buffer_info = {
        buffer,
        0,
        size,
    };
    VkWriteDescriptorSet write_info = {
        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        NULL,
        state->descriptor_set,
        binding,
        0,
        1,
        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        NULL,
        &buffer_info,
        NULL
    };
    vkUpdateDescriptorSets(state->device, 1, &write_info, 0, NULL);
}
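/*
 * descriptor_set_bind() attaches a whole buffer to one binding of the
 * single descriptor set: binding 0 backs buf_in and binding 1 backs
 * buf_out in sum.comp. vkUpdateDescriptorSets takes effect immediately;
 * no command buffer is involved in the update itself.
 */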
static void initialize_device(struct vulkan_state *state)
{
    select_physical_device(state);
    create_logical_device(state);
    descriptor_pool_create(state, BUFFER_COUNT);
    command_pool_create(state);
    descriptor_set_layouts_create(state, BUFFER_COUNT);
    descriptor_set_create(state);
}
static VkDeviceMemory allocate_gpu_memory(struct vulkan_state *state, VkDeviceSize size)
{
    uint32_t memory_index = UINT32_MAX;
    VkPhysicalDeviceMemoryProperties props;

    vkGetPhysicalDeviceMemoryProperties(state->phys_device, &props);
    for (uint32_t i = 0; i < props.memoryTypeCount; i++) {
        VkMemoryType type = props.memoryTypes[i];
        printf("Memory[%u]:\n", i);
        if (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT & type.propertyFlags) {
            printf("\tVK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT\n");
        }
        if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & type.propertyFlags) {
            printf("\tVK_MEMORY_PROPERTY_HOST_VISIBLE_BIT\n");
        }
        if (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & type.propertyFlags) {
            printf("\tVK_MEMORY_PROPERTY_HOST_COHERENT_BIT\n");
        }
        if (VK_MEMORY_PROPERTY_HOST_CACHED_BIT & type.propertyFlags) {
            printf("\tVK_MEMORY_PROPERTY_HOST_CACHED_BIT\n");
        }
        if (VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT & type.propertyFlags) {
            printf("\tVK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT\n");
        }
        if (VK_MEMORY_PROPERTY_PROTECTED_BIT & type.propertyFlags) {
            printf("\tVK_MEMORY_PROPERTY_PROTECTED_BIT\n");
        }
        /* take the first HOST_VISIBLE type; a robust version would also
         * honor vkGetBufferMemoryRequirements' memoryTypeBits */
        if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
            if (0 == (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
                state->memory_is_cached = 1;
            }
            memory_index = i;
            break;
        }
    }
    if (memory_index == UINT32_MAX) {
        fprintf(stderr, "Compatible memory not found (HOST_VISIBLE).\n");
        abort();
    }

    VkMemoryAllocateInfo alloc_info = {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        NULL,
        size,
        memory_index
    };
    VkDeviceMemory vk_memory;
    CALL_VK(vkAllocateMemory, (state->device, &alloc_info, NULL, &vk_memory));
    return vk_memory;
}
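/*
 * When the selected type is HOST_VISIBLE but not HOST_COHERENT,
 * memory_is_cached is set and the callers below pair every host write
 * with vkFlushMappedMemoryRanges and every device-side write with
 * vkInvalidateMappedMemoryRanges before reading it back.
 */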
static VkBuffer create_gpu_buffer(struct vulkan_state *state, VkDeviceSize size)
{
    VkBufferCreateInfo buffer_info = {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        NULL,
        0,
        size,
        VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
        VK_SHARING_MODE_EXCLUSIVE,
        0,
        NULL /* ignored since marked as exclusive */
    };
    VkBuffer vk_buffer;
    CALL_VK(vkCreateBuffer, (state->device, &buffer_info, NULL, &vk_buffer));
    return vk_buffer;
}

static struct gpu_memory allocate_buffer(struct vulkan_state *state,
                                         VkDeviceSize offset,
                                         VkDeviceSize size)
{
    VkDeviceMemory vk_memory = allocate_gpu_memory(state, size);
    VkBuffer vk_buffer = create_gpu_buffer(state, size);
    CALL_VK(vkBindBufferMemory, (state->device, vk_buffer, vk_memory, offset));
    struct gpu_memory info = {
        NULL,
        size,
        vk_memory,
        vk_buffer,
    };
    return info;
}
static void free_buffer(struct vulkan_state *state, struct gpu_memory *mem)
{
    if (mem->buffer) {
        vkUnmapMemory(state->device, mem->vk_memory);
        mem->buffer = NULL;
    }
    vkFreeMemory(state->device, mem->vk_memory, NULL);
    vkDestroyBuffer(state->device, mem->vk_buffer, NULL);
}

static uint32_t* load_shader(const char *path, size_t *file_length)
{
    assert(file_length);
    uint32_t *content = NULL;
    int fd = open(path, O_RDONLY);
    if (fd < 0)
        return NULL;
    do {
        off_t size = lseek(fd, 0, SEEK_END);
        if (size < 0)
            break;
        lseek(fd, 0, SEEK_SET);
        content = malloc(size);
        if (content == NULL)
            break;
        if (read(fd, content, size) != size) { /* require the whole file */
            free(content);
            content = NULL;
            break;
        }
        *file_length = size;
    } while (0);
    close(fd);
    return content;
}
static void create_pipeline(struct vulkan_state *state,
                            const uint32_t *shader,
                            uint32_t shader_len)
{
    VkShaderModuleCreateInfo shader_info = {
        VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        NULL,
        0,
        shader_len,
        shader
    };
    CALL_VK(vkCreateShaderModule,
            (state->device, &shader_info, NULL, &state->shader_module));

    VkPipelineShaderStageCreateInfo shader_stage_creation_info = {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
        NULL,
        0,
        VK_SHADER_STAGE_COMPUTE_BIT,
        state->shader_module,
        SHADER_ENTRY_POINT,
        NULL
    };
    VkPipelineLayoutCreateInfo layout_info = {
        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        NULL,
        0,
        1,
        &state->descriptor_layout,
        0,
        NULL
    };
    CALL_VK(vkCreatePipelineLayout,
            (state->device, &layout_info, NULL, &state->pipeline_layout));

    VkComputePipelineCreateInfo pipeline_info = {
        VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        NULL,
        0,
        shader_stage_creation_info,
        state->pipeline_layout,
        VK_NULL_HANDLE,
        0
    };
    CALL_VK(vkCreateComputePipelines,
            (state->device, VK_NULL_HANDLE, 1, &pipeline_info, NULL, &state->pipeline));
}
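/*
 * WORKGROUP_SIZE and ELT_COUNT were baked into the SPIR-V by
 * glslangValidator's -D flags (see the build line at the top), so the
 * shader and this host program must be built with matching values.
 * Specialization constants would be the usual way to defer these to
 * pipeline-creation time; this demo keeps plain compile-time defines.
 */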
static void execute_sum_kernel(struct vulkan_state *state)
{
    VkCommandBuffer command_buffer;
    VkCommandBufferAllocateInfo alloc_info = {
        VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        NULL,
        state->command_pool,
        VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        1
    };
    CALL_VK(vkAllocateCommandBuffers, (state->device, &alloc_info, &command_buffer));

    VkCommandBufferBeginInfo begin_info = {
        VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        NULL,
        VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
        NULL
    };
    CALL_VK(vkBeginCommandBuffer, (command_buffer, &begin_info));
    vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->pipeline);
    vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                            state->pipeline_layout,
                            0,
                            1,
                            &state->descriptor_set,
                            0,
                            NULL);
    vkCmdDispatch(command_buffer, ELT_COUNT / WORKGROUP_SIZE, 1, 1);
    CALL_VK(vkEndCommandBuffer, (command_buffer));

    VkSubmitInfo submit_info = {
        VK_STRUCTURE_TYPE_SUBMIT_INFO,
        NULL,
        0,
        NULL,
        NULL,
        1,
        &command_buffer,
        0,
        NULL
    };
    VkFence fence;
    VkFenceCreateInfo fence_info = {
        VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        NULL,
        0
    };
    CALL_VK(vkCreateFence, (state->device, &fence_info, NULL, &fence));
    CALL_VK(vkQueueSubmit, (state->queue, 1, &submit_info, fence));
    CALL_VK(vkWaitForFences, (state->device, 1, &fence, VK_TRUE, 5000000000ull /* 5 s, in ns */));
    vkDestroyFence(state->device, fence, NULL);
    /* this function runs several times; return the buffer to the pool */
    vkFreeCommandBuffers(state->device, state->command_pool, 1, &command_buffer);
}
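/*
 * Waiting on the fence (timeout in nanoseconds) is the only
 * synchronization used: once it signals, the dispatch has completed and
 * its results can be read through the mapped pointer, after an
 * invalidate when the memory type is non-coherent.
 */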
static void destroy_state(struct vulkan_state **state)
{
    assert(state && *state);
    struct vulkan_state *st = *state;
#define FREE_VK(Field, Function)        \
    if (st->Field != VK_NULL_HANDLE)    \
        Function(st->device, st->Field, NULL)
    FREE_VK(shader_module, vkDestroyShaderModule);
    FREE_VK(descriptor_pool, vkDestroyDescriptorPool);
    FREE_VK(descriptor_layout, vkDestroyDescriptorSetLayout);
    FREE_VK(pipeline_layout, vkDestroyPipelineLayout);
    FREE_VK(pipeline, vkDestroyPipeline);
    FREE_VK(command_pool, vkDestroyCommandPool);
#undef FREE_VK
    if (st->device != VK_NULL_HANDLE) {
        vkDestroyDevice(st->device, NULL);
    }
    if (st->instance != VK_NULL_HANDLE) {
        vkDestroyInstance(st->instance, NULL);
    }
    free(st);
    *state = NULL;
}
/* Application logic */

static void generate_payload(int *buffer, int elt_count)
{
    for (int i = 0; i < elt_count; i++) {
        buffer[i] = i;
    }
}

static void check_payload(int *buffer, int elt_count)
{
    for (int i = 0; i < elt_count; i++) {
        if (buffer[i] != i + i) {
            fprintf(stderr, "invalid value for [%d]. got %d, expected %d\n",
                    i, buffer[i], i + i);
            abort();
        }
    }
}
static void check_memory_upload(struct vulkan_state *state)
{
    struct gpu_memory a;
    void *ptr = NULL;
    VkMappedMemoryRange range;
    const size_t size = ELT_COUNT * sizeof(int);
    void *local = malloc(size);
    assert(local);

    generate_payload(local, ELT_COUNT);
    a = allocate_buffer(state, 0, size);

    memset(&range, 0, sizeof(range));
    range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
    range.memory = a.vk_memory;
    range.size = a.vk_size;

    /* writing and unmapping */
    CALL_VK(vkMapMemory, (state->device, a.vk_memory, 0, a.vk_size, 0, &ptr));
    memcpy(ptr, local, size);
    if (state->memory_is_cached) {
        CALL_VK(vkFlushMappedMemoryRanges, (state->device, 1, &range));
    }
    vkUnmapMemory(state->device, a.vk_memory);

    /* remapping and reading back */
    CALL_VK(vkMapMemory, (state->device, a.vk_memory, 0, a.vk_size, 0, &ptr));
    if (state->memory_is_cached) {
        CALL_VK(vkInvalidateMappedMemoryRanges, (state->device, 1, &range));
    }
    if (0 != memcmp(ptr, local, size)) {
        fprintf(stderr, "identity check failed\n");
        abort();
    }
    vkUnmapMemory(state->device, a.vk_memory);

    printf("\033[36m%s executed\033[0m\n", __func__);
    free_buffer(state, &a);
    free(local);
}
static void do_sum_one_buffer_one_memory(struct vulkan_state *state)
{
    struct gpu_memory a;
    void *ptr;

    a = allocate_buffer(state, 0, sizeof(int) * ELT_COUNT);
    descriptor_set_bind(state, a.vk_buffer, a.vk_size, 0);
    descriptor_set_bind(state, a.vk_buffer, a.vk_size, 1);

    VkMappedMemoryRange range = {
        VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
        NULL,
        a.vk_memory,
        0,
        a.vk_size
    };
    CALL_VK(vkMapMemory, (state->device, a.vk_memory, 0, a.vk_size, 0, &ptr));
    generate_payload(ptr, ELT_COUNT);
    if (state->memory_is_cached) {
        CALL_VK(vkFlushMappedMemoryRanges, (state->device, 1, &range));
    }
    execute_sum_kernel(state);
    if (state->memory_is_cached) {
        CALL_VK(vkInvalidateMappedMemoryRanges, (state->device, 1, &range));
    }
    check_payload(ptr, ELT_COUNT);
    printf("\033[36m%s executed\033[0m\n", __func__);
    vkUnmapMemory(state->device, a.vk_memory);
    free_buffer(state, &a);
}
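/*
 * Above, one VkBuffer is bound to both descriptor bindings, so buffer_in
 * and buffer_out alias the same storage. That is harmless for this kernel
 * since each invocation only reads and writes its own element. The next
 * two variants split input and output apart.
 */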
static void do_sum_two_buffer_one_memory(struct vulkan_state *state)
{
    VkBuffer buffer_a, buffer_b;
    VkDeviceMemory vk_memory;
    VkMappedMemoryRange write_range, read_range;
    const VkDeviceSize size = ELT_COUNT * sizeof(int);
    void *ptr = NULL;

    vk_memory = allocate_gpu_memory(state, size * 2);
    buffer_a = create_gpu_buffer(state, size);
    buffer_b = create_gpu_buffer(state, size);
    CALL_VK(vkBindBufferMemory, (state->device, buffer_a, vk_memory, 0));
    /* the second buffer lives in the same allocation at offset 'size'; a
     * robust version would round this up to VkMemoryRequirements.alignment */
    CALL_VK(vkBindBufferMemory, (state->device, buffer_b, vk_memory, size));
    descriptor_set_bind(state, buffer_a, size, 0);
    descriptor_set_bind(state, buffer_b, size, 1);

    write_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
    write_range.pNext = NULL;
    write_range.memory = vk_memory;
    write_range.offset = 0;
    write_range.size = size;
    read_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
    read_range.pNext = NULL;
    read_range.memory = vk_memory;
    read_range.offset = size;
    read_range.size = size;

    CALL_VK(vkMapMemory, (state->device, vk_memory, 0, size, 0, &ptr));
    generate_payload(ptr, ELT_COUNT);
    if (state->memory_is_cached) {
        CALL_VK(vkFlushMappedMemoryRanges, (state->device, 1, &write_range));
    }
    vkUnmapMemory(state->device, vk_memory);

    execute_sum_kernel(state);

    CALL_VK(vkMapMemory, (state->device, vk_memory, size, size, 0, &ptr));
    if (state->memory_is_cached) {
        CALL_VK(vkInvalidateMappedMemoryRanges, (state->device, 1, &read_range));
    }
    check_payload(ptr, ELT_COUNT);
    vkUnmapMemory(state->device, vk_memory);

    printf("\033[36m%s executed\033[0m\n", __func__);
    vkDestroyBuffer(state->device, buffer_a, NULL);
    vkDestroyBuffer(state->device, buffer_b, NULL);
    vkFreeMemory(state->device, vk_memory, NULL);
}
static void do_sum_two_buffer_two_memory(struct vulkan_state *state)
{
    struct gpu_memory a, b;
    void *ptr_a, *ptr_b;

    a = allocate_buffer(state, 0, sizeof(int) * ELT_COUNT);
    descriptor_set_bind(state, a.vk_buffer, a.vk_size, 0);
    b = allocate_buffer(state, 0, sizeof(int) * ELT_COUNT);
    descriptor_set_bind(state, b.vk_buffer, b.vk_size, 1);

    VkMappedMemoryRange write_range = {
        VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
        NULL,
        a.vk_memory,
        0,
        a.vk_size
    };
    VkMappedMemoryRange read_range = {
        VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
        NULL,
        b.vk_memory,
        0,
        b.vk_size
    };
    CALL_VK(vkMapMemory, (state->device, a.vk_memory, 0, a.vk_size, 0, &ptr_a));
    generate_payload(ptr_a, ELT_COUNT);
    if (state->memory_is_cached) {
        CALL_VK(vkFlushMappedMemoryRanges, (state->device, 1, &write_range));
    }
    vkUnmapMemory(state->device, a.vk_memory);

    execute_sum_kernel(state);

    CALL_VK(vkMapMemory, (state->device, b.vk_memory, 0, b.vk_size, 0, &ptr_b));
    if (state->memory_is_cached) {
        CALL_VK(vkInvalidateMappedMemoryRanges, (state->device, 1, &read_range));
    }
    check_payload(ptr_b, ELT_COUNT);
    printf("\033[36m%s executed\033[0m\n", __func__);
    vkUnmapMemory(state->device, b.vk_memory);
    free_buffer(state, &a);
    free_buffer(state, &b);
}
int main(int argc, char **argv)
{
    if (argc <= 0)
        return 1;
    struct vulkan_state *state = NULL;
    uint32_t *shader_code = NULL;
    size_t shader_length;

    state = create_state();
    if (state == NULL)
        return 1;
    initialize_device(state);

    /* build "<dir of argv[0]>/sum.spv"; dirname() may modify its argument
     * and can return static storage, so copy its result instead of
     * realloc'ing the pointer it hands back */
    char *arg_copy = strdup(argv[0]);
    assert(arg_copy);
    const char *dir = dirname(arg_copy);
    size_t pathlen = strlen(dir) + strlen("/" SHADER_NAME) + 1;
    char *path = malloc(pathlen);
    assert(path);
    snprintf(path, pathlen, "%s/" SHADER_NAME, dir);
    free(arg_copy);

    shader_code = load_shader(path, &shader_length);
    printf("path: %s\n", path);
    free(path);
    path = NULL;
    if (shader_code == NULL) {
        fprintf(stderr, "unable to load the shader.\n");
        destroy_state(&state);
        return 2;
    }
    create_pipeline(state, shader_code, shader_length);

    check_memory_upload(state);
    do_sum_one_buffer_one_memory(state);
    do_sum_two_buffer_one_memory(state);
    do_sum_two_buffer_two_memory(state);

    free(shader_code);
    destroy_state(&state);
    puts("bye bye");
    return 0;
}