-
-
Save sheredom/523f02bbad2ae397d7ed255f3f3b5a7f to your computer and use it in GitHub Desktop.
// This is free and unencumbered software released into the public domain. | |
// | |
// Anyone is free to copy, modify, publish, use, compile, sell, or | |
// distribute this software, either in source code form or as a compiled | |
// binary, for any purpose, commercial or non-commercial, and by any | |
// means. | |
// | |
// In jurisdictions that recognize copyright laws, the author or authors | |
// of this software dedicate any and all copyright interest in the | |
// software to the public domain. We make this dedication for the benefit | |
// of the public at large and to the detriment of our heirs and | |
// successors. We intend this dedication to be an overt act of | |
// relinquishment in perpetuity of all present and future rights to this | |
// software under copyright law. | |
// | |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
// OTHER DEALINGS IN THE SOFTWARE. | |
// | |
// For more information, please refer to <http://unlicense.org/> | |
#include "vulkan.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
// Evaluates `result` once; if it is anything other than VK_SUCCESS, reports the
// source line and file of the call site on stderr and terminates the process
// with exit(-1).
//
// The body is wrapped in do { ... } while (0) so the macro expands to a single
// statement: `if (x) BAIL_ON_BAD_RESULT(r); else ...` parses as intended, which
// a bare `if { ... }` expansion does not allow. __LINE__ is cast because it is
// an int and the format specifier is %u.
#define BAIL_ON_BAD_RESULT(result)                                               \
  do {                                                                           \
    if (VK_SUCCESS != (result)) {                                                \
      fprintf(stderr, "Failure at %u %s\n", (unsigned)__LINE__, __FILE__);       \
      exit(-1);                                                                  \
    }                                                                            \
  } while (0)
// Selects the queue family of `physicalDevice` to use for transfer work and
// writes its index to `*queueFamilyIndex`.
//
// Families are ranked in three tiers; the lowest-indexed family of the best
// non-empty tier wins:
//   1. transfer bit set, neither graphics nor compute set
//   2. compute bit set, graphics not set (compute implicitly enables transfer)
//   3. any of graphics, compute or transfer set
// The sparse-binding bit plays no part in the ranking.
//
// Returns VK_SUCCESS when a family was chosen, VK_ERROR_INITIALIZATION_FAILED
// when no family qualifies (including when the device reports none), and
// VK_ERROR_OUT_OF_HOST_MEMORY when the temporary property array cannot be
// allocated. `*queueFamilyIndex` is only written on VK_SUCCESS.
VkResult vkGetBestTransferQueueNPH(VkPhysicalDevice physicalDevice, uint32_t* queueFamilyIndex) {
  uint32_t queueFamilyPropertiesCount = 0;
  vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyPropertiesCount, 0);

  // nothing to choose from; also keeps us away from a zero-sized allocation
  if (0 == queueFamilyPropertiesCount) {
    return VK_ERROR_INITIALIZATION_FAILED;
  }

  // heap allocation instead of the non-portable _alloca: the count comes from
  // the driver, and a failed malloc can be reported rather than overflowing
  // the stack
  VkQueueFamilyProperties* const queueFamilyProperties = (VkQueueFamilyProperties*)malloc(
      sizeof(VkQueueFamilyProperties) * queueFamilyPropertiesCount);
  if (0 == queueFamilyProperties) {
    return VK_ERROR_OUT_OF_HOST_MEMORY;
  }

  vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyPropertiesCount, queueFamilyProperties);

  // the family count doubles as the "no candidate yet" marker since it can
  // never be a valid index
  const uint32_t notFound = queueFamilyPropertiesCount;
  uint32_t transferOnly = notFound;
  uint32_t computeWithoutGraphics = notFound;
  uint32_t anyCapable = notFound;

  // one pass remembers the first family of every tier
  for (uint32_t i = 0; i < queueFamilyPropertiesCount; i++) {
    const VkQueueFlags flags = queueFamilyProperties[i].queueFlags;
    const int hasGraphics = 0 != (VK_QUEUE_GRAPHICS_BIT & flags);
    const int hasCompute = 0 != (VK_QUEUE_COMPUTE_BIT & flags);
    const int hasTransfer = 0 != (VK_QUEUE_TRANSFER_BIT & flags);

    if (notFound == transferOnly && !hasGraphics && !hasCompute && hasTransfer) {
      transferOnly = i;
    }

    if (notFound == computeWithoutGraphics && !hasGraphics && hasCompute) {
      computeWithoutGraphics = i;
    }

    if (notFound == anyCapable && (hasGraphics || hasCompute || hasTransfer)) {
      anyCapable = i;
    }
  }

  free(queueFamilyProperties);

  uint32_t chosen = transferOnly;
  if (notFound == chosen) {
    chosen = computeWithoutGraphics;
  }
  if (notFound == chosen) {
    chosen = anyCapable;
  }
  if (notFound == chosen) {
    return VK_ERROR_INITIALIZATION_FAILED;
  }

  *queueFamilyIndex = chosen;
  return VK_SUCCESS;
}
// Selects the queue family of `physicalDevice` to use for compute work and
// writes its index to `*queueFamilyIndex`.
//
// A family with the compute bit set and the graphics bit clear is preferred;
// failing that, the first family with the compute bit set is taken. The
// transfer and sparse-binding bits play no part in the choice.
//
// Returns VK_SUCCESS when a family was chosen, VK_ERROR_INITIALIZATION_FAILED
// when no family supports compute (including when the device reports no
// families), and VK_ERROR_OUT_OF_HOST_MEMORY when the temporary property array
// cannot be allocated. `*queueFamilyIndex` is only written on VK_SUCCESS.
VkResult vkGetBestComputeQueueNPH(VkPhysicalDevice physicalDevice, uint32_t* queueFamilyIndex) {
  uint32_t queueFamilyPropertiesCount = 0;
  vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyPropertiesCount, 0);

  // nothing to choose from; also keeps us away from a zero-sized allocation
  if (0 == queueFamilyPropertiesCount) {
    return VK_ERROR_INITIALIZATION_FAILED;
  }

  // heap allocation instead of the non-portable _alloca: the count comes from
  // the driver, and a failed malloc can be reported rather than overflowing
  // the stack
  VkQueueFamilyProperties* const queueFamilyProperties = (VkQueueFamilyProperties*)malloc(
      sizeof(VkQueueFamilyProperties) * queueFamilyPropertiesCount);
  if (0 == queueFamilyProperties) {
    return VK_ERROR_OUT_OF_HOST_MEMORY;
  }

  vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyPropertiesCount, queueFamilyProperties);

  // the family count doubles as the "no candidate yet" marker since it can
  // never be a valid index
  const uint32_t notFound = queueFamilyPropertiesCount;
  uint32_t computeWithoutGraphics = notFound;
  uint32_t anyCompute = notFound;

  for (uint32_t i = 0; i < queueFamilyPropertiesCount; i++) {
    const VkQueueFlags flags = queueFamilyProperties[i].queueFlags;

    if (0 == (VK_QUEUE_COMPUTE_BIT & flags)) {
      continue;
    }

    if (notFound == anyCompute) {
      anyCompute = i;
    }

    if (0 == (VK_QUEUE_GRAPHICS_BIT & flags)) {
      // first family of the preferred tier; nothing later can beat it
      computeWithoutGraphics = i;
      break;
    }
  }

  free(queueFamilyProperties);

  const uint32_t chosen = (notFound != computeWithoutGraphics) ? computeWithoutGraphics : anyCompute;
  if (notFound == chosen) {
    return VK_ERROR_INITIALIZATION_FAILED;
  }

  *queueFamilyIndex = chosen;
  return VK_SUCCESS;
}
int main(int argc, const char * const argv[]) { | |
(void)argc; | |
(void)argv; | |
const VkApplicationInfo applicationInfo = { | |
VK_STRUCTURE_TYPE_APPLICATION_INFO, | |
0, | |
"VKComputeSample", | |
0, | |
"", | |
0, | |
VK_MAKE_VERSION(1, 0, 9) | |
}; | |
const VkInstanceCreateInfo instanceCreateInfo = { | |
VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, | |
0, | |
0, | |
&applicationInfo, | |
0, | |
0, | |
0, | |
0 | |
}; | |
VkInstance instance; | |
BAIL_ON_BAD_RESULT(vkCreateInstance(&instanceCreateInfo, 0, &instance)); | |
uint32_t physicalDeviceCount = 0; | |
BAIL_ON_BAD_RESULT(vkEnumeratePhysicalDevices(instance, &physicalDeviceCount, 0)); | |
VkPhysicalDevice* const physicalDevices = (VkPhysicalDevice*)malloc( | |
sizeof(VkPhysicalDevice) * physicalDeviceCount); | |
BAIL_ON_BAD_RESULT(vkEnumeratePhysicalDevices(instance, &physicalDeviceCount, physicalDevices)); | |
for (uint32_t i = 0; i < physicalDeviceCount; i++) { | |
uint32_t queueFamilyIndex = 0; | |
BAIL_ON_BAD_RESULT(vkGetBestComputeQueueNPH(physicalDevices[i], &queueFamilyIndex)); | |
const float queuePrioritory = 1.0f; | |
const VkDeviceQueueCreateInfo deviceQueueCreateInfo = { | |
VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, | |
0, | |
0, | |
queueFamilyIndex, | |
1, | |
&queuePrioritory | |
}; | |
const VkDeviceCreateInfo deviceCreateInfo = { | |
VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, | |
0, | |
0, | |
1, | |
&deviceQueueCreateInfo, | |
0, | |
0, | |
0, | |
0, | |
0 | |
}; | |
VkDevice device; | |
BAIL_ON_BAD_RESULT(vkCreateDevice(physicalDevices[i], &deviceCreateInfo, 0, &device)); | |
VkPhysicalDeviceMemoryProperties properties; | |
vkGetPhysicalDeviceMemoryProperties(physicalDevices[i], &properties); | |
const int32_t bufferLength = 16384; | |
const uint32_t bufferSize = sizeof(int32_t) * bufferLength; | |
// we are going to need two buffers from this one memory | |
const VkDeviceSize memorySize = bufferSize * 2; | |
// set memoryTypeIndex to an invalid entry in the properties.memoryTypes array | |
uint32_t memoryTypeIndex = VK_MAX_MEMORY_TYPES; | |
for (uint32_t k = 0; k < properties.memoryTypeCount; k++) { | |
if ((VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & properties.memoryTypes[k].propertyFlags) && | |
(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & properties.memoryTypes[k].propertyFlags) && | |
(memorySize < properties.memoryHeaps[properties.memoryTypes[k].heapIndex].size)) { | |
memoryTypeIndex = k; | |
break; | |
} | |
} | |
BAIL_ON_BAD_RESULT(memoryTypeIndex == VK_MAX_MEMORY_TYPES ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS); | |
const VkMemoryAllocateInfo memoryAllocateInfo = { | |
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | |
0, | |
memorySize, | |
memoryTypeIndex | |
}; | |
VkDeviceMemory memory; | |
BAIL_ON_BAD_RESULT(vkAllocateMemory(device, &memoryAllocateInfo, 0, &memory)); | |
int32_t *payload; | |
BAIL_ON_BAD_RESULT(vkMapMemory(device, memory, 0, memorySize, 0, (void *)&payload)); | |
for (uint32_t k = 1; k < memorySize / sizeof(int32_t); k++) { | |
payload[k] = rand(); | |
} | |
vkUnmapMemory(device, memory); | |
const VkBufferCreateInfo bufferCreateInfo = { | |
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | |
0, | |
0, | |
bufferSize, | |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, | |
VK_SHARING_MODE_EXCLUSIVE, | |
1, | |
&queueFamilyIndex | |
}; | |
VkBuffer in_buffer; | |
BAIL_ON_BAD_RESULT(vkCreateBuffer(device, &bufferCreateInfo, 0, &in_buffer)); | |
BAIL_ON_BAD_RESULT(vkBindBufferMemory(device, in_buffer, memory, 0)); | |
VkBuffer out_buffer; | |
BAIL_ON_BAD_RESULT(vkCreateBuffer(device, &bufferCreateInfo, 0, &out_buffer)); | |
BAIL_ON_BAD_RESULT(vkBindBufferMemory(device, out_buffer, memory, bufferSize)); | |
enum { | |
RESERVED_ID = 0, | |
FUNC_ID, | |
IN_ID, | |
OUT_ID, | |
GLOBAL_INVOCATION_ID, | |
VOID_TYPE_ID, | |
FUNC_TYPE_ID, | |
INT_TYPE_ID, | |
INT_ARRAY_TYPE_ID, | |
STRUCT_ID, | |
POINTER_TYPE_ID, | |
ELEMENT_POINTER_TYPE_ID, | |
INT_VECTOR_TYPE_ID, | |
INT_VECTOR_POINTER_TYPE_ID, | |
INT_POINTER_TYPE_ID, | |
CONSTANT_ZERO_ID, | |
CONSTANT_ARRAY_LENGTH_ID, | |
LABEL_ID, | |
IN_ELEMENT_ID, | |
OUT_ELEMENT_ID, | |
GLOBAL_INVOCATION_X_ID, | |
GLOBAL_INVOCATION_X_PTR_ID, | |
TEMP_LOADED_ID, | |
BOUND | |
}; | |
enum { | |
INPUT = 1, | |
UNIFORM = 2, | |
BUFFER_BLOCK = 3, | |
ARRAY_STRIDE = 6, | |
BUILTIN = 11, | |
BINDING = 33, | |
OFFSET = 35, | |
DESCRIPTOR_SET = 34, | |
GLOBAL_INVOCATION = 28, | |
OP_TYPE_VOID = 19, | |
OP_TYPE_FUNCTION = 33, | |
OP_TYPE_INT = 21, | |
OP_TYPE_VECTOR = 23, | |
OP_TYPE_ARRAY = 28, | |
OP_TYPE_STRUCT = 30, | |
OP_TYPE_POINTER = 32, | |
OP_VARIABLE = 59, | |
OP_DECORATE = 71, | |
OP_MEMBER_DECORATE = 72, | |
OP_FUNCTION = 54, | |
OP_LABEL = 248, | |
OP_ACCESS_CHAIN = 65, | |
OP_CONSTANT = 43, | |
OP_LOAD = 61, | |
OP_STORE = 62, | |
OP_RETURN = 253, | |
OP_FUNCTION_END = 56, | |
OP_CAPABILITY = 17, | |
OP_MEMORY_MODEL = 14, | |
OP_ENTRY_POINT = 15, | |
OP_EXECUTION_MODE = 16, | |
OP_COMPOSITE_EXTRACT = 81, | |
}; | |
int32_t shader[] = { | |
// first is the SPIR-V header | |
0x07230203, // magic header ID | |
0x00010000, // version 1.0.0 | |
0, // generator (optional) | |
BOUND, // bound | |
0, // schema | |
// OpCapability Shader | |
(2 << 16) | OP_CAPABILITY, 1, | |
// OpMemoryModel Logical Simple | |
(3 << 16) | OP_MEMORY_MODEL, 0, 0, | |
// OpEntryPoint GLCompute %FUNC_ID "f" %IN_ID %OUT_ID | |
(4 << 16) | OP_ENTRY_POINT, 5, FUNC_ID, 0x00000066, | |
// OpExecutionMode %FUNC_ID LocalSize 1 1 1 | |
(6 << 16) | OP_EXECUTION_MODE, FUNC_ID, 17, 1, 1, 1, | |
// next declare decorations | |
(3 << 16) | OP_DECORATE, STRUCT_ID, BUFFER_BLOCK, | |
(4 << 16) | OP_DECORATE, GLOBAL_INVOCATION_ID, BUILTIN, GLOBAL_INVOCATION, | |
(4 << 16) | OP_DECORATE, IN_ID, DESCRIPTOR_SET, 0, | |
(4 << 16) | OP_DECORATE, IN_ID, BINDING, 0, | |
(4 << 16) | OP_DECORATE, OUT_ID, DESCRIPTOR_SET, 0, | |
(4 << 16) | OP_DECORATE, OUT_ID, BINDING, 1, | |
(4 << 16) | OP_DECORATE, INT_ARRAY_TYPE_ID, ARRAY_STRIDE, 4, | |
(5 << 16) | OP_MEMBER_DECORATE, STRUCT_ID, 0, OFFSET, 0, | |
// next declare types | |
(2 << 16) | OP_TYPE_VOID, VOID_TYPE_ID, | |
(3 << 16) | OP_TYPE_FUNCTION, FUNC_TYPE_ID, VOID_TYPE_ID, | |
(4 << 16) | OP_TYPE_INT, INT_TYPE_ID, 32, 1, | |
(4 << 16) | OP_CONSTANT, INT_TYPE_ID, CONSTANT_ARRAY_LENGTH_ID, bufferLength, | |
(4 << 16) | OP_TYPE_ARRAY, INT_ARRAY_TYPE_ID, INT_TYPE_ID, CONSTANT_ARRAY_LENGTH_ID, | |
(3 << 16) | OP_TYPE_STRUCT, STRUCT_ID, INT_ARRAY_TYPE_ID, | |
(4 << 16) | OP_TYPE_POINTER, POINTER_TYPE_ID, UNIFORM, STRUCT_ID, | |
(4 << 16) | OP_TYPE_POINTER, ELEMENT_POINTER_TYPE_ID, UNIFORM, INT_TYPE_ID, | |
(4 << 16) | OP_TYPE_VECTOR, INT_VECTOR_TYPE_ID, INT_TYPE_ID, 3, | |
(4 << 16) | OP_TYPE_POINTER, INT_VECTOR_POINTER_TYPE_ID, INPUT, INT_VECTOR_TYPE_ID, | |
(4 << 16) | OP_TYPE_POINTER, INT_POINTER_TYPE_ID, INPUT, INT_TYPE_ID, | |
// then declare constants | |
(4 << 16) | OP_CONSTANT, INT_TYPE_ID, CONSTANT_ZERO_ID, 0, | |
// then declare variables | |
(4 << 16) | OP_VARIABLE, POINTER_TYPE_ID, IN_ID, UNIFORM, | |
(4 << 16) | OP_VARIABLE, POINTER_TYPE_ID, OUT_ID, UNIFORM, | |
(4 << 16) | OP_VARIABLE, INT_VECTOR_POINTER_TYPE_ID, GLOBAL_INVOCATION_ID, INPUT, | |
// then declare function | |
(5 << 16) | OP_FUNCTION, VOID_TYPE_ID, FUNC_ID, 0, FUNC_TYPE_ID, | |
(2 << 16) | OP_LABEL, LABEL_ID, | |
(5 << 16) | OP_ACCESS_CHAIN, INT_POINTER_TYPE_ID, GLOBAL_INVOCATION_X_PTR_ID, GLOBAL_INVOCATION_ID, CONSTANT_ZERO_ID, | |
(4 << 16) | OP_LOAD, INT_TYPE_ID, GLOBAL_INVOCATION_X_ID, GLOBAL_INVOCATION_X_PTR_ID, | |
(6 << 16) | OP_ACCESS_CHAIN, ELEMENT_POINTER_TYPE_ID, IN_ELEMENT_ID, IN_ID, CONSTANT_ZERO_ID, GLOBAL_INVOCATION_X_ID, | |
(4 << 16) | OP_LOAD, INT_TYPE_ID, TEMP_LOADED_ID, IN_ELEMENT_ID, | |
(6 << 16) | OP_ACCESS_CHAIN, ELEMENT_POINTER_TYPE_ID, OUT_ELEMENT_ID, OUT_ID, CONSTANT_ZERO_ID, GLOBAL_INVOCATION_X_ID, | |
(3 << 16) | OP_STORE, OUT_ELEMENT_ID, TEMP_LOADED_ID, | |
(1 << 16) | OP_RETURN, | |
(1 << 16) | OP_FUNCTION_END, | |
}; | |
VkShaderModuleCreateInfo shaderModuleCreateInfo = { | |
VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | |
0, | |
0, | |
sizeof(shader), | |
shader | |
}; | |
VkShaderModule shader_module; | |
BAIL_ON_BAD_RESULT(vkCreateShaderModule(device, &shaderModuleCreateInfo, 0, &shader_module)); | |
VkDescriptorSetLayoutBinding descriptorSetLayoutBindings[2] = { | |
{ | |
0, | |
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
1, | |
VK_SHADER_STAGE_COMPUTE_BIT, | |
0 | |
}, | |
{ | |
1, | |
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
1, | |
VK_SHADER_STAGE_COMPUTE_BIT, | |
0 | |
} | |
}; | |
VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo = { | |
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | |
0, | |
0, | |
2, | |
descriptorSetLayoutBindings | |
}; | |
VkDescriptorSetLayout descriptorSetLayout; | |
BAIL_ON_BAD_RESULT(vkCreateDescriptorSetLayout(device, &descriptorSetLayoutCreateInfo, 0, &descriptorSetLayout)); | |
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = { | |
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | |
0, | |
0, | |
1, | |
&descriptorSetLayout, | |
0, | |
0 | |
}; | |
VkPipelineLayout pipelineLayout; | |
BAIL_ON_BAD_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, 0, &pipelineLayout)); | |
VkComputePipelineCreateInfo computePipelineCreateInfo = { | |
VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | |
0, | |
0, | |
{ | |
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | |
0, | |
0, | |
VK_SHADER_STAGE_COMPUTE_BIT, | |
shader_module, | |
"f", | |
0 | |
}, | |
pipelineLayout, | |
0, | |
0 | |
}; | |
VkPipeline pipeline; | |
BAIL_ON_BAD_RESULT(vkCreateComputePipelines(device, 0, 1, &computePipelineCreateInfo, 0, &pipeline)); | |
VkCommandPoolCreateInfo commandPoolCreateInfo = { | |
VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, | |
0, | |
0, | |
queueFamilyIndex | |
}; | |
VkDescriptorPoolSize descriptorPoolSize = { | |
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
2 | |
}; | |
VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = { | |
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, | |
0, | |
0, | |
1, | |
1, | |
&descriptorPoolSize | |
}; | |
VkDescriptorPool descriptorPool; | |
BAIL_ON_BAD_RESULT(vkCreateDescriptorPool(device, &descriptorPoolCreateInfo, 0, &descriptorPool)); | |
VkDescriptorSetAllocateInfo descriptorSetAllocateInfo = { | |
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, | |
0, | |
descriptorPool, | |
1, | |
&descriptorSetLayout | |
}; | |
VkDescriptorSet descriptorSet; | |
BAIL_ON_BAD_RESULT(vkAllocateDescriptorSets(device, &descriptorSetAllocateInfo, &descriptorSet)); | |
VkDescriptorBufferInfo in_descriptorBufferInfo = { | |
in_buffer, | |
0, | |
VK_WHOLE_SIZE | |
}; | |
VkDescriptorBufferInfo out_descriptorBufferInfo = { | |
out_buffer, | |
0, | |
VK_WHOLE_SIZE | |
}; | |
VkWriteDescriptorSet writeDescriptorSet[2] = { | |
{ | |
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | |
0, | |
descriptorSet, | |
0, | |
0, | |
1, | |
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
0, | |
&in_descriptorBufferInfo, | |
0 | |
}, | |
{ | |
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | |
0, | |
descriptorSet, | |
1, | |
0, | |
1, | |
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
0, | |
&out_descriptorBufferInfo, | |
0 | |
} | |
}; | |
vkUpdateDescriptorSets(device, 2, writeDescriptorSet, 0, 0); | |
VkCommandPool commandPool; | |
BAIL_ON_BAD_RESULT(vkCreateCommandPool(device, &commandPoolCreateInfo, 0, &commandPool)); | |
VkCommandBufferAllocateInfo commandBufferAllocateInfo = { | |
VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, | |
0, | |
commandPool, | |
VK_COMMAND_BUFFER_LEVEL_PRIMARY, | |
1 | |
}; | |
VkCommandBuffer commandBuffer; | |
BAIL_ON_BAD_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &commandBuffer)); | |
VkCommandBufferBeginInfo commandBufferBeginInfo = { | |
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, | |
0, | |
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, | |
0 | |
}; | |
BAIL_ON_BAD_RESULT(vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo)); | |
vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | |
vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, | |
pipelineLayout, 0, 1, &descriptorSet, 0, 0); | |
vkCmdDispatch(commandBuffer, bufferSize / sizeof(int32_t), 1, 1); | |
BAIL_ON_BAD_RESULT(vkEndCommandBuffer(commandBuffer)); | |
VkQueue queue; | |
vkGetDeviceQueue(device, queueFamilyIndex, 0, &queue); | |
VkSubmitInfo submitInfo = { | |
VK_STRUCTURE_TYPE_SUBMIT_INFO, | |
0, | |
0, | |
0, | |
0, | |
1, | |
&commandBuffer, | |
0, | |
0 | |
}; | |
BAIL_ON_BAD_RESULT(vkQueueSubmit(queue, 1, &submitInfo, 0)); | |
BAIL_ON_BAD_RESULT(vkQueueWaitIdle(queue)); | |
BAIL_ON_BAD_RESULT(vkMapMemory(device, memory, 0, memorySize, 0, (void *)&payload)); | |
for (uint32_t k = 0, e = bufferSize / sizeof(int32_t); k < e; k++) { | |
BAIL_ON_BAD_RESULT(payload[k + e] == payload[k] ? VK_SUCCESS : VK_ERROR_OUT_OF_HOST_MEMORY); | |
} | |
} | |
} |
UNASSIGNED-CoreValidation-Shader-InconsistentSpirv(ERROR / SPEC): msgNum: 0 - SPIR-V module not valid: Interface variable id <4> is used by entry point 'f' id <1>, but is not listed as an interface
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3int Input
the module validates fine for me after changing line 325 to:
(4 << 16) | OP_ENTRY_POINT, 5, FUNC_ID, 0x66, GLOBAL_INVOCATION_ID
UNASSIGNED-CoreValidation-Shader-InconsistentSpirv(ERROR / SPEC): msgNum: 0 - SPIR-V module not valid: Interface variable id <4> is used by entry point 'f' id <1>, but is not listed as an interface
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3int Input
the module validates fine for me after changing line 325 to:
(4 << 16) | OP_ENTRY_POINT, 5, FUNC_ID, 0x66, GLOBAL_INVOCATION_ID
It works with me with (5 << 16) | OP_ENTRY_POINT, 5, FUNC_ID, 0x66, GLOBAL_INVOCATION_ID.
Seemingly (x << 16) means "the current instruction consists of x uint32_t values."
I measured times with the following code:
auto tm0 = std::chrono::high_resolution_clock::now();
BAIL_ON_BAD_RESULT(vkQueueSubmit(queue, 1, &submitInfo, 0));
BAIL_ON_BAD_RESULT(vkQueueWaitIdle(queue));
auto tm1 = std::chrono::high_resolution_clock::now();
BAIL_ON_BAD_RESULT(vkMapMemory(device, memory, 0, memorySize, 0, (void **)&payload));
auto tm2 = std::chrono::high_resolution_clock::now();
for (uint32_t k = 0, e = bufferSize / sizeof(int32_t); k < e; k++) {
BAIL_ON_BAD_RESULT(payload[k + e] == payload[k] ? VK_SUCCESS : VK_ERROR_OUT_OF_HOST_MEMORY);
}
auto tm3 = std::chrono::high_resolution_clock::now();
using milliseconds = std::chrono::duration<double, std::milli>;
milliseconds tmProcess = tm1-tm0;
milliseconds tmMap = tm2-tm1;
milliseconds tmRead = tm3-tm2;
printf("Times (ms):\n process: %f\n map : %f\n read : %f",
tmProcess.count(), tmMap.count(), tmRead.count());
NVidia Geforce RTX 2080 super, Windows 10, Core I9-10980HK.
Times (ms):
process: 0.315900
map : 0.000300
read : 3.107100
Reading back the memory takes most of the time because a memory type without VK_MEMORY_PROPERTY_HOST_CACHED_BIT was being selected. If I change the memory selection code like this:
for (uint32_t k = 0; k < properties.memoryTypeCount; k++) {
if ((VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & properties.memoryTypes[k].propertyFlags) &&
(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & properties.memoryTypes[k].propertyFlags) &&
(VK_MEMORY_PROPERTY_HOST_CACHED_BIT & properties.memoryTypes[k].propertyFlags) &&
(memorySize < properties.memoryHeaps[properties.memoryTypes[k].heapIndex].size)) {
memoryTypeIndex = k;
break;
}
}
then I get
Times (ms):
process: 0.327000
map : 0.000500
read : 0.010500
Thank you to everyone who participates because it enriches the development of NPH.
VkQueueFamilyProperties* const queueFamilyProperties = (VkQueueFamilyProperties*)_alloca(
sizeof(VkQueueFamilyProperties) * queueFamilyPropertiesCount);
Visual Studio Community advises me to replace _alloca with _malloca here.
This is great. I think learning Vulkan through the compute side first makes a lot more sense, rather than going through the very long graphics setup.