Created
July 9, 2019 19:24
-
-
Save native-m/facbe0f797f16f7c55cb46b512610a80 to your computer and use it in GitHub Desktop.
Performing async tasks on different GPUs using DirectX 11
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Windows.h> | |
#include <d3d11.h> | |
#include <iostream> | |
#include <vector> | |
#include <thread> | |
#include <mutex> | |
#include <d3dcompiler.h> | |
#pragma comment(lib, "dxgi.lib") | |
#pragma comment(lib, "d3d11.lib") | |
#pragma comment(lib, "d3dcompiler.lib") | |
#define MAKE_SHADER(x) #x | |
// Input operands for the compute shader. Each array is uploaded as-is into a
// GPU buffer and the shader adds them element by element (dataA[i] + dataB[i]).
// They stay non-const because they are handed to CreateBuffer() as void*.
float dataA[10] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f };
float dataB[10] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f };
// HLSL compute shader source: each thread group handles one element and writes
// bufC[i] = bufA[i] + bufB[i].
// A raw string literal is used instead of a stringification macro so that the
// source keeps its line breaks (useful for compiler diagnostics) and cannot be
// broken by commas or '#' characters inside the shader text.
const char computeShader[] = R"hlsl(
Buffer<float> bufA : register(t0);
Buffer<float> bufB : register(t1);
RWBuffer<float> bufC : register(u0);

[numthreads(1,1,1)]
void CSMain(uint3 i : SV_DispatchThreadID)
{
    bufC[i.x] = bufA[i.x] + bufB[i.x];
}
)hlsl";
// Serializes console output so that messages written by the per-GPU worker
// threads do not interleave.
std::mutex mut;
// definition | |
HRESULT CreateBuffer(ID3D11Device* device, UINT sz, void* pInitData, ID3D11Buffer** bufOut); | |
HRESULT CreateBufferSrv(ID3D11Device * device, DXGI_FORMAT bufFormat, UINT szStride, ID3D11Buffer * buf, ID3D11ShaderResourceView ** srvOut); | |
HRESULT CreateBufferUav(ID3D11Device * device, DXGI_FORMAT bufFormat, UINT szStride, ID3D11Buffer * buf, ID3D11UnorderedAccessView** uavOut); | |
ID3D11Buffer* CopyBuffer(ID3D11Device* device, ID3D11DeviceContext* ctx, ID3D11Buffer* buf); | |
HRESULT CompileShader(const char* shader, SIZE_T sz, ID3D10Blob** bytecode, ID3D10Blob** errMsg); | |
int main() | |
{ | |
HRESULT hr; | |
IDXGIFactory* factory; | |
IDXGIAdapter* adapter; | |
ID3D11Device* dev0 = nullptr; | |
ID3D11DeviceContext* ctx0 = nullptr; | |
ID3D11Device* dev1 = nullptr; | |
ID3D11DeviceContext* ctx1 = nullptr; | |
std::vector<IDXGIAdapter*> adapters; | |
static const D3D_FEATURE_LEVEL lvl[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, | |
D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0, | |
D3D_FEATURE_LEVEL_9_3, D3D_FEATURE_LEVEL_9_2, D3D_FEATURE_LEVEL_9_1 }; | |
D3D_FEATURE_LEVEL supported; | |
float result0[10] = { 0.0f }; | |
float result1[10] = { 0.0f }; | |
if (FAILED(hr = CreateDXGIFactory(__uuidof(factory), (void**)&factory))) | |
{ | |
std::cout << "Failed to create DXGI factory" << std::endl; | |
return hr; | |
} | |
// Enumerate all adapters and store it into std::vector | |
for (int i = 0; factory->EnumAdapters(i, &adapter) != DXGI_ERROR_NOT_FOUND; i++) | |
adapters.push_back(adapter); | |
// Create d3d11 on each device | |
if (FAILED(hr = D3D11CreateDevice( | |
adapters[0], | |
D3D_DRIVER_TYPE_UNKNOWN, | |
nullptr, | |
0, | |
lvl, | |
7, | |
D3D11_SDK_VERSION, | |
&dev0, | |
&supported, | |
&ctx0))) | |
{ | |
std::cout << "Failed to create d3d device0" << std::endl; | |
return hr; | |
} | |
if (FAILED(hr = D3D11CreateDevice( | |
adapters[1], | |
D3D_DRIVER_TYPE_UNKNOWN, | |
nullptr, | |
0, | |
lvl, | |
7, | |
D3D11_SDK_VERSION, | |
&dev1, | |
&supported, | |
&ctx1))) | |
{ | |
std::cout << "Failed to create d3d device1" << std::endl; | |
return hr; | |
} | |
// perform a simple GPGPU program that add ten numbers | |
auto task = [&](ID3D11Device* dev, ID3D11DeviceContext* ctx, float* result, size_t sz, HRESULT* hr) { | |
ID3D11Buffer* bufA; | |
ID3D11Buffer* bufB; | |
ID3D11Buffer* bufC; | |
ID3D11ShaderResourceView* bufASrv; | |
ID3D11ShaderResourceView* bufBSrv; | |
ID3D11UnorderedAccessView* bufCUav; | |
ID3D11ComputeShader* cs = nullptr; | |
ID3D10Blob* bytecode; | |
ID3D10Blob* errMsg; | |
IDXGIDevice* dxgiDev; | |
IDXGIAdapter* adapter; | |
DXGI_ADAPTER_DESC adesc; | |
// Get device information | |
dev->QueryInterface(__uuidof(dxgiDev), (void**)&dxgiDev); | |
dxgiDev->GetAdapter(&adapter); | |
adapter->GetDesc(&adesc); | |
mut.lock(); | |
std::wcout << "Performing async task on device: " << adesc.Description << std::endl; | |
mut.unlock(); | |
// Create buffer and its view | |
CreateBuffer(dev, sizeof(dataA), dataA, &bufA); | |
CreateBuffer(dev, sizeof(dataB), dataB, &bufB); | |
CreateBuffer(dev, sz, nullptr, &bufC); | |
CreateBufferSrv(dev, DXGI_FORMAT_R32_FLOAT, 4, bufA, &bufASrv); | |
CreateBufferSrv(dev, DXGI_FORMAT_R32_FLOAT, 4, bufB, &bufBSrv); | |
CreateBufferUav(dev, DXGI_FORMAT_R32_FLOAT, 4, bufC, &bufCUav); | |
// Compile & create shader | |
if (FAILED(CompileShader(computeShader, sizeof(computeShader), &bytecode, &errMsg))) | |
{ | |
std::cout << "Failed to compile compute shader" << std::endl; | |
if (errMsg) | |
{ | |
std::cout << (char*)errMsg->GetBufferPointer() << std::endl; | |
errMsg->Release(); | |
*hr = -1; | |
return; | |
} | |
} | |
dev->CreateComputeShader( | |
bytecode->GetBufferPointer(), | |
bytecode->GetBufferSize(), | |
nullptr, | |
&cs); | |
bytecode->Release(); | |
ID3D11ShaderResourceView* srvs[2] = { bufASrv, bufBSrv }; | |
// Perform action on GPU | |
ctx->CSSetShader(cs, nullptr, 0); | |
ctx->CSSetShaderResources(0, 2, srvs); | |
ctx->CSSetUnorderedAccessViews(0, 1, &bufCUav, nullptr); | |
ctx->Dispatch(10, 1, 1); | |
ctx->CSSetShader(nullptr, nullptr, 0); | |
ctx->CSSetShaderResources(0, 0, nullptr); | |
ctx->CSSetUnorderedAccessViews(0, 0, nullptr, nullptr); | |
// Copy result from GPU | |
ID3D11Buffer* cpy = CopyBuffer(dev, ctx, bufC); | |
D3D11_MAPPED_SUBRESOURCE copyback; | |
ctx->Map(cpy, 0, D3D11_MAP_READ, 0, ©back); | |
memcpy_s(result, sz, copyback.pData, sz); | |
ctx->Unmap(cpy, 0); | |
// we dont need these again | |
cpy->Release(); | |
if (bufA) | |
{ | |
bufA->Release(); | |
if (bufASrv) | |
bufASrv->Release(); | |
} | |
if (bufB) | |
{ | |
bufB->Release(); | |
if (bufBSrv) | |
bufBSrv->Release(); | |
} | |
if (bufC) | |
{ | |
bufC->Release(); | |
if (bufCUav) | |
bufCUav->Release(); | |
} | |
if (cs) | |
cs->Release(); | |
return; | |
}; | |
// perform async task | |
std::thread gpu0(task, dev0, ctx0, (float*)result0, (size_t)sizeof(result0), &hr); | |
std::thread gpu1(task, dev1, ctx1, (float*)result1, (size_t)sizeof(result1), &hr); | |
// wait until the job done | |
gpu0.join(); | |
gpu1.join(); | |
std::cout << "Done performing tasks" << std::endl; | |
// print outputs | |
std::cout << "Output from GPU0: { "; | |
for (int i = 0; i < 10; i++) | |
std::cout << result0[i] << ((i < 9) ? ", " : " }"); | |
std::cout << std::endl; | |
std::cout << "Output from GPU1: { "; | |
for (int i = 0; i < 10; i++) | |
std::cout << result1[i] << ((i < 9) ? ", " : " }"); | |
std::cout << std::endl; | |
std::cout << "Releasing shits..." << std::endl; | |
// release shits | |
if (dev0) | |
dev0->Release(); | |
if (ctx0) | |
ctx0->Release(); | |
if(dev1) | |
dev1->Release(); | |
if(ctx1) | |
ctx1->Release(); | |
return 0; | |
} | |
// Creates a default-usage GPU buffer of `sz` bytes that can be bound as an
// unordered-access view, shader resource, index buffer or vertex buffer.
//
//   device    : device that creates the buffer; E_INVALIDARG if null.
//   sz        : buffer size in bytes.
//   pInitData : optional initial contents (at least `sz` bytes); may be null.
//   bufOut    : receives the new buffer; set to nullptr first so the caller can
//               release it unconditionally after a failure.
//
// Returns the HRESULT of ID3D11Device::CreateBuffer.
HRESULT CreateBuffer(ID3D11Device * device, UINT sz, void * pInitData, ID3D11Buffer ** bufOut)
{
    if (bufOut)
        *bufOut = nullptr;
    if (!device)
        return E_INVALIDARG;

    D3D11_BUFFER_DESC bufDesc = {};
    bufDesc.ByteWidth = sz;
    bufDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER;
    bufDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;

    // Initial data is only passed along when the caller supplied some.
    D3D11_SUBRESOURCE_DATA init = {};
    init.pSysMem = pInitData;
    const D3D11_SUBRESOURCE_DATA* initPtr = pInitData ? &init : nullptr;

    return device->CreateBuffer(&bufDesc, initPtr, bufOut);
}
// Creates a typed shader-resource view covering the whole of `buf`.
//
//   device    : device that owns `buf`.
//   bufFormat : element format of the view.
//   szStride  : size of one element in bytes; the view exposes
//               ByteWidth / szStride elements.
//   buf       : buffer to view.
//   srvOut    : receives the view; set to nullptr first.
//
// Returns E_INVALIDARG for a null device/buffer or a zero stride (which would
// otherwise divide by zero), else the HRESULT of CreateShaderResourceView.
HRESULT CreateBufferSrv(ID3D11Device * device, DXGI_FORMAT bufFormat, UINT szStride, ID3D11Buffer * buf, ID3D11ShaderResourceView ** srvOut)
{
    if (srvOut)
        *srvOut = nullptr;
    if (!device || !buf || szStride == 0)
        return E_INVALIDARG;

    D3D11_BUFFER_DESC bufDesc;
    buf->GetDesc(&bufDesc);

    D3D11_SHADER_RESOURCE_VIEW_DESC desc = {};
    desc.Format = bufFormat;
    desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
    desc.Buffer.FirstElement = 0;
    desc.Buffer.NumElements = bufDesc.ByteWidth / szStride;

    return device->CreateShaderResourceView(buf, &desc, srvOut);
}
// Creates a typed unordered-access view covering the whole of `buf`.
//
//   device    : device that owns `buf`.
//   bufFormat : element format of the view.
//   szStride  : size of one element in bytes; the view exposes
//               ByteWidth / szStride elements.
//   buf       : buffer to view.
//   uavOut    : receives the view; set to nullptr first.
//
// Returns E_INVALIDARG for a null device/buffer or a zero stride (which would
// otherwise divide by zero), else the HRESULT of CreateUnorderedAccessView.
HRESULT CreateBufferUav(ID3D11Device * device, DXGI_FORMAT bufFormat, UINT szStride, ID3D11Buffer * buf, ID3D11UnorderedAccessView ** uavOut)
{
    if (uavOut)
        *uavOut = nullptr;
    if (!device || !buf || szStride == 0)
        return E_INVALIDARG;

    D3D11_BUFFER_DESC bufDesc;
    buf->GetDesc(&bufDesc);

    D3D11_UNORDERED_ACCESS_VIEW_DESC desc = {};
    desc.Format = bufFormat;
    desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
    desc.Buffer.FirstElement = 0;
    desc.Buffer.NumElements = bufDesc.ByteWidth / szStride;

    return device->CreateUnorderedAccessView(buf, &desc, uavOut);
}
// Creates a CPU-readable staging buffer with the same size as `buf` and queues
// a GPU copy of `buf` into it on `ctx`.
//
// Returns the staging buffer, or nullptr if an argument is null or the staging
// buffer could not be created. The caller owns the returned buffer and must
// Release() it.
ID3D11Buffer* CopyBuffer(ID3D11Device * device, ID3D11DeviceContext * ctx, ID3D11Buffer * buf)
{
    if (!device || !ctx || !buf)
        return nullptr;

    // Start from the source description and change only what makes the copy a
    // staging resource: CPU read access and no pipeline bindings.
    D3D11_BUFFER_DESC desc;
    buf->GetDesc(&desc);
    desc.Usage = D3D11_USAGE_STAGING;
    desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
    desc.BindFlags = 0;
    desc.MiscFlags = 0;

    ID3D11Buffer* staging = nullptr;
    if (FAILED(device->CreateBuffer(&desc, nullptr, &staging)))
        return nullptr;

    ctx->CopyResource(staging, buf);
    return staging;
}
// Compiles HLSL source as a cs_5_0 compute shader with entry point "CSMain".
//
//   shader   : HLSL source text.
//   sz       : size of the source in bytes.
//   bytecode : receives the compiled blob on success.
//   errMsg   : receives the compiler's message blob, if it produced one.
//
// Both output pointers are set to nullptr before compiling so the caller can
// test and release them regardless of the outcome.
// Returns the HRESULT of D3DCompile.
HRESULT CompileShader(const char * shader, SIZE_T sz, ID3D10Blob** bytecode, ID3D10Blob** errMsg)
{
    if (bytecode)
        *bytecode = nullptr;
    if (errMsg)
        *errMsg = nullptr;

    return D3DCompile(shader, sz, "ComputeShader", nullptr, nullptr, "CSMain", "cs_5_0", 0, 0, bytecode, errMsg);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment