-
-
Save wjx0912/cb6d140ab265a0759f82edb07c367c1e to your computer and use it in GitHub Desktop.
Memory Copy Performance Measure (memcpy, ID3D11DeviceContext::CopyResource)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Windows.h>
#include <d3d11.h>
#include <atlbase.h>
#pragma comment (lib, "d3d11.lib")

#include <algorithm>
#include <atomic>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <execution>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <thread>
#include <vector>
/// How long each benchmark is sampled, in seconds.
constexpr double MEASURE_SECONDS = 10.0;
/// Multiplier that converts a byte count into gibibytes (1 / 2^30).
constexpr double GIGABYTE_MAKER = 1.0 / (1024.0 * 1024.0 * 1024.0);
/// Base class for a throughput benchmark that runs on its own worker thread.
///
/// A derived class implements do_measure(), which performs one unit of copy
/// work and returns the number of bytes it moved. run() starts a worker that
/// calls do_measure() repeatedly and accumulates the byte count; stop() ends
/// the loop and waits for the worker to finish.
///
/// The byte counter and the running flag are atomics because they are written
/// by one thread and read by another.
///
/// Derived classes whose do_measure() touches their own members should make
/// sure stop() has been called before those members are destroyed; the join in
/// this destructor happens only after the derived part is already gone.
class performance
{
public:
	performance () = default;
	performance (const performance&) = delete;
	performance& operator= (const performance&) = delete;

	virtual ~performance () noexcept
	{
		// Safe even if run() was never called: stop() only joins a joinable thread.
		stop ();
	}

public:
	/// Total number of bytes reported by do_measure() since the last run().
	uint64_t copy_bytes () const noexcept { return _copy_bytes.load (); }

	/// Time elapsed since run() was called. Only meaningful after run().
	std::chrono::duration<double> proceed_time () const noexcept
	{
		return std::chrono::steady_clock::now () - _started;
	}

	/// True between run() and stop().
	bool is_running () const noexcept { return _running.load (); }

protected:
	/// Performs one unit of work and returns the number of bytes copied.
	virtual size_t do_measure () noexcept = 0;

public:
	/// Resets the counter and starts the measurement worker.
	/// Calling run() while a measurement is already in progress is a no-op.
	void run () noexcept
	{
		if (_run.joinable ())
			return;

		_copy_bytes = 0;
		_started = std::chrono::steady_clock::now ();
		_running = true;
		_run = std::thread ([this]()
			{
				// do/while: at least one measurement is taken even if stop()
				// is requested immediately after run().
				do
				{
					_copy_bytes += this->do_measure ();
					std::this_thread::yield ();
				}
				while (_running);
			}
		);
	}

	/// Requests the worker to stop and waits until it has left do_measure().
	void stop ()
	{
		_running = false;
		// Never join from the worker itself; that would deadlock.
		if (_run.joinable () && _run.get_id () != std::this_thread::get_id ())
			_run.join ();
	}

private:
	std::atomic<uint64_t> _copy_bytes { 0 };
	std::chrono::steady_clock::time_point _started;
	std::thread _run;
	std::atomic<bool> _running { false };
};
class memcpy_performance : public performance | |
{ | |
private: | |
const size_t BUFFER_SIZE = 1024 * 1024 * 16; //< 16MB | |
public: | |
memcpy_performance () | |
{ | |
_dest.resize (BUFFER_SIZE); | |
_src.resize (BUFFER_SIZE); | |
for (unsigned int i = 0; i < std::thread::hardware_concurrency (); ++i) | |
_temp.push_back (i); | |
} | |
protected: | |
virtual size_t do_measure () noexcept override | |
{ | |
std::for_each (std::execution::par_unseq, _temp.begin (), _temp.end (), [this](unsigned int i) | |
{ | |
memcpy (_dest.data (), _src.data (), BUFFER_SIZE); | |
}); | |
return BUFFER_SIZE * _temp.size (); | |
} | |
private: | |
std::vector<uint8_t> _dest, _src; | |
std::vector<unsigned int> _temp; | |
}; | |
class D3D11CopyResourceRAM2VRAM_performance : public performance | |
{ | |
public: | |
D3D11CopyResourceRAM2VRAM_performance (size_t size = 4096, DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM) | |
{ | |
HRESULT hr = D3D11CreateDevice (nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, 0, nullptr, 0, D3D11_SDK_VERSION, &_d3dDevice, nullptr, &_immediateContext); | |
assert (SUCCEEDED (hr)); | |
memset (&_texDesc, 0, sizeof (D3D11_TEXTURE2D_DESC)); | |
_texDesc.Width = _texDesc.Height = size; | |
_texDesc.ArraySize = 1; | |
_texDesc.MipLevels = 1; | |
_texDesc.Format = format; | |
_texDesc.SampleDesc.Count = 1; | |
_texDesc.Usage = D3D11_USAGE_DEFAULT; | |
_texDesc.CPUAccessFlags = 0; | |
_texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; | |
hr = _d3dDevice->CreateTexture2D (&_texDesc, nullptr, &_dest); | |
assert (SUCCEEDED (hr)); | |
_texDesc.Usage = D3D11_USAGE_STAGING; | |
_texDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; | |
_texDesc.BindFlags = 0; | |
hr = _d3dDevice->CreateTexture2D (&_texDesc, nullptr, &_src); | |
assert (SUCCEEDED (hr)); | |
_totalSize = size * size * (format == DXGI_FORMAT_R8G8B8A8_UNORM ? 4 : 16); | |
} | |
protected: | |
virtual size_t do_measure () noexcept override | |
{ | |
_immediateContext->CopyResource (_dest, _src); | |
_immediateContext->Flush (); | |
return _totalSize; | |
} | |
private: | |
D3D11_TEXTURE2D_DESC _texDesc; | |
CComPtr<ID3D11Device> _d3dDevice; | |
CComPtr<ID3D11DeviceContext> _immediateContext; | |
CComPtr<ID3D11Texture2D> _dest, _src; | |
size_t _totalSize; | |
}; | |
class D3D11CopyResourceVRAM2VRAM_performance : public performance | |
{ | |
public: | |
D3D11CopyResourceVRAM2VRAM_performance (size_t size = 4096, DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM) | |
{ | |
HRESULT hr = D3D11CreateDevice (nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, 0, nullptr, 0, D3D11_SDK_VERSION, &_d3dDevice, nullptr, &_immediateContext); | |
assert (SUCCEEDED (hr)); | |
memset (&_texDesc, 0, sizeof (D3D11_TEXTURE2D_DESC)); | |
_texDesc.Width = _texDesc.Height = size; | |
_texDesc.ArraySize = 1; | |
_texDesc.MipLevels = 1; | |
_texDesc.Format = format; | |
_texDesc.SampleDesc.Count = 1; | |
_texDesc.Usage = D3D11_USAGE_DEFAULT; | |
_texDesc.CPUAccessFlags = 0; | |
_texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; | |
hr = _d3dDevice->CreateTexture2D (&_texDesc, nullptr, &_dest); | |
assert (SUCCEEDED (hr)); | |
hr = _d3dDevice->CreateTexture2D (&_texDesc, nullptr, &_src); | |
assert (SUCCEEDED (hr)); | |
_totalSize = size * size * (format == DXGI_FORMAT_R8G8B8A8_UNORM ? 4 : 16); | |
} | |
protected: | |
virtual size_t do_measure () noexcept override | |
{ | |
_immediateContext->CopyResource (_dest, _src); | |
_immediateContext->Flush (); | |
return _totalSize; | |
} | |
private: | |
D3D11_TEXTURE2D_DESC _texDesc; | |
CComPtr<ID3D11Device> _d3dDevice; | |
CComPtr<ID3D11DeviceContext> _immediateContext; | |
CComPtr<ID3D11Texture2D> _dest, _src; | |
size_t _totalSize; | |
}; | |
void measure (const char * testname, performance* perf) | |
{ | |
printf ("==== %s Performance Measure ====\n", testname); | |
std::shared_ptr<performance> _measure (perf); | |
_measure->run (); | |
while (_measure->is_running ()) | |
{ | |
if (_measure->proceed_time ().count () >= MEASURE_SECONDS) | |
_measure->stop (); | |
printf ("\r%3.3lfs... %lfGB/s... Total Copied: %lfGB", | |
_measure->proceed_time ().count (), | |
(_measure->copy_bytes () / _measure->proceed_time ().count ()) * GIGABYTE_MAKER, | |
_measure->copy_bytes () * GIGABYTE_MAKER); | |
std::this_thread::yield (); | |
} | |
printf ("\r%3.3lfs... %lfGB/s... Total Copied: %lfGB", | |
_measure->proceed_time ().count (), | |
(_measure->copy_bytes () / _measure->proceed_time ().count ()) * GIGABYTE_MAKER, | |
_measure->copy_bytes ()* GIGABYTE_MAKER); | |
putchar ('\n'); | |
} | |
int main (int argc, char* argv[]) | |
{ | |
measure (u8"CPU memcpy", new memcpy_performance ()); | |
measure (u8"Direct3D Texture2D Copy Resource RAM to VRAM(4096 * 4096 * R8G8B8A8)", new D3D11CopyResourceRAM2VRAM_performance (4096, DXGI_FORMAT_R8G8B8A8_UNORM)); | |
measure (u8"Direct3D Texture2D Copy Resource RAM to VRAM(4096 * 4096 * RGBAF)", new D3D11CopyResourceRAM2VRAM_performance (4096, DXGI_FORMAT_R32G32B32A32_FLOAT)); | |
measure (u8"Direct3D Texture2D Copy Resource VRAM to VRAM(4096 * 4096 * R8G8B8A8)", new D3D11CopyResourceVRAM2VRAM_performance (4096, DXGI_FORMAT_R8G8B8A8_UNORM)); | |
measure (u8"Direct3D Texture2D Copy Resource VRAM to VRAM(4096 * 4096 * RGBAF)", new D3D11CopyResourceVRAM2VRAM_performance (4096, DXGI_FORMAT_R32G32B32A32_FLOAT)); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment