Last active
October 11, 2024 03:29
-
-
Save cshenton/3ba217ee409648927889b56562a534e8 to your computer and use it in GitHub Desktop.
Seeing how fast a d3d11 swapchain can go.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

/* COBJMACROS must be defined before the COM headers so that the C-style
 * Interface_Method(This, ...) call macros used throughout main() exist. */
#define COBJMACROS
#include <windows.h>
#include <d3d11_1.h>
#include <d3dcompiler.h>
////////////////////////////////////////////////////////////////////////////////////////////////////////////// | |
/* Used as both the window class name and the window caption. */
#define TITLE "D3D11 Speedtest"
////////////////////////////////////////////////////////////////////////////////////////////////////////////// | |
/*
 * HLSL source for both pipeline stages, compiled at startup with D3DCompile.
 *
 *   vert: emits one of three hard-coded triangle corners selected by
 *         SV_VertexID (no vertex buffer is needed) and derives a single
 *         colour for the whole triangle by hashing the bitwise complement of
 *         the `frame` constant.
 *   pix:  returns the interpolated vertex colour unchanged.
 *
 * The `Constants` cbuffer at register b0 must match the C-side `Constants`
 * struct that is uploaded every frame.
 */
const char SHADER_SRC[] = "\n"
    "cbuffer Constants : register(b0)\n"
    "{\n"
    " int frame;\n"
    "};\n"
    "\n"
    "struct PixelData {\n"
    " float4 position : SV_POSITION;\n"
    " float4 color : COL;\n"
    "};\n"
    "\n"
    "uint hash(uint a)\n"
    "{\n"
    " a = (a+0x7ed55d16) + (a<<12);\n"
    " a = (a^0xc761c23c) ^ (a>>19);\n"
    " a = (a+0x165667b1) + (a<<5);\n"
    " a = (a+0xd3a2646c) ^ (a<<9);\n"
    " a = (a+0xfd7046c5) + (a<<3);\n"
    " a = (a^0xb55a4f09) ^ (a>>16);\n"
    " return a;\n"
    "}\n"
    "\n"
    "PixelData vert(uint v: SV_VertexID)\n"
    "{\n"
    " float2 positions[3] = { float2(0.5, -0.5), float2(-0.5, -0.5), float2(0.0, 0.5) };\n"
    " PixelData pd;\n"
    "\n"
    " uint ch = hash(~uint(frame));\n"
    " pd.color = float4(((ch >> 16) & 0xff) / 255.0, ((ch >> 8) & 0xff) / 255.0, ((ch) & 0xff) / 255.0, 1.0);\n"
    " pd.position = float4(positions[v], 0.0, 1.0);\n"
    " return pd;\n"
    "}\n"
    "\n"
    "float4 pix(PixelData pd) : SV_TARGET\n"
    "{\n"
    " return pd.color;\n"
    "}\n";
/*
 * CPU-side mirror of the HLSL `Constants` cbuffer (register b0).
 * The GPU buffer that receives it is created with its size rounded up to a
 * multiple of 16 bytes; only sizeof(Constants) bytes are written per frame.
 */
typedef struct Constants {
    int frame; /* per-frame counter; the vertex shader hashes it into a colour */
} Constants;
int main() | |
{ | |
HRESULT hr; | |
WNDCLASSA wnd_class = { 0, DefWindowProcA, 0, 0, 0, 0, 0, 0, 0, TITLE }; | |
RegisterClassA(&wnd_class); | |
DWORD style = WS_POPUP | WS_MAXIMIZE | WS_VISIBLE; | |
HWND window = CreateWindowExA(0, TITLE, TITLE, style, 0, 0, 0, 0, NULL, NULL, NULL, NULL); | |
assert(window); | |
////////////////////////////////////////////////////////////////////////////////////////////////////// | |
D3D_FEATURE_LEVEL feature_levels[] = { D3D_FEATURE_LEVEL_11_1 }; | |
ID3D11Device* base_device; | |
ID3D11DeviceContext* base_ctx; | |
hr = D3D11CreateDevice( | |
NULL, D3D_DRIVER_TYPE_HARDWARE, | |
NULL, D3D11_CREATE_DEVICE_BGRA_SUPPORT, | |
feature_levels, ARRAYSIZE(feature_levels), | |
D3D11_SDK_VERSION, &base_device, | |
NULL, &base_ctx); | |
assert(SUCCEEDED(hr)); | |
////////////////////////////////////////////////////////////////////////////////////////////////////// | |
ID3D11Device1* device; | |
hr = ID3D11Device_QueryInterface(base_device, &IID_ID3D11Device1, (void**)(&device)); | |
assert(SUCCEEDED(hr)); | |
ID3D11DeviceContext1* ctx; | |
hr = ID3D11DeviceContext_QueryInterface(base_ctx, &IID_ID3D11DeviceContext1, (void**)&ctx); | |
assert(SUCCEEDED(hr)); | |
////////////////////////////////////////////////////////////////////////////////////////////////////// | |
IDXGIDevice1* dxgi_dev; | |
hr = ID3D11Device_QueryInterface(device, &IID_IDXGIDevice1, (void**)&dxgi_dev); | |
assert(SUCCEEDED(hr)); | |
IDXGIAdapter1* adapter; | |
hr = IDXGIDevice_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void**)&adapter); | |
assert(SUCCEEDED(hr)); | |
IDXGIFactory2* factory; | |
hr = IDXGIAdapter1_GetParent(adapter, &IID_IDXGIFactory2, (void**)&factory); | |
assert(SUCCEEDED(hr)); | |
////////////////////////////////////////////////////////////////////////////////////////////////////// | |
DXGI_SWAP_CHAIN_DESC1 swapchain_desc = { | |
.Width = 80, | |
.Height = 60, | |
.Format = DXGI_FORMAT_B8G8R8A8_UNORM, | |
.SampleDesc = {.Count = 1}, | |
.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT, | |
.BufferCount = 3, | |
.Scaling = DXGI_SCALING_NONE, | |
.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING, | |
.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD, | |
.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED, | |
}; | |
IDXGISwapChain1* swapchain; | |
hr = IDXGIFactory2_CreateSwapChainForHwnd(factory, (IUnknown*)device, window, | |
&swapchain_desc, NULL, NULL, &swapchain); | |
assert(SUCCEEDED(hr)); | |
////////////////////////////////////////////////////////////////////////////////////////////////////// | |
ID3D11Texture2D* framebuffer; | |
hr = IDXGISwapChain1_GetBuffer(swapchain, 0, &IID_ID3D11Texture2D, (void**)&framebuffer); | |
assert(SUCCEEDED(hr)); | |
D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = { | |
.Format = DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, | |
.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D, | |
}; | |
ID3D11RenderTargetView* rtv; | |
hr = ID3D11Device_CreateRenderTargetView(device, (ID3D11Resource*)framebuffer, &rtv_desc, &rtv); | |
assert(SUCCEEDED(hr)); | |
////////////////////////////////////////////////////////////////////////////////////////////////////// | |
D3D11_TEXTURE2D_DESC depth_tex_desc; | |
ID3D11Texture2D_GetDesc(framebuffer, &depth_tex_desc); | |
depth_tex_desc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; | |
depth_tex_desc.BindFlags = D3D11_BIND_DEPTH_STENCIL; | |
ID3D11Texture2D* depth_tex; | |
hr = ID3D11Device_CreateTexture2D(device, &depth_tex_desc, NULL, &depth_tex); | |
assert(SUCCEEDED(hr)); | |
ID3D11DepthStencilView* dsv; | |
ID3D11Device_CreateDepthStencilView(device, (ID3D11Resource*)depth_tex, NULL, &dsv); | |
////////////////////////////////////////////////////////////////////////////////////////////////////// | |
ID3DBlob* vert_src; | |
hr = D3DCompile(SHADER_SRC, sizeof(SHADER_SRC), NULL, NULL, NULL, "vert", "vs_5_0", 0, 0, &vert_src, NULL); | |
assert(SUCCEEDED(hr)); | |
ID3D11VertexShader* vert_shd; | |
hr = ID3D11Device_CreateVertexShader( | |
device, | |
ID3D10Blob_GetBufferPointer(vert_src), | |
ID3D10Blob_GetBufferSize(vert_src), | |
NULL, | |
&vert_shd); | |
assert(SUCCEEDED(hr)); | |
////////////////////////////////////////////////////////////////////////////////////////////////////// | |
ID3DBlob* pix_src; | |
hr = D3DCompile(SHADER_SRC, sizeof(SHADER_SRC), NULL, NULL, NULL, "pix", "ps_5_0", 0, 0, &pix_src, NULL); | |
assert(SUCCEEDED(hr)); | |
ID3D11PixelShader* pix_shd; | |
hr = ID3D11Device_CreatePixelShader(device, | |
ID3D10Blob_GetBufferPointer(pix_src), | |
ID3D10Blob_GetBufferSize(pix_src), | |
NULL, | |
&pix_shd); | |
assert(SUCCEEDED(hr)); | |
////////////////////////////////////////////////////////////////////////////////////////////////////// | |
D3D11_BUFFER_DESC cbuffer_desc = { | |
.ByteWidth = sizeof(Constants) + 0xf & 0xfffffff0, | |
.Usage = D3D11_USAGE_DYNAMIC, | |
.BindFlags = D3D11_BIND_CONSTANT_BUFFER, | |
.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE, | |
}; | |
ID3D11Buffer* cbuffer; | |
hr = ID3D11Device_CreateBuffer(device, &cbuffer_desc, NULL, &cbuffer); | |
assert(SUCCEEDED(hr)); | |
////////////////////////////////////////////////////////////////////////////////////////////////////// | |
D3D11_RASTERIZER_DESC rasterizer_desc = { | |
.FillMode = D3D11_FILL_SOLID, | |
.CullMode = D3D11_CULL_BACK, | |
}; | |
ID3D11RasterizerState* raster_state; | |
hr = ID3D11Device_CreateRasterizerState(device, &rasterizer_desc, &raster_state); | |
assert(SUCCEEDED(hr)); | |
////////////////////////////////////////////////////////////////////////////////////////////////////// | |
D3D11_DEPTH_STENCIL_DESC depth_stencil_desc = { | |
.DepthEnable = TRUE, | |
.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL, | |
.DepthFunc = D3D11_COMPARISON_LESS, | |
}; | |
ID3D11DepthStencilState* depth_stencil_state; | |
ID3D11Device_CreateDepthStencilState(device, &depth_stencil_desc, &depth_stencil_state); | |
////////////////////////////////////////////////////////////////////////////////////////////////////// | |
float clear_color[4] = { 0.0f, 0.0f, 0.0f, 1.0f }; | |
float width = (float)depth_tex_desc.Width; | |
float height = (float)depth_tex_desc.Height; | |
D3D11_VIEWPORT viewport = { 0.0f, 0.0f, width, height, 0.0f, 1.0f }; | |
printf("%f, %f\n", width, height); | |
////////////////////////////////////////////////////////////////////////////////////////////////////// | |
int frame = 0; | |
LARGE_INTEGER start_time, end_time, ElapsedMicroseconds; | |
LARGE_INTEGER frequency; | |
QueryPerformanceFrequency(&frequency); | |
QueryPerformanceCounter(&start_time); | |
clock_t frame_time = clock(); | |
SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST); | |
for (;;) { | |
MSG msg; | |
while (PeekMessageA(&msg, NULL, 0, 0, PM_REMOVE)) { | |
if (msg.message == WM_KEYDOWN && msg.wParam == VK_ESCAPE) { | |
return 0; | |
} | |
DispatchMessageA(&msg); | |
} | |
D3D11_MAPPED_SUBRESOURCE mapped; | |
Constants c = { .frame = frame++ }; | |
ID3D11DeviceContext_Map(ctx, (ID3D11Resource*)cbuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); | |
memcpy(mapped.pData, &c, sizeof(c)); | |
ID3D11DeviceContext_Unmap(ctx, (ID3D11Resource*)cbuffer, 0); | |
ID3D11DeviceContext_ClearRenderTargetView(ctx, rtv, clear_color); | |
ID3D11DeviceContext_ClearDepthStencilView(ctx, dsv, D3D11_CLEAR_DEPTH, 1.0f, 0); | |
ID3D11DeviceContext_IASetPrimitiveTopology(ctx, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); | |
ID3D11DeviceContext_VSSetShader(ctx, vert_shd, NULL, 0); | |
ID3D11DeviceContext_VSSetConstantBuffers(ctx, 0, 1, &cbuffer); | |
ID3D11DeviceContext_RSSetViewports(ctx, 1, &viewport); | |
ID3D11DeviceContext_RSSetState(ctx, raster_state); | |
ID3D11DeviceContext_PSSetShader(ctx, pix_shd, NULL, 0); | |
ID3D11DeviceContext_OMSetRenderTargets(ctx, 1, &rtv, dsv); | |
ID3D11DeviceContext_OMSetDepthStencilState(ctx, depth_stencil_state, 0); | |
ID3D11DeviceContext_OMSetBlendState(ctx, NULL, NULL, 0xffffffff); | |
ID3D11DeviceContext_Draw(ctx, 3, 0); | |
IDXGISwapChain1_Present(swapchain, 0, DXGI_PRESENT_ALLOW_TEARING); | |
QueryPerformanceCounter(&end_time); | |
long long elapsed_us = 1000000 * (end_time.QuadPart - start_time.QuadPart) / frequency.QuadPart; | |
if (elapsed_us > 250000) { | |
printf("%f FPS\n", (double)frame / ((double)elapsed_us) * 1000000.0 ); | |
frame = 0; | |
start_time = end_time; | |
} | |
} | |
} |
Compiled with `cl.exe .\d3d11_speedtest.c /link user32.lib d3dcompiler.lib d3d11.lib dxguid.lib`.
Couldn't get past 8000 FPS here, potato PC.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I am compiling with `zig cc -o tri.exe tri.c -ld3d11 -ld3dcompiler_47 -Werror`.
It hits 19402 FPS (well, that's a random printout; obviously it varies in practice).
Worth noting that if we leave the swapchain resolution at the default (which is 1440p on my monitor), the perf is more like 9k. There's some swapover point where the tearing is no longer visible and the framerate passes the ~9.5k mark.