Skip to content

Instantly share code, notes, and snippets.

View Triang3l's full-sized avatar
🚀
Never say never!

Triang3l Triang3l

🚀
Never say never!
View GitHub Profile
#include "SRDescriptors.h"
#include "../GPU/DescriptorStorage.h"
#include "../Log/Log.h"
#include "../Math/Bit.h"
#include "../Parallel/Threading.h"
#include <algorithm>
#include <cstdint>
#include <cstring>
namespace SecretGame::GFX::SRDescriptors {
#include <DirectXMath.h>
inline DirectX::XMVECTOR Vector4TransformRM(DirectX::XMVECTOR v, const DirectX::XMMATRIX &m) {
// XMVector4Transform doesn't work since the matrix is row-major (dot) and DirectXMath expects column-major (mul, mad).
#if defined(_XM_SSE_INTRINSICS_)
// XMVector4Transform uses 11 instructions, and XMMatrixTranspose uses 8. This uses 15.
// r0x | r0y | r1z | r1w
DirectX::XMVECTOR x0y0z1w1 = _mm_shuffle_ps(m.r[0], m.r[1], _MM_SHUFFLE(3, 2, 1, 0));
@Triang3l
Triang3l / CreateCubeFunction.cc
Last active June 17, 2018 17:59
Updated CreateCubeFunction of the Xenia SPIR-V shader translator that crashes RenderDoc's AMDIL disassembler
spv::Function* SpirvShaderTranslator::CreateCubeFunction() {
auto& b = *builder_;
spv::Block* function_block = nullptr;
auto function = b.makeFunctionEntry(spv::NoPrecision, vec4_float_type_,
"cube", {vec4_float_type_},
{{spv::NoPrecision}}, &function_block);
auto src = function->getParamId(0);
// The source parameter is ordered as .yxzz.
const unsigned int src_i_x = 1, src_i_y = 0, src_i_z = 2;
loc_823F5744:
lwz r10, 0(r22)
r10 = boxLeft
mr r5, r17
r5 (memcpy size) = microWidth << log2bpp
lwz r11, 0(r21)
r11 = pointX
add r10, r10, r31
r10 = srcX
add r11, r11, r31
@Triang3l
Triang3l / TiledOffset3D.cc
Last active March 6, 2019 20:52
3D texture tiling for Xenia, reverse-engineered from XGAddress(Volume?)TiledOffset in XGRAPHICS::TileVolume
static uint32_t TiledOffset3D(uint32_t x, uint32_t y, uint32_t z,
uint32_t pitch_h, uint32_t pitch_v,
uint32_t log2_bpp) {
// Reconstructed from disassembly of XGRAPHICS::TileVolume.
uint32_t macro_outer =
((y >> 4) + (z >> 2) * (pitch_v >> 4)) * (pitch_h >> 5);
uint32_t macro =
((((x >> 5) + macro_outer) << (log2_bpp + 6)) & 0xFFFFFFF) << 1;
uint32_t micro = (((x & 7) + ((y & 6) << 2)) << (log2_bpp + 6)) >> 6;
uint32_t offset_outer = ((y >> 3) + (z >> 2)) & 1;
@Triang3l
Triang3l / GetMipTailLevelOffsetCoords.cc
Last active June 22, 2018 17:47
GetMipTailLevelOffsetCoords from XGraphics, reverse-engineered for Xenia
// Reconstructed for Xenia by Triang3l.
DWORD D3D::AlignTextureDimensions(DWORD * pWidth, DWORD * pHeight, DWORD * pDepth,
DWORD BitsPerPixel, DWORD GpuFormat, DWORD GpuDimension, BOOL Tiled) {
DWORD widthAlignmentBlocks = 32;
DWORD heightAlignmentBlocks = (GpuDimension == GPU_DIMENSION_1D ? 1 : 32);
DWORD depthAlignment = (GpuDimension == GPU_DIMENSION_3D ? 4 : 1);
DWORD blockWidth, blockHeight;
D3D::BlockSizeOfGpuFormat(GpuFormat, &blockWidth, &blockHeight);
// Decompiled for Xenia by Triang3l.
DWORD XGSurfaceSize(DWORD Width, DWORD Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSampleType) {
Format = (D3DFORMAT) ((DWORD) Format & 63);
if (MultiSampleType >= D3DMULTISAMPLE_2_SAMPLES /* 1 */) {
Height *= 2;
}
if (MultiSampleType == D3DMULTISAMPLE_4_SAMPLES /* 2 */) {
Width *= 2;
}
DWORD bytesPerPixel = 4;
.data:824AA378 .long aFmt_1_reverse # "FMT_1_REVERSE"
.data:824AA37C .long aFmt_1 # "FMT_1"
.data:824AA380 .long aFmt_8 # "FMT_8"
.data:824AA384 .long aFmt_1_5_5_5 # "FMT_1_5_5_5"
.data:824AA388 .long aFmt_5_6_5 # "FMT_5_6_5"
.data:824AA38C .long aFmt_6_5_5 # "FMT_6_5_5"
.data:824AA390 .long aFmt_8_8_8_8 # "FMT_8_8_8_8"
.data:824AA394 .long aFmt_2_10_10_10 # "FMT_2_10_10_10"
.data:824AA398 .long aFmt_8_a # "FMT_8_A"
.data:824AA39C .long aFmt_8_b # "FMT_8_B"
.data:824AA378 .long aFmt_1_reverse # "FMT_1_REVERSE"
.rdata:82072C88 _g_FormatTable_D3D__3QBGB:.byte 0
.rdata:82072C89 .byte 1
.data:824AA37C .long aFmt_1 # "FMT_1"
.rdata:82072C8A .byte 0
.rdata:82072C8B .byte 1
.data:824AA380 .long aFmt_8 # "FMT_8"
.rdata:82072C8C .byte 0x20
// Check if the target format is 7e3 and the conversion is needed (this is
// pretty long, better to branch here).
// Opcode token: an IF instruction that tests for a non-zero value. The declared
// length of 3 tokens matches this opcode token plus the two tokens pushed below.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
// Operand token for the condition: a temp-register operand.
// NOTE(review): the arguments (1, 1) presumably select component 1 with one
// index dimension - confirm against EncodeVectorSelectOperand.
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
// NOTE(review): flags_temp is presumably the index of the temp register
// holding the flags - confirm where it is allocated.
shader_code_.push_back(flags_temp);
// Account for the IF instruction just emitted.
++stat_.instruction_count;