Skip to content

Instantly share code, notes, and snippets.

@yuriks
Created December 19, 2016 05:44
Show Gist options
  • Save yuriks/e1281522d5c5f75ba10026a5d127ebcc to your computer and use it in GitHub Desktop.
Save yuriks/e1281522d5c5f75ba10026a5d127ebcc to your computer and use it in GitHub Desktop.
--- c:\users\yuriks\projects\3ds\citra\src\video_core\shader\shader.cpp --------
void UnitState::WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output) {
48 89 5C 24 08 mov qword ptr [rsp+8],rbx
for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) {
44 0F B7 4A 34 movzx r9d,word ptr [rdx+34h]
unsigned int output_i = 0;
45 33 D2 xor r10d,r10d
4D 8B D8 mov r11,r8
48 8B D9 mov rbx,rcx
for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) {
45 8B C2 mov r8d,r10d
45 85 C9 test r9d,r9d
74 36 je Pica::Shader::UnitState::WriteOutput+51h (07FF63A9EA9B1h)
0F 1F 44 00 00 nop dword ptr [rax+rax]
41 0F BC D1 bsf edx,r9d
output.attr[output_i++] = registers.output[reg];
41 8B C2 mov eax,r10d
41 FF C2 inc r10d
for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) {
44 03 C2 add r8d,edx
output.attr[output_i++] = registers.output[reg];
48 03 C0 add rax,rax
41 8B C8 mov ecx,r8d
48 83 C1 20 add rcx,20h
48 03 C9 add rcx,rcx
0F 10 04 CB movups xmm0,xmmword ptr [rbx+rcx*8]
for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) {
8D 4A 01 lea ecx,[rdx+1]
41 D3 E9 shr r9d,cl
41 FF C0 inc r8d
output.attr[output_i++] = registers.output[reg];
41 0F 11 04 C3 movups xmmword ptr [r11+rax*8],xmm0
for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) {
45 85 C9 test r9d,r9d
for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) {
75 CF jne Pica::Shader::UnitState::WriteOutput+20h (07FF63A9EA980h)
}
}
48 8B 5C 24 08 mov rbx,qword ptr [rsp+8]
C3 ret
// A STL-like iterator is required to be able to use range-based for loops.
//
// Visits the positions of the set bits of an IntTy value, lowest position first.
// State:
//   m_val - the bits that have not been consumed yet, shifted right so that bit 0 of
//           m_val corresponds to absolute bit position m_bit.
//   m_bit - number of bit positions already shifted out of m_val.
//   m_lsb / m_has_lsb - cached result of LeastSignificantSetBit(m_val), so that
//           operator* followed by operator++ only has to scan m_val once.
//
// NOTE(review): LeastSignificantSetBit is defined elsewhere; it is assumed to return the
// zero-based index of the lowest set bit and to require a non-zero argument, so neither
// operator* nor operator++ should be used on an exhausted iterator - confirm.
class Iterator {
public:
    // Memberwise copy is correct: the cached LSB (if any) describes m_val, which is
    // copied along with it.
    Iterator(const Iterator& other) = default;
    Iterator& operator=(const Iterator& other) = default;

    // Starts iteration over the set bits of `val`, beginning at bit position 0.
    Iterator(IntTy val) : m_val(val), m_bit(0) {}

    // Returns the absolute position of the lowest bit that has not been consumed yet.
    int operator*() {
        return m_bit + ComputeLsb();
    }

    // Consumes the lowest remaining set bit and moves on to the next one.
    Iterator& operator++() {
        const int lsb = ComputeLsb();
        // Discard everything up to and including the bit just visited. This is done as two
        // shifts on purpose: when the highest bit of IntTy is the one being consumed,
        // lsb + 1 equals the width of IntTy, and a single shift by the full width is
        // undefined behaviour (x86, for example, masks the count, which would leave m_val
        // unchanged and the iteration would never terminate).
        m_val >>= lsb;
        m_val >>= 1;
        m_bit += lsb + 1;
        // m_val changed, so the cached LSB no longer applies.
        m_has_lsb = false;
        return *this;
    }

    // Post-increment: advances this iterator and returns its previous state.
    Iterator operator++(int) {
        Iterator previous(*this);
        ++*this;
        return previous;
    }

    // Iterators are compared by their remaining bits only; the position is not taken into
    // account. Any exhausted iterator (m_val == 0) therefore equals Iterator(0).
    bool operator==(Iterator other) const {
        return m_val == other.m_val;
    }
    bool operator!=(Iterator other) const {
        return m_val != other.m_val;
    }

private:
    // Returns LeastSignificantSetBit(m_val), computing it at most once per value of m_val.
    int ComputeLsb() {
        if (!m_has_lsb) {
            m_lsb = LeastSignificantSetBit(m_val);
            m_has_lsb = true;
        }
        return m_lsb;
    }

    IntTy m_val;
    int m_bit;
    int m_lsb = -1;
    bool m_has_lsb = false;
};
// Copies the output registers selected by config.output_mask into output.attr.
// The set bits of the mask are visited in the order the bit set yields them, and the
// corresponding registers are stored in consecutive slots of output.attr starting at 0.
void UnitState::WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output) {
    Common::BitSet<u32> enabled_registers(config.output_mask);
    unsigned int next_slot = 0;
    for (unsigned int register_index : enabled_registers) {
        output.attr[next_slot] = registers.output[register_index];
        ++next_slot;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment