Created
February 23, 2015 12:52
-
-
Save Subv/8ccdb8324935b5edfebc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/externals/boost b/externals/boost | |
--- a/externals/boost | |
+++ b/externals/boost | |
@@ -1 +1 @@ | |
-Subproject commit 728a4d7d1c8b28355544ae829df9c4b5f28373c5 | |
+Subproject commit 728a4d7d1c8b28355544ae829df9c4b5f28373c5-dirty | |
diff --git a/externals/nihstro b/externals/nihstro | |
--- a/externals/nihstro | |
+++ b/externals/nihstro | |
@@ -1 +1 @@ | |
-Subproject commit 0a8b4d221425f13e24a3cef9b02edc3221bab211 | |
+Subproject commit 0a8b4d221425f13e24a3cef9b02edc3221bab211-dirty | |
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp | |
index 1ba6002..019dd39 100644 | |
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp | |
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp | |
@@ -161,6 +161,19 @@ void GraphicsFramebufferWidget::OnFramebufferFormatChanged(int new_value) | |
} | |
} | |
+/// Spreads the low 16 bits of x apart: input bit i ends up on output bit 2*i and every | |
+/// odd output bit is zero. Bits 16 and above of x are discarded by the first mask. | |
+/// The 32-bit masks assume unsigned int is at least 32 bits wide. | |
+/// Declared static because no header in this patch declares the helper and the same | |
+/// definition is repeated in other source files; this keeps it local to this file. | |
+/// @param x Value whose low 16 bits are spread | |
+/// @return The spread bits, located on the even bit positions of the result | |
+static unsigned int SeparateBy1(unsigned int x) { | |
+    // In each step the shifted copy overlaps the unshifted one only on bits that the | |
+    // following mask clears, so ^ acts like | here. | |
+    x &= 0x0000ffff;                 // x = ---- ---- ---- ---- fedc ba98 7654 3210 | |
+    x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210 | |
+    x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210 | |
+    x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10 | |
+    x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 | |
+    return x; | |
+} | |
+ | |
+/// Builds a 2D Morton (Z-order) code from the low 16 bits of each coordinate: | |
+/// the bits of x occupy the even bit positions, the bits of y the odd ones. | |
+/// Declared static because no header in this patch declares the helper and the same | |
+/// definition is repeated in other source files; this keeps it local to this file. | |
+/// @param x First coordinate (only its low 16 bits are used) | |
+/// @param y Second coordinate (only its low 16 bits are used) | |
+/// @return The interleaved bits of x and y | |
+static u32 MortonCode2(unsigned int x, unsigned int y) { | |
+    const unsigned int x_bits = SeparateBy1(x);      // even bit positions | |
+    const unsigned int y_bits = SeparateBy1(y) << 1; // odd bit positions | |
+    return x_bits | y_bits; | |
+} | |
+ | |
void GraphicsFramebufferWidget::OnUpdate() | |
{ | |
QPixmap pixmap; | |
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp | |
index 8800235..cba3a1d 100644 | |
--- a/src/core/hw/gpu.cpp | |
+++ b/src/core/hw/gpu.cpp | |
@@ -52,6 +52,19 @@ inline void Read(T &var, const u32 raw_addr) { | |
var = g_regs[addr / 4]; | |
} | |
+/// Spreads the low 16 bits of x apart: input bit i ends up on output bit 2*i and every | |
+/// odd output bit is zero. Bits 16 and above of x are discarded by the first mask. | |
+/// The 32-bit masks assume unsigned int is at least 32 bits wide. | |
+/// Declared static because no header in this patch declares the helper and the same | |
+/// definition is repeated in other source files; this keeps it local to this file. | |
+/// @param x Value whose low 16 bits are spread | |
+/// @return The spread bits, located on the even bit positions of the result | |
+static unsigned int SeparateBy1(unsigned int x) { | |
+    // In each step the shifted copy overlaps the unshifted one only on bits that the | |
+    // following mask clears, so ^ acts like | here. | |
+    x &= 0x0000ffff;                 // x = ---- ---- ---- ---- fedc ba98 7654 3210 | |
+    x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210 | |
+    x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210 | |
+    x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10 | |
+    x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 | |
+    return x; | |
+} | |
+ | |
+/// Builds a 2D Morton (Z-order) code from the low 16 bits of each coordinate: | |
+/// the bits of x occupy the even bit positions, the bits of y the odd ones. | |
+/// Declared static because no header in this patch declares the helper and the same | |
+/// definition is repeated in other source files; this keeps it local to this file. | |
+/// @param x First coordinate (only its low 16 bits are used) | |
+/// @param y Second coordinate (only its low 16 bits are used) | |
+/// @return The interleaved bits of x and y | |
+static u32 MortonCode2(unsigned int x, unsigned int y) { | |
+    const unsigned int x_bits = SeparateBy1(x);      // even bit positions | |
+    const unsigned int y_bits = SeparateBy1(y) << 1; // odd bit positions | |
+    return x_bits | y_bits; | |
+} | |
+ | |
template <typename T> | |
inline void Write(u32 addr, const T data) { | |
addr -= 0x1EF00000; | |
@@ -122,6 +135,9 @@ inline void Write(u32 addr, const T data) { | |
u32 output_width = config.output_width / pixel_skip; | |
+ if (config.flags & 0xFFFF88FF) | |
+ DebugBreak(); | |
+ | |
for (u32 y = 0; y < config.output_height; ++y) { | |
// TODO: Why does the register seem to hold twice the framebuffer width? | |
@@ -130,10 +146,13 @@ inline void Write(u32 addr, const T data) { | |
int r, g, b, a; | |
} source_color = { 0, 0, 0, 0 }; | |
+ u32 src_offset = config.raw_copy ? ((x + y * config.input_width)) : (MortonCode2(x, y)); | |
+ u32 dst_offset = (x + y * config.output_width) / pixel_skip; | |
+ | |
switch (config.input_format) { | |
case Regs::PixelFormat::RGBA8: | |
{ | |
- u8* srcptr = source_pointer + (x * pixel_skip + y * config.input_width) * 4; | |
+ u8* srcptr = source_pointer + src_offset * 4; | |
source_color.r = srcptr[3]; // red | |
source_color.g = srcptr[2]; // green | |
source_color.b = srcptr[1]; // blue | |
@@ -143,7 +162,7 @@ inline void Write(u32 addr, const T data) { | |
case Regs::PixelFormat::RGB5A1: | |
{ | |
- u16 srcval = *(u16*)(source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip); | |
+ u16 srcval = *(u16*)(source_pointer + src_offset * 2 ); | |
source_color.r = Color::Convert5To8((srcval >> 11) & 0x1F); // red | |
source_color.g = Color::Convert5To8((srcval >> 6) & 0x1F); // green | |
source_color.b = Color::Convert5To8((srcval >> 1) & 0x1F); // blue | |
@@ -153,7 +172,7 @@ inline void Write(u32 addr, const T data) { | |
case Regs::PixelFormat::RGBA4: | |
{ | |
- u16 srcval = *(u16*)(source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip); | |
+ u16 srcval = *(u16*)(source_pointer + src_offset * 2); | |
source_color.r = Color::Convert4To8((srcval >> 12) & 0xF); // red | |
source_color.g = Color::Convert4To8((srcval >> 8) & 0xF); // green | |
source_color.b = Color::Convert4To8((srcval >> 4) & 0xF); // blue | |
@@ -167,20 +186,20 @@ inline void Write(u32 addr, const T data) { | |
} | |
switch (config.output_format) { | |
- /*case Regs::PixelFormat::RGBA8: | |
+ case Regs::PixelFormat::RGBA8: | |
{ | |
// TODO: Untested | |
- u8* dstptr = (u32*)(dest_pointer + x * 4 + y * config.output_width * 4); | |
- dstptr[0] = source_color.r; | |
- dstptr[1] = source_color.g; | |
- dstptr[2] = source_color.b; | |
- dstptr[3] = source_color.a; | |
+ u8* dstptr = (u8*)(dest_pointer + dst_offset * 4); | |
+ dstptr[3] = source_color.r; | |
+ dstptr[2] = source_color.g; | |
+ dstptr[1] = source_color.b; | |
+ dstptr[0] = source_color.a; | |
break; | |
- }*/ | |
+ } | |
case Regs::PixelFormat::RGB8: | |
{ | |
- u8* dstptr = dest_pointer + (x + y * output_width) * 3; | |
+ u8* dstptr = dest_pointer + (dst_offset) * 3; | |
dstptr[2] = source_color.r; // red | |
dstptr[1] = source_color.g; // green | |
dstptr[0] = source_color.b; // blue | |
@@ -189,7 +208,7 @@ inline void Write(u32 addr, const T data) { | |
case Regs::PixelFormat::RGB5A1: | |
{ | |
- u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2); | |
+ u16* dstptr = (u16*)(dest_pointer + dst_offset * 2); | |
*dstptr = ((source_color.r >> 3) << 11) | ((source_color.g >> 3) << 6) | |
| ((source_color.b >> 3) << 1) | ( source_color.a >> 7); | |
break; | |
@@ -197,7 +216,7 @@ inline void Write(u32 addr, const T data) { | |
case Regs::PixelFormat::RGBA4: | |
{ | |
- u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2); | |
+ u16* dstptr = (u16*)(dest_pointer + dst_offset * 2); | |
*dstptr = ((source_color.r >> 4) << 12) | ((source_color.g >> 4) << 8) | |
| ((source_color.b >> 4) << 4) | ( source_color.a >> 4); | |
break; | |
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h | |
index 75f5244..ffe7541 100644 | |
--- a/src/core/hw/gpu.h | |
+++ b/src/core/hw/gpu.h | |
@@ -192,6 +192,7 @@ struct Regs { | |
u32 flags; | |
BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true | |
+ BitField< 3, 1, u32> raw_copy; | |
BitField< 8, 3, PixelFormat> input_format; | |
BitField<12, 3, PixelFormat> output_format; | |
BitField<16, 1, u32> output_tiled; // stores output in a tiled format | |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp | |
index 17f8f70..30d50b1 100644 | |
--- a/src/video_core/rasterizer.cpp | |
+++ b/src/video_core/rasterizer.cpp | |
@@ -18,6 +18,19 @@ namespace Pica { | |
namespace Rasterizer { | |
+ /// Spreads the low 16 bits of x apart: input bit i ends up on output bit 2*i and every | |
+ /// odd output bit is zero. Bits 16 and above of x are discarded by the first mask. | |
+ /// The 32-bit masks assume unsigned int is at least 32 bits wide. | |
+ /// Declared static like the other file-local helpers below (DrawPixel, GetPixel, ...); | |
+ /// the same definition is also repeated in other source files of this patch. | |
+ /// @param x Value whose low 16 bits are spread | |
+ /// @return The spread bits, located on the even bit positions of the result | |
+ static unsigned int SeparateBy1(unsigned int x) { | |
+     // In each step the shifted copy overlaps the unshifted one only on bits that the | |
+     // following mask clears, so ^ acts like | here. | |
+     x &= 0x0000ffff;                 // x = ---- ---- ---- ---- fedc ba98 7654 3210 | |
+     x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210 | |
+     x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210 | |
+     x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10 | |
+     x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 | |
+     return x; | |
+ } | |
+ | |
+ /// Builds a 2D Morton (Z-order) code from the low 16 bits of each coordinate: | |
+ /// the bits of x occupy the even bit positions, the bits of y the odd ones. | |
+ /// Declared static like the other file-local helpers below (DrawPixel, GetPixel, ...); | |
+ /// the same definition is also repeated in other source files of this patch. | |
+ /// @param x First coordinate (only its low 16 bits are used) | |
+ /// @param y Second coordinate (only its low 16 bits are used) | |
+ /// @return The interleaved bits of x and y | |
+ static u32 MortonCode2(unsigned int x, unsigned int y) { | |
+     const unsigned int x_bits = SeparateBy1(x);      // even bit positions | |
+     const unsigned int y_bits = SeparateBy1(y) << 1; // odd bit positions | |
+     return x_bits | y_bits; | |
+ } | |
+ | |
static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | |
const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); | |
u8* color_buffer = Memory::GetPointer(PAddrToVAddr(addr)); | |
@@ -26,10 +39,12 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | |
// NOTE: The framebuffer height register contains the actual FB height minus one. | |
y = (registers.framebuffer.height - y); | |
+ u32 code = MortonCode2(x, y); | |
+ | |
switch (registers.framebuffer.color_format) { | |
case registers.framebuffer.RGBA8: | |
{ | |
- u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4; | |
+ u8* pixel = color_buffer + code * 4; | |
pixel[3] = color.r(); | |
pixel[2] = color.g(); | |
pixel[1] = color.b(); | |
@@ -38,7 +53,7 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | |
} | |
default: | |
- LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); | |
+ //LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); | |
UNIMPLEMENTED(); | |
} | |
} | |
@@ -48,12 +63,13 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |
u8* color_buffer = Memory::GetPointer(PAddrToVAddr(addr)); | |
y = (registers.framebuffer.height - y); | |
+ u32 code = MortonCode2(x, y); | |
switch (registers.framebuffer.color_format) { | |
case registers.framebuffer.RGBA8: | |
{ | |
Math::Vec4<u8> ret; | |
- u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4; | |
+ u8* pixel = color_buffer + code * 4; | |
ret.r() = pixel[3]; | |
ret.g() = pixel[2]; | |
ret.b() = pixel[1]; | |
@@ -61,7 +77,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |
return ret; | |
} | |
default: | |
- LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); | |
+ //LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); | |
UNIMPLEMENTED(); | |
} | |
@@ -73,9 +89,10 @@ static u32 GetDepth(int x, int y) { | |
u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | |
y = (registers.framebuffer.height - y); | |
+ u32 code = MortonCode2(x, y); | |
// Assuming 16-bit depth buffer format until actual format handling is implemented | |
- return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); | |
+ return depth_buffer[code]; | |
} | |
static void SetDepth(int x, int y, u16 value) { | |
@@ -83,9 +100,10 @@ static void SetDepth(int x, int y, u16 value) { | |
u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | |
y = (registers.framebuffer.height - y); | |
+ u32 code = MortonCode2(x, y); | |
// Assuming 16-bit depth buffer format until actual format handling is implemented | |
- *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; | |
+ depth_buffer[code] = value; | |
} | |
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values | |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp | |
index 2726951..f357b5c 100644 | |
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp | |
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |
@@ -262,6 +262,7 @@ void RendererOpenGL::DrawScreens() { | |
DrawSingleScreenRotated(textures[0], top_x, 0, | |
(float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenTopHeight); | |
+ glFlush(); | |
DrawSingleScreenRotated(textures[1], bottom_x, (float)VideoCore::kScreenTopHeight, | |
(float)VideoCore::kScreenBottomWidth, (float)VideoCore::kScreenBottomHeight); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment