Subv · February 23, 2015 12:52
diff --git a/gistfile1.diff b/gistfile1.diff
 diff --git a/externals/boost b/externals/boost
 --- a/externals/boost
 +++ b/externals/boost
 @@ -1 +1 @@
 -Subproject commit 728a4d7d1c8b28355544ae829df9c4b5f28373c5
 +Subproject commit 728a4d7d1c8b28355544ae829df9c4b5f28373c5-dirty
 diff --git a/externals/nihstro b/externals/nihstro
 --- a/externals/nihstro
 +++ b/externals/nihstro
 @@ -1 +1 @@
 -Subproject commit 0a8b4d221425f13e24a3cef9b02edc3221bab211
 +Subproject commit 0a8b4d221425f13e24a3cef9b02edc3221bab211-dirty
 diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
 index 1ba6002..019dd39 100644
 --- a/src/citra_qt/debugger/graphics_framebuffer.cpp
 +++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
 @@ -161,6 +161,19 @@ void GraphicsFramebufferWidget::OnFramebufferFormatChanged(int new_value)
     }
 }
 
 +unsigned int SeparateBy1(unsigned int x) {
 +    x &= 0x0000ffff;                  // x = ---- ---- ---- ---- fedc ba98 7654 3210
 +    x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
 +    x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
 +    x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
 +    x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
 +    return x;
 +}
 +
 +u32 MortonCode2(unsigned int x, unsigned int y) {
 +    return SeparateBy1(x) | (SeparateBy1(y) << 1);
 +}
 +
 void GraphicsFramebufferWidget::OnUpdate()
 {
     QPixmap pixmap;
 diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
 index 8800235..cba3a1d 100644
 --- a/src/core/hw/gpu.cpp
 +++ b/src/core/hw/gpu.cpp
 @@ -52,6 +52,19 @@ inline void Read(T &var, const u32 raw_addr) {
     var = g_regs[addr / 4];
 }
 
 +unsigned int SeparateBy1(unsigned int x) {
 +    x &= 0x0000ffff;                  // x = ---- ---- ---- ---- fedc ba98 7654 3210
 +    x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
 +    x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
 +    x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
 +    x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
 +    return x;
 +}
 +
 +u32 MortonCode2(unsigned int x, unsigned int y) {
 +    return SeparateBy1(x) | (SeparateBy1(y) << 1);
 +}
 +
 template <typename T>
 inline void Write(u32 addr, const T data) {
     addr -= 0x1EF00000;
 @@ -122,6 +135,9 @@ inline void Write(u32 addr, const T data) {
 
             u32 output_width = config.output_width / pixel_skip;
 
 +            if (config.flags & 0xFFFF88FF)
 +                DebugBreak();
 +
             for (u32 y = 0; y < config.output_height; ++y) {
                 // TODO: Why does the register seem to hold twice the framebuffer width?
 
 @@ -130,10 +146,13 @@ inline void Write(u32 addr, const T data) {
                         int r, g, b, a;
                     } source_color = { 0, 0, 0, 0 };
 
 +                    u32 src_offset = config.raw_copy ? ((x + y * config.input_width)) : (MortonCode2(x, y));
 +                    u32 dst_offset = (x + y * config.output_width) / pixel_skip;
 +
                     switch (config.input_format) {
                     case Regs::PixelFormat::RGBA8:
                     {
 -                        u8* srcptr = source_pointer + (x * pixel_skip + y * config.input_width) * 4;
 +                        u8* srcptr = source_pointer + src_offset * 4;
                         source_color.r = srcptr[3]; // red
                         source_color.g = srcptr[2]; // green
                         source_color.b = srcptr[1]; // blue
 @@ -143,7 +162,7 @@ inline void Write(u32 addr, const T data) {
 
                     case Regs::PixelFormat::RGB5A1:
                     {
 -                        u16 srcval = *(u16*)(source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip);
 +                        u16 srcval = *(u16*)(source_pointer + src_offset * 2 );
                         source_color.r = Color::Convert5To8((srcval >> 11) & 0x1F); // red
                         source_color.g = Color::Convert5To8((srcval >>  6) & 0x1F); // green
                         source_color.b = Color::Convert5To8((srcval >>  1) & 0x1F); // blue
 @@ -153,7 +172,7 @@ inline void Write(u32 addr, const T data) {
 
                     case Regs::PixelFormat::RGBA4:
                     {
 -                        u16 srcval = *(u16*)(source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip);
 +                        u16 srcval = *(u16*)(source_pointer + src_offset * 2);
                         source_color.r = Color::Convert4To8((srcval >> 12) & 0xF); // red
                         source_color.g = Color::Convert4To8((srcval >>  8) & 0xF); // green
                         source_color.b = Color::Convert4To8((srcval >>  4) & 0xF); // blue
 @@ -167,20 +186,20 @@ inline void Write(u32 addr, const T data) {
                     }
 
                     switch (config.output_format) {
 -                    /*case Regs::PixelFormat::RGBA8:
 +                    case Regs::PixelFormat::RGBA8:
                     {
                         // TODO: Untested
 -                        u8* dstptr = (u32*)(dest_pointer + x * 4 + y * config.output_width * 4);
 -                        dstptr[0] = source_color.r;
 -                        dstptr[1] = source_color.g;
 -                        dstptr[2] = source_color.b;
 -                        dstptr[3] = source_color.a;
 +                        u8* dstptr = (u8*)(dest_pointer + dst_offset * 4);
 +                        dstptr[3] = source_color.r;
 +                        dstptr[2] = source_color.g;
 +                        dstptr[1] = source_color.b;
 +                        dstptr[0] = source_color.a;
                         break;
 -                    }*/
 +                    }
 
                     case Regs::PixelFormat::RGB8:
                     {
 -                        u8* dstptr = dest_pointer + (x + y * output_width) * 3;
 +                        u8* dstptr = dest_pointer + (dst_offset) * 3;
                         dstptr[2] = source_color.r; // red
                         dstptr[1] = source_color.g; // green
                         dstptr[0] = source_color.b; // blue
 @@ -189,7 +208,7 @@ inline void Write(u32 addr, const T data) {
 
                     case Regs::PixelFormat::RGB5A1:
                     {
 -                        u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2);
 +                        u16* dstptr = (u16*)(dest_pointer + dst_offset * 2);
                         *dstptr = ((source_color.r >> 3) << 11) | ((source_color.g >> 3) << 6)
                                 | ((source_color.b >> 3) <<  1) | ( source_color.a >> 7);
                         break;
 @@ -197,7 +216,7 @@ inline void Write(u32 addr, const T data) {
 
                     case Regs::PixelFormat::RGBA4:
                     {
 -                        u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2);
 +                        u16* dstptr = (u16*)(dest_pointer + dst_offset * 2);
                         *dstptr = ((source_color.r >> 4) << 12) | ((source_color.g >> 4) << 8)
                                 | ((source_color.b >> 4) <<  4) | ( source_color.a >> 4);
                         break;
 diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
 index 75f5244..ffe7541 100644
 --- a/src/core/hw/gpu.h
 +++ b/src/core/hw/gpu.h
 @@ -192,6 +192,7 @@ struct Regs {
             u32 flags;
 
             BitField< 0, 1, u32> flip_data;        // flips input data horizontally (TODO) if true
 +            BitField< 3, 1, u32> raw_copy;
             BitField< 8, 3, PixelFormat> input_format;
             BitField<12, 3, PixelFormat> output_format;
             BitField<16, 1, u32> output_tiled;     // stores output in a tiled format
 diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
 index 17f8f70..30d50b1 100644
 --- a/src/video_core/rasterizer.cpp
 +++ b/src/video_core/rasterizer.cpp
 @@ -18,6 +18,19 @@ namespace Pica {
 
 namespace Rasterizer {
 
 +    unsigned int SeparateBy1(unsigned int x) {
 +        x &= 0x0000ffff;                  // x = ---- ---- ---- ---- fedc ba98 7654 3210
 +        x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
 +        x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
 +        x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
 +        x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
 +        return x;
 +    }
 +
 +    u32 MortonCode2(unsigned int x, unsigned int y) {
 +        return SeparateBy1(x) | (SeparateBy1(y) << 1);
 +    }
 +
 static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
     const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
     u8* color_buffer = Memory::GetPointer(PAddrToVAddr(addr));
 @@ -26,10 +39,12 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
     // NOTE: The framebuffer height register contains the actual FB height minus one.
     y = (registers.framebuffer.height - y);
 
 +    u32 code = MortonCode2(x, y);
 +
     switch (registers.framebuffer.color_format) {
     case registers.framebuffer.RGBA8:
     {
 -        u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4;
 +        u8* pixel = color_buffer + code * 4;
         pixel[3] = color.r();
         pixel[2] = color.g();
         pixel[1] = color.b();
 @@ -38,7 +53,7 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
     }
 
     default:
 -        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);
 +        //LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);
         UNIMPLEMENTED();
     }
 }
 @@ -48,12 +63,13 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
     u8* color_buffer = Memory::GetPointer(PAddrToVAddr(addr));
 
     y = (registers.framebuffer.height - y);
 +    u32 code = MortonCode2(x, y);
 
     switch (registers.framebuffer.color_format) {
     case registers.framebuffer.RGBA8:
     {
         Math::Vec4<u8> ret;
 -        u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4;
 +        u8* pixel = color_buffer + code * 4;
         ret.r() = pixel[3];
         ret.g() = pixel[2];
         ret.b() = pixel[1];
 @@ -61,7 +77,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
         return ret;
     }
     default:
 -        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);
 +        //LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);
         UNIMPLEMENTED();
     }
 
 @@ -73,9 +89,10 @@ static u32 GetDepth(int x, int y) {
     u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
 
     y = (registers.framebuffer.height - y);
 +    u32 code = MortonCode2(x, y);
 
     // Assuming 16-bit depth buffer format until actual format handling is implemented
 -    return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
 +    return depth_buffer[code];
 }
 
 static void SetDepth(int x, int y, u16 value) {
 @@ -83,9 +100,10 @@ static void SetDepth(int x, int y, u16 value) {
     u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
 
     y = (registers.framebuffer.height - y);
 +    u32 code = MortonCode2(x, y);
 
     // Assuming 16-bit depth buffer format until actual format handling is implemented
 -    *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
 +    depth_buffer[code] = value;
 }
 
 // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
 index 2726951..f357b5c 100644
 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp
 +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
 @@ -262,6 +262,7 @@ void RendererOpenGL::DrawScreens() {
 
     DrawSingleScreenRotated(textures[0], top_x, 0,
         (float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenTopHeight);
 +    glFlush();
     DrawSingleScreenRotated(textures[1], bottom_x, (float)VideoCore::kScreenTopHeight,
         (float)VideoCore::kScreenBottomWidth, (float)VideoCore::kScreenBottomHeight);
	diff --git a/externals/boost b/externals/boost
	--- a/externals/boost
	+++ b/externals/boost
	@@ -1 +1 @@
	-Subproject commit 728a4d7d1c8b28355544ae829df9c4b5f28373c5
	+Subproject commit 728a4d7d1c8b28355544ae829df9c4b5f28373c5-dirty
	diff --git a/externals/nihstro b/externals/nihstro
	--- a/externals/nihstro
	+++ b/externals/nihstro
	@@ -1 +1 @@
	-Subproject commit 0a8b4d221425f13e24a3cef9b02edc3221bab211
	+Subproject commit 0a8b4d221425f13e24a3cef9b02edc3221bab211-dirty
	diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
	index 1ba6002..019dd39 100644
	--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
	+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
	@@ -161,6 +161,19 @@ void GraphicsFramebufferWidget::OnFramebufferFormatChanged(int new_value)
	}
	}

	+unsigned int SeparateBy1(unsigned int x) {
	+ x &= 0x0000ffff; // x = ---- ---- ---- ---- fedc ba98 7654 3210
	+ x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
	+ x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
	+ x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
	+ x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
	+ return x;
	+}
	+
	+u32 MortonCode2(unsigned int x, unsigned int y) {
	+ return SeparateBy1(x) | (SeparateBy1(y) << 1);
	+}
	+
	void GraphicsFramebufferWidget::OnUpdate()
	{
	QPixmap pixmap;
	diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
	index 8800235..cba3a1d 100644
	--- a/src/core/hw/gpu.cpp
	+++ b/src/core/hw/gpu.cpp
	@@ -52,6 +52,19 @@ inline void Read(T &var, const u32 raw_addr) {
	var = g_regs[addr / 4];
	}

	+unsigned int SeparateBy1(unsigned int x) {
	+ x &= 0x0000ffff; // x = ---- ---- ---- ---- fedc ba98 7654 3210
	+ x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
	+ x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
	+ x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
	+ x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
	+ return x;
	+}
	+
	+u32 MortonCode2(unsigned int x, unsigned int y) {
	+ return SeparateBy1(x) | (SeparateBy1(y) << 1);
	+}
	+
	template <typename T>
	inline void Write(u32 addr, const T data) {
	addr -= 0x1EF00000;
	@@ -122,6 +135,9 @@ inline void Write(u32 addr, const T data) {

	u32 output_width = config.output_width / pixel_skip;

	+ if (config.flags & 0xFFFF88FF)
	+ DebugBreak();
	+
	for (u32 y = 0; y < config.output_height; ++y) {
	// TODO: Why does the register seem to hold twice the framebuffer width?

	@@ -130,10 +146,13 @@ inline void Write(u32 addr, const T data) {
	int r, g, b, a;
	} source_color = { 0, 0, 0, 0 };

	+ u32 src_offset = config.raw_copy ? ((x + y * config.input_width)) : (MortonCode2(x, y));
	+ u32 dst_offset = (x + y * config.output_width) / pixel_skip;
	+
	switch (config.input_format) {
	case Regs::PixelFormat::RGBA8:
	{
	- u8* srcptr = source_pointer + (x * pixel_skip + y * config.input_width) * 4;
	+ u8* srcptr = source_pointer + src_offset * 4;
	source_color.r = srcptr[3]; // red
	source_color.g = srcptr[2]; // green
	source_color.b = srcptr[1]; // blue
	@@ -143,7 +162,7 @@ inline void Write(u32 addr, const T data) {

	case Regs::PixelFormat::RGB5A1:
	{
	- u16 srcval = *(u16*)(source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip);
	+ u16 srcval = *(u16*)(source_pointer + src_offset * 2 );
	source_color.r = Color::Convert5To8((srcval >> 11) & 0x1F); // red
	source_color.g = Color::Convert5To8((srcval >> 6) & 0x1F); // green
	source_color.b = Color::Convert5To8((srcval >> 1) & 0x1F); // blue
	@@ -153,7 +172,7 @@ inline void Write(u32 addr, const T data) {

	case Regs::PixelFormat::RGBA4:
	{
	- u16 srcval = *(u16*)(source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip);
	+ u16 srcval = *(u16*)(source_pointer + src_offset * 2);
	source_color.r = Color::Convert4To8((srcval >> 12) & 0xF); // red
	source_color.g = Color::Convert4To8((srcval >> 8) & 0xF); // green
	source_color.b = Color::Convert4To8((srcval >> 4) & 0xF); // blue
	@@ -167,20 +186,20 @@ inline void Write(u32 addr, const T data) {
	}

	switch (config.output_format) {
	- /*case Regs::PixelFormat::RGBA8:
	+ case Regs::PixelFormat::RGBA8:
	{
	// TODO: Untested
	- u8* dstptr = (u32*)(dest_pointer + x * 4 + y * config.output_width * 4);
	- dstptr[0] = source_color.r;
	- dstptr[1] = source_color.g;
	- dstptr[2] = source_color.b;
	- dstptr[3] = source_color.a;
	+ u8* dstptr = (u8*)(dest_pointer + dst_offset * 4);
	+ dstptr[3] = source_color.r;
	+ dstptr[2] = source_color.g;
	+ dstptr[1] = source_color.b;
	+ dstptr[0] = source_color.a;
	break;
	- }*/
	+ }

	case Regs::PixelFormat::RGB8:
	{
	- u8* dstptr = dest_pointer + (x + y * output_width) * 3;
	+ u8* dstptr = dest_pointer + (dst_offset) * 3;
	dstptr[2] = source_color.r; // red
	dstptr[1] = source_color.g; // green
	dstptr[0] = source_color.b; // blue
	@@ -189,7 +208,7 @@ inline void Write(u32 addr, const T data) {

	case Regs::PixelFormat::RGB5A1:
	{
	- u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2);
	+ u16* dstptr = (u16*)(dest_pointer + dst_offset * 2);
	*dstptr = ((source_color.r >> 3) << 11) | ((source_color.g >> 3) << 6)
	| ((source_color.b >> 3) << 1) | ( source_color.a >> 7);
	break;
	@@ -197,7 +216,7 @@ inline void Write(u32 addr, const T data) {

	case Regs::PixelFormat::RGBA4:
	{
	- u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2);
	+ u16* dstptr = (u16*)(dest_pointer + dst_offset * 2);
	*dstptr = ((source_color.r >> 4) << 12) | ((source_color.g >> 4) << 8)
	| ((source_color.b >> 4) << 4) | ( source_color.a >> 4);
	break;
	diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
	index 75f5244..ffe7541 100644
	--- a/src/core/hw/gpu.h
	+++ b/src/core/hw/gpu.h
	@@ -192,6 +192,7 @@ struct Regs {
	u32 flags;

	BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true
	+ BitField< 3, 1, u32> raw_copy;
	BitField< 8, 3, PixelFormat> input_format;
	BitField<12, 3, PixelFormat> output_format;
	BitField<16, 1, u32> output_tiled; // stores output in a tiled format
	diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
	index 17f8f70..30d50b1 100644
	--- a/src/video_core/rasterizer.cpp
	+++ b/src/video_core/rasterizer.cpp
	@@ -18,6 +18,19 @@ namespace Pica {

	namespace Rasterizer {

	+ unsigned int SeparateBy1(unsigned int x) {
	+ x &= 0x0000ffff; // x = ---- ---- ---- ---- fedc ba98 7654 3210
	+ x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
	+ x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
	+ x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
	+ x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
	+ return x;
	+ }
	+
	+ u32 MortonCode2(unsigned int x, unsigned int y) {
	+ return SeparateBy1(x) | (SeparateBy1(y) << 1);
	+ }
	+
	static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
	const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
	u8* color_buffer = Memory::GetPointer(PAddrToVAddr(addr));
	@@ -26,10 +39,12 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
	// NOTE: The framebuffer height register contains the actual FB height minus one.
	y = (registers.framebuffer.height - y);

	+ u32 code = MortonCode2(x, y);
	+
	switch (registers.framebuffer.color_format) {
	case registers.framebuffer.RGBA8:
	{
	- u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4;
	+ u8* pixel = color_buffer + code * 4;
	pixel[3] = color.r();
	pixel[2] = color.g();
	pixel[1] = color.b();
	@@ -38,7 +53,7 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
	}

	default:
	- LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);
	+ //LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);
	UNIMPLEMENTED();
	}
	}
	@@ -48,12 +63,13 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
	u8* color_buffer = Memory::GetPointer(PAddrToVAddr(addr));

	y = (registers.framebuffer.height - y);
	+ u32 code = MortonCode2(x, y);

	switch (registers.framebuffer.color_format) {
	case registers.framebuffer.RGBA8:
	{
	Math::Vec4<u8> ret;
	- u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4;
	+ u8* pixel = color_buffer + code * 4;
	ret.r() = pixel[3];
	ret.g() = pixel[2];
	ret.b() = pixel[1];
	@@ -61,7 +77,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
	return ret;
	}
	default:
	- LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);
	+ //LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);
	UNIMPLEMENTED();
	}

	@@ -73,9 +89,10 @@ static u32 GetDepth(int x, int y) {
	u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));

	y = (registers.framebuffer.height - y);
	+ u32 code = MortonCode2(x, y);

	// Assuming 16-bit depth buffer format until actual format handling is implemented
	- return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
	+ return depth_buffer[code];
	}

	static void SetDepth(int x, int y, u16 value) {
	@@ -83,9 +100,10 @@ static void SetDepth(int x, int y, u16 value) {
	u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));

	y = (registers.framebuffer.height - y);
	+ u32 code = MortonCode2(x, y);

	// Assuming 16-bit depth buffer format until actual format handling is implemented
	- *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
	+ depth_buffer[code] = value;
	}

	// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
	diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
	index 2726951..f357b5c 100644
	--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
	+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
	@@ -262,6 +262,7 @@ void RendererOpenGL::DrawScreens() {

	DrawSingleScreenRotated(textures[0], top_x, 0,
	(float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenTopHeight);
	+ glFlush();
	DrawSingleScreenRotated(textures[1], bottom_x, (float)VideoCore::kScreenTopHeight,
	(float)VideoCore::kScreenBottomWidth, (float)VideoCore::kScreenBottomHeight);