Created
February 23, 2015 22:38
-
-
Save Subv/6c246856452e8b77086a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/externals/boost b/externals/boost | |
--- a/externals/boost | |
+++ b/externals/boost | |
@@ -1 +1 @@ | |
-Subproject commit 728a4d7d1c8b28355544ae829df9c4b5f28373c5 | |
+Subproject commit 728a4d7d1c8b28355544ae829df9c4b5f28373c5-dirty | |
diff --git a/externals/nihstro b/externals/nihstro | |
--- a/externals/nihstro | |
+++ b/externals/nihstro | |
@@ -1 +1 @@ | |
-Subproject commit 0a8b4d221425f13e24a3cef9b02edc3221bab211 | |
+Subproject commit 0a8b4d221425f13e24a3cef9b02edc3221bab211-dirty | |
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp | |
index 8800235..08fb772 100644 | |
--- a/src/core/hw/gpu.cpp | |
+++ b/src/core/hw/gpu.cpp | |
@@ -122,6 +122,18 @@ inline void Write(u32 addr, const T data) { | |
u32 output_width = config.output_width / pixel_skip; | |
+ if (config.raw_copy) { | |
+ LOG_ERROR(HW_GPU, "Raw display transfers are not yet implemented."); | |
+ UNIMPLEMENTED(); | |
+ return; | |
+ } | |
+ | |
+ if (config.output_tiled0 || config.output_tiled1) { | |
+ LOG_ERROR(HW_GPU, "Display transfers with tiled output are not yet implemented"); | |
+ UNIMPLEMENTED(); | |
+ return; | |
+ } | |
+ | |
for (u32 y = 0; y < config.output_height; ++y) { | |
// TODO: Why does the register seem to hold twice the framebuffer width? | |
@@ -130,10 +142,29 @@ inline void Write(u32 addr, const T data) { | |
int r, g, b, a; | |
} source_color = { 0, 0, 0, 0 }; | |
+ const unsigned int block_width = 8; | |
+ const unsigned int block_height = 8; | |
+ | |
+ const unsigned int coarse_x = x & ~7; | |
+ const unsigned int coarse_y = y & ~7; | |
+ | |
+ // Interleave the lower 3 bits of x and y to get the pixel's offset inside its 8x8 block; | |
+ // pixels within a block are laid out along a Z-order (Morton) curve. Bit manipulation details: | |
+ // https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ | |
+ unsigned int i = (x | (y << 8)) & 0x0707; // ---- -210 | |
+ i = (i ^ (i << 2)) & 0x1313; // ---2 --10 | |
+ i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0 | |
+ i = (i | (i >> 7)) & 0x3F; | |
+ | |
+ const unsigned int offset = coarse_x * block_height; | |
+ | |
+ u32 dst_offset = (x + y * config.output_width) * GPU::Regs::BytesPerPixel(config.output_format) / pixel_skip; | |
+ u32 src_offset = (offset + i + coarse_y * output_width) * GPU::Regs::BytesPerPixel(config.input_format); | |
+ | |
switch (config.input_format) { | |
case Regs::PixelFormat::RGBA8: | |
{ | |
- u8* srcptr = source_pointer + (x * pixel_skip + y * config.input_width) * 4; | |
+ u8* srcptr = source_pointer + src_offset; | |
source_color.r = srcptr[3]; // red | |
source_color.g = srcptr[2]; // green | |
source_color.b = srcptr[1]; // blue | |
@@ -143,7 +174,7 @@ inline void Write(u32 addr, const T data) { | |
case Regs::PixelFormat::RGB5A1: | |
{ | |
- u16 srcval = *(u16*)(source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip); | |
+ u16 srcval = *(u16*)(source_pointer + src_offset); | |
source_color.r = Color::Convert5To8((srcval >> 11) & 0x1F); // red | |
source_color.g = Color::Convert5To8((srcval >> 6) & 0x1F); // green | |
source_color.b = Color::Convert5To8((srcval >> 1) & 0x1F); // blue | |
@@ -153,7 +184,7 @@ inline void Write(u32 addr, const T data) { | |
case Regs::PixelFormat::RGBA4: | |
{ | |
- u16 srcval = *(u16*)(source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip); | |
+ u16 srcval = *(u16*)(source_pointer + src_offset); | |
source_color.r = Color::Convert4To8((srcval >> 12) & 0xF); // red | |
source_color.g = Color::Convert4To8((srcval >> 8) & 0xF); // green | |
source_color.b = Color::Convert4To8((srcval >> 4) & 0xF); // blue | |
@@ -167,20 +198,20 @@ inline void Write(u32 addr, const T data) { | |
} | |
switch (config.output_format) { | |
- /*case Regs::PixelFormat::RGBA8: | |
+ case Regs::PixelFormat::RGBA8: | |
{ | |
// TODO: Untested | |
- u8* dstptr = (u32*)(dest_pointer + x * 4 + y * config.output_width * 4); | |
- dstptr[0] = source_color.r; | |
- dstptr[1] = source_color.g; | |
- dstptr[2] = source_color.b; | |
- dstptr[3] = source_color.a; | |
+ u8* dstptr = (u8*)(dest_pointer + dst_offset); | |
+ dstptr[3] = source_color.r; | |
+ dstptr[2] = source_color.g; | |
+ dstptr[1] = source_color.b; | |
+ dstptr[0] = source_color.a; | |
break; | |
- }*/ | |
+ } | |
case Regs::PixelFormat::RGB8: | |
{ | |
- u8* dstptr = dest_pointer + (x + y * output_width) * 3; | |
+ u8* dstptr = dest_pointer + dst_offset; | |
dstptr[2] = source_color.r; // red | |
dstptr[1] = source_color.g; // green | |
dstptr[0] = source_color.b; // blue | |
@@ -189,7 +220,7 @@ inline void Write(u32 addr, const T data) { | |
case Regs::PixelFormat::RGB5A1: | |
{ | |
- u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2); | |
+ u16* dstptr = (u16*)(dest_pointer + dst_offset); | |
*dstptr = ((source_color.r >> 3) << 11) | ((source_color.g >> 3) << 6) | |
| ((source_color.b >> 3) << 1) | ( source_color.a >> 7); | |
break; | |
@@ -197,7 +228,7 @@ inline void Write(u32 addr, const T data) { | |
case Regs::PixelFormat::RGBA4: | |
{ | |
- u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2); | |
+ u16* dstptr = (u16*)(dest_pointer + dst_offset); | |
*dstptr = ((source_color.r >> 4) << 12) | ((source_color.g >> 4) << 8) | |
| ((source_color.b >> 4) << 4) | ( source_color.a >> 4); | |
break; | |
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h | |
index 75f5244..b365ff1 100644 | |
--- a/src/core/hw/gpu.h | |
+++ b/src/core/hw/gpu.h | |
@@ -192,9 +192,11 @@ struct Regs { | |
u32 flags; | |
BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true | |
+ BitField< 1, 1, u32> output_tiled0; // Converts the output to tiles. TODO(Subv): Verify and implement | |
+ BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing | |
BitField< 8, 3, PixelFormat> input_format; | |
BitField<12, 3, PixelFormat> output_format; | |
- BitField<16, 1, u32> output_tiled; // stores output in a tiled format | |
+ BitField<16, 1, u32> output_tiled1; // Converts the output to tiles. TODO(Subv): Verify and implement | |
// TODO: Not really sure if this actually scales, or even resizes at all. | |
BitField<24, 1, u32> scale_horizontally; | |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp | |
index 17f8f70..94610a3 100644 | |
--- a/src/video_core/rasterizer.cpp | |
+++ b/src/video_core/rasterizer.cpp | |
@@ -7,6 +7,7 @@ | |
#include "common/common_types.h" | |
#include "common/math_util.h" | |
+#include "core/hw/gpu.h" | |
#include "math.h" | |
#include "pica.h" | |
#include "rasterizer.h" | |
@@ -26,10 +27,27 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | |
// NOTE: The framebuffer height register contains the actual FB height minus one. | |
y = (registers.framebuffer.height - y); | |
+ const unsigned int block_width = 8; | |
+ const unsigned int block_height = 8; | |
+ | |
+ const unsigned int coarse_x = x & ~7; | |
+ const unsigned int coarse_y = y & ~7; | |
+ | |
+ // Interleave the lower 3 bits of x and y to get the pixel's offset inside its 8x8 block; | |
+ // pixels within a block are laid out along a Z-order (Morton) curve. Bit manipulation details: | |
+ // https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ | |
+ unsigned int i = (x | (y << 8)) & 0x0707; // ---- -210 | |
+ i = (i ^ (i << 2)) & 0x1313; // ---2 --10 | |
+ i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0 | |
+ i = (i | (i >> 7)) & 0x3F; | |
+ | |
+ const unsigned int offset = coarse_x * block_height; | |
+ u32 dst_offset = (offset + i + coarse_y * registers.framebuffer.width) * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); | |
+ | |
switch (registers.framebuffer.color_format) { | |
case registers.framebuffer.RGBA8: | |
{ | |
- u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4; | |
+ u8* pixel = color_buffer + dst_offset; | |
pixel[3] = color.r(); | |
pixel[2] = color.g(); | |
pixel[1] = color.b(); | |
@@ -48,12 +66,28 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |
u8* color_buffer = Memory::GetPointer(PAddrToVAddr(addr)); | |
y = (registers.framebuffer.height - y); | |
+ const unsigned int block_width = 8; | |
+ const unsigned int block_height = 8; | |
+ | |
+ const unsigned int coarse_x = x & ~7; | |
+ const unsigned int coarse_y = y & ~7; | |
+ | |
+ // Interleave the lower 3 bits of x and y to get the pixel's offset inside its 8x8 block; | |
+ // pixels within a block are laid out along a Z-order (Morton) curve. Bit manipulation details: | |
+ // https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ | |
+ unsigned int i = (x | (y << 8)) & 0x0707; // ---- -210 | |
+ i = (i ^ (i << 2)) & 0x1313; // ---2 --10 | |
+ i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0 | |
+ i = (i | (i >> 7)) & 0x3F; | |
+ | |
+ const unsigned int offset = coarse_x * block_height; | |
+ u32 src_offset = (offset + i + coarse_y * registers.framebuffer.width) * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); | |
switch (registers.framebuffer.color_format) { | |
case registers.framebuffer.RGBA8: | |
{ | |
Math::Vec4<u8> ret; | |
- u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4; | |
+ u8* pixel = color_buffer + src_offset; | |
ret.r() = pixel[3]; | |
ret.g() = pixel[2]; | |
ret.b() = pixel[1]; | |
@@ -70,22 +104,54 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |
static u32 GetDepth(int x, int y) { | |
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | |
- u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | |
+ u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr)); | |
y = (registers.framebuffer.height - y); | |
+ const unsigned int block_width = 8; | |
+ const unsigned int block_height = 8; | |
+ | |
+ const unsigned int coarse_x = x & ~7; | |
+ const unsigned int coarse_y = y & ~7; | |
+ | |
+ // Interleave the lower 3 bits of x and y to get the pixel's offset inside its 8x8 block; | |
+ // pixels within a block are laid out along a Z-order (Morton) curve. Bit manipulation details: | |
+ // https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ | |
+ unsigned int i = (x | (y << 8)) & 0x0707; // ---- -210 | |
+ i = (i ^ (i << 2)) & 0x1313; // ---2 --10 | |
+ i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0 | |
+ i = (i | (i >> 7)) & 0x3F; | |
+ | |
+ const unsigned int offset = coarse_x * block_height; | |
+ u32 stride = registers.framebuffer.width * 2; | |
// Assuming 16-bit depth buffer format until actual format handling is implemented | |
- return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); | |
+ return *(u16*)(depth_buffer + i * 2 + offset * 2 + coarse_y * stride); | |
} | |
static void SetDepth(int x, int y, u16 value) { | |
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | |
- u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | |
+ u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr)); | |
y = (registers.framebuffer.height - y); | |
+ const unsigned int block_width = 8; | |
+ const unsigned int block_height = 8; | |
+ | |
+ const unsigned int coarse_x = x & ~7; | |
+ const unsigned int coarse_y = y & ~7; | |
+ | |
+ // Interleave the lower 3 bits of x and y to get the pixel's offset inside its 8x8 block; | |
+ // pixels within a block are laid out along a Z-order (Morton) curve. Bit manipulation details: | |
+ // https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ | |
+ unsigned int i = (x | (y << 8)) & 0x0707; // ---- -210 | |
+ i = (i ^ (i << 2)) & 0x1313; // ---2 --10 | |
+ i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0 | |
+ i = (i | (i >> 7)) & 0x3F; | |
+ | |
+ const unsigned int offset = coarse_x * block_height; | |
+ u32 stride = registers.framebuffer.width * 2; | |
// Assuming 16-bit depth buffer format until actual format handling is implemented | |
- *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; | |
+ *(u16*)(depth_buffer + i * 2 + offset * 2 + coarse_y * stride) = value; | |
} | |
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values | |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp | |
index 2726951..f357b5c 100644 | |
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp | |
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |
@@ -262,6 +262,7 @@ void RendererOpenGL::DrawScreens() { | |
DrawSingleScreenRotated(textures[0], top_x, 0, | |
(float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenTopHeight); | |
+ glFlush(); | |
DrawSingleScreenRotated(textures[1], bottom_x, (float)VideoCore::kScreenTopHeight, | |
(float)VideoCore::kScreenBottomWidth, (float)VideoCore::kScreenBottomHeight); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment