Skip to content

Instantly share code, notes, and snippets.

@Subv
Last active August 29, 2015 14:15
Show Gist options
  • Save Subv/bff3edba2d006dcdf9b2 to your computer and use it in GitHub Desktop.
Save Subv/bff3edba2d006dcdf9b2 to your computer and use it in GitHub Desktop.
diff --git a/externals/boost b/externals/boost
--- a/externals/boost
+++ b/externals/boost
@@ -1 +1 @@
-Subproject commit 728a4d7d1c8b28355544ae829df9c4b5f28373c5
+Subproject commit 728a4d7d1c8b28355544ae829df9c4b5f28373c5-dirty
diff --git a/externals/nihstro b/externals/nihstro
--- a/externals/nihstro
+++ b/externals/nihstro
@@ -1 +1 @@
-Subproject commit 0a8b4d221425f13e24a3cef9b02edc3221bab211
+Subproject commit 0a8b4d221425f13e24a3cef9b02edc3221bab211-dirty
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 8800235..31a2045 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -52,6 +52,37 @@ inline void Read(T &var, const u32 raw_addr) {
var = g_regs[addr / 4];
}
+// Packs the even-numbered bits of x (bits 0, 2, 4, ..., 30) into the low 16 bits of the result.
+u32 Compact1By1(u32 x) {
+ u32 packed = x & 0x55555555; // packed = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
+ packed = (packed ^ (packed >> 1)) & 0x33333333; // packed = --fe --dc --ba --98 --76 --54 --32 --10
+ packed = (packed ^ (packed >> 2)) & 0x0f0f0f0f; // packed = ---- fedc ---- ba98 ---- 7654 ---- 3210
+ packed = (packed ^ (packed >> 4)) & 0x00ff00ff; // packed = ---- ---- fedc ba98 ---- ---- 7654 3210
+ packed = (packed ^ (packed >> 8)) & 0x0000ffff; // packed = ---- ---- ---- ---- fedc ba98 7654 3210
+ return packed;
+}
+
+// Packs every third bit of x (bits 0, 3, 6, ..., 27) into the low 10 bits of the result and
+// discards all other bits. Described by the author as the inverse of Part1By2 (not in this diff).
+u32 Compact1By2(u32 x) {
+ u32 packed = x & 0x09249249; // packed = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
+ packed = (packed ^ (packed >> 2)) & 0x030c30c3; // packed = ---- --98 ---- 76-- --54 ---- 32-- --10
+ packed = (packed ^ (packed >> 4)) & 0x0300f00f; // packed = ---- --98 ---- ---- 7654 ---- ---- 3210
+ packed = (packed ^ (packed >> 8)) & 0xff0000ff; // packed = ---- --98 ---- ---- ---- ---- 7654 3210
+ packed = (packed ^ (packed >> 16)) & 0x000003ff; // packed = ---- ---- ---- ---- ---- --98 7654 3210
+ return packed;
+}
+
+// Returns the value stored in the even-numbered bits of a 2D Morton code (the x component).
+u32 DecodeMorton2X(u32 code) {
+ return Compact1By1(code);
+}
+
+// Returns the value stored in the odd-numbered bits of a 2D Morton code (the y component).
+u32 DecodeMorton2Y(u32 code) {
+ return Compact1By1(code >> 1);
+}
+
template <typename T>
inline void Write(u32 addr, const T data) {
addr -= 0x1EF00000;
@@ -130,6 +161,10 @@ inline void Write(u32 addr, const T data) {
int r, g, b, a;
} source_color = { 0, 0, 0, 0 };
+ u32 morton_code = y * config.input_width + x;
+ u32 curve_x = DecodeMorton2X(morton_code);
+ u32 curve_y = DecodeMorton2Y(morton_code);
+
switch (config.input_format) {
case Regs::PixelFormat::RGBA8:
{
@@ -180,7 +215,7 @@ inline void Write(u32 addr, const T data) {
case Regs::PixelFormat::RGB8:
{
- u8* dstptr = dest_pointer + (x + y * output_width) * 3;
+ u8* dstptr = dest_pointer + (curve_x + curve_y * output_width) * 3;
dstptr[2] = source_color.r; // red
dstptr[1] = source_color.g; // green
dstptr[0] = source_color.b; // blue
@@ -189,7 +224,7 @@ inline void Write(u32 addr, const T data) {
case Regs::PixelFormat::RGB5A1:
{
- u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2);
+ u16* dstptr = (u16*)(dest_pointer + curve_x * 2 + curve_y * config.output_width * 2);
*dstptr = ((source_color.r >> 3) << 11) | ((source_color.g >> 3) << 6)
| ((source_color.b >> 3) << 1) | ( source_color.a >> 7);
break;
@@ -197,7 +232,7 @@ inline void Write(u32 addr, const T data) {
case Regs::PixelFormat::RGBA4:
{
- u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2);
+ u16* dstptr = (u16*)(dest_pointer + curve_x * 2 + curve_y * config.output_width * 2);
*dstptr = ((source_color.r >> 4) << 12) | ((source_color.g >> 4) << 8)
| ((source_color.b >> 4) << 4) | ( source_color.a >> 4);
break;
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 17f8f70..b932873 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -18,6 +18,19 @@ namespace Pica {
namespace Rasterizer {
+ // Spreads the low 16 bits of x so that input bit i ends up at bit 2*i; odd bits are zero.
+ unsigned int SeparateBy1(unsigned int x) {
+ unsigned int spread = x & 0x0000ffff; // spread = ---- ---- ---- ---- fedc ba98 7654 3210
+ spread = (spread ^ (spread << 8)) & 0x00ff00ff; // spread = ---- ---- fedc ba98 ---- ---- 7654 3210
+ spread = (spread ^ (spread << 4)) & 0x0f0f0f0f; // spread = ---- fedc ---- ba98 ---- 7654 ---- 3210
+ spread = (spread ^ (spread << 2)) & 0x33333333; // spread = --fe --dc --ba --98 --76 --54 --32 --10
+ return (spread ^ (spread << 1)) & 0x55555555; // result = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
+ }
+
+ u32 MortonCode2(unsigned int x, unsigned int y) { // interleave: x -> even bits, y -> odd bits
+ return (SeparateBy1(y) << 1) | SeparateBy1(x);
+ }
+
static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
u8* color_buffer = Memory::GetPointer(PAddrToVAddr(addr));
@@ -26,10 +39,12 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
// NOTE: The framebuffer height register contains the actual FB height minus one.
y = (registers.framebuffer.height - y);
+ u32 code = MortonCode2(x, y);
+
switch (registers.framebuffer.color_format) {
case registers.framebuffer.RGBA8:
{
- u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4;
+ u8* pixel = color_buffer + code * 4;
pixel[3] = color.r();
pixel[2] = color.g();
pixel[1] = color.b();
@@ -48,12 +63,13 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
u8* color_buffer = Memory::GetPointer(PAddrToVAddr(addr));
y = (registers.framebuffer.height - y);
+ u32 code = MortonCode2(x, y);
switch (registers.framebuffer.color_format) {
case registers.framebuffer.RGBA8:
{
Math::Vec4<u8> ret;
- u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4;
+ u8* pixel = color_buffer + code * 4;
ret.r() = pixel[3];
ret.g() = pixel[2];
ret.b() = pixel[1];
@@ -73,9 +89,10 @@ static u32 GetDepth(int x, int y) {
u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
y = (registers.framebuffer.height - y);
+ u32 code = MortonCode2(x, y);
// Assuming 16-bit depth buffer format until actual format handling is implemented
- return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
+ return depth_buffer[code];
}
static void SetDepth(int x, int y, u16 value) {
@@ -83,9 +100,10 @@ static void SetDepth(int x, int y, u16 value) {
u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
y = (registers.framebuffer.height - y);
+ u32 code = MortonCode2(x, y);
// Assuming 16-bit depth buffer format until actual format handling is implemented
- *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
+ depth_buffer[code] = value;
}
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 2726951..f357b5c 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -262,6 +262,7 @@ void RendererOpenGL::DrawScreens() {
DrawSingleScreenRotated(textures[0], top_x, 0,
(float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenTopHeight);
+ glFlush();
DrawSingleScreenRotated(textures[1], bottom_x, (float)VideoCore::kScreenTopHeight,
(float)VideoCore::kScreenBottomWidth, (float)VideoCore::kScreenBottomHeight);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment