Created
February 19, 2015 19:48
-
-
Save Subv/24540e39963b36a73940 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp | |
index ea3367e..ba3876a 100644 | |
--- a/src/video_core/clipper.cpp | |
+++ b/src/video_core/clipper.cpp | |
@@ -16,7 +16,10 @@ namespace Clipper { | |
struct ClippingEdge { | |
public: | |
ClippingEdge(Math::Vec4<float24> coeffs, | |
- Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0), float24::FromFloat32(0), float24::FromFloat32(0), float24::FromFloat32(0))) | |
+ Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0), | |
+ float24::FromFloat32(0), | |
+ float24::FromFloat32(0), | |
+ float24::FromFloat32(0))) | |
: coeffs(coeffs), | |
bias(bias) | |
{ | |
@@ -87,28 +90,26 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { | |
auto* output_list = &buffer_a; | |
auto* input_list = &buffer_b; | |
- // TODO: Test if dropping the whole primitive in this case reflects hardware behavior. | |
-/* ClippingEdge near_edge{ ClippingEdge::POS_Z, Math::Vec4<float24>(float24::FromFloat32(0), float24::FromFloat32(0), float24::FromFloat32(1), float24::FromFloat32(0)) }; | |
- if (near_edge.IsOutSide(v0) || near_edge.IsOutSide(v1) || near_edge.IsOutSide(v2)) | |
- return;*/ | |
- | |
// NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value. | |
// TODO: Not sure if this is a valid approach. Also should probably instead use the smallest | |
// epsilon possible within float24 accuracy. | |
static const float24 EPSILON = float24::FromFloat32(0.00001); | |
- static const float24 zero = float24::FromFloat32(0.0); | |
- static const float24 one = float24::FromFloat32(1.0); | |
+ static const float24 f0 = float24::FromFloat32(0.0); | |
+ static const float24 f1 = float24::FromFloat32(1.0); | |
static const std::array<ClippingEdge, 7> clipping_edges = {{ | |
- { Math::MakeVec(one, zero, zero, -one) }, // x = +w | |
- { Math::MakeVec(-one, zero, zero, -one) }, // x = -w | |
- { Math::MakeVec(zero, one, zero, -one) }, // y = +w | |
- { Math::MakeVec(zero, -one, zero, -one) }, // y = -w | |
- { Math::MakeVec(zero, zero, one, zero) }, // z = 0 | |
- { Math::MakeVec(zero, zero, -one, -one) }, // z = -w | |
- { Math::MakeVec(zero, zero, zero, -one), Math::Vec4<float24>(zero, zero, zero, EPSILON) }, // w = EPSILON | |
- | |
+ { Math::MakeVec( f1, f0, f0, -f1) }, // x = +w | |
+ { Math::MakeVec(-f1, f0, f0, -f1) }, // x = -w | |
+ { Math::MakeVec( f0, f1, f0, -f1) }, // y = +w | |
+ { Math::MakeVec( f0, -f1, f0, -f1) }, // y = -w | |
+ { Math::MakeVec( f0, f0, f1, f0) }, // z = 0 | |
+ { Math::MakeVec( f0, f0, -f1, -f1) }, // z = -w | |
+ { Math::MakeVec( f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON) }, // w = EPSILON | |
}}; | |
+ // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii) | |
+ // drop the whole primitive instead of clipping the primitive properly. We should test if | |
+ // this happens on the 3DS, too. | |
+ | |
// Simple implementation of the Sutherland-Hodgman clipping algorithm. | |
// TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) | |
for (auto edge : clipping_edges) { | |
@@ -160,7 +161,6 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { | |
vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32()); | |
Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2); | |
- Rasterizer::ProcessTriangle(vtx2, vtx1, vtx0); | |
} | |
} | |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp | |
index d1165bb..586ad62 100644 | |
--- a/src/video_core/command_processor.cpp | |
+++ b/src/video_core/command_processor.cpp | |
@@ -30,6 +30,10 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |
if (id >= registers.NumIds()) | |
return; | |
+ // If we're skipping this frame, only allow trigger IRQ | |
+ if (GPU::g_skip_frame && id != PICA_REG_INDEX(trigger_irq)) | |
+ return; | |
+ | |
// TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value | |
u32 old_value = registers[id]; | |
registers[id] = (old_value & ~mask) | (value & mask); | |
@@ -49,8 +53,6 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |
case PICA_REG_INDEX(trigger_draw): | |
case PICA_REG_INDEX(trigger_draw_indexed): | |
{ | |
- if (GPU::g_skip_frame) return; | |
- | |
DebugUtils::DumpTevStageConfig(registers.GetTevStages()); | |
if (g_debug_context) | |
@@ -61,15 +63,15 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |
// Information about internal vertex attributes | |
u32 vertex_attribute_sources[16]; | |
- std::fill(vertex_attribute_sources, &vertex_attribute_sources[16], 0xdeadbeef); | |
+ boost::fill(vertex_attribute_sources, 0xdeadbeef); | |
u32 vertex_attribute_strides[16]; | |
u32 vertex_attribute_formats[16]; | |
- u32 vertex_attribute_elements[16]; | |
- u32 vertex_attribute_element_size[16]; | |
// HACK: Initialize vertex_attribute_elements to zero to prevent infinite loops below. | |
// This is one of the hacks required to deal with uninitalized vertex attributes. | |
- boost::fill(vertex_attribute_elements, 0); | |
+ // TODO: Fix this properly. | |
+ u32 vertex_attribute_elements[16] = {}; | |
+ u32 vertex_attribute_element_size[16]; | |
// Setup attribute data from loaders | |
for (int loader = 0; loader < 12; ++loader) { | |
@@ -183,7 +185,6 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |
} | |
case PICA_REG_INDEX(vs_bool_uniforms): | |
- if (GPU::g_skip_frame) return; | |
for (unsigned i = 0; i < 16; ++i) | |
VertexShader::GetBoolUniform(i) = (registers.vs_bool_uniforms.Value() & (1 << i)) != 0; | |
@@ -194,7 +195,6 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |
case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3): | |
case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4): | |
{ | |
- if (GPU::g_skip_frame) return; | |
int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1)); | |
auto values = registers.vs_int_uniforms[index]; | |
VertexShader::GetIntUniform(index) = Math::Vec4<u8>(values.x, values.y, values.z, values.w); | |
@@ -212,7 +212,6 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7): | |
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8): | |
{ | |
- if (GPU::g_skip_frame) return; | |
auto& uniform_setup = registers.vs_uniform_setup; | |
// TODO: Does actual hardware indeed keep an intermediate buffer or does | |
@@ -280,7 +279,6 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |
case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): | |
case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): | |
{ | |
- if (GPU::g_skip_frame) return; | |
VertexShader::SubmitSwizzleDataChange(registers.vs_swizzle_patterns.offset, value); | |
registers.vs_swizzle_patterns.offset++; | |
break; | |
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp | |
index b60f3d5..0beb72e 100644 | |
--- a/src/video_core/debug_utils/debug_utils.cpp | |
+++ b/src/video_core/debug_utils/debug_utils.cpp | |
@@ -16,7 +16,7 @@ | |
#include <nihstro/shader_binary.h> | |
-#include "common/log.h" | |
+#include "common/assert.h" | |
#include "common/file_util.h" | |
#include "common/math_util.h" | |
@@ -189,7 +189,7 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | |
); | |
if (it == output_info_table.end()) { | |
- output_info_table.push_back({}); | |
+ output_info_table.emplace_back(); | |
output_info_table.back().type = type; | |
output_info_table.back().component_mask = component_mask; | |
output_info_table.back().id = i; | |
@@ -197,7 +197,7 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | |
it->component_mask = it->component_mask | component_mask; | |
} | |
} catch (const std::out_of_range& ) { | |
- _dbg_assert_msg_(HW_GPU, 0, "Unknown output attribute mapping"); | |
+ DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping"); | |
LOG_ERROR(HW_GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x", | |
(int)output_attributes[i].map_x.Value(), | |
(int)output_attributes[i].map_y.Value(), | |
@@ -285,7 +285,7 @@ void OnPicaRegWrite(u32 id, u32 value) | |
if (!is_pica_tracing) | |
return; | |
- pica_trace->writes.push_back({id, value}); | |
+ pica_trace->writes.emplace_back(id, value); | |
} | |
std::unique_ptr<PicaTrace> FinishPicaTracing() | |
@@ -489,7 +489,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |
BitField<34, 3, u64> table_index_2; | |
BitField<37, 3, u64> table_index_1; | |
- union Union2 { | |
+ union { | |
// delta value + base value | |
BitField<40, 3, s64> db; | |
BitField<43, 5, u64> b; | |
@@ -501,7 +501,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |
BitField<59, 5, u64> r; | |
} differential; | |
- union Union3 { | |
+ union { | |
BitField<40, 4, u64> b2; | |
BitField<44, 4, u64> b1; | |
@@ -547,7 +547,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |
// Add modifier | |
unsigned table_index = (x < 2) ? table_index_2.Value() : table_index_1.Value(); | |
- static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = std::array<std::array<u8, 2>, 8>{{ | |
+ static const auto etc1_modifier_table = std::array<std::array<u8, 2>, 8>{{ | |
{ 2, 8 }, { 5, 17 }, { 9, 29 }, { 13, 42 }, | |
{ 18, 60 }, { 24, 80 }, { 33, 106 }, { 47, 183 } | |
}}; | |
@@ -571,7 +571,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |
default: | |
LOG_ERROR(HW_GPU, "Unknown texture format: %x", (u32)info.format); | |
- /*_dbg_assert_(HW_GPU, 0);*/ | |
+ DEBUG_ASSERT(false); | |
return {}; | |
} | |
} | |
diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h | |
index a51d49c..48ac269 100644 | |
--- a/src/video_core/gpu_debugger.h | |
+++ b/src/video_core/gpu_debugger.h | |
@@ -8,8 +8,6 @@ | |
#include <functional> | |
#include <vector> | |
-#include "common/log.h" | |
- | |
#include "core/hle/service/gsp_gpu.h" | |
#include "command_processor.h" | |
@@ -60,13 +58,13 @@ public: | |
if (observers.empty()) | |
return; | |
- gx_command_history.push_back(GSP_GPU::Command()); | |
- GSP_GPU::Command& cmd = gx_command_history[gx_command_history.size()-1]; | |
+ gx_command_history.emplace_back(); | |
+ GSP_GPU::Command& cmd = gx_command_history.back(); | |
memcpy(&cmd, command_data, sizeof(GSP_GPU::Command)); | |
ForEachObserver([this](DebuggerObserver* observer) { | |
- observer->GXCommandProcessed(this->gx_command_history.size()); | |
+ observer->GXCommandProcessed(static_cast<int>(this->gx_command_history.size())); | |
} ); | |
} | |
diff --git a/src/video_core/math.h b/src/video_core/math.h | |
index 9622e76..f9a8226 100644 | |
--- a/src/video_core/math.h | |
+++ b/src/video_core/math.h | |
@@ -457,27 +457,41 @@ public: | |
const T& b() const { return z; } | |
const T& a() const { return w; } | |
- // swizzlers - create a subvector of specific components | |
+ // Swizzlers - Create a subvector of specific components | |
// e.g. Vec2 uv() { return Vec2(x,y); } | |
- // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) | |
+ | |
+ // _DEFINE_SWIZZLER2 defines a single such function | |
+ // DEFINE_SWIZZLER2_COMP1 defines one-component functions for all component names (x<->r) | |
+ // DEFINE_SWIZZLER2_COMP2 defines two-component functions for all component names (x<->r) and permutations (xy<->yx) | |
#define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); } | |
-#define DEFINE_SWIZZLER2(a, b, a2, b2) \ | |
+#define DEFINE_SWIZZLER2_COMP1(a, a2) \ | |
+ _DEFINE_SWIZZLER2(a, a, a##a); \ | |
+ _DEFINE_SWIZZLER2(a, a, a2##a2) | |
+#define DEFINE_SWIZZLER2_COMP2(a, b, a2, b2) \ | |
_DEFINE_SWIZZLER2(a, b, a##b); \ | |
_DEFINE_SWIZZLER2(a, b, a2##b2); \ | |
_DEFINE_SWIZZLER2(b, a, b##a); \ | |
_DEFINE_SWIZZLER2(b, a, b2##a2) | |
- DEFINE_SWIZZLER2(x, y, r, g); | |
- DEFINE_SWIZZLER2(x, z, r, b); | |
- DEFINE_SWIZZLER2(x, w, r, a); | |
- DEFINE_SWIZZLER2(y, z, g, b); | |
- DEFINE_SWIZZLER2(y, w, g, a); | |
- DEFINE_SWIZZLER2(z, w, b, a); | |
-#undef DEFINE_SWIZZLER2 | |
+ DEFINE_SWIZZLER2_COMP2(x, y, r, g); | |
+ DEFINE_SWIZZLER2_COMP2(x, z, r, b); | |
+ DEFINE_SWIZZLER2_COMP2(x, w, r, a); | |
+ DEFINE_SWIZZLER2_COMP2(y, z, g, b); | |
+ DEFINE_SWIZZLER2_COMP2(y, w, g, a); | |
+ DEFINE_SWIZZLER2_COMP2(z, w, b, a); | |
+ DEFINE_SWIZZLER2_COMP1(x, r); | |
+ DEFINE_SWIZZLER2_COMP1(y, g); | |
+ DEFINE_SWIZZLER2_COMP1(z, b); | |
+ DEFINE_SWIZZLER2_COMP1(w, a); | |
+#undef DEFINE_SWIZZLER2_COMP1 | |
+#undef DEFINE_SWIZZLER2_COMP2 | |
#undef _DEFINE_SWIZZLER2 | |
#define _DEFINE_SWIZZLER3(a, b, c, name) const Vec3<T> name() const { return Vec3<T>(a, b, c); } | |
-#define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \ | |
+#define DEFINE_SWIZZLER3_COMP1(a, a2) \ | |
+ _DEFINE_SWIZZLER3(a, a, a, a##a##a); \ | |
+ _DEFINE_SWIZZLER3(a, a, a, a2##a2##a2) | |
+#define DEFINE_SWIZZLER3_COMP3(a, b, c, a2, b2, c2) \ | |
_DEFINE_SWIZZLER3(a, b, c, a##b##c); \ | |
_DEFINE_SWIZZLER3(a, c, b, a##c##b); \ | |
_DEFINE_SWIZZLER3(b, a, c, b##a##c); \ | |
@@ -491,11 +505,16 @@ public: | |
_DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \ | |
_DEFINE_SWIZZLER3(c, b, a, c2##b2##a2) | |
- DEFINE_SWIZZLER3(x, y, z, r, g, b); | |
- DEFINE_SWIZZLER3(x, y, w, r, g, a); | |
- DEFINE_SWIZZLER3(x, z, w, r, b, a); | |
- DEFINE_SWIZZLER3(y, z, w, g, b, a); | |
-#undef DEFINE_SWIZZLER3 | |
+ DEFINE_SWIZZLER3_COMP3(x, y, z, r, g, b); | |
+ DEFINE_SWIZZLER3_COMP3(x, y, w, r, g, a); | |
+ DEFINE_SWIZZLER3_COMP3(x, z, w, r, b, a); | |
+ DEFINE_SWIZZLER3_COMP3(y, z, w, g, b, a); | |
+ DEFINE_SWIZZLER3_COMP1(x, r); | |
+ DEFINE_SWIZZLER3_COMP1(y, g); | |
+ DEFINE_SWIZZLER3_COMP1(z, b); | |
+ DEFINE_SWIZZLER3_COMP1(w, a); | |
+#undef DEFINE_SWIZZLER3_COMP1 | |
+#undef DEFINE_SWIZZLER3_COMP3 | |
#undef _DEFINE_SWIZZLER3 | |
}; | |
@@ -612,7 +631,7 @@ static inline Vec4<T> MakeVec(const Vec3<T>& xyz, const T& w) | |
} | |
template<typename T> | |
-static inline Vec4<T> MakeVec(const T& x, const Vec2<T>& yzw) | |
+static inline Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) | |
{ | |
return MakeVec(x, yzw[0], yzw[1], yzw[2]); | |
} | |
diff --git a/src/video_core/pica.h b/src/video_core/pica.h | |
index bc863df..e4a5ef7 100644 | |
--- a/src/video_core/pica.h | |
+++ b/src/video_core/pica.h | |
@@ -236,19 +236,29 @@ struct Regs { | |
}; | |
enum class ColorModifier : u32 { | |
- SourceColor = 0, | |
- OneMinusSourceColor = 1, | |
- SourceAlpha = 2, | |
- OneMinusSourceAlpha = 3, | |
- | |
- // Other values seem to be non-standard extensions | |
+ SourceColor = 0x0, | |
+ OneMinusSourceColor = 0x1, | |
+ SourceAlpha = 0x2, | |
+ OneMinusSourceAlpha = 0x3, | |
+ SourceRed = 0x4, | |
+ OneMinusSourceRed = 0x5, | |
+ | |
+ SourceGreen = 0x8, | |
+ OneMinusSourceGreen = 0x9, | |
+ | |
+ SourceBlue = 0xc, | |
+ OneMinusSourceBlue = 0xd, | |
}; | |
enum class AlphaModifier : u32 { | |
- SourceAlpha = 0, | |
- OneMinusSourceAlpha = 1, | |
- | |
- // Other values seem to be non-standard extensions | |
+ SourceAlpha = 0x0, | |
+ OneMinusSourceAlpha = 0x1, | |
+ SourceRed = 0x2, | |
+ OneMinusSourceRed = 0x3, | |
+ SourceGreen = 0x4, | |
+ OneMinusSourceGreen = 0x5, | |
+ SourceBlue = 0x6, | |
+ OneMinusSourceBlue = 0x7, | |
}; | |
enum class Operation : u32 { | |
@@ -333,16 +343,30 @@ struct Regs { | |
}; | |
union { | |
- enum BlendEquation : u32 { | |
- Add = 0, | |
+ enum class BlendEquation : u32 { | |
+ Add = 0, | |
+ Subtract = 1, | |
+ ReverseSubtract = 2, | |
+ Min = 3, | |
+ Max = 4 | |
}; | |
enum BlendFactor : u32 { | |
- Zero = 0, | |
- One = 1, | |
- | |
- SourceAlpha = 6, | |
- OneMinusSourceAlpha = 7, | |
+ Zero = 0, | |
+ One = 1, | |
+ SourceColor = 2, | |
+ OneMinusSourceColor = 3, | |
+ DestColor = 4, | |
+ OneMinusDestColor = 5, | |
+ SourceAlpha = 6, | |
+ OneMinusSourceAlpha = 7, | |
+ DestAlpha = 8, | |
+ OneMinusDestAlpha = 9, | |
+ ConstantColor = 10, | |
+ OneMinusConstantColor = 11, | |
+ ConstantAlpha = 12, | |
+ OneMinusConstantAlpha = 13, | |
+ SourceAlphaSaturate = 14 | |
}; | |
BitField< 0, 8, BlendEquation> blend_equation_rgb; | |
@@ -363,7 +387,12 @@ struct Regs { | |
BitField<0, 4, Op> op; | |
} logic_op; | |
- INSERT_PADDING_WORDS(0x1); | |
+ union { | |
+ BitField< 0, 8, u32> r; | |
+ BitField< 8, 8, u32> g; | |
+ BitField<16, 8, u32> b; | |
+ BitField<24, 8, u32> a; | |
+ } blend_const; | |
union { | |
BitField< 0, 1, u32> enable; | |
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp | |
index 242a07e..1776a19 100644 | |
--- a/src/video_core/primitive_assembly.cpp | |
+++ b/src/video_core/primitive_assembly.cpp | |
@@ -6,6 +6,7 @@ | |
#include "primitive_assembly.h" | |
#include "vertex_shader.h" | |
+#include "common/logging/log.h" | |
#include "video_core/debug_utils/debug_utils.h" | |
namespace Pica { | |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp | |
index c9e0a79..81df09b 100644 | |
--- a/src/video_core/rasterizer.cpp | |
+++ b/src/video_core/rasterizer.cpp | |
@@ -5,6 +5,7 @@ | |
#include <algorithm> | |
#include "common/common_types.h" | |
+#include "common/math_util.h" | |
#include "math.h" | |
#include "pica.h" | |
@@ -35,7 +36,7 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | |
default: | |
LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); | |
- exit(1); | |
+ UNIMPLEMENTED(); | |
} | |
} | |
@@ -57,8 +58,6 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |
static u32 GetDepth(int x, int y) { | |
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | |
u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | |
- if (!depth_buffer) | |
- return 0; | |
y = (registers.framebuffer.height - y); | |
@@ -69,8 +68,6 @@ static u32 GetDepth(int x, int y) { | |
static void SetDepth(int x, int y, u16 value) { | |
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | |
u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | |
- if (!depth_buffer) | |
- return; | |
y = (registers.framebuffer.height - y); | |
@@ -113,47 +110,45 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, | |
return Math::Cross(vec1, vec2).z; | |
}; | |
-void ProcessTriangle(const VertexShader::OutputVertex& v0_, | |
- const VertexShader::OutputVertex& v1_, | |
- const VertexShader::OutputVertex& v2_) | |
+/** | |
+ * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing | |
+ * culling via recursion. | |
+ */ | |
+static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |
+ const VertexShader::OutputVertex& v1, | |
+ const VertexShader::OutputVertex& v2, | |
+ bool reversed = false) | |
{ | |
// vertex positions in rasterizer coordinates | |
- auto FloatToFix = [](float24 flt) { | |
- // TODO: Rounding here is necessary to prevent garbage pixels at | |
- // triangle borders. Is it that the correct solution, though? | |
- return Fix12P4(static_cast<unsigned short>(round(flt.ToFloat32() * 16.0f))); | |
- }; | |
- auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) { | |
- return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; | |
- }; | |
- | |
- VertexShader::OutputVertex v0 = v0_; | |
- VertexShader::OutputVertex v1 = v1_; | |
- VertexShader::OutputVertex v2 = v2_; | |
+ static auto FloatToFix = [](float24 flt) { | |
+ // TODO: Rounding here is necessary to prevent garbage pixels at | |
+ // triangle borders. Is that the correct solution, though? | |
+ return Fix12P4(static_cast<unsigned short>(round(flt.ToFloat32() * 16.0f))); | |
+ }; | |
+ static auto ScreenToRasterizerCoordinates = [](const Math::Vec3<float24>& vec) { | |
+ return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; | |
+ }; | |
+ | |
Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), | |
ScreenToRasterizerCoordinates(v1.screenpos), | |
ScreenToRasterizerCoordinates(v2.screenpos) }; | |
- if (registers.cull_mode == Regs::CullMode::KeepCounterClockWise) { | |
- // Reverse vertex order and use the CW code path. | |
- std::swap(vtxpos[1], vtxpos[2]); | |
- std::swap(v1, v2); | |
- } | |
- | |
- if (registers.cull_mode != Regs::CullMode::KeepAll) { | |
- // Cull away triangles which are wound counter-clockwise. | |
- // TODO: Make work :( | |
- if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) { | |
- std::swap(vtxpos[1], vtxpos[2]); | |
- std::swap(v1, v2); | |
-// return; | |
+ if (registers.cull_mode == Regs::CullMode::KeepAll) { | |
+ // Make sure we always end up with a triangle wound counter-clockwise | |
+ if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) { | |
+ ProcessTriangleInternal(v0, v2, v1, true); | |
+ return; | |
} | |
} else { | |
- // TODO: Consider a check for degenerate triangles ("SignedArea == 0") | |
- if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) { | |
- std::swap(vtxpos[1], vtxpos[2]); | |
- std::swap(v1, v2); | |
+ if (!reversed && registers.cull_mode == Regs::CullMode::KeepClockWise) { | |
+ // Reverse vertex order and use the CCW code path. | |
+ ProcessTriangleInternal(v0, v2, v1, true); | |
+ return; | |
} | |
+ | |
+ // Cull away triangles which are wound clockwise. | |
+ if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) | |
+ return; | |
} | |
// TODO: Proper scissor rect test! | |
@@ -255,7 +250,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_, | |
if (!texture.enabled) | |
continue; | |
- //_dbg_assert_(HW_GPU, 0 != texture.config.address); | |
+ DEBUG_ASSERT(0 != texture.config.address); | |
int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); | |
int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); | |
@@ -267,25 +262,25 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_, | |
return val; | |
case Regs::TextureConfig::Repeat: | |
- return (int)(((unsigned)val) % size); | |
+ return (int)((unsigned)val % size); | |
case Regs::TextureConfig::MirroredRepeat: | |
{ | |
- int val = (int)(((unsigned)val) % (2*size)); | |
- if (val >= size) | |
- val = 2 * size - 1 - val; | |
- return val; | |
+ int coord = (int)((unsigned)val % (2 * size)); | |
+ if (coord >= size) | |
+ coord = 2 * size - 1 - coord; | |
+ return coord; | |
} | |
default: | |
LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode); | |
- //_dbg_assert_(HW_GPU, 0); | |
+ UNIMPLEMENTED(); | |
return 0; | |
} | |
}; | |
// Textures are laid out from bottom to top, hence we invert the t coordinate. | |
- // NOTE: This may not be the right to place the inversion. | |
+ // NOTE: This may not be the right place for the inversion. | |
// TODO: Check if this applies to ETC textures, too. | |
s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); | |
t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); | |
@@ -335,41 +330,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_, | |
default: | |
LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source); | |
- //_dbg_assert_(HW_GPU, 0); | |
+ UNIMPLEMENTED(); | |
return {}; | |
} | |
}; | |
- auto GetAlphaSource = [&](Source source) -> u8 { | |
- switch (source) { | |
- case Source::PrimaryColor: | |
- return primary_color.a(); | |
- | |
- case Source::Texture0: | |
- return texture_color[0].a(); | |
- | |
- case Source::Texture1: | |
- return texture_color[1].a(); | |
- | |
- case Source::Texture2: | |
- return texture_color[2].a(); | |
- | |
- case Source::Constant: | |
- return tev_stage.const_a; | |
- | |
- case Source::Previous: | |
- return combiner_output.a(); | |
- | |
- default: | |
- LOG_ERROR(HW_GPU, "Unknown alpha combiner source %d\n", (int)source); | |
- //_dbg_assert_(HW_GPU, 0); | |
- return 0; | |
- } | |
- }; | |
- | |
static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { | |
- switch (factor) | |
- { | |
+ switch (factor) { | |
case ColorModifier::SourceColor: | |
return values.rgb(); | |
@@ -377,12 +344,28 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_, | |
return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>(); | |
case ColorModifier::SourceAlpha: | |
- return { values.a(), values.a(), values.a() }; | |
+ return values.aaa(); | |
- default: | |
- LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor); | |
- //_dbg_assert_(HW_GPU, 0); | |
- return {}; | |
+ case ColorModifier::OneMinusSourceAlpha: | |
+ return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>(); | |
+ | |
+ case ColorModifier::SourceRed: | |
+ return values.rrr(); | |
+ | |
+ case ColorModifier::OneMinusSourceRed: | |
+ return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>(); | |
+ | |
+ case ColorModifier::SourceGreen: | |
+ return values.ggg(); | |
+ | |
+ case ColorModifier::OneMinusSourceGreen: | |
+ return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>(); | |
+ | |
+ case ColorModifier::SourceBlue: | |
+ return values.bbb(); | |
+ | |
+ case ColorModifier::OneMinusSourceBlue: | |
+ return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>(); | |
} | |
}; | |
@@ -394,10 +377,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_, | |
case AlphaModifier::OneMinusSourceAlpha: | |
return 255 - values.a(); | |
- default: | |
- LOG_ERROR(HW_GPU, "Unknown alpha factor %d\n", (int)factor); | |
- //_dbg_assert_(HW_GPU, 0); | |
- return 0; | |
+ case AlphaModifier::SourceRed: | |
+ return values.r(); | |
+ | |
+ case AlphaModifier::OneMinusSourceRed: | |
+ return 255 - values.r(); | |
+ | |
+ case AlphaModifier::SourceGreen: | |
+ return values.g(); | |
+ | |
+ case AlphaModifier::OneMinusSourceGreen: | |
+ return 255 - values.g(); | |
+ | |
+ case AlphaModifier::SourceBlue: | |
+ return values.b(); | |
+ | |
+ case AlphaModifier::OneMinusSourceBlue: | |
+ return 255 - values.b(); | |
} | |
}; | |
@@ -451,7 +447,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_, | |
default: | |
LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); | |
- //_dbg_assert_(HW_GPU, 0); | |
+ UNIMPLEMENTED(); | |
return {}; | |
} | |
}; | |
@@ -481,7 +477,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_, | |
default: | |
LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); | |
- //_dbg_assert_(HW_GPU, 0); | |
+ UNIMPLEMENTED(); | |
return 0; | |
} | |
}; | |
@@ -607,28 +603,58 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_, | |
auto params = registers.output_merger.alpha_blending; | |
auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> { | |
- switch(factor) { | |
+ switch (factor) { | |
case params.Zero: | |
return Math::Vec3<u8>(0, 0, 0); | |
case params.One: | |
return Math::Vec3<u8>(255, 255, 255); | |
+ case params.SourceColor: | |
+ return combiner_output.rgb(); | |
+ | |
+ case params.OneMinusSourceColor: | |
+ return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b()); | |
+ | |
+ case params.DestColor: | |
+ return dest.rgb(); | |
+ | |
+ case params.OneMinusDestColor: | |
+ return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b()); | |
+ | |
case params.SourceAlpha: | |
- return Math::MakeVec(combiner_output.a(), combiner_output.a(), combiner_output.a()); | |
+ return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a()); | |
case params.OneMinusSourceAlpha: | |
- return Math::Vec3<u8>(255-combiner_output.a(), 255-combiner_output.a(), 255-combiner_output.a()); | |
+ return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a()); | |
+ | |
+ case params.DestAlpha: | |
+ return Math::Vec3<u8>(dest.a(), dest.a(), dest.a()); | |
+ | |
+ case params.OneMinusDestAlpha: | |
+ return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a()); | |
+ | |
+ case params.ConstantColor: | |
+ return Math::Vec3<u8>(registers.output_merger.blend_const.r, registers.output_merger.blend_const.g, registers.output_merger.blend_const.b); | |
+ | |
+ case params.OneMinusConstantColor: | |
+ return Math::Vec3<u8>(255 - registers.output_merger.blend_const.r, 255 - registers.output_merger.blend_const.g, 255 - registers.output_merger.blend_const.b); | |
+ | |
+ case params.ConstantAlpha: | |
+ return Math::Vec3<u8>(registers.output_merger.blend_const.a, registers.output_merger.blend_const.a, registers.output_merger.blend_const.a); | |
+ | |
+ case params.OneMinusConstantAlpha: | |
+ return Math::Vec3<u8>(255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a); | |
default: | |
- return Math::Vec3<u8>(0, 0, 0); //LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); | |
- //exit(0); | |
+ LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); | |
+ UNIMPLEMENTED(); | |
break; | |
} | |
}; | |
auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 { | |
- switch(factor) { | |
+ switch (factor) { | |
case params.Zero: | |
return 0; | |
@@ -641,11 +667,73 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_, | |
case params.OneMinusSourceAlpha: | |
return 255 - combiner_output.a(); | |
+ case params.DestAlpha: | |
+ return dest.a(); | |
+ | |
+ case params.OneMinusDestAlpha: | |
+ return 255 - dest.a(); | |
+ | |
+ case params.ConstantAlpha: | |
+ return registers.output_merger.blend_const.a; | |
+ | |
+ case params.OneMinusConstantAlpha: | |
+ return 255 - registers.output_merger.blend_const.a; | |
+ | |
default: | |
- return 0; //LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); | |
- //exit(0); | |
+ LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); | |
+ UNIMPLEMENTED(); | |
+ break; | |
+ } | |
+ }; | |
+ | |
+ using BlendEquation = decltype(params)::BlendEquation; | |
+ static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | |
+ const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | |
+ BlendEquation equation) { | |
+ Math::Vec4<int> result; | |
+ | |
+ auto src_result = (src * srcfactor).Cast<int>(); | |
+ auto dst_result = (dest * destfactor).Cast<int>(); | |
+ | |
+ switch (equation) { | |
+ case BlendEquation::Add: | |
+ result = (src_result + dst_result) / 255; | |
break; | |
+ | |
+ case BlendEquation::Subtract: | |
+ result = (src_result - dst_result) / 255; | |
+ break; | |
+ | |
+ case BlendEquation::ReverseSubtract: | |
+ result = (dst_result - src_result) / 255; | |
+ break; | |
+ | |
+ // TODO: How do these two actually work? | |
+ // OpenGL doesn't include the blend factors in the min/max computations, | |
+ // but is this what the 3DS actually does? | |
+ case BlendEquation::Min: | |
+ result.r() = std::min(src.r(), dest.r()); | |
+ result.g() = std::min(src.g(), dest.g()); | |
+ result.b() = std::min(src.b(), dest.b()); | |
+ result.a() = std::min(src.a(), dest.a()); | |
+ break; | |
+ | |
+ case BlendEquation::Max: | |
+ result.r() = std::max(src.r(), dest.r()); | |
+ result.g() = std::max(src.g(), dest.g()); | |
+ result.b() = std::max(src.b(), dest.b()); | |
+ result.a() = std::max(src.a(), dest.a()); | |
+ break; | |
+ | |
+ default: | |
+ LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation); | |
+ UNIMPLEMENTED(); | |
} | |
+ | |
+ return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), | |
+ MathUtil::Clamp(result.g(), 0, 255), | |
+ MathUtil::Clamp(result.b(), 0, 255), | |
+ MathUtil::Clamp(result.a(), 0, 255)); | |
}; | |
auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), | |
@@ -653,38 +741,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_, | |
auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), | |
LookupFactorA(params.factor_dest_a)); | |
- switch (params.blend_equation_rgb) { | |
- case params.Add: | |
- { | |
- auto result = (combiner_output * srcfactor + dest * dstfactor) / 255; | |
- result.r() = std::min(255, result.r()); | |
- result.g() = std::min(255, result.g()); | |
- result.b() = std::min(255, result.b()); | |
- blend_output = result.Cast<u8>(); | |
- break; | |
- } | |
- | |
- default: | |
- LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value()); | |
- exit(0); | |
- } | |
- | |
- switch (params.blend_equation_a) { | |
- case params.Add: | |
- { | |
- auto result = (combiner_output * srcfactor + dest * dstfactor) / 255; | |
- result.a() = std::min(255, result.a()); | |
- blend_output.a() = result.Cast<u8>().a(); | |
- break; | |
- } | |
- | |
- default: | |
- LOG_CRITICAL(HW_GPU, "Unknown alpha blend equation %x", params.blend_equation_a.Value()); | |
- exit(0); | |
- } | |
+ blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); | |
+ blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); | |
} else { | |
LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op); | |
- exit(0); | |
+ UNIMPLEMENTED(); | |
} | |
const Math::Vec4<u8> result = { | |
@@ -699,6 +760,12 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_, | |
} | |
} | |
+void ProcessTriangle(const VertexShader::OutputVertex& v0, | |
+ const VertexShader::OutputVertex& v1, | |
+ const VertexShader::OutputVertex& v2) { | |
+ ProcessTriangleInternal(v0, v1, v2); | |
+} | |
+ | |
} // namespace Rasterizer | |
} // namespace Pica | |
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp | |
index e982e37..42d0e59 100644 | |
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp | |
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp | |
@@ -3,7 +3,7 @@ | |
// Refer to the license.txt file included. | |
#include "gl_shader_util.h" | |
-#include "common/log.h" | |
+#include "common/logging/log.h" | |
#include <vector> | |
#include <algorithm> | |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp | |
index aa47bd6..2726951 100644 | |
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp | |
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |
@@ -61,15 +61,13 @@ void RendererOpenGL::SwapBuffers() { | |
for(int i : {0, 1}) { | |
const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; | |
- if (textures[i].width != (GLsizei)framebuffer.width || textures[i].height != (GLsizei)framebuffer.height) { | |
+ if (textures[i].width != (GLsizei)framebuffer.width || | |
+ textures[i].height != (GLsizei)framebuffer.height || | |
+ textures[i].format != framebuffer.color_format) { | |
// Reallocate texture if the framebuffer size has changed. | |
// This is expected to not happen very often and hence should not be a | |
// performance problem. | |
- glBindTexture(GL_TEXTURE_2D, textures[i].handle); | |
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, framebuffer.width, framebuffer.height, 0, | |
- GL_BGR, GL_UNSIGNED_BYTE, nullptr); | |
- textures[i].width = framebuffer.width; | |
- textures[i].height = framebuffer.height; | |
+ ConfigureFramebufferTexture(textures[i], framebuffer); | |
} | |
LoadFBToActiveGLTexture(GPU::g_regs.framebuffer_config[i], textures[i]); | |
@@ -98,16 +96,15 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& | |
const u8* framebuffer_data = Memory::GetPointer(framebuffer_vaddr); | |
- // TODO: Handle other pixel formats | |
- _dbg_assert_msg_(Render_OpenGL, framebuffer.color_format == GPU::Regs::PixelFormat::RGB8, | |
- "Unsupported 3DS pixel format."); | |
+ int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); | |
+ size_t pixel_stride = framebuffer.stride / bpp; | |
- size_t pixel_stride = framebuffer.stride / 3; | |
// OpenGL only supports specifying a stride in units of pixels, not bytes, unfortunately | |
- _dbg_assert_(Render_OpenGL, pixel_stride * 3 == framebuffer.stride); | |
+ ASSERT(pixel_stride * bpp == framebuffer.stride); | |
+ | |
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default | |
// only allows rows to have a memory alignement of 4. | |
- _dbg_assert_(Render_OpenGL, pixel_stride % 4 == 0); | |
+ ASSERT(pixel_stride % 4 == 0); | |
glBindTexture(GL_TEXTURE_2D, texture.handle); | |
glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride); | |
@@ -118,7 +115,7 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& | |
// TODO: Applications could theoretically crash Citra here by specifying too large | |
// framebuffer sizes. We should make sure that this cannot happen. | |
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, | |
- GL_BGR, GL_UNSIGNED_BYTE, framebuffer_data); | |
+ texture.gl_format, texture.gl_type, framebuffer_data); | |
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); | |
@@ -171,6 +168,59 @@ void RendererOpenGL::InitOpenGLObjects() { | |
glBindTexture(GL_TEXTURE_2D, 0); | |
} | |
+void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | |
+ const GPU::Regs::FramebufferConfig& framebuffer) { | |
+ GPU::Regs::PixelFormat format = framebuffer.color_format; | |
+ GLint internal_format; | |
+ | |
+ texture.format = format; | |
+ texture.width = framebuffer.width; | |
+ texture.height = framebuffer.height; | |
+ | |
+ switch (format) { | |
+ case GPU::Regs::PixelFormat::RGBA8: | |
+ internal_format = GL_RGBA; | |
+ texture.gl_format = GL_RGBA; | |
+ texture.gl_type = GL_UNSIGNED_INT_8_8_8_8; | |
+ break; | |
+ | |
+ case GPU::Regs::PixelFormat::RGB8: | |
+ // This pixel format uses BGR because GL_UNSIGNED_BYTE is read in byte order, unlike the | |
+ // packed OpenGL types used elsewhere in this function, which are read as native-endian | |
+ // (that is, little-endian almost everywhere) words or half-words. | |
+ // TODO: Check how the packed types behave on big-endian processors. | |
+ internal_format = GL_RGB; | |
+ texture.gl_format = GL_BGR; | |
+ texture.gl_type = GL_UNSIGNED_BYTE; | |
+ break; | |
+ | |
+ case GPU::Regs::PixelFormat::RGB565: | |
+ internal_format = GL_RGB; | |
+ texture.gl_format = GL_RGB; | |
+ texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; | |
+ break; | |
+ | |
+ case GPU::Regs::PixelFormat::RGB5A1: | |
+ internal_format = GL_RGBA; | |
+ texture.gl_format = GL_RGBA; | |
+ texture.gl_type = GL_UNSIGNED_SHORT_5_5_5_1; | |
+ break; | |
+ | |
+ case GPU::Regs::PixelFormat::RGBA4: | |
+ internal_format = GL_RGBA; | |
+ texture.gl_format = GL_RGBA; | |
+ texture.gl_type = GL_UNSIGNED_SHORT_4_4_4_4; | |
+ break; | |
+ | |
+ default: | |
+ UNIMPLEMENTED(); | |
+ } | |
+ | |
+ glBindTexture(GL_TEXTURE_2D, texture.handle); | |
+ glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, | |
+ texture.gl_format, texture.gl_type, nullptr); | |
+} | |
+ | |
/** | |
* Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation. | |
*/ | |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h | |
index cf78c1e..bcabab5 100644 | |
--- a/src/video_core/renderer_opengl/renderer_opengl.h | |
+++ b/src/video_core/renderer_opengl/renderer_opengl.h | |
@@ -43,9 +43,14 @@ private: | |
GLuint handle; | |
GLsizei width; | |
GLsizei height; | |
+ GPU::Regs::PixelFormat format; | |
+ GLenum gl_format; | |
+ GLenum gl_type; | |
}; | |
void InitOpenGLObjects(); | |
+ static void ConfigureFramebufferTexture(TextureInfo& texture, | |
+ const GPU::Regs::FramebufferConfig& framebuffer); | |
void DrawScreens(); | |
void DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h); | |
void UpdateFramerate(); | |
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp | |
index cb6785a..def868a 100644 | |
--- a/src/video_core/vertex_shader.cpp | |
+++ b/src/video_core/vertex_shader.cpp | |
@@ -85,9 +85,11 @@ struct VertexShaderState { | |
}; | |
struct CallStackElement { | |
- u32 final_address; | |
- u32 return_address; | |
- std::function<int(VertexShaderState&)> branch_end_callback; | |
+ u32 final_address; // Address upon which we jump to return_address | |
+ u32 return_address; // Where to jump when leaving scope | |
+ u8 repeat_counter; // How often to repeat until this call stack element is removed | |
+ u8 loop_increment; // Which value to add to the loop counter after an iteration | |
+ // TODO: Should this be a signed value? Does it even matter? | |
}; | |
// TODO: Is there a maximal size for this? | |
@@ -106,10 +108,12 @@ static void ProcessShaderCode(VertexShaderState& state) { | |
while (true) { | |
if (!state.call_stack.empty()) { | |
- if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) { | |
+ auto& top = state.call_stack.top(); | |
+ if (state.program_counter - shader_memory.data() == top.final_address) { | |
+ state.address_registers[2] += top.loop_increment; | |
- if (state.call_stack.top().branch_end_callback(state)) { | |
- state.program_counter = &shader_memory[state.call_stack.top().return_address]; | |
+ if (top.repeat_counter-- == 0) { | |
+ state.program_counter = &shader_memory[top.return_address]; | |
state.call_stack.pop(); | |
} | |
@@ -122,11 +126,10 @@ static void ProcessShaderCode(VertexShaderState& state) { | |
const Instruction& instr = *(const Instruction*)state.program_counter; | |
const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; | |
- auto call = [&](VertexShaderState& state, u32 offset, u32 num_instructions, | |
- u32 return_offset, | |
- std::function<int(VertexShaderState&)> branch_end_callback) { | |
+ static auto call = [](VertexShaderState& state, u32 offset, u32 num_instructions, | |
+ u32 return_offset, u8 repeat_count, u8 loop_increment) { | |
state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | |
- state.call_stack.push({ offset + num_instructions, return_offset, branch_end_callback }); | |
+ state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment }); | |
}; | |
u32 binary_offset = state.program_counter - shader_memory.data(); | |
@@ -152,13 +155,10 @@ static void ProcessShaderCode(VertexShaderState& state) { | |
case Instruction::OpCodeType::Arithmetic: | |
{ | |
bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); | |
- if (is_inverted) { | |
- // TODO: We don't really support this properly: For instance, the address register | |
- // offset needs to be applied to SRC2 instead, etc. | |
- // For now, we just abort in this situation. | |
- LOG_CRITICAL(HW_GPU, "Bad condition..."); | |
- exit(0); | |
- } | |
+ // TODO: We don't really support this properly: For instance, the address register | |
+ // offset needs to be applied to SRC2 instead, etc. | |
+ // For now, we just abort in this situation. | |
+ ASSERT_MSG(!is_inverted, "Bad condition..."); | |
const int address_offset = (instr.common.address_register_index == 0) | |
? 0 : state.address_registers[instr.common.address_register_index - 1]; | |
@@ -166,8 +166,6 @@ static void ProcessShaderCode(VertexShaderState& state) { | |
const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset); | |
const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); | |
- if (!src1_ || !src2_) break; | |
- | |
const bool negate_src1 = ((bool)swizzle.negate_src1 != false); | |
const bool negate_src2 = ((bool)swizzle.negate_src2 != false); | |
@@ -263,7 +261,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |
// TODO: Be stable against division by zero! | |
// TODO: I think this might be wrong... we should only use one component here | |
- dest[i] = float24::FromFloat32(1.0 / src1[i].ToFloat32()); | |
+ dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); | |
} | |
break; | |
@@ -278,7 +276,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |
// TODO: Be stable against division by zero! | |
// TODO: I think this might be wrong... we should only use one component here | |
- dest[i] = float24::FromFloat32(1.0 / sqrt(src1[i].ToFloat32())); | |
+ dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); | |
} | |
break; | |
@@ -350,7 +348,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |
default: | |
LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", | |
(int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); | |
- _dbg_assert_(HW_GPU, 0); | |
+ DEBUG_ASSERT(false); | |
break; | |
} | |
@@ -468,8 +466,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |
call(state, | |
instr.flow_control.dest_offset, | |
instr.flow_control.num_instructions, | |
- binary_offset + 1, | |
- [](VertexShaderState&) { return true; }); | |
+ binary_offset + 1, 0, 0); | |
break; | |
case Instruction::OpCode::CALLU: | |
@@ -477,9 +474,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |
call(state, | |
instr.flow_control.dest_offset, | |
instr.flow_control.num_instructions, | |
- binary_offset + 1, | |
- [](VertexShaderState&) { return true; } | |
- ); | |
+ binary_offset + 1, 0, 0); | |
} | |
break; | |
@@ -488,9 +483,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |
call(state, | |
instr.flow_control.dest_offset, | |
instr.flow_control.num_instructions, | |
- binary_offset + 1, | |
- [](VertexShaderState&) { return true; } | |
- ); | |
+ binary_offset + 1, 0, 0); | |
} | |
break; | |
@@ -502,14 +495,12 @@ static void ProcessShaderCode(VertexShaderState& state) { | |
call(state, | |
binary_offset + 1, | |
instr.flow_control.dest_offset - binary_offset - 1, | |
- instr.flow_control.dest_offset + instr.flow_control.num_instructions, | |
- [](VertexShaderState&) { return true; }); | |
+ instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | |
} else { | |
call(state, | |
instr.flow_control.dest_offset, | |
instr.flow_control.num_instructions, | |
- instr.flow_control.dest_offset + instr.flow_control.num_instructions, | |
- [](VertexShaderState&) { return true; }); | |
+ instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | |
} | |
break; | |
@@ -522,14 +513,12 @@ static void ProcessShaderCode(VertexShaderState& state) { | |
call(state, | |
binary_offset + 1, | |
instr.flow_control.dest_offset - binary_offset - 1, | |
- instr.flow_control.dest_offset + instr.flow_control.num_instructions, | |
- [](VertexShaderState&) { return true; }); | |
+ instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | |
} else { | |
call(state, | |
instr.flow_control.dest_offset, | |
instr.flow_control.num_instructions, | |
- instr.flow_control.dest_offset + instr.flow_control.num_instructions, | |
- [](VertexShaderState&) { return true; }); | |
+ instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | |
} | |
break; | |
@@ -537,25 +526,14 @@ static void ProcessShaderCode(VertexShaderState& state) { | |
case Instruction::OpCode::LOOP: | |
{ | |
- LOG_ERROR(HW_GPU, "%x %x %x %x %x", state.address_registers[2], | |
- shader_uniforms.i[instr.flow_control.int_uniform_id].x, | |
- shader_uniforms.i[instr.flow_control.int_uniform_id].y, | |
- shader_uniforms.i[instr.flow_control.int_uniform_id].z, | |
- shader_uniforms.i[instr.flow_control.int_uniform_id].w); | |
state.address_registers[2] = shader_uniforms.i[instr.flow_control.int_uniform_id].y; | |
call(state, | |
binary_offset + 1, | |
instr.flow_control.dest_offset - binary_offset + 1, | |
instr.flow_control.dest_offset + 1, | |
- [&instr, binary_offset](VertexShaderState& state) { // Capture by value intended! | |
-// state.address_registers[2] += shader_uniforms.i[instr.flow_control.int_uniform_id].z; | |
- state.address_registers[2] += 1; | |
- state.program_counter = &shader_memory[binary_offset+1]; | |
- return state.address_registers[2] > shader_uniforms.i[instr.flow_control.int_uniform_id].x + | |
- shader_uniforms.i[instr.flow_control.int_uniform_id].y; | |
- } | |
- ); | |
+ shader_uniforms.i[instr.flow_control.int_uniform_id].x, | |
+ shader_uniforms.i[instr.flow_control.int_uniform_id].z); | |
break; | |
} | |
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp | |
index c9707e5..0a23659 100644 | |
--- a/src/video_core/video_core.cpp | |
+++ b/src/video_core/video_core.cpp | |
@@ -4,7 +4,6 @@ | |
#include "common/common.h" | |
#include "common/emu_window.h" | |
-#include "common/log.h" | |
#include "core/core.h" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment