Skip to content

Instantly share code, notes, and snippets.

@Subv
Created February 19, 2015 19:48
Show Gist options
  • Save Subv/24540e39963b36a73940 to your computer and use it in GitHub Desktop.
Save Subv/24540e39963b36a73940 to your computer and use it in GitHub Desktop.
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index ea3367e..ba3876a 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -16,7 +16,10 @@ namespace Clipper {
struct ClippingEdge {
public:
ClippingEdge(Math::Vec4<float24> coeffs,
- Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0), float24::FromFloat32(0), float24::FromFloat32(0), float24::FromFloat32(0)))
+ Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0),
+ float24::FromFloat32(0),
+ float24::FromFloat32(0),
+ float24::FromFloat32(0)))
: coeffs(coeffs),
bias(bias)
{
@@ -87,28 +90,26 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
auto* output_list = &buffer_a;
auto* input_list = &buffer_b;
- // TODO: Test if dropping the whole primitive in this case reflects hardware behavior.
-/* ClippingEdge near_edge{ ClippingEdge::POS_Z, Math::Vec4<float24>(float24::FromFloat32(0), float24::FromFloat32(0), float24::FromFloat32(1), float24::FromFloat32(0)) };
- if (near_edge.IsOutSide(v0) || near_edge.IsOutSide(v1) || near_edge.IsOutSide(v2))
- return;*/
-
// NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
// TODO: Not sure if this is a valid approach. Also should probably instead use the smallest
// epsilon possible within float24 accuracy.
static const float24 EPSILON = float24::FromFloat32(0.00001);
- static const float24 zero = float24::FromFloat32(0.0);
- static const float24 one = float24::FromFloat32(1.0);
+ static const float24 f0 = float24::FromFloat32(0.0);
+ static const float24 f1 = float24::FromFloat32(1.0);
static const std::array<ClippingEdge, 7> clipping_edges = {{
- { Math::MakeVec(one, zero, zero, -one) }, // x = +w
- { Math::MakeVec(-one, zero, zero, -one) }, // x = -w
- { Math::MakeVec(zero, one, zero, -one) }, // y = +w
- { Math::MakeVec(zero, -one, zero, -one) }, // y = -w
- { Math::MakeVec(zero, zero, one, zero) }, // z = 0
- { Math::MakeVec(zero, zero, -one, -one) }, // z = -w
- { Math::MakeVec(zero, zero, zero, -one), Math::Vec4<float24>(zero, zero, zero, EPSILON) }, // w = EPSILON
-
+ { Math::MakeVec( f1, f0, f0, -f1) }, // x = +w
+ { Math::MakeVec(-f1, f0, f0, -f1) }, // x = -w
+ { Math::MakeVec( f0, f1, f0, -f1) }, // y = +w
+ { Math::MakeVec( f0, -f1, f0, -f1) }, // y = -w
+ { Math::MakeVec( f0, f0, f1, f0) }, // z = 0
+ { Math::MakeVec( f0, f0, -f1, -f1) }, // z = -w
+ { Math::MakeVec( f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON) }, // w = EPSILON
}};
+ // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii)
+ // drop the whole primitive instead of clipping the primitive properly. We should test if
+ // this happens on the 3DS, too.
+
// Simple implementation of the Sutherland-Hodgman clipping algorithm.
// TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
for (auto edge : clipping_edges) {
@@ -160,7 +161,6 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32());
Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2);
- Rasterizer::ProcessTriangle(vtx2, vtx1, vtx0);
}
}
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d1165bb..586ad62 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -30,6 +30,10 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
if (id >= registers.NumIds())
return;
+ // If we're skipping this frame, only allow trigger IRQ
+ if (GPU::g_skip_frame && id != PICA_REG_INDEX(trigger_irq))
+ return;
+
// TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value
u32 old_value = registers[id];
registers[id] = (old_value & ~mask) | (value & mask);
@@ -49,8 +53,6 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX(trigger_draw):
case PICA_REG_INDEX(trigger_draw_indexed):
{
- if (GPU::g_skip_frame) return;
-
DebugUtils::DumpTevStageConfig(registers.GetTevStages());
if (g_debug_context)
@@ -61,15 +63,15 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
// Information about internal vertex attributes
u32 vertex_attribute_sources[16];
- std::fill(vertex_attribute_sources, &vertex_attribute_sources[16], 0xdeadbeef);
+ boost::fill(vertex_attribute_sources, 0xdeadbeef);
u32 vertex_attribute_strides[16];
u32 vertex_attribute_formats[16];
- u32 vertex_attribute_elements[16];
- u32 vertex_attribute_element_size[16];
// HACK: Initialize vertex_attribute_elements to zero to prevent infinite loops below.
// This is one of the hacks required to deal with uninitalized vertex attributes.
- boost::fill(vertex_attribute_elements, 0);
+ // TODO: Fix this properly.
+ u32 vertex_attribute_elements[16] = {};
+ u32 vertex_attribute_element_size[16];
// Setup attribute data from loaders
for (int loader = 0; loader < 12; ++loader) {
@@ -183,7 +185,6 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
}
case PICA_REG_INDEX(vs_bool_uniforms):
- if (GPU::g_skip_frame) return;
for (unsigned i = 0; i < 16; ++i)
VertexShader::GetBoolUniform(i) = (registers.vs_bool_uniforms.Value() & (1 << i)) != 0;
@@ -194,7 +195,6 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3):
case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4):
{
- if (GPU::g_skip_frame) return;
int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1));
auto values = registers.vs_int_uniforms[index];
VertexShader::GetIntUniform(index) = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
@@ -212,7 +212,6 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7):
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8):
{
- if (GPU::g_skip_frame) return;
auto& uniform_setup = registers.vs_uniform_setup;
// TODO: Does actual hardware indeed keep an intermediate buffer or does
@@ -280,7 +279,6 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc):
case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd):
{
- if (GPU::g_skip_frame) return;
VertexShader::SubmitSwizzleDataChange(registers.vs_swizzle_patterns.offset, value);
registers.vs_swizzle_patterns.offset++;
break;
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index b60f3d5..0beb72e 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -16,7 +16,7 @@
#include <nihstro/shader_binary.h>
-#include "common/log.h"
+#include "common/assert.h"
#include "common/file_util.h"
#include "common/math_util.h"
@@ -189,7 +189,7 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data
);
if (it == output_info_table.end()) {
- output_info_table.push_back({});
+ output_info_table.emplace_back();
output_info_table.back().type = type;
output_info_table.back().component_mask = component_mask;
output_info_table.back().id = i;
@@ -197,7 +197,7 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data
it->component_mask = it->component_mask | component_mask;
}
} catch (const std::out_of_range& ) {
- _dbg_assert_msg_(HW_GPU, 0, "Unknown output attribute mapping");
+ DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping");
LOG_ERROR(HW_GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x",
(int)output_attributes[i].map_x.Value(),
(int)output_attributes[i].map_y.Value(),
@@ -285,7 +285,7 @@ void OnPicaRegWrite(u32 id, u32 value)
if (!is_pica_tracing)
return;
- pica_trace->writes.push_back({id, value});
+ pica_trace->writes.emplace_back(id, value);
}
std::unique_ptr<PicaTrace> FinishPicaTracing()
@@ -489,7 +489,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
BitField<34, 3, u64> table_index_2;
BitField<37, 3, u64> table_index_1;
- union Union2 {
+ union {
// delta value + base value
BitField<40, 3, s64> db;
BitField<43, 5, u64> b;
@@ -501,7 +501,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
BitField<59, 5, u64> r;
} differential;
- union Union3 {
+ union {
BitField<40, 4, u64> b2;
BitField<44, 4, u64> b1;
@@ -547,7 +547,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
// Add modifier
unsigned table_index = (x < 2) ? table_index_2.Value() : table_index_1.Value();
- static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = std::array<std::array<u8, 2>, 8>{{
+ static const auto etc1_modifier_table = std::array<std::array<u8, 2>, 8>{{
{ 2, 8 }, { 5, 17 }, { 9, 29 }, { 13, 42 },
{ 18, 60 }, { 24, 80 }, { 33, 106 }, { 47, 183 }
}};
@@ -571,7 +571,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
default:
LOG_ERROR(HW_GPU, "Unknown texture format: %x", (u32)info.format);
- /*_dbg_assert_(HW_GPU, 0);*/
+ DEBUG_ASSERT(false);
return {};
}
}
diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h
index a51d49c..48ac269 100644
--- a/src/video_core/gpu_debugger.h
+++ b/src/video_core/gpu_debugger.h
@@ -8,8 +8,6 @@
#include <functional>
#include <vector>
-#include "common/log.h"
-
#include "core/hle/service/gsp_gpu.h"
#include "command_processor.h"
@@ -60,13 +58,13 @@ public:
if (observers.empty())
return;
- gx_command_history.push_back(GSP_GPU::Command());
- GSP_GPU::Command& cmd = gx_command_history[gx_command_history.size()-1];
+ gx_command_history.emplace_back();
+ GSP_GPU::Command& cmd = gx_command_history.back();
memcpy(&cmd, command_data, sizeof(GSP_GPU::Command));
ForEachObserver([this](DebuggerObserver* observer) {
- observer->GXCommandProcessed(this->gx_command_history.size());
+ observer->GXCommandProcessed(static_cast<int>(this->gx_command_history.size()));
} );
}
diff --git a/src/video_core/math.h b/src/video_core/math.h
index 9622e76..f9a8226 100644
--- a/src/video_core/math.h
+++ b/src/video_core/math.h
@@ -457,27 +457,41 @@ public:
const T& b() const { return z; }
const T& a() const { return w; }
- // swizzlers - create a subvector of specific components
+ // Swizzlers - Create a subvector of specific components
// e.g. Vec2 uv() { return Vec2(x,y); }
- // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
+
+ // _DEFINE_SWIZZLER2 defines a single such function
+ // DEFINE_SWIZZLER2_COMP1 defines one-component functions for all component names (x<->r)
+ // DEFINE_SWIZZLER2_COMP2 defines two-component functions for all component names (x<->r) and permutations (xy<->yx)
#define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); }
-#define DEFINE_SWIZZLER2(a, b, a2, b2) \
+#define DEFINE_SWIZZLER2_COMP1(a, a2) \
+ _DEFINE_SWIZZLER2(a, a, a##a); \
+ _DEFINE_SWIZZLER2(a, a, a2##a2)
+#define DEFINE_SWIZZLER2_COMP2(a, b, a2, b2) \
_DEFINE_SWIZZLER2(a, b, a##b); \
_DEFINE_SWIZZLER2(a, b, a2##b2); \
_DEFINE_SWIZZLER2(b, a, b##a); \
_DEFINE_SWIZZLER2(b, a, b2##a2)
- DEFINE_SWIZZLER2(x, y, r, g);
- DEFINE_SWIZZLER2(x, z, r, b);
- DEFINE_SWIZZLER2(x, w, r, a);
- DEFINE_SWIZZLER2(y, z, g, b);
- DEFINE_SWIZZLER2(y, w, g, a);
- DEFINE_SWIZZLER2(z, w, b, a);
-#undef DEFINE_SWIZZLER2
+ DEFINE_SWIZZLER2_COMP2(x, y, r, g);
+ DEFINE_SWIZZLER2_COMP2(x, z, r, b);
+ DEFINE_SWIZZLER2_COMP2(x, w, r, a);
+ DEFINE_SWIZZLER2_COMP2(y, z, g, b);
+ DEFINE_SWIZZLER2_COMP2(y, w, g, a);
+ DEFINE_SWIZZLER2_COMP2(z, w, b, a);
+ DEFINE_SWIZZLER2_COMP1(x, r);
+ DEFINE_SWIZZLER2_COMP1(y, g);
+ DEFINE_SWIZZLER2_COMP1(z, b);
+ DEFINE_SWIZZLER2_COMP1(w, a);
+#undef DEFINE_SWIZZLER2_COMP1
+#undef DEFINE_SWIZZLER2_COMP2
#undef _DEFINE_SWIZZLER2
#define _DEFINE_SWIZZLER3(a, b, c, name) const Vec3<T> name() const { return Vec3<T>(a, b, c); }
-#define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \
+#define DEFINE_SWIZZLER3_COMP1(a, a2) \
+ _DEFINE_SWIZZLER3(a, a, a, a##a##a); \
+ _DEFINE_SWIZZLER3(a, a, a, a2##a2##a2)
+#define DEFINE_SWIZZLER3_COMP3(a, b, c, a2, b2, c2) \
_DEFINE_SWIZZLER3(a, b, c, a##b##c); \
_DEFINE_SWIZZLER3(a, c, b, a##c##b); \
_DEFINE_SWIZZLER3(b, a, c, b##a##c); \
@@ -491,11 +505,16 @@ public:
_DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \
_DEFINE_SWIZZLER3(c, b, a, c2##b2##a2)
- DEFINE_SWIZZLER3(x, y, z, r, g, b);
- DEFINE_SWIZZLER3(x, y, w, r, g, a);
- DEFINE_SWIZZLER3(x, z, w, r, b, a);
- DEFINE_SWIZZLER3(y, z, w, g, b, a);
-#undef DEFINE_SWIZZLER3
+ DEFINE_SWIZZLER3_COMP3(x, y, z, r, g, b);
+ DEFINE_SWIZZLER3_COMP3(x, y, w, r, g, a);
+ DEFINE_SWIZZLER3_COMP3(x, z, w, r, b, a);
+ DEFINE_SWIZZLER3_COMP3(y, z, w, g, b, a);
+ DEFINE_SWIZZLER3_COMP1(x, r);
+ DEFINE_SWIZZLER3_COMP1(y, g);
+ DEFINE_SWIZZLER3_COMP1(z, b);
+ DEFINE_SWIZZLER3_COMP1(w, a);
+#undef DEFINE_SWIZZLER3_COMP1
+#undef DEFINE_SWIZZLER3_COMP3
#undef _DEFINE_SWIZZLER3
};
@@ -612,7 +631,7 @@ static inline Vec4<T> MakeVec(const Vec3<T>& xyz, const T& w)
}
template<typename T>
-static inline Vec4<T> MakeVec(const T& x, const Vec2<T>& yzw)
+static inline Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw)
{
return MakeVec(x, yzw[0], yzw[1], yzw[2]);
}
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index bc863df..e4a5ef7 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -236,19 +236,29 @@ struct Regs {
};
enum class ColorModifier : u32 {
- SourceColor = 0,
- OneMinusSourceColor = 1,
- SourceAlpha = 2,
- OneMinusSourceAlpha = 3,
-
- // Other values seem to be non-standard extensions
+ SourceColor = 0x0,
+ OneMinusSourceColor = 0x1,
+ SourceAlpha = 0x2,
+ OneMinusSourceAlpha = 0x3,
+ SourceRed = 0x4,
+ OneMinusSourceRed = 0x5,
+
+ SourceGreen = 0x8,
+ OneMinusSourceGreen = 0x9,
+
+ SourceBlue = 0xc,
+ OneMinusSourceBlue = 0xd,
};
enum class AlphaModifier : u32 {
- SourceAlpha = 0,
- OneMinusSourceAlpha = 1,
-
- // Other values seem to be non-standard extensions
+ SourceAlpha = 0x0,
+ OneMinusSourceAlpha = 0x1,
+ SourceRed = 0x2,
+ OneMinusSourceRed = 0x3,
+ SourceGreen = 0x4,
+ OneMinusSourceGreen = 0x5,
+ SourceBlue = 0x6,
+ OneMinusSourceBlue = 0x7,
};
enum class Operation : u32 {
@@ -333,16 +343,30 @@ struct Regs {
};
union {
- enum BlendEquation : u32 {
- Add = 0,
+ enum class BlendEquation : u32 {
+ Add = 0,
+ Subtract = 1,
+ ReverseSubtract = 2,
+ Min = 3,
+ Max = 4
};
enum BlendFactor : u32 {
- Zero = 0,
- One = 1,
-
- SourceAlpha = 6,
- OneMinusSourceAlpha = 7,
+ Zero = 0,
+ One = 1,
+ SourceColor = 2,
+ OneMinusSourceColor = 3,
+ DestColor = 4,
+ OneMinusDestColor = 5,
+ SourceAlpha = 6,
+ OneMinusSourceAlpha = 7,
+ DestAlpha = 8,
+ OneMinusDestAlpha = 9,
+ ConstantColor = 10,
+ OneMinusConstantColor = 11,
+ ConstantAlpha = 12,
+ OneMinusConstantAlpha = 13,
+ SourceAlphaSaturate = 14
};
BitField< 0, 8, BlendEquation> blend_equation_rgb;
@@ -363,7 +387,12 @@ struct Regs {
BitField<0, 4, Op> op;
} logic_op;
- INSERT_PADDING_WORDS(0x1);
+ union {
+ BitField< 0, 8, u32> r;
+ BitField< 8, 8, u32> g;
+ BitField<16, 8, u32> b;
+ BitField<24, 8, u32> a;
+ } blend_const;
union {
BitField< 0, 1, u32> enable;
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index 242a07e..1776a19 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -6,6 +6,7 @@
#include "primitive_assembly.h"
#include "vertex_shader.h"
+#include "common/logging/log.h"
#include "video_core/debug_utils/debug_utils.h"
namespace Pica {
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index c9e0a79..81df09b 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -5,6 +5,7 @@
#include <algorithm>
#include "common/common_types.h"
+#include "common/math_util.h"
#include "math.h"
#include "pica.h"
@@ -35,7 +36,7 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
default:
LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);
- exit(1);
+ UNIMPLEMENTED();
}
}
@@ -57,8 +58,6 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
static u32 GetDepth(int x, int y) {
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
- if (!depth_buffer)
- return 0;
y = (registers.framebuffer.height - y);
@@ -69,8 +68,6 @@ static u32 GetDepth(int x, int y) {
static void SetDepth(int x, int y, u16 value) {
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
- if (!depth_buffer)
- return;
y = (registers.framebuffer.height - y);
@@ -113,47 +110,45 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
return Math::Cross(vec1, vec2).z;
};
-void ProcessTriangle(const VertexShader::OutputVertex& v0_,
- const VertexShader::OutputVertex& v1_,
- const VertexShader::OutputVertex& v2_)
+/**
+ * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
+ * culling via recursion.
+ */
+static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
+ const VertexShader::OutputVertex& v1,
+ const VertexShader::OutputVertex& v2,
+ bool reversed = false)
{
// vertex positions in rasterizer coordinates
- auto FloatToFix = [](float24 flt) {
- // TODO: Rounding here is necessary to prevent garbage pixels at
- // triangle borders. Is it that the correct solution, though?
- return Fix12P4(static_cast<unsigned short>(round(flt.ToFloat32() * 16.0f)));
- };
- auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) {
- return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
- };
-
- VertexShader::OutputVertex v0 = v0_;
- VertexShader::OutputVertex v1 = v1_;
- VertexShader::OutputVertex v2 = v2_;
+ static auto FloatToFix = [](float24 flt) {
+ // TODO: Rounding here is necessary to prevent garbage pixels at
+ // triangle borders. Is that the correct solution, though?
+ return Fix12P4(static_cast<unsigned short>(round(flt.ToFloat32() * 16.0f)));
+ };
+ static auto ScreenToRasterizerCoordinates = [](const Math::Vec3<float24>& vec) {
+ return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
+ };
+
Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos),
ScreenToRasterizerCoordinates(v1.screenpos),
ScreenToRasterizerCoordinates(v2.screenpos) };
- if (registers.cull_mode == Regs::CullMode::KeepCounterClockWise) {
- // Reverse vertex order and use the CW code path.
- std::swap(vtxpos[1], vtxpos[2]);
- std::swap(v1, v2);
- }
-
- if (registers.cull_mode != Regs::CullMode::KeepAll) {
- // Cull away triangles which are wound counter-clockwise.
- // TODO: Make work :(
- if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) {
- std::swap(vtxpos[1], vtxpos[2]);
- std::swap(v1, v2);
-// return;
+ if (registers.cull_mode == Regs::CullMode::KeepAll) {
+ // Make sure we always end up with a triangle wound counter-clockwise
+ if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) {
+ ProcessTriangleInternal(v0, v2, v1, true);
+ return;
}
} else {
- // TODO: Consider a check for degenerate triangles ("SignedArea == 0")
- if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) {
- std::swap(vtxpos[1], vtxpos[2]);
- std::swap(v1, v2);
+ if (!reversed && registers.cull_mode == Regs::CullMode::KeepClockWise) {
+ // Reverse vertex order and use the CCW code path.
+ ProcessTriangleInternal(v0, v2, v1, true);
+ return;
}
+
+ // Cull away triangles which are wound clockwise.
+ if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
+ return;
}
// TODO: Proper scissor rect test!
@@ -255,7 +250,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_,
if (!texture.enabled)
continue;
- //_dbg_assert_(HW_GPU, 0 != texture.config.address);
+ DEBUG_ASSERT(0 != texture.config.address);
int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32();
int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32();
@@ -267,25 +262,25 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_,
return val;
case Regs::TextureConfig::Repeat:
- return (int)(((unsigned)val) % size);
+ return (int)((unsigned)val % size);
case Regs::TextureConfig::MirroredRepeat:
{
- int val = (int)(((unsigned)val) % (2*size));
- if (val >= size)
- val = 2 * size - 1 - val;
- return val;
+ int coord = (int)((unsigned)val % (2 * size));
+ if (coord >= size)
+ coord = 2 * size - 1 - coord;
+ return coord;
}
default:
LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode);
- //_dbg_assert_(HW_GPU, 0);
+ UNIMPLEMENTED();
return 0;
}
};
// Textures are laid out from bottom to top, hence we invert the t coordinate.
- // NOTE: This may not be the right to place the inversion.
+ // NOTE: This may not be the right place for the inversion.
// TODO: Check if this applies to ETC textures, too.
s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
@@ -335,41 +330,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_,
default:
LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
- //_dbg_assert_(HW_GPU, 0);
+ UNIMPLEMENTED();
return {};
}
};
- auto GetAlphaSource = [&](Source source) -> u8 {
- switch (source) {
- case Source::PrimaryColor:
- return primary_color.a();
-
- case Source::Texture0:
- return texture_color[0].a();
-
- case Source::Texture1:
- return texture_color[1].a();
-
- case Source::Texture2:
- return texture_color[2].a();
-
- case Source::Constant:
- return tev_stage.const_a;
-
- case Source::Previous:
- return combiner_output.a();
-
- default:
- LOG_ERROR(HW_GPU, "Unknown alpha combiner source %d\n", (int)source);
- //_dbg_assert_(HW_GPU, 0);
- return 0;
- }
- };
-
static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
- switch (factor)
- {
+ switch (factor) {
case ColorModifier::SourceColor:
return values.rgb();
@@ -377,12 +344,28 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_,
return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
case ColorModifier::SourceAlpha:
- return { values.a(), values.a(), values.a() };
+ return values.aaa();
- default:
- LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor);
- //_dbg_assert_(HW_GPU, 0);
- return {};
+ case ColorModifier::OneMinusSourceAlpha:
+ return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>();
+
+ case ColorModifier::SourceRed:
+ return values.rrr();
+
+ case ColorModifier::OneMinusSourceRed:
+ return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>();
+
+ case ColorModifier::SourceGreen:
+ return values.ggg();
+
+ case ColorModifier::OneMinusSourceGreen:
+ return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>();
+
+ case ColorModifier::SourceBlue:
+ return values.bbb();
+
+ case ColorModifier::OneMinusSourceBlue:
+ return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>();
}
};
@@ -394,10 +377,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_,
case AlphaModifier::OneMinusSourceAlpha:
return 255 - values.a();
- default:
- LOG_ERROR(HW_GPU, "Unknown alpha factor %d\n", (int)factor);
- //_dbg_assert_(HW_GPU, 0);
- return 0;
+ case AlphaModifier::SourceRed:
+ return values.r();
+
+ case AlphaModifier::OneMinusSourceRed:
+ return 255 - values.r();
+
+ case AlphaModifier::SourceGreen:
+ return values.g();
+
+ case AlphaModifier::OneMinusSourceGreen:
+ return 255 - values.g();
+
+ case AlphaModifier::SourceBlue:
+ return values.b();
+
+ case AlphaModifier::OneMinusSourceBlue:
+ return 255 - values.b();
}
};
@@ -451,7 +447,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_,
default:
LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
- //_dbg_assert_(HW_GPU, 0);
+ UNIMPLEMENTED();
return {};
}
};
@@ -481,7 +477,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_,
default:
LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
- //_dbg_assert_(HW_GPU, 0);
+ UNIMPLEMENTED();
return 0;
}
};
@@ -607,28 +603,58 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_,
auto params = registers.output_merger.alpha_blending;
auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> {
- switch(factor) {
+ switch (factor) {
case params.Zero:
return Math::Vec3<u8>(0, 0, 0);
case params.One:
return Math::Vec3<u8>(255, 255, 255);
+ case params.SourceColor:
+ return combiner_output.rgb();
+
+ case params.OneMinusSourceColor:
+ return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b());
+
+ case params.DestColor:
+ return dest.rgb();
+
+ case params.OneMinusDestColor:
+ return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b());
+
case params.SourceAlpha:
- return Math::MakeVec(combiner_output.a(), combiner_output.a(), combiner_output.a());
+ return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a());
case params.OneMinusSourceAlpha:
- return Math::Vec3<u8>(255-combiner_output.a(), 255-combiner_output.a(), 255-combiner_output.a());
+ return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a());
+
+ case params.DestAlpha:
+ return Math::Vec3<u8>(dest.a(), dest.a(), dest.a());
+
+ case params.OneMinusDestAlpha:
+ return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a());
+
+ case params.ConstantColor:
+ return Math::Vec3<u8>(registers.output_merger.blend_const.r, registers.output_merger.blend_const.g, registers.output_merger.blend_const.b);
+
+ case params.OneMinusConstantColor:
+ return Math::Vec3<u8>(255 - registers.output_merger.blend_const.r, 255 - registers.output_merger.blend_const.g, 255 - registers.output_merger.blend_const.b);
+
+ case params.ConstantAlpha:
+ return Math::Vec3<u8>(registers.output_merger.blend_const.a, registers.output_merger.blend_const.a, registers.output_merger.blend_const.a);
+
+ case params.OneMinusConstantAlpha:
+ return Math::Vec3<u8>(255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a);
default:
- return Math::Vec3<u8>(0, 0, 0); //LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor);
- //exit(0);
- break;
+ LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor);
+ UNIMPLEMENTED();
+ return Math::Vec3<u8>(0, 0, 0); // Keep the pre-patch zero fallback so the lambda never runs off its end if UNIMPLEMENTED() returns
}
};
auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 {
- switch(factor) {
+ switch (factor) {
case params.Zero:
return 0;
@@ -641,11 +667,73 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_,
case params.OneMinusSourceAlpha:
return 255 - combiner_output.a();
+ case params.DestAlpha:
+ return dest.a();
+
+ case params.OneMinusDestAlpha:
+ return 255 - dest.a();
+
+ case params.ConstantAlpha:
+ return registers.output_merger.blend_const.a;
+
+ case params.OneMinusConstantAlpha:
+ return 255 - registers.output_merger.blend_const.a;
+
default:
- return 0; //LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor);
- //exit(0);
+ LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor);
+ UNIMPLEMENTED();
+ return 0; // Keep the pre-patch zero fallback so the lambda never runs off its end if UNIMPLEMENTED() returns
+ }
+ };
+
+ using BlendEquation = decltype(params)::BlendEquation;
+ static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
+ const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
+ BlendEquation equation) {
+ Math::Vec4<int> result(0, 0, 0, 0); // Explicitly zeroed: the default branch below assigns nothing before the clamp reads it
+
+ auto src_result = (src * srcfactor).Cast<int>();
+ auto dst_result = (dest * destfactor).Cast<int>();
+
+ switch (equation) {
+ case BlendEquation::Add:
+ result = (src_result + dst_result) / 255;
break;
+
+ case BlendEquation::Subtract:
+ result = (src_result - dst_result) / 255;
+ break;
+
+ case BlendEquation::ReverseSubtract:
+ result = (dst_result - src_result) / 255;
+ break;
+
+ // TODO: How do these two actually work?
+ // OpenGL doesn't include the blend factors in the min/max computations,
+ // but is this what the 3DS actually does?
+ case BlendEquation::Min:
+ result.r() = std::min(src.r(), dest.r());
+ result.g() = std::min(src.g(), dest.g());
+ result.b() = std::min(src.b(), dest.b());
+ result.a() = std::min(src.a(), dest.a());
+ break;
+
+ case BlendEquation::Max:
+ result.r() = std::max(src.r(), dest.r());
+ result.g() = std::max(src.g(), dest.g());
+ result.b() = std::max(src.b(), dest.b());
+ result.a() = std::max(src.a(), dest.a());
+ break;
+
+ default: // Reached for both the RGB and the alpha equation, so the message stays channel-neutral
+ LOG_CRITICAL(HW_GPU, "Unknown blend equation %x", static_cast<u32>(equation));
+ UNIMPLEMENTED();
}
+
+ return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255),
+ MathUtil::Clamp(result.g(), 0, 255),
+ MathUtil::Clamp(result.b(), 0, 255),
+ MathUtil::Clamp(result.a(), 0, 255));
};
auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb),
@@ -653,38 +741,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_,
auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb),
LookupFactorA(params.factor_dest_a));
- switch (params.blend_equation_rgb) {
- case params.Add:
- {
- auto result = (combiner_output * srcfactor + dest * dstfactor) / 255;
- result.r() = std::min(255, result.r());
- result.g() = std::min(255, result.g());
- result.b() = std::min(255, result.b());
- blend_output = result.Cast<u8>();
- break;
- }
-
- default:
- LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value());
- exit(0);
- }
-
- switch (params.blend_equation_a) {
- case params.Add:
- {
- auto result = (combiner_output * srcfactor + dest * dstfactor) / 255;
- result.a() = std::min(255, result.a());
- blend_output.a() = result.Cast<u8>().a();
- break;
- }
-
- default:
- LOG_CRITICAL(HW_GPU, "Unknown alpha blend equation %x", params.blend_equation_a.Value());
- exit(0);
- }
+ blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb);
+ blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a();
} else {
LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op);
- exit(0);
+ UNIMPLEMENTED();
}
const Math::Vec4<u8> result = {
@@ -699,6 +760,12 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0_,
}
}
+void ProcessTriangle(const VertexShader::OutputVertex& v0,
+ const VertexShader::OutputVertex& v1,
+ const VertexShader::OutputVertex& v2) {
+ ProcessTriangleInternal(v0, v1, v2);
+}
+
} // namespace Rasterizer
} // namespace Pica
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index e982e37..42d0e59 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -3,7 +3,7 @@
// Refer to the license.txt file included.
#include "gl_shader_util.h"
-#include "common/log.h"
+#include "common/logging/log.h"
#include <vector>
#include <algorithm>
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index aa47bd6..2726951 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -61,15 +61,13 @@ void RendererOpenGL::SwapBuffers() {
for(int i : {0, 1}) {
const auto& framebuffer = GPU::g_regs.framebuffer_config[i];
- if (textures[i].width != (GLsizei)framebuffer.width || textures[i].height != (GLsizei)framebuffer.height) {
+ if (textures[i].width != (GLsizei)framebuffer.width ||
+ textures[i].height != (GLsizei)framebuffer.height ||
+ textures[i].format != framebuffer.color_format) {
// Reallocate texture if the framebuffer size has changed.
// This is expected to not happen very often and hence should not be a
// performance problem.
- glBindTexture(GL_TEXTURE_2D, textures[i].handle);
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, framebuffer.width, framebuffer.height, 0,
- GL_BGR, GL_UNSIGNED_BYTE, nullptr);
- textures[i].width = framebuffer.width;
- textures[i].height = framebuffer.height;
+ ConfigureFramebufferTexture(textures[i], framebuffer);
}
LoadFBToActiveGLTexture(GPU::g_regs.framebuffer_config[i], textures[i]);
@@ -98,16 +96,15 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
const u8* framebuffer_data = Memory::GetPointer(framebuffer_vaddr);
- // TODO: Handle other pixel formats
- _dbg_assert_msg_(Render_OpenGL, framebuffer.color_format == GPU::Regs::PixelFormat::RGB8,
- "Unsupported 3DS pixel format.");
+ int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
+ size_t pixel_stride = framebuffer.stride / bpp;
- size_t pixel_stride = framebuffer.stride / 3;
// OpenGL only supports specifying a stride in units of pixels, not bytes, unfortunately
- _dbg_assert_(Render_OpenGL, pixel_stride * 3 == framebuffer.stride);
+ ASSERT(pixel_stride * bpp == framebuffer.stride);
+
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default
// only allows rows to have a memory alignement of 4.
- _dbg_assert_(Render_OpenGL, pixel_stride % 4 == 0);
+ ASSERT(pixel_stride % 4 == 0);
glBindTexture(GL_TEXTURE_2D, texture.handle);
glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
@@ -118,7 +115,7 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
// TODO: Applications could theoretically crash Citra here by specifying too large
// framebuffer sizes. We should make sure that this cannot happen.
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
- GL_BGR, GL_UNSIGNED_BYTE, framebuffer_data);
+ texture.gl_format, texture.gl_type, framebuffer_data);
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
@@ -171,6 +168,79 @@ void RendererOpenGL::InitOpenGLObjects() {
glBindTexture(GL_TEXTURE_2D, 0);
}
+/**
+ * (Re)allocates the OpenGL storage backing a screen texture so that it matches the size and
+ * color format of an emulated framebuffer.
+ *
+ * On success, texture.format/width/height and texture.gl_format/gl_type are updated to describe
+ * the new storage, and texture.handle is left bound to GL_TEXTURE_2D.
+ * If the color format is not handled, UNIMPLEMENTED() is raised and neither the texture
+ * description nor any GL state is modified.
+ *
+ * @param texture     Texture to reconfigure; its handle is bound and its storage respecified.
+ * @param framebuffer Framebuffer configuration providing width, height and color_format.
+ */
+void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
+                                                 const GPU::Regs::FramebufferConfig& framebuffer) {
+    const GPU::Regs::PixelFormat format = framebuffer.color_format;
+    GLint internal_format = GL_RGBA;
+    GLenum gl_format = GL_RGBA;
+    GLenum gl_type = GL_UNSIGNED_BYTE;
+
+    switch (format) {
+    case GPU::Regs::PixelFormat::RGBA8:
+        internal_format = GL_RGBA;
+        gl_format = GL_RGBA;
+        gl_type = GL_UNSIGNED_INT_8_8_8_8;
+        break;
+
+    case GPU::Regs::PixelFormat::RGB8:
+        // This pixel format uses BGR since GL_UNSIGNED_BYTE specifies byte-order, unlike every
+        // specific OpenGL type used in this function using native-endian (that is, little-endian
+        // mostly everywhere) for words or half-words.
+        // TODO: check how those behave on big-endian processors.
+        internal_format = GL_RGB;
+        gl_format = GL_BGR;
+        gl_type = GL_UNSIGNED_BYTE;
+        break;
+
+    case GPU::Regs::PixelFormat::RGB565:
+        internal_format = GL_RGB;
+        gl_format = GL_RGB;
+        gl_type = GL_UNSIGNED_SHORT_5_6_5;
+        break;
+
+    case GPU::Regs::PixelFormat::RGB5A1:
+        internal_format = GL_RGBA;
+        gl_format = GL_RGBA;
+        gl_type = GL_UNSIGNED_SHORT_5_5_5_1;
+        break;
+
+    case GPU::Regs::PixelFormat::RGBA4:
+        internal_format = GL_RGBA;
+        gl_format = GL_RGBA;
+        gl_type = GL_UNSIGNED_SHORT_4_4_4_4;
+        break;
+
+    default:
+        // Bail out before touching the texture, so that an unhandled format can never reach
+        // glTexImage2D or leave the cached description out of sync with the real GL storage.
+        UNIMPLEMENTED();
+        return;
+    }
+
+    // Commit the new description only once the format is known to be supported.
+    texture.format = format;
+    texture.width = framebuffer.width;
+    texture.height = framebuffer.height;
+    texture.gl_format = gl_format;
+    texture.gl_type = gl_type;
+
+    glBindTexture(GL_TEXTURE_2D, texture.handle);
+    glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
+                 texture.gl_format, texture.gl_type, nullptr);
+}
+
/**
* Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation.
*/
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index cf78c1e..bcabab5 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -43,9 +43,14 @@ private:
GLuint handle;
GLsizei width;
GLsizei height;
+ GPU::Regs::PixelFormat format;
+ GLenum gl_format;
+ GLenum gl_type;
};
void InitOpenGLObjects();
+ static void ConfigureFramebufferTexture(TextureInfo& texture,
+ const GPU::Regs::FramebufferConfig& framebuffer);
void DrawScreens();
void DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h);
void UpdateFramerate();
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index cb6785a..def868a 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -85,9 +85,11 @@ struct VertexShaderState {
};
struct CallStackElement {
- u32 final_address;
- u32 return_address;
- std::function<int(VertexShaderState&)> branch_end_callback;
+ u32 final_address; // Offset into shader_memory that ends this scope; compared against the program counter
+ u32 return_address; // Offset into shader_memory the program counter is set to when this element is popped
+ u8 repeat_counter; // Decremented each time final_address is reached; the element is popped when it was already 0
+ u8 loop_increment; // Added to address_registers[2] (the loop counter) each time final_address is reached
+ // TODO: Should this be a signed value? Does it even matter?
};
// TODO: Is there a maximal size for this?
@@ -106,10 +108,12 @@ static void ProcessShaderCode(VertexShaderState& state) {
while (true) {
if (!state.call_stack.empty()) {
- if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) {
+ auto& top = state.call_stack.top();
+ if (state.program_counter - shader_memory.data() == top.final_address) {
+ state.address_registers[2] += top.loop_increment;
- if (state.call_stack.top().branch_end_callback(state)) {
- state.program_counter = &shader_memory[state.call_stack.top().return_address];
+ if (top.repeat_counter-- == 0) {
+ state.program_counter = &shader_memory[top.return_address];
state.call_stack.pop();
}
@@ -122,11 +126,10 @@ static void ProcessShaderCode(VertexShaderState& state) {
const Instruction& instr = *(const Instruction*)state.program_counter;
const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
- auto call = [&](VertexShaderState& state, u32 offset, u32 num_instructions,
- u32 return_offset,
- std::function<int(VertexShaderState&)> branch_end_callback) {
+ static auto call = [](VertexShaderState& state, u32 offset, u32 num_instructions,
+ u32 return_offset, u8 repeat_count, u8 loop_increment) {
state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
- state.call_stack.push({ offset + num_instructions, return_offset, branch_end_callback });
+ state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment });
};
u32 binary_offset = state.program_counter - shader_memory.data();
@@ -152,13 +155,10 @@ static void ProcessShaderCode(VertexShaderState& state) {
case Instruction::OpCodeType::Arithmetic:
{
bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed);
- if (is_inverted) {
- // TODO: We don't really support this properly: For instance, the address register
- // offset needs to be applied to SRC2 instead, etc.
- // For now, we just abort in this situation.
- LOG_CRITICAL(HW_GPU, "Bad condition...");
- exit(0);
- }
+ // TODO: We don't really support this properly: For instance, the address register
+ // offset needs to be applied to SRC2 instead, etc.
+ // For now, we just abort in this situation.
+ ASSERT_MSG(!is_inverted, "Bad condition...");
const int address_offset = (instr.common.address_register_index == 0)
? 0 : state.address_registers[instr.common.address_register_index - 1];
@@ -166,8 +166,6 @@ static void ProcessShaderCode(VertexShaderState& state) {
const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset);
const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted));
- if (!src1_ || !src2_) break;
-
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
@@ -263,7 +261,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
// TODO: Be stable against division by zero!
// TODO: I think this might be wrong... we should only use one component here
- dest[i] = float24::FromFloat32(1.0 / src1[i].ToFloat32());
+ dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32());
}
break;
@@ -278,7 +276,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
// TODO: Be stable against division by zero!
// TODO: I think this might be wrong... we should only use one component here
- dest[i] = float24::FromFloat32(1.0 / sqrt(src1[i].ToFloat32()));
+ dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32()));
}
break;
@@ -350,7 +348,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
default:
LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
(int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
- _dbg_assert_(HW_GPU, 0);
+ DEBUG_ASSERT(false);
break;
}
@@ -468,8 +466,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
- binary_offset + 1,
- [](VertexShaderState&) { return true; });
+ binary_offset + 1, 0, 0);
break;
case Instruction::OpCode::CALLU:
@@ -477,9 +474,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
- binary_offset + 1,
- [](VertexShaderState&) { return true; }
- );
+ binary_offset + 1, 0, 0);
}
break;
@@ -488,9 +483,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
- binary_offset + 1,
- [](VertexShaderState&) { return true; }
- );
+ binary_offset + 1, 0, 0);
}
break;
@@ -502,14 +495,12 @@ static void ProcessShaderCode(VertexShaderState& state) {
call(state,
binary_offset + 1,
instr.flow_control.dest_offset - binary_offset - 1,
- instr.flow_control.dest_offset + instr.flow_control.num_instructions,
- [](VertexShaderState&) { return true; });
+ instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
} else {
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
- instr.flow_control.dest_offset + instr.flow_control.num_instructions,
- [](VertexShaderState&) { return true; });
+ instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
}
break;
@@ -522,14 +513,12 @@ static void ProcessShaderCode(VertexShaderState& state) {
call(state,
binary_offset + 1,
instr.flow_control.dest_offset - binary_offset - 1,
- instr.flow_control.dest_offset + instr.flow_control.num_instructions,
- [](VertexShaderState&) { return true; });
+ instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
} else {
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
- instr.flow_control.dest_offset + instr.flow_control.num_instructions,
- [](VertexShaderState&) { return true; });
+ instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
}
break;
@@ -537,25 +526,14 @@ static void ProcessShaderCode(VertexShaderState& state) {
case Instruction::OpCode::LOOP:
{
- LOG_ERROR(HW_GPU, "%x %x %x %x %x", state.address_registers[2],
- shader_uniforms.i[instr.flow_control.int_uniform_id].x,
- shader_uniforms.i[instr.flow_control.int_uniform_id].y,
- shader_uniforms.i[instr.flow_control.int_uniform_id].z,
- shader_uniforms.i[instr.flow_control.int_uniform_id].w);
state.address_registers[2] = shader_uniforms.i[instr.flow_control.int_uniform_id].y;
call(state,
binary_offset + 1,
instr.flow_control.dest_offset - binary_offset + 1,
instr.flow_control.dest_offset + 1,
- [&instr, binary_offset](VertexShaderState& state) { // Capture by value intended!
-// state.address_registers[2] += shader_uniforms.i[instr.flow_control.int_uniform_id].z;
- state.address_registers[2] += 1;
- state.program_counter = &shader_memory[binary_offset+1];
- return state.address_registers[2] > shader_uniforms.i[instr.flow_control.int_uniform_id].x +
- shader_uniforms.i[instr.flow_control.int_uniform_id].y;
- }
- );
+ shader_uniforms.i[instr.flow_control.int_uniform_id].x,
+ shader_uniforms.i[instr.flow_control.int_uniform_id].z);
break;
}
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index c9707e5..0a23659 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -4,7 +4,6 @@
#include "common/common.h"
#include "common/emu_window.h"
-#include "common/log.h"
#include "core/core.h"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment