summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/citra/config.cpp | 1
-rw-r--r-- src/citra_qt/config.cpp | 2
-rw-r--r-- src/citra_qt/debugger/graphics_cmdlists.cpp | 4
-rw-r--r-- src/citra_qt/debugger/graphics_framebuffer.cpp | 3
-rw-r--r-- src/citra_qt/main.ui | 17
-rw-r--r-- src/common/CMakeLists.txt | 2
-rw-r--r-- src/common/color.h (renamed from src/video_core/color.h) | 3
-rw-r--r-- src/common/vector_math.h (renamed from src/video_core/math.h) | 0
-rw-r--r-- src/core/arm/dyncom/arm_dyncom_thumb.cpp | 43
-rw-r--r-- src/core/hw/gpu.cpp | 7
-rw-r--r-- src/core/settings.h | 1
-rw-r--r-- src/video_core/CMakeLists.txt | 2
-rw-r--r-- src/video_core/command_processor.cpp | 70
-rw-r--r-- src/video_core/debug_utils/debug_utils.cpp | 4
-rw-r--r-- src/video_core/debug_utils/debug_utils.h | 3
-rw-r--r-- src/video_core/pica.h | 81
-rw-r--r-- src/video_core/rasterizer.cpp | 78
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 13
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h | 3
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_shaders.h | 24
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp | 11
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h | 2
-rw-r--r-- src/video_core/renderer_opengl/pica_to_gl.h | 31
-rw-r--r-- src/video_core/vertex_shader.cpp | 29
25 files changed, 336 insertions, 100 deletions
diff --git a/src/citra/config.cpp b/src/citra/config.cpp
index 846479fd7..1378567c1 100644
--- a/src/citra/config.cpp
+++ b/src/citra/config.cpp
@@ -66,7 +66,6 @@ void Config::ReadValues() {
Settings::values.pad_cright_key = glfw_config->GetInteger("Controls", "pad_cright", GLFW_KEY_L);
// Core
- Settings::values.gpu_refresh_rate = glfw_config->GetInteger("Core", "gpu_refresh_rate", 30);
Settings::values.frame_skip = glfw_config->GetInteger("Core", "frame_skip", 0);
// Renderer
diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp
index 460f4ec07..2a9af1f38 100644
--- a/src/citra_qt/config.cpp
+++ b/src/citra_qt/config.cpp
@@ -49,7 +49,6 @@ void Config::ReadValues() {
qt_config->endGroup();
qt_config->beginGroup("Core");
- Settings::values.gpu_refresh_rate = qt_config->value("gpu_refresh_rate", 30).toInt();
Settings::values.frame_skip = qt_config->value("frame_skip", 0).toInt();
qt_config->endGroup();
@@ -102,7 +101,6 @@ void Config::SaveValues() {
qt_config->endGroup();
qt_config->beginGroup("Core");
- qt_config->setValue("gpu_refresh_rate", Settings::values.gpu_refresh_rate);
qt_config->setValue("frame_skip", Settings::values.frame_skip);
qt_config->endGroup();
diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp
index 804c735a3..cabf5fe07 100644
--- a/src/citra_qt/debugger/graphics_cmdlists.cpp
+++ b/src/citra_qt/debugger/graphics_cmdlists.cpp
@@ -11,10 +11,10 @@
#include <QSpinBox>
#include <QComboBox>
-#include "video_core/pica.h"
-#include "video_core/math.h"
+#include "common/vector_math.h"
#include "video_core/debug_utils/debug_utils.h"
+#include "video_core/pica.h"
#include "graphics_cmdlists.h"
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index e07344591..6bbe7572c 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -9,10 +9,11 @@
#include <QPushButton>
#include <QSpinBox>
+#include "common/color.h"
+
#include "core/hw/gpu.h"
#include "core/memory.h"
-#include "video_core/color.h"
#include "video_core/pica.h"
#include "video_core/utils.h"
diff --git a/src/citra_qt/main.ui b/src/citra_qt/main.ui
index 0942c28c8..9a809ee6c 100644
--- a/src/citra_qt/main.ui
+++ b/src/citra_qt/main.ui
@@ -24,7 +24,20 @@
<bool>true</bool>
</property>
<widget class="QWidget" name="centralwidget">
- <layout class="QHBoxLayout" name="horizontalLayout"/>
+ <layout class="QHBoxLayout" name="horizontalLayout">
+ <property name="leftMargin">
+ <number>0</number>
+ </property>
+ <property name="topMargin">
+ <number>0</number>
+ </property>
+ <property name="rightMargin">
+ <number>0</number>
+ </property>
+ <property name="bottomMargin">
+ <number>0</number>
+ </property>
+ </layout>
</widget>
<widget class="QMenuBar" name="menubar">
<property name="geometry">
@@ -92,7 +105,7 @@
</action>
<action name="action_Start">
<property name="enabled">
- <bool>false</bool>
+ <bool>false</bool>
</property>
<property name="text">
<string>&amp;Start</string>
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index dbaaac77b..e78f4f144 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -24,6 +24,7 @@ set(HEADERS
bit_field.h
break_points.h
chunk_file.h
+ color.h
common_funcs.h
common_paths.h
common_types.h
@@ -54,6 +55,7 @@ set(HEADERS
thread_queue_list.h
thunk.h
timer.h
+ vector_math.h
)
create_directory_groups(${SRCS} ${HEADERS})
diff --git a/src/video_core/color.h b/src/common/color.h
index 4d2026eb0..422fdc8af 100644
--- a/src/video_core/color.h
+++ b/src/common/color.h
@@ -6,8 +6,7 @@
#include "common/common_types.h"
#include "common/swap.h"
-
-#include "video_core/math.h"
+#include "common/vector_math.h"
namespace Color {
diff --git a/src/video_core/math.h b/src/common/vector_math.h
index 4928c9bf2..4928c9bf2 100644
--- a/src/video_core/math.h
+++ b/src/common/vector_math.h
diff --git a/src/core/arm/dyncom/arm_dyncom_thumb.cpp b/src/core/arm/dyncom/arm_dyncom_thumb.cpp
index 2fc8170be..83b532aac 100644
--- a/src/core/arm/dyncom/arm_dyncom_thumb.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_thumb.cpp
@@ -274,9 +274,46 @@ tdstate thumb_translate(u32 addr, u32 instr, u32* ainstr, u32* inst_size) {
? 0xE24DDF00 // SUB
: 0xE28DDF00) // ADD
|(tinstr & 0x007F); // off7
- } else if ((tinstr & 0x0F00) == 0x0e00)
- *ainstr = 0xEF000000 | 0x180000; // base | BKPT mask
- else {
+ } else if ((tinstr & 0x0F00) == 0x0e00) {
+ // BKPT
+ *ainstr = 0xEF000000 // base
+ | BITS(tinstr, 0, 3) // imm4 field;
+ | (BITS(tinstr, 4, 7) << 8); // beginning 4 bits of imm12
+ } else if ((tinstr & 0x0F00) == 0x0200) {
+ static const ARMword subset[4] = {
+ 0xE6BF0070, // SXTH
+ 0xE6AF0070, // SXTB
+ 0xE6FF0070, // UXTH
+ 0xE6EF0070, // UXTB
+ };
+
+ *ainstr = subset[BITS(tinstr, 6, 7)] // base
+ | (BITS(tinstr, 0, 2) << 12) // Rd
+ | BITS(tinstr, 3, 5); // Rm
+ } else if ((tinstr & 0x0F00) == 0x600) {
+ if (BIT(tinstr, 5) == 0) {
+ // SETEND
+ *ainstr = 0xF1010000 // base
+ | (BIT(tinstr, 3) << 9); // endian specifier
+ } else {
+ // CPS
+ *ainstr = 0xF1080000 // base
+ | (BIT(tinstr, 0) << 6) // fiq bit
+ | (BIT(tinstr, 1) << 7) // irq bit
+ | (BIT(tinstr, 2) << 8) // abort bit
+ | (BIT(tinstr, 4) << 18); // enable bit
+ }
+ } else if ((tinstr & 0x0F00) == 0x0a00) {
+ static const ARMword subset[3] = {
+ 0xE6BF0F30, // REV
+ 0xE6BF0FB0, // REV16
+ 0xE6FF0FB0, // REVSH
+ };
+
+ *ainstr = subset[BITS(tinstr, 6, 7)] // base
+ | (BITS(tinstr, 0, 2) << 12) // Rd
+ | BITS(tinstr, 3, 5); // Rm
+ } else {
static const ARMword subset[4] = {
0xE92D0000, // STMDB sp!,{rlist}
0xE92D4000, // STMDB sp!,{rlist,lr}
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index ed607646e..7471def57 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/color.h"
#include "common/common_types.h"
#include "core/arm/arm_interface.h"
@@ -22,7 +23,6 @@
#include "video_core/command_processor.h"
#include "video_core/utils.h"
#include "video_core/video_core.h"
-#include "video_core/color.h"
namespace GPU {
@@ -30,8 +30,8 @@ Regs g_regs;
/// True if the current frame was skipped
bool g_skip_frame;
-/// 268MHz / gpu_refresh_rate frames per second
-static u64 frame_ticks;
+/// 268MHz CPU clocks / 60Hz frames per second
+const u64 frame_ticks = 268123480ull / 60;
/// Event id for CoreTiming
static int vblank_event;
/// Total number of frames drawn
@@ -357,7 +357,6 @@ void Init() {
framebuffer_sub.color_format = Regs::PixelFormat::RGB8;
framebuffer_sub.active_fb = 0;
- frame_ticks = 268123480 / Settings::values.gpu_refresh_rate;
last_skip_frame = false;
g_skip_frame = false;
frame_count = 0;
diff --git a/src/core/settings.h b/src/core/settings.h
index 54c1023b8..5a70d157a 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -35,7 +35,6 @@ struct Values {
int pad_cright_key;
// Core
- int gpu_refresh_rate;
int frame_skip;
// Data Storage
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 0258a3255..5c7f4ae18 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -29,11 +29,9 @@ set(HEADERS
renderer_opengl/pica_to_gl.h
renderer_opengl/renderer_opengl.h
clipper.h
- color.h
command_processor.h
gpu_debugger.h
hwrasterizer_base.h
- math.h
pica.h
primitive_assembly.h
rasterizer.h
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 29ba6b769..b46fadd9f 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -56,7 +56,17 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
// Trigger IRQ
case PICA_REG_INDEX(trigger_irq):
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D);
- return;
+ break;
+
+ case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c):
+ case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d):
+ {
+ unsigned index = id - PICA_REG_INDEX(command_buffer.trigger[0]);
+ u32* head_ptr = (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index));
+ g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr;
+ g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32);
+ break;
+ }
// It seems like these trigger vertex rendering
case PICA_REG_INDEX(trigger_draw):
@@ -363,38 +373,34 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
g_debug_context->OnEvent(DebugContext::Event::CommandProcessed, reinterpret_cast<void*>(&id));
}
-static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) {
- const CommandHeader& header = *(const CommandHeader*)(&first_command_word[1]);
-
- u32* read_pointer = (u32*)first_command_word;
-
- const u32 write_mask = ((header.parameter_mask & 0x1) ? (0xFFu << 0) : 0u) |
- ((header.parameter_mask & 0x2) ? (0xFFu << 8) : 0u) |
- ((header.parameter_mask & 0x4) ? (0xFFu << 16) : 0u) |
- ((header.parameter_mask & 0x8) ? (0xFFu << 24) : 0u);
-
- WritePicaReg(header.cmd_id, *read_pointer, write_mask);
- read_pointer += 2;
-
- for (unsigned int i = 1; i < 1+header.extra_data_length; ++i) {
- u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0);
- WritePicaReg(cmd, *read_pointer, write_mask);
- ++read_pointer;
- }
-
- // align read pointer to 8 bytes
- if ((first_command_word - read_pointer) % 2)
- ++read_pointer;
-
- return read_pointer - first_command_word;
-}
-
void ProcessCommandList(const u32* list, u32 size) {
- u32* read_pointer = (u32*)list;
- u32 list_length = size / sizeof(u32);
-
- while (read_pointer < list + list_length) {
- read_pointer += ExecuteCommandBlock(read_pointer);
+ g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = list;
+ g_state.cmd_list.length = size / sizeof(u32);
+
+ while (g_state.cmd_list.current_ptr < g_state.cmd_list.head_ptr + g_state.cmd_list.length) {
+ // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
+ static const u32 expand_bits_to_bytes[] = {
+ 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
+ 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
+ 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff,
+ 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff
+ };
+
+ // Align read pointer to 8 bytes
+ if ((g_state.cmd_list.head_ptr - g_state.cmd_list.current_ptr) % 2 != 0)
+ ++g_state.cmd_list.current_ptr;
+
+ u32 value = *g_state.cmd_list.current_ptr++;
+ const CommandHeader header = { *g_state.cmd_list.current_ptr++ };
+ const u32 write_mask = expand_bits_to_bytes[header.parameter_mask];
+ u32 cmd = header.cmd_id;
+
+ WritePicaReg(cmd, value, write_mask);
+
+ for (unsigned i = 0; i < header.extra_data_length; ++i) {
+ u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0);
+ WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, write_mask);
+ }
}
}
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index b92cd1a7e..7b8ab72b6 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -17,11 +17,11 @@
#include <nihstro/shader_binary.h>
#include "common/assert.h"
+#include "common/color.h"
#include "common/file_util.h"
#include "common/math_util.h"
+#include "common/vector_math.h"
-#include "video_core/color.h"
-#include "video_core/math.h"
#include "video_core/pica.h"
#include "video_core/utils.h"
#include "video_core/video_core.h"
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index f361a5385..7926d64ec 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -12,7 +12,8 @@
#include <mutex>
#include <vector>
-#include "video_core/math.h"
+#include "common/vector_math.h"
+
#include "video_core/pica.h"
namespace Pica {
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 6ebeb08f7..684ec9818 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -15,8 +15,7 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/logging/log.h"
-
-#include "math.h"
+#include "common/vector_math.h"
namespace Pica {
@@ -162,6 +161,25 @@ struct Regs {
ETC1A4 = 13, // compressed
};
+ enum class LogicOp : u32 {
+ Clear = 0,
+ And = 1,
+ AndReverse = 2,
+ Copy = 3,
+ Set = 4,
+ CopyInverted = 5,
+ NoOp = 6,
+ Invert = 7,
+ Nand = 8,
+ Or = 9,
+ Nor = 10,
+ Xor = 11,
+ Equiv = 12,
+ AndInverted = 13,
+ OrReverse = 14,
+ OrInverted = 15,
+ };
+
static unsigned NibblesPerPixel(TextureFormat format) {
switch (format) {
case TextureFormat::RGBA8:
@@ -221,6 +239,7 @@ struct Regs {
enum class Source : u32 {
PrimaryColor = 0x0,
PrimaryFragmentColor = 0x1,
+ SecondaryFragmentColor = 0x2,
Texture0 = 0x3,
Texture1 = 0x4,
@@ -413,12 +432,8 @@ struct Regs {
} alpha_blending;
union {
- enum Op {
- Set = 4,
- };
-
- BitField<0, 4, Op> op;
- } logic_op;
+ BitField<0, 4, LogicOp> logic_op;
+ };
union {
BitField< 0, 8, u32> r;
@@ -708,7 +723,33 @@ struct Regs {
u32 set_value[3];
} vs_default_attributes_setup;
- INSERT_PADDING_WORDS(0x28);
+ INSERT_PADDING_WORDS(0x2);
+
+ struct {
+ // There are two channels that can be used to configure the next command buffer, which
+ // can be then executed by writing to the "trigger" registers. There are two reasons why a
+ // game might use this feature:
+ // 1) With this, an arbitrary number of additional command buffers may be executed in
+ // sequence without requiring any intervention of the CPU after the initial one is
+ // kicked off.
+ // 2) Games can configure these registers to provide a command list subroutine mechanism.
+
+ BitField< 0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer
+ BitField< 0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer
+ u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to
+
+ unsigned GetSize(unsigned index) const {
+ ASSERT(index < 2);
+ return 8 * size[index];
+ }
+
+ PAddr GetPhysicalAddress(unsigned index) const {
+ ASSERT(index < 2);
+ return (PAddr)(8 * addr[index]);
+ }
+ } command_buffer;
+
+ INSERT_PADDING_WORDS(0x20);
enum class TriangleTopology : u32 {
List = 0,
@@ -861,6 +902,7 @@ struct Regs {
ADD_FIELD(trigger_draw);
ADD_FIELD(trigger_draw_indexed);
ADD_FIELD(vs_default_attributes_setup);
+ ADD_FIELD(command_buffer);
ADD_FIELD(triangle_topology);
ADD_FIELD(vs_bool_uniforms);
ADD_FIELD(vs_int_uniforms);
@@ -938,6 +980,7 @@ ASSERT_REG_POSITION(num_vertices, 0x228);
ASSERT_REG_POSITION(trigger_draw, 0x22e);
ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232);
+ASSERT_REG_POSITION(command_buffer, 0x238);
ASSERT_REG_POSITION(triangle_topology, 0x25e);
ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0);
ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1);
@@ -1053,21 +1096,12 @@ private:
float value;
};
-union CommandHeader {
- CommandHeader(u32 h) : hex(h) {}
-
- u32 hex;
-
- BitField< 0, 16, u32> cmd_id;
- BitField<16, 4, u32> parameter_mask;
- BitField<20, 11, u32> extra_data_length;
- BitField<31, 1, u32> group_commands;
-};
-
/// Struct used to describe current Pica state
struct State {
+ /// Pica registers
Regs regs;
+ /// Vertex shader memory
struct {
struct {
Math::Vec4<float24> f[96];
@@ -1080,6 +1114,13 @@ struct State {
std::array<u32, 1024> program_code;
std::array<u32, 1024> swizzle_data;
} vs;
+
+ /// Current Pica command list
+ struct {
+ const u32* head_ptr;
+ const u32* current_ptr;
+ u32 length;
+ } cmd_list;
};
/// Initialize Pica state
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 6df3a74f2..59d156ee7 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -4,6 +4,7 @@
#include <algorithm>
+#include "common/color.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "common/profiler.h"
@@ -13,7 +14,6 @@
#include "debug_utils/debug_utils.h"
#include "math.h"
-#include "color.h"
#include "pica.h"
#include "rasterizer.h"
#include "vertex_shader.h"
@@ -402,11 +402,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
auto GetSource = [&](Source source) -> Math::Vec4<u8> {
switch (source) {
- // TODO: What's the difference between these two?
case Source::PrimaryColor:
+
+ // HACK: Until we implement fragment lighting, use primary_color
case Source::PrimaryFragmentColor:
return primary_color;
+ // HACK: Until we implement fragment lighting, use zero
+ case Source::SecondaryFragmentColor:
+ return {0, 0, 0, 0};
+
case Source::Texture0:
return texture_color[0];
@@ -570,6 +575,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
case Operation::Add:
return std::min(255, input[0] + input[1]);
+ case Operation::AddSigned:
+ {
+ // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
+ auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128;
+ return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255));
+ }
+
case Operation::Lerp:
return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
@@ -808,10 +820,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
}
};
- using BlendEquation = Regs::BlendEquation;
static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
- BlendEquation equation) {
+ Regs::BlendEquation equation) {
Math::Vec4<int> result;
auto src_result = (src * srcfactor).Cast<int>();
@@ -866,8 +877,63 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb);
blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a();
} else {
- LOG_CRITICAL(HW_GPU, "logic op: %x", output_merger.logic_op);
- UNIMPLEMENTED();
+ static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 {
+ switch (op) {
+ case Regs::LogicOp::Clear:
+ return 0;
+
+ case Regs::LogicOp::And:
+ return src & dest;
+
+ case Regs::LogicOp::AndReverse:
+ return src & ~dest;
+
+ case Regs::LogicOp::Copy:
+ return src;
+
+ case Regs::LogicOp::Set:
+ return 255;
+
+ case Regs::LogicOp::CopyInverted:
+ return ~src;
+
+ case Regs::LogicOp::NoOp:
+ return dest;
+
+ case Regs::LogicOp::Invert:
+ return ~dest;
+
+ case Regs::LogicOp::Nand:
+ return ~(src & dest);
+
+ case Regs::LogicOp::Or:
+ return src | dest;
+
+ case Regs::LogicOp::Nor:
+ return ~(src | dest);
+
+ case Regs::LogicOp::Xor:
+ return src ^ dest;
+
+ case Regs::LogicOp::Equiv:
+ return ~(src ^ dest);
+
+ case Regs::LogicOp::AndInverted:
+ return ~src & dest;
+
+ case Regs::LogicOp::OrReverse:
+ return src | ~dest;
+
+ case Regs::LogicOp::OrInverted:
+ return ~src | dest;
+ }
+ };
+
+ blend_output = Math::MakeVec(
+ LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op),
+ LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op),
+ LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op),
+ LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op));
}
const Math::Vec4<u8> result = {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index bacdb7172..d31c46cca 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -2,10 +2,11 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/color.h"
+
#include "core/settings.h"
#include "core/hw/gpu.h"
-#include "video_core/color.h"
#include "video_core/pica.h"
#include "video_core/utils.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -135,6 +136,7 @@ void RasterizerOpenGL::Reset() {
SyncBlendFuncs();
SyncBlendColor();
SyncAlphaTest();
+ SyncLogicOp();
SyncStencilTest();
SyncDepthTest();
@@ -249,6 +251,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
SyncDepthTest();
break;
+ // Logic op
+ case PICA_REG_INDEX(output_merger.logic_op):
+ SyncLogicOp();
+ break;
+
// TEV stage 0
case PICA_REG_INDEX(tev_stage0.color_source1):
SyncTevSources(0, regs.tev_stage0);
@@ -633,6 +640,10 @@ void RasterizerOpenGL::SyncAlphaTest() {
glUniform1f(uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f);
}
+void RasterizerOpenGL::SyncLogicOp() {
+ state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.output_merger.logic_op);
+}
+
void RasterizerOpenGL::SyncStencilTest() {
// TODO: Implement stencil test, mask, and op
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9896f8d04..d7d422b1f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -125,6 +125,9 @@ private:
/// Syncs the alpha test states to match the PICA register
void SyncAlphaTest();
+ /// Syncs the logic op states to match the PICA register
+ void SyncLogicOp();
+
/// Syncs the stencil test states to match the PICA register
void SyncStencilTest();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 6f88a8b21..2e4110a88 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -4,13 +4,13 @@
#include "common/make_unique.h"
#include "common/math_util.h"
+#include "common/vector_math.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/pica_to_gl.h"
#include "video_core/debug_utils/debug_utils.h"
-#include "video_core/math.h"
RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
FullFlush();
diff --git a/src/video_core/renderer_opengl/gl_shaders.h b/src/video_core/renderer_opengl/gl_shaders.h
index 8f0941230..a8cb2f595 100644
--- a/src/video_core/renderer_opengl/gl_shaders.h
+++ b/src/video_core/renderer_opengl/gl_shaders.h
@@ -69,15 +69,16 @@ const char g_fragment_shader_hw[] = R"(
#define NUM_VTX_ATTR 7
#define NUM_TEV_STAGES 6
-#define SOURCE_PRIMARYCOLOR 0x0
-#define SOURCE_PRIMARYFRAGMENTCOLOR 0x1
-#define SOURCE_TEXTURE0 0x3
-#define SOURCE_TEXTURE1 0x4
-#define SOURCE_TEXTURE2 0x5
-#define SOURCE_TEXTURE3 0x6
-#define SOURCE_PREVIOUSBUFFER 0xd
-#define SOURCE_CONSTANT 0xe
-#define SOURCE_PREVIOUS 0xf
+#define SOURCE_PRIMARYCOLOR 0x0
+#define SOURCE_PRIMARYFRAGMENTCOLOR 0x1
+#define SOURCE_SECONDARYFRAGMENTCOLOR 0x2
+#define SOURCE_TEXTURE0 0x3
+#define SOURCE_TEXTURE1 0x4
+#define SOURCE_TEXTURE2 0x5
+#define SOURCE_TEXTURE3 0x6
+#define SOURCE_PREVIOUSBUFFER 0xd
+#define SOURCE_CONSTANT 0xe
+#define SOURCE_PREVIOUS 0xf
#define COLORMODIFIER_SOURCECOLOR 0x0
#define COLORMODIFIER_ONEMINUSSOURCECOLOR 0x1
@@ -151,8 +152,11 @@ vec4 GetSource(int source) {
if (source == SOURCE_PRIMARYCOLOR) {
return o[2];
} else if (source == SOURCE_PRIMARYFRAGMENTCOLOR) {
- // HACK: Uses color value, but should really use fragment lighting output
+ // HACK: Until we implement fragment lighting, use primary_color
return o[2];
+ } else if (source == SOURCE_SECONDARYFRAGMENTCOLOR) {
+ // HACK: Until we implement fragment lighting, use zero
+ return vec4(0.0, 0.0, 0.0, 0.0);
} else if (source == SOURCE_TEXTURE0) {
return texture(tex[0], o[3].xy);
} else if (source == SOURCE_TEXTURE1) {
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 0d7ba1983..9c5f38f94 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -32,6 +32,8 @@ OpenGLState::OpenGLState() {
blend.color.blue = 0.0f;
blend.color.alpha = 0.0f;
+ logic_op = GL_COPY;
+
for (auto& texture_unit : texture_units) {
texture_unit.enabled_2d = false;
texture_unit.texture_2d = 0;
@@ -99,8 +101,13 @@ void OpenGLState::Apply() {
if (blend.enabled != cur_state.blend.enabled) {
if (blend.enabled) {
glEnable(GL_BLEND);
+
+ cur_state.logic_op = GL_COPY;
+ glLogicOp(cur_state.logic_op);
+ glDisable(GL_COLOR_LOGIC_OP);
} else {
glDisable(GL_BLEND);
+ glEnable(GL_COLOR_LOGIC_OP);
}
}
@@ -118,6 +125,10 @@ void OpenGLState::Apply() {
glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func, blend.dst_a_func);
}
+ if (logic_op != cur_state.logic_op) {
+ glLogicOp(logic_op);
+ }
+
// Textures
for (unsigned texture_index = 0; texture_index < ARRAY_SIZE(texture_units); ++texture_index) {
if (texture_units[texture_index].enabled_2d != cur_state.texture_units[texture_index].enabled_2d) {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 63dba2761..6b97721d6 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -42,6 +42,8 @@ public:
} color; // GL_BLEND_COLOR
} blend;
+ GLenum logic_op; // GL_LOGIC_OP_MODE
+
// 3 texture units - one for each that is used in PICA fragment shader emulation
struct {
bool enabled_2d; // GL_TEXTURE_2D
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index f8763e71b..e566f9f7a 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -71,6 +71,37 @@ inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) {
return blend_func_table[(unsigned)factor];
}
+inline GLenum LogicOp(Pica::Regs::LogicOp op) {
+ static const GLenum logic_op_table[] = {
+ GL_CLEAR, // Clear
+ GL_AND, // And
+ GL_AND_REVERSE, // AndReverse
+ GL_COPY, // Copy
+ GL_SET, // Set
+ GL_COPY_INVERTED, // CopyInverted
+ GL_NOOP, // NoOp
+ GL_INVERT, // Invert
+ GL_NAND, // Nand
+ GL_OR, // Or
+ GL_NOR, // Nor
+ GL_XOR, // Xor
+ GL_EQUIV, // Equiv
+ GL_AND_INVERTED, // AndInverted
+ GL_OR_REVERSE, // OrReverse
+ GL_OR_INVERTED, // OrInverted
+ };
+
+ // Range check table for input
+ if ((unsigned)op >= ARRAY_SIZE(logic_op_table)) {
+ LOG_CRITICAL(Render_OpenGL, "Unknown logic op %d", op);
+ UNREACHABLE();
+
+ return GL_COPY;
+ }
+
+ return logic_op_table[(unsigned)op];
+}
+
inline GLenum CompareFunc(Pica::Regs::CompareFunc func) {
static const GLenum compare_func_table[] = {
GL_NEVER, // CompareFunc::Never
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 4ebb42429..87006a832 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -119,17 +119,13 @@ static void ProcessShaderCode(VertexShaderState& state) {
switch (instr.opcode.Value().GetInfo().type) {
case OpCode::Type::Arithmetic:
{
- bool is_inverted = 0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed);
- // TODO: We don't really support this properly: For instance, the address register
- // offset needs to be applied to SRC2 instead, etc.
- // For now, we just abort in this situation.
- ASSERT_MSG(!is_inverted, "Bad condition...");
+ const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
const int address_offset = (instr.common.address_register_index == 0)
? 0 : state.address_registers[instr.common.address_register_index - 1];
- const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset);
- const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted));
+ const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset));
+ const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset));
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
@@ -208,6 +204,15 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
break;
+ case OpCode::Id::MIN:
+ for (int i = 0; i < 4; ++i) {
+ if (!swizzle.DestComponentEnabled(i))
+ continue;
+
+ dest[i] = std::min(src1[i], src2[i]);
+ }
+ break;
+
case OpCode::Id::DP3:
case OpCode::Id::DP4:
{
@@ -279,6 +284,16 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
+ case OpCode::Id::SLT:
+ case OpCode::Id::SLTI:
+ for (int i = 0; i < 4; ++i) {
+ if (!swizzle.DestComponentEnabled(i))
+ continue;
+
+ dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
+ }
+ break;
+
case OpCode::Id::CMP:
for (int i = 0; i < 2; ++i) {
// TODO: Can you restrict to one compare via dest masking?