diff options
25 files changed, 336 insertions, 100 deletions
diff --git a/src/citra/config.cpp b/src/citra/config.cpp index 846479fd7..1378567c1 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp @@ -66,7 +66,6 @@ void Config::ReadValues() { Settings::values.pad_cright_key = glfw_config->GetInteger("Controls", "pad_cright", GLFW_KEY_L); // Core - Settings::values.gpu_refresh_rate = glfw_config->GetInteger("Core", "gpu_refresh_rate", 30); Settings::values.frame_skip = glfw_config->GetInteger("Core", "frame_skip", 0); // Renderer diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp index 460f4ec07..2a9af1f38 100644 --- a/src/citra_qt/config.cpp +++ b/src/citra_qt/config.cpp @@ -49,7 +49,6 @@ void Config::ReadValues() { qt_config->endGroup(); qt_config->beginGroup("Core"); - Settings::values.gpu_refresh_rate = qt_config->value("gpu_refresh_rate", 30).toInt(); Settings::values.frame_skip = qt_config->value("frame_skip", 0).toInt(); qt_config->endGroup(); @@ -102,7 +101,6 @@ void Config::SaveValues() { qt_config->endGroup(); qt_config->beginGroup("Core"); - qt_config->setValue("gpu_refresh_rate", Settings::values.gpu_refresh_rate); qt_config->setValue("frame_skip", Settings::values.frame_skip); qt_config->endGroup(); diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp index 804c735a3..cabf5fe07 100644 --- a/src/citra_qt/debugger/graphics_cmdlists.cpp +++ b/src/citra_qt/debugger/graphics_cmdlists.cpp @@ -11,10 +11,10 @@ #include <QSpinBox> #include <QComboBox> -#include "video_core/pica.h" -#include "video_core/math.h" +#include "common/vector_math.h" #include "video_core/debug_utils/debug_utils.h" +#include "video_core/pica.h" #include "graphics_cmdlists.h" diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp index e07344591..6bbe7572c 100644 --- a/src/citra_qt/debugger/graphics_framebuffer.cpp +++ b/src/citra_qt/debugger/graphics_framebuffer.cpp @@ -9,10 +9,11 @@ #include <QPushButton> #include <QSpinBox> +#include "common/color.h" + #include "core/hw/gpu.h" #include "core/memory.h" -#include "video_core/color.h" #include "video_core/pica.h" #include "video_core/utils.h" diff --git a/src/citra_qt/main.ui b/src/citra_qt/main.ui index 0942c28c8..9a809ee6c 100644 --- a/src/citra_qt/main.ui +++ b/src/citra_qt/main.ui @@ -24,7 +24,20 @@ <bool>true</bool> </property> <widget class="QWidget" name="centralwidget"> - <layout class="QHBoxLayout" name="horizontalLayout"/> + <layout class="QHBoxLayout" name="horizontalLayout"> + <property name="leftMargin"> + <number>0</number> + </property> + <property name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + </layout> </widget> <widget class="QMenuBar" name="menubar"> <property name="geometry"> @@ -92,7 +105,7 @@ </action> <action name="action_Start"> <property name="enabled"> - <bool>false</bool> + <bool>false</bool> </property> <property name="text"> <string>&Start</string> diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index dbaaac77b..e78f4f144 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -24,6 +24,7 @@ set(HEADERS bit_field.h break_points.h chunk_file.h + color.h common_funcs.h common_paths.h common_types.h @@ -54,6 +55,7 @@ set(HEADERS thread_queue_list.h thunk.h timer.h + vector_math.h ) create_directory_groups(${SRCS} ${HEADERS}) diff --git a/src/video_core/color.h b/src/common/color.h index 4d2026eb0..422fdc8af 100644 --- a/src/video_core/color.h +++ b/src/common/color.h @@ -6,8 +6,7 @@ #include "common/common_types.h" #include "common/swap.h" - -#include "video_core/math.h" +#include "common/vector_math.h" namespace Color { diff --git a/src/video_core/math.h b/src/common/vector_math.h index 4928c9bf2..4928c9bf2 100644 --- a/src/video_core/math.h +++ b/src/common/vector_math.h diff --git a/src/core/arm/dyncom/arm_dyncom_thumb.cpp b/src/core/arm/dyncom/arm_dyncom_thumb.cpp index 2fc8170be..83b532aac 100644 --- a/src/core/arm/dyncom/arm_dyncom_thumb.cpp +++ b/src/core/arm/dyncom/arm_dyncom_thumb.cpp @@ -274,9 +274,46 @@ tdstate thumb_translate(u32 addr, u32 instr, u32* ainstr, u32* inst_size) { ? 0xE24DDF00 // SUB : 0xE28DDF00) // ADD |(tinstr & 0x007F); // off7 - } else if ((tinstr & 0x0F00) == 0x0e00) - *ainstr = 0xEF000000 | 0x180000; // base | BKPT mask - else { + } else if ((tinstr & 0x0F00) == 0x0e00) { + // BKPT + *ainstr = 0xEF000000 // base + | BITS(tinstr, 0, 3) // imm4 field; + | (BITS(tinstr, 4, 7) << 8); // beginning 4 bits of imm12 + } else if ((tinstr & 0x0F00) == 0x0200) { + static const ARMword subset[4] = { + 0xE6BF0070, // SXTH + 0xE6AF0070, // SXTB + 0xE6FF0070, // UXTH + 0xE6EF0070, // UXTB + }; + + *ainstr = subset[BITS(tinstr, 6, 7)] // base + | (BITS(tinstr, 0, 2) << 12) // Rd + | BITS(tinstr, 3, 5); // Rm + } else if ((tinstr & 0x0F00) == 0x600) { + if (BIT(tinstr, 5) == 0) { + // SETEND + *ainstr = 0xF1010000 // base + | (BIT(tinstr, 3) << 9); // endian specifier + } else { + // CPS + *ainstr = 0xF1080000 // base + | (BIT(tinstr, 0) << 6) // fiq bit + | (BIT(tinstr, 1) << 7) // irq bit + | (BIT(tinstr, 2) << 8) // abort bit + | (BIT(tinstr, 4) << 18); // enable bit + } + } else if ((tinstr & 0x0F00) == 0x0a00) { + static const ARMword subset[3] = { + 0xE6BF0F30, // REV + 0xE6BF0FB0, // REV16 + 0xE6FF0FB0, // REVSH + }; + + *ainstr = subset[BITS(tinstr, 6, 7)] // base + | (BITS(tinstr, 0, 2) << 12) // Rd + | BITS(tinstr, 3, 5); // Rm + } else { static const ARMword subset[4] = { 0xE92D0000, // STMDB sp!,{rlist} 0xE92D4000, // STMDB sp!,{rlist,lr} diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index ed607646e..7471def57 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/color.h" #include "common/common_types.h" #include "core/arm/arm_interface.h" @@ -22,7 +23,6 @@ #include "video_core/command_processor.h" #include "video_core/utils.h" #include "video_core/video_core.h" -#include "video_core/color.h" namespace GPU { @@ -30,8 +30,8 @@ Regs g_regs; /// True if the current frame was skipped bool g_skip_frame; -/// 268MHz / gpu_refresh_rate frames per second -static u64 frame_ticks; +/// 268MHz CPU clocks / 60Hz frames per second +const u64 frame_ticks = 268123480ull / 60; /// Event id for CoreTiming static int vblank_event; /// Total number of frames drawn @@ -357,7 +357,6 @@ void Init() { framebuffer_sub.color_format = Regs::PixelFormat::RGB8; framebuffer_sub.active_fb = 0; - frame_ticks = 268123480 / Settings::values.gpu_refresh_rate; last_skip_frame = false; g_skip_frame = false; frame_count = 0; diff --git a/src/core/settings.h b/src/core/settings.h index 54c1023b8..5a70d157a 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -35,7 +35,6 @@ struct Values { int pad_cright_key; // Core - int gpu_refresh_rate; int frame_skip; // Data Storage diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 0258a3255..5c7f4ae18 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -29,11 +29,9 @@ set(HEADERS renderer_opengl/pica_to_gl.h renderer_opengl/renderer_opengl.h clipper.h - color.h command_processor.h gpu_debugger.h hwrasterizer_base.h - math.h pica.h primitive_assembly.h rasterizer.h diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 29ba6b769..b46fadd9f 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -56,7 +56,17 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { // Trigger IRQ case PICA_REG_INDEX(trigger_irq): GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); - return; + break; + + case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): + case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): + { + unsigned index = id - PICA_REG_INDEX(command_buffer.trigger[0]); + u32* head_ptr = (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index)); + g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr; + g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32); + break; + } // It seems like these trigger vertex rendering case PICA_REG_INDEX(trigger_draw): @@ -363,38 +373,34 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::CommandProcessed, reinterpret_cast<void*>(&id)); } -static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) { - const CommandHeader& header = *(const CommandHeader*)(&first_command_word[1]); - - u32* read_pointer = (u32*)first_command_word; - - const u32 write_mask = ((header.parameter_mask & 0x1) ? (0xFFu << 0) : 0u) | - ((header.parameter_mask & 0x2) ? (0xFFu << 8) : 0u) | - ((header.parameter_mask & 0x4) ? (0xFFu << 16) : 0u) | - ((header.parameter_mask & 0x8) ? (0xFFu << 24) : 0u); - - WritePicaReg(header.cmd_id, *read_pointer, write_mask); - read_pointer += 2; - - for (unsigned int i = 1; i < 1+header.extra_data_length; ++i) { - u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0); - WritePicaReg(cmd, *read_pointer, write_mask); - ++read_pointer; - } - - // align read pointer to 8 bytes - if ((first_command_word - read_pointer) % 2) - ++read_pointer; - - return read_pointer - first_command_word; -} - void ProcessCommandList(const u32* list, u32 size) { - u32* read_pointer = (u32*)list; - u32 list_length = size / sizeof(u32); - - while (read_pointer < list + list_length) { - read_pointer += ExecuteCommandBlock(read_pointer); + g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = list; + g_state.cmd_list.length = size / sizeof(u32); + + while (g_state.cmd_list.current_ptr < g_state.cmd_list.head_ptr + g_state.cmd_list.length) { + // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF + static const u32 expand_bits_to_bytes[] = { + 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, + 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, + 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, + 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff + }; + + // Align read pointer to 8 bytes + if ((g_state.cmd_list.head_ptr - g_state.cmd_list.current_ptr) % 2 != 0) + ++g_state.cmd_list.current_ptr; + + u32 value = *g_state.cmd_list.current_ptr++; + const CommandHeader header = { *g_state.cmd_list.current_ptr++ }; + const u32 write_mask = expand_bits_to_bytes[header.parameter_mask]; + u32 cmd = header.cmd_id; + + WritePicaReg(cmd, value, write_mask); + + for (unsigned i = 0; i < header.extra_data_length; ++i) { + u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0); + WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, write_mask); + } } } diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index b92cd1a7e..7b8ab72b6 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -17,11 +17,11 @@ #include <nihstro/shader_binary.h> #include "common/assert.h" +#include "common/color.h" #include "common/file_util.h" #include "common/math_util.h" +#include "common/vector_math.h" -#include "video_core/color.h" -#include "video_core/math.h" #include "video_core/pica.h" #include "video_core/utils.h" #include "video_core/video_core.h" diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index f361a5385..7926d64ec 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -12,7 +12,8 @@ #include <mutex> #include <vector> -#include "video_core/math.h" +#include "common/vector_math.h" + #include "video_core/pica.h" namespace Pica { diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 6ebeb08f7..684ec9818 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -15,8 +15,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/logging/log.h" - -#include "math.h" +#include "common/vector_math.h" namespace Pica { @@ -162,6 +161,25 @@ struct Regs { ETC1A4 = 13, // compressed }; + enum class LogicOp : u32 { + Clear = 0, + And = 1, + AndReverse = 2, + Copy = 3, + Set = 4, + CopyInverted = 5, + NoOp = 6, + Invert = 7, + Nand = 8, + Or = 9, + Nor = 10, + Xor = 11, + Equiv = 12, + AndInverted = 13, + OrReverse = 14, + OrInverted = 15, + }; + static unsigned NibblesPerPixel(TextureFormat format) { switch (format) { case TextureFormat::RGBA8: @@ -221,6 +239,7 @@ struct Regs { enum class Source : u32 { PrimaryColor = 0x0, PrimaryFragmentColor = 0x1, + SecondaryFragmentColor = 0x2, Texture0 = 0x3, Texture1 = 0x4, @@ -413,12 +432,8 @@ struct Regs { } alpha_blending; union { - enum Op { - Set = 4, - }; - - BitField<0, 4, Op> op; - } logic_op; + BitField<0, 4, LogicOp> logic_op; + }; union { BitField< 0, 8, u32> r; @@ -708,7 +723,33 @@ struct Regs { u32 set_value[3]; } vs_default_attributes_setup; - INSERT_PADDING_WORDS(0x28); + INSERT_PADDING_WORDS(0x2); + + struct { + // There are two channels that can be used to configure the next command buffer, which + // can be then executed by writing to the "trigger" registers. There are two reasons why a + // game might use this feature: + // 1) With this, an arbitrary number of additional command buffers may be executed in + // sequence without requiring any intervention of the CPU after the initial one is + // kicked off. + // 2) Games can configure these registers to provide a command list subroutine mechanism. + + BitField< 0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer + BitField< 0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer + u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to + + unsigned GetSize(unsigned index) const { + ASSERT(index < 2); + return 8 * size[index]; + } + + PAddr GetPhysicalAddress(unsigned index) const { + ASSERT(index < 2); + return (PAddr)(8 * addr[index]); + } + } command_buffer; + + INSERT_PADDING_WORDS(0x20); enum class TriangleTopology : u32 { List = 0, @@ -861,6 +902,7 @@ struct Regs { ADD_FIELD(trigger_draw); ADD_FIELD(trigger_draw_indexed); ADD_FIELD(vs_default_attributes_setup); + ADD_FIELD(command_buffer); ADD_FIELD(triangle_topology); ADD_FIELD(vs_bool_uniforms); ADD_FIELD(vs_int_uniforms); @@ -938,6 +980,7 @@ ASSERT_REG_POSITION(num_vertices, 0x228); ASSERT_REG_POSITION(trigger_draw, 0x22e); ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232); +ASSERT_REG_POSITION(command_buffer, 0x238); ASSERT_REG_POSITION(triangle_topology, 0x25e); ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1); @@ -1053,21 +1096,12 @@ private: float value; }; -union CommandHeader { - CommandHeader(u32 h) : hex(h) {} - - u32 hex; - - BitField< 0, 16, u32> cmd_id; - BitField<16, 4, u32> parameter_mask; - BitField<20, 11, u32> extra_data_length; - BitField<31, 1, u32> group_commands; -}; - /// Struct used to describe current Pica state struct State { + /// Pica registers Regs regs; + /// Vertex shader memory struct { struct { Math::Vec4<float24> f[96]; @@ -1080,6 +1114,13 @@ struct State { std::array<u32, 1024> program_code; std::array<u32, 1024> swizzle_data; } vs; + + /// Current Pica command list + struct { + const u32* head_ptr; + const u32* current_ptr; + u32 length; + } cmd_list; }; /// Initialize Pica state diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 6df3a74f2..59d156ee7 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -4,6 +4,7 @@ #include <algorithm> +#include "common/color.h" #include "common/common_types.h" #include "common/math_util.h" #include "common/profiler.h" @@ -13,7 +14,6 @@ #include "debug_utils/debug_utils.h" #include "math.h" -#include "color.h" #include "pica.h" #include "rasterizer.h" #include "vertex_shader.h" @@ -402,11 +402,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, auto GetSource = [&](Source source) -> Math::Vec4<u8> { switch (source) { - // TODO: What's the difference between these two? case Source::PrimaryColor: + + // HACK: Until we implement fragment lighting, use primary_color case Source::PrimaryFragmentColor: return primary_color; + // HACK: Until we implement fragment lighting, use zero + case Source::SecondaryFragmentColor: + return {0, 0, 0, 0}; + case Source::Texture0: return texture_color[0]; @@ -570,6 +575,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, case Operation::Add: return std::min(255, input[0] + input[1]); + case Operation::AddSigned: + { + // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct + auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; + return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); + } + case Operation::Lerp: return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; @@ -808,10 +820,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, } }; - using BlendEquation = Regs::BlendEquation; static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, - BlendEquation equation) { + Regs::BlendEquation equation) { Math::Vec4<int> result; auto src_result = (src * srcfactor).Cast<int>(); @@ -866,8 +877,63 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); } else { - LOG_CRITICAL(HW_GPU, "logic op: %x", output_merger.logic_op); - UNIMPLEMENTED(); + static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 { + switch (op) { + case Regs::LogicOp::Clear: + return 0; + + case Regs::LogicOp::And: + return src & dest; + + case Regs::LogicOp::AndReverse: + return src & ~dest; + + case Regs::LogicOp::Copy: + return src; + + case Regs::LogicOp::Set: + return 255; + + case Regs::LogicOp::CopyInverted: + return ~src; + + case Regs::LogicOp::NoOp: + return dest; + + case Regs::LogicOp::Invert: + return ~dest; + + case Regs::LogicOp::Nand: + return ~(src & dest); + + case Regs::LogicOp::Or: + return src | dest; + + case Regs::LogicOp::Nor: + return ~(src | dest); + + case Regs::LogicOp::Xor: + return src ^ dest; + + case Regs::LogicOp::Equiv: + return ~(src ^ dest); + + case Regs::LogicOp::AndInverted: + return ~src & dest; + + case Regs::LogicOp::OrReverse: + return src | ~dest; + + case Regs::LogicOp::OrInverted: + return ~src | dest; + } + }; + + blend_output = Math::MakeVec( + LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), + LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), + LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), + LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); } const Math::Vec4<u8> result = { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index bacdb7172..d31c46cca 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -2,10 +2,11 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/color.h" + #include "core/settings.h" #include "core/hw/gpu.h" -#include "video_core/color.h" #include "video_core/pica.h" #include "video_core/utils.h" #include "video_core/renderer_opengl/gl_rasterizer.h" @@ -135,6 +136,7 @@ void RasterizerOpenGL::Reset() { SyncBlendFuncs(); SyncBlendColor(); SyncAlphaTest(); + SyncLogicOp(); SyncStencilTest(); SyncDepthTest(); @@ -249,6 +251,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncDepthTest(); break; + // Logic op + case PICA_REG_INDEX(output_merger.logic_op): + SyncLogicOp(); + break; + // TEV stage 0 case PICA_REG_INDEX(tev_stage0.color_source1): SyncTevSources(0, regs.tev_stage0); @@ -633,6 +640,10 @@ void RasterizerOpenGL::SyncAlphaTest() { glUniform1f(uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f); } +void RasterizerOpenGL::SyncLogicOp() { + state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.output_merger.logic_op); +} + void RasterizerOpenGL::SyncStencilTest() { // TODO: Implement stencil test, mask, and op } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9896f8d04..d7d422b1f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -125,6 +125,9 @@ private: /// Syncs the alpha test states to match the PICA register void SyncAlphaTest(); + /// Syncs the logic op states to match the PICA register + void SyncLogicOp(); + /// Syncs the stencil test states to match the PICA register void SyncStencilTest(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 6f88a8b21..2e4110a88 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -4,13 +4,13 @@ #include "common/make_unique.h" #include "common/math_util.h" +#include "common/vector_math.h" #include "core/memory.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/debug_utils/debug_utils.h" -#include "video_core/math.h" RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { FullFlush(); diff --git a/src/video_core/renderer_opengl/gl_shaders.h b/src/video_core/renderer_opengl/gl_shaders.h index 8f0941230..a8cb2f595 100644 --- a/src/video_core/renderer_opengl/gl_shaders.h +++ b/src/video_core/renderer_opengl/gl_shaders.h @@ -69,15 +69,16 @@ const char g_fragment_shader_hw[] = R"( #define NUM_VTX_ATTR 7 #define NUM_TEV_STAGES 6 -#define SOURCE_PRIMARYCOLOR 0x0 -#define SOURCE_PRIMARYFRAGMENTCOLOR 0x1 -#define SOURCE_TEXTURE0 0x3 -#define SOURCE_TEXTURE1 0x4 -#define SOURCE_TEXTURE2 0x5 -#define SOURCE_TEXTURE3 0x6 -#define SOURCE_PREVIOUSBUFFER 0xd -#define SOURCE_CONSTANT 0xe -#define SOURCE_PREVIOUS 0xf +#define SOURCE_PRIMARYCOLOR 0x0 +#define SOURCE_PRIMARYFRAGMENTCOLOR 0x1 +#define SOURCE_SECONDARYFRAGMENTCOLOR 0x2 +#define SOURCE_TEXTURE0 0x3 +#define SOURCE_TEXTURE1 0x4 +#define SOURCE_TEXTURE2 0x5 +#define SOURCE_TEXTURE3 0x6 +#define SOURCE_PREVIOUSBUFFER 0xd +#define SOURCE_CONSTANT 0xe +#define SOURCE_PREVIOUS 0xf #define COLORMODIFIER_SOURCECOLOR 0x0 #define COLORMODIFIER_ONEMINUSSOURCECOLOR 0x1 @@ -151,8 +152,11 @@ vec4 GetSource(int source) { if (source == SOURCE_PRIMARYCOLOR) { return o[2]; } else if (source == SOURCE_PRIMARYFRAGMENTCOLOR) { - // HACK: Uses color value, but should really use fragment lighting output + // HACK: Until we implement fragment lighting, use primary_color return o[2]; + } else if (source == SOURCE_SECONDARYFRAGMENTCOLOR) { + // HACK: Until we implement fragment lighting, use zero + return vec4(0.0, 0.0, 0.0, 0.0); } else if (source == SOURCE_TEXTURE0) { return texture(tex[0], o[3].xy); } else if (source == SOURCE_TEXTURE1) { diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 0d7ba1983..9c5f38f94 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -32,6 +32,8 @@ OpenGLState::OpenGLState() { blend.color.blue = 0.0f; blend.color.alpha = 0.0f; + logic_op = GL_COPY; + for (auto& texture_unit : texture_units) { texture_unit.enabled_2d = false; texture_unit.texture_2d = 0; @@ -99,8 +101,13 @@ void OpenGLState::Apply() { if (blend.enabled != cur_state.blend.enabled) { if (blend.enabled) { glEnable(GL_BLEND); + + cur_state.logic_op = GL_COPY; + glLogicOp(cur_state.logic_op); + glDisable(GL_COLOR_LOGIC_OP); } else { glDisable(GL_BLEND); + glEnable(GL_COLOR_LOGIC_OP); } } @@ -118,6 +125,10 @@ void OpenGLState::Apply() { glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func, blend.dst_a_func); } + if (logic_op != cur_state.logic_op) { + glLogicOp(logic_op); + } + // Textures for (unsigned texture_index = 0; texture_index < ARRAY_SIZE(texture_units); ++texture_index) { if (texture_units[texture_index].enabled_2d != cur_state.texture_units[texture_index].enabled_2d) { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 63dba2761..6b97721d6 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -42,6 +42,8 @@ public: } color; // GL_BLEND_COLOR } blend; + GLenum logic_op; // GL_LOGIC_OP_MODE + // 3 texture units - one for each that is used in PICA fragment shader emulation struct { bool enabled_2d; // GL_TEXTURE_2D diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index f8763e71b..e566f9f7a 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -71,6 +71,37 @@ inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) { return blend_func_table[(unsigned)factor]; } +inline GLenum LogicOp(Pica::Regs::LogicOp op) { + static const GLenum logic_op_table[] = { + GL_CLEAR, // Clear + GL_AND, // And + GL_AND_REVERSE, // AndReverse + GL_COPY, // Copy + GL_SET, // Set + GL_COPY_INVERTED, // CopyInverted + GL_NOOP, // NoOp + GL_INVERT, // Invert + GL_NAND, // Nand + GL_OR, // Or + GL_NOR, // Nor + GL_XOR, // Xor + GL_EQUIV, // Equiv + GL_AND_INVERTED, // AndInverted + GL_OR_REVERSE, // OrReverse + GL_OR_INVERTED, // OrInverted + }; + + // Range check table for input + if ((unsigned)op >= ARRAY_SIZE(logic_op_table)) { + LOG_CRITICAL(Render_OpenGL, "Unknown logic op %d", op); + UNREACHABLE(); + + return GL_COPY; + } + + return logic_op_table[(unsigned)op]; +} + inline GLenum CompareFunc(Pica::Regs::CompareFunc func) { static const GLenum compare_func_table[] = { GL_NEVER, // CompareFunc::Never diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 4ebb42429..87006a832 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -119,17 +119,13 @@ static void ProcessShaderCode(VertexShaderState& state) { switch (instr.opcode.Value().GetInfo().type) { case OpCode::Type::Arithmetic: { - bool is_inverted = 0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed); - // TODO: We don't really support this properly: For instance, the address register - // offset needs to be applied to SRC2 instead, etc. - // For now, we just abort in this situation. - ASSERT_MSG(!is_inverted, "Bad condition..."); + const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); const int address_offset = (instr.common.address_register_index == 0) ? 0 : state.address_registers[instr.common.address_register_index - 1]; - const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset); - const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); + const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset)); + const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset)); const bool negate_src1 = ((bool)swizzle.negate_src1 != false); const bool negate_src2 = ((bool)swizzle.negate_src2 != false); @@ -208,6 +204,15 @@ static void ProcessShaderCode(VertexShaderState& state) { } break; + case OpCode::Id::MIN: + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = std::min(src1[i], src2[i]); + } + break; + case OpCode::Id::DP3: case OpCode::Id::DP4: { @@ -279,6 +284,16 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + case OpCode::Id::SLT: + case OpCode::Id::SLTI: + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); + } + break; + case OpCode::Id::CMP: for (int i = 0; i < 2; ++i) { // TODO: Can you restrict to one compare via dest masking? |