diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/video_core/command_processor.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/debug_data.h | 186 | ||||
-rw-r--r-- | src/video_core/shader/shader.cpp | 26 | ||||
-rw-r--r-- | src/video_core/shader/shader.h | 199 | ||||
-rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 235 | ||||
-rw-r--r-- | src/video_core/shader/shader_interpreter.h | 7 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 14 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 2 |
9 files changed, 336 insertions, 338 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 9aa446a8f..6ca319b59 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -39,6 +39,7 @@ set(HEADERS rasterizer.h rasterizer_interface.h renderer_base.h + shader/debug_data.h shader/shader.h shader/shader_interpreter.h swrasterizer.h diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 8a5d8533c..04de3e6b1 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -138,7 +138,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (immediate_attribute_id >= regs.vs.num_input_attributes + 1) { immediate_attribute_id = 0; - Shader::UnitState<false> shader_unit; + Shader::UnitState shader_unit; g_state.vs.Setup(); // Send to vertex shader @@ -237,7 +237,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { unsigned int vertex_cache_pos = 0; vertex_cache_ids.fill(-1); - Shader::UnitState<false> shader_unit; + Shader::UnitState shader_unit; g_state.vs.Setup(); for (unsigned int index = 0; index < regs.num_vertices; ++index) { diff --git a/src/video_core/shader/debug_data.h b/src/video_core/shader/debug_data.h new file mode 100644 index 000000000..9e82122e1 --- /dev/null +++ b/src/video_core/shader/debug_data.h @@ -0,0 +1,186 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include "common/common_types.h" +#include "common/vector_math.h" +#include "video_core/pica_types.h" + +namespace Pica { +namespace Shader { + +/// Helper structure used to keep track of data useful for inspection of shader emulation +template <bool full_debugging> +struct DebugData; + +template <> +struct DebugData<false> { + // TODO: Hide these behind and interface and move them to DebugData<true> + u32 max_offset = 0; ///< maximum program counter ever reached + u32 max_opdesc_id = 0; ///< maximum swizzle pattern index ever used +}; + +template <> +struct DebugData<true> { + /// Records store the input and output operands of a particular instruction. + struct Record { + enum Type { + // Floating point arithmetic operands + SRC1 = 0x1, + SRC2 = 0x2, + SRC3 = 0x4, + + // Initial and final output operand value + DEST_IN = 0x8, + DEST_OUT = 0x10, + + // Current and next instruction offset (in words) + CUR_INSTR = 0x20, + NEXT_INSTR = 0x40, + + // Output address register value + ADDR_REG_OUT = 0x80, + + // Result of a comparison instruction + CMP_RESULT = 0x100, + + // Input values for conditional flow control instructions + COND_BOOL_IN = 0x200, + COND_CMP_IN = 0x400, + + // Input values for a loop + LOOP_INT_IN = 0x800, + }; + + Math::Vec4<float24> src1; + Math::Vec4<float24> src2; + Math::Vec4<float24> src3; + + Math::Vec4<float24> dest_in; + Math::Vec4<float24> dest_out; + + s32 address_registers[2]; + bool conditional_code[2]; + bool cond_bool; + bool cond_cmp[2]; + Math::Vec4<u8> loop_int; + + u32 instruction_offset; + u32 next_instruction; + + /// set of enabled fields (as a combination of Type flags) + unsigned mask = 0; + }; + + u32 max_offset = 0; ///< maximum program counter ever reached + u32 max_opdesc_id = 0; ///< maximum swizzle pattern index ever used + + /// List of records for each executed shader instruction + std::vector<DebugData<true>::Record> records; +}; + +/// Type alias for better readability +using DebugDataRecord = DebugData<true>::Record; + +/// Helper function to set a DebugData<true>::Record field based on the template enum parameter. +template <DebugDataRecord::Type type, typename ValueType> +inline void SetField(DebugDataRecord& record, ValueType value); + +template <> +inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { + record.src1.x = value[0]; + record.src1.y = value[1]; + record.src1.z = value[2]; + record.src1.w = value[3]; +} + +template <> +inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { + record.src2.x = value[0]; + record.src2.y = value[1]; + record.src2.z = value[2]; + record.src2.w = value[3]; +} + +template <> +inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { + record.src3.x = value[0]; + record.src3.y = value[1]; + record.src3.z = value[2]; + record.src3.w = value[3]; +} + +template <> +inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { + record.dest_in.x = value[0]; + record.dest_in.y = value[1]; + record.dest_in.z = value[2]; + record.dest_in.w = value[3]; +} + +template <> +inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { + record.dest_out.x = value[0]; + record.dest_out.y = value[1]; + record.dest_out.z = value[2]; + record.dest_out.w = value[3]; +} + +template <> +inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { + record.address_registers[0] = value[0]; + record.address_registers[1] = value[1]; +} + +template <> +inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { + record.conditional_code[0] = value[0]; + record.conditional_code[1] = value[1]; +} + +template <> +inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { + record.cond_bool = value; +} + +template <> +inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { + record.cond_cmp[0] = value[0]; + record.cond_cmp[1] = value[1]; +} + +template <> +inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { + record.loop_int = value; +} + +template <> +inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { + record.instruction_offset = value; +} + +template <> +inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { + record.next_instruction = value; +} + +/// Helper function to set debug information on the current shader iteration. +template <DebugDataRecord::Type type, typename ValueType> +inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { + // Debugging disabled => nothing to do +} + +template <DebugDataRecord::Type type, typename ValueType> +inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { + if (offset >= debug_data.records.size()) + debug_data.records.resize(offset + 1); + + SetField<type, ValueType>(debug_data.records[offset], value); + debug_data.records[offset].mask |= type; +} + +} // namespace Shader +} // namespace Pica diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index c7f23dab9..a4aa3c9e0 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -109,15 +109,12 @@ void ShaderSetup::Setup() { MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); -void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { +void ShaderSetup::Run(UnitState& state, const InputVertex& input, int num_attributes) { auto& config = g_state.regs.vs; auto& setup = g_state.vs; MICROPROFILE_SCOPE(GPU_Shader); - state.debug.max_offset = 0; - state.debug.max_opdesc_id = 0; - // Setup input register table const auto& attribute_register_map = config.input_register_map; @@ -128,22 +125,23 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num state.conditional_code[1] = false; #ifdef ARCHITECTURE_x86_64 - if (VideoCore::g_shader_jit_enabled) + if (VideoCore::g_shader_jit_enabled) { jit_shader->Run(setup, state, config.main_offset); - else - RunInterpreter(setup, state, config.main_offset); + } else { + DebugData<false> dummy_debug_data; + RunInterpreter(setup, state, dummy_debug_data, config.main_offset); + } #else - RunInterpreter(setup, state, config.main_offset); + DebugData<false> dummy_debug_data; + RunInterpreter(setup, state, dummy_debug_data, config.main_offset); #endif // ARCHITECTURE_x86_64 } DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { - UnitState<true> state; - - state.debug.max_offset = 0; - state.debug.max_opdesc_id = 0; + UnitState state; + DebugData<true> debug_data; // Setup input register table boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); @@ -154,8 +152,8 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_ state.conditional_code[0] = false; state.conditional_code[1] = false; - RunInterpreter(setup, state, config.main_offset); - return state.debug; + RunInterpreter(setup, state, debug_data, config.main_offset); + return debug_data; } } // namespace Shader diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 0111d8c0f..2b07759b9 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -8,8 +8,6 @@ #include <cstddef> #include <memory> #include <type_traits> -#include <vector> -#include <boost/container/static_vector.hpp> #include <nihstro/shader_bytecode.h> #include "common/assert.h" #include "common/common_funcs.h" @@ -17,6 +15,7 @@ #include "common/vector_math.h" #include "video_core/pica.h" #include "video_core/pica_types.h" +#include "video_core/shader/debug_data.h" using nihstro::RegisterType; using nihstro::SourceRegister; @@ -89,183 +88,12 @@ struct OutputRegisters { }; static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); -// Helper structure used to keep track of data useful for inspection of shader emulation -template <bool full_debugging> -struct DebugData; - -template <> -struct DebugData<false> { - // TODO: Hide these behind and interface and move them to DebugData<true> - u32 max_offset; // maximum program counter ever reached - u32 max_opdesc_id; // maximum swizzle pattern index ever used -}; - -template <> -struct DebugData<true> { - // Records store the input and output operands of a particular instruction. - struct Record { - enum Type { - // Floating point arithmetic operands - SRC1 = 0x1, - SRC2 = 0x2, - SRC3 = 0x4, - - // Initial and final output operand value - DEST_IN = 0x8, - DEST_OUT = 0x10, - - // Current and next instruction offset (in words) - CUR_INSTR = 0x20, - NEXT_INSTR = 0x40, - - // Output address register value - ADDR_REG_OUT = 0x80, - - // Result of a comparison instruction - CMP_RESULT = 0x100, - - // Input values for conditional flow control instructions - COND_BOOL_IN = 0x200, - COND_CMP_IN = 0x400, - - // Input values for a loop - LOOP_INT_IN = 0x800, - }; - - Math::Vec4<float24> src1; - Math::Vec4<float24> src2; - Math::Vec4<float24> src3; - - Math::Vec4<float24> dest_in; - Math::Vec4<float24> dest_out; - - s32 address_registers[2]; - bool conditional_code[2]; - bool cond_bool; - bool cond_cmp[2]; - Math::Vec4<u8> loop_int; - - u32 instruction_offset; - u32 next_instruction; - - // set of enabled fields (as a combination of Type flags) - unsigned mask = 0; - }; - - u32 max_offset; // maximum program counter ever reached - u32 max_opdesc_id; // maximum swizzle pattern index ever used - - // List of records for each executed shader instruction - std::vector<DebugData<true>::Record> records; -}; - -// Type alias for better readability -using DebugDataRecord = DebugData<true>::Record; - -// Helper function to set a DebugData<true>::Record field based on the template enum parameter. -template <DebugDataRecord::Type type, typename ValueType> -inline void SetField(DebugDataRecord& record, ValueType value); - -template <> -inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { - record.src1.x = value[0]; - record.src1.y = value[1]; - record.src1.z = value[2]; - record.src1.w = value[3]; -} - -template <> -inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { - record.src2.x = value[0]; - record.src2.y = value[1]; - record.src2.z = value[2]; - record.src2.w = value[3]; -} - -template <> -inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { - record.src3.x = value[0]; - record.src3.y = value[1]; - record.src3.z = value[2]; - record.src3.w = value[3]; -} - -template <> -inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { - record.dest_in.x = value[0]; - record.dest_in.y = value[1]; - record.dest_in.z = value[2]; - record.dest_in.w = value[3]; -} - -template <> -inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { - record.dest_out.x = value[0]; - record.dest_out.y = value[1]; - record.dest_out.z = value[2]; - record.dest_out.w = value[3]; -} - -template <> -inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { - record.address_registers[0] = value[0]; - record.address_registers[1] = value[1]; -} - -template <> -inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { - record.conditional_code[0] = value[0]; - record.conditional_code[1] = value[1]; -} - -template <> -inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { - record.cond_bool = value; -} - -template <> -inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { - record.cond_cmp[0] = value[0]; - record.cond_cmp[1] = value[1]; -} - -template <> -inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { - record.loop_int = value; -} - -template <> -inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { - record.instruction_offset = value; -} - -template <> -inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { - record.next_instruction = value; -} - -// Helper function to set debug information on the current shader iteration. -template <DebugDataRecord::Type type, typename ValueType> -inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { - // Debugging disabled => nothing to do -} - -template <DebugDataRecord::Type type, typename ValueType> -inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { - if (offset >= debug_data.records.size()) - debug_data.records.resize(offset + 1); - - SetField<type, ValueType>(debug_data.records[offset], value); - debug_data.records[offset].mask |= type; -} - /** * This structure contains the state information that needs to be unique for a shader unit. The 3DS * has four shader units that process shaders in parallel. At the present, Citra only implements a * single shader unit that processes all shaders serially. Putting the state information in a struct * here will make it easier for us to parallelize the shader processing later. */ -template <bool Debug> struct UnitState { struct Registers { // The registers are accessed by the shader JIT using SSE instructions, and are therefore @@ -283,8 +111,6 @@ struct UnitState { // TODO: How many bits do these actually have? s32 address_registers[3]; - DebugData<Debug> debug; - static size_t InputOffset(const SourceRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Input: @@ -332,21 +158,16 @@ struct ShaderSetup { std::array<Math::Vec4<u8>, 4> i; } uniforms; - static size_t UniformOffset(RegisterType type, unsigned index) { - switch (type) { - case RegisterType::FloatUniform: - return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>); - - case RegisterType::BoolUniform: - return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool); + static size_t GetFloatUniformOffset(unsigned index) { + return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>); + } - case RegisterType::IntUniform: - return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>); + static size_t GetBoolUniformOffset(unsigned index) { + return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool); + } - default: - UNREACHABLE(); - return 0; - } + static size_t GetIntUniformOffset(unsigned index) { + return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>); } std::array<u32, 1024> program_code; @@ -364,7 +185,7 @@ struct ShaderSetup { * @param input Input vertex into the shader * @param num_attributes The number of vertex shader attributes */ - void Run(UnitState<false>& state, const InputVertex& input, int num_attributes); + void Run(UnitState& state, const InputVertex& input, int num_attributes); /** * Produce debug information based on the given shader and input vertex diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 6abb6761f..70db4167e 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -6,6 +6,7 @@ #include <array> #include <cmath> #include <numeric> +#include <boost/container/static_vector.hpp> #include <nihstro/shader_bytecode.h> #include "common/assert.h" #include "common/common_types.h" @@ -38,12 +39,42 @@ struct CallStackElement { }; template <bool Debug> -void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { +void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data, + unsigned offset) { // TODO: Is there a maximal size for this? boost::container::static_vector<CallStackElement, 16> call_stack; - u32 program_counter = offset; + auto call = [&program_counter, &call_stack](u32 offset, u32 num_instructions, u32 return_offset, + u8 repeat_count, u8 loop_increment) { + // -1 to make sure when incrementing the PC we end up at the correct offset + program_counter = offset - 1; + ASSERT(call_stack.size() < call_stack.capacity()); + call_stack.push_back( + {offset + num_instructions, return_offset, repeat_count, loop_increment, offset}); + }; + + auto evaluate_condition = [&state](Instruction::FlowControlType flow_control) { + using Op = Instruction::FlowControlType::Op; + + bool result_x = flow_control.refx.Value() == state.conditional_code[0]; + bool result_y = flow_control.refy.Value() == state.conditional_code[1]; + + switch (flow_control.op) { + case Op::Or: + return result_x || result_y; + case Op::And: + return result_x && result_y; + case Op::JustX: + return result_x; + case Op::JustY: + return result_y; + default: + UNREACHABLE(); + return false; + } + }; + const auto& uniforms = g_state.vs.uniforms; const auto& swizzle_data = g_state.vs.swizzle_data; const auto& program_code = g_state.vs.program_code; @@ -74,20 +105,11 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned const Instruction instr = {program_code[program_counter]}; const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]}; - auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, - u32 num_instructions, u32 return_offset, - u8 repeat_count, u8 loop_increment) { - // -1 to make sure when incrementing the PC we end up at the correct offset - program_counter = offset - 1; - ASSERT(call_stack.size() < call_stack.capacity()); - call_stack.push_back( - {offset + num_instructions, return_offset, repeat_count, loop_increment, offset}); - }; - Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); + Record<DebugDataRecord::CUR_INSTR>(debug_data, iteration, program_counter); if (iteration > 0) - Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, program_counter); + Record<DebugDataRecord::NEXT_INSTR>(debug_data, iteration - 1, program_counter); - state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + program_counter); + debug_data.max_offset = std::max<u32>(debug_data.max_offset, 1 + program_counter); auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { switch (source_reg.GetRegisterType()) { @@ -155,54 +177,54 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] : dummy_vec4_float24; - state.debug.max_opdesc_id = - std::max<u32>(state.debug.max_opdesc_id, 1 + instr.common.operand_desc_id); + debug_data.max_opdesc_id = + std::max<u32>(debug_data.max_opdesc_id, 1 + instr.common.operand_desc_id); switch (instr.opcode.Value().EffectiveOpCode()) { case OpCode::Id::ADD: { - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; dest[i] = src1[i] + src2[i]; } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); break; } case OpCode::Id::MUL: { - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; dest[i] = src1[i] * src2[i]; } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); break; } case OpCode::Id::FLR: - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); break; case OpCode::Id::MAX: - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -212,13 +234,13 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned // max(NaN, 0) -> 0 dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i]; } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); break; case OpCode::Id::MIN: - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -228,16 +250,16 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned // min(NaN, 0) -> 0 dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i]; } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); break; case OpCode::Id::DP3: case OpCode::Id::DP4: case OpCode::Id::DPH: case OpCode::Id::DPHI: { - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode(); if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI) @@ -253,14 +275,14 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned dest[i] = dot; } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); break; } // Reciprocal case OpCode::Id::RCP: { - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -268,14 +290,14 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned dest[i] = rcp_res; } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); break; } // Reciprocal Square Root case OpCode::Id::RSQ: { - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -283,12 +305,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned dest[i] = rsq_res; } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); break; } case OpCode::Id::MOVA: { - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); for (int i = 0; i < 2; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -296,29 +318,29 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned // TODO: Figure out how the rounding is done on hardware state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); } - Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, + Record<DebugDataRecord::ADDR_REG_OUT>(debug_data, iteration, state.address_registers); break; } case OpCode::Id::MOV: { - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; dest[i] = src1[i]; } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); break; } case OpCode::Id::SGE: case OpCode::Id::SGEI: - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -326,14 +348,14 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); break; case OpCode::Id::SLT: case OpCode::Id::SLTI: - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -341,12 +363,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); break; case OpCode::Id::CMP: - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); for (int i = 0; i < 2; ++i) { // TODO: Can you restrict to one compare via dest masking? @@ -383,12 +405,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned break; } } - Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); + Record<DebugDataRecord::CMP_RESULT>(debug_data, iteration, state.conditional_code); break; case OpCode::Id::EX2: { - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); // EX2 only takes first component exp2 and writes it to all dest components float24 ex2_res = float24::FromFloat32(std::exp2(src1[0].ToFloat32())); @@ -399,13 +421,13 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned dest[i] = ex2_res; } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); break; } case OpCode::Id::LG2: { - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); // LG2 only takes the first component log2 and writes it to all dest components float24 lg2_res = float24::FromFloat32(std::log2(src1[0].ToFloat32())); @@ -416,7 +438,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned dest[i] = lg2_res; } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); break; } @@ -498,17 +520,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] : dummy_vec4_float24; - Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); - Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); - Record<DebugDataRecord::SRC3>(state.debug, iteration, src3); - Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); + Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); + Record<DebugDataRecord::SRC3>(debug_data, iteration, src3); + Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; dest[i] = src1[i] * src2[i] + src3[i]; } - Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); } else { LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value().EffectiveOpCode(), @@ -518,26 +540,6 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned } default: { - static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, - Instruction::FlowControlType flow_control) { - bool results[2] = {refx == state.conditional_code[0], - refy == state.conditional_code[1]}; - - switch (flow_control.op) { - case flow_control.Or: - return results[0] || results[1]; - - case flow_control.And: - return results[0] && results[1]; - - case flow_control.JustX: - return results[0]; - - case flow_control.JustY: - return results[1]; - } - }; - // Handle each instruction on its own switch (instr.opcode.Value()) { case OpCode::Id::END: @@ -545,17 +547,15 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned break; case OpCode::Id::JMPC: - Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, - state.conditional_code); - if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, - instr.flow_control)) { + Record<DebugDataRecord::COND_CMP_IN>(debug_data, iteration, state.conditional_code); + if (evaluate_condition(instr.flow_control)) { program_counter = instr.flow_control.dest_offset - 1; } break; case OpCode::Id::JMPU: Record<DebugDataRecord::COND_BOOL_IN>( - state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); + debug_data, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { @@ -564,25 +564,23 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned break; case OpCode::Id::CALL: - call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, + call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, program_counter + 1, 0, 0); break; case OpCode::Id::CALLU: Record<DebugDataRecord::COND_BOOL_IN>( - state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); + debug_data, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); if (uniforms.b[instr.flow_control.bool_uniform_id]) { - call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, + call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, program_counter + 1, 0, 0); } break; case OpCode::Id::CALLC: - Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, - state.conditional_code); - if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, - instr.flow_control)) { - call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, + Record<DebugDataRecord::COND_CMP_IN>(debug_data, iteration, state.conditional_code); + if (evaluate_condition(instr.flow_control)) { + call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, program_counter + 1, 0, 0); } break; @@ -592,14 +590,13 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned case OpCode::Id::IFU: Record<DebugDataRecord::COND_BOOL_IN>( - state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); + debug_data, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); if (uniforms.b[instr.flow_control.bool_uniform_id]) { - call(state, program_counter + 1, - instr.flow_control.dest_offset - program_counter - 1, + call(program_counter + 1, instr.flow_control.dest_offset - program_counter - 1, instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); } else { - call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, + call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); } @@ -609,16 +606,13 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned case OpCode::Id::IFC: { // TODO: Do we need to consider swizzlers here? - Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, - state.conditional_code); - if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, - instr.flow_control)) { - call(state, program_counter + 1, - instr.flow_control.dest_offset - program_counter - 1, + Record<DebugDataRecord::COND_CMP_IN>(debug_data, iteration, state.conditional_code); + if (evaluate_condition(instr.flow_control)) { + call(program_counter + 1, instr.flow_control.dest_offset - program_counter - 1, instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); } else { - call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, + call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); } @@ -633,9 +627,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned uniforms.i[instr.flow_control.int_uniform_id].w); state.address_registers[2] = loop_param.y; - Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); - call(state, program_counter + 1, - instr.flow_control.dest_offset - program_counter + 1, + Record<DebugDataRecord::LOOP_INT_IN>(debug_data, iteration, loop_param); + call(program_counter + 1, instr.flow_control.dest_offset - program_counter + 1, instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z); break; } @@ -657,8 +650,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned } // Explicit instantiation -template void RunInterpreter(const ShaderSetup& setup, UnitState<false>& state, unsigned offset); -template void RunInterpreter(const ShaderSetup& setup, UnitState<true>& state, unsigned offset); +template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<false>&, unsigned offset); +template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<true>&, unsigned offset); } // namespace diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index 48ede0a2e..d31dcd7a6 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -8,11 +8,14 @@ namespace Pica { namespace Shader { -template <bool Debug> struct UnitState; template <bool Debug> -void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); +struct DebugData; + +template <bool Debug> +void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data, + unsigned offset); } // namespace diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 3ba31d474..c588b778b 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -185,10 +185,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { src_ptr = SETUP; - src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex()); + src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex()); } else { src_ptr = STATE; - src_offset = UnitState<false>::InputOffset(src_reg); + src_offset = UnitState::InputOffset(src_reg); } int src_offset_disp = (int)src_offset; @@ -266,9 +266,7 @@ void JitShader::Compile_DestEnable(Instruction instr, Xmm src) { SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; - int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest); - ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), - "Destinaton offset too large for int type"); + size_t dest_offset_disp = UnitState::OutputOffset(dest); // If all components are enabled, write the result to the destination register if (swiz.dest_mask == NO_DEST_REG_MASK) { @@ -348,8 +346,7 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { } void JitShader::Compile_UniformCondition(Instruction instr) { - size_t offset = - ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); + size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); cmp(byte[SETUP + offset], 0); } @@ -732,8 +729,7 @@ void JitShader::Compile_LOOP(Instruction instr) { // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id. // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by // 4 bits) to be used as an offset into the 16-byte vector registers later - size_t offset = - ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); + size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id); mov(LOOPCOUNT, dword[SETUP + offset]); mov(LOOPCOUNT_REG, LOOPCOUNT); shr(LOOPCOUNT_REG, 4); diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index e0ecde3f2..f37548306 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -34,7 +34,7 @@ class JitShader : public Xbyak::CodeGenerator { public: JitShader(); - void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const { + void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const { program(&setup, &state, instruction_labels[offset].getAddress()); } |