diff options
author | Jannik Vogel <email@jannikvogel.de> | 2016-05-13 08:46:14 +0200 |
---|---|---|
committer | Jannik Vogel <email@jannikvogel.de> | 2016-05-13 09:20:14 +0200 |
commit | 4e01e9ffc54ddfc9a1a5b285b4434e2f3ac4854e (patch) | |
tree | 5c7873636fe7317c6c4489292c48a2a8f7f41595 | |
parent | Merge pull request #1695 from Subv/tls_alloc (diff) | |
download | yuzu-4e01e9ffc54ddfc9a1a5b285b4434e2f3ac4854e.tar yuzu-4e01e9ffc54ddfc9a1a5b285b4434e2f3ac4854e.tar.gz yuzu-4e01e9ffc54ddfc9a1a5b285b4434e2f3ac4854e.tar.bz2 yuzu-4e01e9ffc54ddfc9a1a5b285b4434e2f3ac4854e.tar.lz yuzu-4e01e9ffc54ddfc9a1a5b285b4434e2f3ac4854e.tar.xz yuzu-4e01e9ffc54ddfc9a1a5b285b4434e2f3ac4854e.tar.zst yuzu-4e01e9ffc54ddfc9a1a5b285b4434e2f3ac4854e.zip |
-rw-r--r-- | src/video_core/shader/shader.cpp | 3 | ||||
-rw-r--r-- | src/video_core/shader/shader.h | 25 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 32 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 6 |
4 files changed, 42 insertions, 24 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index e93a9d92a..d36d3d78e 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -64,6 +64,7 @@ MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { auto& config = g_state.regs.vs; + auto& setup = g_state.vs; MICROPROFILE_SCOPE(GPU_Shader); @@ -81,7 +82,7 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) - jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); + jit_shader->Run(setup, state, config.main_offset); else RunInterpreter(state); #else diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 983e4a967..84898f21c 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -283,10 +283,10 @@ struct UnitState { static size_t InputOffset(const SourceRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Input: - return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); case RegisterType::Temporary: - return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); default: UNREACHABLE(); @@ -297,10 +297,10 @@ struct UnitState { static size_t OutputOffset(const DestRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Output: - return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); case RegisterType::Temporary: - return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); default: UNREACHABLE(); @@ -323,6 +323,23 @@ struct ShaderSetup { std::array<Math::Vec4<u8>, 4> i; } uniforms; + static size_t UniformOffset(RegisterType type, unsigned index) { + switch (type) { + case RegisterType::FloatUniform: + return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>); + + case RegisterType::BoolUniform: + return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool); + + case RegisterType::IntUniform: + return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>); + + default: + UNREACHABLE(); + return 0; + } + } + std::array<u32, 1024> program_code; std::array<u32, 1024> swizzle_data; diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 99f6c51eb..43e7e6b4c 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -102,7 +102,7 @@ const JitFunction instr_table[64] = { // purposes, as documented below: /// Pointer to the uniform memory -static const X64Reg UNIFORMS = R9; +static const X64Reg SETUP = R9; /// The two 32-bit VS address offset registers set by the MOVA instruction static const X64Reg ADDROFFS_REG_0 = R10; static const X64Reg ADDROFFS_REG_1 = R11; @@ -117,7 +117,7 @@ static const X64Reg COND0 = R13; /// Result of the previous CMP instruction for the Y-component comparison static const X64Reg COND1 = R14; /// Pointer to the UnitState instance for the current VS unit -static const X64Reg REGISTERS = R15; +static const X64Reg STATE = R15; /// SIMD scratch register static const X64Reg SCRATCH = XMM0; /// Loaded with the first swizzled source register, otherwise can be used as a scratch register @@ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15; // State registers that must not be modified by external functions calls // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed static const BitSet32 persistent_regs = { - UNIFORMS, REGISTERS, // Pointers to register blocks + SETUP, STATE, // Pointers to register blocks ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers ONE+16, NEGBIT+16, // Constants }; @@ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe size_t src_offset; if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { - src_ptr = UNIFORMS; - src_offset = src_reg.GetIndex() * sizeof(float24) * 4; + src_ptr = SETUP; + src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex()); } else { - src_ptr = REGISTERS; + src_ptr = STATE; src_offset = UnitState<false>::InputOffset(src_reg); } @@ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { // If all components are enabled, write the result to the destination register if (swiz.dest_mask == NO_DEST_REG_MASK) { // Store dest back to memory - MOVAPS(MDisp(REGISTERS, dest_offset_disp), src); + MOVAPS(MDisp(STATE, dest_offset_disp), src); } else { // Not all components are enabled, so mask the result when storing to the destination register... - MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp)); + MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); if (Common::GetCPUCaps().sse4_1) { u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); @@ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { } // Store dest back to memory - MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH); + MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH); } } @@ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { } void JitShader::Compile_UniformCondition(Instruction instr) { - int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); - CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); + int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); + CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); } BitSet32 JitShader::PersistentCallerSavedRegs() { @@ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) { looping = true; - int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>)); - MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset)); + int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); + MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); SHR(32, R(LOOPCOUNT_REG), Imm8(8)); AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start @@ -826,8 +826,8 @@ void JitShader::Compile() { // The stack pointer is 8 modulo 16 at the entry of a procedure ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); - MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); - MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); + MOV(PTRBITS, R(SETUP), R(ABI_PARAM1)); + MOV(PTRBITS, R(STATE), R(ABI_PARAM2)); // Zero address/loop registers XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); @@ -845,7 +845,7 @@ void JitShader::Compile() { MOVAPS(NEGBIT, MatR(RAX)); // Jump to start of the shader program - JMPptr(R(ABI_PARAM2)); + JMPptr(R(ABI_PARAM3)); // Compile entire program Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 30aa7ff30..5468459d4 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock { public: JitShader(); - void Run(void* registers, unsigned offset) const { - program(registers, code_ptr[offset]); + void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const { + program(&setup, &state, code_ptr[offset]); } void Compile(); @@ -117,7 +117,7 @@ private: /// Branches that need to be fixed up once the entire shader program is compiled std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; - using CompiledShader = void(void* registers, const u8* start_addr); + using CompiledShader = void(const void* setup, void* state, const u8* start_addr); CompiledShader* program = nullptr; }; |