diff options
-rw-r--r-- | src/core/arm/arm_interface.h | 53 | ||||
-rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic.cpp | 35 | ||||
-rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic.h | 10 | ||||
-rw-r--r-- | src/core/arm/unicorn/arm_unicorn.cpp | 27 | ||||
-rw-r--r-- | src/core/arm/unicorn/arm_unicorn.h | 10 | ||||
-rw-r--r-- | src/core/gdbstub/gdbstub.cpp | 50 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.cpp | 4 | ||||
-rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/video_core/engines/shader_header.h | 103 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 31 |
10 files changed, 199 insertions, 125 deletions
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 0b2af2a9b..867e34932 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -10,7 +10,7 @@ namespace Core { -/// Generic ARM11 CPU interface +/// Generic ARMv8 CPU interface class ARM_Interface : NonCopyable { public: virtual ~ARM_Interface() {} @@ -19,9 +19,9 @@ public: std::array<u64, 31> cpu_registers; u64 sp; u64 pc; - u64 cpsr; - std::array<u128, 32> fpu_registers; - u64 fpscr; + u64 pstate; + std::array<u128, 32> vector_registers; + u64 fpcr; }; /// Runs the CPU until an event happens @@ -69,42 +69,50 @@ public: */ virtual void SetReg(int index, u64 value) = 0; - virtual u128 GetExtReg(int index) const = 0; - - virtual void SetExtReg(int index, u128 value) = 0; - /** - * Gets the value of a VFP register - * @param index Register index (0-31) - * @return Returns the value in the register + * Gets the value of a specified vector register. + * + * @param index The index of the vector register. + * @return the value within the vector register. */ - virtual u32 GetVFPReg(int index) const = 0; + virtual u128 GetVectorReg(int index) const = 0; /** - * Sets a VFP register to the given value - * @param index Register index (0-31) - * @param value Value to set register to + * Sets a given value into a vector register. + * + * @param index The index of the vector register. + * @param value The new value to place in the register. */ - virtual void SetVFPReg(int index, u32 value) = 0; + virtual void SetVectorReg(int index, u128 value) = 0; /** - * Get the current CPSR register - * @return Returns the value of the CPSR register + * Get the current PSTATE register + * @return Returns the value of the PSTATE register */ - virtual u32 GetCPSR() const = 0; + virtual u32 GetPSTATE() const = 0; /** - * Set the current CPSR register - * @param cpsr Value to set CPSR to + * Set the current PSTATE register + * @param pstate Value to set PSTATE to */ - virtual void SetCPSR(u32 cpsr) = 0; + virtual void SetPSTATE(u32 pstate) = 0; virtual VAddr GetTlsAddress() const = 0; virtual void SetTlsAddress(VAddr address) = 0; + /** + * Gets the value within the TPIDR_EL0 (read/write software thread ID) register. + * + * @return the value within the register. + */ virtual u64 GetTPIDR_EL0() const = 0; + /** + * Sets a new value within the TPIDR_EL0 (read/write software thread ID) register. + * + * @param value The new value to place in the register. + */ virtual void SetTPIDR_EL0(u64 value) = 0; /** @@ -119,6 +127,7 @@ public: */ virtual void LoadContext(const ThreadContext& ctx) = 0; + /// Clears the exclusive monitor's state. virtual void ClearExclusiveState() = 0; /// Prepare core for thread reschedule (if needed to correctly handle state) diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 0c175d872..3f072c51f 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -194,29 +194,20 @@ void ARM_Dynarmic::SetReg(int index, u64 value) { jit->SetRegister(index, value); } -u128 ARM_Dynarmic::GetExtReg(int index) const { +u128 ARM_Dynarmic::GetVectorReg(int index) const { return jit->GetVector(index); } -void ARM_Dynarmic::SetExtReg(int index, u128 value) { +void ARM_Dynarmic::SetVectorReg(int index, u128 value) { jit->SetVector(index, value); } -u32 ARM_Dynarmic::GetVFPReg(int /*index*/) const { - UNIMPLEMENTED(); - return {}; -} - -void ARM_Dynarmic::SetVFPReg(int /*index*/, u32 /*value*/) { - UNIMPLEMENTED(); -} - -u32 ARM_Dynarmic::GetCPSR() const { +u32 ARM_Dynarmic::GetPSTATE() const { return jit->GetPstate(); } -void ARM_Dynarmic::SetCPSR(u32 cpsr) { - jit->SetPstate(cpsr); +void ARM_Dynarmic::SetPSTATE(u32 pstate) { + jit->SetPstate(pstate); } u64 ARM_Dynarmic::GetTlsAddress() const { @@ -239,18 +230,18 @@ void ARM_Dynarmic::SaveContext(ThreadContext& ctx) { ctx.cpu_registers = jit->GetRegisters(); ctx.sp = jit->GetSP(); ctx.pc = jit->GetPC(); - ctx.cpsr = jit->GetPstate(); - ctx.fpu_registers = jit->GetVectors(); - ctx.fpscr = jit->GetFpcr(); + ctx.pstate = jit->GetPstate(); + ctx.vector_registers = jit->GetVectors(); + ctx.fpcr = jit->GetFpcr(); } void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) { jit->SetRegisters(ctx.cpu_registers); jit->SetSP(ctx.sp); jit->SetPC(ctx.pc); - jit->SetPstate(static_cast<u32>(ctx.cpsr)); - jit->SetVectors(ctx.fpu_registers); - jit->SetFpcr(static_cast<u32>(ctx.fpscr)); + jit->SetPstate(static_cast<u32>(ctx.pstate)); + jit->SetVectors(ctx.vector_registers); + jit->SetFpcr(static_cast<u32>(ctx.fpcr)); } void ARM_Dynarmic::PrepareReschedule() { @@ -304,8 +295,8 @@ bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr va bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) { return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&] { - Memory::Write64(vaddr, value[0]); - Memory::Write64(vaddr, value[1]); + Memory::Write64(vaddr + 0, value[0]); + Memory::Write64(vaddr + 8, value[1]); }); } diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index 56c60c853..e61382d3d 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h @@ -29,14 +29,12 @@ public: u64 GetPC() const override; u64 GetReg(int index) const override; void SetReg(int index, u64 value) override; - u128 GetExtReg(int index) const override; - void SetExtReg(int index, u128 value) override; - u32 GetVFPReg(int index) const override; - void SetVFPReg(int index, u32 value) override; - u32 GetCPSR() const override; + u128 GetVectorReg(int index) const override; + void SetVectorReg(int index, u128 value) override; + u32 GetPSTATE() const override; + void SetPSTATE(u32 pstate) override; void Run() override; void Step() override; - void SetCPSR(u32 cpsr) override; VAddr GetTlsAddress() const override; void SetTlsAddress(VAddr address) override; void SetTPIDR_EL0(u64 value) override; diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 4e02b7cd4..e218a0b15 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -131,33 +131,24 @@ void ARM_Unicorn::SetReg(int regn, u64 val) { CHECKED(uc_reg_write(uc, treg, &val)); } -u128 ARM_Unicorn::GetExtReg(int /*index*/) const { +u128 ARM_Unicorn::GetVectorReg(int /*index*/) const { UNIMPLEMENTED(); static constexpr u128 res{}; return res; } -void ARM_Unicorn::SetExtReg(int /*index*/, u128 /*value*/) { +void ARM_Unicorn::SetVectorReg(int /*index*/, u128 /*value*/) { UNIMPLEMENTED(); } -u32 ARM_Unicorn::GetVFPReg(int /*index*/) const { - UNIMPLEMENTED(); - return {}; -} - -void ARM_Unicorn::SetVFPReg(int /*index*/, u32 /*value*/) { - UNIMPLEMENTED(); -} - -u32 ARM_Unicorn::GetCPSR() const { +u32 ARM_Unicorn::GetPSTATE() const { u64 nzcv{}; CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &nzcv)); return static_cast<u32>(nzcv); } -void ARM_Unicorn::SetCPSR(u32 cpsr) { - u64 nzcv = cpsr; +void ARM_Unicorn::SetPSTATE(u32 pstate) { + u64 nzcv = pstate; CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &nzcv)); } @@ -219,7 +210,7 @@ void ARM_Unicorn::SaveContext(ThreadContext& ctx) { CHECKED(uc_reg_read(uc, UC_ARM64_REG_SP, &ctx.sp)); CHECKED(uc_reg_read(uc, UC_ARM64_REG_PC, &ctx.pc)); - CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &ctx.cpsr)); + CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &ctx.pstate)); for (auto i = 0; i < 29; ++i) { uregs[i] = UC_ARM64_REG_X0 + i; @@ -234,7 +225,7 @@ void ARM_Unicorn::SaveContext(ThreadContext& ctx) { for (int i = 0; i < 32; ++i) { uregs[i] = UC_ARM64_REG_Q0 + i; - tregs[i] = &ctx.fpu_registers[i]; + tregs[i] = &ctx.vector_registers[i]; } CHECKED(uc_reg_read_batch(uc, uregs, tregs, 32)); @@ -246,7 +237,7 @@ void ARM_Unicorn::LoadContext(const ThreadContext& ctx) { CHECKED(uc_reg_write(uc, UC_ARM64_REG_SP, &ctx.sp)); CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &ctx.pc)); - CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &ctx.cpsr)); + CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &ctx.pstate)); for (int i = 0; i < 29; ++i) { uregs[i] = UC_ARM64_REG_X0 + i; @@ -261,7 +252,7 @@ void ARM_Unicorn::LoadContext(const ThreadContext& ctx) { for (auto i = 0; i < 32; ++i) { uregs[i] = UC_ARM64_REG_Q0 + i; - tregs[i] = (void*)&ctx.fpu_registers[i]; + tregs[i] = (void*)&ctx.vector_registers[i]; } CHECKED(uc_reg_write_batch(uc, uregs, tregs, 32)); diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index d6f7cf4ab..75761950b 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h @@ -22,12 +22,10 @@ public: u64 GetPC() const override; u64 GetReg(int index) const override; void SetReg(int index, u64 value) override; - u128 GetExtReg(int index) const override; - void SetExtReg(int index, u128 value) override; - u32 GetVFPReg(int index) const override; - void SetVFPReg(int index, u32 value) override; - u32 GetCPSR() const override; - void SetCPSR(u32 cpsr) override; + u128 GetVectorReg(int index) const override; + void SetVectorReg(int index, u128 value) override; + u32 GetPSTATE() const override; + void SetPSTATE(u32 pstate) override; VAddr GetTlsAddress() const override; void SetTlsAddress(VAddr address) override; void SetTPIDR_EL0(u64 value) override; diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index cfaf20a88..1b04f68bf 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -65,9 +65,9 @@ constexpr u32 MSG_WAITALL = 8; constexpr u32 LR_REGISTER = 30; constexpr u32 SP_REGISTER = 31; constexpr u32 PC_REGISTER = 32; -constexpr u32 CPSR_REGISTER = 33; +constexpr u32 PSTATE_REGISTER = 33; constexpr u32 UC_ARM64_REG_Q0 = 34; -constexpr u32 FPSCR_REGISTER = 66; +constexpr u32 FPCR_REGISTER = 66; // TODO/WiP - Used while working on support for FPU constexpr u32 TODO_DUMMY_REG_997 = 997; @@ -116,7 +116,7 @@ constexpr char target_xml[] = <reg name="pc" bitsize="64" type="code_ptr"/> - <flags id="cpsr_flags" size="4"> + <flags id="pstate_flags" size="4"> <field name="SP" start="0" end="0"/> <field name="" start="1" end="1"/> <field name="EL" start="2" end="3"/> @@ -135,7 +135,7 @@ constexpr char target_xml[] = <field name="Z" start="30" end="30"/> <field name="N" start="31" end="31"/> </flags> - <reg name="cpsr" bitsize="32" type="cpsr_flags"/> + <reg name="pstate" bitsize="32" type="pstate_flags"/> </feature> <feature name="org.gnu.gdb.aarch64.fpu"> </feature> @@ -227,10 +227,10 @@ static u64 RegRead(std::size_t id, Kernel::Thread* thread = nullptr) { return thread->context.sp; } else if (id == PC_REGISTER) { return thread->context.pc; - } else if (id == CPSR_REGISTER) { - return thread->context.cpsr; - } else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) { - return thread->context.fpu_registers[id - UC_ARM64_REG_Q0][0]; + } else if (id == PSTATE_REGISTER) { + return thread->context.pstate; + } else if (id > PSTATE_REGISTER && id < FPCR_REGISTER) { + return thread->context.vector_registers[id - UC_ARM64_REG_Q0][0]; } else { return 0; } @@ -247,10 +247,10 @@ static void RegWrite(std::size_t id, u64 val, Kernel::Thread* thread = nullptr) thread->context.sp = val; } else if (id == PC_REGISTER) { thread->context.pc = val; - } else if (id == CPSR_REGISTER) { - thread->context.cpsr = val; - } else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) { - thread->context.fpu_registers[id - (CPSR_REGISTER + 1)][0] = val; + } else if (id == PSTATE_REGISTER) { + thread->context.pstate = val; + } else if (id > PSTATE_REGISTER && id < FPCR_REGISTER) { + thread->context.vector_registers[id - (PSTATE_REGISTER + 1)][0] = val; } } @@ -781,11 +781,11 @@ static void ReadRegister() { LongToGdbHex(reply, RegRead(id, current_thread)); } else if (id == PC_REGISTER) { LongToGdbHex(reply, RegRead(id, current_thread)); - } else if (id == CPSR_REGISTER) { - IntToGdbHex(reply, (u32)RegRead(id, current_thread)); - } else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) { + } else if (id == PSTATE_REGISTER) { + IntToGdbHex(reply, static_cast<u32>(RegRead(id, current_thread))); + } else if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { LongToGdbHex(reply, RegRead(id, current_thread)); - } else if (id == FPSCR_REGISTER) { + } else if (id == FPCR_REGISTER) { LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_998, current_thread)); } else { LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_997, current_thread)); @@ -811,7 +811,7 @@ static void ReadRegisters() { bufptr += 16; - IntToGdbHex(bufptr, (u32)RegRead(CPSR_REGISTER, current_thread)); + IntToGdbHex(bufptr, static_cast<u32>(RegRead(PSTATE_REGISTER, current_thread))); bufptr += 8; @@ -843,11 +843,11 @@ static void WriteRegister() { RegWrite(id, GdbHexToLong(buffer_ptr), current_thread); } else if (id == PC_REGISTER) { RegWrite(id, GdbHexToLong(buffer_ptr), current_thread); - } else if (id == CPSR_REGISTER) { + } else if (id == PSTATE_REGISTER) { RegWrite(id, GdbHexToInt(buffer_ptr), current_thread); - } else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) { + } else if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { RegWrite(id, GdbHexToLong(buffer_ptr), current_thread); - } else if (id == FPSCR_REGISTER) { + } else if (id == FPCR_REGISTER) { RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr), current_thread); } else { RegWrite(TODO_DUMMY_REG_997, GdbHexToLong(buffer_ptr), current_thread); @@ -866,16 +866,16 @@ static void WriteRegisters() { if (command_buffer[0] != 'G') return SendReply("E01"); - for (u32 i = 0, reg = 0; reg <= FPSCR_REGISTER; i++, reg++) { + for (u32 i = 0, reg = 0; reg <= FPCR_REGISTER; i++, reg++) { if (reg <= SP_REGISTER) { RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread); } else if (reg == PC_REGISTER) { RegWrite(PC_REGISTER, GdbHexToLong(buffer_ptr + i * 16), current_thread); - } else if (reg == CPSR_REGISTER) { - RegWrite(CPSR_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread); - } else if (reg >= UC_ARM64_REG_Q0 && reg < FPSCR_REGISTER) { + } else if (reg == PSTATE_REGISTER) { + RegWrite(PSTATE_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread); + } else if (reg >= UC_ARM64_REG_Q0 && reg < FPCR_REGISTER) { RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread); - } else if (reg == FPSCR_REGISTER) { + } else if (reg == FPCR_REGISTER) { RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr + i * 16), current_thread); } else { UNIMPLEMENTED(); diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 89cd5f401..d4183d6e3 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -217,8 +217,8 @@ static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAdd context.cpu_registers[0] = arg; context.pc = entry_point; context.sp = stack_top; - context.cpsr = 0; - context.fpscr = 0; + context.pstate = 0; + context.fpcr = 0; } ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name, VAddr entry_point, diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 4a79ce39c..f5ae57039 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -14,6 +14,7 @@ add_library(video_core STATIC engines/maxwell_dma.cpp engines/maxwell_dma.h engines/shader_bytecode.h + engines/shader_header.h gpu.cpp gpu.h macro_interpreter.cpp diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h new file mode 100644 index 000000000..a885ee3cf --- /dev/null +++ b/src/video_core/engines/shader_header.h @@ -0,0 +1,103 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Tegra::Shader { + +enum class OutputTopology : u32 { + PointList = 1, + LineStrip = 6, + TriangleStrip = 7, +}; + +// Documentation in: +// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture +struct Header { + union { + BitField<0, 5, u32> sph_type; + BitField<5, 5, u32> version; + BitField<10, 4, u32> shader_type; + BitField<14, 1, u32> mrt_enable; + BitField<15, 1, u32> kills_pixels; + BitField<16, 1, u32> does_global_store; + BitField<17, 4, u32> sass_version; + BitField<21, 5, u32> reserved; + BitField<26, 1, u32> does_load_or_store; + BitField<27, 1, u32> does_fp64; + BitField<28, 4, u32> stream_out_mask; + } common0; + + union { + BitField<0, 24, u32> shader_local_memory_low_size; + BitField<24, 8, u32> per_patch_attribute_count; + } common1; + + union { + BitField<0, 24, u32> shader_local_memory_high_size; + BitField<24, 8, u32> threads_per_input_primitive; + } common2; + + union { + BitField<0, 24, u32> shader_local_memory_crs_size; + BitField<24, 4, OutputTopology> output_topology; + BitField<28, 4, u32> reserved; + } common3; + + union { + BitField<0, 12, u32> max_output_vertices; + BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. + BitField<24, 4, u32> reserved; + BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders. + } common4; + + union { + struct { + INSERT_PADDING_BYTES(3); // ImapSystemValuesA + INSERT_PADDING_BYTES(1); // ImapSystemValuesB + INSERT_PADDING_BYTES(16); // ImapGenericVector[32] + INSERT_PADDING_BYTES(2); // ImapColor + INSERT_PADDING_BYTES(2); // ImapSystemValuesC + INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES(1); // ImapReserved + INSERT_PADDING_BYTES(3); // OmapSystemValuesA + INSERT_PADDING_BYTES(1); // OmapSystemValuesB + INSERT_PADDING_BYTES(16); // OmapGenericVector[32] + INSERT_PADDING_BYTES(2); // OmapColor + INSERT_PADDING_BYTES(2); // OmapSystemValuesC + INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10] + INSERT_PADDING_BYTES(1); // OmapReserved + } vtg; + + struct { + INSERT_PADDING_BYTES(3); // ImapSystemValuesA + INSERT_PADDING_BYTES(1); // ImapSystemValuesB + INSERT_PADDING_BYTES(32); // ImapGenericVector[32] + INSERT_PADDING_BYTES(2); // ImapColor + INSERT_PADDING_BYTES(2); // ImapSystemValuesC + INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES(2); // ImapReserved + struct { + u32 target; + union { + BitField<0, 1, u32> sample_mask; + BitField<1, 1, u32> depth; + BitField<2, 30, u32> reserved; + }; + } omap; + bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { + const u32 bit = render_target * 4 + component; + return omap.target & (1 << bit); + } + } ps; + }; +}; + +static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); + +} // namespace Tegra::Shader diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 252ff18fc..a1638c12e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -12,6 +12,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/engines/shader_header.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" @@ -26,7 +27,7 @@ using Tegra::Shader::Sampler; using Tegra::Shader::SubOp; constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; -constexpr u32 PROGRAM_HEADER_SIZE = 0x50; +constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header); class DecompileFail : public std::runtime_error { public: @@ -674,7 +675,7 @@ public: u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) : subroutines(subroutines), program_code(program_code), main_offset(main_offset), stage(stage), suffix(suffix) { - + std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); Generate(suffix); } @@ -688,23 +689,6 @@ public: } private: - // Shader program header for a Fragment Shader. - struct FragmentHeader { - INSERT_PADDING_WORDS(5); - INSERT_PADDING_WORDS(13); - u32 enabled_color_outputs; - union { - BitField<0, 1, u32> writes_samplemask; - BitField<1, 1, u32> writes_depth; - }; - - bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { - const u32 bit = render_target * 4 + component; - return enabled_color_outputs & (1 << bit); - } - }; - static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong"); - /// Gets the Subroutine object corresponding to the specified address. const Subroutine& GetSubroutine(u32 begin, u32 end) const { const auto iter = subroutines.find(Subroutine{begin, end, suffix}); @@ -1010,10 +994,8 @@ private: /// Writes the output values from a fragment shader to the corresponding GLSL output variables. void EmitFragmentOutputsWrite() { ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); - FragmentHeader header; - std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE); - ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented"); + ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented"); // Write the color outputs using the data in the shader registers, disabled // rendertargets/components are skipped in the register assignment. @@ -1022,7 +1004,7 @@ private: ++render_target) { // TODO(Subv): Figure out how dual-source blending is configured in the Switch. for (u32 component = 0; component < 4; ++component) { - if (header.IsColorComponentOutputEnabled(render_target, component)) { + if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, regs.GetRegisterAsFloat(current_reg))); ++current_reg; @@ -1030,7 +1012,7 @@ private: } } - if (header.writes_depth) { + if (header.ps.omap.depth) { // The depth output is always 2 registers after the last color output, and current_reg // already contains one past the last color register. @@ -2666,6 +2648,7 @@ private: private: const std::set<Subroutine>& subroutines; const ProgramCode& program_code; + Tegra::Shader::Header header; const u32 main_offset; Maxwell3D::Regs::ShaderStage stage; const std::string& suffix; |