summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/core/arm/arm_interface.h53
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp35
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.h10
-rw-r--r--src/core/arm/unicorn/arm_unicorn.cpp27
-rw-r--r--src/core/arm/unicorn/arm_unicorn.h10
-rw-r--r--src/core/gdbstub/gdbstub.cpp50
-rw-r--r--src/core/hle/kernel/thread.cpp4
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/engines/shader_header.h103
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp31
10 files changed, 199 insertions, 125 deletions
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 0b2af2a9b..867e34932 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -10,7 +10,7 @@
namespace Core {
-/// Generic ARM11 CPU interface
+/// Generic ARMv8 CPU interface
class ARM_Interface : NonCopyable {
public:
virtual ~ARM_Interface() {}
@@ -19,9 +19,9 @@ public:
std::array<u64, 31> cpu_registers;
u64 sp;
u64 pc;
- u64 cpsr;
- std::array<u128, 32> fpu_registers;
- u64 fpscr;
+ u64 pstate;
+ std::array<u128, 32> vector_registers;
+ u64 fpcr;
};
/// Runs the CPU until an event happens
@@ -69,42 +69,50 @@ public:
*/
virtual void SetReg(int index, u64 value) = 0;
- virtual u128 GetExtReg(int index) const = 0;
-
- virtual void SetExtReg(int index, u128 value) = 0;
-
/**
- * Gets the value of a VFP register
- * @param index Register index (0-31)
- * @return Returns the value in the register
+ * Gets the value of a specified vector register.
+ *
+ * @param index The index of the vector register.
+ * @return the value within the vector register.
*/
- virtual u32 GetVFPReg(int index) const = 0;
+ virtual u128 GetVectorReg(int index) const = 0;
/**
- * Sets a VFP register to the given value
- * @param index Register index (0-31)
- * @param value Value to set register to
+ * Sets a given value into a vector register.
+ *
+ * @param index The index of the vector register.
+ * @param value The new value to place in the register.
*/
- virtual void SetVFPReg(int index, u32 value) = 0;
+ virtual void SetVectorReg(int index, u128 value) = 0;
/**
- * Get the current CPSR register
- * @return Returns the value of the CPSR register
+ * Get the current PSTATE register
+ * @return Returns the value of the PSTATE register
*/
- virtual u32 GetCPSR() const = 0;
+ virtual u32 GetPSTATE() const = 0;
/**
- * Set the current CPSR register
- * @param cpsr Value to set CPSR to
+ * Set the current PSTATE register
+ * @param pstate Value to set PSTATE to
*/
- virtual void SetCPSR(u32 cpsr) = 0;
+ virtual void SetPSTATE(u32 pstate) = 0;
virtual VAddr GetTlsAddress() const = 0;
virtual void SetTlsAddress(VAddr address) = 0;
+ /**
+ * Gets the value within the TPIDR_EL0 (read/write software thread ID) register.
+ *
+ * @return the value within the register.
+ */
virtual u64 GetTPIDR_EL0() const = 0;
+ /**
+ * Sets a new value within the TPIDR_EL0 (read/write software thread ID) register.
+ *
+ * @param value The new value to place in the register.
+ */
virtual void SetTPIDR_EL0(u64 value) = 0;
/**
@@ -119,6 +127,7 @@ public:
*/
virtual void LoadContext(const ThreadContext& ctx) = 0;
+ /// Clears the exclusive monitor's state.
virtual void ClearExclusiveState() = 0;
/// Prepare core for thread reschedule (if needed to correctly handle state)
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 0c175d872..3f072c51f 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -194,29 +194,20 @@ void ARM_Dynarmic::SetReg(int index, u64 value) {
jit->SetRegister(index, value);
}
-u128 ARM_Dynarmic::GetExtReg(int index) const {
+u128 ARM_Dynarmic::GetVectorReg(int index) const {
return jit->GetVector(index);
}
-void ARM_Dynarmic::SetExtReg(int index, u128 value) {
+void ARM_Dynarmic::SetVectorReg(int index, u128 value) {
jit->SetVector(index, value);
}
-u32 ARM_Dynarmic::GetVFPReg(int /*index*/) const {
- UNIMPLEMENTED();
- return {};
-}
-
-void ARM_Dynarmic::SetVFPReg(int /*index*/, u32 /*value*/) {
- UNIMPLEMENTED();
-}
-
-u32 ARM_Dynarmic::GetCPSR() const {
+u32 ARM_Dynarmic::GetPSTATE() const {
return jit->GetPstate();
}
-void ARM_Dynarmic::SetCPSR(u32 cpsr) {
- jit->SetPstate(cpsr);
+void ARM_Dynarmic::SetPSTATE(u32 pstate) {
+ jit->SetPstate(pstate);
}
u64 ARM_Dynarmic::GetTlsAddress() const {
@@ -239,18 +230,18 @@ void ARM_Dynarmic::SaveContext(ThreadContext& ctx) {
ctx.cpu_registers = jit->GetRegisters();
ctx.sp = jit->GetSP();
ctx.pc = jit->GetPC();
- ctx.cpsr = jit->GetPstate();
- ctx.fpu_registers = jit->GetVectors();
- ctx.fpscr = jit->GetFpcr();
+ ctx.pstate = jit->GetPstate();
+ ctx.vector_registers = jit->GetVectors();
+ ctx.fpcr = jit->GetFpcr();
}
void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) {
jit->SetRegisters(ctx.cpu_registers);
jit->SetSP(ctx.sp);
jit->SetPC(ctx.pc);
- jit->SetPstate(static_cast<u32>(ctx.cpsr));
- jit->SetVectors(ctx.fpu_registers);
- jit->SetFpcr(static_cast<u32>(ctx.fpscr));
+ jit->SetPstate(static_cast<u32>(ctx.pstate));
+ jit->SetVectors(ctx.vector_registers);
+ jit->SetFpcr(static_cast<u32>(ctx.fpcr));
}
void ARM_Dynarmic::PrepareReschedule() {
@@ -304,8 +295,8 @@ bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr va
bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) {
return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&] {
- Memory::Write64(vaddr, value[0]);
- Memory::Write64(vaddr, value[1]);
+ Memory::Write64(vaddr + 0, value[0]);
+ Memory::Write64(vaddr + 8, value[1]);
});
}
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 56c60c853..e61382d3d 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -29,14 +29,12 @@ public:
u64 GetPC() const override;
u64 GetReg(int index) const override;
void SetReg(int index, u64 value) override;
- u128 GetExtReg(int index) const override;
- void SetExtReg(int index, u128 value) override;
- u32 GetVFPReg(int index) const override;
- void SetVFPReg(int index, u32 value) override;
- u32 GetCPSR() const override;
+ u128 GetVectorReg(int index) const override;
+ void SetVectorReg(int index, u128 value) override;
+ u32 GetPSTATE() const override;
+ void SetPSTATE(u32 pstate) override;
void Run() override;
void Step() override;
- void SetCPSR(u32 cpsr) override;
VAddr GetTlsAddress() const override;
void SetTlsAddress(VAddr address) override;
void SetTPIDR_EL0(u64 value) override;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 4e02b7cd4..e218a0b15 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -131,33 +131,24 @@ void ARM_Unicorn::SetReg(int regn, u64 val) {
CHECKED(uc_reg_write(uc, treg, &val));
}
-u128 ARM_Unicorn::GetExtReg(int /*index*/) const {
+u128 ARM_Unicorn::GetVectorReg(int /*index*/) const {
UNIMPLEMENTED();
static constexpr u128 res{};
return res;
}
-void ARM_Unicorn::SetExtReg(int /*index*/, u128 /*value*/) {
+void ARM_Unicorn::SetVectorReg(int /*index*/, u128 /*value*/) {
UNIMPLEMENTED();
}
-u32 ARM_Unicorn::GetVFPReg(int /*index*/) const {
- UNIMPLEMENTED();
- return {};
-}
-
-void ARM_Unicorn::SetVFPReg(int /*index*/, u32 /*value*/) {
- UNIMPLEMENTED();
-}
-
-u32 ARM_Unicorn::GetCPSR() const {
+u32 ARM_Unicorn::GetPSTATE() const {
u64 nzcv{};
CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &nzcv));
return static_cast<u32>(nzcv);
}
-void ARM_Unicorn::SetCPSR(u32 cpsr) {
- u64 nzcv = cpsr;
+void ARM_Unicorn::SetPSTATE(u32 pstate) {
+ u64 nzcv = pstate;
CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &nzcv));
}
@@ -219,7 +210,7 @@ void ARM_Unicorn::SaveContext(ThreadContext& ctx) {
CHECKED(uc_reg_read(uc, UC_ARM64_REG_SP, &ctx.sp));
CHECKED(uc_reg_read(uc, UC_ARM64_REG_PC, &ctx.pc));
- CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &ctx.cpsr));
+ CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &ctx.pstate));
for (auto i = 0; i < 29; ++i) {
uregs[i] = UC_ARM64_REG_X0 + i;
@@ -234,7 +225,7 @@ void ARM_Unicorn::SaveContext(ThreadContext& ctx) {
for (int i = 0; i < 32; ++i) {
uregs[i] = UC_ARM64_REG_Q0 + i;
- tregs[i] = &ctx.fpu_registers[i];
+ tregs[i] = &ctx.vector_registers[i];
}
CHECKED(uc_reg_read_batch(uc, uregs, tregs, 32));
@@ -246,7 +237,7 @@ void ARM_Unicorn::LoadContext(const ThreadContext& ctx) {
CHECKED(uc_reg_write(uc, UC_ARM64_REG_SP, &ctx.sp));
CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &ctx.pc));
- CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &ctx.cpsr));
+ CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &ctx.pstate));
for (int i = 0; i < 29; ++i) {
uregs[i] = UC_ARM64_REG_X0 + i;
@@ -261,7 +252,7 @@ void ARM_Unicorn::LoadContext(const ThreadContext& ctx) {
for (auto i = 0; i < 32; ++i) {
uregs[i] = UC_ARM64_REG_Q0 + i;
- tregs[i] = (void*)&ctx.fpu_registers[i];
+ tregs[i] = (void*)&ctx.vector_registers[i];
}
CHECKED(uc_reg_write_batch(uc, uregs, tregs, 32));
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index d6f7cf4ab..75761950b 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -22,12 +22,10 @@ public:
u64 GetPC() const override;
u64 GetReg(int index) const override;
void SetReg(int index, u64 value) override;
- u128 GetExtReg(int index) const override;
- void SetExtReg(int index, u128 value) override;
- u32 GetVFPReg(int index) const override;
- void SetVFPReg(int index, u32 value) override;
- u32 GetCPSR() const override;
- void SetCPSR(u32 cpsr) override;
+ u128 GetVectorReg(int index) const override;
+ void SetVectorReg(int index, u128 value) override;
+ u32 GetPSTATE() const override;
+ void SetPSTATE(u32 pstate) override;
VAddr GetTlsAddress() const override;
void SetTlsAddress(VAddr address) override;
void SetTPIDR_EL0(u64 value) override;
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index cfaf20a88..1b04f68bf 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -65,9 +65,9 @@ constexpr u32 MSG_WAITALL = 8;
constexpr u32 LR_REGISTER = 30;
constexpr u32 SP_REGISTER = 31;
constexpr u32 PC_REGISTER = 32;
-constexpr u32 CPSR_REGISTER = 33;
+constexpr u32 PSTATE_REGISTER = 33;
constexpr u32 UC_ARM64_REG_Q0 = 34;
-constexpr u32 FPSCR_REGISTER = 66;
+constexpr u32 FPCR_REGISTER = 66;
// TODO/WiP - Used while working on support for FPU
constexpr u32 TODO_DUMMY_REG_997 = 997;
@@ -116,7 +116,7 @@ constexpr char target_xml[] =
<reg name="pc" bitsize="64" type="code_ptr"/>
- <flags id="cpsr_flags" size="4">
+ <flags id="pstate_flags" size="4">
<field name="SP" start="0" end="0"/>
<field name="" start="1" end="1"/>
<field name="EL" start="2" end="3"/>
@@ -135,7 +135,7 @@ constexpr char target_xml[] =
<field name="Z" start="30" end="30"/>
<field name="N" start="31" end="31"/>
</flags>
- <reg name="cpsr" bitsize="32" type="cpsr_flags"/>
+ <reg name="pstate" bitsize="32" type="pstate_flags"/>
</feature>
<feature name="org.gnu.gdb.aarch64.fpu">
</feature>
@@ -227,10 +227,10 @@ static u64 RegRead(std::size_t id, Kernel::Thread* thread = nullptr) {
return thread->context.sp;
} else if (id == PC_REGISTER) {
return thread->context.pc;
- } else if (id == CPSR_REGISTER) {
- return thread->context.cpsr;
- } else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) {
- return thread->context.fpu_registers[id - UC_ARM64_REG_Q0][0];
+ } else if (id == PSTATE_REGISTER) {
+ return thread->context.pstate;
+ } else if (id > PSTATE_REGISTER && id < FPCR_REGISTER) {
+ return thread->context.vector_registers[id - UC_ARM64_REG_Q0][0];
} else {
return 0;
}
@@ -247,10 +247,10 @@ static void RegWrite(std::size_t id, u64 val, Kernel::Thread* thread = nullptr)
thread->context.sp = val;
} else if (id == PC_REGISTER) {
thread->context.pc = val;
- } else if (id == CPSR_REGISTER) {
- thread->context.cpsr = val;
- } else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) {
- thread->context.fpu_registers[id - (CPSR_REGISTER + 1)][0] = val;
+ } else if (id == PSTATE_REGISTER) {
+ thread->context.pstate = val;
+ } else if (id > PSTATE_REGISTER && id < FPCR_REGISTER) {
+ thread->context.vector_registers[id - (PSTATE_REGISTER + 1)][0] = val;
}
}
@@ -781,11 +781,11 @@ static void ReadRegister() {
LongToGdbHex(reply, RegRead(id, current_thread));
} else if (id == PC_REGISTER) {
LongToGdbHex(reply, RegRead(id, current_thread));
- } else if (id == CPSR_REGISTER) {
- IntToGdbHex(reply, (u32)RegRead(id, current_thread));
- } else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) {
+ } else if (id == PSTATE_REGISTER) {
+ IntToGdbHex(reply, static_cast<u32>(RegRead(id, current_thread)));
+ } else if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
LongToGdbHex(reply, RegRead(id, current_thread));
- } else if (id == FPSCR_REGISTER) {
+ } else if (id == FPCR_REGISTER) {
LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_998, current_thread));
} else {
LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_997, current_thread));
@@ -811,7 +811,7 @@ static void ReadRegisters() {
bufptr += 16;
- IntToGdbHex(bufptr, (u32)RegRead(CPSR_REGISTER, current_thread));
+ IntToGdbHex(bufptr, static_cast<u32>(RegRead(PSTATE_REGISTER, current_thread)));
bufptr += 8;
@@ -843,11 +843,11 @@ static void WriteRegister() {
RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
} else if (id == PC_REGISTER) {
RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
- } else if (id == CPSR_REGISTER) {
+ } else if (id == PSTATE_REGISTER) {
RegWrite(id, GdbHexToInt(buffer_ptr), current_thread);
- } else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) {
+ } else if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
- } else if (id == FPSCR_REGISTER) {
+ } else if (id == FPCR_REGISTER) {
RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr), current_thread);
} else {
RegWrite(TODO_DUMMY_REG_997, GdbHexToLong(buffer_ptr), current_thread);
@@ -866,16 +866,16 @@ static void WriteRegisters() {
if (command_buffer[0] != 'G')
return SendReply("E01");
- for (u32 i = 0, reg = 0; reg <= FPSCR_REGISTER; i++, reg++) {
+ for (u32 i = 0, reg = 0; reg <= FPCR_REGISTER; i++, reg++) {
if (reg <= SP_REGISTER) {
RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
} else if (reg == PC_REGISTER) {
RegWrite(PC_REGISTER, GdbHexToLong(buffer_ptr + i * 16), current_thread);
- } else if (reg == CPSR_REGISTER) {
- RegWrite(CPSR_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread);
- } else if (reg >= UC_ARM64_REG_Q0 && reg < FPSCR_REGISTER) {
+ } else if (reg == PSTATE_REGISTER) {
+ RegWrite(PSTATE_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread);
+ } else if (reg >= UC_ARM64_REG_Q0 && reg < FPCR_REGISTER) {
RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
- } else if (reg == FPSCR_REGISTER) {
+ } else if (reg == FPCR_REGISTER) {
RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr + i * 16), current_thread);
} else {
UNIMPLEMENTED();
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 89cd5f401..d4183d6e3 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -217,8 +217,8 @@ static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAdd
context.cpu_registers[0] = arg;
context.pc = entry_point;
context.sp = stack_top;
- context.cpsr = 0;
- context.fpscr = 0;
+ context.pstate = 0;
+ context.fpcr = 0;
}
ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name, VAddr entry_point,
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 4a79ce39c..f5ae57039 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -14,6 +14,7 @@ add_library(video_core STATIC
engines/maxwell_dma.cpp
engines/maxwell_dma.h
engines/shader_bytecode.h
+ engines/shader_header.h
gpu.cpp
gpu.h
macro_interpreter.cpp
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
new file mode 100644
index 000000000..a885ee3cf
--- /dev/null
+++ b/src/video_core/engines/shader_header.h
@@ -0,0 +1,103 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra::Shader {
+
+enum class OutputTopology : u32 {
+ PointList = 1,
+ LineStrip = 6,
+ TriangleStrip = 7,
+};
+
+// Documentation in:
+// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
+struct Header {
+ union {
+ BitField<0, 5, u32> sph_type;
+ BitField<5, 5, u32> version;
+ BitField<10, 4, u32> shader_type;
+ BitField<14, 1, u32> mrt_enable;
+ BitField<15, 1, u32> kills_pixels;
+ BitField<16, 1, u32> does_global_store;
+ BitField<17, 4, u32> sass_version;
+ BitField<21, 5, u32> reserved;
+ BitField<26, 1, u32> does_load_or_store;
+ BitField<27, 1, u32> does_fp64;
+ BitField<28, 4, u32> stream_out_mask;
+ } common0;
+
+ union {
+ BitField<0, 24, u32> shader_local_memory_low_size;
+ BitField<24, 8, u32> per_patch_attribute_count;
+ } common1;
+
+ union {
+ BitField<0, 24, u32> shader_local_memory_high_size;
+ BitField<24, 8, u32> threads_per_input_primitive;
+ } common2;
+
+ union {
+ BitField<0, 24, u32> shader_local_memory_crs_size;
+ BitField<24, 4, OutputTopology> output_topology;
+ BitField<28, 4, u32> reserved;
+ } common3;
+
+ union {
+ BitField<0, 12, u32> max_output_vertices;
+ BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
+ BitField<24, 4, u32> reserved;
+ BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
+ } common4;
+
+ union {
+ struct {
+ INSERT_PADDING_BYTES(3); // ImapSystemValuesA
+ INSERT_PADDING_BYTES(1); // ImapSystemValuesB
+ INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
+ INSERT_PADDING_BYTES(2); // ImapColor
+ INSERT_PADDING_BYTES(2); // ImapSystemValuesC
+ INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10]
+ INSERT_PADDING_BYTES(1); // ImapReserved
+ INSERT_PADDING_BYTES(3); // OmapSystemValuesA
+ INSERT_PADDING_BYTES(1); // OmapSystemValuesB
+ INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
+ INSERT_PADDING_BYTES(2); // OmapColor
+ INSERT_PADDING_BYTES(2); // OmapSystemValuesC
+ INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10]
+ INSERT_PADDING_BYTES(1); // OmapReserved
+ } vtg;
+
+ struct {
+ INSERT_PADDING_BYTES(3); // ImapSystemValuesA
+ INSERT_PADDING_BYTES(1); // ImapSystemValuesB
+ INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
+ INSERT_PADDING_BYTES(2); // ImapColor
+ INSERT_PADDING_BYTES(2); // ImapSystemValuesC
+ INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
+ INSERT_PADDING_BYTES(2); // ImapReserved
+ struct {
+ u32 target;
+ union {
+ BitField<0, 1, u32> sample_mask;
+ BitField<1, 1, u32> depth;
+ BitField<2, 30, u32> reserved;
+ };
+ } omap;
+ bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
+ const u32 bit = render_target * 4 + component;
+ return omap.target & (1 << bit);
+ }
+ } ps;
+ };
+};
+
+static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
+
+} // namespace Tegra::Shader
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 252ff18fc..a1638c12e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -12,6 +12,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/engines/shader_header.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -26,7 +27,7 @@ using Tegra::Shader::Sampler;
using Tegra::Shader::SubOp;
constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
-constexpr u32 PROGRAM_HEADER_SIZE = 0x50;
+constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
class DecompileFail : public std::runtime_error {
public:
@@ -674,7 +675,7 @@ public:
u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
: subroutines(subroutines), program_code(program_code), main_offset(main_offset),
stage(stage), suffix(suffix) {
-
+ std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
Generate(suffix);
}
@@ -688,23 +689,6 @@ public:
}
private:
- // Shader program header for a Fragment Shader.
- struct FragmentHeader {
- INSERT_PADDING_WORDS(5);
- INSERT_PADDING_WORDS(13);
- u32 enabled_color_outputs;
- union {
- BitField<0, 1, u32> writes_samplemask;
- BitField<1, 1, u32> writes_depth;
- };
-
- bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
- const u32 bit = render_target * 4 + component;
- return enabled_color_outputs & (1 << bit);
- }
- };
- static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong");
-
/// Gets the Subroutine object corresponding to the specified address.
const Subroutine& GetSubroutine(u32 begin, u32 end) const {
const auto iter = subroutines.find(Subroutine{begin, end, suffix});
@@ -1010,10 +994,8 @@ private:
/// Writes the output values from a fragment shader to the corresponding GLSL output variables.
void EmitFragmentOutputsWrite() {
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
- FragmentHeader header;
- std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE);
- ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented");
+ ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented");
// Write the color outputs using the data in the shader registers, disabled
// rendertargets/components are skipped in the register assignment.
@@ -1022,7 +1004,7 @@ private:
++render_target) {
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
- if (header.IsColorComponentOutputEnabled(render_target, component)) {
+ if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
regs.GetRegisterAsFloat(current_reg)));
++current_reg;
@@ -1030,7 +1012,7 @@ private:
}
}
- if (header.writes_depth) {
+ if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
@@ -2666,6 +2648,7 @@ private:
private:
const std::set<Subroutine>& subroutines;
const ProgramCode& program_code;
+ Tegra::Shader::Header header;
const u32 main_offset;
Maxwell3D::Regs::ShaderStage stage;
const std::string& suffix;