summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
author raven02 <jacky.kktsui@yahoo.com.hk> 2018-09-19 13:53:11 +0200
committer GitHub <noreply@github.com> 2018-09-19 13:53:11 +0200
commit c8f9bbbf859c0e38cf691b64c67761382fcebfc2 (patch)
tree 99529c2277a6b740a6e278985c5147fa649c5497 /src/video_core
parent Add 1D sampler for TLDS - TexelFetch (Mario Rabbids) (diff)
parent Merge pull request #1348 from ogniK5377/GetImageSize (diff)
downloadyuzu-c8f9bbbf859c0e38cf691b64c67761382fcebfc2.tar
yuzu-c8f9bbbf859c0e38cf691b64c67761382fcebfc2.tar.gz
yuzu-c8f9bbbf859c0e38cf691b64c67761382fcebfc2.tar.bz2
yuzu-c8f9bbbf859c0e38cf691b64c67761382fcebfc2.tar.lz
yuzu-c8f9bbbf859c0e38cf691b64c67761382fcebfc2.tar.xz
yuzu-c8f9bbbf859c0e38cf691b64c67761382fcebfc2.tar.zst
yuzu-c8f9bbbf859c0e38cf691b64c67761382fcebfc2.zip
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/engines/fermi_2d.h2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp13
-rw-r--r--src/video_core/engines/maxwell_3d.h28
-rw-r--r--src/video_core/engines/maxwell_dma.cpp2
-rw-r--r--src/video_core/engines/maxwell_dma.h2
-rw-r--r--src/video_core/engines/shader_bytecode.h213
-rw-r--r--src/video_core/engines/shader_header.h103
-rw-r--r--src/video_core/gpu.h1
-rw-r--r--src/video_core/macro_interpreter.h2
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h16
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp30
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h8
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp52
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h50
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp223
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h18
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h2
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp2
-rw-r--r--src/video_core/textures/decoders.cpp53
24 files changed, 651 insertions, 195 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 4a79ce39c..f5ae57039 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -14,6 +14,7 @@ add_library(video_core STATIC
engines/maxwell_dma.cpp
engines/maxwell_dma.h
engines/shader_bytecode.h
+ engines/shader_header.h
gpu.cpp
gpu.h
macro_interpreter.cpp
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index dcf9ef8b9..021b83eaa 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -26,7 +26,7 @@ public:
void WriteReg(u32 method, u32 value);
struct Regs {
- static constexpr size_t NUM_REGS = 0x258;
+ static constexpr std::size_t NUM_REGS = 0x258;
struct Surface {
RenderTargetFormat format;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 329079ddd..8afd26fe9 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -248,8 +248,8 @@ void Maxwell3D::DrawArrays() {
void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
- auto& shader = state.shader_stages[static_cast<size_t>(stage)];
- auto& bind_data = regs.cb_bind[static_cast<size_t>(stage)];
+ auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
+ auto& bind_data = regs.cb_bind[static_cast<std::size_t>(stage)];
auto& buffer = shader.const_buffers[bind_data.index];
@@ -316,14 +316,14 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const {
std::vector<Texture::FullTextureInfo> textures;
- auto& fragment_shader = state.shader_stages[static_cast<size_t>(stage)];
+ auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)];
auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index];
ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size;
// Offset into the texture constbuffer where the texture info begins.
- static constexpr size_t TextureInfoOffset = 0x20;
+ static constexpr std::size_t TextureInfoOffset = 0x20;
for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
@@ -360,8 +360,9 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
return textures;
}
-Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const {
- auto& shader = state.shader_stages[static_cast<size_t>(stage)];
+Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
+ std::size_t offset) const {
+ auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d3be900a4..b81b0723d 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -34,17 +34,17 @@ public:
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
struct Regs {
- static constexpr size_t NUM_REGS = 0xE00;
-
- static constexpr size_t NumRenderTargets = 8;
- static constexpr size_t NumViewports = 16;
- static constexpr size_t NumCBData = 16;
- static constexpr size_t NumVertexArrays = 32;
- static constexpr size_t NumVertexAttributes = 32;
- static constexpr size_t MaxShaderProgram = 6;
- static constexpr size_t MaxShaderStage = 5;
+ static constexpr std::size_t NUM_REGS = 0xE00;
+
+ static constexpr std::size_t NumRenderTargets = 8;
+ static constexpr std::size_t NumViewports = 16;
+ static constexpr std::size_t NumCBData = 16;
+ static constexpr std::size_t NumVertexArrays = 32;
+ static constexpr std::size_t NumVertexAttributes = 32;
+ static constexpr std::size_t MaxShaderProgram = 6;
+ static constexpr std::size_t MaxShaderStage = 5;
// Maximum number of const buffers per shader stage.
- static constexpr size_t MaxConstBuffers = 18;
+ static constexpr std::size_t MaxConstBuffers = 18;
enum class QueryMode : u32 {
Write = 0,
@@ -443,9 +443,9 @@ public:
}
};
- bool IsShaderConfigEnabled(size_t index) const {
+ bool IsShaderConfigEnabled(std::size_t index) const {
// The VertexB is always enabled.
- if (index == static_cast<size_t>(Regs::ShaderProgram::VertexB)) {
+ if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
return true;
}
return shader_config[index].enable != 0;
@@ -571,7 +571,7 @@ public:
BitField<25, 3, u32> map_7;
};
- u32 GetMap(size_t index) const {
+ u32 GetMap(std::size_t index) const {
const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
map_4, map_5, map_6, map_7};
ASSERT(index < maps.size());
@@ -925,7 +925,7 @@ public:
std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
/// Returns the texture information for a specific texture in a specific shader stage.
- Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const;
+ Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
private:
VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index c24d33d5c..aa7481b8c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -50,7 +50,7 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.dst_params.pos_y == 0);
if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
- size_t copy_size = regs.x_count;
+ std::size_t copy_size = regs.x_count;
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
// buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count).
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 7882f16e0..311ccb616 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -23,7 +23,7 @@ public:
void WriteReg(u32 method, u32 value);
struct Regs {
- static constexpr size_t NUM_REGS = 0x1D6;
+ static constexpr std::size_t NUM_REGS = 0x1D6;
struct Parameters {
union {
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d6e2397f2..7e1de0fa1 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -20,10 +20,10 @@ namespace Tegra::Shader {
struct Register {
/// Number of registers
- static constexpr size_t NumRegisters = 256;
+ static constexpr std::size_t NumRegisters = 256;
/// Register 255 is special cased to always be 0
- static constexpr size_t ZeroIndex = 255;
+ static constexpr std::size_t ZeroIndex = 255;
enum class Size : u64 {
Byte = 0,
@@ -240,6 +240,41 @@ enum class FlowCondition : u64 {
Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
};
+enum class ControlCode : u64 { // Selector decoded from the 5-bit `cc` field of the csetp encoding
+ F = 0,
+ LT = 1,
+ EQ = 2,
+ LE = 3,
+ GT = 4,
+ NE = 5,
+ GE = 6,
+ Num = 7,
+ Nan = 8,
+ LTU = 9,
+ EQU = 10,
+ LEU = 11,
+ GTU = 12,
+ NEU = 13,
+ GEU = 14,
+ // NOTE(review): value 15 has no enumerator here (presumably the always-true code - TODO confirm)
+ OFF = 16,
+ LO = 17,
+ SFF = 18,
+ LS = 19,
+ HI = 20,
+ SFT = 21,
+ HS = 22,
+ OFT = 23,
+ CSM_TA = 24,
+ CSM_TR = 25,
+ CSM_MX = 26,
+ FCSM_TA = 27,
+ FCSM_TR = 28,
+ FCSM_MX = 29,
+ RLE = 30,
+ RGT = 31,
+};
+
enum class PredicateResultMode : u64 {
None = 0x0,
NotZero = 0x3,
@@ -271,6 +306,15 @@ enum class TextureProcessMode : u64 {
LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL
};
+enum class TextureMiscMode : u64 { // Modifier selector passed to the UsesMiscMode() helpers; values are implicit (0..5)
+ DC, // presumably "depth compare" - TODO confirm
+ AOFFI, // Uses Offset
+ NDV,
+ NODEP,
+ MZ,
+ PTP,
+};
+
enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 };
enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 };
@@ -546,6 +590,15 @@ union Instruction {
} pset;
union {
+ BitField<0, 3, u64> pred0; // presumably a predicate register index - TODO confirm
+ BitField<3, 3, u64> pred3; // presumably a predicate register index - TODO confirm
+ BitField<8, 5, ControlCode> cc; // flag in cc
+ BitField<39, 3, u64> pred39;
+ BitField<42, 1, u64> neg_pred39; // presumably negates pred39 when set - TODO confirm
+ BitField<45, 4, PredOperation> op; // op with pred39
+ } csetp; // operand fields for the opcode registered as Id::CSETP
+
+ union {
BitField<39, 3, u64> pred39;
BitField<42, 1, u64> neg_pred;
BitField<43, 1, u64> neg_a;
@@ -590,42 +643,127 @@ union Instruction {
BitField<28, 1, u64> array;
BitField<29, 2, TextureType> texture_type;
BitField<31, 4, u64> component_mask;
+ BitField<49, 1, u64> nodep_flag;
+ BitField<50, 1, u64> dc_flag;
+ BitField<54, 1, u64> aoffi_flag;
BitField<55, 3, TextureProcessMode> process_mode;
- bool IsComponentEnabled(size_t component) const {
+ bool IsComponentEnabled(std::size_t component) const {
return ((1ull << component) & component_mask) != 0;
}
+
+ /// Returns the process mode stored in this encoding's `process_mode` bitfield.
+ TextureProcessMode GetTextureProcessMode() const {
+     return process_mode.Value();
+ }
+
+ /// Reports whether `mode` is requested by this `tex` encoding.
+ /// Only DC, NODEP and AOFFI have a flag bit here; any other mode yields false.
+ bool UsesMiscMode(TextureMiscMode mode) const {
+     if (mode == TextureMiscMode::DC) {
+         return dc_flag != 0;
+     }
+     if (mode == TextureMiscMode::NODEP) {
+         return nodep_flag != 0;
+     }
+     if (mode == TextureMiscMode::AOFFI) {
+         return aoffi_flag != 0;
+     }
+     return false;
+ }
} tex;
union {
BitField<22, 6, TextureQueryType> query_type;
BitField<31, 4, u64> component_mask;
+ BitField<49, 1, u64> nodep_flag;
+
+ /// Reports whether `mode` is requested by this `txq` encoding.
+ /// NODEP is the only mode with a flag bit here; every other mode yields false.
+ bool UsesMiscMode(TextureMiscMode mode) const {
+     return mode == TextureMiscMode::NODEP && nodep_flag != 0;
+ }
} txq;
union {
BitField<28, 1, u64> array;
BitField<29, 2, TextureType> texture_type;
BitField<31, 4, u64> component_mask;
+ BitField<35, 1, u64> ndv_flag;
+ BitField<49, 1, u64> nodep_flag;
- bool IsComponentEnabled(size_t component) const {
+ bool IsComponentEnabled(std::size_t component) const {
return ((1ull << component) & component_mask) != 0;
}
+
+ /// Reports whether `mode` is requested by this `tmml` encoding.
+ /// NDV and NODEP are the only modes with a flag bit here; every other mode yields false.
+ bool UsesMiscMode(TextureMiscMode mode) const {
+     if (mode == TextureMiscMode::NDV) {
+         return ndv_flag != 0;
+     }
+     if (mode == TextureMiscMode::NODEP) {
+         return nodep_flag != 0;
+     }
+     return false;
+ }
} tmml;
union {
BitField<28, 1, u64> array;
BitField<29, 2, TextureType> texture_type;
+ BitField<35, 1, u64> ndv_flag;
+ BitField<49, 1, u64> nodep_flag;
+ BitField<50, 1, u64> dc_flag;
+ BitField<54, 2, u64> info;
BitField<56, 2, u64> component;
+
+ /// Reports whether `mode` is requested by this `tld4` encoding.
+ /// NDV, NODEP and DC each have a flag bit; AOFFI and PTP are the values 1 and 2 of `info`.
+ /// Any other mode yields false.
+ bool UsesMiscMode(TextureMiscMode mode) const {
+     if (mode == TextureMiscMode::NDV) {
+         return ndv_flag != 0;
+     }
+     if (mode == TextureMiscMode::NODEP) {
+         return nodep_flag != 0;
+     }
+     if (mode == TextureMiscMode::DC) {
+         return dc_flag != 0;
+     }
+     if (mode == TextureMiscMode::AOFFI) {
+         return info == 1;
+     }
+     if (mode == TextureMiscMode::PTP) {
+         return info == 2;
+     }
+     return false;
+ }
} tld4;
union {
+ BitField<49, 1, u64> nodep_flag;
+ BitField<50, 1, u64> dc_flag;
+ BitField<51, 1, u64> aoffi_flag;
BitField<52, 2, u64> component;
+
+ /// Reports whether `mode` is requested by this `tld4s` encoding.
+ /// Only DC, NODEP and AOFFI have a flag bit here; any other mode yields false.
+ bool UsesMiscMode(TextureMiscMode mode) const {
+     if (mode == TextureMiscMode::DC) {
+         return dc_flag != 0;
+     }
+     if (mode == TextureMiscMode::NODEP) {
+         return nodep_flag != 0;
+     }
+     if (mode == TextureMiscMode::AOFFI) {
+         return aoffi_flag != 0;
+     }
+     return false;
+ }
} tld4s;
union {
BitField<0, 8, Register> gpr0;
BitField<28, 8, Register> gpr28;
- BitField<49, 1, u64> nodep;
+ BitField<49, 1, u64> nodep_flag;
BitField<50, 3, u64> component_mask_selector;
BitField<53, 4, u64> texture_info;
@@ -645,6 +783,37 @@ union Instruction {
UNREACHABLE();
}
+ /// Maps the packed `texture_info` value to a process mode:
+ /// 0, 2, 6, 8, 9, 11 -> LZ; 3, 5, 13 -> LL; anything else -> None.
+ TextureProcessMode GetTextureProcessMode() const {
+     const u64 info = texture_info.Value();
+     if (info == 0 || info == 2 || info == 6 || info == 8 || info == 9 || info == 11) {
+         return TextureProcessMode::LZ;
+     }
+     if (info == 3 || info == 5 || info == 13) {
+         return TextureProcessMode::LL;
+     }
+     return TextureProcessMode::None;
+ }
+
+ /// Reports whether `mode` is requested by this encoding.
+ /// DC is derived from `texture_info` (values 4, 5, 6 and 9); NODEP has its own flag bit.
+ /// Any other mode yields false.
+ bool UsesMiscMode(TextureMiscMode mode) const {
+     if (mode == TextureMiscMode::DC) {
+         const u64 info = texture_info.Value();
+         return info == 9 || (info >= 4 && info <= 6);
+     }
+     if (mode == TextureMiscMode::NODEP) {
+         return nodep_flag != 0;
+     }
+     return false;
+ }
+
bool IsArrayTexture() const {
// TEXS only supports Texture2D arrays.
return texture_info >= 7 && texture_info <= 9;
@@ -654,7 +823,7 @@ union Instruction {
return gpr28.Value() != Register::ZeroIndex;
}
- bool IsComponentEnabled(size_t component) const {
+ bool IsComponentEnabled(std::size_t component) const {
static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
{},
{0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
@@ -662,7 +831,7 @@ union Instruction {
{0x7, 0xb, 0xd, 0xe, 0xf},
}};
- size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
+ std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
u32 mask = mask_lut[index][component_mask_selector];
@@ -673,6 +842,7 @@ union Instruction {
} texs;
union {
+ BitField<49, 1, u64> nodep_flag;
BitField<53, 4, u64> texture_info;
TextureType GetTextureType() const {
@@ -693,6 +863,26 @@ union Instruction {
UNREACHABLE();
}
+ /// Maps the packed `texture_info` value to a process mode:
+ /// 1, 5 and 12 select LL, every other value selects LZ.
+ TextureProcessMode GetTextureProcessMode() const {
+     switch (texture_info) {
+     case 1:
+     case 5:
+     case 12:
+         return TextureProcessMode::LL;
+     default:
+         return TextureProcessMode::LZ;
+     }
+ }
+
+ /// Reports whether `mode` is requested by this encoding.
+ /// AOFFI and MZ are derived from `texture_info`; NODEP has its own flag bit.
+ /// Any other mode yields false.
+ bool UsesMiscMode(TextureMiscMode mode) const {
+     if (mode == TextureMiscMode::AOFFI) {
+         return texture_info == 12 || texture_info == 4;
+     }
+     if (mode == TextureMiscMode::MZ) {
+         return texture_info == 5;
+     }
+     if (mode == TextureMiscMode::NODEP) {
+         return nodep_flag != 0;
+     }
+     return false;
+ }
+
bool IsArrayTexture() const {
// TEXS only supports Texture2D arrays.
return texture_info == 8;
@@ -735,6 +925,7 @@ union Instruction {
BitField<36, 5, u64> index;
} cbuf36;
+ BitField<47, 1, u64> generates_cc;
BitField<61, 1, u64> is_b_imm;
BitField<60, 1, u64> is_b_gpr;
BitField<59, 1, u64> is_c_gpr;
@@ -859,6 +1050,7 @@ public:
ISET_IMM,
PSETP,
PSET,
+ CSETP,
XMAD_IMM,
XMAD_CR,
XMAD_RC,
@@ -947,7 +1139,7 @@ public:
private:
struct Detail {
private:
- static constexpr size_t opcode_bitsize = 16;
+ static constexpr std::size_t opcode_bitsize = 16;
/**
* Generates the mask and the expected value after masking from a given bitstring.
@@ -956,8 +1148,8 @@ private:
*/
static auto GetMaskAndExpect(const char* const bitstring) {
u16 mask = 0, expect = 0;
- for (size_t i = 0; i < opcode_bitsize; i++) {
- const size_t bit_position = opcode_bitsize - i - 1;
+ for (std::size_t i = 0; i < opcode_bitsize; i++) {
+ const std::size_t bit_position = opcode_bitsize - i - 1;
switch (bitstring[i]) {
case '0':
mask |= 1 << bit_position;
@@ -1095,6 +1287,7 @@ private:
INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
+ INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
new file mode 100644
index 000000000..a885ee3cf
--- /dev/null
+++ b/src/video_core/engines/shader_header.h
@@ -0,0 +1,103 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra::Shader {
+
+enum class OutputTopology : u32 { // Decoded type of the 4-bit Header::common3.output_topology field
+ PointList = 1,
+ LineStrip = 6,
+ TriangleStrip = 7,
+};
+
+// Documentation in:
+// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
+/// Shader Program Header: five common 32-bit words followed by a type-dependent
+/// input/output map area (`vtg` or `ps`). The overall size is checked by the
+/// static_assert that follows this struct.
+struct Header {
+    union {
+        BitField<0, 5, u32> sph_type;
+        BitField<5, 5, u32> version;
+        BitField<10, 4, u32> shader_type;
+        BitField<14, 1, u32> mrt_enable;
+        BitField<15, 1, u32> kills_pixels;
+        BitField<16, 1, u32> does_global_store;
+        BitField<17, 4, u32> sass_version;
+        BitField<21, 5, u32> reserved;
+        BitField<26, 1, u32> does_load_or_store;
+        BitField<27, 1, u32> does_fp64;
+        BitField<28, 4, u32> stream_out_mask;
+    } common0;
+
+    union {
+        BitField<0, 24, u32> shader_local_memory_low_size;
+        BitField<24, 8, u32> per_patch_attribute_count;
+    } common1;
+
+    union {
+        BitField<0, 24, u32> shader_local_memory_high_size;
+        BitField<24, 8, u32> threads_per_input_primitive;
+    } common2;
+
+    union {
+        BitField<0, 24, u32> shader_local_memory_crs_size;
+        BitField<24, 4, OutputTopology> output_topology;
+        BitField<28, 4, u32> reserved;
+    } common3;
+
+    // The four fields tile the word without overlap: [0,12) [12,20) [20,24) [24,32).
+    union {
+        BitField<0, 12, u32> max_output_vertices;
+        BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
+        BitField<20, 4, u32> reserved;
+        BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
+    } common4;
+
+    union {
+        struct {
+            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
+            INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES(2);  // ImapColor
+            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
+            INSERT_PADDING_BYTES(5);  // ImapFixedFncTexture[10]
+            INSERT_PADDING_BYTES(1);  // ImapReserved
+            INSERT_PADDING_BYTES(3);  // OmapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // OmapSystemValuesB
+            INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
+            INSERT_PADDING_BYTES(2);  // OmapColor
+            INSERT_PADDING_BYTES(2);  // OmapSystemValuesC
+            INSERT_PADDING_BYTES(5);  // OmapFixedFncTexture[10]
+            INSERT_PADDING_BYTES(1);  // OmapReserved
+        } vtg;
+
+        struct {
+            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
+            INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES(2);  // ImapColor
+            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
+            INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
+            INSERT_PADDING_BYTES(2);  // ImapReserved
+            struct {
+                u32 target;
+                union {
+                    BitField<0, 1, u32> sample_mask;
+                    BitField<1, 1, u32> depth;
+                    BitField<2, 30, u32> reserved;
+                };
+            } omap;
+
+            /// Tests the bit of `omap.target` selected by (render_target, component);
+            /// the index is computed as four consecutive bits per render target.
+            bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
+                const u32 bit = render_target * 4 + component;
+                // Unsigned literal: render target 7, component 3 selects bit 31, and
+                // shifting a signed 1 into the sign bit is not portable.
+                return (omap.target & (1u << bit)) != 0;
+            }
+        } ps;
+    };
+};
+
+static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
+
+} // namespace Tegra::Shader
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 7329ca766..5cc1e19ca 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -42,6 +42,7 @@ enum class RenderTargetFormat : u32 {
R32_UINT = 0xE4,
R32_FLOAT = 0xE5,
B5G6R5_UNORM = 0xE8,
+ BGR5A1_UNORM = 0xE9,
RG8_UNORM = 0xEA,
RG8_SNORM = 0xEB,
R16_UNORM = 0xEE,
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
index 7d836b816..cee0baaf3 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -152,7 +152,7 @@ private:
boost::optional<u32>
delayed_pc; ///< Program counter to execute at after the delay slot is executed.
- static constexpr size_t NumMacroRegisters = 8;
+ static constexpr std::size_t NumMacroRegisters = 8;
/// General purpose macro registers.
std::array<u32, NumMacroRegisters> registers = {};
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 0b5d18bcb..578aca789 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -12,10 +12,10 @@
namespace OpenGL {
-OGLBufferCache::OGLBufferCache(size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {}
+OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {}
-GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment,
- bool cache) {
+GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
+ std::size_t alignment, bool cache) {
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
@@ -53,7 +53,8 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, siz
return uploaded_offset;
}
-GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment) {
+GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size,
+ std::size_t alignment) {
AlignBuffer(alignment);
std::memcpy(buffer_ptr, raw_pointer, size);
GLintptr uploaded_offset = buffer_offset;
@@ -63,7 +64,7 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size,
return uploaded_offset;
}
-void OGLBufferCache::Map(size_t max_size) {
+void OGLBufferCache::Map(std::size_t max_size) {
bool invalidate;
std::tie(buffer_ptr, buffer_offset_base, invalidate) =
stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
@@ -81,10 +82,10 @@ GLuint OGLBufferCache::GetHandle() const {
return stream_buffer.GetHandle();
}
-void OGLBufferCache::AlignBuffer(size_t alignment) {
+void OGLBufferCache::AlignBuffer(std::size_t alignment) { // Rounds buffer_offset up via Common::AlignUp and advances buffer_ptr by the same delta
// Align the offset, not the mapped pointer
GLintptr offset_aligned =
- static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment));
+ static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment));
buffer_ptr += offset_aligned - buffer_offset;
buffer_offset = offset_aligned;
}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 6da862902..6c18461f4 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -19,32 +19,32 @@ struct CachedBufferEntry final {
return addr;
}
- size_t GetSizeInBytes() const {
+ std::size_t GetSizeInBytes() const {
return size;
}
VAddr addr;
- size_t size;
+ std::size_t size;
GLintptr offset;
- size_t alignment;
+ std::size_t alignment;
};
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
public:
- explicit OGLBufferCache(size_t size);
+ explicit OGLBufferCache(std::size_t size);
- GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment = 4,
+ GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool cache = true);
- GLintptr UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment = 4);
+ GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
- void Map(size_t max_size);
+ void Map(std::size_t max_size);
void Unmap();
GLuint GetHandle() const;
protected:
- void AlignBuffer(size_t alignment);
+ void AlignBuffer(std::size_t alignment);
private:
OGLStreamBuffer stream_buffer;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7e1bba67d..274c2dbcf 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -46,7 +46,7 @@ MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100,
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
: emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
// Create sampler objects
- for (size_t i = 0; i < texture_samplers.size(); ++i) {
+ for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
}
@@ -181,7 +181,7 @@ void RasterizerOpenGL::SetupShaders() {
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_texture_bindpoint = 0;
- for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -190,12 +190,12 @@ void RasterizerOpenGL::SetupShaders() {
continue;
}
- const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
+ const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu.state.shader_stages[stage]);
const GLintptr offset = buffer_cache.UploadHostMemory(
- &ubo, sizeof(ubo), static_cast<size_t>(uniform_buffer_alignment));
+ &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
// Bind the buffer
glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo));
@@ -238,10 +238,10 @@ void RasterizerOpenGL::SetupShaders() {
shader_program_manager->UseTrivialGeometryShader();
}
-size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
+std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- size_t size = 0;
+ std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (!regs.vertex_array[index].IsEnabled())
continue;
@@ -299,7 +299,7 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
bool preserve_contents,
- boost::optional<size_t> single_color_target) {
+ boost::optional<std::size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
@@ -330,7 +330,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
} else {
// Multiple color attachments are enabled
std::array<GLenum, Maxwell::NumRenderTargets> buffers;
- for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
glFramebufferTexture2D(
@@ -342,7 +342,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
}
} else {
// No color attachments are enabled - zero out all of them
- for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
0, 0);
@@ -462,15 +462,15 @@ void RasterizerOpenGL::DrawArrays() {
state.draw.vertex_buffer = buffer_cache.GetHandle();
state.Apply();
- size_t buffer_size = CalculateVertexArraysSize();
+ std::size_t buffer_size = CalculateVertexArraysSize();
if (is_indexed) {
- buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size;
+ buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size;
}
// Uniform space for the 5 shader stages
buffer_size =
- Common::AlignUp<size_t>(buffer_size, 4) +
+ Common::AlignUp<std::size_t>(buffer_size, 4) +
(sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage;
// Add space for at least 18 constant buffers
@@ -644,7 +644,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
- const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)];
+ const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
const auto& entries = shader->GetShaderEntries().const_buffer_entries;
constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
@@ -667,7 +667,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
continue;
}
- size_t size = 0;
+ std::size_t size = 0;
if (used_buffer.IsIndirect()) {
// Buffer is accessed indirectly, so upload the entire thing
@@ -689,7 +689,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
GLintptr const_buffer_offset = buffer_cache.UploadMemory(
- buffer.address, size, static_cast<size_t>(uniform_buffer_alignment));
+ buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
// Now configure the bindpoint of the buffer inside the shader
glUniformBlockBinding(shader->GetProgramHandle(),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 163412882..bf9560bdc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -73,7 +73,7 @@ public:
};
/// Maximum supported size that a constbuffer can have in bytes.
- static constexpr size_t MaxConstbufferSize = 0x10000;
+ static constexpr std::size_t MaxConstbufferSize = 0x10000;
static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
@@ -106,7 +106,7 @@ private:
*/
void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true,
bool preserve_contents = true,
- boost::optional<size_t> single_color_target = {});
+ boost::optional<std::size_t> single_color_target = {});
/*
* Configures the current constbuffers to use for the draw command.
@@ -180,12 +180,12 @@ private:
std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
- static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
+ static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
OGLFramebuffer framebuffer;
GLint uniform_buffer_alignment;
- size_t CalculateVertexArraysSize() const;
+ std::size_t CalculateVertexArraysSize() const;
void SetupVertexArrays();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 32001e44b..86682d7cb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -75,7 +75,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
return params;
}
-/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(size_t index) {
+/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) {
const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
SurfaceParams params{};
params.addr = TryGetCpuAddr(config.Address());
@@ -167,6 +167,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
// Depth formats
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
@@ -203,7 +204,7 @@ static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) {
}
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
- ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
+ ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)];
ASSERT(component_type == format.component_type);
@@ -213,6 +214,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
static bool IsPixelFormatASTC(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
+ case PixelFormat::ASTC_2D_8X8:
return true;
default:
return false;
@@ -223,6 +225,8 @@ static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
return {4, 4};
+ case PixelFormat::ASTC_2D_8X8:
+ return {8, 8};
default:
LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
UNREACHABLE();
@@ -256,7 +260,7 @@ static bool IsFormatBCn(PixelFormat format) {
}
template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t gl_buffer_size,
+void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::size_t gl_buffer_size,
VAddr addr) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
@@ -267,7 +271,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t
const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
addr, tile_size, bytes_per_pixel, stride, height, block_height);
- const size_t size_to_copy{std::min(gl_buffer_size, data.size())};
+ const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
memcpy(gl_buffer, data.data(), size_to_copy);
} else {
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
@@ -278,7 +282,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t
}
}
-static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
// clang-format off
@@ -327,6 +331,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
MortonCopy<true, PixelFormat::RG8S>,
MortonCopy<true, PixelFormat::RG32UI>,
MortonCopy<true, PixelFormat::R32UI>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
MortonCopy<true, PixelFormat::Z32F>,
MortonCopy<true, PixelFormat::Z16>,
MortonCopy<true, PixelFormat::Z24S8>,
@@ -335,7 +340,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
// clang-format on
};
-static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
SurfaceParams::MaxPixelFormat>
gl_to_morton_fns = {
// clang-format off
@@ -386,6 +391,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
MortonCopy<false, PixelFormat::RG8S>,
MortonCopy<false, PixelFormat::RG32UI>,
MortonCopy<false, PixelFormat::R32UI>,
+ nullptr,
MortonCopy<false, PixelFormat::Z32F>,
MortonCopy<false, PixelFormat::Z16>,
MortonCopy<false, PixelFormat::Z24S8>,
@@ -513,9 +519,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
S8Z24 input_pixel{};
Z24S8 output_pixel{};
constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
- for (size_t y = 0; y < height; ++y) {
- for (size_t x = 0; x < width; ++x) {
- const size_t offset{bpp * (y * width + x)};
+ for (std::size_t y = 0; y < height; ++y) {
+ for (std::size_t x = 0; x < width; ++x) {
+ const std::size_t offset{bpp * (y * width + x)};
std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
output_pixel.s8.Assign(input_pixel.s8);
output_pixel.z24.Assign(input_pixel.z24);
@@ -526,9 +532,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
- for (size_t y = 0; y < height; ++y) {
- for (size_t x = 0; x < width; ++x) {
- const size_t offset{bpp * (y * width + x)};
+ for (std::size_t y = 0; y < height; ++y) {
+ for (std::size_t x = 0; x < width; ++x) {
+ const std::size_t offset{bpp * (y * width + x)};
const u8 temp{data[offset]};
data[offset] = data[offset + 1];
data[offset + 1] = temp;
@@ -544,7 +550,8 @@ static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
u32 width, u32 height) {
switch (pixel_format) {
- case PixelFormat::ASTC_2D_4X4: {
+ case PixelFormat::ASTC_2D_4X4:
+ case PixelFormat::ASTC_2D_8X8: {
// Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
u32 block_width{};
u32 block_height{};
@@ -591,13 +598,13 @@ void CachedSurface::LoadGLBuffer() {
UNREACHABLE();
}
- gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size);
- morton_to_gl_fns[static_cast<size_t>(params.pixel_format)](
+ gl_buffer.resize(static_cast<std::size_t>(params.depth) * copy_size);
+ morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
params.width, params.block_height, params.height, gl_buffer.data(), copy_size,
params.addr);
} else {
const u8* const texture_src_data_end{texture_src_data +
- (static_cast<size_t>(params.depth) * copy_size)};
+ (static_cast<std::size_t>(params.depth) * copy_size)};
gl_buffer.assign(texture_src_data, texture_src_data_end);
}
@@ -616,7 +623,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
MICROPROFILE_SCOPE(OpenGL_TextureUL);
- ASSERT(gl_buffer.size() == static_cast<size_t>(params.width) * params.height *
+ ASSERT(gl_buffer.size() == static_cast<std::size_t>(params.width) * params.height *
GetGLBytesPerPixel(params.pixel_format) * params.depth);
const auto& rect{params.GetRect()};
@@ -624,8 +631,9 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
// Load data from memory to the surface
const GLint x0 = static_cast<GLint>(rect.left);
const GLint y0 = static_cast<GLint>(rect.bottom);
- const size_t buffer_offset =
- static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) *
+ const std::size_t buffer_offset =
+ static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +
+ static_cast<std::size_t>(x0)) *
GetGLBytesPerPixel(params.pixel_format);
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
@@ -727,7 +735,7 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
return GetSurface(depth_params, preserve_contents);
}
-Surface RasterizerCacheOpenGL::GetColorBufferSurface(size_t index, bool preserve_contents) {
+Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) {
const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
@@ -825,7 +833,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
- size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
+ std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle);
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
@@ -849,7 +857,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
"reinterpretation but the texture is tiled.");
}
- size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
+ std::size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
std::vector<u8> data(remaining_size);
Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size());
glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 57ea8593b..d7a4bc37f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -70,19 +70,20 @@ struct SurfaceParams {
RG8S = 42,
RG32UI = 43,
R32UI = 44,
+ ASTC_2D_8X8 = 45,
MaxColorFormat,
// Depth formats
- Z32F = 45,
- Z16 = 46,
+ Z32F = 46,
+ Z16 = 47,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 47,
- S8Z24 = 48,
- Z32FS8 = 49,
+ Z24S8 = 48,
+ S8Z24 = 49,
+ Z32FS8 = 50,
MaxDepthStencilFormat,
@@ -90,7 +91,7 @@ struct SurfaceParams {
Invalid = 255,
};
- static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max);
+ static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
enum class ComponentType {
Invalid = 0,
@@ -192,6 +193,7 @@ struct SurfaceParams {
1, // RG8S
1, // RG32UI
1, // R32UI
+ 4, // ASTC_2D_8X8
1, // Z32F
1, // Z16
1, // Z24S8
@@ -199,8 +201,8 @@ struct SurfaceParams {
1, // Z32FS8
}};
- ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
- return compression_factor_table[static_cast<size_t>(format)];
+ ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size());
+ return compression_factor_table[static_cast<std::size_t>(format)];
}
static constexpr u32 GetFormatBpp(PixelFormat format) {
@@ -253,6 +255,7 @@ struct SurfaceParams {
16, // RG8S
64, // RG32UI
32, // R32UI
+ 16, // ASTC_2D_8X8
32, // Z32F
16, // Z16
32, // Z24S8
@@ -260,8 +263,8 @@ struct SurfaceParams {
64, // Z32FS8
}};
- ASSERT(static_cast<size_t>(format) < bpp_table.size());
- return bpp_table[static_cast<size_t>(format)];
+ ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
+ return bpp_table[static_cast<std::size_t>(format)];
}
u32 GetFormatBpp() const {
@@ -316,6 +319,8 @@ struct SurfaceParams {
return PixelFormat::R11FG11FB10F;
case Tegra::RenderTargetFormat::B5G6R5_UNORM:
return PixelFormat::B5G6R5U;
+ case Tegra::RenderTargetFormat::BGR5A1_UNORM:
+ return PixelFormat::A1B5G5R5U;
case Tegra::RenderTargetFormat::RGBA32_UINT:
return PixelFormat::RGBA32UI;
case Tegra::RenderTargetFormat::R8_UNORM:
@@ -522,6 +527,8 @@ struct SurfaceParams {
return PixelFormat::BC6H_SF16;
case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
return PixelFormat::ASTC_2D_4X4;
+ case Tegra::Texture::TextureFormat::ASTC_2D_8X8:
+ return PixelFormat::ASTC_2D_8X8;
case Tegra::Texture::TextureFormat::R16_G16:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
@@ -576,6 +583,7 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::RG16_UNORM:
case Tegra::RenderTargetFormat::R16_UNORM:
case Tegra::RenderTargetFormat::B5G6R5_UNORM:
+ case Tegra::RenderTargetFormat::BGR5A1_UNORM:
case Tegra::RenderTargetFormat::RG8_UNORM:
case Tegra::RenderTargetFormat::RGBA16_UNORM:
return ComponentType::UNorm;
@@ -636,16 +644,18 @@ struct SurfaceParams {
}
static SurfaceType GetFormatType(PixelFormat pixel_format) {
- if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxColorFormat)) {
+ if (static_cast<std::size_t>(pixel_format) <
+ static_cast<std::size_t>(PixelFormat::MaxColorFormat)) {
return SurfaceType::ColorTexture;
}
- if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxDepthFormat)) {
+ if (static_cast<std::size_t>(pixel_format) <
+ static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) {
return SurfaceType::Depth;
}
- if (static_cast<size_t>(pixel_format) <
- static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) {
+ if (static_cast<std::size_t>(pixel_format) <
+ static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {
return SurfaceType::DepthStencil;
}
@@ -659,7 +669,7 @@ struct SurfaceParams {
MathUtil::Rectangle<u32> GetRect() const;
/// Returns the size of this surface in bytes, adjusted for compression
- size_t SizeInBytes() const {
+ std::size_t SizeInBytes() const {
const u32 compression_factor{GetCompressionFactor(pixel_format)};
ASSERT(width % compression_factor == 0);
ASSERT(height % compression_factor == 0);
@@ -671,7 +681,7 @@ struct SurfaceParams {
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
/// Creates SurfaceParams from a framebuffer configuration
- static SurfaceParams CreateForFramebuffer(size_t index);
+ static SurfaceParams CreateForFramebuffer(std::size_t index);
/// Creates SurfaceParams for a depth buffer configuration
static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
@@ -694,7 +704,7 @@ struct SurfaceParams {
u32 height;
u32 depth;
u32 unaligned_height;
- size_t size_in_bytes;
+ std::size_t size_in_bytes;
SurfaceTarget target;
};
@@ -711,7 +721,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
namespace std {
template <>
struct hash<SurfaceReserveKey> {
- size_t operator()(const SurfaceReserveKey& k) const {
+ std::size_t operator()(const SurfaceReserveKey& k) const {
return k.Hash();
}
};
@@ -727,7 +737,7 @@ public:
return params.addr;
}
- size_t GetSizeInBytes() const {
+ std::size_t GetSizeInBytes() const {
return params.size_in_bytes;
}
@@ -775,7 +785,7 @@ public:
Surface GetDepthBufferSurface(bool preserve_contents);
/// Get the color surface based on the framebuffer configuration and the specified render target
- Surface GetColorBufferSurface(size_t index, bool preserve_contents);
+ Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
/// Flushes the surface to Switch memory
void FlushSurface(const Surface& surface);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 61080f5cc..894fe6eae 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -14,7 +14,7 @@ namespace OpenGL {
/// Gets the address for the specified shader stage program
static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
- const auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
+ const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
shader_config.offset);
}
@@ -28,7 +28,7 @@ static GLShader::ProgramCode GetShaderCode(VAddr addr) {
/// Helper function to set shader uniform block bindings for a single shader stage
static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
- Maxwell::ShaderStage binding, size_t expected_size) {
+ Maxwell::ShaderStage binding, std::size_t expected_size) {
const GLuint ub_index = glGetUniformBlockIndex(shader, name);
if (ub_index == GL_INVALID_INDEX) {
return;
@@ -36,7 +36,7 @@ static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
GLint ub_size = 0;
glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
- ASSERT_MSG(static_cast<size_t>(ub_size) == expected_size,
+ ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size,
"Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 6e6febcbc..9bafe43a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -28,7 +28,7 @@ public:
}
/// Gets the size of the shader in guest memory, required for cache management
- size_t GetSizeInBytes() const {
+ std::size_t GetSizeInBytes() const {
return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64);
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 7a5321b9c..00cd05e62 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -12,6 +12,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/engines/shader_header.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -26,7 +27,7 @@ using Tegra::Shader::Sampler;
using Tegra::Shader::SubOp;
constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
-constexpr u32 PROGRAM_HEADER_SIZE = 0x50;
+constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
class DecompileFail : public std::runtime_error {
public:
@@ -189,7 +190,7 @@ public:
private:
void AppendIndentation() {
- shader_source.append(static_cast<size_t>(scope) * 4, ' ');
+ shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
}
std::string shader_source;
@@ -208,7 +209,7 @@ public:
UnsignedInteger,
};
- GLSLRegister(size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
+ GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
/// Gets the GLSL type string for a register
static std::string GetTypeString() {
@@ -226,15 +227,23 @@ public:
}
/// Returns the index of the register
- size_t GetIndex() const {
+ std::size_t GetIndex() const {
return index;
}
private:
- const size_t index;
+ const std::size_t index;
const std::string& suffix;
};
+enum class InternalFlag : u64 {
+ ZeroFlag = 0,
+ CarryFlag = 1,
+ OverflowFlag = 2,
+ NaNFlag = 3,
+ Amount
+};
+
/**
* Used to manage shader registers that are emulated with GLSL. This class keeps track of the state
* of all registers (e.g. whether they are currently being used as Floats or Integers), and
@@ -328,13 +337,19 @@ public:
void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
const std::string& value, u64 dest_num_components,
u64 value_num_components, bool is_saturated = false,
- u64 dest_elem = 0, Register::Size size = Register::Size::Word) {
+ u64 dest_elem = 0, Register::Size size = Register::Size::Word,
+ bool sets_cc = false) {
ASSERT_MSG(!is_saturated, "Unimplemented");
const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
dest_num_components, value_num_components, dest_elem);
+
+ if (sets_cc) {
+ const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
+ SetInternalFlag(InternalFlag::ZeroFlag, zero_condition);
+ }
}
/**
@@ -351,6 +366,26 @@ public:
shader.AddLine(dest + " = " + src + ';');
}
+ std::string GetControlCode(const Tegra::Shader::ControlCode cc) const {
+ switch (cc) {
+ case Tegra::Shader::ControlCode::NEU:
+ return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')';
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented Control Code {}", static_cast<u32>(cc));
+ UNREACHABLE();
+ return "false";
+ }
+ }
+
+ std::string GetInternalFlag(const InternalFlag ii) const {
+ const u32 code = static_cast<u32>(ii);
+ return "internalFlag_" + std::to_string(code) + suffix;
+ }
+
+ void SetInternalFlag(const InternalFlag ii, const std::string& value) const {
+ shader.AddLine(GetInternalFlag(ii) + " = " + value + ';');
+ }
+
/**
* Writes code that does a output attribute assignment to register operation. Output attributes
* are stored as floats, so this may require conversion.
@@ -414,6 +449,12 @@ public:
}
declarations.AddNewLine();
+ for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
+ const InternalFlag code = static_cast<InternalFlag>(ii);
+ declarations.AddLine("bool " + GetInternalFlag(code) + " = false;");
+ }
+ declarations.AddNewLine();
+
for (const auto element : declr_input_attribute) {
// TODO(bunnei): Use proper number of elements for these
u32 idx =
@@ -468,7 +509,7 @@ public:
/// necessary.
std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
bool is_array) {
- const size_t offset = static_cast<size_t>(sampler.index.Value());
+ const std::size_t offset = static_cast<std::size_t>(sampler.index.Value());
// If this sampler has already been used, return the existing mapping.
const auto itr =
@@ -481,7 +522,7 @@ public:
}
// Otherwise create a new mapping for this sampler
- const size_t next_index = used_samplers.size();
+ const std::size_t next_index = used_samplers.size();
const SamplerEntry entry{stage, offset, next_index, type, is_array};
used_samplers.emplace_back(entry);
return entry.GetName();
@@ -531,7 +572,7 @@ private:
void BuildRegisterList() {
regs.reserve(Register::NumRegisters);
- for (size_t index = 0; index < Register::NumRegisters; ++index) {
+ for (std::size_t index = 0; index < Register::NumRegisters; ++index) {
regs.emplace_back(index, suffix);
}
}
@@ -674,7 +715,7 @@ public:
u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
: subroutines(subroutines), program_code(program_code), main_offset(main_offset),
stage(stage), suffix(suffix) {
-
+ std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
Generate(suffix);
}
@@ -688,23 +729,6 @@ public:
}
private:
- // Shader program header for a Fragment Shader.
- struct FragmentHeader {
- INSERT_PADDING_WORDS(5);
- INSERT_PADDING_WORDS(13);
- u32 enabled_color_outputs;
- union {
- BitField<0, 1, u32> writes_samplemask;
- BitField<1, 1, u32> writes_depth;
- };
-
- bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
- const u32 bit = render_target * 4 + component;
- return enabled_color_outputs & (1 << bit);
- }
- };
- static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong");
-
/// Gets the Subroutine object corresponding to the specified address.
const Subroutine& GetSubroutine(u32 begin, u32 end) const {
const auto iter = subroutines.find(Subroutine{begin, end, suffix});
@@ -862,7 +886,7 @@ private:
*/
bool IsSchedInstruction(u32 offset) const {
// sched instructions appear once every 4 instructions.
- static constexpr size_t SchedPeriod = 4;
+ static constexpr std::size_t SchedPeriod = 4;
u32 absolute_offset = offset - main_offset;
return (absolute_offset % SchedPeriod) == 0;
@@ -930,7 +954,7 @@ private:
std::string result;
result += '(';
- for (size_t i = 0; i < shift_amounts.size(); ++i) {
+ for (std::size_t i = 0; i < shift_amounts.size(); ++i) {
if (i)
result += '|';
result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] +
@@ -954,9 +978,7 @@ private:
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
- ASSERT_MSG(instr.texs.nodep == 0, "TEXS nodep not implemented");
-
- size_t written_components = 0;
+ std::size_t written_components = 0;
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component)) {
continue;
@@ -1010,10 +1032,8 @@ private:
/// Writes the output values from a fragment shader to the corresponding GLSL output variables.
void EmitFragmentOutputsWrite() {
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
- FragmentHeader header;
- std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE);
- ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented");
+ ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented");
// Write the color outputs using the data in the shader registers, disabled
// rendertargets/components are skipped in the register assignment.
@@ -1022,7 +1042,7 @@ private:
++render_target) {
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
- if (header.IsColorComponentOutputEnabled(render_target, component)) {
+ if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
regs.GetRegisterAsFloat(current_reg)));
++current_reg;
@@ -1030,7 +1050,7 @@ private:
}
}
- if (header.writes_depth) {
+ if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
@@ -1510,8 +1530,6 @@ private:
case OpCode::Id::LEA_IMM:
case OpCode::Id::LEA_RZ:
case OpCode::Id::LEA_HI: {
- std::string op_a;
- std::string op_b;
std::string op_c;
switch (opcode->GetId()) {
@@ -1642,7 +1660,8 @@ private:
}
regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
- 1, instr.alu.saturate_d, 0, instr.conversion.dest_size);
+ 1, instr.alu.saturate_d, 0, instr.conversion.dest_size,
+ instr.generates_cc.Value() != 0);
break;
}
case OpCode::Id::I2F_R:
@@ -1781,8 +1800,8 @@ private:
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
Tegra::Shader::IpaSampleMode::Default};
- u32 next_element = instr.attribute.fmt20.element;
- u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
+ u64 next_element = instr.attribute.fmt20.element;
+ u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
const auto LoadNextElement = [&](u32 reg_offset) {
regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
@@ -1846,8 +1865,8 @@ private:
ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
"Unaligned attribute loads are not supported");
- u32 next_element = instr.attribute.fmt20.element;
- u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
+ u64 next_element = instr.attribute.fmt20.element;
+ u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
const auto StoreNextElement = [&](u32 reg_offset) {
regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
@@ -1873,6 +1892,13 @@ private:
Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
std::string coord;
+ ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+ "DC is not implemented");
+
switch (texture_type) {
case Tegra::Shader::TextureType::Texture1D: {
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
@@ -1937,8 +1963,8 @@ private:
UNREACHABLE();
}
}
- size_t dest_elem{};
- for (size_t elem = 0; elem < 4; ++elem) {
+ std::size_t dest_elem{};
+ for (std::size_t elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
@@ -1955,6 +1981,11 @@ private:
Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
bool is_array{instr.texs.IsArrayTexture()};
+ ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+ "DC is not implemented");
+
switch (texture_type) {
case Tegra::Shader::TextureType::Texture2D: {
if (is_array) {
@@ -1990,6 +2021,13 @@ private:
std::string coord;
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};
+
+ ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ),
+ "MZ is not implemented");
switch (texture_type) {
case Tegra::Shader::TextureType::Texture1D: {
@@ -2024,6 +2062,17 @@ private:
ASSERT(instr.tld4.array == 0);
std::string coord;
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+ "DC is not implemented");
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
+ "NDV is not implemented");
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP),
+ "PTP is not implemented");
+
switch (instr.tld4.texture_type) {
case Tegra::Shader::TextureType::Texture2D: {
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
@@ -2047,8 +2096,8 @@ private:
const std::string texture = "textureGather(" + sampler + ", coords, " +
std::to_string(instr.tld4.component) + ')';
- size_t dest_elem{};
- for (size_t elem = 0; elem < 4; ++elem) {
+ std::size_t dest_elem{};
+ for (std::size_t elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
@@ -2061,6 +2110,13 @@ private:
break;
}
case OpCode::Id::TLD4S: {
+ ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+ "DC is not implemented");
+
const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
@@ -2073,6 +2129,9 @@ private:
break;
}
case OpCode::Id::TXQ: {
+ ASSERT_MSG(!instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+
// TODO: the new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
@@ -2093,6 +2152,11 @@ private:
break;
}
case OpCode::Id::TMML: {
+ ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
+ "NDV is not implemented");
+
const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const bool is_array = instr.tmml.array != 0;
@@ -2259,31 +2323,55 @@ private:
break;
}
case OpCode::Type::PredicateSetPredicate: {
- const std::string op_a =
- GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
- const std::string op_b =
- GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
+ switch (opcode->GetId()) {
+ case OpCode::Id::PSETP: {
+ const std::string op_a =
+ GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
+ const std::string op_b =
+ GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
- // We can't use the constant predicate as destination.
- ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+ // We can't use the constant predicate as destination.
+ ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
- const std::string second_pred =
- GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
+ const std::string second_pred =
+ GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
- const std::string combiner = GetPredicateCombiner(instr.psetp.op);
+ const std::string combiner = GetPredicateCombiner(instr.psetp.op);
- const std::string predicate =
- '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
+ const std::string predicate =
+ '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
- // Set the primary predicate to the result of Predicate OP SecondPredicate
- SetPredicate(instr.psetp.pred3,
- '(' + predicate + ") " + combiner + " (" + second_pred + ')');
+ // Set the primary predicate to the result of Predicate OP SecondPredicate
+ SetPredicate(instr.psetp.pred3,
+ '(' + predicate + ") " + combiner + " (" + second_pred + ')');
- if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
- // if enabled
- SetPredicate(instr.psetp.pred0,
- "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+ if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+ // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+ // if enabled
+ SetPredicate(instr.psetp.pred0,
+ "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+ }
+ break;
+ }
+ case OpCode::Id::CSETP: {
+ const std::string pred =
+ GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
+ const std::string combiner = GetPredicateCombiner(instr.csetp.op);
+ const std::string controlCode = regs.GetControlCode(instr.csetp.cc);
+ if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
+ SetPredicate(instr.csetp.pred3,
+ '(' + controlCode + ") " + combiner + " (" + pred + ')');
+ }
+ if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+ SetPredicate(instr.csetp.pred0,
+ "!(" + controlCode + ") " + combiner + " (" + pred + ')');
+ }
+ break;
+ }
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", opcode->GetName());
+ UNREACHABLE();
+ }
}
break;
}
@@ -2673,6 +2761,7 @@ private:
private:
const std::set<Subroutine>& subroutines;
const ProgramCode& program_code;
+ Tegra::Shader::Header header;
const u32 main_offset;
Maxwell3D::Regs::ShaderStage stage;
const std::string& suffix;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index a43e2997b..d53b93ad5 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -13,7 +13,7 @@
namespace OpenGL::GLShader {
-constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
+constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
using ProgramCode = std::vector<u64>;
class ConstBufferEntry {
@@ -51,7 +51,7 @@ public:
}
std::string GetName() const {
- return BufferBaseNames[static_cast<size_t>(stage)] + std::to_string(index);
+ return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index);
}
u32 GetHash() const {
@@ -74,15 +74,15 @@ class SamplerEntry {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
public:
- SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index,
+ SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index,
Tegra::Shader::TextureType type, bool is_array)
: offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {}
- size_t GetOffset() const {
+ std::size_t GetOffset() const {
return offset;
}
- size_t GetIndex() const {
+ std::size_t GetIndex() const {
return sampler_index;
}
@@ -91,7 +91,7 @@ public:
}
std::string GetName() const {
- return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '_' +
+ return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' +
std::to_string(sampler_index);
}
@@ -133,7 +133,7 @@ public:
}
static std::string GetArrayName(Maxwell::ShaderStage stage) {
- return TextureSamplerNames[static_cast<size_t>(stage)];
+ return TextureSamplerNames[static_cast<std::size_t>(stage)];
}
private:
@@ -143,9 +143,9 @@ private:
/// Offset in TSC memory from which to read the sampler object, as specified by the sampling
/// instruction.
- size_t offset;
+ std::size_t offset;
Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
- size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
+ std::size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
bool is_array; ///< Whether the texture is being sampled as an array texture or not.
};
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 533e42caa..b86cd96e8 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -12,7 +12,7 @@
namespace OpenGL::GLShader {
/// Number of OpenGL texture samplers that can be used in the fragment shader
-static constexpr size_t NumTextureSamplers = 32;
+static constexpr std::size_t NumTextureSamplers = 32;
using Tegra::Engines::Maxwell3D;
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 6f70deb96..af99132ba 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -272,7 +272,7 @@ void OpenGLState::Apply() const {
}
// Clip distance
- for (size_t i = 0; i < clip_distance.size(); ++i) {
+ for (std::size_t i = 0; i < clip_distance.size(); ++i) {
if (clip_distance[i] != cur_state.clip_distance[i]) {
if (clip_distance[i]) {
glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index aadf68f16..664f3ca20 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -61,7 +61,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
mapped_size = size;
if (alignment > 0) {
- buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment);
+ buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
}
bool invalidate = false;
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 272294c62..20ba6d4f6 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -46,6 +46,48 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_
}
}
+/// Lookup table of swizzle offsets, indexed as table[y][x] with y in [0, N) and x in [0, M).
+/// Column x stands for the 16-byte chunk starting at byte x * 16 of a row; the stored value is
+/// the offset that FastSwizzleData adds to a GOB base address to reach that chunk.
+/// The struct itself is aligned to 64 bytes.
+template <std::size_t N, std::size_t M>
+struct alignas(64) SwizzleTable {
+    /// Fills every entry of the table; constexpr so it can be evaluated at compile time.
+    constexpr SwizzleTable() {
+        for (u32 row = 0; row < N; ++row) {
+            // Terms that depend only on the row index.
+            const u32 row_pair_term = ((row % 8) / 2) * 64;
+            const u32 row_parity_term = (row % 2) * 16;
+            for (u32 col = 0; col < M; ++col) {
+                // Byte position of this 16-byte column within the row.
+                const u32 byte_x = col * 16;
+                const u32 half_term = ((byte_x % 64) / 32) * 256;
+                const u32 quarter_term = ((byte_x % 32) / 16) * 32;
+                values[row][col] = static_cast<u16>(half_term + row_pair_term + quarter_term +
+                                                    row_parity_term);
+            }
+        }
+    }
+    /// Returns the M offsets belonging to row `index`; no bounds checking is performed.
+    const std::array<u16, M>& operator[](std::size_t index) const {
+        return values[index];
+    }
+    /// Backing storage, zero-initialised before the constructor fills it.
+    std::array<std::array<u16, M>, N> values{};
+};
+
+/// Swizzle offsets for 8 rows by 4 sixteen-byte columns; read by FastSwizzleData.
+constexpr SwizzleTable<8, 4> swizzle_table{};
+
+/// Copies an image between its swizzled (GOB-based) layout and a linear layout, 16 bytes at a
+/// time, using swizzle_table to locate each chunk inside its 512-byte GOB.
+/// @param width           Row length in pixels; width * bytes_per_pixel is the row stride.
+/// @param height          Number of rows to process.
+/// @param bytes_per_pixel Size of one pixel in bytes.
+/// @param swizzled_data   Buffer holding the swizzled layout.
+/// @param unswizzled_data Buffer holding the linear layout.
+/// @param unswizzle       true copies swizzled -> linear, false copies linear -> swizzled.
+/// @param block_height    Height of a block in GOBs (a block spans 8 * block_height rows); it is
+///                        used as a divisor, so it must be non-zero.
+/// @note Rows are walked in whole 16-byte steps, so callers are expected to pass a row stride
+///       that is a multiple of 16; otherwise the last chunk of a row runs past the row end.
+void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u8* swizzled_data,
+                     u8* unswizzled_data, bool unswizzle, u32 block_height) {
+    constexpr std::size_t chunk_bytes = 16;
+    const std::size_t row_bytes = width * bytes_per_pixel;
+    const std::size_t gobs_per_row = (row_bytes + 63) / 64;
+    for (std::size_t row = 0; row < height; ++row) {
+        // Address of the GOB that holds the first chunk of this row.
+        const std::size_t block_row = row / (8 * block_height);
+        const std::size_t gob_in_block = row % (8 * block_height) / 8;
+        const std::size_t row_gob_base =
+            block_row * 512 * block_height * gobs_per_row + gob_in_block * 512;
+        // Start of this row in the linear buffer.
+        const std::size_t linear_row_base = row * width * bytes_per_pixel;
+        const auto& row_offsets = swizzle_table[row % 8];
+        for (std::size_t byte_x = 0; byte_x < row_bytes; byte_x += chunk_bytes) {
+            // Every 64 bytes along the row moves on to the next column of GOBs.
+            const std::size_t gob_base = row_gob_base + (byte_x / 64) * 512 * block_height;
+            const std::size_t swizzled_offset = gob_base + row_offsets[(byte_x / 16) % 4];
+            const std::size_t linear_offset = byte_x + linear_row_base;
+            u8* const swizzled_chunk = swizzled_data + swizzled_offset;
+            u8* const linear_chunk = unswizzled_data + linear_offset;
+            if (unswizzle) {
+                std::memcpy(linear_chunk, swizzled_chunk, chunk_bytes);
+            } else {
+                std::memcpy(swizzled_chunk, linear_chunk, chunk_bytes);
+            }
+        }
+    }
+}
+
u32 BytesPerPixel(TextureFormat format) {
switch (format) {
case TextureFormat::DXT1:
@@ -63,6 +105,7 @@ u32 BytesPerPixel(TextureFormat format) {
case TextureFormat::R32_G32_B32:
return 12;
case TextureFormat::ASTC_2D_4X4:
+ case TextureFormat::ASTC_2D_8X8:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::BF10GF11RF11:
@@ -91,8 +134,13 @@ u32 BytesPerPixel(TextureFormat format) {
+/// Unswizzles the texture found at Memory::GetPointer(address) and returns it as a linear byte
+/// vector of width * height * bytes_per_pixel bytes. The copy itself operates on
+/// (width / tile_size) x (height / tile_size) elements of bytes_per_pixel bytes each.
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
u32 height, u32 block_height) {
std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
- CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel,
- Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
+ // Dimensions actually handed to the copy routines, measured in tiles rather than texels.
+ const u32 tiles_x = width / tile_size;
+ const u32 tiles_y = height / tile_size;
+ // FastSwizzleData moves whole 16-byte chunks, so the stride it walks (tiles_x *
+ // bytes_per_pixel, not width * bytes_per_pixel) must be a multiple of 16. The
+ // bytes_per_pixel % 3 exclusion is kept as in the original patch.
+ if (bytes_per_pixel % 3 != 0 && (tiles_x * bytes_per_pixel) % 16 == 0) {
+ FastSwizzleData(tiles_x, tiles_y, bytes_per_pixel, Memory::GetPointer(address),
+ unswizzled_data.data(), true, block_height);
+ } else {
+ CopySwizzledData(tiles_x, tiles_y, bytes_per_pixel, bytes_per_pixel,
+ Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
+ }
return unswizzled_data;
}
@@ -111,6 +159,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
case TextureFormat::BC6H_UF16:
case TextureFormat::BC6H_SF16:
case TextureFormat::ASTC_2D_4X4:
+ case TextureFormat::ASTC_2D_8X8:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::A1B5G5R5: