diff options
author | bunnei <bunneidev@gmail.com> | 2020-06-02 20:58:50 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-02 20:58:50 +0200 |
commit | 597d8b4bd457ff0aa1293ff4ac7761e2eefc9150 (patch) | |
tree | f16a61443b947bf5cc336d51e32111c8f67eb6b3 | |
parent | Merge pull request #4016 from ReinUsesLisp/invocation-info (diff) | |
parent | glsl: Squash constant buffers into a single SSBO when we hit the limit (diff) | |
download | yuzu-597d8b4bd457ff0aa1293ff4ac7761e2eefc9150.tar yuzu-597d8b4bd457ff0aa1293ff4ac7761e2eefc9150.tar.gz yuzu-597d8b4bd457ff0aa1293ff4ac7761e2eefc9150.tar.bz2 yuzu-597d8b4bd457ff0aa1293ff4ac7761e2eefc9150.tar.lz yuzu-597d8b4bd457ff0aa1293ff4ac7761e2eefc9150.tar.xz yuzu-597d8b4bd457ff0aa1293ff4ac7761e2eefc9150.tar.zst yuzu-597d8b4bd457ff0aa1293ff4ac7761e2eefc9150.zip |
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 46 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 7 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 78 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 4 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 12 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 99 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 6 |
7 files changed, 173 insertions, 79 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index e1b245288..d20547c04 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -6,6 +6,7 @@ #include <array> #include <cstddef> #include <cstring> +#include <limits> #include <optional> #include <vector> @@ -26,24 +27,27 @@ constexpr u32 ReservedUniformBlocks = 1; constexpr u32 NumStages = 5; -constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, - GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, - GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS}; +constexpr std::array LimitUBOs = { + GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, + GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, + GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS}; constexpr std::array LimitSSBOs = { - GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, + GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS}; + GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; -constexpr std::array LimitSamplers = { - GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS}; +constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, + GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, + GL_MAX_TEXTURE_IMAGE_UNITS, + GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; -constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS, - GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, - GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, - GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS}; +constexpr std::array LimitImages = { + GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, + GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, + GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS}; template <typename T> T GetInteger(GLenum pname) { @@ -85,6 +89,13 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { return std::exchange(base, base + amount); } +std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { + std::array<u32, Tegra::Engines::MaxShaderTypes> max; + std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(), + [](GLenum pname) { return GetInteger<u32>(pname); }); + return max; +} + std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; @@ -159,7 +170,8 @@ bool IsASTCSupported() { } // Anonymous namespace -Device::Device() : base_bindings{BuildBaseBindings()} { +Device::Device() + : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); const std::vector extensions = GetExtensions(); @@ -194,7 +206,9 @@ Device::Device() : base_bindings{BuildBaseBindings()} { } Device::Device(std::nullptr_t) { - uniform_buffer_alignment = 0; + max_uniform_buffers.fill(std::numeric_limits<u32>::max()); + uniform_buffer_alignment = 4; + shader_storage_alignment = 4; max_vertex_attributes = 16; max_varyings = 15; has_warp_intrinsics = true; @@ -202,8 +216,6 @@ Device::Device(std::nullptr_t) { has_vertex_viewport_layer = true; has_image_load_formatted = true; has_variable_aoffi = true; - has_component_indexing_bug = false; - has_precise_bug = false; } bool Device::TestVariableAoffi() { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 683ed9002..98cca0254 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -24,6 +24,10 @@ public: explicit Device(); explicit Device(std::nullptr_t); + u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { + return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; + } + const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { return base_bindings[stage_index]; } @@ -92,7 +96,8 @@ private: static bool TestVariableAoffi(); static bool TestPreciseBug(); - std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings; + std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; + std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; std::size_t uniform_buffer_alignment{}; std::size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3c421dd16..55e79aaf6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -54,6 +54,12 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255 namespace { +constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18; +constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = + NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; +constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = + NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; + constexpr std::size_t NumSupportedVertexAttributes = 16; template <typename Engine, typename Entry> @@ -104,6 +110,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { CheckExtensions(); + unified_uniform_buffer.Create(); + glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); + if (device.UseAssemblyShaders()) { glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); for (const GLuint cbuf : staging_cbufs) { @@ -842,34 +851,56 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad MICROPROFILE_SCOPE(OpenGL_UBO); const auto& stages = system.GPU().Maxwell3D().state.shader_stages; const auto& shader_stage = stages[stage_index]; + const auto& entries = shader->GetEntries(); + const bool use_unified = entries.use_unified_uniforms; + const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE; - u32 binding = - device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer; - for (const auto& entry : shader->GetEntries().const_buffers) { - const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; - SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry); + const auto base_bindings = device.GetBaseBindings(stage_index); + u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer; + for (const auto& entry : entries.const_buffers) { + const u32 index = entry.GetIndex(); + const auto& buffer = shader_stage.const_buffers[index]; + SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified, + base_unified_offset + index * Maxwell::MaxConstBufferSize); + ++binding; + } + if (use_unified) { + const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer + + entries.global_memory_entries.size()); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, + base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE); } } void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { MICROPROFILE_SCOPE(OpenGL_UBO); const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + const auto& entries = kernel->GetEntries(); + const bool use_unified = entries.use_unified_uniforms; u32 binding = 0; - for (const auto& entry : kernel->GetEntries().const_buffers) { + for (const auto& entry : entries.const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); Tegra::Engines::ConstBufferInfo buffer; buffer.address = config.Address(); buffer.size = config.size; buffer.enabled = mask[entry.GetIndex()]; - SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry); + SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry, + use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize); + ++binding; + } + if (use_unified) { + const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size()); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0, + NUM_CONST_BUFFERS_BYTES_PER_STAGE); } } void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, - const ConstBufferEntry& entry) { + const ConstBufferEntry& entry, bool use_unified, + std::size_t unified_offset) { if (!buffer.enabled) { // Set values to zero to unbind buffers if (device.UseAssemblyShaders()) { @@ -885,20 +916,29 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, // UBO alignment requirements. const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); - const auto alignment = device.GetUniformBufferAlignment(); - auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, - device.HasFastBufferSubData()); - if (!device.UseAssemblyShaders()) { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); + const bool fast_upload = !use_unified && device.HasFastBufferSubData(); + + const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); + const GPUVAddr gpu_addr = buffer.address; + auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); + + if (device.UseAssemblyShaders()) { + UNIMPLEMENTED_IF(use_unified); + if (offset != 0) { + const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; + glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); + cbuf = staging_cbuf; + offset = 0; + } + glBindBufferRangeNV(stage, binding, cbuf, offset, size); return; } - if (offset != 0) { - const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; - glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); - cbuf = staging_cbuf; - offset = 0; + + if (use_unified) { + glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); + } else { + glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); } - glBindBufferRangeNV(stage, binding, cbuf, offset, size); } void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 87f7fe159..f5dc56a0e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -107,7 +107,8 @@ private: /// Configures a constant buffer. void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, - const ConstBufferEntry& entry); + const ConstBufferEntry& entry, bool use_unified, + std::size_t unified_offset); /// Configures the current global memory entries to use for the draw command. void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); @@ -253,6 +254,7 @@ private: Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; std::size_t current_cbuf = 0; + OGLBuffer unified_uniform_buffer; /// Number of commands queued to the OpenGL driver. Reseted on flush. std::size_t num_queued_commands = 0; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4cd0f36cf..a991ca64a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -241,8 +241,9 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); - return std::shared_ptr<CachedShader>(new CachedShader( - params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); + return std::shared_ptr<CachedShader>( + new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry), + MakeEntries(params.device, ir, shader_type), std::move(program))); } Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { @@ -265,8 +266,9 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); - return std::shared_ptr<CachedShader>(new CachedShader( - params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); + return std::shared_ptr<CachedShader>( + new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry), + MakeEntries(params.device, ir, ShaderType::Compute), std::move(program))); } Shader CachedShader::CreateFromCache(const ShaderParameters& params, @@ -348,7 +350,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, PrecompiledShader shader; shader.program = std::move(program); shader.registry = std::move(registry); - shader.entries = MakeEntries(ir); + shader.entries = MakeEntries(device, ir, entry.type); std::scoped_lock lock{mutex}; if (callback) { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 9cb115959..502b95973 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -61,8 +61,8 @@ struct TextureDerivates {}; using TextureArgument = std::pair<Type, Node>; using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; -constexpr u32 MAX_CONSTBUFFER_ELEMENTS = - static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); +constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); +constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt #define ftou floatBitsToUint @@ -402,6 +402,13 @@ std::string FlowStackTopName(MetaStackClass stack) { return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); } +bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) { + const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size()); + // We waste one UBO for emulation + const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1; + return num_ubos > num_available_ubos; +} + struct GenericVaryingDescription { std::string name; u8 first_element = 0; @@ -412,8 +419,9 @@ class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, ShaderType stage, std::string_view identifier, std::string_view suffix) - : device{device}, ir{ir}, registry{registry}, stage{stage}, - identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { + : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier}, + suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{ + UseUnifiedUniforms(device, ir, stage)} { if (stage != ShaderType::Compute) { transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); } @@ -834,12 +842,24 @@ private: } void DeclareConstantBuffers() { + if (use_unified_uniforms) { + const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer + + static_cast<u32>(ir.GetGlobalMemory().size()); + code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{", + binding); + code.AddLine(" uint cbufs[];"); + code.AddLine("}};"); + code.AddNewLine(); + return; + } + u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& buffers : ir.GetConstantBuffers()) { - const auto index = buffers.first; + for (const auto [index, info] : ir.GetConstantBuffers()) { + const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4; + const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, GetConstBufferBlock(index)); - code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); + code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); code.AddLine("}};"); code.AddNewLine(); } @@ -1038,42 +1058,51 @@ private: if (const auto cbuf = std::get_if<CbufNode>(&*node)) { const Node offset = cbuf->GetOffset(); + const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS; + if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { // Direct access const u32 offset_imm = immediate->GetValue(); ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); - return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), - offset_imm / (4 * 4), (offset_imm / 4) % 4), - Type::Uint}; + if (use_unified_uniforms) { + return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4), + Type::Uint}; + } else { + return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), + offset_imm / (4 * 4), (offset_imm / 4) % 4), + Type::Uint}; + } } - if (std::holds_alternative<OperationNode>(*offset)) { - // Indirect access - const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); + // Indirect access + if (use_unified_uniforms) { + return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset, + Visit(offset).AsUint()), + Type::Uint}; + } - if (!device.HasComponentIndexingBug()) { - return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset), - Type::Uint}; - } + const std::string final_offset = code.GenerateTemporary(); + code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); - // AMD's proprietary GLSL compiler emits ill code for variable component access. - // To bypass this driver bug generate 4 ifs, one per each component. - const std::string pack = code.GenerateTemporary(); - code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), - final_offset); - - const std::string result = code.GenerateTemporary(); - code.AddLine("uint {};", result); - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, - pack, GetSwizzle(swizzle)); - } - return {result, Type::Uint}; + if (!device.HasComponentIndexingBug()) { + return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), + final_offset, final_offset), + Type::Uint}; } - UNREACHABLE_MSG("Unmanaged offset node type"); + // AMD's proprietary GLSL compiler emits ill code for variable component access. + // To bypass this driver bug generate 4 ifs, one per each component. + const std::string pack = code.GenerateTemporary(); + code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), + final_offset); + + const std::string result = code.GenerateTemporary(); + code.AddLine("uint {};", result); + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, + GetSwizzle(swizzle)); + } + return {result, Type::Uint}; } if (const auto gmem = std::get_if<GmemNode>(&*node)) { @@ -2710,6 +2739,7 @@ private: const std::string_view identifier; const std::string_view suffix; const Header header; + const bool use_unified_uniforms; std::unordered_map<u8, VaryingTFB> transform_feedback; ShaderWriter code; @@ -2905,7 +2935,7 @@ void GLSLDecompiler::DecompileAST() { } // Anonymous namespace -ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { +ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { ShaderEntries entries; for (const auto& cbuf : ir.GetConstantBuffers()) { entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), @@ -2926,6 +2956,7 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; } entries.shader_length = ir.GetLength(); + entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage); return entries; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index e8a178764..451c9689a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -53,11 +53,13 @@ struct ShaderEntries { std::vector<GlobalMemoryEntry> global_memory_entries; std::vector<SamplerEntry> samplers; std::vector<ImageEntry> images; - u32 clip_distances{}; std::size_t shader_length{}; + u32 clip_distances{}; + bool use_unified_uniforms{}; }; -ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); +ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, + Tegra::Engines::ShaderType stage); std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, const VideoCommon::Shader::Registry& registry, |