diff options
Diffstat (limited to 'src/shader_recompiler')
15 files changed, 164 insertions, 36 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 0c1fbc7b1..282668b36 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -35,6 +35,15 @@ std::string_view OutputVertexIndex(EmitContext& ctx) { return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; } +std::string ChooseCbuf(EmitContext& ctx, const IR::Value& binding, std::string_view index) { + if (binding.IsImmediate()) { + return fmt::format("{}_cbuf{}[{}]", ctx.stage_name, binding.U32(), index); + } else { + const auto binding_var{ctx.var_alloc.Consume(binding)}; + return fmt::format("GetCbufIndirect({},{})", binding_var, index); + } +} + void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, const IR::Value& offset, u32 num_bits, std::string_view cast = {}, std::string_view bit_offset = {}) { @@ -55,8 +64,8 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32())) : fmt::format("[({}>>2)%4]", offset_var)}; - const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; - const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)}; + const auto cbuf{ChooseCbuf(ctx, binding, index)}; + const auto cbuf_cast{fmt::format("{}({}{{}})", cast, cbuf)}; const auto extraction{num_bits == 32 ? cbuf_cast : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast, bit_offset, num_bits)}; @@ -140,9 +149,9 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; if (offset.IsImmediate()) { + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; static constexpr u32 cbuf_size{0x10000}; const u32 u32_offset{offset.U32()}; const s32 signed_offset{static_cast<s32>(offset.U32())}; @@ -162,17 +171,17 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding return; } const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto cbuf{ChooseCbuf(ctx, binding, fmt::format("{}>>4", offset_var))}; if (!ctx.profile.has_gl_component_indexing_bug) { - ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst, - cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var); + ctx.AddU32x2("{}=uvec2({}({}[({}>>2)%4]),{}({}[(({}+4)>>2)%4]));", inst, cast, cbuf, + offset_var, cast, cbuf, offset_var); return; } const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset, - swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var, - "xyzw"[(swizzle + 1) % 4]); + ctx.Add("if(({}&3)=={}){}=uvec2({}({}.{}),{}({}.{}));", cbuf_offset, swizzle, ret, cast, + cbuf, "xyzw"[swizzle], cast, cbuf, "xyzw"[(swizzle + 1) % 4]); } } diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index e816a93ec..17266f40d 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp @@ -359,6 +359,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile header += "layout(location=0) uniform vec4 scaling;"; } DefineConstantBuffers(bindings); + DefineConstantBufferIndirect(); DefineStorageBuffers(bindings); SetupImages(bindings); SetupTextures(bindings); @@ -436,6 +437,24 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) { } } +void EmitContext::DefineConstantBufferIndirect() { + if (!info.uses_cbuf_indirect) { + return; + } + + header += profile.has_gl_cbuf_ftou_bug ? "uvec4 " : "vec4 "; + header += "GetCbufIndirect(uint binding, uint offset){" + "switch(binding){" + "default:"; + + for (const auto& desc : info.constant_buffer_descriptors) { + header += + fmt::format("case {}:return {}_cbuf{}[offset];", desc.index, stage_name, desc.index); + } + + header += "}}"; +} + void EmitContext::DefineStorageBuffers(Bindings& bindings) { if (info.storage_buffers_descriptors.empty()) { return; diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.h b/src/shader_recompiler/backend/glsl/glsl_emit_context.h index d9b639d29..2b13db6e6 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.h +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.h @@ -162,6 +162,7 @@ public: private: void SetupExtensions(); void DefineConstantBuffers(Bindings& bindings); + void DefineConstantBufferIndirect(); void DefineStorageBuffers(Bindings& bindings); void DefineGenericOutput(size_t index, u32 invocations); void DefineHelperFunctions(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index d3cbb14a9..cb47d253c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <bit> + #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 3c2a5e16f..aa7082978 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <bit> #include <tuple> #include <utility> diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 53be98ced..9c83cd2e4 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -4,6 +4,7 @@ #include <algorithm> #include <array> +#include <bit> #include <climits> #include <boost/container/static_vector.hpp> @@ -1042,15 +1043,15 @@ void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) { const Id merge_label{OpLabel()}; const Id uniform_type{uniform_types.*member_ptr}; - std::array<Id, Info::MAX_CBUFS> buf_labels; - std::array<Sirit::Literal, Info::MAX_CBUFS> buf_literals; - for (u32 i = 0; i < Info::MAX_CBUFS; i++) { + std::array<Id, Info::MAX_INDIRECT_CBUFS> buf_labels; + std::array<Sirit::Literal, Info::MAX_INDIRECT_CBUFS> buf_literals; + for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) { buf_labels[i] = OpLabel(); buf_literals[i] = Sirit::Literal{i}; } OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone); OpSwitch(binding, buf_labels[0], buf_literals, buf_labels); - for (u32 i = 0; i < Info::MAX_CBUFS; i++) { + for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) { AddLabel(buf_labels[i]); const Id cbuf{cbufs[i].*member_ptr}; const Id access_chain{OpAccessChain(uniform_type, cbuf, u32_zero_value, offset)}; @@ -1063,22 +1064,23 @@ void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) { return func; }}; IR::Type types{info.used_indirect_cbuf_types}; - if (True(types & IR::Type::U8)) { + bool supports_aliasing = profile.support_descriptor_aliasing; + if (supports_aliasing && True(types & IR::Type::U8)) { load_const_func_u8 = make_accessor(U8, &UniformDefinitions::U8); } - if (True(types & IR::Type::U16)) { + if (supports_aliasing && True(types & IR::Type::U16)) { load_const_func_u16 = make_accessor(U16, &UniformDefinitions::U16); } - if (True(types & IR::Type::F32)) { + if (supports_aliasing && True(types & IR::Type::F32)) { load_const_func_f32 = make_accessor(F32[1], &UniformDefinitions::F32); } - if (True(types & IR::Type::U32)) { + if (supports_aliasing && True(types & IR::Type::U32)) { load_const_func_u32 = make_accessor(U32[1], &UniformDefinitions::U32); } - if (True(types & IR::Type::U32x2)) { + if (supports_aliasing && True(types & IR::Type::U32x2)) { load_const_func_u32x2 = make_accessor(U32[2], &UniformDefinitions::U32x2); } - if (True(types & IR::Type::U32x4)) { + if (!supports_aliasing || True(types & IR::Type::U32x4)) { load_const_func_u32x4 = make_accessor(U32[4], &UniformDefinitions::U32x4); } } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 631446cf7..4a2564f47 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -326,6 +326,11 @@ void Inst::AddPhiOperand(Block* predecessor, const Value& value) { phi_args.emplace_back(predecessor, value); } +void Inst::ErasePhiOperand(size_t index) { + const auto operand_it{phi_args.begin() + static_cast<ptrdiff_t>(index)}; + phi_args.erase(operand_it); +} + void Inst::OrderPhiArgs() { if (op != Opcode::Phi) { throw LogicError("{} is not a Phi instruction", op); diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 947579852..14f6e55bc 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -179,9 +179,13 @@ public: /// Get a pointer to the block of a phi argument. [[nodiscard]] Block* PhiBlock(size_t index) const; + /// Add phi operand to a phi instruction. void AddPhiOperand(Block* predecessor, const Value& value); + // Erase the phi operand at the given index. + void ErasePhiOperand(size_t index); + /// Orders the Phi arguments from farthest away to nearest. void OrderPhiArgs(); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp index 6cd13ec48..62d20ebe4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -11,9 +11,13 @@ void ExitFragment(TranslatorVisitor& v) { const ProgramHeader sph{v.env.SPH()}; IR::Reg src_reg{IR::Reg::R0}; for (u32 render_target = 0; render_target < 8; ++render_target) { + if (!sph.ps.HasOutputComponents(render_target)) { + continue; + } const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)}; for (u32 component = 0; component < 4; ++component) { if (!mask[component]) { + ++src_reg; continue; } v.ir.SetFragColor(render_target, component, v.F(src_reg)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp index 57b4f0eee..60732215b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -132,7 +132,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { multisample = v.X(meta_reg++); } if (tld.clamp != 0) { - throw NotImplementedException("TLD.CL - CLAMP is not implmented"); + throw NotImplementedException("TLD.CL - CLAMP is not implemented"); } IR::TextureInstInfo info{}; info.type.Assign(GetType(tld.type)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index 311a9e763..f89ce1b68 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -81,7 +81,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } const tmml{insn}; if ((tmml.mask & 0b1100) != 0) { - throw NotImplementedException("TMML BA results are not implmented"); + throw NotImplementedException("TMML BA results are not implemented"); } const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 0b2c60842..16278faab 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -32,13 +32,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { void AddRegisterIndexedLdc(Info& info) { info.uses_cbuf_indirect = true; - // The shader can use any possible constant buffer - info.constant_buffer_mask = (1 << Info::MAX_CBUFS) - 1; - - auto& cbufs{info.constant_buffer_descriptors}; - cbufs.clear(); - for (u32 i = 0; i < Info::MAX_CBUFS; i++) { - cbufs.push_back(ConstantBufferDescriptor{.index = i, .count = 1}); + for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) { + AddConstantBufferDescriptor(info, i, 1); // The shader can use any possible access size info.constant_buffer_used_sizes[i] = 0x10'000; diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index 400836301..6697fde85 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -2,24 +2,104 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> + +#include <boost/container/small_vector.hpp> + #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { - -void DeadCodeEliminationPass(IR::Program& program) { +namespace { +template <bool TEST_USES> +void DeadInstElimination(IR::Block* const block) { // We iterate over the instructions in reverse order. // This is because removing an instruction reduces the number of uses for earlier instructions. - for (IR::Block* const block : program.post_order_blocks) { - auto it{block->end()}; - while (it != block->begin()) { - --it; - if (!it->HasUses() && !it->MayHaveSideEffects()) { - it->Invalidate(); - it = block->Instructions().erase(it); + auto it{block->end()}; + while (it != block->begin()) { + --it; + if constexpr (TEST_USES) { + if (it->HasUses() || it->MayHaveSideEffects()) { + continue; + } + } + it->Invalidate(); + it = block->Instructions().erase(it); + } +} + +void DeletedPhiArgElimination(IR::Program& program, std::span<const IR::Block*> dead_blocks) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& phi : *block) { + if (!IR::IsPhi(phi)) { + continue; + } + for (size_t i = 0; i < phi.NumArgs(); ++i) { + if (std::ranges::find(dead_blocks, phi.PhiBlock(i)) == dead_blocks.end()) { + continue; + } + // Phi operand at this index is an unreachable block + phi.ErasePhiOperand(i); + --i; + } + } + } +} + +void DeadBranchElimination(IR::Program& program) { + boost::container::small_vector<const IR::Block*, 3> dead_blocks; + const auto begin_it{program.syntax_list.begin()}; + for (auto node_it = begin_it; node_it != program.syntax_list.end(); ++node_it) { + if (node_it->type != IR::AbstractSyntaxNode::Type::If) { + continue; + } + IR::Inst* const cond_ref{node_it->data.if_node.cond.Inst()}; + const IR::U1 cond{cond_ref->Arg(0)}; + if (!cond.IsImmediate()) { + continue; + } + if (cond.U1()) { + continue; + } + // False immediate condition. Remove condition ref, erase the entire branch. + cond_ref->Invalidate(); + // Account for nested if-statements within the if(false) branch + u32 nested_ifs{1u}; + while (node_it->type != IR::AbstractSyntaxNode::Type::EndIf || nested_ifs > 0) { + node_it = program.syntax_list.erase(node_it); + switch (node_it->type) { + case IR::AbstractSyntaxNode::Type::If: + ++nested_ifs; + break; + case IR::AbstractSyntaxNode::Type::EndIf: + --nested_ifs; + break; + case IR::AbstractSyntaxNode::Type::Block: { + IR::Block* const block{node_it->data.block}; + DeadInstElimination<false>(block); + dead_blocks.push_back(block); + break; + } + default: + break; } } + // Erase EndIf node of the if(false) branch + node_it = program.syntax_list.erase(node_it); + // Account for loop increment + --node_it; + } + if (!dead_blocks.empty()) { + DeletedPhiArgElimination(program, std::span(dead_blocks.data(), dead_blocks.size())); + } +} +} // namespace + +void DeadCodeEliminationPass(IR::Program& program) { + DeadBranchElimination(program); + for (IR::Block* const block : program.post_order_blocks) { + DeadInstElimination<true>(block); } } diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h index bd6c2bfb5..0cd6597ef 100644 --- a/src/shader_recompiler/program_header.h +++ b/src/shader_recompiler/program_header.h @@ -196,6 +196,11 @@ struct ProgramHeader { return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0}; } + [[nodiscard]] bool HasOutputComponents(u32 rt) const noexcept { + const u32 bits{omap.target >> (rt * 4)}; + return (bits & 0xf) != 0; + } + [[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const { const auto& vector{imap_generic_vector[attribute]}; return {vector.x, vector.y, vector.z, vector.w}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 9d36bd9eb..a3a09c71c 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -105,6 +105,7 @@ struct ImageDescriptor { using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>; struct Info { + static constexpr size_t MAX_INDIRECT_CBUFS{14}; static constexpr size_t MAX_CBUFS{18}; static constexpr size_t MAX_SSBOS{32}; |