From d5237342668db88cc81fefbee81f468b5214e655 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 18:51:32 -0300 Subject: decode/register_set_predicate: Use move for shared pointers Avoid atomic counters used by shared pointers. --- .../shader/decode/register_set_predicate.cpp | 33 +++++++++++----------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index 8d54cce34..a6733255d 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" @@ -10,6 +12,7 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -23,7 +26,7 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr); - const Node apply_mask = [&] { + Node apply_mask = [this, opcode, instr] { switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: case OpCode::Id::P2R_IMM: @@ -34,25 +37,23 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { } }(); - const auto offset = static_cast(instr.p2r_r2p.byte) * 8; + const u32 offset = static_cast(instr.p2r_r2p.byte) * 8; switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: { - const Node mask = GetRegister(instr.gpr8); + Node mask = GetRegister(instr.gpr8); for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { - const auto shift = static_cast(pred); + const u32 shift = static_cast(pred); - const Node apply_compare = BitfieldExtract(apply_mask, shift, 1); - const Node condition = - Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); + Node apply = BitfieldExtract(apply_mask, shift, 1); + Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); - const Node value_compare = BitfieldExtract(mask, offset + shift, 1); - const Node value = - Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); + Node compare = BitfieldExtract(mask, offset + shift, 1); + Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); - bb.push_back(Conditional(condition, {code})); + Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), move(value)); + bb.push_back(Conditional(condition, {move(code)})); } break; } @@ -61,12 +62,12 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred), Immediate(0)); - value = Operation(OperationCode::UBitwiseOr, std::move(value), std::move(bit)); + value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); } - value = Operation(OperationCode::UBitwiseAnd, std::move(value), apply_mask); - value = BitfieldInsert(GetRegister(instr.gpr8), std::move(value), offset, 8); + value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); + value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); - SetRegister(bb, instr.gpr0, std::move(value)); + SetRegister(bb, instr.gpr0, move(value)); break; } default: -- cgit v1.2.3 From ffc5ec6fa816b1bf56044b9d8cf5f1935abe77ee Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 19:05:34 -0300 Subject: decode/register_set_predicate: Implement CC P2R CC takes the state of condition codes and puts them into a register. We already have this implemented for PR (predicates). This commit implements CC over that. --- .../shader/decode/register_set_predicate.cpp | 23 +++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index a6733255d..6116c31aa 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -17,15 +17,14 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; namespace { -constexpr u64 NUM_PROGRAMMABLE_PREDICATES = 7; -} +constexpr u64 NUM_CONDITION_CODES = 4; +constexpr u64 NUM_PREDICATES = 7; +} // namespace u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr); - Node apply_mask = [this, opcode, instr] { switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: @@ -39,12 +38,18 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { const u32 offset = static_cast(instr.p2r_r2p.byte) * 8; + const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; + const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; + const auto get_entry = [this, cc](u64 entry) { + return cc ? GetInternalFlag(static_cast(entry)) : GetPredicate(entry); + }; + switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: { Node mask = GetRegister(instr.gpr8); - for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { - const u32 shift = static_cast(pred); + for (u64 entry = 0; entry < num_entries; ++entry) { + const u32 shift = static_cast(entry); Node apply = BitfieldExtract(apply_mask, shift, 1); Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); @@ -52,15 +57,15 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { Node compare = BitfieldExtract(mask, offset + shift, 1); Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), move(value)); + Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); bb.push_back(Conditional(condition, {move(code)})); } break; } case OpCode::Id::P2R_IMM: { Node value = Immediate(0); - for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { - Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred), + for (u64 entry = 0; entry < num_entries; ++entry) { + Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), Immediate(0)); value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); } -- cgit v1.2.3 From 255197e64363f9286ed145cafdeb129c85c16621 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 21:56:11 -0300 Subject: shader/arithmetic_integer: Implement CC for IADD --- .../renderer_opengl/gl_shader_decompiler.cpp | 10 ++++++++++ .../renderer_vulkan/vk_shader_decompiler.cpp | 11 +++++++++++ .../shader/decode/arithmetic_integer.cpp | 22 +++++++++++++++++++--- src/video_core/shader/node.h | 2 ++ 4 files changed, 42 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0cd3ad7e1..3803a6f3a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1870,6 +1870,14 @@ private: return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type); } + Expression LogicalAddCarry(Operation operation) { + const std::string carry = code.GenerateTemporary(); + code.AddLine("uint {};", carry); + code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(), + VisitOperand(operation, 1).AsUint(), carry); + return {fmt::format("({} != 0)", carry), Type::Bool}; + } + Expression LogicalFIsNan(Operation operation) { return GenerateUnary(operation, "isnan", Type::Bool, Type::Float); } @@ -2441,6 +2449,8 @@ private: &GLSLDecompiler::LogicalNotEqual, &GLSLDecompiler::LogicalGreaterEqual, + &GLSLDecompiler::LogicalAddCarry, + &GLSLDecompiler::Logical2HLessThan, &GLSLDecompiler::Logical2HEqual, &GLSLDecompiler::Logical2HLessEqual, diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index aaa138f52..20b6ca0ad 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1584,6 +1584,15 @@ private: return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat}; } + Expression LogicalAddCarry(Operation operation) { + const Id op_a = AsUint(Visit(operation[0])); + const Id op_b = AsUint(Visit(operation[1])); + + const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); + const Id carry = OpCompositeExtract(t_uint, result, 1); + return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool}; + } + Expression LogicalAssign(Operation operation) { const Node& dest = operation[0]; const Node& src = operation[1]; @@ -2518,6 +2527,8 @@ private: &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>, &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>, + &SPIRVDecompiler::LogicalAddCarry, + &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 9af8c606d..99b4b6342 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -40,10 +40,26 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); + Node value = Operation(OperationCode::IAdd, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); + if (instr.generates_cc) { + const Node i0 = Immediate(0); + + Node zero = Operation(OperationCode::LogicalIEqual, value, i0); + Node sign = Operation(OperationCode::LogicalILessThan, value, i0); + Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); + + Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); + Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); + Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); + Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); + + SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); + SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); + SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); + SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); + } + SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::IADD3_C: diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 3eee961f5..3f5a7bc7a 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -132,6 +132,8 @@ enum class OperationCode { LogicalUNotEqual, /// (uint a, uint b) -> bool LogicalUGreaterEqual, /// (uint a, uint b) -> bool + LogicalAddCarry, /// (uint a, uint b) -> bool + Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 -- cgit v1.2.3 From c788f9c0bd9cb0b0cb66f7424a65032cca3731cc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 22:41:20 -0300 Subject: shader/arithmetic_integer: Implement IADD.X IADD.X takes the carry flag and adds it to the result. This is generally used to emulate 64-bit operations with 32-bit registers. --- src/video_core/engines/shader_bytecode.h | 4 ++++ src/video_core/shader/decode/arithmetic_integer.cpp | 6 ++++++ 2 files changed, 10 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index cde3a26b9..8dae754d4 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -813,6 +813,10 @@ union Instruction { BitField<49, 1, u64> negate_a; } alu_integer; + union { + BitField<43, 1, u64> x; + } iadd; + union { BitField<39, 1, u64> ftz; BitField<32, 1, u64> saturate; diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 99b4b6342..2a3311cb8 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -42,6 +42,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { Node value = Operation(OperationCode::IAdd, op_a, op_b); + if (instr.iadd.x) { + Node carry = GetInternalFlag(InternalFlag::Carry); + Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); + value = Operation(OperationCode::IAdd, std::move(value), std::move(x)); + } + if (instr.generates_cc) { const Node i0 = Immediate(0); -- cgit v1.2.3 From 2a96bea6a7efd9efaaa8d1d72f1eb8ca27cb81f8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 22:42:33 -0300 Subject: shader/arithmetic_integer: Change IAdd to UAdd to avoid signed overflow Signed integer addition overflow might be undefined behavior. It's free to change operations to UAdd and use unsigned integers to avoid potential bugs. --- src/video_core/shader/decode/arithmetic_integer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 2a3311cb8..addd7f533 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -40,12 +40,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - Node value = Operation(OperationCode::IAdd, op_a, op_b); + Node value = Operation(OperationCode::UAdd, op_a, op_b); if (instr.iadd.x) { Node carry = GetInternalFlag(InternalFlag::Carry); Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); - value = Operation(OperationCode::IAdd, std::move(value), std::move(x)); + value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); } if (instr.generates_cc) { -- cgit v1.2.3 From e895a4e2d7ebf29fbafda7d950301cd7d03efdbb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 22:53:54 -0300 Subject: shader/arithmetic_integer: Fix edge case and mark IADD.X Rd.CC as unimplemented IADD.X Rd.CC requires some extra logic that is not currently implemented. Abort when this is hit. --- src/video_core/shader/decode/arithmetic_integer.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index addd7f533..ced5c3dc1 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -35,7 +35,8 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case OpCode::Id::IADD_C: case OpCode::Id::IADD_R: case OpCode::Id::IADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented"); + UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); + UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); @@ -49,6 +50,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { } if (instr.generates_cc) { + // Avoid changing result's carry flag + SetTemporary(bb, 0, std::move(value)); + value = GetTemporary(0); + const Node i0 = Immediate(0); Node zero = Operation(OperationCode::LogicalIEqual, value, i0); -- cgit v1.2.3 From 871aadbe36b7480f7dc318e2a8bebb064a1fbaaf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 28 Apr 2020 17:14:53 -0300 Subject: shader/arithmetic_integer: Fix tracking issue in temporary This temporary is not needed as we mark Rd.CC + IADD.X as unimplemented. It caused issues when tracking global buffers. --- src/video_core/shader/decode/arithmetic_integer.cpp | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index ced5c3dc1..a041519b7 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -50,10 +50,6 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { } if (instr.generates_cc) { - // Avoid changing result's carry flag - SetTemporary(bb, 0, std::move(value)); - value = GetTemporary(0); - const Node i0 = Immediate(0); Node zero = Operation(OperationCode::LogicalIEqual, value, i0); -- cgit v1.2.3