From 64613db6052dfe0c056ce3d6ead21ae9a32f03ae Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 9 Apr 2019 18:41:41 -0300 Subject: shader_ir/decode: Implement half float saturation --- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 15 +++++++++++---- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 6 ++++++ .../shader/decode/arithmetic_half_immediate.cpp | 6 ++---- src/video_core/shader/shader_ir.cpp | 9 +++++++++ src/video_core/shader/shader_ir.h | 3 +++ 5 files changed, 31 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index cbaa4dceb..9c972fd3a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -617,13 +617,11 @@ private: } std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { - std::string value = VisitOperand(operation, operand_index); + const std::string value = VisitOperand(operation, operand_index); switch (type) { case Type::HalfFloat: { const auto half_meta = std::get_if(&operation.GetMeta()); - if (!half_meta) { - value = "toHalf2(" + value + ')'; - } + ASSERT(half_meta); switch (half_meta->types.at(operand_index)) { case Tegra::Shader::HalfType::H0_H1: @@ -1067,6 +1065,14 @@ private: return BitwiseCastResult(value, Type::HalfFloat); } + std::string HClamp(Operation operation) { + const std::string value = VisitOperand(operation, 0, Type::HalfFloat); + const std::string min = VisitOperand(operation, 1, Type::Float); + const std::string max = VisitOperand(operation, 2, Type::Float); + const std::string clamped = "clamp(" + value + ", vec2(" + min + "), vec2(" + max + "))"; + return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); + } + std::string HMergeF32(Operation operation) { return "float(toHalf2(" + Visit(operation[0]) + ")[0])"; } @@ -1501,6 +1507,7 @@ private: &GLSLDecompiler::Fma, &GLSLDecompiler::Absolute, &GLSLDecompiler::HNegate, + &GLSLDecompiler::HClamp, &GLSLDecompiler::HMergeF32, &GLSLDecompiler::HMergeH0, &GLSLDecompiler::HMergeH1, diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index e0a6f5e87..6ecb0bcb0 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -744,6 +744,11 @@ private: return {}; } + Id HClamp(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id HMergeF32(Operation operation) { UNIMPLEMENTED(); return {}; @@ -1216,6 +1221,7 @@ private: &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, &SPIRVDecompiler::HNegate, + &SPIRVDecompiler::HClamp, &SPIRVDecompiler::HMergeF32, &SPIRVDecompiler::HMergeH0, &SPIRVDecompiler::HMergeH1, diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 4a5b0620e..cf4bc0432 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -23,8 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { } else { UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); } - UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0, - "Half float immediate saturation not implemented"); Node op_a = GetRegister(instr.gpr8); op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); @@ -43,10 +41,10 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { return Immediate(0); } }(); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); + value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); + value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); SetRegister(bb, instr.gpr0, value); - return pc; } diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 5c1c591f8..d158b4bfd 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -218,6 +218,15 @@ Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { return value; } +Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { + if (!saturate) { + return value; + } + const Node positive_zero = Immediate(std::copysignf(0, 1)); + const Node positive_one = Immediate(1.0f); + return Operation(OperationCode::HClamp, HALF_NO_PRECISE, value, positive_zero, positive_one); +} + Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { const std::unordered_map PredicateComparisonTable = { {PredCondition::LessThan, OperationCode::LogicalFLessThan}, diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0ae51389b..d329da58d 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -114,6 +114,7 @@ enum class OperationCode { HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 HAbsolute, /// (f16vec2 a) -> f16vec2 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 + HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 HMergeF32, /// (f16vec2 src) -> float HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 @@ -716,6 +717,8 @@ private: Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); /// Conditionally absolute/negated half float pair. Absolute is applied first Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); + /// Conditionally saturates a half float pair + Node GetSaturatedHalfFloat(Node value, bool saturate = true); /// Returns a predicate comparing two floats Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); -- cgit v1.2.3