diff options
author | bunnei <bunneidev@gmail.com> | 2018-12-06 17:56:05 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-12-06 17:56:05 +0100 |
commit | 7fbd484f0ef1562503f1ec1f36c08a4182d207b2 (patch) | |
tree | 8d988d33b2e38fa797156c9d60c3ad832439a88b /src/video_core | |
parent | Merge pull request #1867 from lioncash/alloc (diff) | |
parent | gl_shader_decompiler: Implement TEXS.F16 (diff) | |
download | yuzu-7fbd484f0ef1562503f1ec1f36c08a4182d207b2.tar yuzu-7fbd484f0ef1562503f1ec1f36c08a4182d207b2.tar.gz yuzu-7fbd484f0ef1562503f1ec1f36c08a4182d207b2.tar.bz2 yuzu-7fbd484f0ef1562503f1ec1f36c08a4182d207b2.tar.lz yuzu-7fbd484f0ef1562503f1ec1f36c08a4182d207b2.tar.xz yuzu-7fbd484f0ef1562503f1ec1f36c08a4182d207b2.tar.zst yuzu-7fbd484f0ef1562503f1ec1f36c08a4182d207b2.zip |
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 70 |
2 files changed, 55 insertions, 18 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index b9faaf8e0..5ea094e64 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1049,6 +1049,7 @@ union Instruction { BitField<49, 1, u64> nodep_flag; BitField<50, 3, u64> component_mask_selector; BitField<53, 4, u64> texture_info; + BitField<60, 1, u64> fp32_flag; TextureType GetTextureType() const { // The TEXS instruction has a weird encoding for the texture type. @@ -1549,7 +1550,7 @@ private: INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("110000----111---", Id::TEX, Type::Memory, "TEX"), INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), - INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"), + INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 8d68156bf..4fc09cac6 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -50,6 +50,14 @@ public: using std::runtime_error::runtime_error; }; +/// Generates code to use for a swizzle operation. +static std::string GetSwizzle(u64 elem) { + ASSERT(elem <= 3); + std::string swizzle = "."; + swizzle += "xyzw"[elem]; + return swizzle; +} + /// Translate topology static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { switch (topology) { @@ -1004,14 +1012,6 @@ private: } } - /// Generates code to use for a swizzle operation. 
- static std::string GetSwizzle(u64 elem) { - ASSERT(elem <= 3); - std::string swizzle = "."; - swizzle += "xyzw"[elem]; - return swizzle; - } - ShaderWriter& shader; ShaderWriter& declarations; std::vector<GLSLRegister> regs; @@ -1343,7 +1343,7 @@ private: regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); } - void WriteTexsInstruction(const Instruction& instr, const std::string& texture) { + void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) { // TEXS has two destination registers and a swizzle. The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 @@ -1368,6 +1368,38 @@ private: } } + void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) { + // TEXS.F16 destination registers are packed in two registers in pairs (just like any half + // float instruction). + + std::array<std::string, 4> components; + u32 written_components = 0; + + for (u32 component = 0; component < 4; ++component) { + if (!instr.texs.IsComponentEnabled(component)) + continue; + components[written_components++] = texture + GetSwizzle(component); + } + if (written_components == 0) + return; + + const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) { + return "vec2(" + low + ", " + (high_enabled ? 
high : "0") + ')'; + }; + + regs.SetRegisterToHalfFloat( + instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1), + Tegra::Shader::HalfMerge::H0_H1, 1, 1); + + if (written_components > 2) { + ASSERT(instr.texs.HasTwoDestinations()); + regs.SetRegisterToHalfFloat( + instr.gpr28, 0, + BuildComponent(components[2], components[3], written_components > 3), + Tegra::Shader::HalfMerge::H0_H1, 1, 1); + } + } + static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { switch (texture_type) { case Tegra::Shader::TextureType::Texture1D: @@ -2766,24 +2798,27 @@ private: const bool depth_compare = instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); const auto process_mode = instr.texs.GetTextureProcessMode(); + UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); const auto scope = shader.Scope(); - const auto [coord, texture] = + auto [coord, texture] = GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array); shader.AddLine(coord); - if (!depth_compare) { - shader.AddLine("vec4 texture_tmp = " + texture + ';'); + if (depth_compare) { + texture = "vec4(" + texture + ')'; + } + shader.AddLine("vec4 texture_tmp = " + texture + ';'); + if (instr.texs.fp32_flag) { + WriteTexsInstructionFloat(instr, "texture_tmp"); } else { - shader.AddLine("vec4 texture_tmp = vec4(" + texture + ");"); + WriteTexsInstructionHalfFloat(instr, "texture_tmp"); } - - WriteTexsInstruction(instr, "texture_tmp"); break; } case OpCode::Id::TLDS: { @@ -2842,7 +2877,7 @@ private: } }(); - WriteTexsInstruction(instr, texture); + WriteTexsInstructionFloat(instr, texture); break; } case OpCode::Id::TLD4: { @@ -2940,7 +2975,8 @@ private: if (depth_compare) { texture = "vec4(" + texture + ')'; } - WriteTexsInstruction(instr, texture); + + WriteTexsInstructionFloat(instr, texture); break; } case OpCode::Id::TXQ: { |