summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
authorReinUsesLisp <reinuseslisp@airmail.cc>2021-02-17 00:52:12 +0100
committerameerj <52414509+ameerj@users.noreply.github.com>2021-07-23 03:51:22 +0200
commitc67d64365a712830fe140dd36e24e2efd9b8a812 (patch)
tree9287589f2b72d1cbd0cb113c2024b2bc531408c3 /src/video_core/renderer_opengl
parentshader: Add XMAD multiplication folding optimization (diff)
downloadyuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar
yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.gz
yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.bz2
yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.lz
yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.xz
yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.zst
yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.zip
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp2124
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.h29
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp314
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h33
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp564
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h102
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp2986
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h69
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp482
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h176
10 files changed, 8 insertions, 6871 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
deleted file mode 100644
index e8d8d2aa5..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ /dev/null
@@ -1,2124 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <variant>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_arb_decompiler.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
-// GLASM lacks booleans, so they have to be implemented as integers.
-// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
-// select between two values, because -1 will be evaluated as true and 0 as false.
-
-namespace OpenGL {
-
-namespace {
-
-using Tegra::Engines::ShaderType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using namespace VideoCommon::Shader;
-using Operation = const OperationNode&;
-
-constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
-
-char Swizzle(std::size_t component) {
- static constexpr std::string_view SWIZZLE{"xyzw"};
- return SWIZZLE.at(component);
-}
-
-constexpr bool IsGenericAttribute(Attribute::Index index) {
- return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
-}
-
-u32 GetGenericAttributeIndex(Attribute::Index index) {
- ASSERT(IsGenericAttribute(index));
- return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
-}
-
-std::string_view Modifiers(Operation operation) {
- const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
- if (meta && meta->precise) {
- return ".PREC";
- }
- return "";
-}
-
-std::string_view GetInputFlags(PixelImap attribute) {
- switch (attribute) {
- case PixelImap::Perspective:
- return "";
- case PixelImap::Constant:
- return "FLAT ";
- case PixelImap::ScreenLinear:
- return "NOPERSPECTIVE ";
- case PixelImap::Unused:
- break;
- }
- UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
- return {};
-}
-
-std::string_view ImageType(Tegra::Shader::ImageType image_type) {
- switch (image_type) {
- case Tegra::Shader::ImageType::Texture1D:
- return "1D";
- case Tegra::Shader::ImageType::TextureBuffer:
- return "BUFFER";
- case Tegra::Shader::ImageType::Texture1DArray:
- return "ARRAY1D";
- case Tegra::Shader::ImageType::Texture2D:
- return "2D";
- case Tegra::Shader::ImageType::Texture2DArray:
- return "ARRAY2D";
- case Tegra::Shader::ImageType::Texture3D:
- return "3D";
- }
- UNREACHABLE();
- return {};
-}
-
-std::string_view StackName(MetaStackClass stack) {
- switch (stack) {
- case MetaStackClass::Ssy:
- return "SSY";
- case MetaStackClass::Pbk:
- return "PBK";
- }
- UNREACHABLE();
- return "";
-};
-
-std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
- switch (topology) {
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
- return "POINTS";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
- return "LINES";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
- return "LINES_ADJACENCY";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
- return "TRIANGLES";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
- return "TRIANGLES_ADJACENCY";
- default:
- UNIMPLEMENTED_MSG("topology={}", topology);
- return "POINTS";
- }
-}
-
-std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
- switch (topology) {
- case Tegra::Shader::OutputTopology::PointList:
- return "POINTS";
- case Tegra::Shader::OutputTopology::LineStrip:
- return "LINE_STRIP";
- case Tegra::Shader::OutputTopology::TriangleStrip:
- return "TRIANGLE_STRIP";
- default:
- UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
- return "points";
- }
-}
-
-std::string_view StageInputName(ShaderType stage) {
- switch (stage) {
- case ShaderType::Vertex:
- case ShaderType::Geometry:
- return "vertex";
- case ShaderType::Fragment:
- return "fragment";
- case ShaderType::Compute:
- return "invocation";
- default:
- UNREACHABLE();
- return "";
- }
-}
-
-std::string TextureType(const MetaTexture& meta) {
- if (meta.sampler.is_buffer) {
- return "BUFFER";
- }
- std::string type;
- if (meta.sampler.is_shadow) {
- type += "SHADOW";
- }
- if (meta.sampler.is_array) {
- type += "ARRAY";
- }
- type += [&meta] {
- switch (meta.sampler.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return "1D";
- case Tegra::Shader::TextureType::Texture2D:
- return "2D";
- case Tegra::Shader::TextureType::Texture3D:
- return "3D";
- case Tegra::Shader::TextureType::TextureCube:
- return "CUBE";
- }
- UNREACHABLE();
- return "2D";
- }();
- return type;
-}
-
-class ARBDecompiler final {
-public:
- explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
- ShaderType stage_, std::string_view identifier);
-
- std::string Code() const {
- return shader_source;
- }
-
-private:
- void DefineGlobalMemory();
-
- void DeclareHeader();
- void DeclareVertex();
- void DeclareGeometry();
- void DeclareFragment();
- void DeclareCompute();
- void DeclareInputAttributes();
- void DeclareOutputAttributes();
- void DeclareLocalMemory();
- void DeclareGlobalMemory();
- void DeclareConstantBuffers();
- void DeclareRegisters();
- void DeclareTemporaries();
- void DeclarePredicates();
- void DeclareInternalFlags();
-
- void InitializeVariables();
-
- void DecompileAST();
- void DecompileBranchMode();
-
- void VisitAST(const ASTNode& node);
- std::string VisitExpression(const Expr& node);
-
- void VisitBlock(const NodeBlock& bb);
-
- std::string Visit(const Node& node);
-
- std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation);
- std::string BuildAoffi(Operation);
- std::string GlobalMemoryPointer(const GmemNode& gmem);
- void Exit();
-
- std::string Assign(Operation);
- std::string Select(Operation);
- std::string FClamp(Operation);
- std::string FCastHalf0(Operation);
- std::string FCastHalf1(Operation);
- std::string FSqrt(Operation);
- std::string FSwizzleAdd(Operation);
- std::string HAdd2(Operation);
- std::string HMul2(Operation);
- std::string HFma2(Operation);
- std::string HAbsolute(Operation);
- std::string HNegate(Operation);
- std::string HClamp(Operation);
- std::string HCastFloat(Operation);
- std::string HUnpack(Operation);
- std::string HMergeF32(Operation);
- std::string HMergeH0(Operation);
- std::string HMergeH1(Operation);
- std::string HPack2(Operation);
- std::string LogicalAssign(Operation);
- std::string LogicalPick2(Operation);
- std::string LogicalAnd2(Operation);
- std::string FloatOrdered(Operation);
- std::string FloatUnordered(Operation);
- std::string LogicalAddCarry(Operation);
- std::string Texture(Operation);
- std::string TextureGather(Operation);
- std::string TextureQueryDimensions(Operation);
- std::string TextureQueryLod(Operation);
- std::string TexelFetch(Operation);
- std::string TextureGradient(Operation);
- std::string ImageLoad(Operation);
- std::string ImageStore(Operation);
- std::string Branch(Operation);
- std::string BranchIndirect(Operation);
- std::string PushFlowStack(Operation);
- std::string PopFlowStack(Operation);
- std::string Exit(Operation);
- std::string Discard(Operation);
- std::string EmitVertex(Operation);
- std::string EndPrimitive(Operation);
- std::string InvocationId(Operation);
- std::string YNegate(Operation);
- std::string ThreadId(Operation);
- std::string ShuffleIndexed(Operation);
- std::string Barrier(Operation);
- std::string MemoryBarrierGroup(Operation);
- std::string MemoryBarrierGlobal(Operation);
-
- template <const std::string_view& op>
- std::string Unary(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
- return temporary;
- }
-
- template <const std::string_view& op>
- std::string Binary(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
- Visit(operation[1]));
- return temporary;
- }
-
- template <const std::string_view& op>
- std::string Trinary(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
- Visit(operation[1]), Visit(operation[2]));
- return temporary;
- }
-
- template <const std::string_view& op, bool unordered>
- std::string FloatComparison(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("MOV.S {} (NE.x), -1;", temporary);
-
- const std::string op_a = Visit(operation[0]);
- const std::string op_b = Visit(operation[1]);
- if constexpr (unordered) {
- AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), -1;", temporary);
- AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), -1;", temporary);
- } else if (op == SNE_F) {
- AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), 0;", temporary);
- AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), 0;", temporary);
- }
- return temporary;
- }
-
- template <const std::string_view& op, bool is_nan>
- std::string HalfComparison(Operation operation) {
- std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- const std::string op_a = Visit(operation[0]);
- const std::string op_b = Visit(operation[1]);
- AddLine("UP2H.F {}, {};", tmp1, op_a);
- AddLine("UP2H.F {}, {};", tmp2, op_b);
- AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
- AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
- AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
- AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
- AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
- if constexpr (is_nan) {
- AddLine("MOVC.F RC.x, {};", op_a);
- AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
- AddLine("MOVC.F RC.x, {};", op_b);
- AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
- }
- return tmp1;
- }
-
- template <const std::string_view& op, const std::string_view& type>
- std::string AtomicImage(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
- const std::size_t num_coords = operation.GetOperandsCount();
- const std::size_t num_values = meta.values.size();
-
- const std::string coord = AllocVectorTemporary();
- const std::string value = AllocVectorTemporary();
- for (std::size_t i = 0; i < num_coords; ++i) {
- AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
- }
- for (std::size_t i = 0; i < num_values; ++i) {
- AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
- }
-
- AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
- image_id, ImageType(meta.image.type));
- return fmt::format("{}.x", coord);
- }
-
- template <const std::string_view& op, const std::string_view& type>
- std::string Atomic(Operation operation) {
- std::string temporary = AllocTemporary();
- std::string address;
- std::string_view opname;
- bool robust = false;
- if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
- address = GlobalMemoryPointer(*gmem);
- opname = "ATOM";
- robust = true;
- } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
- address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
- opname = "ATOMS";
- } else {
- UNREACHABLE();
- return "{0, 0, 0, 0}";
- }
- if (robust) {
- AddLine("IF NE.x;");
- }
- AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
- if (robust) {
- AddLine("ELSE;");
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("ENDIF;");
- }
- return temporary;
- }
-
- template <char type>
- std::string Negate(Operation operation) {
- std::string temporary = AllocTemporary();
- if constexpr (type == 'F') {
- AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
- } else {
- AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
- }
- return temporary;
- }
-
- template <char type>
- std::string Absolute(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
- return temporary;
- }
-
- template <char type>
- std::string BitfieldInsert(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
- AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
- AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
- Visit(operation[0]));
- return fmt::format("{}.x", temporary);
- }
-
- template <char type>
- std::string BitfieldExtract(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
- AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
- AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
- return fmt::format("{}.x", temporary);
- }
-
- template <char swizzle>
- std::string LocalInvocationId(Operation) {
- return fmt::format("invocation.localid.{}", swizzle);
- }
-
- template <char swizzle>
- std::string WorkGroupId(Operation) {
- return fmt::format("invocation.groupid.{}", swizzle);
- }
-
- template <char c1, char c2>
- std::string ThreadMask(Operation) {
- return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
- }
-
- template <typename... Args>
- void AddExpression(std::string_view text, Args&&... args) {
- shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...);
- }
-
- template <typename... Args>
- void AddLine(std::string_view text, Args&&... args) {
- AddExpression(text, std::forward<Args>(args)...);
- shader_source += '\n';
- }
-
- std::string AllocLongVectorTemporary() {
- max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
- return fmt::format("L{}", num_long_temporaries++);
- }
-
- std::string AllocLongTemporary() {
- return fmt::format("{}.x", AllocLongVectorTemporary());
- }
-
- std::string AllocVectorTemporary() {
- max_temporaries = std::max(max_temporaries, num_temporaries + 1);
- return fmt::format("T{}", num_temporaries++);
- }
-
- std::string AllocTemporary() {
- return fmt::format("{}.x", AllocVectorTemporary());
- }
-
- void ResetTemporaries() noexcept {
- num_temporaries = 0;
- num_long_temporaries = 0;
- }
-
- const Device& device;
- const ShaderIR& ir;
- const Registry& registry;
- const ShaderType stage;
-
- std::size_t num_temporaries = 0;
- std::size_t max_temporaries = 0;
-
- std::size_t num_long_temporaries = 0;
- std::size_t max_long_temporaries = 0;
-
- std::map<GlobalMemoryBase, u32> global_memory_names;
-
- std::string shader_source;
-
- static constexpr std::string_view ADD_F32 = "ADD.F32";
- static constexpr std::string_view ADD_S = "ADD.S";
- static constexpr std::string_view ADD_U = "ADD.U";
- static constexpr std::string_view MUL_F32 = "MUL.F32";
- static constexpr std::string_view MUL_S = "MUL.S";
- static constexpr std::string_view MUL_U = "MUL.U";
- static constexpr std::string_view DIV_F32 = "DIV.F32";
- static constexpr std::string_view DIV_S = "DIV.S";
- static constexpr std::string_view DIV_U = "DIV.U";
- static constexpr std::string_view MAD_F32 = "MAD.F32";
- static constexpr std::string_view RSQ_F32 = "RSQ.F32";
- static constexpr std::string_view COS_F32 = "COS.F32";
- static constexpr std::string_view SIN_F32 = "SIN.F32";
- static constexpr std::string_view EX2_F32 = "EX2.F32";
- static constexpr std::string_view LG2_F32 = "LG2.F32";
- static constexpr std::string_view SLT_F = "SLT.F32";
- static constexpr std::string_view SLT_S = "SLT.S";
- static constexpr std::string_view SLT_U = "SLT.U";
- static constexpr std::string_view SEQ_F = "SEQ.F32";
- static constexpr std::string_view SEQ_S = "SEQ.S";
- static constexpr std::string_view SEQ_U = "SEQ.U";
- static constexpr std::string_view SLE_F = "SLE.F32";
- static constexpr std::string_view SLE_S = "SLE.S";
- static constexpr std::string_view SLE_U = "SLE.U";
- static constexpr std::string_view SGT_F = "SGT.F32";
- static constexpr std::string_view SGT_S = "SGT.S";
- static constexpr std::string_view SGT_U = "SGT.U";
- static constexpr std::string_view SNE_F = "SNE.F32";
- static constexpr std::string_view SNE_S = "SNE.S";
- static constexpr std::string_view SNE_U = "SNE.U";
- static constexpr std::string_view SGE_F = "SGE.F32";
- static constexpr std::string_view SGE_S = "SGE.S";
- static constexpr std::string_view SGE_U = "SGE.U";
- static constexpr std::string_view AND_S = "AND.S";
- static constexpr std::string_view AND_U = "AND.U";
- static constexpr std::string_view TRUNC_F = "TRUNC.F";
- static constexpr std::string_view TRUNC_S = "TRUNC.S";
- static constexpr std::string_view TRUNC_U = "TRUNC.U";
- static constexpr std::string_view SHL_S = "SHL.S";
- static constexpr std::string_view SHL_U = "SHL.U";
- static constexpr std::string_view SHR_S = "SHR.S";
- static constexpr std::string_view SHR_U = "SHR.U";
- static constexpr std::string_view OR_S = "OR.S";
- static constexpr std::string_view OR_U = "OR.U";
- static constexpr std::string_view XOR_S = "XOR.S";
- static constexpr std::string_view XOR_U = "XOR.U";
- static constexpr std::string_view NOT_S = "NOT.S";
- static constexpr std::string_view NOT_U = "NOT.U";
- static constexpr std::string_view BTC_S = "BTC.S";
- static constexpr std::string_view BTC_U = "BTC.U";
- static constexpr std::string_view BTFM_S = "BTFM.S";
- static constexpr std::string_view BTFM_U = "BTFM.U";
- static constexpr std::string_view ROUND_F = "ROUND.F";
- static constexpr std::string_view CEIL_F = "CEIL.F";
- static constexpr std::string_view FLR_F = "FLR.F";
- static constexpr std::string_view I2F_S = "I2F.S";
- static constexpr std::string_view I2F_U = "I2F.U";
- static constexpr std::string_view MIN_F = "MIN.F";
- static constexpr std::string_view MIN_S = "MIN.S";
- static constexpr std::string_view MIN_U = "MIN.U";
- static constexpr std::string_view MAX_F = "MAX.F";
- static constexpr std::string_view MAX_S = "MAX.S";
- static constexpr std::string_view MAX_U = "MAX.U";
- static constexpr std::string_view MOV_U = "MOV.U";
- static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
- static constexpr std::string_view TGALL_U = "TGALL.U";
- static constexpr std::string_view TGANY_U = "TGANY.U";
- static constexpr std::string_view TGEQ_U = "TGEQ.U";
- static constexpr std::string_view EXCH = "EXCH";
- static constexpr std::string_view ADD = "ADD";
- static constexpr std::string_view MIN = "MIN";
- static constexpr std::string_view MAX = "MAX";
- static constexpr std::string_view AND = "AND";
- static constexpr std::string_view OR = "OR";
- static constexpr std::string_view XOR = "XOR";
- static constexpr std::string_view U32 = "U32";
- static constexpr std::string_view S32 = "S32";
-
- static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
- using DecompilerType = std::string (ARBDecompiler::*)(Operation);
- static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
- &ARBDecompiler::Assign,
-
- &ARBDecompiler::Select,
-
- &ARBDecompiler::Binary<ADD_F32>,
- &ARBDecompiler::Binary<MUL_F32>,
- &ARBDecompiler::Binary<DIV_F32>,
- &ARBDecompiler::Trinary<MAD_F32>,
- &ARBDecompiler::Negate<'F'>,
- &ARBDecompiler::Absolute<'F'>,
- &ARBDecompiler::FClamp,
- &ARBDecompiler::FCastHalf0,
- &ARBDecompiler::FCastHalf1,
- &ARBDecompiler::Binary<MIN_F>,
- &ARBDecompiler::Binary<MAX_F>,
- &ARBDecompiler::Unary<COS_F32>,
- &ARBDecompiler::Unary<SIN_F32>,
- &ARBDecompiler::Unary<EX2_F32>,
- &ARBDecompiler::Unary<LG2_F32>,
- &ARBDecompiler::Unary<RSQ_F32>,
- &ARBDecompiler::FSqrt,
- &ARBDecompiler::Unary<ROUND_F>,
- &ARBDecompiler::Unary<FLR_F>,
- &ARBDecompiler::Unary<CEIL_F>,
- &ARBDecompiler::Unary<TRUNC_F>,
- &ARBDecompiler::Unary<I2F_S>,
- &ARBDecompiler::Unary<I2F_U>,
- &ARBDecompiler::FSwizzleAdd,
-
- &ARBDecompiler::Binary<ADD_S>,
- &ARBDecompiler::Binary<MUL_S>,
- &ARBDecompiler::Binary<DIV_S>,
- &ARBDecompiler::Negate<'S'>,
- &ARBDecompiler::Absolute<'S'>,
- &ARBDecompiler::Binary<MIN_S>,
- &ARBDecompiler::Binary<MAX_S>,
-
- &ARBDecompiler::Unary<TRUNC_S>,
- &ARBDecompiler::Unary<MOV_U>,
- &ARBDecompiler::Binary<SHL_S>,
- &ARBDecompiler::Binary<SHR_U>,
- &ARBDecompiler::Binary<SHR_S>,
- &ARBDecompiler::Binary<AND_S>,
- &ARBDecompiler::Binary<OR_S>,
- &ARBDecompiler::Binary<XOR_S>,
- &ARBDecompiler::Unary<NOT_S>,
- &ARBDecompiler::BitfieldInsert<'S'>,
- &ARBDecompiler::BitfieldExtract<'S'>,
- &ARBDecompiler::Unary<BTC_S>,
- &ARBDecompiler::Unary<BTFM_S>,
-
- &ARBDecompiler::Binary<ADD_U>,
- &ARBDecompiler::Binary<MUL_U>,
- &ARBDecompiler::Binary<DIV_U>,
- &ARBDecompiler::Binary<MIN_U>,
- &ARBDecompiler::Binary<MAX_U>,
- &ARBDecompiler::Unary<TRUNC_U>,
- &ARBDecompiler::Unary<MOV_U>,
- &ARBDecompiler::Binary<SHL_U>,
- &ARBDecompiler::Binary<SHR_U>,
- &ARBDecompiler::Binary<SHR_U>,
- &ARBDecompiler::Binary<AND_U>,
- &ARBDecompiler::Binary<OR_U>,
- &ARBDecompiler::Binary<XOR_U>,
- &ARBDecompiler::Unary<NOT_U>,
- &ARBDecompiler::BitfieldInsert<'U'>,
- &ARBDecompiler::BitfieldExtract<'U'>,
- &ARBDecompiler::Unary<BTC_U>,
- &ARBDecompiler::Unary<BTFM_U>,
-
- &ARBDecompiler::HAdd2,
- &ARBDecompiler::HMul2,
- &ARBDecompiler::HFma2,
- &ARBDecompiler::HAbsolute,
- &ARBDecompiler::HNegate,
- &ARBDecompiler::HClamp,
- &ARBDecompiler::HCastFloat,
- &ARBDecompiler::HUnpack,
- &ARBDecompiler::HMergeF32,
- &ARBDecompiler::HMergeH0,
- &ARBDecompiler::HMergeH1,
- &ARBDecompiler::HPack2,
-
- &ARBDecompiler::LogicalAssign,
- &ARBDecompiler::Binary<AND_U>,
- &ARBDecompiler::Binary<OR_U>,
- &ARBDecompiler::Binary<XOR_U>,
- &ARBDecompiler::Unary<NOT_U>,
- &ARBDecompiler::LogicalPick2,
- &ARBDecompiler::LogicalAnd2,
-
- &ARBDecompiler::FloatComparison<SLT_F, false>,
- &ARBDecompiler::FloatComparison<SEQ_F, false>,
- &ARBDecompiler::FloatComparison<SLE_F, false>,
- &ARBDecompiler::FloatComparison<SGT_F, false>,
- &ARBDecompiler::FloatComparison<SNE_F, false>,
- &ARBDecompiler::FloatComparison<SGE_F, false>,
- &ARBDecompiler::FloatOrdered,
- &ARBDecompiler::FloatUnordered,
- &ARBDecompiler::FloatComparison<SLT_F, true>,
- &ARBDecompiler::FloatComparison<SEQ_F, true>,
- &ARBDecompiler::FloatComparison<SLE_F, true>,
- &ARBDecompiler::FloatComparison<SGT_F, true>,
- &ARBDecompiler::FloatComparison<SNE_F, true>,
- &ARBDecompiler::FloatComparison<SGE_F, true>,
-
- &ARBDecompiler::Binary<SLT_S>,
- &ARBDecompiler::Binary<SEQ_S>,
- &ARBDecompiler::Binary<SLE_S>,
- &ARBDecompiler::Binary<SGT_S>,
- &ARBDecompiler::Binary<SNE_S>,
- &ARBDecompiler::Binary<SGE_S>,
-
- &ARBDecompiler::Binary<SLT_U>,
- &ARBDecompiler::Binary<SEQ_U>,
- &ARBDecompiler::Binary<SLE_U>,
- &ARBDecompiler::Binary<SGT_U>,
- &ARBDecompiler::Binary<SNE_U>,
- &ARBDecompiler::Binary<SGE_U>,
-
- &ARBDecompiler::LogicalAddCarry,
-
- &ARBDecompiler::HalfComparison<SLT_F, false>,
- &ARBDecompiler::HalfComparison<SEQ_F, false>,
- &ARBDecompiler::HalfComparison<SLE_F, false>,
- &ARBDecompiler::HalfComparison<SGT_F, false>,
- &ARBDecompiler::HalfComparison<SNE_F, false>,
- &ARBDecompiler::HalfComparison<SGE_F, false>,
- &ARBDecompiler::HalfComparison<SLT_F, true>,
- &ARBDecompiler::HalfComparison<SEQ_F, true>,
- &ARBDecompiler::HalfComparison<SLE_F, true>,
- &ARBDecompiler::HalfComparison<SGT_F, true>,
- &ARBDecompiler::HalfComparison<SNE_F, true>,
- &ARBDecompiler::HalfComparison<SGE_F, true>,
-
- &ARBDecompiler::Texture,
- &ARBDecompiler::Texture,
- &ARBDecompiler::TextureGather,
- &ARBDecompiler::TextureQueryDimensions,
- &ARBDecompiler::TextureQueryLod,
- &ARBDecompiler::TexelFetch,
- &ARBDecompiler::TextureGradient,
-
- &ARBDecompiler::ImageLoad,
- &ARBDecompiler::ImageStore,
-
- &ARBDecompiler::AtomicImage<ADD, U32>,
- &ARBDecompiler::AtomicImage<AND, U32>,
- &ARBDecompiler::AtomicImage<OR, U32>,
- &ARBDecompiler::AtomicImage<XOR, U32>,
- &ARBDecompiler::AtomicImage<EXCH, U32>,
-
- &ARBDecompiler::Atomic<EXCH, U32>,
- &ARBDecompiler::Atomic<ADD, U32>,
- &ARBDecompiler::Atomic<MIN, U32>,
- &ARBDecompiler::Atomic<MAX, U32>,
- &ARBDecompiler::Atomic<AND, U32>,
- &ARBDecompiler::Atomic<OR, U32>,
- &ARBDecompiler::Atomic<XOR, U32>,
-
- &ARBDecompiler::Atomic<EXCH, S32>,
- &ARBDecompiler::Atomic<ADD, S32>,
- &ARBDecompiler::Atomic<MIN, S32>,
- &ARBDecompiler::Atomic<MAX, S32>,
- &ARBDecompiler::Atomic<AND, S32>,
- &ARBDecompiler::Atomic<OR, S32>,
- &ARBDecompiler::Atomic<XOR, S32>,
-
- &ARBDecompiler::Atomic<ADD, U32>,
- &ARBDecompiler::Atomic<MIN, U32>,
- &ARBDecompiler::Atomic<MAX, U32>,
- &ARBDecompiler::Atomic<AND, U32>,
- &ARBDecompiler::Atomic<OR, U32>,
- &ARBDecompiler::Atomic<XOR, U32>,
-
- &ARBDecompiler::Atomic<ADD, S32>,
- &ARBDecompiler::Atomic<MIN, S32>,
- &ARBDecompiler::Atomic<MAX, S32>,
- &ARBDecompiler::Atomic<AND, S32>,
- &ARBDecompiler::Atomic<OR, S32>,
- &ARBDecompiler::Atomic<XOR, S32>,
-
- &ARBDecompiler::Branch,
- &ARBDecompiler::BranchIndirect,
- &ARBDecompiler::PushFlowStack,
- &ARBDecompiler::PopFlowStack,
- &ARBDecompiler::Exit,
- &ARBDecompiler::Discard,
-
- &ARBDecompiler::EmitVertex,
- &ARBDecompiler::EndPrimitive,
-
- &ARBDecompiler::InvocationId,
- &ARBDecompiler::YNegate,
- &ARBDecompiler::LocalInvocationId<'x'>,
- &ARBDecompiler::LocalInvocationId<'y'>,
- &ARBDecompiler::LocalInvocationId<'z'>,
- &ARBDecompiler::WorkGroupId<'x'>,
- &ARBDecompiler::WorkGroupId<'y'>,
- &ARBDecompiler::WorkGroupId<'z'>,
-
- &ARBDecompiler::Unary<TGBALLOT_U>,
- &ARBDecompiler::Unary<TGALL_U>,
- &ARBDecompiler::Unary<TGANY_U>,
- &ARBDecompiler::Unary<TGEQ_U>,
-
- &ARBDecompiler::ThreadId,
- &ARBDecompiler::ThreadMask<'e', 'q'>,
- &ARBDecompiler::ThreadMask<'g', 'e'>,
- &ARBDecompiler::ThreadMask<'g', 't'>,
- &ARBDecompiler::ThreadMask<'l', 'e'>,
- &ARBDecompiler::ThreadMask<'l', 't'>,
- &ARBDecompiler::ShuffleIndexed,
-
- &ARBDecompiler::Barrier,
- &ARBDecompiler::MemoryBarrierGroup,
- &ARBDecompiler::MemoryBarrierGlobal,
- };
-};
-
-ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
- ShaderType stage_, std::string_view identifier)
- : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
- DefineGlobalMemory();
-
- AddLine("TEMP RC;");
- AddLine("TEMP FSWZA[4];");
- AddLine("TEMP FSWZB[4];");
- if (ir.IsDecompiled()) {
- DecompileAST();
- } else {
- DecompileBranchMode();
- }
- AddLine("END");
-
- const std::string code = std::move(shader_source);
- DeclareHeader();
- DeclareVertex();
- DeclareGeometry();
- DeclareFragment();
- DeclareCompute();
- DeclareInputAttributes();
- DeclareOutputAttributes();
- DeclareLocalMemory();
- DeclareGlobalMemory();
- DeclareConstantBuffers();
- DeclareRegisters();
- DeclareTemporaries();
- DeclarePredicates();
- DeclareInternalFlags();
-
- shader_source += code;
-}
-
-std::string_view HeaderStageName(ShaderType stage) {
- switch (stage) {
- case ShaderType::Vertex:
- return "vp";
- case ShaderType::Geometry:
- return "gp";
- case ShaderType::Fragment:
- return "fp";
- case ShaderType::Compute:
- return "cp";
- default:
- UNREACHABLE();
- return "";
- }
-}
-
-void ARBDecompiler::DefineGlobalMemory() {
- u32 binding = 0;
- for (const auto& pair : ir.GetGlobalMemory()) {
- const GlobalMemoryBase base = pair.first;
- global_memory_names.emplace(base, binding);
- ++binding;
- }
-}
-
-void ARBDecompiler::DeclareHeader() {
- AddLine("!!NV{}5.0", HeaderStageName(stage));
- // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
- AddLine("OPTION NV_internal;");
- AddLine("OPTION NV_gpu_program_fp64;");
- AddLine("OPTION NV_shader_thread_group;");
- if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
- AddLine("OPTION NV_shader_thread_shuffle;");
- }
- if (stage == ShaderType::Vertex) {
- if (device.HasNvViewportArray2()) {
- AddLine("OPTION NV_viewport_array2;");
- }
- }
- if (stage == ShaderType::Fragment) {
- AddLine("OPTION ARB_draw_buffers;");
- }
- if (device.HasImageLoadFormatted()) {
- AddLine("OPTION EXT_shader_image_load_formatted;");
- }
-}
-
-void ARBDecompiler::DeclareVertex() {
- if (stage != ShaderType::Vertex) {
- return;
- }
- AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
-}
-
-void ARBDecompiler::DeclareGeometry() {
- if (stage != ShaderType::Geometry) {
- return;
- }
- const auto& info = registry.GetGraphicsInfo();
- const auto& header = ir.GetHeader();
- AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
- AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
- AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
- AddLine("ATTRIB vertex_position = vertex.position;");
-}
-
-void ARBDecompiler::DeclareFragment() {
- if (stage != ShaderType::Fragment) {
- return;
- }
- AddLine("OUTPUT result_color7 = result.color[7];");
- AddLine("OUTPUT result_color6 = result.color[6];");
- AddLine("OUTPUT result_color5 = result.color[5];");
- AddLine("OUTPUT result_color4 = result.color[4];");
- AddLine("OUTPUT result_color3 = result.color[3];");
- AddLine("OUTPUT result_color2 = result.color[2];");
- AddLine("OUTPUT result_color1 = result.color[1];");
- AddLine("OUTPUT result_color0 = result.color;");
-}
-
-void ARBDecompiler::DeclareCompute() {
- if (stage != ShaderType::Compute) {
- return;
- }
- const ComputeInfo& info = registry.GetComputeInfo();
- AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
- info.workgroup_size[2]);
- if (info.shared_memory_size_in_words == 0) {
- return;
- }
- const u32 limit = device.GetMaxComputeSharedMemorySize();
- u32 size_in_bytes = info.shared_memory_size_in_words * 4;
- if (size_in_bytes > limit) {
- LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
- size_in_bytes, limit);
- size_in_bytes = limit;
- }
-
- AddLine("SHARED_MEMORY {};", size_in_bytes);
- AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
-}
-
-void ARBDecompiler::DeclareInputAttributes() {
- if (stage == ShaderType::Compute) {
- return;
- }
- const std::string_view stage_name = StageInputName(stage);
- for (const auto attribute : ir.GetInputAttributes()) {
- if (!IsGenericAttribute(attribute)) {
- continue;
- }
- const u32 index = GetGenericAttributeIndex(attribute);
-
- std::string_view suffix;
- if (stage == ShaderType::Fragment) {
- const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
- if (input_mode == PixelImap::Unused) {
- return;
- }
- suffix = GetInputFlags(input_mode);
- }
- AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
- index);
- }
-}
-
-void ARBDecompiler::DeclareOutputAttributes() {
- if (stage == ShaderType::Compute) {
- return;
- }
- for (const auto attribute : ir.GetOutputAttributes()) {
- if (!IsGenericAttribute(attribute)) {
- continue;
- }
- const u32 index = GetGenericAttributeIndex(attribute);
- AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
- }
-}
-
-void ARBDecompiler::DeclareLocalMemory() {
- u64 size = 0;
- if (stage == ShaderType::Compute) {
- size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
- } else {
- size = ir.GetHeader().GetLocalMemorySize();
- }
- if (size == 0) {
- return;
- }
- const u64 element_count = Common::AlignUp(size, 4) / 4;
- AddLine("TEMP lmem[{}];", element_count);
-}
-
-void ARBDecompiler::DeclareGlobalMemory() {
- const size_t num_entries = ir.GetGlobalMemory().size();
- if (num_entries > 0) {
- AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
- }
-}
-
-void ARBDecompiler::DeclareConstantBuffers() {
- u32 binding = 0;
- for (const auto& cbuf : ir.GetConstantBuffers()) {
- AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
- ++binding;
- }
-}
-
-void ARBDecompiler::DeclareRegisters() {
- for (const u32 gpr : ir.GetRegisters()) {
- AddLine("TEMP R{};", gpr);
- }
-}
-
-void ARBDecompiler::DeclareTemporaries() {
- for (std::size_t i = 0; i < max_temporaries; ++i) {
- AddLine("TEMP T{};", i);
- }
- for (std::size_t i = 0; i < max_long_temporaries; ++i) {
- AddLine("LONG TEMP L{};", i);
- }
-}
-
-void ARBDecompiler::DeclarePredicates() {
- for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
- AddLine("TEMP P{};", static_cast<u64>(pred));
- }
-}
-
-void ARBDecompiler::DeclareInternalFlags() {
- for (const char* name : INTERNAL_FLAG_NAMES) {
- AddLine("TEMP {};", name);
- }
-}
-
-void ARBDecompiler::InitializeVariables() {
- AddLine("MOV.F32 FSWZA[0], -1;");
- AddLine("MOV.F32 FSWZA[1], 1;");
- AddLine("MOV.F32 FSWZA[2], -1;");
- AddLine("MOV.F32 FSWZA[3], 0;");
- AddLine("MOV.F32 FSWZB[0], -1;");
- AddLine("MOV.F32 FSWZB[1], -1;");
- AddLine("MOV.F32 FSWZB[2], 1;");
- AddLine("MOV.F32 FSWZB[3], -1;");
-
- if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
- AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
- }
- for (const auto attribute : ir.GetOutputAttributes()) {
- if (!IsGenericAttribute(attribute)) {
- continue;
- }
- const u32 index = GetGenericAttributeIndex(attribute);
- AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
- }
- for (const u32 gpr : ir.GetRegisters()) {
- AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
- }
- for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
- AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
- }
-}
-
-void ARBDecompiler::DecompileAST() {
- const u32 num_flow_variables = ir.GetASTNumVariables();
- for (u32 i = 0; i < num_flow_variables; ++i) {
- AddLine("TEMP F{};", i);
- }
- for (u32 i = 0; i < num_flow_variables; ++i) {
- AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
- }
-
- InitializeVariables();
-
- VisitAST(ir.GetASTProgram());
-}
-
-void ARBDecompiler::DecompileBranchMode() {
- static constexpr u32 FLOW_STACK_SIZE = 20;
- if (!ir.IsFlowStackDisabled()) {
- AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
- AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
- AddLine("TEMP SSY_TOP;");
- AddLine("TEMP PBK_TOP;");
- }
-
- AddLine("TEMP PC;");
-
- if (!ir.IsFlowStackDisabled()) {
- AddLine("MOV.U SSY_TOP.x, 0;");
- AddLine("MOV.U PBK_TOP.x, 0;");
- }
-
- InitializeVariables();
-
- const auto basic_block_end = ir.GetBasicBlocks().end();
- auto basic_block_it = ir.GetBasicBlocks().begin();
- const u32 first_address = basic_block_it->first;
- AddLine("MOV.U PC.x, {};", first_address);
-
- AddLine("REP;");
-
- std::size_t num_blocks = 0;
- while (basic_block_it != basic_block_end) {
- const auto& [address, bb] = *basic_block_it;
- ++num_blocks;
-
- AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
- AddLine("IF NE.x;");
-
- VisitBlock(bb);
-
- ++basic_block_it;
-
- if (basic_block_it != basic_block_end) {
- const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
- if (!op || op->GetCode() != OperationCode::Branch) {
- const u32 next_address = basic_block_it->first;
- AddLine("MOV.U PC.x, {};", next_address);
- AddLine("CONT;");
- }
- }
-
- AddLine("ELSE;");
- }
- AddLine("RET;");
- while (num_blocks--) {
- AddLine("ENDIF;");
- }
-
- AddLine("ENDREP;");
-}
-
-void ARBDecompiler::VisitAST(const ASTNode& node) {
- if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
- for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
- const std::string condition = VisitExpression(if_then->condition);
- ResetTemporaries();
-
- AddLine("MOVC.U RC.x, {};", condition);
- AddLine("IF NE.x;");
- for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- AddLine("ENDIF;");
- } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
- AddLine("ELSE;");
- for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
- VisitBlock(decoded->nodes);
- } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
- AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
- ResetTemporaries();
- } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
- const std::string condition = VisitExpression(do_while->condition);
- ResetTemporaries();
- AddLine("REP;");
- for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- AddLine("MOVC.U RC.x, {};", condition);
- AddLine("BRK (NE.x);");
- AddLine("ENDREP;");
- } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
- const bool is_true = ExprIsTrue(ast_return->condition);
- if (!is_true) {
- AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
- AddLine("IF NE.x;");
- ResetTemporaries();
- }
- if (ast_return->kills) {
- AddLine("KIL TR;");
- } else {
- Exit();
- }
- if (!is_true) {
- AddLine("ENDIF;");
- }
- } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
- if (ExprIsTrue(ast_break->condition)) {
- AddLine("BRK;");
- } else {
- AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
- AddLine("BRK (NE.x);");
- ResetTemporaries();
- }
- } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
- // Nothing to do
- } else {
- UNREACHABLE();
- }
-}
-
-std::string ARBDecompiler::VisitExpression(const Expr& node) {
- if (const auto expr = std::get_if<ExprAnd>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
- VisitExpression(expr->operand2));
- return result;
- }
- if (const auto expr = std::get_if<ExprOr>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
- VisitExpression(expr->operand2));
- return result;
- }
- if (const auto expr = std::get_if<ExprNot>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
- return result;
- }
- if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
- return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
- }
- if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
- return Visit(ir.GetConditionCode(expr->cc));
- }
- if (const auto expr = std::get_if<ExprVar>(&*node)) {
- return fmt::format("F{}.x", expr->var_index);
- }
- if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
- return expr->value ? "0xffffffff" : "0";
- }
- if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
- return result;
- }
- UNREACHABLE();
- return "0";
-}
-
-void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
- for (const auto& node : bb) {
- Visit(node);
- }
-}
-
-std::string ARBDecompiler::Visit(const Node& node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- if (const auto amend_index = operation->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index));
- }
- const std::size_t index = static_cast<std::size_t>(operation->GetCode());
- if (index >= OPERATION_DECOMPILERS.size()) {
- UNREACHABLE_MSG("Out of bounds operation: {}", index);
- return {};
- }
- const auto decompiler = OPERATION_DECOMPILERS[index];
- if (decompiler == nullptr) {
- UNREACHABLE_MSG("Undefined operation: {}", index);
- return {};
- }
- return (this->*decompiler)(*operation);
- }
-
- if (const auto gpr = std::get_if<GprNode>(&*node)) {
- const u32 index = gpr->GetIndex();
- if (index == Register::ZeroIndex) {
- return "{0, 0, 0, 0}.x";
- }
- return fmt::format("R{}.x", index);
- }
-
- if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
- return fmt::format("CV{}.x", cv->GetIndex());
- }
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
- std::string temporary = AllocTemporary();
- AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
- return temporary;
- }
-
- if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
- std::string temporary = AllocTemporary();
- switch (const auto index = predicate->GetIndex(); index) {
- case Tegra::Shader::Pred::UnusedIndex:
- AddLine("MOV.S {}, -1;", temporary);
- break;
- case Tegra::Shader::Pred::NeverExecute:
- AddLine("MOV.S {}, 0;", temporary);
- break;
- default:
- AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
- break;
- }
- if (predicate->IsNegated()) {
- AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
- }
- return temporary;
- }
-
- if (const auto abuf = std::get_if<AbufNode>(&*node)) {
- if (abuf->IsPhysicalBuffer()) {
- UNIMPLEMENTED_MSG("Physical buffers are not implemented");
- return "{0, 0, 0, 0}.x";
- }
-
- const Attribute::Index index = abuf->GetIndex();
- const u32 element = abuf->GetElement();
- const char swizzle = Swizzle(element);
- switch (index) {
- case Attribute::Index::Position: {
- if (stage == ShaderType::Geometry) {
- return fmt::format("{}_position[{}].{}", StageInputName(stage),
- Visit(abuf->GetBuffer()), swizzle);
- } else {
- return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
- }
- }
- case Attribute::Index::TessCoordInstanceIDVertexID:
- ASSERT(stage == ShaderType::Vertex);
- switch (element) {
- case 2:
- return "vertex.instance";
- case 3:
- return "vertex.id";
- }
- UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- break;
- case Attribute::Index::PointCoord:
- switch (element) {
- case 0:
- return "fragment.pointcoord.x";
- case 1:
- return "fragment.pointcoord.y";
- }
- UNIMPLEMENTED();
- break;
- case Attribute::Index::FrontFacing: {
- ASSERT(stage == ShaderType::Fragment);
- ASSERT(element == 3);
- const std::string temporary = AllocVectorTemporary();
- AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
- AddLine("MOV.U.CC RC.x, -RC;");
- AddLine("MOV.S {}.x, 0;", temporary);
- AddLine("MOV.S {}.x (NE.x), -1;", temporary);
- return fmt::format("{}.x", temporary);
- }
- default:
- if (IsGenericAttribute(index)) {
- if (stage == ShaderType::Geometry) {
- return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
- Visit(abuf->GetBuffer()), swizzle);
- } else {
- return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
- GetGenericAttributeIndex(index), swizzle);
- }
- }
- UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
- break;
- }
- return "{0, 0, 0, 0}.x";
- }
-
- if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
- std::string offset_string;
- const auto& offset = cbuf->GetOffset();
- if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
- offset_string = std::to_string(imm->GetValue());
- } else {
- offset_string = Visit(offset);
- }
- std::string temporary = AllocTemporary();
- AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
- return temporary;
- }
-
- if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- std::string temporary = AllocTemporary();
- AddLine("MOV {}, 0;", temporary);
- AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
- return temporary;
- }
-
- if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- std::string temporary = Visit(lmem->GetAddress());
- AddLine("SHR.U {}, {}, 2;", temporary, temporary);
- AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
- return temporary;
- }
-
- if (const auto smem = std::get_if<SmemNode>(&*node)) {
- std::string temporary = Visit(smem->GetAddress());
- AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
- return temporary;
- }
-
- if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
- const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
- return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
- }
-
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- if (const auto amend_index = conditional->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index));
- }
- AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
- AddLine("IF NE.x;");
- VisitBlock(conditional->GetCode());
- AddLine("ENDIF;");
- return {};
- }
-
- if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
- // Uncommenting this will generate invalid code. GLASM lacks comments.
- // AddLine("// {}", cmt->GetText());
- return {};
- }
-
- UNIMPLEMENTED();
- return {};
-}
-
-std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(meta.sampler.is_indexed);
-
- const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
- meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
- const std::size_t count = operation.GetOperandsCount();
- std::string temporary = AllocVectorTemporary();
- std::size_t i = 0;
- for (; i < count; ++i) {
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
- }
- if (meta.sampler.is_array) {
- AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
- ++i;
- }
- if (meta.sampler.is_shadow) {
- std::string compare = Visit(meta.depth_compare);
- if (is_extended) {
- ASSERT(i == 4);
- std::string extra_coord = AllocVectorTemporary();
- AddLine("MOV.F {}.x, {};", extra_coord, compare);
- return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
- }
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
- ++i;
- }
- return {temporary, temporary, i};
-}
-
-std::string ARBDecompiler::BuildAoffi(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- if (meta.aoffi.empty()) {
- return {};
- }
- const std::string temporary = AllocVectorTemporary();
- std::size_t i = 0;
- for (auto& node : meta.aoffi) {
- AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
- }
- return fmt::format(", offset({})", temporary);
-}
-
-std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
- // Read a bindless SSBO, return its address and set CC accordingly
- // address = c[binding].xy
- // length = c[binding].z
- const u32 binding = global_memory_names.at(gmem.GetDescriptor());
-
- const std::string pointer = AllocLongVectorTemporary();
- std::string temporary = AllocTemporary();
-
- AddLine("PK64.U {}, c[{}];", pointer, binding);
- AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
- Visit(gmem.GetBaseAddress()));
- AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
- AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
- // Compare offset to length and set CC
- AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
- return fmt::format("{}.x", pointer);
-}
-
-void ARBDecompiler::Exit() {
- if (stage != ShaderType::Fragment) {
- AddLine("RET;");
- return;
- }
-
- const auto safe_get_register = [this](u32 reg) -> std::string {
- if (ir.GetRegisters().contains(reg)) {
- return fmt::format("R{}.x", reg);
- }
- return "{0, 0, 0, 0}.x";
- };
-
- const auto& header = ir.GetHeader();
- u32 current_reg = 0;
- for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
- for (u32 component = 0; component < 4; ++component) {
- if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
- continue;
- }
- AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
- safe_get_register(current_reg));
- ++current_reg;
- }
- }
- if (header.ps.omap.depth) {
- AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
- }
-
- AddLine("RET;");
-}
-
-std::string ARBDecompiler::Assign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- std::string dest_name;
- if (const auto gpr = std::get_if<GprNode>(&*dest)) {
- if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no op
- return {};
- }
- dest_name = fmt::format("R{}.x", gpr->GetIndex());
- } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
- const u32 element = abuf->GetElement();
- const char swizzle = Swizzle(element);
- switch (const Attribute::Index index = abuf->GetIndex()) {
- case Attribute::Index::Position:
- dest_name = fmt::format("result.position.{}", swizzle);
- break;
- case Attribute::Index::LayerViewportPointSize:
- switch (element) {
- case 0:
- UNIMPLEMENTED();
- return {};
- case 1:
- case 2:
- if (!device.HasNvViewportArray2()) {
- LOG_ERROR(
- Render_OpenGL,
- "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
- return {};
- }
- dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
- break;
- case 3:
- dest_name = "result.pointsize.x";
- break;
- }
- break;
- case Attribute::Index::ClipDistances0123:
- dest_name = fmt::format("result.clip[{}].x", element);
- break;
- case Attribute::Index::ClipDistances4567:
- dest_name = fmt::format("result.clip[{}].x", element + 4);
- break;
- default:
- if (!IsGenericAttribute(index)) {
- UNREACHABLE();
- return {};
- }
- dest_name =
- fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
- break;
- }
- } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
- const std::string address = Visit(lmem->GetAddress());
- AddLine("SHR.U {}, {}, 2;", address, address);
- dest_name = fmt::format("lmem[{}].x", address);
- } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
- AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
- ResetTemporaries();
- return {};
- } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- AddLine("IF NE.x;");
- AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
- AddLine("ENDIF;");
- ResetTemporaries();
- return {};
- } else {
- UNREACHABLE();
- ResetTemporaries();
- return {};
- }
-
- AddLine("MOV.U {}, {};", dest_name, Visit(src));
- ResetTemporaries();
- return {};
-}
-
-std::string ARBDecompiler::Select(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
- Visit(operation[2]));
- return temporary;
-}
-
-std::string ARBDecompiler::FClamp(Operation operation) {
- // 1.0f in hex, replace with std::bit_cast on C++20
- static constexpr u32 POSITIVE_ONE = 0x3f800000;
-
- std::string temporary = AllocTemporary();
- const Node& value = operation[0];
- const Node& low = operation[1];
- const Node& high = operation[2];
- const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
- const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
- if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
- AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
- } else {
- AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
- AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
- }
- return temporary;
-}
-
-std::string ARBDecompiler::FCastHalf0(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::FCastHalf1(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
- AddLine("MOV {}.x, {}.y;", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::FSqrt(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
- AddLine("RCP.F32 {}, {};", temporary, temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "NV_shader_thread_shuffle is missing. Kepler or better is required.");
- AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
- return fmt::format("{}.x", temporary);
- }
-
- AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
- AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
- AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
- AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
- AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
- AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
- AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HAdd2(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
- AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HMul2(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
- AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HFma2(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- const std::string tmp3 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
- AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
- AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HAbsolute(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HNegate(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
- AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
- AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
- AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HClamp(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
- AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
- AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
- AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
- AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HCastFloat(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
- AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HUnpack(Operation operation) {
- std::string operand = Visit(operation[0]);
- switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
- case Tegra::Shader::HalfType::H0_H1:
- return operand;
- case Tegra::Shader::HalfType::F32: {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.U {}.x, {};", temporary, operand);
- AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
- }
- case Tegra::Shader::HalfType::H0_H0: {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, operand);
- AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
- }
- case Tegra::Shader::HalfType::H1_H1: {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, operand);
- AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
- }
- }
- UNREACHABLE();
- return "{0, 0, 0, 0}.x";
-}
-
-std::string ARBDecompiler::HMergeF32(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HMergeH0(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
- AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HMergeH1(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
- AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HPack2(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
- AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::LogicalAssign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- std::string target;
-
- if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
- ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
-
- const Tegra::Shader::Pred index = pred->GetIndex();
- switch (index) {
- case Tegra::Shader::Pred::NeverExecute:
- case Tegra::Shader::Pred::UnusedIndex:
- // Writing to these predicates is a no-op
- return {};
- }
- target = fmt::format("P{}.x", static_cast<u64>(index));
- } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
- const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
- target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
- } else {
- UNREACHABLE();
- ResetTemporaries();
- return {};
- }
-
- AddLine("MOV.U {}, {};", target, Visit(src));
- ResetTemporaries();
- return {};
-}
-
-std::string ARBDecompiler::LogicalPick2(Operation operation) {
- std::string temporary = AllocTemporary();
- const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
- AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
- return temporary;
-}
-
-std::string ARBDecompiler::LogicalAnd2(Operation operation) {
- std::string temporary = AllocTemporary();
- const std::string op = Visit(operation[0]);
- AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
- return temporary;
-}
-
-std::string ARBDecompiler::FloatOrdered(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
- AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
- AddLine("MOV.S {}, -1;", temporary);
- AddLine("MOV.S {} (NAN.x), 0;", temporary);
- AddLine("MOV.S {} (NAN.y), 0;", temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::FloatUnordered(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
- AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("MOV.S {} (NAN.x), -1;", temporary);
- AddLine("MOV.S {} (NAN.y), -1;", temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("IF CF.x;");
- AddLine("MOV.S {}, -1;", temporary);
- AddLine("ENDIF;");
- return temporary;
-}
-
-std::string ARBDecompiler::Texture(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [coords, temporary, swizzle] = BuildCoords(operation);
-
- std::string_view opcode = "TEX";
- std::string extra;
- if (meta.bias) {
- ASSERT(!meta.lod);
- opcode = "TXB";
-
- if (swizzle < 4) {
- AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
- } else {
- const std::string bias = AllocTemporary();
- AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
- extra = fmt::format(" {},", bias);
- }
- }
- if (meta.lod) {
- ASSERT(!meta.bias);
- opcode = "TXL";
-
- if (swizzle < 4) {
- AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
- } else {
- const std::string lod = AllocTemporary();
- AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
- extra = fmt::format(" {},", lod);
- }
- }
-
- AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id,
- TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureGather(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [coords, temporary, swizzle] = BuildCoords(operation);
-
- std::string comp;
- if (!meta.sampler.is_shadow) {
- const auto& immediate = std::get<ImmediateNode>(*meta.component);
- comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
- }
-
- AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
- TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const std::string temporary = AllocVectorTemporary();
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
-
- ASSERT(!meta.sampler.is_array);
-
- const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
- AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureQueryLod(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const std::string temporary = AllocVectorTemporary();
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
-
- ASSERT(!meta.sampler.is_array);
-
- const std::size_t count = operation.GetOperandsCount();
- for (std::size_t i = 0; i < count; ++i) {
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
- }
- AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
- AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
- AddLine("TRUNC.S {}, {};", temporary, temporary);
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TexelFetch(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [coords, temporary, swizzle] = BuildCoords(operation);
-
- if (!meta.sampler.is_buffer) {
- ASSERT(swizzle < 4);
- AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
- }
- AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta),
- BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureGradient(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const std::string ddx = AllocVectorTemporary();
- const std::string ddy = AllocVectorTemporary();
- const std::string coord = std::get<1>(BuildCoords(operation));
-
- const std::size_t num_components = meta.derivates.size() / 2;
- for (std::size_t index = 0; index < num_components; ++index) {
- const char swizzle = Swizzle(index);
- AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
- AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
- }
-
- const std::string_view result = coord;
- AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
- TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
- return fmt::format("{}.x", result);
-}
-
-std::string ARBDecompiler::ImageLoad(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
- const std::size_t count = operation.GetOperandsCount();
- const std::string_view type = ImageType(meta.image.type);
-
- const std::string temporary = AllocVectorTemporary();
- for (std::size_t i = 0; i < count; ++i) {
- AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
- }
- AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
- AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::ImageStore(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
- const std::size_t num_coords = operation.GetOperandsCount();
- const std::size_t num_values = meta.values.size();
- const std::string_view type = ImageType(meta.image.type);
-
- const std::string coord = AllocVectorTemporary();
- const std::string value = AllocVectorTemporary();
- for (std::size_t i = 0; i < num_coords; ++i) {
- AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
- }
- for (std::size_t i = 0; i < num_values; ++i) {
- AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
- }
- AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
- return {};
-}
-
-std::string ARBDecompiler::Branch(Operation operation) {
- const auto target = std::get<ImmediateNode>(*operation[0]);
- AddLine("MOV.U PC.x, {};", target.GetValue());
- AddLine("CONT;");
- return {};
-}
-
-std::string ARBDecompiler::BranchIndirect(Operation operation) {
- AddLine("MOV.U PC.x, {};", Visit(operation[0]));
- AddLine("CONT;");
- return {};
-}
-
-std::string ARBDecompiler::PushFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
- const std::string_view stack_name = StackName(stack);
- AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
- AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
- return {};
-}
-
-std::string ARBDecompiler::PopFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- const std::string_view stack_name = StackName(stack);
- AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
- AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
- AddLine("CONT;");
- return {};
-}
-
-std::string ARBDecompiler::Exit(Operation) {
- Exit();
- return {};
-}
-
-std::string ARBDecompiler::Discard(Operation) {
- AddLine("KIL TR;");
- return {};
-}
-
-std::string ARBDecompiler::EmitVertex(Operation) {
- AddLine("EMIT;");
- return {};
-}
-
-std::string ARBDecompiler::EndPrimitive(Operation) {
- AddLine("ENDPRIM;");
- return {};
-}
-
-std::string ARBDecompiler::InvocationId(Operation) {
- return "primitive.invocation";
-}
-
-std::string ARBDecompiler::YNegate(Operation) {
- LOG_WARNING(Render_OpenGL, "(STUBBED)");
- std::string temporary = AllocTemporary();
- AddLine("MOV.F {}, 1;", temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::ThreadId(Operation) {
- return fmt::format("{}.threadid", StageInputName(stage));
-}
-
-std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "NV_shader_thread_shuffle is missing. Kepler or better is required.");
- return Visit(operation[0]);
- }
- const std::string temporary = AllocVectorTemporary();
- AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
- Visit(operation[1]));
- AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::Barrier(Operation) {
- AddLine("BAR;");
- return {};
-}
-
-std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
- AddLine("MEMBAR.CTA;");
- return {};
-}
-
-std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
- AddLine("MEMBAR;");
- return {};
-}
-
-} // Anonymous namespace
-
-std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- Tegra::Engines::ShaderType stage, std::string_view identifier) {
- return ARBDecompiler(device, ir, registry, stage, identifier).Code();
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
deleted file mode 100644
index 6afc87220..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <string>
-#include <string_view>
-
-#include "common/common_types.h"
-
-namespace Tegra::Engines {
-enum class ShaderType : u32;
-}
-
-namespace VideoCommon::Shader {
-class ShaderIR;
-class Registry;
-} // namespace VideoCommon::Shader
-
-namespace OpenGL {
-
-class Device;
-
-std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- Tegra::Engines::ShaderType stage, std::string_view identifier);
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ceb3abcb2..3551dbdcc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -54,40 +54,6 @@ namespace {
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-struct TextureHandle {
- constexpr TextureHandle(u32 data, bool via_header_index) {
- const Tegra::Texture::TextureHandle handle{data};
- image = handle.tic_id;
- sampler = via_header_index ? image : handle.tsc_id.Value();
- }
-
- u32 image;
- u32 sampler;
-};
-
-template <typename Engine, typename Entry>
-TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
- ShaderType shader_type, size_t index = 0) {
- if constexpr (std::is_same_v<Entry, SamplerEntry>) {
- if (entry.is_separated) {
- const u32 buffer_1 = entry.buffer;
- const u32 buffer_2 = entry.secondary_buffer;
- const u32 offset_1 = entry.offset;
- const u32 offset_2 = entry.secondary_offset;
- const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
- const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
- return TextureHandle(handle_1 | handle_2, via_header_index);
- }
- }
- if (entry.is_bindless) {
- const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
- return TextureHandle(raw, via_header_index);
- }
- const u32 buffer = engine.GetBoundBuffer();
- const u64 offset = (entry.offset + index) * sizeof(u32);
- return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
-}
-
/// Translates hardware transform feedback indices
/// @param location Hardware location
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
@@ -119,44 +85,6 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
-
-ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
- if (entry.is_buffer) {
- return ImageViewType::Buffer;
- }
- switch (entry.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
- case Tegra::Shader::TextureType::Texture2D:
- return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
- case Tegra::Shader::TextureType::Texture3D:
- return ImageViewType::e3D;
- case Tegra::Shader::TextureType::TextureCube:
- return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
- }
- UNREACHABLE();
- return ImageViewType::e2D;
-}
-
-ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
- switch (entry.type) {
- case Tegra::Shader::ImageType::Texture1D:
- return ImageViewType::e1D;
- case Tegra::Shader::ImageType::Texture1DArray:
- return ImageViewType::e1DArray;
- case Tegra::Shader::ImageType::Texture2D:
- return ImageViewType::e2D;
- case Tegra::Shader::ImageType::Texture2DArray:
- return ImageViewType::e2DArray;
- case Tegra::Shader::ImageType::Texture3D:
- return ImageViewType::e3D;
- case Tegra::Shader::ImageType::TextureBuffer:
- return ImageViewType::Buffer;
- }
- UNREACHABLE();
- return ImageViewType::e2D;
-}
-
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -172,12 +100,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
- fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
- async_shaders(emu_window_) {
- if (device.UseAsynchronousShaders()) {
- async_shaders.AllocateWorkers();
- }
-}
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
RasterizerOpenGL::~RasterizerOpenGL() = default;
@@ -244,117 +167,8 @@ void RasterizerOpenGL::SyncVertexInstances() {
}
}
-void RasterizerOpenGL::SetupShaders(bool is_indexed) {
- u32 clip_distances = 0;
-
- std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
- image_view_indices.clear();
- sampler_handles.clear();
-
- texture_cache.SynchronizeGraphicsDescriptors();
-
- for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- const auto& shader_config = maxwell3d.regs.shader_config[index];
- const auto program{static_cast<Maxwell::ShaderProgram>(index)};
-
- // Skip stages that are not enabled
- if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
- switch (program) {
- case Maxwell::ShaderProgram::Geometry:
- program_manager.UseGeometryShader(0);
- break;
- case Maxwell::ShaderProgram::Fragment:
- program_manager.UseFragmentShader(0);
- break;
- default:
- break;
- }
- continue;
- }
- // Currently this stages are not supported in the OpenGL backend.
- // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
- if (program == Maxwell::ShaderProgram::TesselationControl ||
- program == Maxwell::ShaderProgram::TesselationEval) {
- continue;
- }
-
- Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
- const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
- switch (program) {
- case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB:
- program_manager.UseVertexShader(program_handle);
- break;
- case Maxwell::ShaderProgram::Geometry:
- program_manager.UseGeometryShader(program_handle);
- break;
- case Maxwell::ShaderProgram::Fragment:
- program_manager.UseFragmentShader(program_handle);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
- shader_config.enable.Value(), shader_config.offset);
- break;
- }
-
- // Stage indices are 0 - 5
- const size_t stage = index == 0 ? 0 : index - 1;
- shaders[stage] = shader;
-
- SetupDrawTextures(shader, stage);
- SetupDrawImages(shader, stage);
-
- buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
-
- buffer_cache.UnbindGraphicsStorageBuffers(stage);
- u32 ssbo_index = 0;
- for (const auto& buffer : shader->GetEntries().global_memory_entries) {
- buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
- buffer.cbuf_offset, buffer.is_written);
- ++ssbo_index;
- }
-
- // Workaround for Intel drivers.
- // When a clip distance is enabled but not set in the shader it crops parts of the screen
- // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
- // clip distances only when it's written by a shader stage.
- clip_distances |= shader->GetEntries().clip_distances;
-
- // When VertexA is enabled, we have dual vertex shaders
- if (program == Maxwell::ShaderProgram::VertexA) {
- // VertexB was combined with VertexA, so we skip the VertexB iteration
- ++index;
- }
- }
- SyncClipEnabled(clip_distances);
- maxwell3d.dirty.flags[Dirty::Shaders] = false;
-
- buffer_cache.UpdateGraphicsBuffers(is_indexed);
-
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
-
- buffer_cache.BindHostGeometryBuffers(is_indexed);
-
- size_t image_view_index = 0;
- size_t texture_index = 0;
- size_t image_index = 0;
- for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
- const Shader* const shader = shaders[stage];
- if (!shader) {
- continue;
- }
- buffer_cache.BindHostStageBuffers(stage);
- const auto& base = device.GetBaseBindings(stage);
- BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
- texture_index, image_index);
- }
-}
-
void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
- const VideoCore::DiskResourceLoadCallback& callback) {
- shader_cache.LoadDiskCache(title_id, stop_loading, callback);
-}
+ const VideoCore::DiskResourceLoadCallback& callback) {}
void RasterizerOpenGL::Clear() {
MICROPROFILE_SCOPE(OpenGL_Clears);
@@ -434,7 +248,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Setup shaders and their used resources.
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- SetupShaders(is_indexed);
texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
@@ -488,27 +301,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
gpu.TickWork();
}
-void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
- Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
-
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- BindComputeTextures(kernel);
-
- const auto& entries = kernel->GetEntries();
- buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
- buffer_cache.UnbindComputeStorageBuffers();
- u32 ssbo_index = 0;
- for (const auto& buffer : entries.global_memory_entries) {
- buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
- buffer.is_written);
- ++ssbo_index;
- }
- buffer_cache.UpdateComputeBuffers();
- buffer_cache.BindHostComputeBuffers();
-
- const auto& launch_desc = kepler_compute.launch_description;
- glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
- ++num_queued_commands;
+void RasterizerOpenGL::DispatchCompute() {
+ UNREACHABLE_MSG("Not implemented");
}
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -726,106 +520,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true;
}
-void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
- image_view_indices.clear();
- sampler_handles.clear();
-
- texture_cache.SynchronizeComputeDescriptors();
-
- SetupComputeTextures(kernel);
- SetupComputeImages(kernel);
-
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- texture_cache.FillComputeImageViews(indices_span, image_view_ids);
-
- program_manager.BindCompute(kernel->GetHandle());
- size_t image_view_index = 0;
- size_t texture_index = 0;
- size_t image_index = 0;
- BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
-}
-
-void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
- GLuint base_image, size_t& image_view_index,
- size_t& texture_index, size_t& image_index) {
- const GLuint* const samplers = sampler_handles.data() + texture_index;
- const GLuint* const textures = texture_handles.data() + texture_index;
- const GLuint* const images = image_handles.data() + image_index;
-
- const size_t num_samplers = entries.samplers.size();
- for (const auto& sampler : entries.samplers) {
- for (size_t i = 0; i < sampler.size; ++i) {
- const ImageViewId image_view_id = image_view_ids[image_view_index++];
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
- texture_handles[texture_index++] = handle;
- }
- }
- const size_t num_images = entries.images.size();
- for (size_t unit = 0; unit < num_images; ++unit) {
- // TODO: Mark as modified
- const ImageViewId image_view_id = image_view_ids[image_view_index++];
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
- image_handles[image_index] = handle;
- ++image_index;
- }
- if (num_samplers > 0) {
- glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
- glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
- }
- if (num_images > 0) {
- glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
- }
-}
-
-void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
- const bool via_header_index =
- maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : shader->GetEntries().samplers) {
- const auto shader_type = static_cast<ShaderType>(stage_index);
- for (size_t index = 0; index < entry.size; ++index) {
- const auto handle =
- GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
- const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
- sampler_handles.push_back(sampler->Handle());
- image_view_indices.push_back(handle.image);
- }
- }
-}
-
-void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : kernel->GetEntries().samplers) {
- for (size_t i = 0; i < entry.size; ++i) {
- const auto handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
- const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
- sampler_handles.push_back(sampler->Handle());
- image_view_indices.push_back(handle.image);
- }
- }
-}
-
-void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
- const bool via_header_index =
- maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : shader->GetEntries().images) {
- const auto shader_type = static_cast<ShaderType>(stage_index);
- const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : shader->GetEntries().images) {
- const auto handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
- image_view_indices.push_back(handle.image);
- }
-}
-
void RasterizerOpenGL::SyncState() {
SyncViewport();
SyncRasterizeEnable();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d30ad698f..1f58f8791 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -28,11 +28,9 @@
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
namespace Core::Memory {
@@ -81,7 +79,7 @@ public:
void Draw(bool is_indexed, bool is_instanced) override;
void Clear() override;
- void DispatchCompute(GPUVAddr code_addr) override;
+ void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -118,36 +116,11 @@ public:
return num_queued_commands > 0;
}
- VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
- return async_shaders;
- }
-
- const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
- return async_shaders;
- }
-
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
- void BindComputeTextures(Shader* kernel);
-
- void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
- size_t& image_view_index, size_t& texture_index, size_t& image_index);
-
- /// Configures the current textures to use for the draw command.
- void SetupDrawTextures(const Shader* shader, size_t stage_index);
-
- /// Configures the textures used in a compute shader.
- void SetupComputeTextures(const Shader* kernel);
-
- /// Configures images in a graphics shader.
- void SetupDrawImages(const Shader* shader, size_t stage_index);
-
- /// Configures images in a compute shader.
- void SetupComputeImages(const Shader* shader);
-
/// Syncs state to match guest's
void SyncState();
@@ -230,8 +203,6 @@ private:
/// End a transform feedback
void EndTransformFeedback();
- void SetupShaders(bool is_indexed);
-
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
@@ -251,8 +222,6 @@ private:
AccelerateDMA accelerate_dma;
FenceManagerOpenGL fence_manager;
- VideoCommon::Shader::AsyncShaders async_shaders;
-
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 5a01c59ec..4dd166156 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -20,307 +20,19 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
-#include "video_core/renderer_opengl/gl_arb_decompiler.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h"
#include "video_core/shader_notify.h"
namespace OpenGL {
-using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::GetShaderAddress;
-using VideoCommon::Shader::GetShaderCode;
-using VideoCommon::Shader::GetUniqueIdentifier;
-using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
-using VideoCommon::Shader::ProgramCode;
-using VideoCommon::Shader::Registry;
-using VideoCommon::Shader::ShaderIR;
-using VideoCommon::Shader::STAGE_MAIN_OFFSET;
-
-namespace {
-
-constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
-
-/// Gets the shader type from a Maxwell program type
-constexpr GLenum GetGLShaderType(ShaderType shader_type) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return GL_VERTEX_SHADER;
- case ShaderType::Geometry:
- return GL_GEOMETRY_SHADER;
- case ShaderType::Fragment:
- return GL_FRAGMENT_SHADER;
- case ShaderType::Compute:
- return GL_COMPUTE_SHADER;
- default:
- return GL_NONE;
- }
-}
-
-constexpr const char* GetShaderTypeName(ShaderType shader_type) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return "VS";
- case ShaderType::TesselationControl:
- return "HS";
- case ShaderType::TesselationEval:
- return "DS";
- case ShaderType::Geometry:
- return "GS";
- case ShaderType::Fragment:
- return "FS";
- case ShaderType::Compute:
- return "CS";
- }
- return "UNK";
-}
-
-constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
- switch (program_type) {
- case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB:
- return ShaderType::Vertex;
- case Maxwell::ShaderProgram::TesselationControl:
- return ShaderType::TesselationControl;
- case Maxwell::ShaderProgram::TesselationEval:
- return ShaderType::TesselationEval;
- case Maxwell::ShaderProgram::Geometry:
- return ShaderType::Geometry;
- case Maxwell::ShaderProgram::Fragment:
- return ShaderType::Fragment;
- }
- return {};
-}
-
-constexpr GLenum AssemblyEnum(ShaderType shader_type) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return GL_VERTEX_PROGRAM_NV;
- case ShaderType::TesselationControl:
- return GL_TESS_CONTROL_PROGRAM_NV;
- case ShaderType::TesselationEval:
- return GL_TESS_EVALUATION_PROGRAM_NV;
- case ShaderType::Geometry:
- return GL_GEOMETRY_PROGRAM_NV;
- case ShaderType::Fragment:
- return GL_FRAGMENT_PROGRAM_NV;
- case ShaderType::Compute:
- return GL_COMPUTE_PROGRAM_NV;
- }
- return {};
-}
-
-std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
- return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
-}
-
-std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
- const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
- const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
- entry.graphics_info, entry.compute_info};
- auto registry = std::make_shared<Registry>(entry.type, info);
- for (const auto& [address, value] : entry.keys) {
- const auto [buffer, offset] = address;
- registry->InsertKey(buffer, offset, value);
- }
- for (const auto& [offset, sampler] : entry.bound_samplers) {
- registry->InsertBoundSampler(offset, sampler);
- }
- for (const auto& [key, sampler] : entry.bindless_samplers) {
- const auto [buffer, offset] = key;
- registry->InsertBindlessSampler(buffer, offset, sampler);
- }
- return registry;
-}
-
-std::unordered_set<GLenum> GetSupportedFormats() {
- GLint num_formats;
- glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
-
- std::vector<GLint> formats(num_formats);
- glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
-
- std::unordered_set<GLenum> supported_formats;
- for (const GLint format : formats) {
- supported_formats.insert(static_cast<GLenum>(format));
- }
- return supported_formats;
-}
-
-} // Anonymous namespace
-
-ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
- const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
- if (device.UseDriverCache()) {
- // Ignore hint retrievable if we are using the driver cache
- hint_retrievable = false;
- }
- const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
- LOG_INFO(Render_OpenGL, "{}", shader_id);
-
- auto program = std::make_shared<ProgramHandle>();
-
- if (device.UseAssemblyShaders()) {
- const std::string arb =
- DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
-
- GLuint& arb_prog = program->assembly_program.handle;
-
-// Commented out functions signal OpenGL errors but are compatible with apitrace.
-// Use them only to capture and replay on apitrace.
-#if 0
- glGenProgramsNV(1, &arb_prog);
- glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
- reinterpret_cast<const GLubyte*>(arb.data()));
-#else
- glGenProgramsARB(1, &arb_prog);
- glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
- static_cast<GLsizei>(arb.size()), arb.data());
-#endif
- const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
- if (err && *err) {
- LOG_CRITICAL(Render_OpenGL, "{}", err);
- LOG_INFO(Render_OpenGL, "\n{}", arb);
- }
- } else {
- const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
- OGLShader shader;
- shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
-
- program->source_program.Create(true, hint_retrievable, shader.handle);
- }
-
- return program;
-}
-
-Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_,
- ProgramSharedPtr program_, bool is_built_)
- : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
- is_built{is_built_} {
- handle = program->assembly_program.handle;
- if (handle == 0) {
- handle = program->source_program.handle;
- }
- if (is_built) {
- ASSERT(handle != 0);
- }
-}
+Shader::Shader() = default;
Shader::~Shader() = default;
-GLuint Shader::GetHandle() const {
- DEBUG_ASSERT(registry->IsConsistent());
- return handle;
-}
-
-bool Shader::IsBuilt() const {
- return is_built;
-}
-
-void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
- program->source_program = std::move(new_program);
- handle = program->source_program.handle;
- is_built = true;
-}
-
-void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
- program->assembly_program = std::move(new_program);
- handle = program->assembly_program.handle;
- is_built = true;
-}
-
-std::unique_ptr<Shader> Shader::CreateStageFromMemory(
- const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
- ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
- const auto shader_type = GetShaderType(program_type);
-
- auto& gpu = params.gpu;
- gpu.ShaderNotify().MarkSharderBuilding();
-
- auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
- if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
- const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- // TODO(Rodrigo): Handle VertexA shaders
- // std::optional<ShaderIR> ir_b;
- // if (!code_b.empty()) {
- // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
- // }
- auto program =
- BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
- ShaderDiskCacheEntry entry;
- entry.type = shader_type;
- entry.code = std::move(code);
- entry.code_b = std::move(code_b);
- entry.unique_identifier = params.unique_identifier;
- entry.bound_buffer = registry->GetBoundBuffer();
- entry.graphics_info = registry->GetGraphicsInfo();
- entry.keys = registry->GetKeys();
- entry.bound_samplers = registry->GetBoundSamplers();
- entry.bindless_samplers = registry->GetBindlessSamplers();
- params.disk_cache.SaveEntry(std::move(entry));
-
- gpu.ShaderNotify().MarkShaderComplete();
-
- return std::unique_ptr<Shader>(new Shader(std::move(registry),
- MakeEntries(params.device, ir, shader_type),
- std::move(program), true));
- } else {
- // Required for entries
- const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- auto entries = MakeEntries(params.device, ir, shader_type);
-
- async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
- std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
- COMPILER_SETTINGS, *registry, cpu_addr);
-
- auto program = std::make_shared<ProgramHandle>();
- return std::unique_ptr<Shader>(
- new Shader(std::move(registry), std::move(entries), std::move(program), false));
- }
-}
-
-std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
- ProgramCode code) {
- auto& gpu = params.gpu;
- gpu.ShaderNotify().MarkSharderBuilding();
-
- auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
- const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- const u64 uid = params.unique_identifier;
- auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
-
- ShaderDiskCacheEntry entry;
- entry.type = ShaderType::Compute;
- entry.code = std::move(code);
- entry.unique_identifier = uid;
- entry.bound_buffer = registry->GetBoundBuffer();
- entry.compute_info = registry->GetComputeInfo();
- entry.keys = registry->GetKeys();
- entry.bound_samplers = registry->GetBoundSamplers();
- entry.bindless_samplers = registry->GetBindlessSamplers();
- params.disk_cache.SaveEntry(std::move(entry));
-
- gpu.ShaderNotify().MarkShaderComplete();
-
- return std::unique_ptr<Shader>(new Shader(std::move(registry),
- MakeEntries(params.device, ir, ShaderType::Compute),
- std::move(program)));
-}
-
-std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader) {
- return std::unique_ptr<Shader>(new Shader(
- precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
-}
-
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -331,278 +43,4 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
-void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading,
- const VideoCore::DiskResourceLoadCallback& callback) {
- disk_cache.BindTitleID(title_id);
- const std::optional transferable = disk_cache.LoadTransferable();
-
- LOG_INFO(Render_OpenGL, "Total Shader Count: {}",
- transferable.has_value() ? transferable->size() : 0);
-
- if (!transferable) {
- return;
- }
-
- std::vector<ShaderDiskCachePrecompiled> gl_cache;
- if (!device.UseAssemblyShaders() && !device.UseDriverCache()) {
- // Only load precompiled cache when we are not using assembly shaders
- gl_cache = disk_cache.LoadPrecompiled();
- }
- const auto supported_formats = GetSupportedFormats();
-
- // Track if precompiled cache was altered during loading to know if we have to
- // serialize the virtual precompiled cache file back to the hard drive
- bool precompiled_cache_altered = false;
-
- // Inform the frontend about shader build initialization
- if (callback) {
- callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
- }
-
- std::mutex mutex;
- std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
- std::atomic_bool gl_cache_failed = false;
-
- const auto find_precompiled = [&gl_cache](u64 id) {
- return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier);
- };
-
- const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
- std::size_t end) {
- const auto scope = context->Acquire();
-
- for (std::size_t i = begin; i < end; ++i) {
- if (stop_loading.stop_requested()) {
- return;
- }
- const auto& entry = (*transferable)[i];
- const u64 uid = entry.unique_identifier;
- const auto it = find_precompiled(uid);
- const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
-
- const bool is_compute = entry.type == ShaderType::Compute;
- const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
- auto registry = MakeRegistry(entry);
- const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
-
- ProgramSharedPtr program;
- if (precompiled_entry) {
- // If the shader is precompiled, attempt to load it with
- program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
- if (!program) {
- gl_cache_failed = true;
- }
- }
- if (!program) {
- // Otherwise compile it from GLSL
- program = BuildShader(device, entry.type, uid, ir, *registry, true);
- }
-
- PrecompiledShader shader;
- shader.program = std::move(program);
- shader.registry = std::move(registry);
- shader.entries = MakeEntries(device, ir, entry.type);
-
- std::scoped_lock lock{mutex};
- if (callback) {
- callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
- transferable->size());
- }
- runtime_cache.emplace(entry.unique_identifier, std::move(shader));
- }
- };
-
- const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
- const std::size_t bucket_size{transferable->size() / num_workers};
- std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
- std::vector<std::thread> threads(num_workers);
- for (std::size_t i = 0; i < num_workers; ++i) {
- const bool is_last_worker = i + 1 == num_workers;
- const std::size_t start{bucket_size * i};
- const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
-
- // On some platforms the shared context has to be created from the GUI thread
- contexts[i] = emu_window.CreateSharedContext();
- threads[i] = std::thread(worker, contexts[i].get(), start, end);
- }
- for (auto& thread : threads) {
- thread.join();
- }
-
- if (gl_cache_failed) {
- // Invalidate the precompiled cache if a shader dumped shader was rejected
- disk_cache.InvalidatePrecompiled();
- precompiled_cache_altered = true;
- return;
- }
- if (stop_loading.stop_requested()) {
- return;
- }
-
- if (device.UseAssemblyShaders() || device.UseDriverCache()) {
- // Don't store precompiled binaries for assembly shaders or when using the driver cache
- return;
- }
-
- // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
- // before precompiling them
-
- for (std::size_t i = 0; i < transferable->size(); ++i) {
- const u64 id = (*transferable)[i].unique_identifier;
- const auto it = find_precompiled(id);
- if (it == gl_cache.end()) {
- const GLuint program = runtime_cache.at(id).program->source_program.handle;
- disk_cache.SavePrecompiled(id, program);
- precompiled_cache_altered = true;
- }
- }
-
- if (precompiled_cache_altered) {
- disk_cache.SaveVirtualPrecompiledFile();
- }
-}
-
-ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
- const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
- const std::unordered_set<GLenum>& supported_formats) {
- if (!supported_formats.contains(precompiled_entry.binary_format)) {
- LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
- return {};
- }
-
- auto program = std::make_shared<ProgramHandle>();
- GLuint& handle = program->source_program.handle;
- handle = glCreateProgram();
- glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
- glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
- static_cast<GLsizei>(precompiled_entry.binary.size()));
-
- GLint link_status;
- glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
- if (link_status == GL_FALSE) {
- LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
- return {};
- }
-
- return program;
-}
-
-Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
- VideoCommon::Shader::AsyncShaders& async_shaders) {
- if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
- auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
- if (last_shader->IsBuilt()) {
- return last_shader;
- }
- }
-
- const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
-
- if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
- auto completed_work = async_shaders.GetCompletedWork();
- for (auto& work : completed_work) {
- Shader* shader = TryGet(work.cpu_address);
- gpu.ShaderNotify().MarkShaderComplete();
- if (shader == nullptr) {
- continue;
- }
- using namespace VideoCommon::Shader;
- if (work.backend == AsyncShaders::Backend::OpenGL) {
- shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
- } else if (work.backend == AsyncShaders::Backend::GLASM) {
- shader->AsyncGLASMBuilt(std::move(work.program.glasm));
- }
-
- auto& registry = shader->GetRegistry();
-
- ShaderDiskCacheEntry entry;
- entry.type = work.shader_type;
- entry.code = std::move(work.code);
- entry.code_b = std::move(work.code_b);
- entry.unique_identifier = work.uid;
- entry.bound_buffer = registry.GetBoundBuffer();
- entry.graphics_info = registry.GetGraphicsInfo();
- entry.keys = registry.GetKeys();
- entry.bound_samplers = registry.GetBoundSamplers();
- entry.bindless_samplers = registry.GetBindlessSamplers();
- disk_cache.SaveEntry(std::move(entry));
- }
- }
-
- // Look up shader in the cache based on address
- const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
- if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
- return last_shaders[static_cast<std::size_t>(program)] = shader;
- }
-
- const u8* const host_ptr{gpu_memory.GetPointer(address)};
-
- // No shader found - create a new one
- ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
- ProgramCode code_b;
- if (program == Maxwell::ShaderProgram::VertexA) {
- const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
- const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
- code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
- }
- const std::size_t code_size = code.size() * sizeof(u64);
-
- const u64 unique_identifier = GetUniqueIdentifier(
- GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
-
- const ShaderParameters params{gpu, maxwell3d, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
-
- std::unique_ptr<Shader> shader;
- const auto found = runtime_cache.find(unique_identifier);
- if (found == runtime_cache.end()) {
- shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
- async_shaders, cpu_addr.value_or(0));
- } else {
- shader = Shader::CreateFromCache(params, found->second);
- }
-
- Shader* const result = shader.get();
- if (cpu_addr) {
- Register(std::move(shader), *cpu_addr, code_size);
- } else {
- null_shader = std::move(shader);
- }
-
- return last_shaders[static_cast<std::size_t>(program)] = result;
-}
-
-Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
- const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
-
- if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
- return kernel;
- }
-
- // No kernel found, create a new one
- const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
- ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
- const std::size_t code_size{code.size() * sizeof(u64)};
- const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
-
- const ShaderParameters params{gpu, kepler_compute, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
-
- std::unique_ptr<Shader> kernel;
- const auto found = runtime_cache.find(unique_identifier);
- if (found == runtime_cache.end()) {
- kernel = Shader::CreateKernelFromMemory(params, std::move(code));
- } else {
- kernel = Shader::CreateFromCache(params, found->second);
- }
-
- Shader* const result = kernel.get();
- if (cpu_addr) {
- Register(std::move(kernel), *cpu_addr, code_size);
- } else {
- null_kernel = std::move(kernel);
- }
- return result;
-}
-
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b30308b6f..ad3d15a76 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -19,10 +19,6 @@
#include "common/common_types.h"
#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h"
namespace Tegra {
@@ -33,10 +29,6 @@ namespace Core::Frontend {
class EmuWindow;
}
-namespace VideoCommon::Shader {
-class AsyncShaders;
-}
-
namespace OpenGL {
class Device;
@@ -44,77 +36,10 @@ class RasterizerOpenGL;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-struct ProgramHandle {
- OGLProgram source_program;
- OGLAssemblyProgram assembly_program;
-};
-using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
-
-struct PrecompiledShader {
- ProgramSharedPtr program;
- std::shared_ptr<VideoCommon::Shader::Registry> registry;
- ShaderEntries entries;
-};
-
-struct ShaderParameters {
- Tegra::GPU& gpu;
- Tegra::Engines::ConstBufferEngineInterface& engine;
- ShaderDiskCacheOpenGL& disk_cache;
- const Device& device;
- VAddr cpu_addr;
- const u8* host_ptr;
- u64 unique_identifier;
-};
-
-ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
- u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- bool hint_retrievable = false);
-
-class Shader final {
+class Shader {
public:
+ explicit Shader();
~Shader();
-
- /// Gets the GL program handle for the shader
- GLuint GetHandle() const;
-
- bool IsBuilt() const;
-
- /// Gets the shader entries for the shader
- const ShaderEntries& GetEntries() const {
- return entries;
- }
-
- const VideoCommon::Shader::Registry& GetRegistry() const {
- return *registry;
- }
-
- /// Mark a OpenGL shader as built
- void AsyncOpenGLBuilt(OGLProgram new_program);
-
- /// Mark a GLASM shader as built
- void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
-
- static std::unique_ptr<Shader> CreateStageFromMemory(
- const ShaderParameters& params, Maxwell::ShaderProgram program_type,
- ProgramCode program_code, ProgramCode program_code_b,
- VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
-
- static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
- ProgramCode code);
-
- static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader);
-
-private:
- explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
- ProgramSharedPtr program, bool is_built_ = true);
-
- std::shared_ptr<VideoCommon::Shader::Registry> registry;
- ShaderEntries entries;
- ProgramSharedPtr program;
- GLuint handle = 0;
- bool is_built{};
};
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
@@ -126,36 +51,13 @@ public:
Tegra::MemoryManager& gpu_memory_, const Device& device_);
~ShaderCacheOpenGL() override;
- /// Loads disk cache for the current game
- void LoadDiskCache(u64 title_id, std::stop_token stop_loading,
- const VideoCore::DiskResourceLoadCallback& callback);
-
- /// Gets the current specified shader stage program
- Shader* GetStageProgram(Maxwell::ShaderProgram program,
- VideoCommon::Shader::AsyncShaders& async_shaders);
-
- /// Gets a compute kernel in the passed address
- Shader* GetComputeKernel(GPUVAddr code_addr);
-
private:
- ProgramSharedPtr GeneratePrecompiledProgram(
- const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
- const std::unordered_set<GLenum>& supported_formats);
-
Core::Frontend::EmuWindow& emu_window;
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
const Device& device;
-
- ShaderDiskCacheOpenGL disk_cache;
- std::unordered_map<u64, PrecompiledShader> runtime_cache;
-
- std::unique_ptr<Shader> null_shader;
- std::unique_ptr<Shader> null_kernel;
-
- std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
deleted file mode 100644
index 9c28498e8..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ /dev/null
@@ -1,2986 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <array>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <variant>
-#include <vector>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/div_ceil.h"
-#include "common/logging/log.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/shader/ast.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/shader_ir.h"
-#include "video_core/shader/transform_feedback.h"
-
-namespace OpenGL {
-
-namespace {
-
-using Tegra::Engines::ShaderType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::Header;
-using Tegra::Shader::IpaInterpMode;
-using Tegra::Shader::IpaMode;
-using Tegra::Shader::IpaSampleMode;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using Tegra::Shader::TextureType;
-
-using namespace VideoCommon::Shader;
-using namespace std::string_literals;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using Operation = const OperationNode&;
-
-class ASTDecompiler;
-class ExprDecompiler;
-
-enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
-
-constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
-
-constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
-constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
-
-struct TextureOffset {};
-struct TextureDerivates {};
-using TextureArgument = std::pair<Type, Node>;
-using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
-
-constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
-constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
-
-constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
-#define ftou floatBitsToUint
-#define itof intBitsToFloat
-#define utof uintBitsToFloat
-
-bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
- bvec2 is_nan1 = isnan(pair1);
- bvec2 is_nan2 = isnan(pair2);
- return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
-}}
-
-const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
-const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
-)";
-
-class ShaderWriter final {
-public:
- void AddExpression(std::string_view text) {
- DEBUG_ASSERT(scope >= 0);
- if (!text.empty()) {
- AppendIndentation();
- }
- shader_source += text;
- }
-
- // Forwards all arguments directly to libfmt.
- // Note that all formatting requirements for fmt must be
- // obeyed when using this function. (e.g. {{ must be used
- // printing the character '{' is desirable. Ditto for }} and '}',
- // etc).
- template <typename... Args>
- void AddLine(std::string_view text, Args&&... args) {
- AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...));
- AddNewLine();
- }
-
- void AddNewLine() {
- DEBUG_ASSERT(scope >= 0);
- shader_source += '\n';
- }
-
- std::string GenerateTemporary() {
- return fmt::format("tmp{}", temporary_index++);
- }
-
- std::string GetResult() {
- return std::move(shader_source);
- }
-
- s32 scope = 0;
-
-private:
- void AppendIndentation() {
- shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
- }
-
- std::string shader_source;
- u32 temporary_index = 1;
-};
-
-class Expression final {
-public:
- Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} {
- ASSERT(type != Type::Void);
- }
- Expression() : type{Type::Void} {}
-
- Type GetType() const {
- return type;
- }
-
- std::string GetCode() const {
- return code;
- }
-
- void CheckVoid() const {
- ASSERT(type == Type::Void);
- }
-
- std::string As(Type type_) const {
- switch (type_) {
- case Type::Bool:
- return AsBool();
- case Type::Bool2:
- return AsBool2();
- case Type::Float:
- return AsFloat();
- case Type::Int:
- return AsInt();
- case Type::Uint:
- return AsUint();
- case Type::HalfFloat:
- return AsHalfFloat();
- default:
- UNREACHABLE_MSG("Invalid type");
- return code;
- }
- }
-
- std::string AsBool() const {
- switch (type) {
- case Type::Bool:
- return code;
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsBool2() const {
- switch (type) {
- case Type::Bool2:
- return code;
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsFloat() const {
- switch (type) {
- case Type::Float:
- return code;
- case Type::Uint:
- return fmt::format("utof({})", code);
- case Type::Int:
- return fmt::format("itof({})", code);
- case Type::HalfFloat:
- return fmt::format("utof(packHalf2x16({}))", code);
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsInt() const {
- switch (type) {
- case Type::Float:
- return fmt::format("ftoi({})", code);
- case Type::Uint:
- return fmt::format("int({})", code);
- case Type::Int:
- return code;
- case Type::HalfFloat:
- return fmt::format("int(packHalf2x16({}))", code);
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsUint() const {
- switch (type) {
- case Type::Float:
- return fmt::format("ftou({})", code);
- case Type::Uint:
- return code;
- case Type::Int:
- return fmt::format("uint({})", code);
- case Type::HalfFloat:
- return fmt::format("packHalf2x16({})", code);
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsHalfFloat() const {
- switch (type) {
- case Type::Float:
- return fmt::format("unpackHalf2x16(ftou({}))", code);
- case Type::Uint:
- return fmt::format("unpackHalf2x16({})", code);
- case Type::Int:
- return fmt::format("unpackHalf2x16(int({}))", code);
- case Type::HalfFloat:
- return code;
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
-private:
- std::string code;
- Type type{};
-};
-
-const char* GetTypeString(Type type) {
- switch (type) {
- case Type::Bool:
- return "bool";
- case Type::Bool2:
- return "bvec2";
- case Type::Float:
- return "float";
- case Type::Int:
- return "int";
- case Type::Uint:
- return "uint";
- case Type::HalfFloat:
- return "vec2";
- default:
- UNREACHABLE_MSG("Invalid type");
- return "<invalid type>";
- }
-}
-
-const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
- switch (image_type) {
- case Tegra::Shader::ImageType::Texture1D:
- return "1D";
- case Tegra::Shader::ImageType::TextureBuffer:
- return "Buffer";
- case Tegra::Shader::ImageType::Texture1DArray:
- return "1DArray";
- case Tegra::Shader::ImageType::Texture2D:
- return "2D";
- case Tegra::Shader::ImageType::Texture2DArray:
- return "2DArray";
- case Tegra::Shader::ImageType::Texture3D:
- return "3D";
- default:
- UNREACHABLE();
- return "1D";
- }
-}
-
-/// Describes primitive behavior on geometry shaders
-std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
- switch (topology) {
- case Maxwell::PrimitiveTopology::Points:
- return {"points", 1};
- case Maxwell::PrimitiveTopology::Lines:
- case Maxwell::PrimitiveTopology::LineStrip:
- return {"lines", 2};
- case Maxwell::PrimitiveTopology::LinesAdjacency:
- case Maxwell::PrimitiveTopology::LineStripAdjacency:
- return {"lines_adjacency", 4};
- case Maxwell::PrimitiveTopology::Triangles:
- case Maxwell::PrimitiveTopology::TriangleStrip:
- case Maxwell::PrimitiveTopology::TriangleFan:
- return {"triangles", 3};
- case Maxwell::PrimitiveTopology::TrianglesAdjacency:
- case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
- return {"triangles_adjacency", 6};
- default:
- UNIMPLEMENTED_MSG("topology={}", topology);
- return {"points", 1};
- }
-}
-
-/// Generates code to use for a swizzle operation.
-constexpr const char* GetSwizzle(std::size_t element) {
- constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
- return swizzle.at(element);
-}
-
-constexpr const char* GetColorSwizzle(std::size_t element) {
- constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
- return swizzle.at(element);
-}
-
-/// Translate topology
-std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
- switch (topology) {
- case Tegra::Shader::OutputTopology::PointList:
- return "points";
- case Tegra::Shader::OutputTopology::LineStrip:
- return "line_strip";
- case Tegra::Shader::OutputTopology::TriangleStrip:
- return "triangle_strip";
- default:
- UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
- return "points";
- }
-}
-
-/// Returns true if an object has to be treated as precise
-bool IsPrecise(Operation operand) {
- const auto& meta{operand.GetMeta()};
- if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
- return arithmetic->precise;
- }
- return false;
-}
-
-bool IsPrecise(const Node& node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- return IsPrecise(*operation);
- }
- return false;
-}
-
-constexpr bool IsGenericAttribute(Attribute::Index index) {
- return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
-}
-
-constexpr bool IsLegacyTexCoord(Attribute::Index index) {
- return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) &&
- static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7);
-}
-
-constexpr Attribute::Index ToGenericAttribute(u64 value) {
- return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
-}
-
-constexpr int GetLegacyTexCoordIndex(Attribute::Index index) {
- return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0);
-}
-
-u32 GetGenericAttributeIndex(Attribute::Index index) {
- ASSERT(IsGenericAttribute(index));
- return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
-}
-
-constexpr const char* GetFlowStackPrefix(MetaStackClass stack) {
- switch (stack) {
- case MetaStackClass::Ssy:
- return "ssy";
- case MetaStackClass::Pbk:
- return "pbk";
- }
- return {};
-}
-
-std::string FlowStackName(MetaStackClass stack) {
- return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack));
-}
-
-std::string FlowStackTopName(MetaStackClass stack) {
- return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
-}
-
-struct GenericVaryingDescription {
- std::string name;
- u8 first_element = 0;
- bool is_scalar = false;
-};
-
-class GLSLDecompiler final {
-public:
- explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
- ShaderType stage_, std::string_view identifier_,
- std::string_view suffix_)
- : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
- identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
- if (stage != ShaderType::Compute) {
- transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
- }
- }
-
- void Decompile() {
- DeclareHeader();
- DeclareVertex();
- DeclareGeometry();
- DeclareFragment();
- DeclareCompute();
- DeclareInputAttributes();
- DeclareOutputAttributes();
- DeclareImages();
- DeclareSamplers();
- DeclareGlobalMemory();
- DeclareConstantBuffers();
- DeclareLocalMemory();
- DeclareRegisters();
- DeclarePredicates();
- DeclareInternalFlags();
- DeclareCustomVariables();
- DeclarePhysicalAttributeReader();
-
- code.AddLine("void main() {{");
- ++code.scope;
-
- if (stage == ShaderType::Vertex) {
- code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
- }
-
- if (ir.IsDecompiled()) {
- DecompileAST();
- } else {
- DecompileBranchMode();
- }
-
- --code.scope;
- code.AddLine("}}");
- }
-
- std::string GetResult() {
- return code.GetResult();
- }
-
-private:
- friend class ASTDecompiler;
- friend class ExprDecompiler;
-
- void DecompileBranchMode() {
- // VM's program counter
- const auto first_address = ir.GetBasicBlocks().begin()->first;
- code.AddLine("uint jmp_to = {}U;", first_address);
-
- // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
- // unlikely that shaders will use 20 nested SSYs and PBKs.
- constexpr u32 FLOW_STACK_SIZE = 20;
- if (!ir.IsFlowStackDisabled()) {
- for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
- code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
- code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
- }
- }
-
- code.AddLine("while (true) {{");
- ++code.scope;
-
- code.AddLine("switch (jmp_to) {{");
-
- for (const auto& pair : ir.GetBasicBlocks()) {
- const auto& [address, bb] = pair;
- code.AddLine("case 0x{:X}U: {{", address);
- ++code.scope;
-
- VisitBlock(bb);
-
- --code.scope;
- code.AddLine("}}");
- }
-
- code.AddLine("default: return;");
- code.AddLine("}}");
-
- --code.scope;
- code.AddLine("}}");
- }
-
- void DecompileAST();
-
- void DeclareHeader() {
- if (!identifier.empty()) {
- code.AddLine("// {}", identifier);
- }
- const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
- code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
- code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
- if (device.HasShaderBallot()) {
- code.AddLine("#extension GL_ARB_shader_ballot : require");
- }
- if (device.HasVertexViewportLayer()) {
- code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
- }
- if (device.HasImageLoadFormatted()) {
- code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
- }
- if (device.HasTextureShadowLod()) {
- code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
- }
- if (device.HasWarpIntrinsics()) {
- code.AddLine("#extension GL_NV_gpu_shader5 : require");
- code.AddLine("#extension GL_NV_shader_thread_group : require");
- code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
- }
- // This pragma stops Nvidia's driver from over optimizing math (probably using fp16
- // operations) on places where we don't want to.
- // Thanks to Ryujinx for finding this workaround.
- code.AddLine("#pragma optionNV(fastmath off)");
-
- code.AddNewLine();
-
- code.AddLine(COMMON_DECLARATIONS);
- }
-
- void DeclareVertex() {
- if (stage != ShaderType::Vertex) {
- return;
- }
-
- DeclareVertexRedeclarations();
- }
-
- void DeclareGeometry() {
- if (stage != ShaderType::Geometry) {
- return;
- }
-
- const auto& info = registry.GetGraphicsInfo();
- const auto input_topology = info.primitive_topology;
- const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
- max_input_vertices = max_vertices;
- code.AddLine("layout ({}) in;", glsl_topology);
-
- const auto topology = GetTopologyName(header.common3.output_topology);
- const auto max_output_vertices = header.common4.max_output_vertices.Value();
- code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
- code.AddNewLine();
-
- code.AddLine("in gl_PerVertex {{");
- ++code.scope;
- code.AddLine("vec4 gl_Position;");
- --code.scope;
- code.AddLine("}} gl_in[];");
-
- DeclareVertexRedeclarations();
- }
-
- void DeclareFragment() {
- if (stage != ShaderType::Fragment) {
- return;
- }
- if (ir.UsesLegacyVaryings()) {
- code.AddLine("in gl_PerFragment {{");
- ++code.scope;
- code.AddLine("vec4 gl_TexCoord[8];");
- code.AddLine("vec4 gl_Color;");
- code.AddLine("vec4 gl_SecondaryColor;");
- --code.scope;
- code.AddLine("}};");
- }
-
- for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
- code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
- }
- }
-
- void DeclareCompute() {
- if (stage != ShaderType::Compute) {
- return;
- }
- const auto& info = registry.GetComputeInfo();
- if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
- const u32 limit = device.GetMaxComputeSharedMemorySize();
- if (size > limit) {
- LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
- size, limit);
- size = limit;
- }
-
- code.AddLine("shared uint smem[{}];", size / 4);
- code.AddNewLine();
- }
- code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
- info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
- code.AddNewLine();
- }
-
- void DeclareVertexRedeclarations() {
- code.AddLine("out gl_PerVertex {{");
- ++code.scope;
-
- auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
- if (!pos_xfb.empty()) {
- pos_xfb = fmt::format("layout ({}) ", pos_xfb);
- }
- const char* pos_type =
- FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
- code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
-
- for (const auto attribute : ir.GetOutputAttributes()) {
- if (attribute == Attribute::Index::ClipDistances0123 ||
- attribute == Attribute::Index::ClipDistances4567) {
- code.AddLine("float gl_ClipDistance[];");
- break;
- }
- }
-
- if (stage != ShaderType::Geometry &&
- (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) {
- if (ir.UsesLayer()) {
- code.AddLine("int gl_Layer;");
- }
- if (ir.UsesViewportIndex()) {
- code.AddLine("int gl_ViewportIndex;");
- }
- } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex &&
- !device.HasVertexViewportLayer()) {
- LOG_ERROR(
- Render_OpenGL,
- "GL_ARB_shader_viewport_layer_array is not available and its required by a shader");
- }
-
- if (ir.UsesPointSize()) {
- code.AddLine("float gl_PointSize;");
- }
-
- if (ir.UsesLegacyVaryings()) {
- code.AddLine("vec4 gl_TexCoord[8];");
- code.AddLine("vec4 gl_FrontColor;");
- code.AddLine("vec4 gl_FrontSecondaryColor;");
- code.AddLine("vec4 gl_BackColor;");
- code.AddLine("vec4 gl_BackSecondaryColor;");
- }
-
- --code.scope;
- code.AddLine("}};");
- code.AddNewLine();
-
- if (stage == ShaderType::Geometry) {
- if (ir.UsesLayer()) {
- code.AddLine("out int gl_Layer;");
- }
- if (ir.UsesViewportIndex()) {
- code.AddLine("out int gl_ViewportIndex;");
- }
- }
- code.AddNewLine();
- }
-
- void DeclareRegisters() {
- const auto& registers = ir.GetRegisters();
- for (const u32 gpr : registers) {
- code.AddLine("float {} = 0.0f;", GetRegister(gpr));
- }
- if (!registers.empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclareCustomVariables() {
- const u32 num_custom_variables = ir.GetNumCustomVariables();
- for (u32 i = 0; i < num_custom_variables; ++i) {
- code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
- }
- if (num_custom_variables > 0) {
- code.AddNewLine();
- }
- }
-
- void DeclarePredicates() {
- const auto& predicates = ir.GetPredicates();
- for (const auto pred : predicates) {
- code.AddLine("bool {} = false;", GetPredicate(pred));
- }
- if (!predicates.empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclareLocalMemory() {
- u64 local_memory_size = 0;
- if (stage == ShaderType::Compute) {
- local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
- } else {
- local_memory_size = header.GetLocalMemorySize();
- }
- if (local_memory_size == 0) {
- return;
- }
- const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
- code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
- code.AddNewLine();
- }
-
- void DeclareInternalFlags() {
- for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
- const auto flag_code = static_cast<InternalFlag>(flag);
- code.AddLine("bool {} = false;", GetInternalFlag(flag_code));
- }
- code.AddNewLine();
- }
-
- const char* GetInputFlags(PixelImap attribute) {
- switch (attribute) {
- case PixelImap::Perspective:
- return "smooth";
- case PixelImap::Constant:
- return "flat";
- case PixelImap::ScreenLinear:
- return "noperspective";
- case PixelImap::Unused:
- break;
- }
- UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
- return {};
- }
-
- void DeclareInputAttributes() {
- if (ir.HasPhysicalAttributes()) {
- const u32 num_inputs{GetNumPhysicalInputAttributes()};
- for (u32 i = 0; i < num_inputs; ++i) {
- DeclareInputAttribute(ToGenericAttribute(i), true);
- }
- code.AddNewLine();
- return;
- }
-
- const auto& attributes = ir.GetInputAttributes();
- for (const auto index : attributes) {
- if (IsGenericAttribute(index)) {
- DeclareInputAttribute(index, false);
- }
- }
- if (!attributes.empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
- const u32 location{GetGenericAttributeIndex(index)};
-
- std::string name{GetGenericInputAttribute(index)};
- if (stage == ShaderType::Geometry) {
- name = "gs_" + name + "[]";
- }
-
- std::string suffix_;
- if (stage == ShaderType::Fragment) {
- const auto input_mode{header.ps.GetPixelImap(location)};
- if (input_mode == PixelImap::Unused) {
- return;
- }
- suffix_ = GetInputFlags(input_mode);
- }
-
- code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name);
- }
-
- void DeclareOutputAttributes() {
- if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) {
- for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
- DeclareOutputAttribute(ToGenericAttribute(i));
- }
- code.AddNewLine();
- return;
- }
-
- const auto& attributes = ir.GetOutputAttributes();
- for (const auto index : attributes) {
- if (IsGenericAttribute(index)) {
- DeclareOutputAttribute(index);
- }
- }
- if (!attributes.empty()) {
- code.AddNewLine();
- }
- }
-
- std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
- const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
- const auto it = transform_feedback.find(location);
- if (it == transform_feedback.end()) {
- return std::nullopt;
- }
- return it->second.components;
- }
-
- std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
- const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
- const auto it = transform_feedback.find(location);
- if (it == transform_feedback.end()) {
- return {};
- }
-
- const VaryingTFB& tfb = it->second;
- return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
- tfb.offset, tfb.stride);
- }
-
- void DeclareOutputAttribute(Attribute::Index index) {
- static constexpr std::string_view swizzle = "xyzw";
- u8 element = 0;
- while (element < 4) {
- auto xfb = GetTransformFeedbackDecoration(index, element);
- if (!xfb.empty()) {
- xfb = fmt::format(", {}", xfb);
- }
- const std::size_t remainder = 4 - element;
- const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
- const char* const type = FLOAT_TYPES.at(num_components - 1);
-
- const u32 location = GetGenericAttributeIndex(index);
-
- GenericVaryingDescription description;
- description.first_element = static_cast<u8>(element);
- description.is_scalar = num_components == 1;
- description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
- if (element != 0 || num_components != 4) {
- const std::string_view name_swizzle = swizzle.substr(element, num_components);
- description.name = fmt::format("{}_{}", description.name, name_swizzle);
- }
- for (std::size_t i = 0; i < num_components; ++i) {
- const u8 offset = static_cast<u8>(location * 4 + element + i);
- varying_description.insert({offset, description});
- }
-
- code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
- xfb, type, description.name);
-
- element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
- }
- }
-
- void DeclareConstantBuffers() {
- u32 binding = device.GetBaseBindings(stage).uniform_buffer;
- for (const auto& [index, info] : ir.GetConstantBuffers()) {
- const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
- const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
- code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
- GetConstBufferBlock(index));
- code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
- code.AddLine("}};");
- code.AddNewLine();
- }
- }
-
- void DeclareGlobalMemory() {
- u32 binding = device.GetBaseBindings(stage).shader_storage_buffer;
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- // Since we don't know how the shader will use the shader, hint the driver to disable as
- // much optimizations as possible
- std::string qualifier = "coherent volatile";
- if (usage.is_read && !usage.is_written) {
- qualifier += " readonly";
- } else if (usage.is_written && !usage.is_read) {
- qualifier += " writeonly";
- }
-
- code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier,
- GetGlobalMemoryBlock(base));
- code.AddLine(" uint {}[];", GetGlobalMemory(base));
- code.AddLine("}};");
- code.AddNewLine();
- }
- }
-
- void DeclareSamplers() {
- u32 binding = device.GetBaseBindings(stage).sampler;
- for (const auto& sampler : ir.GetSamplers()) {
- const std::string name = GetSampler(sampler);
- const std::string description = fmt::format("layout (binding = {}) uniform", binding);
- binding += sampler.is_indexed ? sampler.size : 1;
-
- std::string sampler_type = [&]() {
- if (sampler.is_buffer) {
- return "samplerBuffer";
- }
- switch (sampler.type) {
- case TextureType::Texture1D:
- return "sampler1D";
- case TextureType::Texture2D:
- return "sampler2D";
- case TextureType::Texture3D:
- return "sampler3D";
- case TextureType::TextureCube:
- return "samplerCube";
- default:
- UNREACHABLE();
- return "sampler2D";
- }
- }();
- if (sampler.is_array) {
- sampler_type += "Array";
- }
- if (sampler.is_shadow) {
- sampler_type += "Shadow";
- }
-
- if (!sampler.is_indexed) {
- code.AddLine("{} {} {};", description, sampler_type, name);
- } else {
- code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size);
- }
- }
- if (!ir.GetSamplers().empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclarePhysicalAttributeReader() {
- if (!ir.HasPhysicalAttributes()) {
- return;
- }
- code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{");
- ++code.scope;
- code.AddLine("switch (physical_address) {{");
-
- // Just declare generic attributes for now.
- const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())};
- for (u32 index = 0; index < num_attributes; ++index) {
- const auto attribute{ToGenericAttribute(index)};
- for (u32 element = 0; element < 4; ++element) {
- constexpr u32 generic_base = 0x80;
- constexpr u32 generic_stride = 16;
- constexpr u32 element_stride = 4;
- const u32 address{generic_base + index * generic_stride + element * element_stride};
-
- const bool declared = stage != ShaderType::Fragment ||
- header.ps.GetPixelImap(index) != PixelImap::Unused;
- const std::string value =
- declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
- code.AddLine("case 0x{:X}U: return {};", address, value);
- }
- }
-
- code.AddLine("default: return 0;");
-
- code.AddLine("}}");
- --code.scope;
- code.AddLine("}}");
- code.AddNewLine();
- }
-
- void DeclareImages() {
- u32 binding = device.GetBaseBindings(stage).image;
- for (const auto& image : ir.GetImages()) {
- std::string qualifier = "coherent volatile";
- if (image.is_read && !image.is_written) {
- qualifier += " readonly";
- } else if (image.is_written && !image.is_read) {
- qualifier += " writeonly";
- }
-
- const char* format = image.is_atomic ? "r32ui, " : "";
- const char* type_declaration = GetImageTypeDeclaration(image.type);
- code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++,
- qualifier, type_declaration, GetImage(image));
- }
- if (!ir.GetImages().empty()) {
- code.AddNewLine();
- }
- }
-
- void VisitBlock(const NodeBlock& bb) {
- for (const auto& node : bb) {
- Visit(node).CheckVoid();
- }
- }
-
- Expression Visit(const Node& node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- if (const auto amend_index = operation->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
- }
- const auto operation_index = static_cast<std::size_t>(operation->GetCode());
- if (operation_index >= operation_decompilers.size()) {
- UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
- return {};
- }
- const auto decompiler = operation_decompilers[operation_index];
- if (decompiler == nullptr) {
- UNREACHABLE_MSG("Undefined operation: {}", operation_index);
- return {};
- }
- return (this->*decompiler)(*operation);
- }
-
- if (const auto gpr = std::get_if<GprNode>(&*node)) {
- const u32 index = gpr->GetIndex();
- if (index == Register::ZeroIndex) {
- return {"0U", Type::Uint};
- }
- return {GetRegister(index), Type::Float};
- }
-
- if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
- const u32 index = cv->GetIndex();
- return {GetCustomVariable(index), Type::Float};
- }
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
- const u32 value = immediate->GetValue();
- if (value < 10) {
- // For eyecandy avoid using hex numbers on single digits
- return {fmt::format("{}U", immediate->GetValue()), Type::Uint};
- }
- return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint};
- }
-
- if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
- const auto value = [&]() -> std::string {
- switch (const auto index = predicate->GetIndex(); index) {
- case Tegra::Shader::Pred::UnusedIndex:
- return "true";
- case Tegra::Shader::Pred::NeverExecute:
- return "false";
- default:
- return GetPredicate(index);
- }
- }();
- if (predicate->IsNegated()) {
- return {fmt::format("!({})", value), Type::Bool};
- }
- return {value, Type::Bool};
- }
-
- if (const auto abuf = std::get_if<AbufNode>(&*node)) {
- UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry,
- "Physical attributes in geometry shaders are not implemented");
- if (abuf->IsPhysicalBuffer()) {
- return {fmt::format("ReadPhysicalAttribute({})",
- Visit(abuf->GetPhysicalAddress()).AsUint()),
- Type::Float};
- }
- return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
- }
-
- if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
- const Node offset = cbuf->GetOffset();
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
- // Direct access
- const u32 offset_imm = immediate->GetValue();
- ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
- return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
- offset_imm / (4 * 4), (offset_imm / 4) % 4),
- Type::Uint};
- }
-
- // Indirect access
- const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
-
- if (!device.HasComponentIndexingBug()) {
- return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
- final_offset, final_offset),
- Type::Uint};
- }
-
- // AMD's proprietary GLSL compiler emits ill code for variable component access.
- // To bypass this driver bug generate 4 ifs, one per each component.
- const std::string pack = code.GenerateTemporary();
- code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
- final_offset);
-
- const std::string result = code.GenerateTemporary();
- code.AddLine("uint {};", result);
- for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
- code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
- GetSwizzle(swizzle));
- }
- return {result, Type::Uint};
- }
-
- if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- const std::string real = Visit(gmem->GetRealAddress()).AsUint();
- const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
- const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
- return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
- Type::Uint};
- }
-
- if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- return {
- fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
- Type::Uint};
- }
-
- if (const auto smem = std::get_if<SmemNode>(&*node)) {
- return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
- }
-
- if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
- return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
- }
-
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- if (const auto amend_index = conditional->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
- }
- // It's invalid to call conditional on nested nodes, use an operation instead
- code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
- ++code.scope;
-
- VisitBlock(conditional->GetCode());
-
- --code.scope;
- code.AddLine("}}");
- return {};
- }
-
- if (const auto comment = std::get_if<CommentNode>(&*node)) {
- code.AddLine("// " + comment->GetText());
- return {};
- }
-
- UNREACHABLE();
- return {};
- }
-
- Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
- const auto GeometryPass = [&](std::string_view name) {
- if (stage == ShaderType::Geometry && buffer) {
- // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
- // set an 0x80000000 index for those and the shader fails to build. Find out why
- // this happens and what's its intent.
- return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
- max_input_vertices.value());
- }
- return std::string(name);
- };
-
- switch (attribute) {
- case Attribute::Index::Position:
- switch (stage) {
- case ShaderType::Geometry:
- return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
- GetSwizzle(element)),
- Type::Float};
- case ShaderType::Fragment:
- return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
- default:
- UNREACHABLE();
- return {"0", Type::Int};
- }
- case Attribute::Index::FrontColor:
- return {"gl_Color"s + GetSwizzle(element), Type::Float};
- case Attribute::Index::FrontSecondaryColor:
- return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float};
- case Attribute::Index::PointCoord:
- switch (element) {
- case 0:
- return {"gl_PointCoord.x", Type::Float};
- case 1:
- return {"gl_PointCoord.y", Type::Float};
- case 2:
- case 3:
- return {"0.0f", Type::Float};
- }
- UNREACHABLE();
- return {"0", Type::Int};
- case Attribute::Index::TessCoordInstanceIDVertexID:
- // TODO(Subv): Find out what the values are for the first two elements when inside a
- // vertex shader, and what's the value of the fourth element when inside a Tess Eval
- // shader.
- ASSERT(stage == ShaderType::Vertex);
- switch (element) {
- case 2:
- // Config pack's first value is instance_id.
- return {"gl_InstanceID", Type::Int};
- case 3:
- return {"gl_VertexID", Type::Int};
- }
- UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- return {"0", Type::Int};
- case Attribute::Index::FrontFacing:
- // TODO(Subv): Find out what the values are for the other elements.
- ASSERT(stage == ShaderType::Fragment);
- switch (element) {
- case 3:
- return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
- }
- UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
- return {"0", Type::Int};
- default:
- if (IsGenericAttribute(attribute)) {
- return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
- Type::Float};
- }
- if (IsLegacyTexCoord(attribute)) {
- UNIMPLEMENTED_IF(stage == ShaderType::Geometry);
- return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
- GetSwizzle(element)),
- Type::Float};
- }
- break;
- }
- UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
- return {"0", Type::Int};
- }
-
- Expression ApplyPrecise(Operation operation, std::string value, Type type) {
- if (!IsPrecise(operation)) {
- return {std::move(value), type};
- }
- // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to
- // be found in fragment shaders, so we disable precise there. There are vertex shaders that
- // also fail to build but nobody seems to care about those.
- // Note: Only bugged drivers will skip precise.
- const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment;
-
- std::string temporary = code.GenerateTemporary();
- code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
- temporary, value);
- return {std::move(temporary), type};
- }
-
- Expression VisitOperand(Operation operation, std::size_t operand_index) {
- const auto& operand = operation[operand_index];
- const bool parent_precise = IsPrecise(operation);
- const bool child_precise = IsPrecise(operand);
- const bool child_trivial = !std::holds_alternative<OperationNode>(*operand);
- if (!parent_precise || child_precise || child_trivial) {
- return Visit(operand);
- }
-
- Expression value = Visit(operand);
- std::string temporary = code.GenerateTemporary();
- code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode());
- return {std::move(temporary), value.GetType()};
- }
-
- std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
- const u32 element = abuf->GetElement();
- switch (const auto attribute = abuf->GetIndex()) {
- case Attribute::Index::Position:
- return {{"gl_Position"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::LayerViewportPointSize:
- switch (element) {
- case 0:
- UNIMPLEMENTED();
- return std::nullopt;
- case 1:
- if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return std::nullopt;
- }
- return {{"gl_Layer", Type::Int}};
- case 2:
- if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return std::nullopt;
- }
- return {{"gl_ViewportIndex", Type::Int}};
- case 3:
- return {{"gl_PointSize", Type::Float}};
- }
- return std::nullopt;
- case Attribute::Index::FrontColor:
- return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::FrontSecondaryColor:
- return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::BackColor:
- return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::BackSecondaryColor:
- return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::ClipDistances0123:
- return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}};
- case Attribute::Index::ClipDistances4567:
- return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}};
- default:
- if (IsGenericAttribute(attribute)) {
- return {{GetGenericOutputAttribute(attribute, element), Type::Float}};
- }
- if (IsLegacyTexCoord(attribute)) {
- return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
- GetSwizzle(element)),
- Type::Float}};
- }
- UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute);
- return std::nullopt;
- }
- }
-
- Expression GenerateUnary(Operation operation, std::string_view func, Type result_type,
- Type type_a) {
- std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a));
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateTernary(Operation operation, std::string_view func, Type result_type,
- Type type_a, Type type_b, Type type_c) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- const std::string op_c = VisitOperand(operation, 2).As(type_c);
- std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b, Type type_c, Type type_d) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- const std::string op_c = VisitOperand(operation, 2).As(type_c);
- const std::string op_d = VisitOperand(operation, 3).As(type_d);
- std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- std::string GenerateTexture(Operation operation, const std::string& function_suffix,
- const std::vector<TextureIR>& extras, bool separate_dc = false) {
- constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
-
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- const std::size_t count = operation.GetOperandsCount();
- const bool has_array = meta->sampler.is_array;
- const bool has_shadow = meta->sampler.is_shadow;
- const bool workaround_lod_array_shadow_as_grad =
- !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
- ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
- meta->sampler.type == TextureType::TextureCube);
-
- std::string expr = "texture";
-
- if (workaround_lod_array_shadow_as_grad) {
- expr += "Grad";
- } else {
- expr += function_suffix;
- }
-
- if (!meta->aoffi.empty()) {
- expr += "Offset";
- } else if (!meta->ptp.empty()) {
- expr += "Offsets";
- }
- if (!meta->sampler.is_indexed) {
- expr += '(' + GetSampler(meta->sampler) + ", ";
- } else {
- expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
- }
- expr += coord_constructors.at(count + (has_array ? 1 : 0) +
- (has_shadow && !separate_dc ? 1 : 0) - 1);
- expr += '(';
- for (std::size_t i = 0; i < count; ++i) {
- expr += Visit(operation[i]).AsFloat();
-
- const std::size_t next = i + 1;
- if (next < count)
- expr += ", ";
- }
- if (has_array) {
- expr += ", float(" + Visit(meta->array).AsInt() + ')';
- }
- if (has_shadow) {
- if (separate_dc) {
- expr += "), " + Visit(meta->depth_compare).AsFloat();
- } else {
- expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
- }
- } else {
- expr += ')';
- }
-
- if (workaround_lod_array_shadow_as_grad) {
- switch (meta->sampler.type) {
- case TextureType::Texture2D:
- return expr + ", vec2(0.0), vec2(0.0))";
- case TextureType::TextureCube:
- return expr + ", vec3(0.0), vec3(0.0))";
- default:
- UNREACHABLE();
- break;
- }
- }
-
- for (const auto& variant : extras) {
- if (const auto argument = std::get_if<TextureArgument>(&variant)) {
- expr += GenerateTextureArgument(*argument);
- } else if (std::holds_alternative<TextureOffset>(variant)) {
- if (!meta->aoffi.empty()) {
- expr += GenerateTextureAoffi(meta->aoffi);
- } else if (!meta->ptp.empty()) {
- expr += GenerateTexturePtp(meta->ptp);
- }
- } else if (std::holds_alternative<TextureDerivates>(variant)) {
- expr += GenerateTextureDerivates(meta->derivates);
- } else {
- UNREACHABLE();
- }
- }
-
- return expr + ')';
- }
-
- std::string GenerateTextureArgument(const TextureArgument& argument) {
- const auto& [type, operand] = argument;
- if (operand == nullptr) {
- return {};
- }
-
- std::string expr = ", ";
- switch (type) {
- case Type::Int:
- if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
- // Inline the string as an immediate integer in GLSL (some extra arguments are
- // required to be constant)
- expr += std::to_string(static_cast<s32>(immediate->GetValue()));
- } else {
- expr += Visit(operand).AsInt();
- }
- break;
- case Type::Float:
- expr += Visit(operand).AsFloat();
- break;
- default: {
- const auto type_int = static_cast<u32>(type);
- UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
- expr += '0';
- break;
- }
- }
- return expr;
- }
-
- std::string ReadTextureOffset(const Node& value) {
- if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
- // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
- // to be constant by the standard).
- return std::to_string(static_cast<s32>(immediate->GetValue()));
- } else if (device.HasVariableAoffi()) {
- // Avoid using variable AOFFI on unsupported devices.
- return Visit(value).AsInt();
- } else {
- // Insert 0 on devices not supporting variable AOFFI.
- return "0";
- }
- }
-
- std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
- if (aoffi.empty()) {
- return {};
- }
- constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"};
- std::string expr = ", ";
- expr += coord_constructors.at(aoffi.size() - 1);
- expr += '(';
-
- for (std::size_t index = 0; index < aoffi.size(); ++index) {
- expr += ReadTextureOffset(aoffi.at(index));
- if (index + 1 < aoffi.size()) {
- expr += ", ";
- }
- }
- expr += ')';
-
- return expr;
- }
-
- std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
- static constexpr std::size_t num_vectors = 4;
- ASSERT(ptp.size() == num_vectors * 2);
-
- std::string expr = ", ivec2[](";
- for (std::size_t vector = 0; vector < num_vectors; ++vector) {
- const bool has_next = vector + 1 < num_vectors;
- expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
- ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
- }
- expr += ')';
- return expr;
- }
-
- std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
- if (derivates.empty()) {
- return {};
- }
- constexpr std::array coord_constructors = {"float", "vec2", "vec3"};
- std::string expr = ", ";
- const std::size_t components = derivates.size() / 2;
- std::string dx = coord_constructors.at(components - 1);
- std::string dy = coord_constructors.at(components - 1);
- dx += '(';
- dy += '(';
-
- for (std::size_t index = 0; index < components; ++index) {
- const auto& operand_x{derivates.at(index * 2)};
- const auto& operand_y{derivates.at(index * 2 + 1)};
- dx += Visit(operand_x).AsFloat();
- dy += Visit(operand_y).AsFloat();
-
- if (index + 1 < components) {
- dx += ", ";
- dy += ", ";
- }
- }
- dx += ')';
- dy += ')';
- expr += dx + ", " + dy;
-
- return expr;
- }
-
- std::string BuildIntegerCoordinates(Operation operation) {
- constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
- const std::size_t coords_count{operation.GetOperandsCount()};
- std::string expr = constructors.at(coords_count - 1);
- for (std::size_t i = 0; i < coords_count; ++i) {
- expr += VisitOperand(operation, i).AsInt();
- if (i + 1 < coords_count) {
- expr += ", ";
- }
- }
- expr += ')';
- return expr;
- }
-
- std::string BuildImageValues(Operation operation) {
- constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"};
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
-
- const std::size_t values_count{meta.values.size()};
- std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
- for (std::size_t i = 0; i < values_count; ++i) {
- expr += Visit(meta.values.at(i)).AsUint();
- if (i + 1 < values_count) {
- expr += ", ";
- }
- }
- expr += ')';
- return expr;
- }
-
- Expression Assign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- Expression target;
- if (const auto gpr = std::get_if<GprNode>(&*dest)) {
- if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no op but we still have to visit the source
- // as it might have side effects.
- code.AddLine("{};", Visit(src).GetCode());
- return {};
- }
- target = {GetRegister(gpr->GetIndex()), Type::Float};
- } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
- UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
- auto output = GetOutputAttribute(abuf);
- if (!output) {
- return {};
- }
- target = std::move(*output);
- } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
- target = {
- fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
- Type::Uint};
- } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
- ASSERT(stage == ShaderType::Compute);
- target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
- } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- const std::string real = Visit(gmem->GetRealAddress()).AsUint();
- const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
- const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
- target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
- Type::Uint};
- } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
- target = {GetCustomVariable(cv->GetIndex()), Type::Float};
- } else {
- UNREACHABLE_MSG("Assign called without a proper target");
- }
-
- code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType()));
- return {};
- }
-
- template <Type type>
- Expression Add(Operation operation) {
- return GenerateBinaryInfix(operation, "+", type, type, type);
- }
-
- template <Type type>
- Expression Mul(Operation operation) {
- return GenerateBinaryInfix(operation, "*", type, type, type);
- }
-
- template <Type type>
- Expression Div(Operation operation) {
- return GenerateBinaryInfix(operation, "/", type, type, type);
- }
-
- template <Type type>
- Expression Fma(Operation operation) {
- return GenerateTernary(operation, "fma", type, type, type, type);
- }
-
- template <Type type>
- Expression Negate(Operation operation) {
- return GenerateUnary(operation, "-", type, type);
- }
-
- template <Type type>
- Expression Absolute(Operation operation) {
- return GenerateUnary(operation, "abs", type, type);
- }
-
- Expression FClamp(Operation operation) {
- return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
- Type::Float);
- }
-
- Expression FCastHalf0(Operation operation) {
- return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
- }
-
- Expression FCastHalf1(Operation operation) {
- return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
- }
-
- template <Type type>
- Expression Min(Operation operation) {
- return GenerateBinaryCall(operation, "min", type, type, type);
- }
-
- template <Type type>
- Expression Max(Operation operation) {
- return GenerateBinaryCall(operation, "max", type, type, type);
- }
-
- Expression Select(Operation operation) {
- const std::string condition = Visit(operation[0]).AsBool();
- const std::string true_case = Visit(operation[1]).AsUint();
- const std::string false_case = Visit(operation[2]).AsUint();
- std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
-
- return ApplyPrecise(operation, std::move(op_str), Type::Uint);
- }
-
- Expression FCos(Operation operation) {
- return GenerateUnary(operation, "cos", Type::Float, Type::Float);
- }
-
- Expression FSin(Operation operation) {
- return GenerateUnary(operation, "sin", Type::Float, Type::Float);
- }
-
- Expression FExp2(Operation operation) {
- return GenerateUnary(operation, "exp2", Type::Float, Type::Float);
- }
-
- Expression FLog2(Operation operation) {
- return GenerateUnary(operation, "log2", Type::Float, Type::Float);
- }
-
- Expression FInverseSqrt(Operation operation) {
- return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float);
- }
-
- Expression FSqrt(Operation operation) {
- return GenerateUnary(operation, "sqrt", Type::Float, Type::Float);
- }
-
- Expression FRoundEven(Operation operation) {
- return GenerateUnary(operation, "roundEven", Type::Float, Type::Float);
- }
-
- Expression FFloor(Operation operation) {
- return GenerateUnary(operation, "floor", Type::Float, Type::Float);
- }
-
- Expression FCeil(Operation operation) {
- return GenerateUnary(operation, "ceil", Type::Float, Type::Float);
- }
-
- Expression FTrunc(Operation operation) {
- return GenerateUnary(operation, "trunc", Type::Float, Type::Float);
- }
-
- template <Type type>
- Expression FCastInteger(Operation operation) {
- return GenerateUnary(operation, "float", Type::Float, type);
- }
-
- Expression FSwizzleAdd(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0).AsFloat();
- const std::string op_b = VisitOperand(operation, 1).AsFloat();
-
- if (!device.HasShaderBallot()) {
- LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
- return {fmt::format("{} + {}", op_a, op_b), Type::Float};
- }
-
- const std::string instr_mask = VisitOperand(operation, 2).AsUint();
- const std::string mask = code.GenerateTemporary();
- code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
- instr_mask);
-
- const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
- const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
- return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
- Type::Float};
- }
-
- Expression ICastFloat(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Float);
- }
-
- Expression ICastUnsigned(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Uint);
- }
-
- template <Type type>
- Expression LogicalShiftLeft(Operation operation) {
- return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
- }
-
- Expression ILogicalShiftRight(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0).AsUint();
- const std::string op_b = VisitOperand(operation, 1).AsUint();
- std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
-
- return ApplyPrecise(operation, std::move(op_str), Type::Int);
- }
-
- Expression IArithmeticShiftRight(Operation operation) {
- return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
- }
-
- template <Type type>
- Expression BitwiseAnd(Operation operation) {
- return GenerateBinaryInfix(operation, "&", type, type, type);
- }
-
- template <Type type>
- Expression BitwiseOr(Operation operation) {
- return GenerateBinaryInfix(operation, "|", type, type, type);
- }
-
- template <Type type>
- Expression BitwiseXor(Operation operation) {
- return GenerateBinaryInfix(operation, "^", type, type, type);
- }
-
- template <Type type>
- Expression BitwiseNot(Operation operation) {
- return GenerateUnary(operation, "~", type, type);
- }
-
- Expression UCastFloat(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Float);
- }
-
- Expression UCastSigned(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Int);
- }
-
- Expression UShiftRight(Operation operation) {
- return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
- }
-
- template <Type type>
- Expression BitfieldInsert(Operation operation) {
- return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
- Type::Int);
- }
-
- template <Type type>
- Expression BitfieldExtract(Operation operation) {
- return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
- }
-
- template <Type type>
- Expression BitCount(Operation operation) {
- return GenerateUnary(operation, "bitCount", type, type);
- }
-
- template <Type type>
- Expression BitMSB(Operation operation) {
- return GenerateUnary(operation, "findMSB", type, type);
- }
-
- Expression HNegate(Operation operation) {
- const auto GetNegate = [&](std::size_t index) {
- return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
- };
- return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(),
- GetNegate(1), GetNegate(2)),
- Type::HalfFloat};
- }
-
- Expression HClamp(Operation operation) {
- const std::string value = VisitOperand(operation, 0).AsHalfFloat();
- const std::string min = VisitOperand(operation, 1).AsFloat();
- const std::string max = VisitOperand(operation, 2).AsFloat();
- std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
-
- return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat);
- }
-
- Expression HCastFloat(Operation operation) {
- return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()),
- Type::HalfFloat};
- }
-
- Expression HUnpack(Operation operation) {
- Expression operand = VisitOperand(operation, 0);
- switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
- case Tegra::Shader::HalfType::H0_H1:
- return operand;
- case Tegra::Shader::HalfType::F32:
- return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat};
- case Tegra::Shader::HalfType::H0_H0:
- return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat};
- case Tegra::Shader::HalfType::H1_H1:
- return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
- }
- UNREACHABLE();
- return {"0", Type::Int};
- }
-
- Expression HMergeF32(Operation operation) {
- return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
- }
-
- Expression HMergeH0(Operation operation) {
- const std::string dest = VisitOperand(operation, 0).AsUint();
- const std::string src = VisitOperand(operation, 1).AsUint();
- return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
- Type::HalfFloat};
- }
-
- Expression HMergeH1(Operation operation) {
- const std::string dest = VisitOperand(operation, 0).AsUint();
- const std::string src = VisitOperand(operation, 1).AsUint();
- return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
- Type::HalfFloat};
- }
-
- Expression HPack2(Operation operation) {
- return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::HalfFloat};
- }
-
- template <const std::string_view& op, Type type, bool unordered = false>
- Expression Comparison(Operation operation) {
- static_assert(!unordered || type == Type::Float);
-
- Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
-
- if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
- // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's
- // and Nvidia's proprietary stacks. Manually force an ordered comparison.
- return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(),
- VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
- if constexpr (!unordered) {
- return expr;
- }
- // Unordered comparisons are always true for NaN operands.
- return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(),
- VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
-
- Expression FOrdered(Operation operation) {
- return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
-
- Expression FUnordered(Operation operation) {
- return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
-
- Expression LogicalAddCarry(Operation operation) {
- const std::string carry = code.GenerateTemporary();
- code.AddLine("uint {};", carry);
- code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(),
- VisitOperand(operation, 1).AsUint(), carry);
- return {fmt::format("({} != 0)", carry), Type::Bool};
- }
-
- Expression LogicalAssign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- std::string target;
-
- if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
- ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
-
- const auto index = pred->GetIndex();
- switch (index) {
- case Tegra::Shader::Pred::NeverExecute:
- case Tegra::Shader::Pred::UnusedIndex:
- // Writing to these predicates is a no-op
- return {};
- }
- target = GetPredicate(index);
- } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
- target = GetInternalFlag(flag->GetFlag());
- }
-
- code.AddLine("{} = {};", target, Visit(src).AsBool());
- return {};
- }
-
- Expression LogicalAnd(Operation operation) {
- return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
- }
-
- Expression LogicalOr(Operation operation) {
- return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
- }
-
- Expression LogicalXor(Operation operation) {
- return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
- }
-
- Expression LogicalNegate(Operation operation) {
- return GenerateUnary(operation, "!", Type::Bool, Type::Bool);
- }
-
- Expression LogicalPick2(Operation operation) {
- return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(),
- VisitOperand(operation, 1).AsUint()),
- Type::Bool};
- }
-
- Expression LogicalAnd2(Operation operation) {
- return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
- }
-
- template <bool with_nan>
- Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) {
- Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2,
- Type::HalfFloat, Type::HalfFloat);
- if constexpr (!with_nan) {
- return comparison;
- }
- return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(),
- VisitOperand(operation, 0).AsHalfFloat(),
- VisitOperand(operation, 1).AsHalfFloat()),
- Type::Bool2};
- }
-
- template <bool with_nan>
- Expression Logical2HLessThan(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "lessThan");
- }
-
- template <bool with_nan>
- Expression Logical2HEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "equal");
- }
-
- template <bool with_nan>
- Expression Logical2HLessEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
- }
-
- template <bool with_nan>
- Expression Logical2HGreaterThan(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "greaterThan");
- }
-
- template <bool with_nan>
- Expression Logical2HNotEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "notEqual");
- }
-
- template <bool with_nan>
- Expression Logical2HGreaterEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
- }
-
- Expression Texture(Operation operation) {
- const auto meta = std::get<MetaTexture>(operation.GetMeta());
- const bool separate_dc = meta.sampler.type == TextureType::TextureCube &&
- meta.sampler.is_array && meta.sampler.is_shadow;
- // TODO: Replace this with an array and make GenerateTexture use C++20 std::span
- const std::vector<TextureIR> extras{
- TextureOffset{},
- TextureArgument{Type::Float, meta.bias},
- };
- std::string expr = GenerateTexture(operation, "", extras, separate_dc);
- if (meta.sampler.is_shadow) {
- expr = fmt::format("vec4({})", expr);
- }
- return {expr + GetSwizzle(meta.element), Type::Float};
- }
-
- Expression TextureLod(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- std::string expr{};
-
- if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
- ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
- meta->sampler.type == TextureType::TextureCube)) {
- LOG_ERROR(Render_OpenGL,
- "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
- expr = GenerateTexture(operation, "Lod", {});
- } else {
- expr = GenerateTexture(operation, "Lod",
- {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
- }
-
- if (meta->sampler.is_shadow) {
- expr = "vec4(" + expr + ')';
- }
- return {expr + GetSwizzle(meta->element), Type::Float};
- }
-
- Expression TextureGather(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
-
- const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int;
- const bool separate_dc = meta.sampler.is_shadow;
-
- std::vector<TextureIR> ir_;
- if (meta.sampler.is_shadow) {
- ir_ = {TextureOffset{}};
- } else {
- ir_ = {TextureOffset{}, TextureArgument{type, meta.component}};
- }
- return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element),
- Type::Float};
- }
-
- Expression TextureQueryDimensions(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- const std::string sampler = GetSampler(meta->sampler);
- const std::string lod = VisitOperand(operation, 0).AsInt();
-
- switch (meta->element) {
- case 0:
- case 1:
- return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)),
- Type::Int};
- case 3:
- return {fmt::format("textureQueryLevels({})", sampler), Type::Int};
- }
- UNREACHABLE();
- return {"0", Type::Int};
- }
-
- Expression TextureQueryLod(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- if (meta->element < 2) {
- return {fmt::format("int(({} * vec2(256)){})",
- GenerateTexture(operation, "QueryLod", {}),
- GetSwizzle(meta->element)),
- Type::Int};
- }
- return {"0", Type::Int};
- }
-
- Expression TexelFetch(Operation operation) {
- constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"};
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
- UNIMPLEMENTED_IF(meta->sampler.is_array);
- const std::size_t count = operation.GetOperandsCount();
-
- std::string expr = "texelFetch(";
- expr += GetSampler(meta->sampler);
- expr += ", ";
-
- expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
- expr += '(';
- for (std::size_t i = 0; i < count; ++i) {
- if (i > 0) {
- expr += ", ";
- }
- expr += VisitOperand(operation, i).AsInt();
- }
- if (meta->array) {
- expr += ", ";
- expr += Visit(meta->array).AsInt();
- }
- expr += ')';
-
- if (meta->lod && !meta->sampler.is_buffer) {
- expr += ", ";
- expr += Visit(meta->lod).AsInt();
- }
- expr += ')';
- expr += GetSwizzle(meta->element);
-
- return {std::move(expr), Type::Float};
- }
-
- Expression TextureGradient(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- std::string expr =
- GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
- return {std::move(expr) + GetSwizzle(meta.element), Type::Float};
- }
-
- Expression ImageLoad(Operation operation) {
- if (!device.HasImageLoadFormatted()) {
- LOG_ERROR(Render_OpenGL,
- "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load");
- return {"0", Type::Int};
- }
-
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
- BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
- Type::Uint};
- }
-
- Expression ImageStore(Operation operation) {
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
- BuildIntegerCoordinates(operation), BuildImageValues(operation));
- return {};
- }
-
- template <const std::string_view& opname>
- Expression AtomicImage(Operation operation) {
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- ASSERT(meta.values.size() == 1);
-
- return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image),
- BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()),
- Type::Uint};
- }
-
- template <const std::string_view& opname, Type type>
- Expression Atomic(Operation operation) {
- if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) {
- UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations");
- return {};
- }
- return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
- Visit(operation[1]).AsUint()),
- Type::Uint};
- }
-
- template <const std::string_view& opname, Type type>
- Expression Reduce(Operation operation) {
- code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
- return {};
- }
-
- Expression Branch(Operation operation) {
- const auto target = std::get_if<ImmediateNode>(&*operation[0]);
- UNIMPLEMENTED_IF(!target);
-
- code.AddLine("jmp_to = 0x{:X}U;", target->GetValue());
- code.AddLine("break;");
- return {};
- }
-
- Expression BranchIndirect(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0).AsUint();
-
- code.AddLine("jmp_to = {};", op_a);
- code.AddLine("break;");
- return {};
- }
-
- Expression PushFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- const auto target = std::get_if<ImmediateNode>(&*operation[0]);
- UNIMPLEMENTED_IF(!target);
-
- code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack),
- target->GetValue());
- return {};
- }
-
- Expression PopFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
- code.AddLine("break;");
- return {};
- }
-
- void PreExit() {
- if (stage != ShaderType::Fragment) {
- return;
- }
- const auto& used_registers = ir.GetRegisters();
- const auto SafeGetRegister = [&](u32 reg) -> Expression {
- // TODO(Rodrigo): Replace with contains once C++20 releases
- if (used_registers.find(reg) != used_registers.end()) {
- return {GetRegister(reg), Type::Float};
- }
- return {"0.0f", Type::Float};
- };
-
- UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
-
- // Write the color outputs using the data in the shader registers, disabled
- // rendertargets/components are skipped in the register assignment.
- u32 current_reg = 0;
- for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
- // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
- for (u32 component = 0; component < 4; ++component) {
- if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
- code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
- SafeGetRegister(current_reg).AsFloat());
- ++current_reg;
- }
- }
- }
- if (header.ps.omap.depth) {
- // The depth output is always 2 registers after the last color output, and current_reg
- // already contains one past the last color register.
- code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
- }
- }
-
- Expression Exit(Operation operation) {
- PreExit();
- code.AddLine("return;");
- return {};
- }
-
- Expression Discard(Operation operation) {
- // Enclose "discard" in a conditional, so that GLSL compilation does not complain
- // about unexecuted instructions that may follow this.
- code.AddLine("if (true) {{");
- ++code.scope;
- code.AddLine("discard;");
- --code.scope;
- code.AddLine("}}");
- return {};
- }
-
- Expression EmitVertex(Operation operation) {
- ASSERT_MSG(stage == ShaderType::Geometry,
- "EmitVertex is expected to be used in a geometry shader.");
- code.AddLine("EmitVertex();");
- return {};
- }
-
- Expression EndPrimitive(Operation operation) {
- ASSERT_MSG(stage == ShaderType::Geometry,
- "EndPrimitive is expected to be used in a geometry shader.");
- code.AddLine("EndPrimitive();");
- return {};
- }
-
- Expression InvocationId(Operation operation) {
- return {"gl_InvocationID", Type::Int};
- }
-
- Expression YNegate(Operation operation) {
- // Y_NEGATE is mapped to this uniform value
- return {"gl_FrontMaterial.ambient.a", Type::Float};
- }
-
- template <u32 element>
- Expression LocalInvocationId(Operation) {
- return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint};
- }
-
- template <u32 element>
- Expression WorkGroupId(Operation) {
- return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint};
- }
-
- Expression BallotThread(Operation operation) {
- const std::string value = VisitOperand(operation, 0).AsBool();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
- // Stub on non-Nvidia devices by simulating all threads voting the same as the active
- // one.
- return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
- }
- return {fmt::format("ballotThreadNV({})", value), Type::Uint};
- }
-
- Expression Vote(Operation operation, const char* func) {
- const std::string value = VisitOperand(operation, 0).AsBool();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
- // Stub with a warp size of one.
- return {value, Type::Bool};
- }
- return {fmt::format("{}({})", func, value), Type::Bool};
- }
-
- Expression VoteAll(Operation operation) {
- return Vote(operation, "allThreadsNV");
- }
-
- Expression VoteAny(Operation operation) {
- return Vote(operation, "anyThreadNV");
- }
-
- Expression VoteEqual(Operation operation) {
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
- // We must return true here since a stub for a theoretical warp size of 1.
- // This will always return an equal result across all votes.
- return {"true", Type::Bool};
- }
- return Vote(operation, "allThreadsEqualNV");
- }
-
- Expression ThreadId(Operation operation) {
- if (!device.HasShaderBallot()) {
- LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
- return {"0U", Type::Uint};
- }
- return {"gl_SubGroupInvocationARB", Type::Uint};
- }
-
- template <const std::string_view& comparison>
- Expression ThreadMask(Operation) {
- if (device.HasWarpIntrinsics()) {
- return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
- }
- if (device.HasShaderBallot()) {
- return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
- }
- LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
- return {"0U", Type::Uint};
- }
-
- Expression ShuffleIndexed(Operation operation) {
- std::string value = VisitOperand(operation, 0).AsFloat();
-
- if (!device.HasShaderBallot()) {
- LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
- return {std::move(value), Type::Float};
- }
-
- const std::string index = VisitOperand(operation, 1).AsUint();
- return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
- }
-
- Expression Barrier(Operation) {
- if (!ir.IsDecompiled()) {
- LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
- return {};
- }
- code.AddLine("barrier();");
- return {};
- }
-
- Expression MemoryBarrierGroup(Operation) {
- code.AddLine("groupMemoryBarrier();");
- return {};
- }
-
- Expression MemoryBarrierGlobal(Operation) {
- code.AddLine("memoryBarrier();");
- return {};
- }
-
- struct Func final {
- Func() = delete;
- ~Func() = delete;
-
- static constexpr std::string_view LessThan = "<";
- static constexpr std::string_view Equal = "==";
- static constexpr std::string_view LessEqual = "<=";
- static constexpr std::string_view GreaterThan = ">";
- static constexpr std::string_view NotEqual = "!=";
- static constexpr std::string_view GreaterEqual = ">=";
-
- static constexpr std::string_view Eq = "Eq";
- static constexpr std::string_view Ge = "Ge";
- static constexpr std::string_view Gt = "Gt";
- static constexpr std::string_view Le = "Le";
- static constexpr std::string_view Lt = "Lt";
-
- static constexpr std::string_view Add = "Add";
- static constexpr std::string_view Min = "Min";
- static constexpr std::string_view Max = "Max";
- static constexpr std::string_view And = "And";
- static constexpr std::string_view Or = "Or";
- static constexpr std::string_view Xor = "Xor";
- static constexpr std::string_view Exchange = "Exchange";
- };
-
- static constexpr std::array operation_decompilers = {
- &GLSLDecompiler::Assign,
-
- &GLSLDecompiler::Select,
-
- &GLSLDecompiler::Add<Type::Float>,
- &GLSLDecompiler::Mul<Type::Float>,
- &GLSLDecompiler::Div<Type::Float>,
- &GLSLDecompiler::Fma<Type::Float>,
- &GLSLDecompiler::Negate<Type::Float>,
- &GLSLDecompiler::Absolute<Type::Float>,
- &GLSLDecompiler::FClamp,
- &GLSLDecompiler::FCastHalf0,
- &GLSLDecompiler::FCastHalf1,
- &GLSLDecompiler::Min<Type::Float>,
- &GLSLDecompiler::Max<Type::Float>,
- &GLSLDecompiler::FCos,
- &GLSLDecompiler::FSin,
- &GLSLDecompiler::FExp2,
- &GLSLDecompiler::FLog2,
- &GLSLDecompiler::FInverseSqrt,
- &GLSLDecompiler::FSqrt,
- &GLSLDecompiler::FRoundEven,
- &GLSLDecompiler::FFloor,
- &GLSLDecompiler::FCeil,
- &GLSLDecompiler::FTrunc,
- &GLSLDecompiler::FCastInteger<Type::Int>,
- &GLSLDecompiler::FCastInteger<Type::Uint>,
- &GLSLDecompiler::FSwizzleAdd,
-
- &GLSLDecompiler::Add<Type::Int>,
- &GLSLDecompiler::Mul<Type::Int>,
- &GLSLDecompiler::Div<Type::Int>,
- &GLSLDecompiler::Negate<Type::Int>,
- &GLSLDecompiler::Absolute<Type::Int>,
- &GLSLDecompiler::Min<Type::Int>,
- &GLSLDecompiler::Max<Type::Int>,
-
- &GLSLDecompiler::ICastFloat,
- &GLSLDecompiler::ICastUnsigned,
- &GLSLDecompiler::LogicalShiftLeft<Type::Int>,
- &GLSLDecompiler::ILogicalShiftRight,
- &GLSLDecompiler::IArithmeticShiftRight,
- &GLSLDecompiler::BitwiseAnd<Type::Int>,
- &GLSLDecompiler::BitwiseOr<Type::Int>,
- &GLSLDecompiler::BitwiseXor<Type::Int>,
- &GLSLDecompiler::BitwiseNot<Type::Int>,
- &GLSLDecompiler::BitfieldInsert<Type::Int>,
- &GLSLDecompiler::BitfieldExtract<Type::Int>,
- &GLSLDecompiler::BitCount<Type::Int>,
- &GLSLDecompiler::BitMSB<Type::Int>,
-
- &GLSLDecompiler::Add<Type::Uint>,
- &GLSLDecompiler::Mul<Type::Uint>,
- &GLSLDecompiler::Div<Type::Uint>,
- &GLSLDecompiler::Min<Type::Uint>,
- &GLSLDecompiler::Max<Type::Uint>,
- &GLSLDecompiler::UCastFloat,
- &GLSLDecompiler::UCastSigned,
- &GLSLDecompiler::LogicalShiftLeft<Type::Uint>,
- &GLSLDecompiler::UShiftRight,
- &GLSLDecompiler::UShiftRight,
- &GLSLDecompiler::BitwiseAnd<Type::Uint>,
- &GLSLDecompiler::BitwiseOr<Type::Uint>,
- &GLSLDecompiler::BitwiseXor<Type::Uint>,
- &GLSLDecompiler::BitwiseNot<Type::Uint>,
- &GLSLDecompiler::BitfieldInsert<Type::Uint>,
- &GLSLDecompiler::BitfieldExtract<Type::Uint>,
- &GLSLDecompiler::BitCount<Type::Uint>,
- &GLSLDecompiler::BitMSB<Type::Uint>,
-
- &GLSLDecompiler::Add<Type::HalfFloat>,
- &GLSLDecompiler::Mul<Type::HalfFloat>,
- &GLSLDecompiler::Fma<Type::HalfFloat>,
- &GLSLDecompiler::Absolute<Type::HalfFloat>,
- &GLSLDecompiler::HNegate,
- &GLSLDecompiler::HClamp,
- &GLSLDecompiler::HCastFloat,
- &GLSLDecompiler::HUnpack,
- &GLSLDecompiler::HMergeF32,
- &GLSLDecompiler::HMergeH0,
- &GLSLDecompiler::HMergeH1,
- &GLSLDecompiler::HPack2,
-
- &GLSLDecompiler::LogicalAssign,
- &GLSLDecompiler::LogicalAnd,
- &GLSLDecompiler::LogicalOr,
- &GLSLDecompiler::LogicalXor,
- &GLSLDecompiler::LogicalNegate,
- &GLSLDecompiler::LogicalPick2,
- &GLSLDecompiler::LogicalAnd2,
-
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>,
- &GLSLDecompiler::FOrdered,
- &GLSLDecompiler::FUnordered,
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>,
-
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Int>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>,
-
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>,
-
- &GLSLDecompiler::LogicalAddCarry,
-
- &GLSLDecompiler::Logical2HLessThan<false>,
- &GLSLDecompiler::Logical2HEqual<false>,
- &GLSLDecompiler::Logical2HLessEqual<false>,
- &GLSLDecompiler::Logical2HGreaterThan<false>,
- &GLSLDecompiler::Logical2HNotEqual<false>,
- &GLSLDecompiler::Logical2HGreaterEqual<false>,
- &GLSLDecompiler::Logical2HLessThan<true>,
- &GLSLDecompiler::Logical2HEqual<true>,
- &GLSLDecompiler::Logical2HLessEqual<true>,
- &GLSLDecompiler::Logical2HGreaterThan<true>,
- &GLSLDecompiler::Logical2HNotEqual<true>,
- &GLSLDecompiler::Logical2HGreaterEqual<true>,
-
- &GLSLDecompiler::Texture,
- &GLSLDecompiler::TextureLod,
- &GLSLDecompiler::TextureGather,
- &GLSLDecompiler::TextureQueryDimensions,
- &GLSLDecompiler::TextureQueryLod,
- &GLSLDecompiler::TexelFetch,
- &GLSLDecompiler::TextureGradient,
-
- &GLSLDecompiler::ImageLoad,
- &GLSLDecompiler::ImageStore,
-
- &GLSLDecompiler::AtomicImage<Func::Add>,
- &GLSLDecompiler::AtomicImage<Func::And>,
- &GLSLDecompiler::AtomicImage<Func::Or>,
- &GLSLDecompiler::AtomicImage<Func::Xor>,
- &GLSLDecompiler::AtomicImage<Func::Exchange>,
-
- &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Min, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Max, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::And, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Or, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>,
-
- &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Add, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Min, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Max, Type::Int>,
- &GLSLDecompiler::Atomic<Func::And, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
-
- &GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::And, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,
-
- &GLSLDecompiler::Reduce<Func::Add, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Min, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Max, Type::Int>,
- &GLSLDecompiler::Reduce<Func::And, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Or, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Xor, Type::Int>,
-
- &GLSLDecompiler::Branch,
- &GLSLDecompiler::BranchIndirect,
- &GLSLDecompiler::PushFlowStack,
- &GLSLDecompiler::PopFlowStack,
- &GLSLDecompiler::Exit,
- &GLSLDecompiler::Discard,
-
- &GLSLDecompiler::EmitVertex,
- &GLSLDecompiler::EndPrimitive,
-
- &GLSLDecompiler::InvocationId,
- &GLSLDecompiler::YNegate,
- &GLSLDecompiler::LocalInvocationId<0>,
- &GLSLDecompiler::LocalInvocationId<1>,
- &GLSLDecompiler::LocalInvocationId<2>,
- &GLSLDecompiler::WorkGroupId<0>,
- &GLSLDecompiler::WorkGroupId<1>,
- &GLSLDecompiler::WorkGroupId<2>,
-
- &GLSLDecompiler::BallotThread,
- &GLSLDecompiler::VoteAll,
- &GLSLDecompiler::VoteAny,
- &GLSLDecompiler::VoteEqual,
-
- &GLSLDecompiler::ThreadId,
- &GLSLDecompiler::ThreadMask<Func::Eq>,
- &GLSLDecompiler::ThreadMask<Func::Ge>,
- &GLSLDecompiler::ThreadMask<Func::Gt>,
- &GLSLDecompiler::ThreadMask<Func::Le>,
- &GLSLDecompiler::ThreadMask<Func::Lt>,
- &GLSLDecompiler::ShuffleIndexed,
-
- &GLSLDecompiler::Barrier,
- &GLSLDecompiler::MemoryBarrierGroup,
- &GLSLDecompiler::MemoryBarrierGlobal,
- };
- static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
-
- std::string GetRegister(u32 index) const {
- return AppendSuffix(index, "gpr");
- }
-
- std::string GetCustomVariable(u32 index) const {
- return AppendSuffix(index, "custom_var");
- }
-
- std::string GetPredicate(Tegra::Shader::Pred pred) const {
- return AppendSuffix(static_cast<u32>(pred), "pred");
- }
-
- std::string GetGenericInputAttribute(Attribute::Index attribute) const {
- return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
- }
-
- std::unordered_map<u8, GenericVaryingDescription> varying_description;
-
- std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
- const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
- const auto& description = varying_description.at(offset);
- if (description.is_scalar) {
- return description.name;
- }
- return fmt::format("{}[{}]", description.name, element - description.first_element);
- }
-
- std::string GetConstBuffer(u32 index) const {
- return AppendSuffix(index, "cbuf");
- }
-
- std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
- return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
- }
-
- std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
- return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
- suffix);
- }
-
- std::string GetConstBufferBlock(u32 index) const {
- return AppendSuffix(index, "cbuf_block");
- }
-
- std::string GetLocalMemory() const {
- if (suffix.empty()) {
- return "lmem";
- } else {
- return "lmem_" + std::string{suffix};
- }
- }
-
- std::string GetInternalFlag(InternalFlag flag) const {
- constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
- "overflow_flag"};
- const auto index = static_cast<u32>(flag);
- ASSERT(index < static_cast<u32>(InternalFlag::Amount));
-
- if (suffix.empty()) {
- return InternalFlagNames[index];
- } else {
- return fmt::format("{}_{}", InternalFlagNames[index], suffix);
- }
- }
-
- std::string GetSampler(const SamplerEntry& sampler) const {
- return AppendSuffix(sampler.index, "sampler");
- }
-
- std::string GetImage(const ImageEntry& image) const {
- return AppendSuffix(image.index, "image");
- }
-
- std::string AppendSuffix(u32 index, std::string_view name) const {
- if (suffix.empty()) {
- return fmt::format("{}{}", name, index);
- } else {
- return fmt::format("{}{}_{}", name, index, suffix);
- }
- }
-
- u32 GetNumPhysicalInputAttributes() const {
- return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
- }
-
- u32 GetNumPhysicalAttributes() const {
- return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes);
- }
-
- u32 GetNumPhysicalVaryings() const {
- return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
- }
-
- const Device& device;
- const ShaderIR& ir;
- const Registry& registry;
- const ShaderType stage;
- const std::string_view identifier;
- const std::string_view suffix;
- const Header header;
- std::unordered_map<u8, VaryingTFB> transform_feedback;
-
- ShaderWriter code;
-
- std::optional<u32> max_input_vertices;
-};
-
-std::string GetFlowVariable(u32 index) {
- return fmt::format("flow_var{}", index);
-}
-
-class ExprDecompiler {
-public:
- explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
-
- void operator()(const ExprAnd& expr) {
- inner += '(';
- std::visit(*this, *expr.operand1);
- inner += " && ";
- std::visit(*this, *expr.operand2);
- inner += ')';
- }
-
- void operator()(const ExprOr& expr) {
- inner += '(';
- std::visit(*this, *expr.operand1);
- inner += " || ";
- std::visit(*this, *expr.operand2);
- inner += ')';
- }
-
- void operator()(const ExprNot& expr) {
- inner += '!';
- std::visit(*this, *expr.operand1);
- }
-
- void operator()(const ExprPredicate& expr) {
- const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
- inner += decomp.GetPredicate(pred);
- }
-
- void operator()(const ExprCondCode& expr) {
- inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
- }
-
- void operator()(const ExprVar& expr) {
- inner += GetFlowVariable(expr.var_index);
- }
-
- void operator()(const ExprBoolean& expr) {
- inner += expr.value ? "true" : "false";
- }
-
- void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
- inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
- }
-
- const std::string& GetResult() const {
- return inner;
- }
-
-private:
- GLSLDecompiler& decomp;
- std::string inner;
-};
-
-class ASTDecompiler {
-public:
- explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
-
- void operator()(const ASTProgram& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(const ASTIfThen& ast) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
- decomp.code.scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
-
- void operator()(const ASTIfElse& ast) {
- decomp.code.AddLine("else {{");
- decomp.code.scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
-
- void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
- UNREACHABLE();
- }
-
- void operator()(const ASTBlockDecoded& ast) {
- decomp.VisitBlock(ast.nodes);
- }
-
- void operator()(const ASTVarSet& ast) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult());
- }
-
- void operator()(const ASTLabel& ast) {
- decomp.code.AddLine("// Label_{}:", ast.index);
- }
-
- void operator()([[maybe_unused]] const ASTGoto& ast) {
- UNREACHABLE();
- }
-
- void operator()(const ASTDoWhile& ast) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("do {{");
- decomp.code.scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.code.scope--;
- decomp.code.AddLine("}} while({});", expr_parser.GetResult());
- }
-
- void operator()(const ASTReturn& ast) {
- const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
- if (!is_true) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
- decomp.code.scope++;
- }
- if (ast.kills) {
- decomp.code.AddLine("discard;");
- } else {
- decomp.PreExit();
- decomp.code.AddLine("return;");
- }
- if (!is_true) {
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
- }
-
- void operator()(const ASTBreak& ast) {
- const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
- if (!is_true) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
- decomp.code.scope++;
- }
- decomp.code.AddLine("break;");
- if (!is_true) {
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
- }
-
- void Visit(const ASTNode& node) {
- std::visit(*this, *node->GetInnerData());
- }
-
-private:
- GLSLDecompiler& decomp;
-};
-
-void GLSLDecompiler::DecompileAST() {
- const u32 num_flow_variables = ir.GetASTNumVariables();
- for (u32 i = 0; i < num_flow_variables; i++) {
- code.AddLine("bool {} = false;", GetFlowVariable(i));
- }
-
- ASTDecompiler decompiler{*this};
- decompiler.Visit(ir.GetASTProgram());
-}
-
-} // Anonymous namespace
-
-ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
- ShaderEntries entries;
- for (const auto& cbuf : ir.GetConstantBuffers()) {
- entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
- cbuf.first);
- }
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
- usage.is_written);
- }
- for (const auto& sampler : ir.GetSamplers()) {
- entries.samplers.emplace_back(sampler);
- }
- for (const auto& image : ir.GetImages()) {
- entries.images.emplace_back(image);
- }
- const auto clip_distances = ir.GetClipDistances();
- for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
- entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
- }
- for (const auto& buffer : entries.const_buffers) {
- entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
- }
- entries.shader_length = ir.GetLength();
- return entries;
-}
-
-std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
- ShaderType stage, std::string_view identifier,
- std::string_view suffix) {
- GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
- decompiler.Decompile();
- return decompiler.GetResult();
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
deleted file mode 100644
index 0397a000c..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <vector>
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace OpenGL {
-
-class Device;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using SamplerEntry = VideoCommon::Shader::SamplerEntry;
-using ImageEntry = VideoCommon::Shader::ImageEntry;
-
-class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
-public:
- explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
- : ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
-
- u32 GetIndex() const {
- return index;
- }
-
-private:
- u32 index = 0;
-};
-
-struct GlobalMemoryEntry {
- constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
- bool is_written_)
- : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
- is_written_} {}
-
- u32 cbuf_index = 0;
- u32 cbuf_offset = 0;
- bool is_read = false;
- bool is_written = false;
-};
-
-struct ShaderEntries {
- std::vector<ConstBufferEntry> const_buffers;
- std::vector<GlobalMemoryEntry> global_memory_entries;
- std::vector<SamplerEntry> samplers;
- std::vector<ImageEntry> images;
- std::size_t shader_length{};
- u32 clip_distances{};
- u32 enabled_uniform_buffers{};
-};
-
-ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- Tegra::Engines::ShaderType stage);
-
-std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- Tegra::Engines::ShaderType stage, std::string_view identifier,
- std::string_view suffix = {});
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
deleted file mode 100644
index 0deb86517..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ /dev/null
@@ -1,482 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cstring>
-
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/fs/file.h"
-#include "common/fs/fs.h"
-#include "common/fs/path_util.h"
-#include "common/logging/log.h"
-#include "common/scm_rev.h"
-#include "common/settings.h"
-#include "common/zstd_compression.h"
-#include "core/core.h"
-#include "core/hle/kernel/k_process.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-
-namespace OpenGL {
-
-using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::BindlessSamplerMap;
-using VideoCommon::Shader::BoundSamplerMap;
-using VideoCommon::Shader::KeyMap;
-using VideoCommon::Shader::SeparateSamplerKey;
-using ShaderCacheVersionHash = std::array<u8, 64>;
-
-struct ConstBufferKey {
- u32 cbuf = 0;
- u32 offset = 0;
- u32 value = 0;
-};
-
-struct BoundSamplerEntry {
- u32 offset = 0;
- Tegra::Engines::SamplerDescriptor sampler;
-};
-
-struct SeparateSamplerEntry {
- u32 cbuf1 = 0;
- u32 cbuf2 = 0;
- u32 offset1 = 0;
- u32 offset2 = 0;
- Tegra::Engines::SamplerDescriptor sampler;
-};
-
-struct BindlessSamplerEntry {
- u32 cbuf = 0;
- u32 offset = 0;
- Tegra::Engines::SamplerDescriptor sampler;
-};
-
-namespace {
-
-constexpr u32 NativeVersion = 21;
-
-ShaderCacheVersionHash GetShaderCacheVersionHash() {
- ShaderCacheVersionHash hash{};
- const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
- std::memcpy(hash.data(), Common::g_shader_cache_version, length);
- return hash;
-}
-
-} // Anonymous namespace
-
-ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
-
-ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
-
-bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
- if (!file.ReadObject(type)) {
- return false;
- }
- u32 code_size;
- u32 code_size_b;
- if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) {
- return false;
- }
- code.resize(code_size);
- code_b.resize(code_size_b);
- if (file.Read(code) != code_size) {
- return false;
- }
- if (HasProgramA() && file.Read(code_b) != code_size_b) {
- return false;
- }
-
- u8 is_texture_handler_size_known;
- u32 texture_handler_size_value;
- u32 num_keys;
- u32 num_bound_samplers;
- u32 num_separate_samplers;
- u32 num_bindless_samplers;
- if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) ||
- !file.ReadObject(is_texture_handler_size_known) ||
- !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) ||
- !file.ReadObject(compute_info) || !file.ReadObject(num_keys) ||
- !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) ||
- !file.ReadObject(num_bindless_samplers)) {
- return false;
- }
- if (is_texture_handler_size_known) {
- texture_handler_size = texture_handler_size_value;
- }
-
- std::vector<ConstBufferKey> flat_keys(num_keys);
- std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
- std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
- std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
- if (file.Read(flat_keys) != flat_keys.size() ||
- file.Read(flat_bound_samplers) != flat_bound_samplers.size() ||
- file.Read(flat_separate_samplers) != flat_separate_samplers.size() ||
- file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) {
- return false;
- }
- for (const auto& entry : flat_keys) {
- keys.insert({{entry.cbuf, entry.offset}, entry.value});
- }
- for (const auto& entry : flat_bound_samplers) {
- bound_samplers.emplace(entry.offset, entry.sampler);
- }
- for (const auto& entry : flat_separate_samplers) {
- SeparateSamplerKey key;
- key.buffers = {entry.cbuf1, entry.cbuf2};
- key.offsets = {entry.offset1, entry.offset2};
- separate_samplers.emplace(key, entry.sampler);
- }
- for (const auto& entry : flat_bindless_samplers) {
- bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
- }
-
- return true;
-}
-
-bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
- if (!file.WriteObject(static_cast<u32>(type)) ||
- !file.WriteObject(static_cast<u32>(code.size())) ||
- !file.WriteObject(static_cast<u32>(code_b.size()))) {
- return false;
- }
- if (file.Write(code) != code.size()) {
- return false;
- }
- if (HasProgramA() && file.Write(code_b) != code_b.size()) {
- return false;
- }
-
- if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) ||
- !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) ||
- !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) ||
- !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) ||
- !file.WriteObject(static_cast<u32>(bound_samplers.size())) ||
- !file.WriteObject(static_cast<u32>(separate_samplers.size())) ||
- !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) {
- return false;
- }
-
- std::vector<ConstBufferKey> flat_keys;
- flat_keys.reserve(keys.size());
- for (const auto& [address, value] : keys) {
- flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
- }
-
- std::vector<BoundSamplerEntry> flat_bound_samplers;
- flat_bound_samplers.reserve(bound_samplers.size());
- for (const auto& [address, sampler] : bound_samplers) {
- flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
- }
-
- std::vector<SeparateSamplerEntry> flat_separate_samplers;
- flat_separate_samplers.reserve(separate_samplers.size());
- for (const auto& [key, sampler] : separate_samplers) {
- SeparateSamplerEntry entry;
- std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
- std::tie(entry.offset1, entry.offset2) = key.offsets;
- entry.sampler = sampler;
- flat_separate_samplers.push_back(entry);
- }
-
- std::vector<BindlessSamplerEntry> flat_bindless_samplers;
- flat_bindless_samplers.reserve(bindless_samplers.size());
- for (const auto& [address, sampler] : bindless_samplers) {
- flat_bindless_samplers.push_back(
- BindlessSamplerEntry{address.first, address.second, sampler});
- }
-
- return file.Write(flat_keys) == flat_keys.size() &&
- file.Write(flat_bound_samplers) == flat_bound_samplers.size() &&
- file.Write(flat_separate_samplers) == flat_separate_samplers.size() &&
- file.Write(flat_bindless_samplers) == flat_bindless_samplers.size();
-}
-
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
-
-ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
-
-void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
- title_id = title_id_;
-}
-
-std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
- // Skip games without title id
- const bool has_title_id = title_id != 0;
- if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
- return std::nullopt;
- }
-
- Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read,
- Common::FS::FileType::BinaryFile};
- if (!file.IsOpen()) {
- LOG_INFO(Render_OpenGL, "No transferable shader cache found");
- is_usable = true;
- return std::nullopt;
- }
-
- u32 version{};
- if (!file.ReadObject(version)) {
- LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
- return std::nullopt;
- }
-
- if (version < NativeVersion) {
- LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
- file.Close();
- InvalidateTransferable();
- is_usable = true;
- return std::nullopt;
- }
- if (version > NativeVersion) {
- LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
- "of the emulator, skipping");
- return std::nullopt;
- }
-
- // Version is valid, load the shaders
- std::vector<ShaderDiskCacheEntry> entries;
- while (static_cast<u64>(file.Tell()) < file.GetSize()) {
- ShaderDiskCacheEntry& entry = entries.emplace_back();
- if (!entry.Load(file)) {
- LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
- return std::nullopt;
- }
- }
-
- is_usable = true;
- return {std::move(entries)};
-}
-
-std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
- if (!is_usable) {
- return {};
- }
-
- Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read,
- Common::FS::FileType::BinaryFile};
- if (!file.IsOpen()) {
- LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
- return {};
- }
-
- if (const auto result = LoadPrecompiledFile(file)) {
- return *result;
- }
-
- LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
- file.Close();
- InvalidatePrecompiled();
- return {};
-}
-
-std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
- Common::FS::IOFile& file) {
- // Read compressed file from disk and decompress to virtual precompiled cache file
- std::vector<u8> compressed(file.GetSize());
- if (file.Read(compressed) != file.GetSize()) {
- return std::nullopt;
- }
- const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
- SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
- precompiled_cache_virtual_file_offset = 0;
-
- ShaderCacheVersionHash file_hash{};
- if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
- precompiled_cache_virtual_file_offset = 0;
- return std::nullopt;
- }
- if (GetShaderCacheVersionHash() != file_hash) {
- LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
- precompiled_cache_virtual_file_offset = 0;
- return std::nullopt;
- }
-
- std::vector<ShaderDiskCachePrecompiled> entries;
- while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
- u32 binary_size;
- auto& entry = entries.emplace_back();
- if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
- !LoadObjectFromPrecompiled(entry.binary_format) ||
- !LoadObjectFromPrecompiled(binary_size)) {
- return std::nullopt;
- }
-
- entry.binary.resize(binary_size);
- if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
- return std::nullopt;
- }
- }
- return entries;
-}
-
-void ShaderDiskCacheOpenGL::InvalidateTransferable() {
- if (!Common::FS::RemoveFile(GetTransferablePath())) {
- LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
- Common::FS::PathToUTF8String(GetTransferablePath()));
- }
- InvalidatePrecompiled();
-}
-
-void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
- // Clear virtaul precompiled cache file
- precompiled_cache_virtual_file.Resize(0);
-
- if (!Common::FS::RemoveFile(GetPrecompiledPath())) {
- LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}",
- Common::FS::PathToUTF8String(GetPrecompiledPath()));
- }
-}
-
-void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
- if (!is_usable) {
- return;
- }
-
- const u64 id = entry.unique_identifier;
- if (stored_transferable.contains(id)) {
- // The shader already exists
- return;
- }
-
- Common::FS::IOFile file = AppendTransferableFile();
- if (!file.IsOpen()) {
- return;
- }
- if (!entry.Save(file)) {
- LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
- file.Close();
- InvalidateTransferable();
- return;
- }
-
- stored_transferable.insert(id);
-}
-
-void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
- if (!is_usable) {
- return;
- }
-
- // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
- // when writing the dump. This should be done the moment I get access to write to the virtual
- // file.
- if (precompiled_cache_virtual_file.GetSize() == 0) {
- SavePrecompiledHeaderToVirtualPrecompiledCache();
- }
-
- GLint binary_length;
- glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
-
- GLenum binary_format;
- std::vector<u8> binary(binary_length);
- glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
-
- if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
- !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
- !SaveArrayToPrecompiled(binary.data(), binary.size())) {
- LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
- unique_identifier);
- InvalidatePrecompiled();
- }
-}
-
-Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
- if (!EnsureDirectories()) {
- return {};
- }
-
- const auto transferable_path{GetTransferablePath()};
- const bool existed = Common::FS::Exists(transferable_path);
-
- Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append,
- Common::FS::FileType::BinaryFile};
- if (!file.IsOpen()) {
- LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}",
- Common::FS::PathToUTF8String(transferable_path));
- return {};
- }
- if (!existed || file.GetSize() == 0) {
- // If the file didn't exist, write its version
- if (!file.WriteObject(NativeVersion)) {
- LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
- Common::FS::PathToUTF8String(transferable_path));
- return {};
- }
- }
- return file;
-}
-
-void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
- const auto hash{GetShaderCacheVersionHash()};
- if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
- LOG_ERROR(
- Render_OpenGL,
- "Failed to write precompiled cache version hash to virtual precompiled cache file");
- }
-}
-
-void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
- precompiled_cache_virtual_file_offset = 0;
- const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
- const std::vector<u8> compressed =
- Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
-
- const auto precompiled_path = GetPrecompiledPath();
- Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write,
- Common::FS::FileType::BinaryFile};
-
- if (!file.IsOpen()) {
- LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}",
- Common::FS::PathToUTF8String(precompiled_path));
- return;
- }
- if (file.Write(compressed) != compressed.size()) {
- LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
- Common::FS::PathToUTF8String(precompiled_path));
- }
-}
-
-bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
- const auto CreateDir = [](const std::filesystem::path& dir) {
- if (!Common::FS::CreateDir(dir)) {
- LOG_ERROR(Render_OpenGL, "Failed to create directory={}",
- Common::FS::PathToUTF8String(dir));
- return false;
- }
- return true;
- };
-
- return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) &&
- CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
- CreateDir(GetPrecompiledDir());
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const {
- return GetTransferableDir() / fmt::format("{}.bin", GetTitleID());
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
- return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID());
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const {
- return GetBaseDir() / "transferable";
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
- return GetBaseDir() / "precompiled";
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const {
- return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl";
-}
-
-std::string ShaderDiskCacheOpenGL::GetTitleID() const {
- return fmt::format("{:016X}", title_id);
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
deleted file mode 100644
index f8bc23868..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ /dev/null
@@ -1,176 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <filesystem>
-#include <optional>
-#include <string>
-#include <tuple>
-#include <type_traits>
-#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include <glad/glad.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "core/file_sys/vfs_vector.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/registry.h"
-
-namespace Common::FS {
-class IOFile;
-}
-
-namespace OpenGL {
-
-using ProgramCode = std::vector<u64>;
-
-/// Describes a shader and how it's used by the guest GPU
-struct ShaderDiskCacheEntry {
- ShaderDiskCacheEntry();
- ~ShaderDiskCacheEntry();
-
- bool Load(Common::FS::IOFile& file);
-
- bool Save(Common::FS::IOFile& file) const;
-
- bool HasProgramA() const {
- return !code.empty() && !code_b.empty();
- }
-
- Tegra::Engines::ShaderType type{};
- ProgramCode code;
- ProgramCode code_b;
-
- u64 unique_identifier = 0;
- std::optional<u32> texture_handler_size;
- u32 bound_buffer = 0;
- VideoCommon::Shader::GraphicsInfo graphics_info;
- VideoCommon::Shader::ComputeInfo compute_info;
- VideoCommon::Shader::KeyMap keys;
- VideoCommon::Shader::BoundSamplerMap bound_samplers;
- VideoCommon::Shader::SeparateSamplerMap separate_samplers;
- VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
-};
-
-/// Contains an OpenGL dumped binary program
-struct ShaderDiskCachePrecompiled {
- u64 unique_identifier = 0;
- GLenum binary_format = 0;
- std::vector<u8> binary;
-};
-
-class ShaderDiskCacheOpenGL {
-public:
- explicit ShaderDiskCacheOpenGL();
- ~ShaderDiskCacheOpenGL();
-
- /// Binds a title ID for all future operations.
- void BindTitleID(u64 title_id);
-
- /// Loads transferable cache. If file has a old version or on failure, it deletes the file.
- std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
-
- /// Loads current game's precompiled cache. Invalidates on failure.
- std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
-
- /// Removes the transferable (and precompiled) cache file.
- void InvalidateTransferable();
-
- /// Removes the precompiled cache file and clears virtual precompiled cache file.
- void InvalidatePrecompiled();
-
- /// Saves a raw dump to the transferable file. Checks for collisions.
- void SaveEntry(const ShaderDiskCacheEntry& entry);
-
- /// Saves a dump entry to the precompiled file. Does not check for collisions.
- void SavePrecompiled(u64 unique_identifier, GLuint program);
-
- /// Serializes virtual precompiled shader cache file to real file
- void SaveVirtualPrecompiledFile();
-
-private:
- /// Loads the transferable cache. Returns empty on failure.
- std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
- Common::FS::IOFile& file);
-
- /// Opens current game's transferable file and write it's header if it doesn't exist
- Common::FS::IOFile AppendTransferableFile() const;
-
- /// Save precompiled header to precompiled_cache_in_memory
- void SavePrecompiledHeaderToVirtualPrecompiledCache();
-
- /// Create shader disk cache directories. Returns true on success.
- bool EnsureDirectories() const;
-
- /// Gets current game's transferable file path
- std::filesystem::path GetTransferablePath() const;
-
- /// Gets current game's precompiled file path
- std::filesystem::path GetPrecompiledPath() const;
-
- /// Get user's transferable directory path
- std::filesystem::path GetTransferableDir() const;
-
- /// Get user's precompiled directory path
- std::filesystem::path GetPrecompiledDir() const;
-
- /// Get user's shader directory path
- std::filesystem::path GetBaseDir() const;
-
- /// Get current game's title id
- std::string GetTitleID() const;
-
- template <typename T>
- bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
- const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
- data, length, precompiled_cache_virtual_file_offset);
- precompiled_cache_virtual_file_offset += write_length;
- return write_length == sizeof(T) * length;
- }
-
- template <typename T>
- bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
- const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
- data, length, precompiled_cache_virtual_file_offset);
- precompiled_cache_virtual_file_offset += read_length;
- return read_length == sizeof(T) * length;
- }
-
- template <typename T>
- bool SaveObjectToPrecompiled(const T& object) {
- return SaveArrayToPrecompiled(&object, 1);
- }
-
- bool SaveObjectToPrecompiled(bool object) {
- const auto value = static_cast<u8>(object);
- return SaveArrayToPrecompiled(&value, 1);
- }
-
- template <typename T>
- bool LoadObjectFromPrecompiled(T& object) {
- return LoadArrayFromPrecompiled(&object, 1);
- }
-
- // Stores whole precompiled cache which will be read from or saved to the precompiled chache
- // file
- FileSys::VectorVfsFile precompiled_cache_virtual_file;
- // Stores the current offset of the precompiled cache file for IO purposes
- std::size_t precompiled_cache_virtual_file_offset = 0;
-
- // Stored transferable shaders
- std::unordered_set<u64> stored_transferable;
-
- /// Title ID to operate on
- u64 title_id = 0;
-
- // The cache has been loaded at boot
- bool is_usable = false;
-};
-
-} // namespace OpenGL