diff options
author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2019-04-27 07:07:18 +0200 |
---|---|---|
committer | ReinUsesLisp <reinuseslisp@airmail.cc> | 2019-06-21 02:38:33 +0200 |
commit | 06c4ce86458310870abec90ada68ac393256b9b6 (patch) | |
tree | 43957a5f0e33c044b1206663e91d390ef1336d2a | |
parent | gl_rasterizer: Track texture buffer usage (diff) | |
download | yuzu-06c4ce86458310870abec90ada68ac393256b9b6.tar yuzu-06c4ce86458310870abec90ada68ac393256b9b6.tar.gz yuzu-06c4ce86458310870abec90ada68ac393256b9b6.tar.bz2 yuzu-06c4ce86458310870abec90ada68ac393256b9b6.tar.lz yuzu-06c4ce86458310870abec90ada68ac393256b9b6.tar.xz yuzu-06c4ce86458310870abec90ada68ac393256b9b6.tar.zst yuzu-06c4ce86458310870abec90ada68ac393256b9b6.zip |
-rw-r--r-- | CMakeModules/GenerateSCMRev.cmake | 1 | ||||
-rw-r--r-- | src/common/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 66 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 70 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 7 | ||||
-rw-r--r-- | src/video_core/shader/decode.cpp | 1 | ||||
-rw-r--r-- | src/video_core/shader/decode/image.cpp | 89 | ||||
-rw-r--r-- | src/video_core/shader/node.h | 42 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 9 |
10 files changed, 284 insertions, 3 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 31edeb63d..dd65cfe42 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -70,6 +70,7 @@ set(HASH_FILES "${VIDEO_CORE}/shader/decode/half_set.cpp" "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/hfma2.cpp" + "${VIDEO_CORE}/shader/decode/image.cpp" "${VIDEO_CORE}/shader/decode/integer_set.cpp" "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/memory.cpp" diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 198b3fe07..8ae05137b 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -44,6 +44,7 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/shader/decode/half_set.cpp" "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/hfma2.cpp" + "${VIDEO_CORE}/shader/decode/image.cpp" "${VIDEO_CORE}/shader/decode/integer_set.cpp" "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/memory.cpp" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 9d43f03d2..6839abe71 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -90,6 +90,7 @@ add_library(video_core STATIC shader/decode/conversion.cpp shader/decode/memory.cpp shader/decode/texture.cpp + shader/decode/image.cpp shader/decode/float_set_predicate.cpp shader/decode/integer_set_predicate.cpp shader/decode/half_set_predicate.cpp diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 5b32e1249..54a1a04f9 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -126,6 +126,15 @@ union Sampler { u64 value{}; }; +union Image { + Image() = default; + + constexpr explicit Image(u64 value) : value{value} {} + + BitField<36, 13, u64> index; + u64 value; +}; + } // namespace Tegra::Shader namespace std { @@ -344,6 +353,26 @@ enum class TextureMiscMode : u64 { PTP, }; +enum class SurfaceDataMode : u64 { + P = 0, + D_BA = 1, +}; + +enum class OutOfBoundsStore : u64 { + Ignore = 0, + Clamp = 1, + Trap = 2, +}; + +enum class ImageType : u64 { + Texture1D = 0, + TextureBuffer = 1, + Texture1DArray = 2, + Texture2D = 3, + Texture2DArray = 4, + Texture3D = 5, +}; + enum class IsberdMode : u64 { None = 0, Patch = 1, @@ -398,7 +427,7 @@ enum class LmemLoadCacheManagement : u64 { CV = 3, }; -enum class LmemStoreCacheManagement : u64 { +enum class StoreCacheManagement : u64 { Default = 0, CG = 1, CS = 2, @@ -811,7 +840,7 @@ union Instruction { } ld_l; union { - BitField<44, 2, LmemStoreCacheManagement> cache_management; + BitField<44, 2, StoreCacheManagement> cache_management; } st_l; union { @@ -1295,6 +1324,35 @@ union Instruction { } tlds; union { + BitField<24, 2, StoreCacheManagement> cache_management; + BitField<33, 3, ImageType> image_type; + BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; + BitField<51, 1, u64> is_immediate; + BitField<52, 1, SurfaceDataMode> mode; + + BitField<20, 3, StoreType> store_data_layout; + BitField<20, 4, u64> component_mask_selector; + + bool IsComponentEnabled(std::size_t component) const { + ASSERT(mode == SurfaceDataMode::P); + constexpr u8 R = 0b0001; + constexpr u8 G = 0b0010; + constexpr u8 B = 0b0100; + constexpr u8 A = 0b1000; + constexpr std::array<u8, 16> mask = { + 0, (R), (G), (R | G), (B), (R | B), + (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A), + (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; + return std::bitset<4>{mask.at(component_mask_selector)}.test(component); + } + + StoreType GetStoreDataLayout() const { + ASSERT(mode == SurfaceDataMode::D_BA); + return store_data_layout; + } + } sust; + + union { BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; @@ -1385,6 +1443,7 @@ union Instruction { Attribute attribute; Sampler sampler; + Image image; u64 value; }; @@ -1428,6 +1487,7 @@ public: TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations TMML_B, // Texture Mip Map Level TMML, // Texture Mip Map Level + SUST, // Surface Store EXIT, IPA, OUT_R, // Emit vertex/primitive @@ -1558,6 +1618,7 @@ public: Synch, Memory, Texture, + Image, FloatSet, FloatSetPredicate, IntegerSet, @@ -1703,6 +1764,7 @@ private: INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), + INST("11101011001-----", Id::SUST, Type::Image, "SUST"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ece386cdc..2ae2f1db2 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -180,6 +180,7 @@ public: DeclareGlobalMemory(); DeclareSamplers(); DeclarePhysicalAttributeReader(); + DeclareImages(); code.AddLine("void execute_{}() {{", suffix); ++code.scope; @@ -531,6 +532,36 @@ private: code.AddNewLine(); } + void DeclareImages() { + const auto& images{ir.GetImages()}; + for (const auto& image : images) { + const std::string image_type = [&]() { + switch (image.GetType()) { + case Tegra::Shader::ImageType::Texture1D: + return "image1D"; + case Tegra::Shader::ImageType::TextureBuffer: + return "bufferImage"; + case Tegra::Shader::ImageType::Texture1DArray: + return "image1DArray"; + case Tegra::Shader::ImageType::Texture2D: + return "image2D"; + case Tegra::Shader::ImageType::Texture2DArray: + return "image2DArray"; + case Tegra::Shader::ImageType::Texture3D: + return "image3D"; + default: + UNREACHABLE(); + return "image1D"; + } + }(); + code.AddLine("layout (binding = IMAGE_BINDING_" + std::to_string(image.GetIndex()) + + ") coherent volatile writeonly uniform " + image_type + ' ' + + GetImage(image) + ';'); + } + if (!images.empty()) + code.AddNewLine(); + } + void VisitBlock(const NodeBlock& bb) { for (const auto& node : bb) { if (const std::string expr = Visit(node); !expr.empty()) { @@ -1478,6 +1509,39 @@ private: return tmp; } + std::string ImageStore(Operation operation) { + constexpr std::array<const char*, 4> constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; + const auto meta{std::get<MetaImage>(operation.GetMeta())}; + + std::string expr = "imageStore("; + expr += GetImage(meta.image); + expr += ", "; + + const std::size_t coords_count{operation.GetOperandsCount()}; + expr += constructors.at(coords_count - 1); + for (std::size_t i = 0; i < coords_count; ++i) { + expr += VisitOperand(operation, i, Type::Int); + if (i + 1 < coords_count) { + expr += ", "; + } + } + expr += "), "; + + const std::size_t values_count{meta.values.size()}; + UNIMPLEMENTED_IF(values_count != 4); + expr += "vec4("; + for (std::size_t i = 0; i < values_count; ++i) { + expr += Visit(meta.values.at(i)); + if (i + 1 < values_count) { + expr += ", "; + } + } + expr += "));"; + + code.AddLine(expr); + return {}; + } + std::string Branch(Operation operation) { const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); @@ -1718,6 +1782,8 @@ private: &GLSLDecompiler::TextureQueryLod, &GLSLDecompiler::TexelFetch, + &GLSLDecompiler::ImageStore, + &GLSLDecompiler::Branch, &GLSLDecompiler::PushFlowStack, &GLSLDecompiler::PopFlowStack, @@ -1786,6 +1852,10 @@ private: return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); } + std::string GetImage(const Image& image) const { + return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); + } + void EmitIfdefIsBuffer(const Sampler& sampler) { code.AddLine(fmt::format("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex())); } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 33ad9764a..97ce214b1 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -935,6 +935,11 @@ private: return {}; } + Id ImageStore(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id Branch(Operation operation) { const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); @@ -1326,6 +1331,8 @@ private: &SPIRVDecompiler::TextureQueryLod, &SPIRVDecompiler::TexelFetch, + &SPIRVDecompiler::ImageStore, + &SPIRVDecompiler::Branch, &SPIRVDecompiler::PushFlowStack, &SPIRVDecompiler::PopFlowStack, diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index a0554c97e..2c9ff28f2 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -169,6 +169,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, + {OpCode::Type::Image, &ShaderIR::DecodeImage}, {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp new file mode 100644 index 000000000..66fdf5714 --- /dev/null +++ b/src/video_core/shader/decode/image.cpp @@ -0,0 +1,89 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +namespace { +std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { + switch (image_type) { + case Tegra::Shader::ImageType::Texture1D: + case Tegra::Shader::ImageType::TextureBuffer: + return 1; + case Tegra::Shader::ImageType::Texture1DArray: + case Tegra::Shader::ImageType::Texture2D: + return 2; + case Tegra::Shader::ImageType::Texture2DArray: + case Tegra::Shader::ImageType::Texture3D: + return 3; + } + UNREACHABLE(); + return 1; +} +} // Anonymous namespace + +u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + switch (opcode->get().GetId()) { + case OpCode::Id::SUST: { + UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); + UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer); + UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); + UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store + + std::vector<Node> values; + constexpr std::size_t hardcoded_size{4}; + for (std::size_t i = 0; i < hardcoded_size; ++i) { + values.push_back(GetRegister(instr.gpr0.Value() + i)); + } + + std::vector<Node> coords; + const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; + for (std::size_t i = 0; i < num_coords; ++i) { + coords.push_back(GetRegister(instr.gpr8.Value() + i)); + } + + ASSERT(instr.sust.is_immediate); + const auto& image{GetImage(instr.image, instr.sust.image_type)}; + MetaImage meta{image, values}; + const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))}; + bb.push_back(store); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); + } + + return pc; +} + +const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { + const auto offset{static_cast<std::size_t>(image.index.Value())}; + + // If this image has already been used, return the existing mapping. + const auto itr{std::find_if(used_images.begin(), used_images.end(), + [=](const Image& entry) { return entry.GetOffset() == offset; })}; + if (itr != used_images.end()) { + ASSERT(itr->GetType() == type); + return *itr; + } + + // Otherwise create a new mapping for this image. + const std::size_t next_index{used_images.size()}; + const Image entry{offset, next_index, type}; + return *used_images.emplace(entry).first; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 3cfb911bb..8b8d83ae7 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -146,6 +146,8 @@ enum class OperationCode { TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 TexelFetch, /// (MetaTexture, int[N], int) -> float4 + ImageStore, /// (MetaImage, float[N] coords) -> void + Branch, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void PopFlowStack, /// () -> void @@ -263,6 +265,39 @@ private: bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. }; +class Image { +public: + explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type) + : offset{offset}, index{index}, type{type}, is_bindless{false} {} + + std::size_t GetOffset() const { + return offset; + } + + std::size_t GetIndex() const { + return index; + } + + Tegra::Shader::ImageType GetType() const { + return type; + } + + bool IsBindless() const { + return is_bindless; + } + + bool operator<(const Image& rhs) const { + return std::tie(offset, index, type, is_bindless) < + std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless); + } + +private: + std::size_t offset{}; + std::size_t index{}; + Tegra::Shader::ImageType type{}; + bool is_bindless{}; +}; + struct GlobalMemoryBase { u32 cbuf_index{}; u32 cbuf_offset{}; @@ -289,8 +324,13 @@ struct MetaTexture { u32 element{}; }; +struct MetaImage { + const Image& image; + std::vector<Node> values; +}; + /// Parameters that modify an operation but are not part of any particular operand -using Meta = std::variant<MetaArithmetic, MetaTexture, MetaStackClass, Tegra::Shader::HalfType>; +using Meta = std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; /// Holds any kind of operation that can be done in the IR class OperationNode final { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 1b84c0672..c7f264371 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -104,6 +104,10 @@ public: return used_samplers; } + const std::set<Image>& GetImages() const { + return used_images; + } + const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() const { return used_clip_distances; @@ -154,6 +158,7 @@ private: u32 DecodeConversion(NodeBlock& bb, u32 pc); u32 DecodeMemory(NodeBlock& bb, u32 pc); u32 DecodeTexture(NodeBlock& bb, u32 pc); + u32 DecodeImage(NodeBlock& bb, u32 pc); u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); @@ -254,6 +259,9 @@ private: Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + /// Accesses an image. + const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); + /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -329,6 +337,7 @@ private: std::set<Tegra::Shader::Attribute::Index> used_output_attributes; std::map<u32, ConstBuffer> used_cbufs; std::set<Sampler> used_samplers; + std::set<Image> used_images; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes |