diff options
author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2019-09-25 04:34:18 +0200 |
---|---|---|
committer | FernandoS27 <fsahmkow27@gmail.com> | 2019-10-25 15:01:31 +0200 |
commit | 7b81ba4d8a9805f808fcc60a0905ac74d293b2ee (patch) | |
tree | 7bf80df3851e7d0e25746d241cbb0e09ba5c6b33 | |
parent | Shader_IR: Implement Fast BRX and allow multi-branches in the CFG. (diff) | |
download | yuzu-7b81ba4d8a9805f808fcc60a0905ac74d293b2ee.tar yuzu-7b81ba4d8a9805f808fcc60a0905ac74d293b2ee.tar.gz yuzu-7b81ba4d8a9805f808fcc60a0905ac74d293b2ee.tar.bz2 yuzu-7b81ba4d8a9805f808fcc60a0905ac74d293b2ee.tar.lz yuzu-7b81ba4d8a9805f808fcc60a0905ac74d293b2ee.tar.xz yuzu-7b81ba4d8a9805f808fcc60a0905ac74d293b2ee.tar.zst yuzu-7b81ba4d8a9805f808fcc60a0905ac74d293b2ee.zip |
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 489 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 66 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 70 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 9 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 280 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 32 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 94 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 41 | ||||
-rw-r--r-- | src/video_core/shader/const_buffer_locker.cpp | 8 | ||||
-rw-r--r-- | src/video_core/shader/const_buffer_locker.h | 4 | ||||
-rw-r--r-- | src/video_core/shader/control_flow.cpp | 18 | ||||
-rw-r--r-- | src/video_core/shader/control_flow.h | 3 | ||||
-rw-r--r-- | src/video_core/shader/decode.cpp | 9 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.cpp | 7 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 12 |
15 files changed, 420 insertions, 722 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9e2799876..6402d6763 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -3,10 +3,12 @@ // Refer to the license.txt file included. #include <mutex> +#include <optional> +#include <string> #include <thread> +#include <unordered_set> #include <boost/functional/hash.hpp> #include "common/assert.h" -#include "common/hash.h" #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" @@ -22,18 +24,20 @@ namespace OpenGL { +using Tegra::Engines::ShaderType; +using VideoCommon::Shader::ConstBufferLocker; using VideoCommon::Shader::ProgramCode; +using VideoCommon::Shader::ShaderIR; + +namespace { // One UBO is always reserved for emulation values on staged shaders constexpr u32 STAGE_RESERVED_UBOS = 1; -struct UnspecializedShader { - std::string code; - GLShader::ShaderEntries entries; - ProgramType program_type; -}; +constexpr u32 STAGE_MAIN_OFFSET = 10; +constexpr u32 KERNEL_MAIN_OFFSET = 0; -namespace { +constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; /// Gets the address for the specified shader stage program GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { @@ -42,6 +46,39 @@ GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) return gpu.regs.code_address.CodeAddress() + shader_config.offset; } +/// Gets if the current instruction offset is a scheduler instruction +constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { + // Sched instructions appear once every 4 instructions. + constexpr std::size_t SchedPeriod = 4; + const std::size_t absolute_offset = offset - main_offset; + return (absolute_offset % SchedPeriod) == 0; +} + +/// Calculates the size of a program stream +std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { + constexpr std::size_t start_offset = 10; + // This is the encoded version of BRA that jumps to itself. All Nvidia + // shaders end with one. + constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; + constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; + std::size_t offset = start_offset; + while (offset < program.size()) { + const u64 instruction = program[offset]; + if (!IsSchedInstruction(offset, start_offset)) { + if ((instruction & mask) == self_jumping_branch) { + // End on Maxwell's "nop" instruction + break; + } + if (instruction == 0) { + break; + } + } + offset++; + } + // The last instruction is included in the program size + return std::min(offset + 1, program.size()); +} + /// Gets the shader program code from memory for the specified address ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, const u8* host_ptr) { @@ -52,6 +89,7 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g }); memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), program_code.size() * sizeof(u64)); + program_code.resize(CalculateProgramSize(program_code)); return program_code; } @@ -72,14 +110,6 @@ constexpr GLenum GetShaderType(ProgramType program_type) { } } -/// Gets if the current instruction offset is a scheduler instruction -constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { - // Sched instructions appear once every 4 instructions. - constexpr std::size_t SchedPeriod = 4; - const std::size_t absolute_offset = offset - main_offset; - return (absolute_offset % SchedPeriod) == 0; -} - /// Describes primitive behavior on geometry shaders constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { switch (primitive_mode) { @@ -122,122 +152,114 @@ ProgramType GetProgramType(Maxwell::ShaderProgram program) { return {}; } -/// Calculates the size of a program stream -std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { - constexpr std::size_t start_offset = 10; - // This is the encoded version of BRA that jumps to itself. All Nvidia - // shaders end with one. - constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; - constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; - std::size_t offset = start_offset; - std::size_t size = start_offset * sizeof(u64); - while (offset < program.size()) { - const u64 instruction = program[offset]; - if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & mask) == self_jumping_branch) { - // End on Maxwell's "nop" instruction - break; - } - if (instruction == 0) { - break; - } - } - size += sizeof(u64); - offset++; - } - // The last instruction is included in the program size - return std::min(size + sizeof(u64), program.size() * sizeof(u64)); -} - /// Hashes one (or two) program streams u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, - const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { - if (size_a == 0) { - size_a = CalculateProgramSize(code); - } - u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); - if (program_type != ProgramType::VertexA) { - return unique_identifier; - } - // VertexA programs include two programs - - std::size_t seed = 0; - boost::hash_combine(seed, unique_identifier); - - if (size_b == 0) { - size_b = CalculateProgramSize(code_b); + const ProgramCode& code_b) { + u64 unique_identifier = boost::hash_value(code); + if (program_type == ProgramType::VertexA) { + // VertexA programs include two programs + boost::hash_combine(unique_identifier, boost::hash_value(code_b)); } - const u64 identifier_b = - Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b); - boost::hash_combine(seed, identifier_b); - return static_cast<u64>(seed); + return unique_identifier; } /// Creates an unspecialized program from code streams -GLShader::ProgramResult CreateProgram(Core::System& system, const Device& device, - ProgramType program_type, ProgramCode program_code, - ProgramCode program_code_b) { - GLShader::ShaderSetup setup(program_code); - setup.program.size_a = CalculateProgramSize(program_code); - setup.program.size_b = 0; - if (program_type == ProgramType::VertexA) { - // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. - // Conventional HW does not support this, so we combine VertexA and VertexB into one - // stage here. - setup.SetProgramB(program_code_b); - setup.program.size_b = CalculateProgramSize(program_code_b); - } - setup.program.unique_identifier = GetUniqueIdentifier( - program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); - +std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir, + const std::optional<ShaderIR>& ir_b) { switch (program_type) { case ProgramType::VertexA: - case ProgramType::VertexB: { - VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Vertex, - &(system.GPU().Maxwell3D())}; - return GLShader::GenerateVertexShader(locker, device, setup); - } - case ProgramType::Geometry: { - VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Geometry, - &(system.GPU().Maxwell3D())}; - return GLShader::GenerateGeometryShader(locker, device, setup); - } - case ProgramType::Fragment: { - VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Fragment, - &(system.GPU().Maxwell3D())}; - return GLShader::GenerateFragmentShader(locker, device, setup); - } - case ProgramType::Compute: { - VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Compute, &(system.GPU().KeplerCompute())}; - return GLShader::GenerateComputeShader(locker, device, setup); - } + case ProgramType::VertexB: + return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); + case ProgramType::Geometry: + return GLShader::GenerateGeometryShader(device, ir); + case ProgramType::Fragment: + return GLShader::GenerateFragmentShader(device, ir); + case ProgramType::Compute: + return GLShader::GenerateComputeShader(device, ir); default: UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); return {}; } } -CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, - ProgramType program_type, const ProgramVariant& variant, - bool hint_retrievable = false) { +constexpr const char* GetProgramTypeName(ProgramType program_type) { + switch (program_type) { + case ProgramType::VertexA: + case ProgramType::VertexB: + return "VS"; + case ProgramType::TessellationControl: + return "TCS"; + case ProgramType::TessellationEval: + return "TES"; + case ProgramType::Geometry: + return "GS"; + case ProgramType::Fragment: + return "FS"; + case ProgramType::Compute: + return "CS"; + } + return "UNK"; +} + +Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) { + switch (program_type) { + case ProgramType::VertexA: + case ProgramType::VertexB: + return Tegra::Engines::ShaderType::Vertex; + case ProgramType::TessellationControl: + return Tegra::Engines::ShaderType::TesselationControl; + case ProgramType::TessellationEval: + return Tegra::Engines::ShaderType::TesselationEval; + case ProgramType::Geometry: + return Tegra::Engines::ShaderType::Geometry; + case ProgramType::Fragment: + return Tegra::Engines::ShaderType::Fragment; + case ProgramType::Compute: + return Tegra::Engines::ShaderType::Compute; + } + UNREACHABLE(); + return {}; +} + +std::string GetShaderId(u64 unique_identifier, ProgramType program_type) { + return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier); +} + +CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type, + const ProgramCode& program_code, const ProgramCode& program_code_b, + const ProgramVariant& variant, ConstBufferLocker& locker, + bool hint_retrievable = false) { + LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type)); + + const bool is_compute = program_type == ProgramType::Compute; + const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; + const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker); + std::optional<ShaderIR> ir_b; + if (!program_code_b.empty()) { + ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker); + } + const auto entries = GLShader::GetEntries(ir); + auto base_bindings{variant.base_bindings}; const auto primitive_mode{variant.primitive_mode}; const auto texture_buffer_usage{variant.texture_buffer_usage}; - std::string source = R"(#version 430 core + std::string source = fmt::format(R"(// {} +#version 430 core #extension GL_ARB_separate_shader_objects : enable #extension GL_ARB_shader_viewport_layer_array : enable #extension GL_EXT_shader_image_load_formatted : enable #extension GL_NV_gpu_shader5 : enable #extension GL_NV_shader_thread_group : enable #extension GL_NV_shader_thread_shuffle : enable -)"; - if (program_type == ProgramType::Compute) { +)", + GetShaderId(unique_identifier, program_type)); + if (is_compute) { source += "#extension GL_ARB_compute_variable_group_size : require\n"; } source += '\n'; - if (program_type != ProgramType::Compute) { + if (!is_compute) { source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); } @@ -281,7 +303,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn } source += '\n'; - source += code; + source += GenerateGLSL(device, program_type, ir, ir_b); OGLShader shader; shader.Create(source.c_str(), GetShaderType(program_type)); @@ -291,85 +313,86 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn return program; } -std::set<GLenum> GetSupportedFormats() { - std::set<GLenum> supported_formats; - +std::unordered_set<GLenum> GetSupportedFormats() { GLint num_formats{}; glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); std::vector<GLint> formats(num_formats); glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - for (const GLint format : formats) + std::unordered_set<GLenum> supported_formats; + for (const GLint format : formats) { supported_formats.insert(static_cast<GLenum>(format)); + } return supported_formats; } } // Anonymous namespace CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, - GLShader::ProgramResult result) - : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr}, - unique_identifier{params.unique_identifier}, program_type{program_type}, - disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, - entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} + GLShader::ShaderEntries entries, ProgramCode program_code, + ProgramCode program_code_b) + : RasterizerCacheObject{params.host_ptr}, system{params.system}, + disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, + unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries}, + program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} { + if (params.precompiled_variants) { + for (const auto& pair : *params.precompiled_variants) { + const auto& variant = pair->first.variant; + programs.emplace(variant, pair->second); + } + } +} Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode&& program_code, - ProgramCode&& program_code_b) { - const auto code_size{CalculateProgramSize(program_code)}; - const auto code_size_b{CalculateProgramSize(program_code_b)}; - auto result{CreateProgram(params.system, params.device, GetProgramType(program_type), - program_code, program_code_b)}; - if (result.first.empty()) { - // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now - return {}; - } - + ProgramCode program_code, ProgramCode program_code_b) { params.disk_cache.SaveRaw(ShaderDiskCacheRaw( - params.unique_identifier, GetProgramType(program_type), - static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), - std::move(program_code), std::move(program_code_b))); - - return std::shared_ptr<CachedShader>( - new CachedShader(params, GetProgramType(program_type), std::move(result))); -} - -Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - GLShader::ProgramResult result) { + params.unique_identifier, GetProgramType(program_type), program_code, program_code_b)); + + ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type))); + const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); + // TODO(Rodrigo): Handle VertexA shaders + // std::optional<ShaderIR> ir_b; + // if (!program_code_b.empty()) { + // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET); + // } return std::shared_ptr<CachedShader>( - new CachedShader(params, GetProgramType(program_type), std::move(result))); + new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir), + std::move(program_code), std::move(program_code_b))); } -Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { - auto result{CreateProgram(params.system, params.device, ProgramType::Compute, code, {})}; +Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { + params.disk_cache.SaveRaw( + ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code)); - const auto code_size{CalculateProgramSize(code)}; - params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, - static_cast<u32>(code_size / sizeof(u64)), 0, - std::move(code), {})); - - return std::shared_ptr<CachedShader>( - new CachedShader(params, ProgramType::Compute, std::move(result))); + ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute); + const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); + return std::shared_ptr<CachedShader>(new CachedShader( + params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {})); } -Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, - GLShader::ProgramResult result) { - return std::shared_ptr<CachedShader>( - new CachedShader(params, ProgramType::Compute, std::move(result))); +Shader CachedShader::CreateFromCache(const ShaderParameters& params, + const UnspecializedShader& unspecialized) { + return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type, + unspecialized.entries, unspecialized.code, + unspecialized.code_b)); } std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { const auto [entry, is_cache_miss] = programs.try_emplace(variant); auto& program = entry->second; if (is_cache_miss) { - program = TryLoadProgram(variant); - if (!program) { - program = SpecializeShader(code, entries, program_type, variant); - disk_cache.SaveUsage(GetUsage(variant)); + Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; + if (program_type == ProgramType::Compute) { + engine = &system.GPU().KeplerCompute(); + } else { + engine = &system.GPU().Maxwell3D(); } + ConstBufferLocker locker(GetEnginesShaderType(program_type), *engine); + program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, + variant, locker); + disk_cache.SaveUsage(GetUsage(variant)); LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); } @@ -385,14 +408,6 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar return {program->handle, base_bindings}; } -CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { - const auto found = precompiled_programs.find(GetUsage(variant)); - if (found == precompiled_programs.end()) { - return {}; - } - return found->second; -} - ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { ShaderDiskCacheUsage usage; usage.unique_identifier = unique_identifier; @@ -412,18 +427,15 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } const auto [raws, shader_usages] = *transferable; - - auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); - - const auto supported_formats{GetSupportedFormats()}; - const auto unspecialized_shaders{ - GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)}; - if (stop_loading) { + if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) { return; } - // Track if precompiled cache was altered during loading to know if we have to serialize the - // virtual precompiled cache file back to the hard drive + const auto dumps = disk_cache.LoadPrecompiled(); + const auto supported_formats = GetSupportedFormats(); + + // Track if precompiled cache was altered during loading to know if we have to + // serialize the virtual precompiled cache file back to the hard drive bool precompiled_cache_altered = false; // Inform the frontend about shader build initialization @@ -446,9 +458,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } const auto& usage{shader_usages[i]}; - LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})", - usage.unique_identifier, i, shader_usages.size()); - const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; const auto dump{dumps.find(usage)}; @@ -462,21 +471,27 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } } if (!shader) { - shader = SpecializeShader(unspecialized.code, unspecialized.entries, - unspecialized.program_type, usage.variant, true); + ConstBufferLocker locker(GetEnginesShaderType(unspecialized.program_type)); + shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, + unspecialized.code, unspecialized.code_b, usage.variant, + locker, true); } - std::scoped_lock lock(mutex); + std::scoped_lock lock{mutex}; if (callback) { callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, shader_usages.size()); } precompiled_programs.emplace(usage, std::move(shader)); + + // TODO(Rodrigo): Is there a better way to do this? + precompiled_variants[usage.unique_identifier].push_back( + precompiled_programs.find(usage)); } }; - const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)}; + const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; const std::size_t bucket_size{shader_usages.size() / num_workers}; std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); std::vector<std::thread> threads(num_workers); @@ -496,7 +511,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, if (compilation_failed) { // Invalidate the precompiled cache if a shader dumped shader was rejected disk_cache.InvalidatePrecompiled(); - dumps.clear(); precompiled_cache_altered = true; return; } @@ -504,8 +518,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } - // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before - // precompiling them + // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw + // before precompiling them for (std::size_t i = 0; i < shader_usages.size(); ++i) { const auto& usage{shader_usages[i]}; @@ -521,9 +535,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } } -CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) { +const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { + const auto it = precompiled_variants.find(unique_identifier); + return it == precompiled_variants.end() ? nullptr : &it->second; +} +CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( + const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) { if (supported_formats.find(dump.binary_format) == supported_formats.end()) { LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); return {}; @@ -545,56 +563,52 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( return shader; } -std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders( +bool ShaderCacheOpenGL::GenerateUnspecializedShaders( const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, - const std::vector<ShaderDiskCacheRaw>& raws, - const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) { - std::unordered_map<u64, UnspecializedShader> unspecialized; - + const std::vector<ShaderDiskCacheRaw>& raws) { if (callback) { callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); } for (std::size_t i = 0; i < raws.size(); ++i) { if (stop_loading) { - return {}; + return false; } const auto& raw{raws[i]}; const u64 unique_identifier{raw.GetUniqueIdentifier()}; const u64 calculated_hash{ GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; if (unique_identifier != calculated_hash) { - LOG_ERROR( - Render_OpenGL, - "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache", - raw.GetUniqueIdentifier(), calculated_hash); + LOG_ERROR(Render_OpenGL, + "Invalid hash in entry={:016x} (obtained hash={:016x}) - " + "removing shader cache", + raw.GetUniqueIdentifier(), calculated_hash); disk_cache.InvalidateTransferable(); - return {}; + return false; } - GLShader::ProgramResult result; - if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) { - // If it's stored in the precompiled file, avoid decompiling it here - const auto& stored_decompiled{it->second}; - result = {stored_decompiled.code, stored_decompiled.entries}; - } else { - // Otherwise decompile the shader at boot and save the result to the decompiled file - result = CreateProgram(system, device, raw.GetProgramType(), raw.GetProgramCode(), - raw.GetProgramCodeB()); - disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); - } - - precompiled_shaders.insert({unique_identifier, result}); - - unspecialized.insert( - {raw.GetUniqueIdentifier(), - {std::move(result.first), std::move(result.second), raw.GetProgramType()}}); + const u32 main_offset = + raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; + ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType())); + const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker); + // TODO(Rodrigo): Handle VertexA shaders + // std::optional<ShaderIR> ir_b; + // if (raw.HasProgramA()) { + // ir_b.emplace(raw.GetProgramCodeB(), main_offset); + // } + + UnspecializedShader unspecialized; + unspecialized.entries = GLShader::GetEntries(ir); + unspecialized.program_type = raw.GetProgramType(); + unspecialized.code = raw.GetProgramCode(); + unspecialized.code_b = raw.GetProgramCodeB(); + unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); if (callback) { callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); } } - return unspecialized; + return true; } Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { @@ -603,37 +617,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { } auto& memory_manager{system.GPU().MemoryManager()}; - const GPUVAddr program_addr{GetShaderAddress(system, program)}; + const GPUVAddr address{GetShaderAddress(system, program)}; // Look up shader in the cache based on address - const auto host_ptr{memory_manager.GetPointer(program_addr)}; + const auto host_ptr{memory_manager.GetPointer(address)}; Shader shader{TryGet(host_ptr)}; if (shader) { return last_shaders[static_cast<std::size_t>(program)] = shader; } // No shader found - create a new one - ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; - ProgramCode program_code_b; - const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; - if (is_program_a) { - const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; - program_code_b = GetShaderCode(memory_manager, program_addr_b, - memory_manager.GetPointer(program_addr_b)); - } - - const auto unique_identifier = - GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); - const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; - const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr, - host_ptr, unique_identifier}; - - const auto found = precompiled_shaders.find(unique_identifier); - if (found == precompiled_shaders.end()) { - shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code), - std::move(program_code_b)); + ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; + ProgramCode code_b; + if (program == Maxwell::ShaderProgram::VertexA) { + const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; + code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b)); + } + + const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b); + const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); + const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; + const ShaderParameters params{system, disk_cache, precompiled_variants, device, + cpu_addr, host_ptr, unique_identifier}; + + const auto found = unspecialized_shaders.find(unique_identifier); + if (found == unspecialized_shaders.end()) { + shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), + std::move(code_b)); } else { - shader = CachedShader::CreateStageFromCache(params, program, found->second); + shader = CachedShader::CreateFromCache(params, found->second); } Register(shader); @@ -651,15 +663,16 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { // No kernel found - create a new one auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; + const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; - const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr, - host_ptr, unique_identifier}; + const ShaderParameters params{system, disk_cache, precompiled_variants, device, + cpu_addr, host_ptr, unique_identifier}; - const auto found = precompiled_shaders.find(unique_identifier); - if (found == precompiled_shaders.end()) { + const auto found = unspecialized_shaders.find(unique_identifier); + if (found == unspecialized_shaders.end()) { kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); } else { - kernel = CachedShader::CreateKernelFromCache(params, found->second); + kernel = CachedShader::CreateFromCache(params, found->second); } Register(kernel); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6ff78f005..700a83853 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -8,9 +8,10 @@ #include <atomic> #include <bitset> #include <memory> -#include <set> +#include <string> #include <tuple> #include <unordered_map> +#include <unordered_set> #include <vector> #include <glad/glad.h> @@ -20,6 +21,7 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" +#include "video_core/shader/shader_ir.h" namespace Core { class System; @@ -40,12 +42,19 @@ using Shader = std::shared_ptr<CachedShader>; using CachedProgram = std::shared_ptr<OGLProgram>; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; -using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; +using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; + +struct UnspecializedShader { + GLShader::ShaderEntries entries; + ProgramType program_type; + ProgramCode code; + ProgramCode code_b; +}; struct ShaderParameters { - ShaderDiskCacheOpenGL& disk_cache; - const PrecompiledPrograms& precompiled_programs; Core::System& system; + ShaderDiskCacheOpenGL& disk_cache; + const PrecompiledVariants* precompiled_variants; const Device& device; VAddr cpu_addr; u8* host_ptr; @@ -56,23 +65,18 @@ class CachedShader final : public RasterizerCacheObject { public: static Shader CreateStageFromMemory(const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode&& program_code, ProgramCode&& program_code_b); + ProgramCode program_code, ProgramCode program_code_b); + static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); - static Shader CreateStageFromCache(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - GLShader::ProgramResult result); - - static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); - - static Shader CreateKernelFromCache(const ShaderParameters& params, - GLShader::ProgramResult result); + static Shader CreateFromCache(const ShaderParameters& params, + const UnspecializedShader& unspecialized); VAddr GetCpuAddr() const override { return cpu_addr; } std::size_t GetSizeInBytes() const override { - return shader_length; + return program_code.size() * sizeof(u64); } /// Gets the shader entries for the shader @@ -85,21 +89,24 @@ public: private: explicit CachedShader(const ShaderParameters& params, ProgramType program_type, - GLShader::ProgramResult result); - - CachedProgram TryLoadProgram(const ProgramVariant& variant) const; + GLShader::ShaderEntries entries, ProgramCode program_code, + ProgramCode program_code_b); ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; + Core::System& system; + ShaderDiskCacheOpenGL& disk_cache; + const Device& device; + VAddr cpu_addr{}; + u64 unique_identifier{}; ProgramType program_type{}; - ShaderDiskCacheOpenGL& disk_cache; - const PrecompiledPrograms& precompiled_programs; GLShader::ShaderEntries entries; - std::string code; - std::size_t shader_length{}; + + ProgramCode program_code; + ProgramCode program_code_b; std::unordered_map<ProgramVariant, CachedProgram> programs; }; @@ -124,21 +131,26 @@ protected: void FlushObjectInner(const Shader& object) override {} private: - std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders( - const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, - const std::vector<ShaderDiskCacheRaw>& raws, - const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled); + bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback, + const std::vector<ShaderDiskCacheRaw>& raws); CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, - const std::set<GLenum>& supported_formats); + const std::unordered_set<GLenum>& supported_formats); + + const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const; Core::System& system; Core::Frontend::EmuWindow& emu_window; const Device& device; + ShaderDiskCacheOpenGL disk_cache; - PrecompiledShaders precompiled_shaders; PrecompiledPrograms precompiled_programs; + std::unordered_map<u64, PrecompiledVariants> precompiled_variants; + + std::unordered_map<u64, UnspecializedShader> unspecialized_shaders; + std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 71d7389cb..030550c53 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -415,27 +415,6 @@ public: return code.GetResult(); } - ShaderEntries GetShaderEntries() const { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), - cbuf.first); - } - for (const auto& sampler : ir.GetSamplers()) { - entries.samplers.emplace_back(sampler); - } - for (const auto& [offset, image] : ir.GetImages()) { - entries.images.emplace_back(image); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, - usage.is_read, usage.is_written); - } - entries.clip_distances = ir.GetClipDistances(); - entries.shader_length = ir.GetLength(); - return entries; - } - private: friend class ASTDecompiler; friend class ExprDecompiler; @@ -2481,25 +2460,46 @@ void GLSLDecompiler::DecompileAST() { } // Anonymous namespace +ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { + ShaderEntries entries; + for (const auto& cbuf : ir.GetConstantBuffers()) { + entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), + cbuf.first); + } + for (const auto& sampler : ir.GetSamplers()) { + entries.samplers.emplace_back(sampler); + } + for (const auto& [offset, image] : ir.GetImages()) { + entries.images.emplace_back(image); + } + for (const auto& [base, usage] : ir.GetGlobalMemory()) { + entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, + usage.is_written); + } + entries.clip_distances = ir.GetClipDistances(); + entries.shader_length = ir.GetLength(); + return entries; +} + std::string GetCommonDeclarations() { - return fmt::format( - "#define ftoi floatBitsToInt\n" - "#define ftou floatBitsToUint\n" - "#define itof intBitsToFloat\n" - "#define utof uintBitsToFloat\n\n" - "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" - " bvec2 is_nan1 = isnan(pair1);\n" - " bvec2 is_nan2 = isnan(pair2);\n" - " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " - "is_nan2.y);\n" - "}}\n\n"); + return R"(#define ftoi floatBitsToInt +#define ftou floatBitsToUint +#define itof intBitsToFloat +#define utof uintBitsToFloat + +bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { + bvec2 is_nan1 = isnan(pair1); + bvec2 is_nan2 = isnan(pair2); + return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); +} +)"; } -ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, - const std::string& suffix) { +std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, + const std::string& suffix) { GLSLDecompiler decompiler(device, ir, stage, suffix); decompiler.Decompile(); - return {decompiler.GetResult(), decompiler.GetShaderEntries()}; + return decompiler.GetResult(); } } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index e538dc001..fead2a51e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -34,10 +34,7 @@ enum class ProgramType : u32 { namespace OpenGL::GLShader { -struct ShaderEntries; - using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using ProgramResult = std::pair<std::string, ShaderEntries>; using SamplerEntry = VideoCommon::Shader::Sampler; using ImageEntry = VideoCommon::Shader::Image; @@ -93,9 +90,11 @@ struct ShaderEntries { std::size_t shader_length{}; }; +ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); + std::string GetCommonDeclarations(); -ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - ProgramType stage, const std::string& suffix); +std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, + ProgramType stage, const std::string& suffix); } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 74cc33476..ddc19dccd 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -29,12 +29,7 @@ enum class TransferableEntryKind : u32 { Usage, }; -enum class PrecompiledEntryKind : u32 { - Decompiled, - Dump, -}; - -constexpr u32 NativeVersion = 4; +constexpr u32 NativeVersion = 5; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); @@ -49,13 +44,11 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { return hash; } -} // namespace +} // Anonymous namespace ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, - u32 program_code_size, u32 program_code_size_b, ProgramCode program_code, ProgramCode program_code_b) : unique_identifier{unique_identifier}, program_type{program_type}, - program_code_size{program_code_size}, program_code_size_b{program_code_size_b}, program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; @@ -90,15 +83,16 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(program_type)) != 1 || - file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) { + file.WriteObject(static_cast<u32>(program_code.size())) != 1 || + file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) { return false; } - if (file.WriteArray(program_code.data(), program_code_size) != program_code_size) + if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size()) return false; if (HasProgramA() && - file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) { + file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) { return false; } return true; @@ -186,13 +180,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() { return {{std::move(raws), std::move(usages)}}; } -std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap> +std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> ShaderDiskCacheOpenGL::LoadPrecompiled() { if (!is_usable) { return {}; } - FileUtil::IOFile file(GetPrecompiledPath(), "rb"); + std::string path = GetPrecompiledPath(); + FileUtil::IOFile file(path, "rb"); if (!file.IsOpen()) { LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", GetTitleID()); @@ -211,7 +206,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { return *result; } -std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>> +std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { // Read compressed file from disk and decompress to virtual precompiled cache file std::vector<u8> compressed(file.GetSize()); @@ -231,238 +226,31 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled; ShaderDumpsMap dumps; while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { - PrecompiledEntryKind kind{}; - if (!LoadObjectFromPrecompiled(kind)) { + ShaderDiskCacheUsage usage; + if (!LoadObjectFromPrecompiled(usage)) { return {}; } - switch (kind) { - case PrecompiledEntryKind::Decompiled: { - u64 unique_identifier{}; - if (!LoadObjectFromPrecompiled(unique_identifier)) { - return {}; - } - - auto entry = LoadDecompiledEntry(); - if (!entry) { - return {}; - } - decompiled.insert({unique_identifier, std::move(*entry)}); - break; - } - case PrecompiledEntryKind::Dump: { - ShaderDiskCacheUsage usage; - if (!LoadObjectFromPrecompiled(usage)) { - return {}; - } - - ShaderDiskCacheDump dump; - if (!LoadObjectFromPrecompiled(dump.binary_format)) { - return {}; - } - - u32 binary_length{}; - if (!LoadObjectFromPrecompiled(binary_length)) { - return {}; - } - - dump.binary.resize(binary_length); - if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { - return {}; - } - - dumps.insert({usage, dump}); - break; - } - default: + ShaderDiskCacheDump dump; + if (!LoadObjectFromPrecompiled(dump.binary_format)) { return {}; } - } - return {{decompiled, dumps}}; -} -std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() { - u32 code_size{}; - if (!LoadObjectFromPrecompiled(code_size)) { - return {}; - } - - std::string code(code_size, '\0'); - if (!LoadArrayFromPrecompiled(code.data(), code.size())) { - return {}; - } - - ShaderDiskCacheDecompiled entry; - entry.code = std::move(code); - - u32 const_buffers_count{}; - if (!LoadObjectFromPrecompiled(const_buffers_count)) { - return {}; - } - - for (u32 i = 0; i < const_buffers_count; ++i) { - u32 max_offset{}; - u32 index{}; - bool is_indirect{}; - if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(is_indirect)) { + u32 binary_length{}; + if (!LoadObjectFromPrecompiled(binary_length)) { return {}; } - entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index); - } - u32 samplers_count{}; - if (!LoadObjectFromPrecompiled(samplers_count)) { - return {}; - } - - for (u32 i = 0; i < samplers_count; ++i) { - u64 offset{}; - u64 index{}; - u32 type{}; - bool is_array{}; - bool is_shadow{}; - bool is_bindless{}; - if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) || - !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) { + dump.binary.resize(binary_length); + if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { return {}; } - entry.entries.samplers.emplace_back( - static_cast<std::size_t>(offset), static_cast<std::size_t>(index), - static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); - } - u32 images_count{}; - if (!LoadObjectFromPrecompiled(images_count)) { - return {}; - } - for (u32 i = 0; i < images_count; ++i) { - u64 offset{}; - u64 index{}; - u32 type{}; - u8 is_bindless{}; - u8 is_written{}; - u8 is_read{}; - u8 is_atomic{}; - if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || - !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || - !LoadObjectFromPrecompiled(is_atomic)) { - return {}; - } - entry.entries.images.emplace_back( - static_cast<std::size_t>(offset), static_cast<std::size_t>(index), - static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0, - is_read != 0, is_atomic != 0); - } - - u32 global_memory_count{}; - if (!LoadObjectFromPrecompiled(global_memory_count)) { - return {}; + dumps.emplace(usage, dump); } - for (u32 i = 0; i < global_memory_count; ++i) { - u32 cbuf_index{}; - u32 cbuf_offset{}; - bool is_read{}; - bool is_written{}; - if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) || - !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { - return {}; - } - entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read, - is_written); - } - - for (auto& clip_distance : entry.entries.clip_distances) { - if (!LoadObjectFromPrecompiled(clip_distance)) { - return {}; - } - } - - u64 shader_length{}; - if (!LoadObjectFromPrecompiled(shader_length)) { - return {}; - } - entry.entries.shader_length = static_cast<std::size_t>(shader_length); - - return entry; -} - -bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries) { - if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) || - !SaveObjectToPrecompiled(unique_identifier) || - !SaveObjectToPrecompiled(static_cast<u32>(code.size())) || - !SaveArrayToPrecompiled(code.data(), code.size())) { - return false; - } - - if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) { - return false; - } - for (const auto& cbuf : entries.const_buffers) { - if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) || - !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) || - !SaveObjectToPrecompiled(cbuf.IsIndirect())) { - return false; - } - } - - if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) { - return false; - } - for (const auto& sampler : entries.samplers) { - if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) || - !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) || - !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) || - !SaveObjectToPrecompiled(sampler.IsArray()) || - !SaveObjectToPrecompiled(sampler.IsShadow()) || - !SaveObjectToPrecompiled(sampler.IsBindless())) { - return false; - } - } - - if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) { - return false; - } - for (const auto& image : entries.images) { - if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || - !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || - !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsAtomic() ? 1 : 0))) { - return false; - } - } - - if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { - return false; - } - for (const auto& gmem : entries.global_memory_entries) { - if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) || - !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) || - !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) { - return false; - } - } - - for (const bool clip_distance : entries.clip_distances) { - if (!SaveObjectToPrecompiled(clip_distance)) { - return false; - } - } - - if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { - return false; - } - - return true; + return dumps; } void ShaderDiskCacheOpenGL::InvalidateTransferable() { @@ -532,28 +320,18 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { } } -void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries) { +void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { if (!is_usable) { return; } + // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header + // when writing the dump. This should be done the moment I get access to write to the virtual + // file. if (precompiled_cache_virtual_file.GetSize() == 0) { SavePrecompiledHeaderToVirtualPrecompiledCache(); } - if (!SaveDecompiledFile(unique_identifier, code, entries)) { - LOG_ERROR(Render_OpenGL, - "Failed to save decompiled entry to the precompiled file - removing"); - InvalidatePrecompiled(); - } -} - -void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { - if (!is_usable) { - return; - } - GLint binary_length{}; glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); @@ -561,8 +339,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p std::vector<u8> binary(binary_length); glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) || - !SaveObjectToPrecompiled(usage) || + if (!SaveObjectToPrecompiled(usage) || !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || !SaveArrayToPrecompiled(binary.data(), binary.size())) { @@ -574,8 +351,9 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p } FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { - if (!EnsureDirectories()) + if (!EnsureDirectories()) { return {}; + } const auto transferable_path{GetTransferablePath()}; const bool existed = FileUtil::Exists(transferable_path); @@ -607,8 +385,8 @@ void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { precompiled_cache_virtual_file_offset = 0; - const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); - const std::vector<u8>& compressed = + const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); + const std::vector<u8> compressed = Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); const auto precompiled_path{GetPrecompiledPath()}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 9595bd71b..61b46d728 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -123,8 +123,7 @@ namespace OpenGL { class ShaderDiskCacheRaw { public: explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, - u32 program_code_size, u32 program_code_size_b, - ProgramCode program_code, ProgramCode program_code_b); + ProgramCode program_code, ProgramCode program_code_b = {}); ShaderDiskCacheRaw(); ~ShaderDiskCacheRaw(); @@ -155,22 +154,14 @@ public: private: u64 unique_identifier{}; ProgramType program_type{}; - u32 program_code_size{}; - u32 program_code_size_b{}; ProgramCode program_code; ProgramCode program_code_b; }; -/// Contains decompiled data from a shader -struct ShaderDiskCacheDecompiled { - std::string code; - GLShader::ShaderEntries entries; -}; - /// Contains an OpenGL dumped binary program struct ShaderDiskCacheDump { - GLenum binary_format; + GLenum binary_format{}; std::vector<u8> binary; }; @@ -184,9 +175,7 @@ public: LoadTransferable(); /// Loads current game's precompiled cache. Invalidates on failure. - std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, - std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> - LoadPrecompiled(); + std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); /// Removes the transferable (and precompiled) cache file. void InvalidateTransferable(); @@ -200,10 +189,6 @@ public: /// Saves shader usage to the transferable file. Does not check for collisions. void SaveUsage(const ShaderDiskCacheUsage& usage); - /// Saves a decompiled entry to the precompiled file. Does not check for collisions. - void SaveDecompiled(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries); - /// Saves a dump entry to the precompiled file. Does not check for collisions. void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); @@ -212,18 +197,9 @@ public: private: /// Loads the transferable cache. Returns empty on failure. - std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, - std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>> + std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> LoadPrecompiledFile(FileUtil::IOFile& file); - /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on - /// failure. - std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(); - - /// Saves a decompiled entry to the passed file. Returns true on success. - bool SaveDecompiledFile(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries); - /// Opens current game's transferable file and write it's header if it doesn't exist FileUtil::IOFile AppendTransferableFile() const; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 817c6e12c..0e22eede9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -16,18 +16,8 @@ using VideoCommon::Shader::CompilerSettings; using VideoCommon::Shader::ProgramCode; using VideoCommon::Shader::ShaderIR; -static constexpr u32 PROGRAM_OFFSET = 10; -static constexpr u32 COMPUTE_OFFSET = 0; - -static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true}; - -ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: VS" + id + "\n\n"; - out += GetCommonDeclarations(); - +std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { + std::string out = GetCommonDeclarations(); out += R"( layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { vec4 viewport_flip; @@ -35,18 +25,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { }; )"; - - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings, - locker); - const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; - ProgramResult program = Decompile(device, program_ir, stage, "vertex"); - out += program.first; - - if (setup.IsDualProgram()) { - const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b, - settings, locker); - ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); - out += program_b.first; + const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB; + out += Decompile(device, ir, stage, "vertex"); + if (ir_b) { + out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b"); } out += R"( @@ -54,7 +36,7 @@ void main() { execute_vertex(); )"; - if (setup.IsDualProgram()) { + if (ir_b) { out += " execute_vertex_b();"; } @@ -68,18 +50,13 @@ void main() { // Viewport can be flipped, which is unsupported by glViewport gl_Position.xy *= viewport_flip.xy; } -})"; - - return {std::move(out), std::move(program.second)}; +} +)"; + return out; } -ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: GS" + id + "\n\n"; - out += GetCommonDeclarations(); - +std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { + std::string out = GetCommonDeclarations(); out += R"( layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { vec4 viewport_flip; @@ -87,27 +64,18 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { }; )"; - - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings, - locker); - ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); - out += program.first; + out += Decompile(device, ir, ProgramType::Geometry, "geometry"); out += R"( void main() { execute_geometry(); -};)"; - - return {std::move(out), std::move(program.second)}; +} +)"; + return out; } -ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: FS" + id + "\n\n"; - out += GetCommonDeclarations(); - +std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { + std::string out = GetCommonDeclarations(); out += R"( layout (location = 0) out vec4 FragColor0; layout (location = 1) out vec4 FragColor1; @@ -124,39 +92,25 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { }; )"; - - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings, - locker); - ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); - out += program.first; + out += Decompile(device, ir, ProgramType::Fragment, "fragment"); out += R"( void main() { execute_fragment(); } - )"; - return {std::move(out), std::move(program.second)}; + return out; } -ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: CS" + id + "\n\n"; - out += GetCommonDeclarations(); - - const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings, - locker); - ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); - out += program.first; - +std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { + std::string out = GetCommonDeclarations(); + out += Decompile(device, ir, ProgramType::Compute, "compute"); out += R"( void main() { execute_compute(); } )"; - return {std::move(out), std::move(program.second)}; + return out; } } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 05f157298..cba2be9f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -16,50 +16,19 @@ class Device; namespace OpenGL::GLShader { -using VideoCommon::Shader::ConstBufferLocker; using VideoCommon::Shader::ProgramCode; - -struct ShaderSetup { - explicit ShaderSetup(ProgramCode program_code) { - program.code = std::move(program_code); - } - - struct { - ProgramCode code; - ProgramCode code_b; // Used for dual vertex shaders - u64 unique_identifier; - std::size_t size_a; - std::size_t size_b; - } program; - - /// Used in scenarios where we have a dual vertex shaders - void SetProgramB(ProgramCode program_b) { - program.code_b = std::move(program_b); - has_program_b = true; - } - - bool IsDualProgram() const { - return has_program_b; - } - -private: - bool has_program_b{}; -}; +using VideoCommon::Shader::ShaderIR; /// Generates the GLSL vertex shader program source code for the given VS program -ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup); +std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b); /// Generates the GLSL geometry shader program source code for the given GS program -ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup); +std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir); /// Generates the GLSL fragment shader program source code for the given FS program -ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup); +std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir); /// Generates the GLSL compute shader program source code for the given CS program -ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup); +std::string GenerateComputeShader(const Device& device, const ShaderIR& ir); } // namespace OpenGL::GLShader diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 9d23bcecf..37a0968a1 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -15,15 +15,15 @@ ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) : engine{nullptr}, shader_stage{shader_stage} {} ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface* engine) - : engine{engine}, shader_stage{shader_stage} {} + Tegra::Engines::ConstBufferEngineInterface& engine) + : engine{&engine}, shader_stage{shader_stage} {} bool ConstBufferLocker::IsEngineSet() const { return engine != nullptr; } -void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine_) { - engine = engine_; +void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine_) { + engine = &engine_; } std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 13eeba320..54459977f 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -21,14 +21,14 @@ public: explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface* engine); + Tegra::Engines::ConstBufferEngineInterface& engine); // Checks if an engine is setup, it may be possible that during disk shader // cache run, the engines have not been created yet. bool IsEngineSet() const; // Use this to set/change the engine used for this shader. - void SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine); + void SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine); // Retrieves a key from the locker, if it's registered, it will give the // registered value, if not it will obtain it from maxwell3d and register it. diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index d1c269ea7..6c698bcff 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -66,10 +66,11 @@ struct BlockInfo { }; struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, - const u32 start, ConstBufferLocker& locker) - : start{start}, program_code{program_code}, program_size{program_size}, locker{locker} {} + explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) + : program_code{program_code}, start{start}, locker{locker} {} + const ProgramCode& program_code; + ConstBufferLocker& locker; u32 start{}; std::vector<BlockInfo> block_info{}; std::list<u32> inspect_queries{}; @@ -79,10 +80,7 @@ struct CFGRebuildState { std::map<u32, u32> ssy_labels{}; std::map<u32, u32> pbk_labels{}; std::unordered_map<u32, BlockStack> stacks{}; - const ProgramCode& program_code; - const std::size_t program_size; ASTManager* manager; - ConstBufferLocker& locker; }; enum class BlockCollision : u32 { None, Found, Inside }; @@ -242,7 +240,7 @@ std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { u32 offset = static_cast<u32>(address); - const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); + const u32 end_address = static_cast<u32>(state.program_code.size()); ParseInfo parse_info{}; SingleBranch single_branch{}; @@ -583,6 +581,7 @@ bool TryQuery(CFGRebuildState& state) { } return true; } + } // Anonymous namespace void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { @@ -651,8 +650,7 @@ void DecompileShader(CFGRebuildState& state) { state.manager->Decompile(); } -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, - std::size_t program_size, u32 start_address, +std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, const CompilerSettings& settings, ConstBufferLocker& locker) { auto result_out = std::make_unique<ShaderCharacteristics>(); @@ -661,7 +659,7 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, return result_out; } - CFGRebuildState state{program_code, program_size, start_address, locker}; + CFGRebuildState state{program_code, start_address, locker}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 369ca255b..288ee68af 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -105,8 +105,7 @@ struct ShaderCharacteristics { CompilerSettings settings{}; }; -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, - std::size_t program_size, u32 start_address, +std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, const CompilerSettings& settings, ConstBufferLocker& locker); diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 053241128..e1afa4582 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } -} // namespace +} // Anonymous namespace class ASTDecoder { public: @@ -102,7 +102,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); decompiled = false; - auto info = ScanFlow(program_code, program_size, main_offset, settings, locker); + auto info = ScanFlow(program_code, main_offset, settings, locker); auto& shader_info = *info; coverage_begin = shader_info.start; coverage_end = shader_info.end; @@ -155,7 +155,7 @@ void ShaderIR::Decode() { [[fallthrough]]; case CompileDepth::BruteForce: { coverage_begin = main_offset; - const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); + const u32 shader_end = program_code.size(); coverage_end = shader_end; for (u32 label = main_offset; label < shader_end; label++) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); @@ -225,7 +225,8 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { for (auto& branch_case : multi_branch->branches) { Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); Node op_b = Immediate(branch_case.cmp_value); - Node condition = GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); + Node condition = + GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); auto result = Conditional(condition, {n}); bb.push_back(result); global_code.push_back(result); diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 6430575ec..1d718ccc6 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -22,10 +22,9 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using Tegra::Shader::Register; -ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, - CompilerSettings settings, ConstBufferLocker& locker) - : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, - program_manager{true, true}, settings{settings}, locker{locker} { +ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + ConstBufferLocker& locker) + : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { Decode(); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 3a3e381d2..3ebea91b9 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -67,8 +67,8 @@ struct GlobalMemoryUsage { class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, - CompilerSettings settings, ConstBufferLocker& locker); + explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + ConstBufferLocker& locker); ~ShaderIR(); const std::map<u32, NodeBlock>& GetBasicBlocks() const { @@ -384,7 +384,9 @@ private: const ProgramCode& program_code; const u32 main_offset; - const std::size_t program_size; + const CompilerSettings settings; + ConstBufferLocker& locker; + bool decompiled{}; bool disable_flow_stack{}; @@ -393,9 +395,7 @@ private: std::map<u32, NodeBlock> basic_blocks; NodeBlock global_code; - ASTManager program_manager; - CompilerSettings settings{}; - ConstBufferLocker& locker; + ASTManager program_manager{true, true}; std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; |