diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/command_processor.cpp | 142 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 1 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 3 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 12 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 50 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 20 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 153 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 749 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.h | 21 | ||||
-rw-r--r-- | src/video_core/surface.h | 552 |
11 files changed, 677 insertions, 1027 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp deleted file mode 100644 index 8b9c548cc..000000000 --- a/src/video_core/command_processor.cpp +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <array> -#include <cstddef> -#include <memory> -#include <utility> -#include "common/assert.h" -#include "common/logging/log.h" -#include "common/microprofile.h" -#include "common/vector_math.h" -#include "core/memory.h" -#include "core/tracer/recorder.h" -#include "video_core/command_processor.h" -#include "video_core/engines/fermi_2d.h" -#include "video_core/engines/kepler_memory.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/maxwell_compute.h" -#include "video_core/engines/maxwell_dma.h" -#include "video_core/gpu.h" -#include "video_core/renderer_base.h" -#include "video_core/video_core.h" - -namespace Tegra { - -enum class BufferMethods { - BindObject = 0, - CountBufferMethods = 0x40, -}; - -MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192)); - -void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { - MICROPROFILE_SCOPE(ProcessCommandLists); - - // On entering GPU code, assume all memory may be touched by the ARM core. - maxwell_3d->dirty_flags.OnMemoryWrite(); - - auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { - LOG_TRACE(HW_GPU, - "Processing method {:08X} on subchannel {} value " - "{:08X} remaining params {}", - method, subchannel, value, remaining_params); - - ASSERT(subchannel < bound_engines.size()); - - if (method == static_cast<u32>(BufferMethods::BindObject)) { - // Bind the current subchannel to the desired engine id. - LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value); - bound_engines[subchannel] = static_cast<EngineID>(value); - return; - } - - if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) { - // TODO(Subv): Research and implement these methods. - LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented"); - return; - } - - const EngineID engine = bound_engines[subchannel]; - - switch (engine) { - case EngineID::FERMI_TWOD_A: - fermi_2d->WriteReg(method, value); - break; - case EngineID::MAXWELL_B: - maxwell_3d->WriteReg(method, value, remaining_params); - break; - case EngineID::MAXWELL_COMPUTE_B: - maxwell_compute->WriteReg(method, value); - break; - case EngineID::MAXWELL_DMA_COPY_A: - maxwell_dma->WriteReg(method, value); - break; - case EngineID::KEPLER_INLINE_TO_MEMORY_B: - kepler_memory->WriteReg(method, value); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented engine"); - } - }; - - for (auto entry : commands) { - Tegra::GPUVAddr address = entry.Address(); - u32 size = entry.sz; - const std::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address); - VAddr current_addr = *head_address; - while (current_addr < *head_address + size * sizeof(CommandHeader)) { - const CommandHeader header = {Memory::Read32(current_addr)}; - current_addr += sizeof(u32); - - switch (header.mode.Value()) { - case SubmissionMode::IncreasingOld: - case SubmissionMode::Increasing: { - // Increase the method value with each argument. - for (unsigned i = 0; i < header.arg_count; ++i) { - WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr), - header.arg_count - i - 1); - current_addr += sizeof(u32); - } - break; - } - case SubmissionMode::NonIncreasingOld: - case SubmissionMode::NonIncreasing: { - // Use the same method value for all arguments. - for (unsigned i = 0; i < header.arg_count; ++i) { - WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), - header.arg_count - i - 1); - current_addr += sizeof(u32); - } - break; - } - case SubmissionMode::IncreaseOnce: { - ASSERT(header.arg_count.Value() >= 1); - - // Use the original method for the first argument and then the next method for all - // other arguments. - WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), - header.arg_count - 1); - current_addr += sizeof(u32); - - for (unsigned i = 1; i < header.arg_count; ++i) { - WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr), - header.arg_count - i - 1); - current_addr += sizeof(u32); - } - break; - } - case SubmissionMode::Inline: { - // The register value is stored in the bits 16-28 as an immediate - WriteReg(header.method, header.subchannel, header.inline_data, 0); - break; - } - default: - UNIMPLEMENTED(); - } - } - } -} - -} // namespace Tegra diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d3b3ed1f0..25bb7604a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -42,6 +42,7 @@ public: static constexpr std::size_t NumVertexArrays = 32; static constexpr std::size_t NumVertexAttributes = 32; static constexpr std::size_t NumTextureSamplers = 32; + static constexpr std::size_t NumClipDistances = 8; static constexpr std::size_t MaxShaderProgram = 6; static constexpr std::size_t MaxShaderStage = 5; // Maximum number of const buffers per shader stage. diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index b9faaf8e0..5ea094e64 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1049,6 +1049,7 @@ union Instruction { BitField<49, 1, u64> nodep_flag; BitField<50, 3, u64> component_mask_selector; BitField<53, 4, u64> texture_info; + BitField<60, 1, u64> fp32_flag; TextureType GetTextureType() const { // The TEXS instruction has a weird encoding for the texture type. @@ -1549,7 +1550,7 @@ private: INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("110000----111---", Id::TEX, Type::Memory, "TEX"), INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), - INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"), + INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 6c81dee64..88c45a423 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -128,10 +128,8 @@ enum class BufferMethods { }; void GPU::CallMethod(const MethodCall& method_call) { - LOG_TRACE(HW_GPU, - "Processing method {:08X} on subchannel {} value " - "{:08X} remaining params {}", - MethCall.method, MethCall.subchannel, value, remaining_params); + LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, + method_call.subchannel); ASSERT(method_call.subchannel < bound_engines.size()); @@ -143,6 +141,12 @@ void GPU::CallMethod(const MethodCall& method_call) { return; } + if (method_call.method < static_cast<u32>(BufferMethods::CountBufferMethods)) { + // TODO(Subv): Research and implement these methods. + LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented"); + return; + } + const EngineID engine = bound_engines[method_call.subchannel]; switch (engine) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7b836cc94..2b29fc45f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -108,19 +108,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo state.texture_units[i].sampler = texture_samplers[i].sampler.handle; } - GLint ext_num; - glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num); - for (GLint i = 0; i < ext_num; i++) { - const std::string_view extension{ - reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; - - if (extension == "GL_ARB_direct_state_access") { - has_ARB_direct_state_access = true; - } else if (extension == "GL_ARB_multi_bind") { - has_ARB_multi_bind = true; - } - } - OpenGLState::ApplyDefaultState(); shader_program_manager = std::make_unique<GLShader::ProgramManager>(); @@ -312,6 +299,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; u32 current_texture_bindpoint = 0; + std::array<bool, Maxwell::NumClipDistances> clip_distances{}; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = gpu.regs.shader_config[index]; @@ -372,12 +360,22 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode, current_texture_bindpoint); + // Workaround for Intel drivers. + // When a clip distance is enabled but not set in the shader it crops parts of the screen + // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the + // clip distances only when it's written by a shader stage. + for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) { + clip_distances[i] |= shader->GetShaderEntries().clip_distances[i]; + } + // When VertexA is enabled, we have dual vertex shaders if (program == Maxwell::ShaderProgram::VertexA) { // VertexB was combined with VertexA, so we skip the VertexB iteration index++; } } + + SyncClipEnabled(clip_distances); } void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, @@ -498,7 +496,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us // TODO(bunnei): Figure out how the below register works. According to envytools, this should be // used to enable multiple render targets. However, it is left unset on all games that I have // tested. - ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented"); + UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0); // Bind the framebuffer surfaces current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0; @@ -680,7 +678,6 @@ void RasterizerOpenGL::DrawArrays() { SyncCullMode(); SyncPrimitiveRestart(); SyncScissorTest(state); - SyncClipEnabled(); // Alpha Testing is synced on shaders. SyncTransformFeedback(); SyncPointState(); @@ -1057,20 +1054,23 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0; } -void RasterizerOpenGL::SyncClipEnabled() { +void RasterizerOpenGL::SyncClipEnabled( + const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) { + const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - state.clip_distance[0] = regs.clip_distance_enabled.c0 != 0; - state.clip_distance[1] = regs.clip_distance_enabled.c1 != 0; - state.clip_distance[2] = regs.clip_distance_enabled.c2 != 0; - state.clip_distance[3] = regs.clip_distance_enabled.c3 != 0; - state.clip_distance[4] = regs.clip_distance_enabled.c4 != 0; - state.clip_distance[5] = regs.clip_distance_enabled.c5 != 0; - state.clip_distance[6] = regs.clip_distance_enabled.c6 != 0; - state.clip_distance[7] = regs.clip_distance_enabled.c7 != 0; + const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{ + regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0, + regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0, + regs.clip_distance_enabled.c4 != 0, regs.clip_distance_enabled.c5 != 0, + regs.clip_distance_enabled.c6 != 0, regs.clip_distance_enabled.c7 != 0}; + + for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) { + state.clip_distance[i] = reg_state[i] && clip_mask[i]; + } } void RasterizerOpenGL::SyncClipCoef() { - UNREACHABLE(); + UNIMPLEMENTED(); } void RasterizerOpenGL::SyncCullMode() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 959e8df63..8a891ffc7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -61,20 +61,6 @@ public: bool AccelerateDrawBatch(bool is_indexed) override; void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override; - /// OpenGL shader generated for a given Maxwell register state - struct MaxwellShader { - /// OpenGL shader resource - OGLProgram shader; - }; - - struct VertexShader { - OGLShader shader; - }; - - struct FragmentShader { - OGLShader shader; - }; - /// Maximum supported size that a constbuffer can have in bytes. static constexpr std::size_t MaxConstbufferSize = 0x10000; static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, @@ -143,7 +129,8 @@ private: void SyncViewport(OpenGLState& current_state); /// Syncs the clip enabled status to match the guest state - void SyncClipEnabled(); + void SyncClipEnabled( + const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& clip_mask); /// Syncs the clip coefficients to match the guest state void SyncClipCoef(); @@ -194,9 +181,6 @@ private: /// but are needed for correct emulation void CheckExtensions(); - bool has_ARB_direct_state_access = false; - bool has_ARB_multi_bind = false; - OpenGLState state; RasterizerCacheOpenGL res_cache; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index dde2f468d..5f4cdd119 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -405,138 +405,6 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, } } -MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64)); -static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, - GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0, - GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { - MICROPROFILE_SCOPE(OpenGL_BlitSurface); - - const auto& src_params{src_surface->GetSurfaceParams()}; - const auto& dst_params{dst_surface->GetSurfaceParams()}; - - OpenGLState prev_state{OpenGLState::GetCurState()}; - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.draw.read_framebuffer = read_fb_handle; - state.draw.draw_framebuffer = draw_fb_handle; - // Set sRGB enabled if the destination surfaces need it - state.framebuffer_srgb.enabled = dst_params.srgb_conversion; - state.ApplyFramebufferState(); - - u32 buffers{}; - - if (src_params.type == SurfaceType::ColorTexture) { - switch (src_params.target) { - case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, src_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - case SurfaceTarget::TextureCubemap: - glFramebufferTexture2D( - GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), - src_surface->Texture().handle, 0); - glFramebufferTexture2D( - GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); - break; - case SurfaceTarget::Texture2DArray: - glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - src_surface->Texture().handle, 0, 0); - glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); - break; - case SurfaceTarget::Texture3D: - glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - SurfaceTargetToGL(src_params.target), - src_surface->Texture().handle, 0, 0); - glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - SurfaceTargetToGL(src_params.target), 0, 0, 0); - break; - default: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, src_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - } - - switch (dst_params.target) { - case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, dst_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - case SurfaceTarget::TextureCubemap: - glFramebufferTexture2D( - GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), - dst_surface->Texture().handle, 0); - glFramebufferTexture2D( - GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); - break; - case SurfaceTarget::Texture2DArray: - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - dst_surface->Texture().handle, 0, 0); - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); - break; - - case SurfaceTarget::Texture3D: - glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - SurfaceTargetToGL(dst_params.target), - dst_surface->Texture().handle, 0, 0); - glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - SurfaceTargetToGL(dst_params.target), 0, 0, 0); - break; - default: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, dst_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - } - - buffers = GL_COLOR_BUFFER_BIT; - } else if (src_params.type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - src_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - buffers = GL_DEPTH_BUFFER_BIT; - } else if (src_params.type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - src_surface->Texture().handle, 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->Texture().handle, 0); - - buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; - } - - const auto& rect{src_params.GetRect()}; - glBlitFramebuffer(rect.left, rect.bottom, rect.right, rect.top, rect.left, rect.bottom, - rect.right, rect.top, buffers, - buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); - - return true; -} - static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) { const auto& src_params{src_surface->GetSurfaceParams()}; const auto& dst_params{dst_surface->GetSurfaceParams()}; @@ -841,9 +709,10 @@ void CachedSurface::LoadGLBuffer() { const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; gl_buffer[0].assign(texture_src_data, texture_src_data_end); } - for (u32 i = 0; i < params.max_mip_level; i++) + for (u32 i = 0; i < params.max_mip_level; i++) { ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), params.MipHeight(i), params.MipDepth(i)); + } } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); @@ -1163,7 +1032,10 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface) { const auto& src_params{src_surface->GetSurfaceParams()}; const auto& dst_params{dst_surface->GetSurfaceParams()}; - FlushRegion(src_params.addr, dst_params.MemorySize()); + + // Flush enough memory for both the source and destination surface + FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize())); + LoadSurface(dst_surface); } @@ -1189,20 +1061,9 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, return new_surface; } - // If the format is the same, just do a framebuffer blit. This is significantly faster than - // using PBOs. The is also likely less accurate, as textures will be converted rather than - // reinterpreted. When use_accurate_gpu_emulation setting is enabled, perform a more accurate - // surface copy, where pixels are reinterpreted as a new format (without conversion). This - // code path uses OpenGL PBOs and is quite slow. - const bool is_blit{old_params.pixel_format == new_params.pixel_format}; - switch (new_params.target) { case SurfaceTarget::Texture2D: - if (is_blit) { - BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle); - } else { - CopySurface(old_surface, new_surface, copy_pbo.handle); - } + CopySurface(old_surface, new_surface, copy_pbo.handle); break; case SurfaceTarget::Texture3D: AccurateCopySurface(old_surface, new_surface); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0c4524d5c..4fc09cac6 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -50,6 +50,14 @@ public: using std::runtime_error::runtime_error; }; +/// Generates code to use for a swizzle operation. +static std::string GetSwizzle(u64 elem) { + ASSERT(elem <= 3); + std::string swizzle = "."; + swizzle += "xyzw"[elem]; + return swizzle; +} + /// Translate topology static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { switch (topology) { @@ -201,14 +209,53 @@ private: } }; +template <typename T> +class ShaderScopedScope { +public: + explicit ShaderScopedScope(T& writer, std::string_view begin_expr, std::string end_expr) + : writer(writer), end_expr(std::move(end_expr)) { + + if (begin_expr.empty()) { + writer.AddLine('{'); + } else { + writer.AddExpression(begin_expr); + writer.AddLine(" {"); + } + ++writer.scope; + } + + ShaderScopedScope(const ShaderScopedScope&) = delete; + + ~ShaderScopedScope() { + --writer.scope; + if (end_expr.empty()) { + writer.AddLine('}'); + } else { + writer.AddExpression("} "); + writer.AddExpression(end_expr); + writer.AddLine(';'); + } + } + + ShaderScopedScope& operator=(const ShaderScopedScope&) = delete; + +private: + T& writer; + std::string end_expr; +}; + class ShaderWriter { public: - void AddLine(std::string_view text) { + void AddExpression(std::string_view text) { DEBUG_ASSERT(scope >= 0); if (!text.empty()) { AppendIndentation(); } shader_source += text; + } + + void AddLine(std::string_view text) { + AddExpression(text); AddNewLine(); } @@ -228,6 +275,11 @@ public: return std::move(shader_source); } + ShaderScopedScope<ShaderWriter> Scope(std::string_view begin_expr = {}, + std::string end_expr = {}) { + return ShaderScopedScope(*this, begin_expr, end_expr); + } + int scope = 0; private: @@ -311,7 +363,7 @@ public: // Default - do nothing return value; default: - UNIMPLEMENTED_MSG("Unimplemented conversion size: {}", static_cast<u32>(size)); + UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size)); } } @@ -525,6 +577,7 @@ public: ((header.vtg.clip_distances >> index) & 1) == 0, "Shader is setting gl_ClipDistance{} without enabling it in the header", index); + clip_distances[index] = true; fixed_pipeline_output_attributes_used.insert(attribute); shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';'); break; @@ -602,6 +655,11 @@ public: return used_samplers; } + /// Returns an array of the used clip distances. + const std::array<bool, Maxwell::NumClipDistances>& GetClipDistances() const { + return clip_distances; + } + /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if /// necessary. std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, @@ -810,14 +868,12 @@ private: } if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) { - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); + // This avoids optimizations of constant propagation and keeps the code as the original // Sadly using the precise keyword causes "linking" errors on fragment shaders. shader.AddLine("precise float tmp = " + src + ';'); shader.AddLine(dest + " = tmp;"); - --shader.scope; - shader.AddLine('}'); } else { shader.AddLine(dest + " = " + src + ';'); } @@ -956,14 +1012,6 @@ private: } } - /// Generates code to use for a swizzle operation. - static std::string GetSwizzle(u64 elem) { - ASSERT(elem <= 3); - std::string swizzle = "."; - swizzle += "xyzw"[elem]; - return swizzle; - } - ShaderWriter& shader; ShaderWriter& declarations; std::vector<GLSLRegister> regs; @@ -975,6 +1023,7 @@ private: const std::string& suffix; const Tegra::Shader::Header& header; std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used; + std::array<bool, Maxwell::NumClipDistances> clip_distances{}; u64 local_memory_size; }; @@ -997,7 +1046,8 @@ public: /// Returns entries in the shader that are useful for external functions ShaderEntries GetEntries() const { - return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), shader_length}; + return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), regs.GetClipDistances(), + shader_length}; } private: @@ -1293,15 +1343,7 @@ private: regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); } - void WriteTexsInstruction(const Instruction& instr, const std::string& coord, - const std::string& texture) { - // Add an extra scope and declare the texture coords inside to prevent - // overwriting them in case they are used as outputs of the texs instruction. - shader.AddLine('{'); - ++shader.scope; - shader.AddLine(coord); - shader.AddLine("vec4 texture_tmp = " + texture + ';'); - + void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) { // TEXS has two destination registers and a swizzle. The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 @@ -1313,19 +1355,49 @@ private: if (written_components < 2) { // Write the first two swizzle components to gpr0 and gpr0+1 - regs.SetRegisterToFloat(instr.gpr0, component, "texture_tmp", 1, 4, false, + regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, written_components % 2); } else { ASSERT(instr.texs.HasTwoDestinations()); // Write the rest of the swizzle components to gpr28 and gpr28+1 - regs.SetRegisterToFloat(instr.gpr28, component, "texture_tmp", 1, 4, false, + regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, written_components % 2); } ++written_components; } - --shader.scope; - shader.AddLine('}'); + } + + void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) { + // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half + // float instruction). + + std::array<std::string, 4> components; + u32 written_components = 0; + + for (u32 component = 0; component < 4; ++component) { + if (!instr.texs.IsComponentEnabled(component)) + continue; + components[written_components++] = texture + GetSwizzle(component); + } + if (written_components == 0) + return; + + const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) { + return "vec2(" + low + ", " + (high_enabled ? high : "0") + ')'; + }; + + regs.SetRegisterToHalfFloat( + instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1), + Tegra::Shader::HalfMerge::H0_H1, 1, 1); + + if (written_components > 2) { + ASSERT(instr.texs.HasTwoDestinations()); + regs.SetRegisterToHalfFloat( + instr.gpr28, 0, + BuildComponent(components[2], components[3], written_components > 3), + Tegra::Shader::HalfMerge::H0_H1, 1, 1); + } } static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { @@ -1348,12 +1420,10 @@ private: * top. */ void EmitPushToFlowStack(u32 target) { - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); + shader.AddLine("flow_stack[flow_stack_top] = " + std::to_string(target) + "u;"); shader.AddLine("flow_stack_top++;"); - --shader.scope; - shader.AddLine('}'); } /* @@ -1361,13 +1431,11 @@ private: * popped address and decrementing the stack top. */ void EmitPopFromFlowStack() { - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); + shader.AddLine("flow_stack_top--;"); shader.AddLine("jmp_to = flow_stack[flow_stack_top];"); shader.AddLine("break;"); - --shader.scope; - shader.AddLine('}'); } /// Writes the output values from a fragment shader to the corresponding GLSL output variables. @@ -1479,6 +1547,161 @@ private: } } + std::pair<size_t, std::string> ValidateAndGetCoordinateElement( + const Tegra::Shader::TextureType texture_type, const bool depth_compare, + const bool is_array, const bool lod_bias_enabled, size_t max_coords, size_t max_inputs) { + const size_t coord_count = TextureCoordinates(texture_type); + + size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); + const size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); + if (total_coord_count > max_coords || total_reg_count > max_inputs) { + UNIMPLEMENTED_MSG("Unsupported Texture operation"); + total_coord_count = std::min(total_coord_count, max_coords); + } + // 1D.DC opengl is using a vec3 but 2nd component is ignored later. + total_coord_count += + (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) + ? 1 + : 0; + + constexpr std::array<const char*, 5> coord_container{ + {"", "float coord = (", "vec2 coord = vec2(", "vec3 coord = vec3(", + "vec4 coord = vec4("}}; + + return std::pair<size_t, std::string>(coord_count, coord_container[total_coord_count]); + } + + std::string GetTextureCode(const Tegra::Shader::Instruction& instr, + const Tegra::Shader::TextureType texture_type, + const Tegra::Shader::TextureProcessMode process_mode, + const bool depth_compare, const bool is_array, + const size_t bias_offset) { + + if ((texture_type == Tegra::Shader::TextureType::Texture3D && + (is_array || depth_compare)) || + (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && + depth_compare)) { + UNIMPLEMENTED_MSG("This method is not supported."); + } + + const std::string sampler = + GetSampler(instr.sampler, texture_type, is_array, depth_compare); + + const bool lod_needed = process_mode == Tegra::Shader::TextureProcessMode::LZ || + process_mode == Tegra::Shader::TextureProcessMode::LL || + process_mode == Tegra::Shader::TextureProcessMode::LLA; + + const bool gl_lod_supported = !( + (texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) || + (texture_type == Tegra::Shader::TextureType::TextureCube && !is_array && + depth_compare)); + + const std::string read_method = lod_needed && gl_lod_supported ? "textureLod(" : "texture("; + std::string texture = read_method + sampler + ", coord"; + + if (process_mode != Tegra::Shader::TextureProcessMode::None) { + if (process_mode == Tegra::Shader::TextureProcessMode::LZ) { + if (gl_lod_supported) { + texture += ", 0"; + } else { + // Lod 0 is emulated by a big negative bias + // in scenarios that are not supported by glsl + texture += ", -1000"; + } + } else { + // If present, lod or bias are always stored in the register indexed by the + // gpr20 + // field with an offset depending on the usage of the other registers + texture += ',' + regs.GetRegisterAsFloat(instr.gpr20.Value() + bias_offset); + } + } + texture += ")"; + return texture; + } + + std::pair<std::string, std::string> GetTEXCode( + const Instruction& instr, const Tegra::Shader::TextureType texture_type, + const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare, + const bool is_array) { + const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && + process_mode != Tegra::Shader::TextureProcessMode::LZ); + + const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement( + texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + + std::string coord = coord_dcl; + for (size_t i = 0; i < coord_count;) { + coord += regs.GetRegisterAsFloat(coord_register + i); + ++i; + if (i != coord_count) { + coord += ','; + } + } + // 1D.DC in opengl the 2nd component is ignored. + if (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) { + coord += ",0.0"; + } + if (depth_compare) { + // Depth is always stored in the register signaled by gpr20 + // or in the next register if lod or bias are used + const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); + coord += ',' + regs.GetRegisterAsFloat(depth_register); + } + if (is_array) { + coord += ',' + regs.GetRegisterAsInteger(array_register); + } + coord += ");"; + return std::make_pair( + coord, GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0)); + } + + std::pair<std::string, std::string> GetTEXSCode( + const Instruction& instr, const Tegra::Shader::TextureType texture_type, + const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare, + const bool is_array) { + const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && + process_mode != Tegra::Shader::TextureProcessMode::LZ); + + const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement( + texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + const u64 last_coord_register = + (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) + ? static_cast<u64>(instr.gpr20.Value()) + : coord_register + 1; + + std::string coord = coord_dcl; + for (size_t i = 0; i < coord_count; ++i) { + const bool last = (i == (coord_count - 1)) && (coord_count > 1); + coord += regs.GetRegisterAsFloat(last ? last_coord_register : coord_register + i); + if (!last) { + coord += ','; + } + } + + if (depth_compare) { + // Depth is always stored in the register signaled by gpr20 + // or in the next register if lod or bias are used + const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); + coord += ',' + regs.GetRegisterAsFloat(depth_register); + } + if (is_array) { + coord += ',' + regs.GetRegisterAsInteger(array_register); + } + coord += ");"; + + return std::make_pair(coord, + GetTextureCode(instr, texture_type, process_mode, depth_compare, + is_array, (coord_count > 2 ? 1 : 0))); + } + /** * Compiles a single instruction from Tegra to GLSL. * @param offset the offset of the Tegra shader instruction. @@ -2279,8 +2502,7 @@ private: UNIMPLEMENTED_IF(instr.conversion.selector); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in I2F is not implemented"); - - std::string op_a{}; + std::string op_a; if (instr.is_b_gpr) { op_a = @@ -2436,10 +2658,7 @@ private: case OpCode::Id::LD_C: { UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); - // Add an extra scope and declare the index register inside to prevent - // overwriting it in case it is used as an output of the LD instruction. - shader.AddLine("{"); - ++shader.scope; + const auto scope = shader.Scope(); shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);"); @@ -2465,19 +2684,13 @@ private: UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value())); } - - --shader.scope; - shader.AddLine("}"); break; } case OpCode::Id::LD_L: { UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", static_cast<unsigned>(instr.ld_l.unknown.Value())); - // Add an extra scope and declare the index register inside to prevent - // overwriting it in case it is used as an output of the LD instruction. - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + std::to_string(instr.smem_imm.Value()) + ')'; @@ -2494,9 +2707,6 @@ private: UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", static_cast<unsigned>(instr.ldst_sl.type.Value())); } - - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::ST_A: { @@ -2531,10 +2741,7 @@ private: UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", static_cast<unsigned>(instr.st_l.unknown.Value())); - // Add an extra scope and declare the index register inside to prevent - // overwriting it in case it is used as an output of the LD instruction. - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + std::to_string(instr.smem_imm.Value()) + ')'; @@ -2549,179 +2756,28 @@ private: UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", static_cast<unsigned>(instr.ldst_sl.type.Value())); } - - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::TEX: { Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; - std::string coord; const bool is_array = instr.tex.array != 0; - + const bool depth_compare = + instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); + const auto process_mode = instr.tex.GetTextureProcessMode(); UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), "AOFFI is not implemented"); - const bool depth_compare = - instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); - u32 num_coordinates = TextureCoordinates(texture_type); - u32 start_index = 0; - std::string array_elem; - if (is_array) { - array_elem = regs.GetRegisterAsInteger(instr.gpr8); - start_index = 1; - } - const auto process_mode = instr.tex.GetTextureProcessMode(); - u32 start_index_b = 0; - std::string lod_value; - if (process_mode != Tegra::Shader::TextureProcessMode::LZ && - process_mode != Tegra::Shader::TextureProcessMode::None) { - start_index_b = 1; - lod_value = regs.GetRegisterAsFloat(instr.gpr20); - } - - std::string depth_value; - if (depth_compare) { - depth_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + start_index_b); - } + const auto [coord, texture] = + GetTEXCode(instr, texture_type, process_mode, depth_compare, is_array); - bool depth_compare_extra = false; - - switch (num_coordinates) { - case 1: { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index); - if (is_array) { - if (depth_compare) { - coord = "vec3 coords = vec3(" + x + ", " + depth_value + ", " + - array_elem + ");"; - } else { - coord = "vec2 coords = vec2(" + x + ", " + array_elem + ");"; - } - } else { - if (depth_compare) { - coord = "vec2 coords = vec2(" + x + ", " + depth_value + ");"; - } else { - coord = "float coords = " + x + ';'; - } - } - break; - } - case 2: { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index); - const std::string y = - regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1); - if (is_array) { - if (depth_compare) { - coord = "vec4 coords = vec4(" + x + ", " + y + ", " + depth_value + - ", " + array_elem + ");"; - } else { - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + array_elem + ");"; - } - } else { - if (depth_compare) { - coord = - "vec3 coords = vec3(" + x + ", " + y + ", " + depth_value + ");"; - } else { - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; - } - } - break; - } - case 3: { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index); - const std::string y = - regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1); - const std::string z = - regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 2); - if (is_array) { - depth_compare_extra = depth_compare; - coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + - array_elem + ");"; - } else { - if (depth_compare) { - coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + - depth_value + ");"; - } else { - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; - } - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled coordinates number {}", - static_cast<u32>(num_coordinates)); - - // Fallback to interpreting as a 2D texture for now - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; - texture_type = Tegra::Shader::TextureType::Texture2D; - } - - const std::string sampler = - GetSampler(instr.sampler, texture_type, is_array, depth_compare); - // Add an extra scope and declare the texture coords inside to prevent - // overwriting them in case they are used as outputs of the texs instruction. - - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); shader.AddLine(coord); - std::string texture; - switch (instr.tex.GetTextureProcessMode()) { - case Tegra::Shader::TextureProcessMode::None: { - if (!depth_compare_extra) { - texture = "texture(" + sampler + ", coords)"; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - } - break; - } - case Tegra::Shader::TextureProcessMode::LZ: { - if (!depth_compare_extra) { - texture = "textureLod(" + sampler + ", coords, 0.0)"; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - } - break; - } - case Tegra::Shader::TextureProcessMode::LB: - case Tegra::Shader::TextureProcessMode::LBA: { - // TODO: Figure if A suffix changes the equation at all. - if (!depth_compare_extra) { - texture = "texture(" + sampler + ", coords, " + lod_value + ')'; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - LOG_WARNING(HW_GPU, - "OpenGL Limitation: can't set bias value along depth compare"); - } - break; - } - case Tegra::Shader::TextureProcessMode::LL: - case Tegra::Shader::TextureProcessMode::LLA: { - // TODO: Figure if A suffix changes the equation at all. - if (!depth_compare_extra) { - texture = "textureLod(" + sampler + ", coords, " + lod_value + ')'; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - LOG_WARNING(HW_GPU, - "OpenGL Limitation: can't set lod value along depth compare"); - } - break; - } - default: { - if (!depth_compare_extra) { - texture = "texture(" + sampler + ", coords)"; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - } - UNIMPLEMENTED_MSG("Unhandled texture process mode {}", - static_cast<u32>(instr.tex.GetTextureProcessMode())); - } - } - if (!depth_compare) { + if (depth_compare) { + regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); + } else { shader.AddLine("vec4 texture_tmp = " + texture + ';'); std::size_t dest_elem{}; for (std::size_t elem = 0; elem < 4; ++elem) { @@ -2733,138 +2789,36 @@ private: dest_elem); ++dest_elem; } - } else { - regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); } - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::TEXS: { Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; - bool is_array{instr.texs.IsArrayTexture()}; + const bool is_array{instr.texs.IsArrayTexture()}; + const bool depth_compare = + instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); + const auto process_mode = instr.texs.GetTextureProcessMode(); UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); - const bool depth_compare = - instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); - u32 num_coordinates = TextureCoordinates(texture_type); - const auto process_mode = instr.texs.GetTextureProcessMode(); - std::string lod_value; - std::string coord; - u32 lod_offset = 0; - if (process_mode == Tegra::Shader::TextureProcessMode::LL) { - if (num_coordinates > 2) { - lod_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); - lod_offset = 2; - } else { - lod_value = regs.GetRegisterAsFloat(instr.gpr20); - lod_offset = 1; - } - } + const auto scope = shader.Scope(); - switch (num_coordinates) { - case 1: { - coord = "float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';'; - break; - } - case 2: { - if (is_array) { - if (depth_compare) { - const std::string index = regs.GetRegisterAsInteger(instr.gpr8); - const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - const std::string y = regs.GetRegisterAsFloat(instr.gpr20); - const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); - coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + - ");"; - } else { - const std::string index = regs.GetRegisterAsInteger(instr.gpr8); - const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - const std::string y = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"; - } - } else { - if (lod_offset != 0) { - if (depth_compare) { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = - regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - const std::string z = - regs.GetRegisterAsFloat(instr.gpr20.Value() + lod_offset); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; - } else { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = - regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; - } - } else { - if (depth_compare) { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = - regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - const std::string z = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; - } else { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; - } - } - } - break; - } - case 3: { - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - const std::string z = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled coordinates number {}", - static_cast<u32>(num_coordinates)); + auto [coord, texture] = + GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array); - // Fallback to interpreting as a 2D texture for now - const std::string x = regs.GetRegisterAsFloat(instr.gpr8); - const std::string y = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; - texture_type = Tegra::Shader::TextureType::Texture2D; - is_array = false; - } - const std::string sampler = - GetSampler(instr.sampler, texture_type, is_array, depth_compare); - std::string texture; - switch (process_mode) { - case Tegra::Shader::TextureProcessMode::None: { - texture = "texture(" + sampler + ", coords)"; - break; - } - case Tegra::Shader::TextureProcessMode::LZ: { - if (depth_compare && is_array) { - texture = "texture(" + sampler + ", coords)"; - } else { - texture = "textureLod(" + sampler + ", coords, 0.0)"; - } - break; - } - case Tegra::Shader::TextureProcessMode::LL: { - texture = "textureLod(" + sampler + ", coords, " + lod_value + ')'; - break; - } - default: { - texture = "texture(" + sampler + ", coords)"; - UNIMPLEMENTED_MSG("Unhandled texture process mode {}", - static_cast<u32>(instr.texs.GetTextureProcessMode())); - } + shader.AddLine(coord); + + if (depth_compare) { + texture = "vec4(" + texture + ')'; } - if (!depth_compare) { - WriteTexsInstruction(instr, coord, texture); + shader.AddLine("vec4 texture_tmp = " + texture + ';'); + + if (instr.texs.fp32_flag) { + WriteTexsInstructionFloat(instr, "texture_tmp"); } else { - WriteTexsInstruction(instr, coord, "vec4(" + texture + ')'); + WriteTexsInstructionHalfFloat(instr, "texture_tmp"); } - break; } case OpCode::Id::TLDS: { @@ -2883,15 +2837,12 @@ private: u32 extra_op_offset = 0; - // Scope to avoid variable name overlaps. - shader.AddLine('{'); - ++shader.scope; - std::string coords; + ShaderScopedScope scope = shader.Scope(); switch (texture_type) { case Tegra::Shader::TextureType::Texture1D: { const std::string x = regs.GetRegisterAsInteger(instr.gpr8); - coords = "float coords = " + x + ';'; + shader.AddLine("float coords = " + x + ';'); break; } case Tegra::Shader::TextureType::Texture2D: { @@ -2900,7 +2851,7 @@ private: const std::string x = regs.GetRegisterAsInteger(instr.gpr8); const std::string y = regs.GetRegisterAsInteger(instr.gpr20); // shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");"); - coords = "ivec2 coords = ivec2(" + x + ", " + y + ");"; + shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");"); extra_op_offset = 1; break; } @@ -2909,35 +2860,29 @@ private: } const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false); - std::string texture = "texelFetch(" + sampler + ", coords, 0)"; - switch (instr.tlds.GetTextureProcessMode()) { - case Tegra::Shader::TextureProcessMode::LZ: { - texture = "texelFetch(" + sampler + ", coords, 0)"; - break; - } - case Tegra::Shader::TextureProcessMode::LL: { - shader.AddLine( - "float lod = " + - regs.GetRegisterAsInteger(instr.gpr20.Value() + extra_op_offset) + ';'); - texture = "texelFetch(" + sampler + ", coords, lod)"; - break; - } - default: { - texture = "texelFetch(" + sampler + ", coords, 0)"; - UNIMPLEMENTED_MSG("Unhandled texture process mode {}", - static_cast<u32>(instr.tlds.GetTextureProcessMode())); - } - } - WriteTexsInstruction(instr, coords, texture); - --shader.scope; - shader.AddLine('}'); + const std::string texture = [&]() { + switch (instr.tlds.GetTextureProcessMode()) { + case Tegra::Shader::TextureProcessMode::LZ: + return "texelFetch(" + sampler + ", coords, 0)"; + case Tegra::Shader::TextureProcessMode::LL: + shader.AddLine( + "float lod = " + + regs.GetRegisterAsInteger(instr.gpr20.Value() + extra_op_offset) + ';'); + return "texelFetch(" + sampler + ", coords, lod)"; + default: + UNIMPLEMENTED_MSG("Unhandled texture process mode {}", + static_cast<u32>(instr.tlds.GetTextureProcessMode())); + return "texelFetch(" + sampler + ", coords, 0)"; + } + }(); + + WriteTexsInstructionFloat(instr, texture); break; } case OpCode::Id::TLD4: { ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D); ASSERT(instr.tld4.array == 0); - std::string coord; UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); @@ -2954,10 +2899,7 @@ private: if (depth_compare) num_coordinates += 1; - // Add an extra scope and declare the texture coords inside to prevent - // overwriting them in case they are used as outputs of the texs instruction. - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); switch (num_coordinates) { case 2: { @@ -2988,23 +2930,19 @@ private: const std::string texture = "textureGather(" + sampler + ", coords, " + std::to_string(instr.tld4.component) + ')'; - if (!depth_compare) { - shader.AddLine("vec4 texture_tmp = " + texture + ';'); + if (depth_compare) { + regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); + } else { std::size_t dest_elem{}; for (std::size_t elem = 0; elem < 4; ++elem) { if (!instr.tex.IsComponentEnabled(elem)) { // Skip disabled components continue; } - regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, - dest_elem); + regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); ++dest_elem; } - } else { - regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); } - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::TLD4S: { @@ -3015,10 +2953,7 @@ private: instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), "AOFFI is not implemented"); - // Scope to avoid variable name overlaps. - shader.AddLine('{'); - ++shader.scope; - std::string coords; + const auto scope = shader.Scope(); const bool depth_compare = instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); @@ -3027,33 +2962,30 @@ private: // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. const std::string sampler = GetSampler( instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); - if (!depth_compare) { - coords = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; - } else { + if (depth_compare) { // Note: TLD4S coordinate encoding works just like TEXS's const std::string op_y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coords = "vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");"; + shader.AddLine("vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");"); + } else { + shader.AddLine("vec2 coords = vec2(" + op_a + ", " + op_b + ");"); } - const std::string texture = "textureGather(" + sampler + ", coords, " + - std::to_string(instr.tld4s.component) + ')'; - if (!depth_compare) { - WriteTexsInstruction(instr, coords, texture); - } else { - WriteTexsInstruction(instr, coords, "vec4(" + texture + ')'); + std::string texture = "textureGather(" + sampler + ", coords, " + + std::to_string(instr.tld4s.component) + ')'; + if (depth_compare) { + texture = "vec4(" + texture + ')'; } - --shader.scope; - shader.AddLine('}'); + WriteTexsInstructionFloat(instr, texture); break; } case OpCode::Id::TXQ: { UNIMPLEMENTED_IF_MSG(instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); - ++shader.scope; - shader.AddLine('{'); - // TODO: the new commits on the texture refactor, change the way samplers work. + const auto scope = shader.Scope(); + + // TODO: The new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const std::string sampler = @@ -3064,7 +2996,8 @@ private: regs.GetRegisterAsInteger(instr.gpr8) + ')'; const std::string mip_level = "textureQueryLevels(" + sampler + ')'; shader.AddLine("ivec2 sizes = " + texture + ';'); - regs.SetRegisterToInteger(instr.gpr0, true, 0, "sizes.x", 1, 1); + + regs.SetRegisterToInteger(instr.gpr0.Value() + 0, true, 0, "sizes.x", 1, 1); regs.SetRegisterToInteger(instr.gpr0.Value() + 1, true, 0, "sizes.y", 1, 1); regs.SetRegisterToInteger(instr.gpr0.Value() + 2, true, 0, "0", 1, 1); regs.SetRegisterToInteger(instr.gpr0.Value() + 3, true, 0, mip_level, 1, 1); @@ -3075,8 +3008,6 @@ private: static_cast<u32>(instr.txq.query_type.Value())); } } - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::TMML: { @@ -3091,17 +3022,18 @@ private: const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false); - // TODO: add coordinates for different samplers once other texture types are + const auto scope = shader.Scope(); + + // TODO: Add coordinates for different samplers once other texture types are // implemented. - std::string coord; switch (texture_type) { case Tegra::Shader::TextureType::Texture1D: { - coord = "float coords = " + x + ';'; + shader.AddLine("float coords = " + x + ';'); break; } case Tegra::Shader::TextureType::Texture2D: { const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); break; } default: @@ -3109,22 +3041,15 @@ private: // Fallback to interpreting as a 2D texture for now const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); texture_type = Tegra::Shader::TextureType::Texture2D; } - // Add an extra scope and declare the texture coords inside to prevent - // overwriting them in case they are used as outputs of the texs instruction. - shader.AddLine('{'); - ++shader.scope; - shader.AddLine(coord); + const std::string texture = "textureQueryLod(" + sampler + ", coords)"; - const std::string tmp = "vec2 tmp = " + texture + "*vec2(256.0, 256.0);"; - shader.AddLine(tmp); + shader.AddLine("vec2 tmp = " + texture + " * vec2(256.0, 256.0);"); regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1); regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1); - --shader.scope; - shader.AddLine('}'); break; } default: { @@ -3963,4 +3888,4 @@ std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u return {}; } -} // namespace OpenGL::GLShader::Decompiler +} // namespace OpenGL::GLShader::Decompiler
\ No newline at end of file diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index b425d98ae..4fa6d7612 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -163,6 +163,7 @@ private: struct ShaderEntries { std::vector<ConstBufferEntry> const_buffer_entries; std::vector<SamplerEntry> texture_samplers; + std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> clip_distances; std::size_t shader_length; }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index b757f5f44..4970aafed 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -60,6 +60,17 @@ public: } void ApplyTo(OpenGLState& state) { + UpdatePipeline(); + state.draw.shader_program = 0; + state.draw.program_pipeline = pipeline.handle; + state.geometry_shaders.enabled = (gs != 0); + } + +private: + void UpdatePipeline() { + // Avoid updating the pipeline when values have no changed + if (old_vs == vs && old_fs == fs && old_gs == gs) + return; // Workaround for AMD bug glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, @@ -68,14 +79,16 @@ public: glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vs); glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, gs); glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fs); - state.draw.shader_program = 0; - state.draw.program_pipeline = pipeline.handle; - state.geometry_shaders.enabled = (gs != 0); + + // Update the old values + old_vs = vs; + old_fs = fs; + old_gs = gs; } -private: OGLPipeline pipeline; GLuint vs{}, fs{}, gs{}; + GLuint old_vs{}, old_fs{}, old_gs{}; }; } // namespace OpenGL::GLShader diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 0dd3eb2e4..e23cfecbc 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -125,6 +125,75 @@ enum class SurfaceTarget { TextureCubeArray, }; +constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ + 1, // ABGR8U + 1, // ABGR8S + 1, // ABGR8UI + 1, // B5G6R5U + 1, // A2B10G10R10U + 1, // A1B5G5R5U + 1, // R8U + 1, // R8UI + 1, // RGBA16F + 1, // RGBA16U + 1, // RGBA16UI + 1, // R11FG11FB10F + 1, // RGBA32UI + 4, // DXT1 + 4, // DXT23 + 4, // DXT45 + 4, // DXN1 + 4, // DXN2UNORM + 4, // DXN2SNORM + 4, // BC7U + 4, // BC6H_UF16 + 4, // BC6H_SF16 + 4, // ASTC_2D_4X4 + 1, // G8R8U + 1, // G8R8S + 1, // BGRA8 + 1, // RGBA32F + 1, // RG32F + 1, // R32F + 1, // R16F + 1, // R16U + 1, // R16S + 1, // R16UI + 1, // R16I + 1, // RG16 + 1, // RG16F + 1, // RG16UI + 1, // RG16I + 1, // RG16S + 1, // RGB32F + 1, // RGBA8_SRGB + 1, // RG8U + 1, // RG8S + 1, // RG32UI + 1, // R32UI + 4, // ASTC_2D_8X8 + 4, // ASTC_2D_8X5 + 4, // ASTC_2D_5X4 + 1, // BGRA8_SRGB + 4, // DXT1_SRGB + 4, // DXT23_SRGB + 4, // DXT45_SRGB + 4, // BC7U_SRGB + 4, // ASTC_2D_4X4_SRGB + 4, // ASTC_2D_8X8_SRGB + 4, // ASTC_2D_8X5_SRGB + 4, // ASTC_2D_5X4_SRGB + 4, // ASTC_2D_5X5 + 4, // ASTC_2D_5X5_SRGB + 4, // ASTC_2D_10X8 + 4, // ASTC_2D_10X8_SRGB + 1, // Z32F + 1, // Z16 + 1, // Z24S8 + 1, // S8Z24 + 1, // Z32FS8 +}}; + /** * Gets the compression factor for the specified PixelFormat. This applies to just the * "compressed width" and "compressed height", not the overall compression factor of a @@ -135,304 +204,237 @@ static constexpr u32 GetCompressionFactor(PixelFormat format) { if (format == PixelFormat::Invalid) return 0; - constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // G8R8U - 1, // G8R8S - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG32UI - 1, // R32UI - 4, // ASTC_2D_8X8 - 4, // ASTC_2D_8X5 - 4, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 4, // ASTC_2D_4X4_SRGB - 4, // ASTC_2D_8X8_SRGB - 4, // ASTC_2D_8X5_SRGB - 4, // ASTC_2D_5X4_SRGB - 4, // ASTC_2D_5X5 - 4, // ASTC_2D_5X5_SRGB - 4, // ASTC_2D_10X8 - 4, // ASTC_2D_10X8_SRGB - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 - }}; - ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size()); return compression_factor_table[static_cast<std::size_t>(format)]; } +constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ + 1, // ABGR8U + 1, // ABGR8S + 1, // ABGR8UI + 1, // B5G6R5U + 1, // A2B10G10R10U + 1, // A1B5G5R5U + 1, // R8U + 1, // R8UI + 1, // RGBA16F + 1, // RGBA16U + 1, // RGBA16UI + 1, // R11FG11FB10F + 1, // RGBA32UI + 4, // DXT1 + 4, // DXT23 + 4, // DXT45 + 4, // DXN1 + 4, // DXN2UNORM + 4, // DXN2SNORM + 4, // BC7U + 4, // BC6H_UF16 + 4, // BC6H_SF16 + 4, // ASTC_2D_4X4 + 1, // G8R8U + 1, // G8R8S + 1, // BGRA8 + 1, // RGBA32F + 1, // RG32F + 1, // R32F + 1, // R16F + 1, // R16U + 1, // R16S + 1, // R16UI + 1, // R16I + 1, // RG16 + 1, // RG16F + 1, // RG16UI + 1, // RG16I + 1, // RG16S + 1, // RGB32F + 1, // RGBA8_SRGB + 1, // RG8U + 1, // RG8S + 1, // RG32UI + 1, // R32UI + 8, // ASTC_2D_8X8 + 8, // ASTC_2D_8X5 + 5, // ASTC_2D_5X4 + 1, // BGRA8_SRGB + 4, // DXT1_SRGB + 4, // DXT23_SRGB + 4, // DXT45_SRGB + 4, // BC7U_SRGB + 4, // ASTC_2D_4X4_SRGB + 8, // ASTC_2D_8X8_SRGB + 8, // ASTC_2D_8X5_SRGB + 5, // ASTC_2D_5X4_SRGB + 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_SRGB + 10, // ASTC_2D_10X8 + 10, // ASTC_2D_10X8_SRGB + 1, // Z32F + 1, // Z16 + 1, // Z24S8 + 1, // S8Z24 + 1, // Z32FS8 +}}; + static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { if (format == PixelFormat::Invalid) return 0; - constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // G8R8U - 1, // G8R8S - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG32UI - 1, // R32UI - 8, // ASTC_2D_8X8 - 8, // ASTC_2D_8X5 - 5, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 4, // ASTC_2D_4X4_SRGB - 8, // ASTC_2D_8X8_SRGB - 8, // ASTC_2D_8X5_SRGB - 5, // ASTC_2D_5X4_SRGB - 5, // ASTC_2D_5X5 - 5, // ASTC_2D_5X5_SRGB - 10, // ASTC_2D_10X8 - 10, // ASTC_2D_10X8_SRGB - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 - }}; + ASSERT(static_cast<std::size_t>(format) < block_width_table.size()); return block_width_table[static_cast<std::size_t>(format)]; } +constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ + 1, // ABGR8U + 1, // ABGR8S + 1, // ABGR8UI + 1, // B5G6R5U + 1, // A2B10G10R10U + 1, // A1B5G5R5U + 1, // R8U + 1, // R8UI + 1, // RGBA16F + 1, // RGBA16U + 1, // RGBA16UI + 1, // R11FG11FB10F + 1, // RGBA32UI + 4, // DXT1 + 4, // DXT23 + 4, // DXT45 + 4, // DXN1 + 4, // DXN2UNORM + 4, // DXN2SNORM + 4, // BC7U + 4, // BC6H_UF16 + 4, // BC6H_SF16 + 4, // ASTC_2D_4X4 + 1, // G8R8U + 1, // G8R8S + 1, // BGRA8 + 1, // RGBA32F + 1, // RG32F + 1, // R32F + 1, // R16F + 1, // R16U + 1, // R16S + 1, // R16UI + 1, // R16I + 1, // RG16 + 1, // RG16F + 1, // RG16UI + 1, // RG16I + 1, // RG16S + 1, // RGB32F + 1, // RGBA8_SRGB + 1, // RG8U + 1, // RG8S + 1, // RG32UI + 1, // R32UI + 8, // ASTC_2D_8X8 + 5, // ASTC_2D_8X5 + 4, // ASTC_2D_5X4 + 1, // BGRA8_SRGB + 4, // DXT1_SRGB + 4, // DXT23_SRGB + 4, // DXT45_SRGB + 4, // BC7U_SRGB + 4, // ASTC_2D_4X4_SRGB + 8, // ASTC_2D_8X8_SRGB + 5, // ASTC_2D_8X5_SRGB + 4, // ASTC_2D_5X4_SRGB + 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_SRGB + 8, // ASTC_2D_10X8 + 8, // ASTC_2D_10X8_SRGB + 1, // Z32F + 1, // Z16 + 1, // Z24S8 + 1, // S8Z24 + 1, // Z32FS8 +}}; + static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { if (format == PixelFormat::Invalid) return 0; - constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // G8R8U - 1, // G8R8S - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG32UI - 1, // R32UI - 8, // ASTC_2D_8X8 - 5, // ASTC_2D_8X5 - 4, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 4, // ASTC_2D_4X4_SRGB - 8, // ASTC_2D_8X8_SRGB - 5, // ASTC_2D_8X5_SRGB - 4, // ASTC_2D_5X4_SRGB - 5, // ASTC_2D_5X5 - 5, // ASTC_2D_5X5_SRGB - 8, // ASTC_2D_10X8 - 8, // ASTC_2D_10X8_SRGB - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 - }}; - ASSERT(static_cast<std::size_t>(format) < block_height_table.size()); return block_height_table[static_cast<std::size_t>(format)]; } +constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ + 32, // ABGR8U + 32, // ABGR8S + 32, // ABGR8UI + 16, // B5G6R5U + 32, // A2B10G10R10U + 16, // A1B5G5R5U + 8, // R8U + 8, // R8UI + 64, // RGBA16F + 64, // RGBA16U + 64, // RGBA16UI + 32, // R11FG11FB10F + 128, // RGBA32UI + 64, // DXT1 + 128, // DXT23 + 128, // DXT45 + 64, // DXN1 + 128, // DXN2UNORM + 128, // DXN2SNORM + 128, // BC7U + 128, // BC6H_UF16 + 128, // BC6H_SF16 + 128, // ASTC_2D_4X4 + 16, // G8R8U + 16, // G8R8S + 32, // BGRA8 + 128, // RGBA32F + 64, // RG32F + 32, // R32F + 16, // R16F + 16, // R16U + 16, // R16S + 16, // R16UI + 16, // R16I + 32, // RG16 + 32, // RG16F + 32, // RG16UI + 32, // RG16I + 32, // RG16S + 96, // RGB32F + 32, // RGBA8_SRGB + 16, // RG8U + 16, // RG8S + 64, // RG32UI + 32, // R32UI + 128, // ASTC_2D_8X8 + 128, // ASTC_2D_8X5 + 128, // ASTC_2D_5X4 + 32, // BGRA8_SRGB + 64, // DXT1_SRGB + 128, // DXT23_SRGB + 128, // DXT45_SRGB + 128, // BC7U + 128, // ASTC_2D_4X4_SRGB + 128, // ASTC_2D_8X8_SRGB + 128, // ASTC_2D_8X5_SRGB + 128, // ASTC_2D_5X4_SRGB + 128, // ASTC_2D_5X5 + 128, // ASTC_2D_5X5_SRGB + 128, // ASTC_2D_10X8 + 128, // ASTC_2D_10X8_SRGB + 32, // Z32F + 16, // Z16 + 32, // Z24S8 + 32, // S8Z24 + 64, // Z32FS8 +}}; + static constexpr u32 GetFormatBpp(PixelFormat format) { if (format == PixelFormat::Invalid) return 0; - constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ - 32, // ABGR8U - 32, // ABGR8S - 32, // ABGR8UI - 16, // B5G6R5U - 32, // A2B10G10R10U - 16, // A1B5G5R5U - 8, // R8U - 8, // R8UI - 64, // RGBA16F - 64, // RGBA16U - 64, // RGBA16UI - 32, // R11FG11FB10F - 128, // RGBA32UI - 64, // DXT1 - 128, // DXT23 - 128, // DXT45 - 64, // DXN1 - 128, // DXN2UNORM - 128, // DXN2SNORM - 128, // BC7U - 128, // BC6H_UF16 - 128, // BC6H_SF16 - 128, // ASTC_2D_4X4 - 16, // G8R8U - 16, // G8R8S - 32, // BGRA8 - 128, // RGBA32F - 64, // RG32F - 32, // R32F - 16, // R16F - 16, // R16U - 16, // R16S - 16, // R16UI - 16, // R16I - 32, // RG16 - 32, // RG16F - 32, // RG16UI - 32, // RG16I - 32, // RG16S - 96, // RGB32F - 32, // RGBA8_SRGB - 16, // RG8U - 16, // RG8S - 64, // RG32UI - 32, // R32UI - 128, // ASTC_2D_8X8 - 128, // ASTC_2D_8X5 - 128, // ASTC_2D_5X4 - 32, // BGRA8_SRGB - 64, // DXT1_SRGB - 128, // DXT23_SRGB - 128, // DXT45_SRGB - 128, // BC7U - 128, // ASTC_2D_4X4_SRGB - 128, // ASTC_2D_8X8_SRGB - 128, // ASTC_2D_8X5_SRGB - 128, // ASTC_2D_5X4_SRGB - 128, // ASTC_2D_5X5 - 128, // ASTC_2D_5X5_SRGB - 128, // ASTC_2D_10X8 - 128, // ASTC_2D_10X8_SRGB - 32, // Z32F - 16, // Z16 - 32, // Z24S8 - 32, // S8Z24 - 64, // Z32FS8 - }}; - ASSERT(static_cast<std::size_t>(format) < bpp_table.size()); return bpp_table[static_cast<std::size_t>(format)]; } |