diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 8 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 70 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 25 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 94 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 75 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 33 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 41 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 25 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 53 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 18 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 24 |
12 files changed, 367 insertions, 102 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 68f91cc75..f32a79d7b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -218,10 +218,6 @@ void Maxwell3D::DrawArrays() { debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr); } - if (debug_context) { - debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr); - } - // Both instance configuration registers can not be set at the same time. ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, "Illegal combination of instancing parameters"); @@ -237,6 +233,10 @@ void Maxwell3D::DrawArrays() { const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count}; rasterizer.AccelerateDrawBatch(is_indexed); + if (debug_context) { + debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr); + } + // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if // the game is trying to draw indexed or direct mode. This needs to be verified on HW still - // it's possible that it is incorrect and that there is some other register used to specify the diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d03bc1c0c..92bfda053 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -330,6 +330,17 @@ public: Set = 0x150F, }; + enum class StencilOp : u32 { + Keep = 1, + Zero = 2, + Replace = 3, + Incr = 4, + Decr = 5, + Invert = 6, + IncrWrap = 7, + DecrWrap = 8, + }; + struct Cull { enum class FrontFace : u32 { ClockWise = 0x0900, @@ -508,8 +519,16 @@ public: float clear_color[4]; float clear_depth; + INSERT_PADDING_WORDS(0x3); + s32 clear_stencil; + + INSERT_PADDING_WORDS(0x6C); + + s32 stencil_back_func_ref; + u32 stencil_back_mask; + u32 stencil_back_func_mask; - INSERT_PADDING_WORDS(0x93); + INSERT_PADDING_WORDS(0x20); struct { u32 address_high; @@ -573,16 +592,14 @@ public: u32 enable[NumRenderTargets]; } blend; - struct { - u32 enable; - u32 front_op_fail; - u32 front_op_zfail; - u32 front_op_zpass; - u32 front_func_func; - u32 front_func_ref; - u32 front_func_mask; - u32 front_mask; - } stencil; + u32 stencil_enable; + StencilOp stencil_front_op_fail; + StencilOp stencil_front_op_zfail; + StencilOp stencil_front_op_zpass; + ComparisonOp stencil_front_func_func; + s32 stencil_front_func_ref; + u32 stencil_front_func_mask; + u32 stencil_front_mask; INSERT_PADDING_WORDS(0x3); @@ -626,13 +643,11 @@ public: INSERT_PADDING_WORDS(0x5); - struct { - u32 enable; - u32 back_op_fail; - u32 back_op_zfail; - u32 back_op_zpass; - u32 back_func_func; - } stencil_two_side; + u32 stencil_two_side_enable; + StencilOp stencil_back_op_fail; + StencilOp stencil_back_op_zfail; + StencilOp stencil_back_op_zpass; + ComparisonOp stencil_back_func_func; INSERT_PADDING_WORDS(0x17); @@ -944,6 +959,10 @@ ASSERT_REG_POSITION(viewport, 0x300); ASSERT_REG_POSITION(vertex_buffer, 0x35D); ASSERT_REG_POSITION(clear_color[0], 0x360); ASSERT_REG_POSITION(clear_depth, 0x364); +ASSERT_REG_POSITION(clear_stencil, 0x368); +ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); +ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); +ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458); ASSERT_REG_POSITION(rt_control, 0x487); @@ -955,13 +974,24 @@ ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2); ASSERT_REG_POSITION(depth_test_func, 0x4C3); ASSERT_REG_POSITION(blend, 0x4CF); -ASSERT_REG_POSITION(stencil, 0x4E0); +ASSERT_REG_POSITION(stencil_enable, 0x4E0); +ASSERT_REG_POSITION(stencil_front_op_fail, 0x4E1); +ASSERT_REG_POSITION(stencil_front_op_zfail, 0x4E2); +ASSERT_REG_POSITION(stencil_front_op_zpass, 0x4E3); +ASSERT_REG_POSITION(stencil_front_func_func, 0x4E4); +ASSERT_REG_POSITION(stencil_front_func_ref, 0x4E5); +ASSERT_REG_POSITION(stencil_front_func_mask, 0x4E6); +ASSERT_REG_POSITION(stencil_front_mask, 0x4E7); ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(zeta_enable, 0x54E); ASSERT_REG_POSITION(tsc, 0x557); ASSERT_REG_POSITION(tic, 0x55D); -ASSERT_REG_POSITION(stencil_two_side, 0x565); +ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); +ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); +ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567); +ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568); +ASSERT_REG_POSITION(stencil_back_func_func, 0x569); ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 67194b0e3..7fd622159 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -280,6 +280,19 @@ union Instruction { BitField<56, 1, u64> invert_b; } lop32i; + union { + BitField<28, 8, u64> imm_lut28; + BitField<48, 8, u64> imm_lut48; + + u32 GetImmLut28() const { + return static_cast<u32>(imm_lut28); + } + + u32 GetImmLut48() const { + return static_cast<u32>(imm_lut48); + } + } lop3; + u32 GetImm20_19() const { u32 imm{static_cast<u32>(imm20_19)}; imm <<= 12; @@ -623,6 +636,9 @@ public: IADD_C, IADD_R, IADD_IMM, + IADD3_C, + IADD3_R, + IADD3_IMM, IADD32I, ISCADD_C, // Scale and Add ISCADD_R, @@ -650,6 +666,9 @@ public: LOP_R, LOP_IMM, LOP32I, + LOP3_C, + LOP3_R, + LOP3_IMM, MOV_C, MOV_R, MOV_IMM, @@ -838,6 +857,9 @@ private: INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"), INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"), INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"), + INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"), + INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"), + INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"), INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"), INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"), INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"), @@ -872,6 +894,9 @@ private: INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"), + INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"), + INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"), + INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"), INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"), INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"), INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c67eabe65..96851ccb5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -14,6 +14,7 @@ #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" +#include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" #include "core/hle/kernel/process.h" @@ -181,7 +182,7 @@ static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); // Fetch program code from memory - GLShader::ProgramCode program_code; + GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)}; @@ -315,16 +316,14 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c using_color_fb = false; } - // TODO(bunnei): Implement this - const bool has_stencil = false; - + const bool has_stencil = regs.stencil_enable; const bool write_color_fb = state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; const bool write_depth_fb = (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) || - (has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 0); + (has_stencil && (state.stencil.front.write_mask || state.stencil.back.write_mask)); Surface color_surface; Surface depth_surface; @@ -364,41 +363,70 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c } void RasterizerOpenGL::Clear() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto prev_state{state}; + SCOPE_EXIT({ prev_state.Apply(); }); + const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; bool use_color_fb = false; bool use_depth_fb = false; - GLbitfield clear_mask = 0; - if (regs.clear_buffers.R && regs.clear_buffers.G && regs.clear_buffers.B && + OpenGLState clear_state; + clear_state.draw.draw_framebuffer = state.draw.draw_framebuffer; + clear_state.color_mask.red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE; + clear_state.color_mask.green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE; + clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE; + clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE; + + GLbitfield clear_mask{}; + if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A) { - clear_mask |= GL_COLOR_BUFFER_BIT; - use_color_fb = true; + if (regs.clear_buffers.RT == 0) { + // We only support clearing the first color attachment for now + clear_mask |= GL_COLOR_BUFFER_BIT; + use_color_fb = true; + } else { + // TODO(subv): Add support for the other color attachments + LOG_CRITICAL(HW_GPU, "Clear unimplemented for RT {}", regs.clear_buffers.RT); + } } if (regs.clear_buffers.Z) { + ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!"); + use_depth_fb = true; clear_mask |= GL_DEPTH_BUFFER_BIT; - use_depth_fb = regs.zeta_enable != 0; // Always enable the depth write when clearing the depth buffer. The depth write mask is // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true. - state.depth.test_enabled = true; - state.depth.write_mask = GL_TRUE; - state.depth.test_func = GL_ALWAYS; - state.Apply(); + clear_state.depth.test_enabled = true; + clear_state.depth.test_func = GL_ALWAYS; + } + if (regs.clear_buffers.S) { + ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); + use_depth_fb = true; + clear_mask |= GL_STENCIL_BUFFER_BIT; + clear_state.stencil.test_enabled = true; } - if (clear_mask == 0) + if (!use_color_fb && !use_depth_fb) { + // No color surface nor depth/stencil surface are enabled return; + } + + if (clear_mask == 0) { + // No clear mask is enabled + return; + } ScopeAcquireGLContext acquire_context{emu_window}; auto [dirty_color_surface, dirty_depth_surface] = ConfigureFramebuffers(use_color_fb, use_depth_fb, false); - // TODO(Subv): Support clearing only partial colors. + clear_state.Apply(); + glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2], regs.clear_color[3]); glClearDepth(regs.clear_depth); + glClearStencil(regs.clear_stencil); glClear(clear_mask); @@ -451,6 +479,7 @@ void RasterizerOpenGL::DrawArrays() { ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0, true); SyncDepthTestState(); + SyncStencilTestState(); SyncBlendState(); SyncLogicOpState(); SyncCullMode(); @@ -841,6 +870,34 @@ void RasterizerOpenGL::SyncDepthTestState() { state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); } +void RasterizerOpenGL::SyncStencilTestState() { + const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + state.stencil.test_enabled = regs.stencil_enable != 0; + + if (!regs.stencil_enable) { + return; + } + + // TODO(bunnei): Verify behavior when this is not set + ASSERT(regs.stencil_two_side_enable); + + state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func); + state.stencil.front.test_ref = regs.stencil_front_func_ref; + state.stencil.front.test_mask = regs.stencil_front_func_mask; + state.stencil.front.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_fail); + state.stencil.front.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_zfail); + state.stencil.front.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_front_op_zpass); + state.stencil.front.write_mask = regs.stencil_front_mask; + + state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func); + state.stencil.back.test_ref = regs.stencil_back_func_ref; + state.stencil.back.test_mask = regs.stencil_back_func_mask; + state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail); + state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail); + state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass); + state.stencil.back.write_mask = regs.stencil_back_mask; +} + void RasterizerOpenGL::SyncBlendState() { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; @@ -872,7 +929,8 @@ void RasterizerOpenGL::SyncLogicOpState() { if (!state.logic_op.enabled) return; - ASSERT_MSG(regs.blend.enable == 0, "Blending and logic op can't be enabled at the same time."); + ASSERT_MSG(regs.blend.enable[0] == 0, + "Blending and logic op can't be enabled at the same time."); state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 59b727de0..531b04046 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -141,6 +141,9 @@ private: /// Syncs the depth test state to match the guest state void SyncDepthTestState(); + /// Syncs the stencil test state to match the guest state + void SyncStencilTestState(); + /// Syncs the blend state to match the guest state void SyncBlendState(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index b1769c99b..83d8d3d94 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -780,17 +780,30 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres } else if (preserve_contents) { // If surface parameters changed and we care about keeping the previous data, recreate // the surface from the old one - return RecreateSurface(surface, params); + UnregisterSurface(surface); + Surface new_surface{RecreateSurface(surface, params)}; + RegisterSurface(new_surface); + return new_surface; } else { // Delete the old surface before creating a new one to prevent collisions. UnregisterSurface(surface); } } + // Try to get a previously reserved surface + surface = TryGetReservedSurface(params); + // No surface found - create a new one - surface = std::make_shared<CachedSurface>(params); - RegisterSurface(surface); - LoadSurface(surface); + if (!surface) { + surface = std::make_shared<CachedSurface>(params); + ReserveSurface(surface); + RegisterSurface(surface); + } + + // Only load surface from memory if we care about the contents + if (preserve_contents) { + LoadSurface(surface); + } return surface; } @@ -799,13 +812,18 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, const SurfaceParams& new_params) { // Verify surface is compatible for blitting const auto& params{surface->GetSurfaceParams()}; - ASSERT(params.type == new_params.type); - ASSERT_MSG(params.GetCompressionFactor(params.pixel_format) == 1, - "Compressed texture reinterpretation is not supported"); // Create a new surface with the new parameters, and blit the previous surface to it Surface new_surface{std::make_shared<CachedSurface>(new_params)}; + // If format is unchanged, we can do a faster blit without reinterpreting pixel data + if (params.pixel_format == new_params.pixel_format) { + BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle, + new_surface->GetSurfaceParams().GetRect(), params.type, + read_framebuffer.handle, draw_framebuffer.handle); + return new_surface; + } + auto source_format = GetFormatTuple(params.pixel_format, params.component_type); auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); @@ -818,9 +836,13 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo.handle); glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); - glGetTextureImage(surface->Texture().handle, 0, source_format.format, source_format.type, - params.SizeInBytes(), nullptr); - + if (source_format.compressed) { + glGetCompressedTextureImage(surface->Texture().handle, 0, + static_cast<GLsizei>(params.SizeInBytes()), nullptr); + } else { + glGetTextureImage(surface->Texture().handle, 0, source_format.format, source_format.type, + static_cast<GLsizei>(params.SizeInBytes()), nullptr); + } // If the new texture is bigger than the previous one, we need to fill in the rest with data // from the CPU. if (params.SizeInBytes() < new_params.SizeInBytes()) { @@ -846,17 +868,21 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, const auto& dest_rect{new_params.GetRect()}; glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.handle); - glTextureSubImage2D( - new_surface->Texture().handle, 0, 0, 0, static_cast<GLsizei>(dest_rect.GetWidth()), - static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format, dest_format.type, nullptr); + if (dest_format.compressed) { + glCompressedTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, + static_cast<GLsizei>(dest_rect.GetWidth()), + static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format, + static_cast<GLsizei>(new_params.SizeInBytes()), nullptr); + } else { + glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0, + static_cast<GLsizei>(dest_rect.GetWidth()), + static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format, + dest_format.type, nullptr); + } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); pbo.Release(); - // Update cache accordingly - UnregisterSurface(surface); - RegisterSurface(new_surface); - return new_surface; } @@ -931,6 +957,21 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { surface_cache.erase(search); } +void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { + const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())}; + surface_reserve[surface_reserve_key] = surface; +} + +Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params) { + const auto& surface_reserve_key{SurfaceReserveKey::Create(params)}; + auto search{surface_reserve.find(surface_reserve_key)}; + if (search != surface_reserve.end()) { + RegisterSurface(search->second); + return search->second; + } + return {}; +} + template <typename Map, typename Interval> constexpr auto RangeFromInterval(Map& map, const Interval& interval) { return boost::make_iterator_range(map.equal_range(interval)); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index f273152a2..c8c615df2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -11,6 +11,7 @@ #include <boost/icl/interval_map.hpp> #include "common/common_types.h" +#include "common/hash.h" #include "common/math_util.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -682,6 +683,27 @@ struct SurfaceParams { u32 cache_height; }; +}; // namespace OpenGL + +/// Hashable variation of SurfaceParams, used for a key in the surface cache +struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> { + static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) { + SurfaceReserveKey res; + res.state = params; + return res; + } +}; +namespace std { +template <> +struct hash<SurfaceReserveKey> { + size_t operator()(const SurfaceReserveKey& k) const { + return k.Hash(); + } +}; +} // namespace std + +namespace OpenGL { + class CachedSurface final { public: CachedSurface(const SurfaceParams& params); @@ -752,12 +774,23 @@ private: /// Remove surface from the cache void UnregisterSurface(const Surface& surface); + /// Reserves a unique surface that can be reused later + void ReserveSurface(const Surface& surface); + + /// Tries to get a reserved surface for the specified parameters + Surface TryGetReservedSurface(const SurfaceParams& params); + /// Increase/decrease the number of surface in pages touching the specified region void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); std::unordered_map<Tegra::GPUVAddr, Surface> surface_cache; PageMap cached_pages; + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have + /// previously been used. This is to prevent surfaces from being constantly created and + /// destroyed when used with different surface parameters. + std::unordered_map<SurfaceReserveKey, Surface> surface_reserve; + OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index f1e00c93c..94e318966 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -849,6 +849,33 @@ private: } } + void WriteLop3Instruction(Register dest, const std::string& op_a, const std::string& op_b, + const std::string& op_c, const std::string& imm_lut) { + if (dest == Tegra::Shader::Register::ZeroIndex) { + return; + } + + static constexpr std::array<const char*, 32> shift_amounts = { + "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", + "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", + "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"}; + + std::string result; + result += '('; + + for (size_t i = 0; i < shift_amounts.size(); ++i) { + if (i) + result += '|'; + result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] + + ") & 1) | ((" + op_b + " >> " + shift_amounts[i] + ") & 1) << 1 | ((" + op_a + + " >> " + shift_amounts[i] + ") & 1) << 2)) & 1) << " + shift_amounts[i] + ")"; + } + + result += ')'; + + regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); + } + void WriteTexsInstruction(const Instruction& instr, const std::string& coord, const std::string& texture) { // Add an extra scope and declare the texture coords inside to prevent @@ -1297,6 +1324,20 @@ private: instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); break; } + case OpCode::Id::LOP3_C: + case OpCode::Id::LOP3_R: + case OpCode::Id::LOP3_IMM: { + std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); + std::string lut; + if (opcode->GetId() == OpCode::Id::LOP3_R) { + lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')'; + } else { + lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')'; + } + + WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut); + break; + } case OpCode::Id::IMNMX_C: case OpCode::Id::IMNMX_R: case OpCode::Id::IMNMX_IMM: { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 2c636b7f3..4e5a6f130 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -9,14 +9,14 @@ #include <type_traits> #include <utility> #include <vector> +#include <boost/functional/hash.hpp> #include "common/common_types.h" #include "common/hash.h" namespace OpenGL::GLShader { constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; - -using ProgramCode = std::array<u64, MAX_PROGRAM_CODE_LENGTH>; +using ProgramCode = std::vector<u64>; class ConstBufferEntry { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -115,8 +115,8 @@ struct ShaderEntries { using ProgramResult = std::pair<std::string, ShaderEntries>; struct ShaderSetup { - ShaderSetup(const ProgramCode& program_code) { - program.code = program_code; + explicit ShaderSetup(ProgramCode program_code) { + program.code = std::move(program_code); } struct { @@ -135,8 +135,8 @@ struct ShaderSetup { } /// Used in scenarios where we have a dual vertex shaders - void SetProgramB(const ProgramCode& program_b) { - program.code_b = program_b; + void SetProgramB(ProgramCode program_b) { + program.code_b = std::move(program_b); has_program_b = true; } @@ -146,13 +146,18 @@ struct ShaderSetup { private: u64 GetNewHash() const { + size_t hash = 0; + + const u64 hash_a = Common::ComputeHash64(program.code.data(), program.code.size()); + boost::hash_combine(hash, hash_a); + if (has_program_b) { // Compute hash over dual shader programs - return Common::ComputeHash64(&program, sizeof(program)); - } else { - // Compute hash over a single shader program - return Common::ComputeHash64(&program.code, program.code.size()); + const u64 hash_b = Common::ComputeHash64(program.code_b.data(), program.code_b.size()); + boost::hash_combine(hash, hash_b); } + + return hash; } u64 program_code_hash{}; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index e1a887d67..60a4defd1 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -27,13 +27,17 @@ OpenGLState::OpenGLState() { color_mask.alpha_enabled = GL_TRUE; stencil.test_enabled = false; - stencil.test_func = GL_ALWAYS; - stencil.test_ref = 0; - stencil.test_mask = 0xFF; - stencil.write_mask = 0xFF; - stencil.action_depth_fail = GL_KEEP; - stencil.action_depth_pass = GL_KEEP; - stencil.action_stencil_fail = GL_KEEP; + auto reset_stencil = [](auto& config) { + config.test_func = GL_ALWAYS; + config.test_ref = 0; + config.test_mask = 0xFFFFFFFF; + config.write_mask = 0xFFFFFFFF; + config.action_depth_fail = GL_KEEP; + config.action_depth_pass = GL_KEEP; + config.action_stencil_fail = GL_KEEP; + }; + reset_stencil(stencil.front); + reset_stencil(stencil.back); blend.enabled = true; blend.rgb_equation = GL_FUNC_ADD; @@ -129,24 +133,23 @@ void OpenGLState::Apply() const { glDisable(GL_STENCIL_TEST); } } - - if (stencil.test_func != cur_state.stencil.test_func || - stencil.test_ref != cur_state.stencil.test_ref || - stencil.test_mask != cur_state.stencil.test_mask) { - glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask); - } - - if (stencil.action_depth_fail != cur_state.stencil.action_depth_fail || - stencil.action_depth_pass != cur_state.stencil.action_depth_pass || - stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) { - glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail, - stencil.action_depth_pass); - } - - // Stencil mask - if (stencil.write_mask != cur_state.stencil.write_mask) { - glStencilMask(stencil.write_mask); - } + auto config_stencil = [](GLenum face, const auto& config, const auto& prev_config) { + if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref || + config.test_mask != prev_config.test_mask) { + glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); + } + if (config.action_depth_fail != prev_config.action_depth_fail || + config.action_depth_pass != prev_config.action_depth_pass || + config.action_stencil_fail != prev_config.action_stencil_fail) { + glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, + config.action_depth_pass); + } + if (config.write_mask != prev_config.write_mask) { + glStencilMaskSeparate(face, config.write_mask); + } + }; + config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front); + config_stencil(GL_BACK, stencil.back, cur_state.stencil.back); // Blending if (blend.enabled != cur_state.blend.enabled) { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 22b0b1e41..46e96a97d 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -58,14 +58,16 @@ public: } color_mask; // GL_COLOR_WRITEMASK struct { - bool test_enabled; // GL_STENCIL_TEST - GLenum test_func; // GL_STENCIL_FUNC - GLint test_ref; // GL_STENCIL_REF - GLuint test_mask; // GL_STENCIL_VALUE_MASK - GLuint write_mask; // GL_STENCIL_WRITEMASK - GLenum action_stencil_fail; // GL_STENCIL_FAIL - GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL - GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS + bool test_enabled; // GL_STENCIL_TEST + struct { + GLenum test_func; // GL_STENCIL_FUNC + GLint test_ref; // GL_STENCIL_REF + GLuint test_mask; // GL_STENCIL_VALUE_MASK + GLuint write_mask; // GL_STENCIL_WRITEMASK + GLenum action_stencil_fail; // GL_STENCIL_FAIL + GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL + GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS + } front, back; } stencil; struct { diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 0343759a6..67273e164 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -295,6 +295,30 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { return {}; } +inline GLenum StencilOp(Maxwell::StencilOp stencil) { + switch (stencil) { + case Maxwell::StencilOp::Keep: + return GL_KEEP; + case Maxwell::StencilOp::Zero: + return GL_ZERO; + case Maxwell::StencilOp::Replace: + return GL_REPLACE; + case Maxwell::StencilOp::Incr: + return GL_INCR; + case Maxwell::StencilOp::Decr: + return GL_DECR; + case Maxwell::StencilOp::Invert: + return GL_INVERT; + case Maxwell::StencilOp::IncrWrap: + return GL_INCR_WRAP; + case Maxwell::StencilOp::DecrWrap: + return GL_DECR_WRAP; + } + LOG_CRITICAL(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil)); + UNREACHABLE(); + return {}; +} + inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) { switch (front_face) { case Maxwell::Cull::FrontFace::ClockWise: |