diff options
-rw-r--r-- | src/video_core/pica.h | 22 | ||||
-rw-r--r-- | src/video_core/rasterizer.cpp | 58 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 24 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 9 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/pica_to_gl.h | 23 |
6 files changed, 111 insertions, 28 deletions
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index bb689f2a9..f40684d83 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -441,8 +441,14 @@ struct Regs { }; enum class StencilAction : u32 { - Keep = 0, - Xor = 5, + Keep = 0, + Zero = 1, + Replace = 2, + Increment = 3, + Decrement = 4, + Invert = 5, + IncrementWrap = 6, + DecrementWrap = 7 }; struct { @@ -481,23 +487,29 @@ struct Regs { struct { union { + // Raw value of this register + u32 raw_func; + // If true, enable stencil testing BitField< 0, 1, u32> enable; // Comparison operation for stencil testing BitField< 4, 3, CompareFunc> func; - // Value to calculate the new stencil value from - BitField< 8, 8, u32> replacement_value; + // Mask used to control writing to the stencil buffer + BitField< 8, 8, u32> write_mask; // Value to compare against for stencil testing BitField<16, 8, u32> reference_value; // Mask to apply on stencil test inputs - BitField<24, 8, u32> mask; + BitField<24, 8, u32> input_mask; }; union { + // Raw value of this register + u32 raw_op; + // Action to perform when the stencil test fails BitField< 0, 3, StencilAction> action_stencil_fail; diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 4a159da8e..77eadda9e 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -216,14 +216,33 @@ static void SetStencil(int x, int y, u8 value) { } } -// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not! -static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) { +static u8 PerformStencilAction(Regs::StencilAction action, u8 old_stencil, u8 ref) { switch (action) { case Regs::StencilAction::Keep: - return dest; + return old_stencil; - case Regs::StencilAction::Xor: - return dest ^ ref; + case Regs::StencilAction::Zero: + return 0; + + case Regs::StencilAction::Replace: + return ref; + + case Regs::StencilAction::Increment: + // Saturated increment + return std::min<u8>(old_stencil, 254) + 1; + + case Regs::StencilAction::Decrement: + // Saturated decrement + return std::max<u8>(old_stencil, 1) - 1; + + case Regs::StencilAction::Invert: + return ~old_stencil; + + case Regs::StencilAction::IncrementWrap: + return old_stencil + 1; + + case Regs::StencilAction::DecrementWrap: + return old_stencil - 1; default: LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); @@ -783,10 +802,16 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } u8 old_stencil = 0; + + auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) { + u8 new_stencil = PerformStencilAction(action, old_stencil, stencil_test.reference_value); + SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); + }; + if (stencil_action_enable) { old_stencil = GetStencil(x >> 4, y >> 4); - u8 dest = old_stencil & stencil_test.mask; - u8 ref = stencil_test.reference_value & stencil_test.mask; + u8 dest = old_stencil & stencil_test.input_mask; + u8 ref = stencil_test.reference_value & stencil_test.input_mask; bool pass = false; switch (stencil_test.func) { @@ -824,8 +849,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } if (!pass) { - u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value); - SetStencil(x >> 4, y >> 4, new_stencil); + UpdateStencil(stencil_test.action_stencil_fail); continue; } } @@ -875,23 +899,19 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } if (!pass) { - if (stencil_action_enable) { - u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value); - SetStencil(x >> 4, y >> 4, new_stencil); - } + if (stencil_action_enable) + UpdateStencil(stencil_test.action_depth_fail); continue; } if (output_merger.depth_write_enable) SetDepth(x >> 4, y >> 4, z); - - if (stencil_action_enable) { - // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway? - u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value); - SetStencil(x >> 4, y >> 4, new_stencil); - } } + // The stencil depth_pass action is executed even if depth testing is disabled + if (stencil_action_enable) + UpdateStencil(stencil_test.action_depth_pass); + auto dest = GetPixel(x >> 4, y >> 4); Math::Vec4<u8> blend_output = combiner_output; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f0ccc2397..d29049508 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -269,7 +269,8 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { break; // Stencil test - case PICA_REG_INDEX(output_merger.stencil_test): + case PICA_REG_INDEX(output_merger.stencil_test.raw_func): + case PICA_REG_INDEX(output_merger.stencil_test.raw_op): SyncStencilTest(); break; @@ -676,7 +677,15 @@ void RasterizerOpenGL::SyncLogicOp() { } void RasterizerOpenGL::SyncStencilTest() { - // TODO: Implement stencil test, mask, and op + const auto& regs = Pica::g_state.regs; + state.stencil.test_enabled = regs.output_merger.stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; + state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func); + state.stencil.test_ref = regs.output_merger.stencil_test.reference_value; + state.stencil.test_mask = regs.output_merger.stencil_test.input_mask; + state.stencil.write_mask = regs.output_merger.stencil_test.write_mask; + state.stencil.action_stencil_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail); + state.stencil.action_depth_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail); + state.stencil.action_depth_pass = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass); } void RasterizerOpenGL::SyncDepthTest() { @@ -867,8 +876,15 @@ void RasterizerOpenGL::ReloadDepthBuffer() { state.Apply(); glActiveTexture(GL_TEXTURE0); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height, - fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get()); + if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) { + // TODO(Subv): There is a bug with Intel Windows drivers that makes glTexSubImage2D not change the stencil buffer. + // The bug has been reported to Intel (https://communities.intel.com/message/324464) + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, fb_depth_texture.width, fb_depth_texture.height, 0, + GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, temp_fb_depth_buffer.get()); + } else { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height, + fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get()); + } state.texture_units[0].texture_2d = 0; state.Apply(); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 871324014..ba47ce8b8 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -26,6 +26,9 @@ OpenGLState::OpenGLState() { stencil.test_ref = 0; stencil.test_mask = -1; stencil.write_mask = -1; + stencil.action_depth_fail = GL_KEEP; + stencil.action_depth_pass = GL_KEEP; + stencil.action_stencil_fail = GL_KEEP; blend.enabled = false; blend.src_rgb_func = GL_ONE; @@ -105,6 +108,12 @@ void OpenGLState::Apply() { glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask); } + if (stencil.action_depth_fail != cur_state.stencil.action_depth_fail || + stencil.action_depth_pass != cur_state.stencil.action_depth_pass || + stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) { + glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail, stencil.action_depth_pass); + } + // Stencil mask if (stencil.write_mask != cur_state.stencil.write_mask) { glStencilMask(stencil.write_mask); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 3e2379021..81e7e0877 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -32,6 +32,9 @@ public: GLint test_ref; // GL_STENCIL_REF GLuint test_mask; // GL_STENCIL_VALUE_MASK GLuint write_mask; // GL_STENCIL_WRITEMASK + GLenum action_stencil_fail; // GL_STENCIL_FAIL + GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL + GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS } stencil; struct { diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 3b562da86..12806fad5 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -152,6 +152,29 @@ inline GLenum CompareFunc(Pica::Regs::CompareFunc func) { return compare_func_table[(unsigned)func]; } +inline GLenum StencilOp(Pica::Regs::StencilAction action) { + static const GLenum stencil_op_table[] = { + GL_KEEP, // StencilAction::Keep + GL_ZERO, // StencilAction::Zero + GL_REPLACE, // StencilAction::Replace + GL_INCR, // StencilAction::Increment + GL_DECR, // StencilAction::Decrement + GL_INVERT, // StencilAction::Invert + GL_INCR_WRAP, // StencilAction::IncrementWrap + GL_DECR_WRAP // StencilAction::DecrementWrap + }; + + // Range check table for input + if ((unsigned)action >= ARRAY_SIZE(stencil_op_table)) { + LOG_CRITICAL(Render_OpenGL, "Unknown stencil op %d", action); + UNREACHABLE(); + + return GL_KEEP; + } + + return stencil_op_table[(unsigned)action]; +} + inline std::array<GLfloat, 4> ColorRGBA8(const u8* bytes) { return { { bytes[0] / 255.0f, bytes[1] / 255.0f, |