summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp                  |  8
-rw-r--r-- src/video_core/engines/maxwell_3d.h                    | 70
-rw-r--r-- src/video_core/engines/shader_bytecode.h               | 25
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp       | 94
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h         |  3
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 75
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h   | 33
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 41
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.h         | 25
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp            | 53
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h              | 18
-rw-r--r-- src/video_core/renderer_opengl/maxwell_to_gl.h         | 24
12 files changed, 367 insertions, 102 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 68f91cc75..f32a79d7b 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -218,10 +218,6 @@ void Maxwell3D::DrawArrays() {
debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
}
- if (debug_context) {
- debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
- }
-
// Both instance configuration registers can not be set at the same time.
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
@@ -237,6 +233,10 @@ void Maxwell3D::DrawArrays() {
const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
rasterizer.AccelerateDrawBatch(is_indexed);
+ if (debug_context) {
+ debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+ }
+
// TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
// the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
// it's possible that it is incorrect and that there is some other register used to specify the
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d03bc1c0c..92bfda053 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -330,6 +330,17 @@ public:
Set = 0x150F,
};
+ enum class StencilOp : u32 {
+ Keep = 1,
+ Zero = 2,
+ Replace = 3,
+ Incr = 4,
+ Decr = 5,
+ Invert = 6,
+ IncrWrap = 7,
+ DecrWrap = 8,
+ };
+
struct Cull {
enum class FrontFace : u32 {
ClockWise = 0x0900,
@@ -508,8 +519,16 @@ public:
float clear_color[4];
float clear_depth;
+ INSERT_PADDING_WORDS(0x3);
+ s32 clear_stencil;
+
+ INSERT_PADDING_WORDS(0x6C);
+
+ s32 stencil_back_func_ref;
+ u32 stencil_back_mask;
+ u32 stencil_back_func_mask;
- INSERT_PADDING_WORDS(0x93);
+ INSERT_PADDING_WORDS(0x20);
struct {
u32 address_high;
@@ -573,16 +592,14 @@ public:
u32 enable[NumRenderTargets];
} blend;
- struct {
- u32 enable;
- u32 front_op_fail;
- u32 front_op_zfail;
- u32 front_op_zpass;
- u32 front_func_func;
- u32 front_func_ref;
- u32 front_func_mask;
- u32 front_mask;
- } stencil;
+ u32 stencil_enable;
+ StencilOp stencil_front_op_fail;
+ StencilOp stencil_front_op_zfail;
+ StencilOp stencil_front_op_zpass;
+ ComparisonOp stencil_front_func_func;
+ s32 stencil_front_func_ref;
+ u32 stencil_front_func_mask;
+ u32 stencil_front_mask;
INSERT_PADDING_WORDS(0x3);
@@ -626,13 +643,11 @@ public:
INSERT_PADDING_WORDS(0x5);
- struct {
- u32 enable;
- u32 back_op_fail;
- u32 back_op_zfail;
- u32 back_op_zpass;
- u32 back_func_func;
- } stencil_two_side;
+ u32 stencil_two_side_enable;
+ StencilOp stencil_back_op_fail;
+ StencilOp stencil_back_op_zfail;
+ StencilOp stencil_back_op_zpass;
+ ComparisonOp stencil_back_func_func;
INSERT_PADDING_WORDS(0x17);
@@ -944,6 +959,10 @@ ASSERT_REG_POSITION(viewport, 0x300);
ASSERT_REG_POSITION(vertex_buffer, 0x35D);
ASSERT_REG_POSITION(clear_color[0], 0x360);
ASSERT_REG_POSITION(clear_depth, 0x364);
+ASSERT_REG_POSITION(clear_stencil, 0x368);
+ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
+ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
+ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
@@ -955,13 +974,24 @@ ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
ASSERT_REG_POSITION(depth_test_func, 0x4C3);
ASSERT_REG_POSITION(blend, 0x4CF);
-ASSERT_REG_POSITION(stencil, 0x4E0);
+ASSERT_REG_POSITION(stencil_enable, 0x4E0);
+ASSERT_REG_POSITION(stencil_front_op_fail, 0x4E1);
+ASSERT_REG_POSITION(stencil_front_op_zfail, 0x4E2);
+ASSERT_REG_POSITION(stencil_front_op_zpass, 0x4E3);
+ASSERT_REG_POSITION(stencil_front_func_func, 0x4E4);
+ASSERT_REG_POSITION(stencil_front_func_ref, 0x4E5);
+ASSERT_REG_POSITION(stencil_front_func_mask, 0x4E6);
+ASSERT_REG_POSITION(stencil_front_mask, 0x4E7);
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(zeta_enable, 0x54E);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
-ASSERT_REG_POSITION(stencil_two_side, 0x565);
+ASSERT_REG_POSITION(stencil_two_side_enable, 0x565);
+ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
+ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567);
+ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
+ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
ASSERT_REG_POSITION(point_coord_replace, 0x581);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 67194b0e3..7fd622159 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -280,6 +280,19 @@ union Instruction {
BitField<56, 1, u64> invert_b;
} lop32i;
+ union {
+ BitField<28, 8, u64> imm_lut28;
+ BitField<48, 8, u64> imm_lut48;
+
+ u32 GetImmLut28() const {
+ return static_cast<u32>(imm_lut28);
+ }
+
+ u32 GetImmLut48() const {
+ return static_cast<u32>(imm_lut48);
+ }
+ } lop3;
+
u32 GetImm20_19() const {
u32 imm{static_cast<u32>(imm20_19)};
imm <<= 12;
@@ -623,6 +636,9 @@ public:
IADD_C,
IADD_R,
IADD_IMM,
+ IADD3_C,
+ IADD3_R,
+ IADD3_IMM,
IADD32I,
ISCADD_C, // Scale and Add
ISCADD_R,
@@ -650,6 +666,9 @@ public:
LOP_R,
LOP_IMM,
LOP32I,
+ LOP3_C,
+ LOP3_R,
+ LOP3_IMM,
MOV_C,
MOV_R,
MOV_IMM,
@@ -838,6 +857,9 @@ private:
INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"),
INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"),
INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"),
+ INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"),
+ INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"),
+ INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"),
INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"),
INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
@@ -872,6 +894,9 @@ private:
INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
+ INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
+ INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
+ INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"),
INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c67eabe65..96851ccb5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -14,6 +14,7 @@
#include "common/logging/log.h"
#include "common/math_util.h"
#include "common/microprofile.h"
+#include "common/scope_exit.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/hle/kernel/process.h"
@@ -181,7 +182,7 @@ static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
// Fetch program code from memory
- GLShader::ProgramCode program_code;
+ GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH);
auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)};
@@ -315,16 +316,14 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c
using_color_fb = false;
}
- // TODO(bunnei): Implement this
- const bool has_stencil = false;
-
+ const bool has_stencil = regs.stencil_enable;
const bool write_color_fb =
state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE;
const bool write_depth_fb =
(state.depth.test_enabled && state.depth.write_mask == GL_TRUE) ||
- (has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 0);
+ (has_stencil && (state.stencil.front.write_mask || state.stencil.back.write_mask));
Surface color_surface;
Surface depth_surface;
@@ -364,41 +363,70 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c
}
void RasterizerOpenGL::Clear() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto prev_state{state};
+ SCOPE_EXIT({ prev_state.Apply(); });
+ const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
bool use_color_fb = false;
bool use_depth_fb = false;
- GLbitfield clear_mask = 0;
- if (regs.clear_buffers.R && regs.clear_buffers.G && regs.clear_buffers.B &&
+ OpenGLState clear_state;
+ clear_state.draw.draw_framebuffer = state.draw.draw_framebuffer;
+ clear_state.color_mask.red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
+ clear_state.color_mask.green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
+ clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
+ clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
+
+ GLbitfield clear_mask{};
+ if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
- clear_mask |= GL_COLOR_BUFFER_BIT;
- use_color_fb = true;
+ if (regs.clear_buffers.RT == 0) {
+ // We only support clearing the first color attachment for now
+ clear_mask |= GL_COLOR_BUFFER_BIT;
+ use_color_fb = true;
+ } else {
+ // TODO(subv): Add support for the other color attachments
+ LOG_CRITICAL(HW_GPU, "Clear unimplemented for RT {}", regs.clear_buffers.RT);
+ }
}
if (regs.clear_buffers.Z) {
+ ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
+ use_depth_fb = true;
clear_mask |= GL_DEPTH_BUFFER_BIT;
- use_depth_fb = regs.zeta_enable != 0;
// Always enable the depth write when clearing the depth buffer. The depth write mask is
// ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
- state.depth.test_enabled = true;
- state.depth.write_mask = GL_TRUE;
- state.depth.test_func = GL_ALWAYS;
- state.Apply();
+ clear_state.depth.test_enabled = true;
+ clear_state.depth.test_func = GL_ALWAYS;
+ }
+ if (regs.clear_buffers.S) {
+ ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
+ use_depth_fb = true;
+ clear_mask |= GL_STENCIL_BUFFER_BIT;
+ clear_state.stencil.test_enabled = true;
}
- if (clear_mask == 0)
+ if (!use_color_fb && !use_depth_fb) {
+ // No color surface nor depth/stencil surface are enabled
return;
+ }
+
+ if (clear_mask == 0) {
+ // No clear mask is enabled
+ return;
+ }
ScopeAcquireGLContext acquire_context{emu_window};
auto [dirty_color_surface, dirty_depth_surface] =
ConfigureFramebuffers(use_color_fb, use_depth_fb, false);
- // TODO(Subv): Support clearing only partial colors.
+ clear_state.Apply();
+
glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2],
regs.clear_color[3]);
glClearDepth(regs.clear_depth);
+ glClearStencil(regs.clear_stencil);
glClear(clear_mask);
@@ -451,6 +479,7 @@ void RasterizerOpenGL::DrawArrays() {
ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0, true);
SyncDepthTestState();
+ SyncStencilTestState();
SyncBlendState();
SyncLogicOpState();
SyncCullMode();
@@ -841,6 +870,34 @@ void RasterizerOpenGL::SyncDepthTestState() {
state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
}
+void RasterizerOpenGL::SyncStencilTestState() {
+ const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ state.stencil.test_enabled = regs.stencil_enable != 0;
+
+ if (!regs.stencil_enable) {
+ return;
+ }
+
+ // TODO(bunnei): Verify behavior when this is not set
+ ASSERT(regs.stencil_two_side_enable);
+
+ state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func);
+ state.stencil.front.test_ref = regs.stencil_front_func_ref;
+ state.stencil.front.test_mask = regs.stencil_front_func_mask;
+ state.stencil.front.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_fail);
+ state.stencil.front.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_zfail);
+ state.stencil.front.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_front_op_zpass);
+ state.stencil.front.write_mask = regs.stencil_front_mask;
+
+ state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func);
+ state.stencil.back.test_ref = regs.stencil_back_func_ref;
+ state.stencil.back.test_mask = regs.stencil_back_func_mask;
+ state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail);
+ state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail);
+ state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass);
+ state.stencil.back.write_mask = regs.stencil_back_mask;
+}
+
void RasterizerOpenGL::SyncBlendState() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
@@ -872,7 +929,8 @@ void RasterizerOpenGL::SyncLogicOpState() {
if (!state.logic_op.enabled)
return;
- ASSERT_MSG(regs.blend.enable == 0, "Blending and logic op can't be enabled at the same time.");
+ ASSERT_MSG(regs.blend.enable[0] == 0,
+ "Blending and logic op can't be enabled at the same time.");
state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation);
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 59b727de0..531b04046 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -141,6 +141,9 @@ private:
/// Syncs the depth test state to match the guest state
void SyncDepthTestState();
+ /// Syncs the stencil test state to match the guest state
+ void SyncStencilTestState();
+
/// Syncs the blend state to match the guest state
void SyncBlendState();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index b1769c99b..83d8d3d94 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -780,17 +780,30 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
} else if (preserve_contents) {
// If surface parameters changed and we care about keeping the previous data, recreate
// the surface from the old one
- return RecreateSurface(surface, params);
+ UnregisterSurface(surface);
+ Surface new_surface{RecreateSurface(surface, params)};
+ RegisterSurface(new_surface);
+ return new_surface;
} else {
// Delete the old surface before creating a new one to prevent collisions.
UnregisterSurface(surface);
}
}
+ // Try to get a previously reserved surface
+ surface = TryGetReservedSurface(params);
+
// No surface found - create a new one
- surface = std::make_shared<CachedSurface>(params);
- RegisterSurface(surface);
- LoadSurface(surface);
+ if (!surface) {
+ surface = std::make_shared<CachedSurface>(params);
+ ReserveSurface(surface);
+ RegisterSurface(surface);
+ }
+
+ // Only load surface from memory if we care about the contents
+ if (preserve_contents) {
+ LoadSurface(surface);
+ }
return surface;
}
@@ -799,13 +812,18 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
const SurfaceParams& new_params) {
// Verify surface is compatible for blitting
const auto& params{surface->GetSurfaceParams()};
- ASSERT(params.type == new_params.type);
- ASSERT_MSG(params.GetCompressionFactor(params.pixel_format) == 1,
- "Compressed texture reinterpretation is not supported");
// Create a new surface with the new parameters, and blit the previous surface to it
Surface new_surface{std::make_shared<CachedSurface>(new_params)};
+ // If format is unchanged, we can do a faster blit without reinterpreting pixel data
+ if (params.pixel_format == new_params.pixel_format) {
+ BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle,
+ new_surface->GetSurfaceParams().GetRect(), params.type,
+ read_framebuffer.handle, draw_framebuffer.handle);
+ return new_surface;
+ }
+
auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
@@ -818,9 +836,13 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo.handle);
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
- glGetTextureImage(surface->Texture().handle, 0, source_format.format, source_format.type,
- params.SizeInBytes(), nullptr);
-
+ if (source_format.compressed) {
+ glGetCompressedTextureImage(surface->Texture().handle, 0,
+ static_cast<GLsizei>(params.SizeInBytes()), nullptr);
+ } else {
+ glGetTextureImage(surface->Texture().handle, 0, source_format.format, source_format.type,
+ static_cast<GLsizei>(params.SizeInBytes()), nullptr);
+ }
// If the new texture is bigger than the previous one, we need to fill in the rest with data
// from the CPU.
if (params.SizeInBytes() < new_params.SizeInBytes()) {
@@ -846,17 +868,21 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
const auto& dest_rect{new_params.GetRect()};
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.handle);
- glTextureSubImage2D(
- new_surface->Texture().handle, 0, 0, 0, static_cast<GLsizei>(dest_rect.GetWidth()),
- static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format, dest_format.type, nullptr);
+ if (dest_format.compressed) {
+ glCompressedTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0,
+ static_cast<GLsizei>(dest_rect.GetWidth()),
+ static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
+ static_cast<GLsizei>(new_params.SizeInBytes()), nullptr);
+ } else {
+ glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0,
+ static_cast<GLsizei>(dest_rect.GetWidth()),
+ static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
+ dest_format.type, nullptr);
+ }
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
pbo.Release();
- // Update cache accordingly
- UnregisterSurface(surface);
- RegisterSurface(new_surface);
-
return new_surface;
}
@@ -931,6 +957,21 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
surface_cache.erase(search);
}
+void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
+ const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())};
+ surface_reserve[surface_reserve_key] = surface;
+}
+
+Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params) {
+ const auto& surface_reserve_key{SurfaceReserveKey::Create(params)};
+ auto search{surface_reserve.find(surface_reserve_key)};
+ if (search != surface_reserve.end()) {
+ RegisterSurface(search->second);
+ return search->second;
+ }
+ return {};
+}
+
template <typename Map, typename Interval>
constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval));
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index f273152a2..c8c615df2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -11,6 +11,7 @@
#include <boost/icl/interval_map.hpp>
#include "common/common_types.h"
+#include "common/hash.h"
#include "common/math_util.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -682,6 +683,27 @@ struct SurfaceParams {
u32 cache_height;
};
+}; // namespace OpenGL
+
+/// Hashable variation of SurfaceParams, used for a key in the surface cache
+struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
+ static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
+ SurfaceReserveKey res;
+ res.state = params;
+ return res;
+ }
+};
+namespace std {
+template <>
+struct hash<SurfaceReserveKey> {
+ size_t operator()(const SurfaceReserveKey& k) const {
+ return k.Hash();
+ }
+};
+} // namespace std
+
+namespace OpenGL {
+
class CachedSurface final {
public:
CachedSurface(const SurfaceParams& params);
@@ -752,12 +774,23 @@ private:
/// Remove surface from the cache
void UnregisterSurface(const Surface& surface);
+ /// Reserves a unique surface that can be reused later
+ void ReserveSurface(const Surface& surface);
+
+ /// Tries to get a reserved surface for the specified parameters
+ Surface TryGetReservedSurface(const SurfaceParams& params);
+
/// Increase/decrease the number of surface in pages touching the specified region
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
std::unordered_map<Tegra::GPUVAddr, Surface> surface_cache;
PageMap cached_pages;
+ /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
+ /// previously been used. This is to prevent surfaces from being constantly created and
+ /// destroyed when used with different surface parameters.
+ std::unordered_map<SurfaceReserveKey, Surface> surface_reserve;
+
OGLFramebuffer read_framebuffer;
OGLFramebuffer draw_framebuffer;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index f1e00c93c..94e318966 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -849,6 +849,33 @@ private:
}
}
+ void WriteLop3Instruction(Register dest, const std::string& op_a, const std::string& op_b,
+ const std::string& op_c, const std::string& imm_lut) {
+ if (dest == Tegra::Shader::Register::ZeroIndex) {
+ return;
+ }
+
+ static constexpr std::array<const char*, 32> shift_amounts = {
+ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
+ "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21",
+ "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"};
+
+ std::string result;
+ result += '(';
+
+ for (size_t i = 0; i < shift_amounts.size(); ++i) {
+ if (i)
+ result += '|';
+ result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] +
+ ") & 1) | ((" + op_b + " >> " + shift_amounts[i] + ") & 1) << 1 | ((" + op_a +
+ " >> " + shift_amounts[i] + ") & 1) << 2)) & 1) << " + shift_amounts[i] + ")";
+ }
+
+ result += ')';
+
+ regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
+ }
+
void WriteTexsInstruction(const Instruction& instr, const std::string& coord,
const std::string& texture) {
// Add an extra scope and declare the texture coords inside to prevent
@@ -1297,6 +1324,20 @@ private:
instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
break;
}
+ case OpCode::Id::LOP3_C:
+ case OpCode::Id::LOP3_R:
+ case OpCode::Id::LOP3_IMM: {
+ std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
+ std::string lut;
+ if (opcode->GetId() == OpCode::Id::LOP3_R) {
+ lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')';
+ } else {
+ lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')';
+ }
+
+ WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut);
+ break;
+ }
case OpCode::Id::IMNMX_C:
case OpCode::Id::IMNMX_R:
case OpCode::Id::IMNMX_IMM: {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 2c636b7f3..4e5a6f130 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -9,14 +9,14 @@
#include <type_traits>
#include <utility>
#include <vector>
+#include <boost/functional/hash.hpp>
#include "common/common_types.h"
#include "common/hash.h"
namespace OpenGL::GLShader {
constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
-
-using ProgramCode = std::array<u64, MAX_PROGRAM_CODE_LENGTH>;
+using ProgramCode = std::vector<u64>;
class ConstBufferEntry {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -115,8 +115,8 @@ struct ShaderEntries {
using ProgramResult = std::pair<std::string, ShaderEntries>;
struct ShaderSetup {
- ShaderSetup(const ProgramCode& program_code) {
- program.code = program_code;
+ explicit ShaderSetup(ProgramCode program_code) {
+ program.code = std::move(program_code);
}
struct {
@@ -135,8 +135,8 @@ struct ShaderSetup {
}
/// Used in scenarios where we have a dual vertex shaders
- void SetProgramB(const ProgramCode& program_b) {
- program.code_b = program_b;
+ void SetProgramB(ProgramCode program_b) {
+ program.code_b = std::move(program_b);
has_program_b = true;
}
@@ -146,13 +146,18 @@ struct ShaderSetup {
private:
u64 GetNewHash() const {
+ size_t hash = 0;
+
+ const u64 hash_a = Common::ComputeHash64(program.code.data(), program.code.size());
+ boost::hash_combine(hash, hash_a);
+
if (has_program_b) {
// Compute hash over dual shader programs
- return Common::ComputeHash64(&program, sizeof(program));
- } else {
- // Compute hash over a single shader program
- return Common::ComputeHash64(&program.code, program.code.size());
+ const u64 hash_b = Common::ComputeHash64(program.code_b.data(), program.code_b.size());
+ boost::hash_combine(hash, hash_b);
}
+
+ return hash;
}
u64 program_code_hash{};
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index e1a887d67..60a4defd1 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -27,13 +27,17 @@ OpenGLState::OpenGLState() {
color_mask.alpha_enabled = GL_TRUE;
stencil.test_enabled = false;
- stencil.test_func = GL_ALWAYS;
- stencil.test_ref = 0;
- stencil.test_mask = 0xFF;
- stencil.write_mask = 0xFF;
- stencil.action_depth_fail = GL_KEEP;
- stencil.action_depth_pass = GL_KEEP;
- stencil.action_stencil_fail = GL_KEEP;
+ auto reset_stencil = [](auto& config) {
+ config.test_func = GL_ALWAYS;
+ config.test_ref = 0;
+ config.test_mask = 0xFFFFFFFF;
+ config.write_mask = 0xFFFFFFFF;
+ config.action_depth_fail = GL_KEEP;
+ config.action_depth_pass = GL_KEEP;
+ config.action_stencil_fail = GL_KEEP;
+ };
+ reset_stencil(stencil.front);
+ reset_stencil(stencil.back);
blend.enabled = true;
blend.rgb_equation = GL_FUNC_ADD;
@@ -129,24 +133,23 @@ void OpenGLState::Apply() const {
glDisable(GL_STENCIL_TEST);
}
}
-
- if (stencil.test_func != cur_state.stencil.test_func ||
- stencil.test_ref != cur_state.stencil.test_ref ||
- stencil.test_mask != cur_state.stencil.test_mask) {
- glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask);
- }
-
- if (stencil.action_depth_fail != cur_state.stencil.action_depth_fail ||
- stencil.action_depth_pass != cur_state.stencil.action_depth_pass ||
- stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) {
- glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail,
- stencil.action_depth_pass);
- }
-
- // Stencil mask
- if (stencil.write_mask != cur_state.stencil.write_mask) {
- glStencilMask(stencil.write_mask);
- }
+ auto config_stencil = [](GLenum face, const auto& config, const auto& prev_config) {
+ if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
+ config.test_mask != prev_config.test_mask) {
+ glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
+ }
+ if (config.action_depth_fail != prev_config.action_depth_fail ||
+ config.action_depth_pass != prev_config.action_depth_pass ||
+ config.action_stencil_fail != prev_config.action_stencil_fail) {
+ glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
+ config.action_depth_pass);
+ }
+ if (config.write_mask != prev_config.write_mask) {
+ glStencilMaskSeparate(face, config.write_mask);
+ }
+ };
+ config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front);
+ config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
// Blending
if (blend.enabled != cur_state.blend.enabled) {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 22b0b1e41..46e96a97d 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -58,14 +58,16 @@ public:
} color_mask; // GL_COLOR_WRITEMASK
struct {
- bool test_enabled; // GL_STENCIL_TEST
- GLenum test_func; // GL_STENCIL_FUNC
- GLint test_ref; // GL_STENCIL_REF
- GLuint test_mask; // GL_STENCIL_VALUE_MASK
- GLuint write_mask; // GL_STENCIL_WRITEMASK
- GLenum action_stencil_fail; // GL_STENCIL_FAIL
- GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL
- GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS
+ bool test_enabled; // GL_STENCIL_TEST
+ struct {
+ GLenum test_func; // GL_STENCIL_FUNC
+ GLint test_ref; // GL_STENCIL_REF
+ GLuint test_mask; // GL_STENCIL_VALUE_MASK
+ GLuint write_mask; // GL_STENCIL_WRITEMASK
+ GLenum action_stencil_fail; // GL_STENCIL_FAIL
+ GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL
+ GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS
+ } front, back;
} stencil;
struct {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 0343759a6..67273e164 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -295,6 +295,30 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
return {};
}
+inline GLenum StencilOp(Maxwell::StencilOp stencil) {
+ switch (stencil) {
+ case Maxwell::StencilOp::Keep:
+ return GL_KEEP;
+ case Maxwell::StencilOp::Zero:
+ return GL_ZERO;
+ case Maxwell::StencilOp::Replace:
+ return GL_REPLACE;
+ case Maxwell::StencilOp::Incr:
+ return GL_INCR;
+ case Maxwell::StencilOp::Decr:
+ return GL_DECR;
+ case Maxwell::StencilOp::Invert:
+ return GL_INVERT;
+ case Maxwell::StencilOp::IncrWrap:
+ return GL_INCR_WRAP;
+ case Maxwell::StencilOp::DecrWrap:
+ return GL_DECR_WRAP;
+ }
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil));
+ UNREACHABLE();
+ return {};
+}
+
inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) {
switch (front_face) {
case Maxwell::Cull::FrontFace::ClockWise: