37 files changed, 323 insertions, 159 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 9e90c587c..9b2698fad 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -544,7 +544,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
         it++;
     }
 
-    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
+    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 16> downloads;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
     for (const IntervalSet& intervals : committed_ranges) {
@@ -914,6 +914,11 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
         const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
+
+        if (is_written) {
+            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+        }
+
         if constexpr (NEEDS_BIND_STORAGE_INDEX) {
             runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
             ++binding_index;
@@ -931,6 +936,11 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
         const u32 size = binding.size;
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
+        const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0;
+        if (is_written) {
+            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+        }
+
         const u32 offset = buffer.Offset(binding.cpu_addr);
         const PixelFormat format = binding.format;
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -962,6 +972,8 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
         const u32 size = binding.size;
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
+        MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+
         const u32 offset = buffer.Offset(binding.cpu_addr);
         host_bindings.buffers.push_back(&buffer);
         host_bindings.offsets.push_back(offset);
@@ -1011,6 +1023,11 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
         const u32 offset = buffer.Offset(binding.cpu_addr);
         const bool is_written =
             ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
+
+        if (is_written) {
+            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+        }
+
         if constexpr (NEEDS_BIND_STORAGE_INDEX) {
             runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
             ++binding_index;
@@ -1028,6 +1045,12 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
         const u32 size = binding.size;
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
+        const bool is_written =
+            ((channel_state->written_compute_texture_buffers >> index) & 1) != 0;
+        if (is_written) {
+            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+        }
+
         const u32 offset = buffer.Offset(binding.cpu_addr);
         const PixelFormat format = binding.format;
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -1201,16 +1224,11 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
-    const u32 written_mask = channel_state->written_storage_buffers[stage];
     ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
         // Resolve buffer
         Binding& binding = channel_state->storage_buffers[stage][index];
         const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
         binding.buffer_id = buffer_id;
-        // Mark buffer as written if needed
-        if (((written_mask >> index) & 1) != 0) {
-            MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
-        }
     });
 }
 
@@ -1219,10 +1237,6 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
     ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
         Binding& binding = channel_state->texture_buffers[stage][index];
         binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
-        // Mark buffer as written if needed
-        if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) {
-            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
-        }
     });
 }
 
@@ -1252,7 +1266,6 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
         .size = size,
         .buffer_id = buffer_id,
     };
-    MarkWrittenBuffer(buffer_id, *cpu_addr, size);
 }
 
 template <class P>
@@ -1279,10 +1292,6 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
         // Resolve buffer
         Binding& binding = channel_state->compute_storage_buffers[index];
         binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
-        // Mark as written if needed
-        if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) {
-            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
-        }
     });
 }
 
@@ -1291,18 +1300,11 @@ void BufferCache<P>::UpdateComputeTextureBuffers() {
     ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
         Binding& binding = channel_state->compute_texture_buffers[index];
         binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
-        // Mark as written if needed
-        if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) {
-            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
-        }
     });
 }
 
 template <class P>
 void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
-    if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) {
-        SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size);
-    }
     memory_tracker.MarkRegionAsGpuModified(cpu_addr, size);
 
     const IntervalType base_interval{cpu_addr, cpu_addr + size};
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index c4f6e8d12..eed267361 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -62,7 +62,11 @@ using BufferId = SlotId;
 using VideoCore::Surface::PixelFormat;
 using namespace Common::Literals;
 
+#ifdef __APPLE__
+constexpr u32 NUM_VERTEX_BUFFERS = 16;
+#else
 constexpr u32 NUM_VERTEX_BUFFERS = 32;
+#endif
 constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
 constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
 constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index f34090791..d77ff455b 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -48,8 +48,14 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
         SetInlineIndexBuffer(regs.inline_index_4x8.index3);
         break;
     case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
+        DrawArrayInstanced(regs.vertex_array_instance_first.topology.Value(),
+                           regs.vertex_array_instance_first.start.Value(),
+                           regs.vertex_array_instance_first.count.Value(), false);
+        break;
     case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): {
-        LOG_WARNING(HW_GPU, "(STUBBED) called");
+        DrawArrayInstanced(regs.vertex_array_instance_subsequent.topology.Value(),
+                           regs.vertex_array_instance_subsequent.start.Value(),
+                           regs.vertex_array_instance_subsequent.count.Value(), true);
         break;
     }
     case MAXWELL3D_REG_INDEX(draw_texture.src_y0): {
@@ -84,6 +90,22 @@ void DrawManager::DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 ve
     ProcessDraw(false, num_instances);
 }
 
+/// Issues one instance of an instanced array draw, continuing the prior one if `subsequent`.
+void DrawManager::DrawArrayInstanced(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
+                                     bool subsequent) {
+    if (!subsequent) { // a non-subsequent call restarts the running instance counter
+        draw_state.instance_count = 1;
+    }
+    draw_state.draw_mode = DrawMode::Instance;
+    draw_state.topology = topology;
+    draw_state.vertex_buffer.first = vertex_first;
+    draw_state.vertex_buffer.count = vertex_count;
+    // The counter is kept one ahead of the instance index being drawn.
+    draw_state.base_instance = draw_state.instance_count - 1;
+    draw_state.instance_count += 1;
+    ProcessDraw(false, 1);
+}
+
 void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count,
                             u32 base_index, u32 base_instance, u32 num_instances) {
     const auto& regs{maxwell3d->regs};
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h
index 18d959143..cfc8127fc 100644
--- a/src/video_core/engines/draw_manager.h
+++ b/src/video_core/engines/draw_manager.h
@@ -66,6 +66,8 @@ public:
 
     void DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
                    u32 base_instance, u32 num_instances);
+    void DrawArrayInstanced(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
+                            bool subsequent);
 
     void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
                    u32 base_instance, u32 num_instances);
diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
index 8d7da50fc..dbcf508e5 100644
--- a/src/video_core/host1x/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -137,16 +137,6 @@ bool Codec::CreateGpuAvDevice() {
                 break;
             }
             if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) {
-#if defined(__unix__)
-                // Some linux decoding backends are reported to crash with this config method
-                // TODO(ameerj): Properly support this method
-                if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) {
-                    // skip zero-copy decoders, we don't currently support them
-                    LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.",
-                              av_hwdevice_get_type_name(type), config->methods);
-                    continue;
-                }
-#endif
                 LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
                 av_codec_ctx->pix_fmt = config->pix_fmt;
                 return true;
diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
index d33131d7c..b81a54056 100644
--- a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
+++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
@@ -3,16 +3,16 @@
 
 #version 450
 
+precision mediump int;
+precision highp float;
+
 layout(binding = 0) uniform sampler2D depth_tex;
-layout(binding = 1) uniform isampler2D stencil_tex;
+layout(binding = 1) uniform usampler2D stencil_tex;
 
 layout(location = 0) out vec4 color;
 
 void main() {
     ivec2 coord = ivec2(gl_FragCoord.xy);
-    uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
-    uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
-
     highp uint depth_val =
         uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0));
     lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r;
diff --git a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
index 31db7d426..6a457981d 100644
--- a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
+++ b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
@@ -3,16 +3,16 @@
 
 #version 450
 
+precision mediump int;
+precision highp float;
+
 layout(binding = 0) uniform sampler2D depth_tex;
-layout(binding = 1) uniform isampler2D stencil_tex;
+layout(binding = 1) uniform usampler2D stencil_tex;
 
 layout(location = 0) out vec4 color;
 
 void main() {
     ivec2 coord = ivec2(gl_FragCoord.xy);
-    uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
-    uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
-
     highp uint depth_val =
         uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0));
     lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r;
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 3e12a8813..78ea5208b 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -89,9 +89,6 @@ public:
     void RequestScreenshot(void* data, std::function<void(bool)> callback,
                            const Layout::FramebufferLayout& layout);
 
-    /// This is called to notify the rendering backend of a surface change
-    virtual void NotifySurfaceChanged() {}
-
 protected:
     Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
     std::unique_ptr<Core::Frontend::GraphicsContext> context;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 9cafd2983..512eef575 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -1048,6 +1048,10 @@ void Image::Scale(bool up_scale) {
 }
 
 bool Image::ScaleUp(bool ignore) {
+    const auto& resolution = runtime->resolution;
+    if (!resolution.active) {
+        return false;
+    }
     if (True(flags & ImageFlagBits::Rescaled)) {
         return false;
     }
@@ -1060,9 +1064,6 @@ bool Image::ScaleUp(bool ignore) {
         return false;
     }
     flags |= ImageFlagBits::Rescaled;
-    if (!runtime->resolution.active) {
-        return false;
-    }
     has_scaled = true;
     if (ignore) {
         current_texture = upscaled_backup.handle;
@@ -1073,13 +1074,14 @@ bool Image::ScaleUp(bool ignore) {
 }
 
 bool Image::ScaleDown(bool ignore) {
-    if (False(flags & ImageFlagBits::Rescaled)) {
+    const auto& resolution = runtime->resolution;
+    if (!resolution.active) {
         return false;
     }
-    flags &= ~ImageFlagBits::Rescaled;
-    if (!runtime->resolution.active) {
+    if (False(flags & ImageFlagBits::Rescaled)) {
         return false;
     }
+    flags &= ~ImageFlagBits::Rescaled;
     if (ignore) {
         current_texture = texture.handle;
         return true;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 3676eaaa9..e71b87e99 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -118,6 +118,8 @@ public:
 
     void InsertUploadMemoryBarrier();
 
+    void TransitionImageLayout(Image& image) {} // Empty body: no work is performed.
+
     FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
 
     bool HasNativeBgr() const noexcept {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index c7dc7e0a1..5ea9e2378 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -116,6 +116,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
     {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV},                // E5B9G9R9_FLOAT
     {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},            // D32_FLOAT
     {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT},    // D16_UNORM
+    {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT_24_8}, // X8_D24_UNORM
     {GL_STENCIL_INDEX8, GL_STENCIL, GL_UNSIGNED_BYTE},                // S8_UINT
     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},    // D24_UNORM_S8_UINT
     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},    // S8_UINT_D24_UNORM
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 1a40a4d05..c3db09424 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -627,6 +627,8 @@ void BlitImageHelper::ClearDepthStencil(const Framebuffer* dst_framebuffer, bool
     const VkPipelineLayout layout = *clear_color_pipeline_layout;
     scheduler.RequestRenderpass(dst_framebuffer);
     scheduler.Record([pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf) {
+        constexpr std::array blend_constants{0.0f, 0.0f, 0.0f, 0.0f};
+        cmdbuf.SetBlendConstants(blend_constants.data());
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
         BindBlitState(cmdbuf, dst_region);
         cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth);
@@ -883,7 +885,7 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline(
         .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
-        .depthTestEnable = VK_FALSE,
+        .depthTestEnable = key.depth_clear,
         .depthWriteEnable = key.depth_clear,
         .depthCompareOp = VK_COMPARE_OP_ALWAYS,
         .depthBoundsTestEnable = VK_FALSE,
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 208e88533..a08f2f67f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -214,8 +214,9 @@ struct FormatTuple {
     {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32},                        // E5B9G9R9_FLOAT
 
     // Depth formats
-    {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT
-    {VK_FORMAT_D16_UNORM, Attachable},  // D16_UNORM
+    {VK_FORMAT_D32_SFLOAT, Attachable},          // D32_FLOAT
+    {VK_FORMAT_D16_UNORM, Attachable},           // D16_UNORM
+    {VK_FORMAT_X8_D24_UNORM_PACK32, Attachable}, // X8_D24_UNORM
 
     // Stencil formats
     {VK_FORMAT_S8_UINT, Attachable}, // S8_UINT
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 590bc1c64..14e257cf7 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -56,10 +56,6 @@ public:
         return device.GetDriverName();
     }
 
-    void NotifySurfaceChanged() override {
-        present_manager.NotifySurfaceChanged();
-    }
-
 private:
     void Report() const;
 
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 31928bb94..52fc142d1 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -96,6 +96,7 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
 VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
     switch (framebuffer.pixel_format) {
     case Service::android::PixelFormat::Rgba8888:
+    case Service::android::PixelFormat::Rgbx8888:
         return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
     case Service::android::PixelFormat::Rgb565:
         return VK_FORMAT_R5G6B5_UNORM_PACK16;
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp
index d681bd22a..2ef36583b 100644
--- a/src/video_core/renderer_vulkan/vk_present_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp
@@ -103,8 +103,7 @@ PresentManager::PresentManager(const vk::Instance& instance_,
       surface{surface_}, blit_supported{CanBlitToSwapchain(device.GetPhysical(),
                                                            swapchain.GetImageViewFormat())},
       use_present_thread{Settings::values.async_presentation.GetValue()},
-      image_count{swapchain.GetImageCount()}, last_render_surface{
-                                                  render_window_.GetWindowInfo().render_surface} {
+      image_count{swapchain.GetImageCount()} {
 
     auto& dld = device.GetLogical();
     cmdpool = dld.CreateCommandPool({
@@ -289,44 +288,36 @@ void PresentManager::PresentThread(std::stop_token token) {
     }
 }
 
-void PresentManager::NotifySurfaceChanged() {
-#ifdef ANDROID
-    std::scoped_lock lock{recreate_surface_mutex};
-    recreate_surface_cv.notify_one();
-#endif
+void PresentManager::RecreateSwapchain(Frame* frame) { // Rebuild swapchain to match frame.
+    swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb);
+    image_count = swapchain.GetImageCount(); // keep the cached count in sync
 }
 
 void PresentManager::CopyToSwapchain(Frame* frame) {
-    MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain);
-
-    const auto recreate_swapchain = [&] {
-        swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb);
-        image_count = swapchain.GetImageCount();
-    };
-
-#ifdef ANDROID
-    std::unique_lock lock{recreate_surface_mutex};
-
-    const auto needs_recreation = [&] {
-        if (last_render_surface != render_window.GetWindowInfo().render_surface) {
-            return true;
-        }
-        if (swapchain.NeedsRecreation(frame->is_srgb)) {
-            return true;
+    bool requires_recreation = false;
+
+    while (true) {
+        try {
+            // Recreate surface and swapchain if needed.
+            if (requires_recreation) {
+                surface = CreateSurface(instance, render_window.GetWindowInfo());
+                RecreateSwapchain(frame);
+            }
+
+            // Draw to swapchain.
+            return CopyToSwapchainImpl(frame);
+        } catch (const vk::Exception& except) {
+            if (except.GetResult() != VK_ERROR_SURFACE_LOST_KHR) {
+                throw;
+            }
+
+            requires_recreation = true;
         }
-        return false;
-    };
-
-    recreate_surface_cv.wait_for(lock, std::chrono::milliseconds(400),
-                                 [&]() { return !needs_recreation(); });
-
-    // If the frontend recreated the surface, recreate the renderer surface and swapchain.
-    if (last_render_surface != render_window.GetWindowInfo().render_surface) {
-        last_render_surface = render_window.GetWindowInfo().render_surface;
-        surface = CreateSurface(instance, render_window.GetWindowInfo());
-        recreate_swapchain();
     }
-#endif
+}
+
+void PresentManager::CopyToSwapchainImpl(Frame* frame) {
+    MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain);
 
     // If the size or colorspace of the incoming frames has changed, recreate the swapchain
     // to account for that.
@@ -334,11 +325,11 @@ void PresentManager::CopyToSwapchain(Frame* frame) {
     const bool size_changed =
         swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height;
     if (srgb_changed || size_changed) {
-        recreate_swapchain();
+        RecreateSwapchain(frame);
     }
 
     while (swapchain.AcquireNextImage()) {
-        recreate_swapchain();
+        RecreateSwapchain(frame);
     }
 
     const vk::CommandBuffer cmdbuf{frame->cmdbuf};
@@ -488,4 +479,4 @@ void PresentManager::CopyToSwapchain(Frame* frame) {
     swapchain.Present(render_semaphore);
 }
 
-} // namespace Vulkan
-\ No newline at end of file
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h
index 83e859416..a3d825fe6 100644
--- a/src/video_core/renderer_vulkan/vk_present_manager.h
+++ b/src/video_core/renderer_vulkan/vk_present_manager.h
@@ -54,14 +54,15 @@ public:
     /// Waits for the present thread to finish presenting all queued frames.
     void WaitPresent();
 
-    /// This is called to notify the rendering backend of a surface change
-    void NotifySurfaceChanged();
-
 private:
     void PresentThread(std::stop_token token);
 
     void CopyToSwapchain(Frame* frame);
 
+    void CopyToSwapchainImpl(Frame* frame);
+
+    void RecreateSwapchain(Frame* frame);
+
 private:
     const vk::Instance& instance;
     Core::Frontend::EmuWindow& render_window;
@@ -76,16 +77,13 @@ private:
     std::queue<Frame*> free_queue;
     std::condition_variable_any frame_cv;
     std::condition_variable free_cv;
-    std::condition_variable recreate_surface_cv;
     std::mutex swapchain_mutex;
-    std::mutex recreate_surface_mutex;
     std::mutex queue_mutex;
     std::mutex free_mutex;
     std::jthread present_thread;
     bool blit_supported;
     bool use_present_thread;
     std::size_t image_count{};
-    void* last_render_surface{};
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 2edaafa7e..66c03bf17 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -1436,6 +1436,7 @@ void QueryCacheRuntime::Barriers(bool is_prebarrier) {
         .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
         .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
     };
+    impl->scheduler.RequestOutsideRenderPassOperationContext();
     if (is_prebarrier) {
         impl->scheduler.Record([](vk::CommandBuffer cmdbuf) {
             cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 1628d76d6..61d03daae 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -422,7 +422,8 @@ void RasterizerVulkan::Clear(u32 layer_count) {
         return;
     }
 
-    if (use_stencil && regs.stencil_front_mask != 0xFF && regs.stencil_front_mask != 0) {
+    if (use_stencil && framebuffer->HasAspectStencilBit() && regs.stencil_front_mask != 0xFF &&
+        regs.stencil_front_mask != 0) {
         Region2D dst_region = {
             Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
             Offset2D{.x = clear_rect.rect.offset.x + static_cast<s32>(clear_rect.rect.extent.width),
@@ -974,6 +975,19 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
     if (!state_tracker.TouchScissors()) {
         return;
     }
+    if (!regs.viewport_scale_offset_enabled) {
+        const auto x = static_cast<float>(regs.surface_clip.x);
+        const auto y = static_cast<float>(regs.surface_clip.y);
+        const auto width = static_cast<float>(regs.surface_clip.width);
+        const auto height = static_cast<float>(regs.surface_clip.height);
+        VkRect2D scissor;
+        scissor.offset.x = static_cast<u32>(x);
+        scissor.offset.y = static_cast<u32>(y);
+        scissor.extent.width = static_cast<u32>(width != 0.0f ? width : 1.0f);
+        scissor.extent.height = static_cast<u32>(height != 0.0f ? height : 1.0f);
+        scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissor); });
+        return;
+    }
     u32 up_scale = 1;
     u32 down_shift = 0;
     if (texture_cache.IsRescaling()) {
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
index ae9f1de64..7746a88d3 100644
--- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -19,7 +19,7 @@ VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat
                                               VkSampleCountFlagBits samples) {
     using MaxwellToVK::SurfaceFormat;
     return {
-        .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
+        .flags = {},
         .format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
         .samples = samples,
         .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index ce92f66ab..b278614e6 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -24,25 +24,38 @@ using namespace Common::Literals;
 
 // Maximum potential alignment of a Vulkan buffer
 constexpr VkDeviceSize MAX_ALIGNMENT = 256;
-// Maximum size to put elements in the stream buffer
-constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
 // Stream buffer size in bytes
-constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
-constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
+constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB;
 
-size_t Region(size_t iterator) noexcept {
-    return iterator / REGION_SIZE;
+/// Returns the stream buffer size in bytes, never more than MAX_STREAM_BUFFER_SIZE.
+size_t GetStreamBufferSize(const Device& device) {
+    VkDeviceSize size{0};
+    if (device.HasDebuggingToolAttached()) {
+        ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) {
+            size = std::max(size, heap.size);
+        });
+        // No rebar (heap <= 256 MiB): take 40% so RenderDoc can load 2 captures at once.
+        if (size <= 256_MiB) {
+            size = size * 40 / 100;
+        }
+    }
+    if (size == 0) { // No debugger or no heap found; 0 would make region_size 0 as well.
+        size = MAX_STREAM_BUFFER_SIZE;
+    }
+    return std::min(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE);
 }
 } // Anonymous namespace
 
 StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
                                      Scheduler& scheduler_)
-    : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
+    : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
+      stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size /
+                                                                   StagingBufferPool::NUM_SYNCS} {
     VkBufferCreateInfo stream_ci = {
         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
-        .size = STREAM_BUFFER_SIZE,
+        .size = stream_buffer_size,
         .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
                  VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
@@ -63,7 +76,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
 StagingBufferPool::~StagingBufferPool() = default;
 
 StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
-    if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
+    if (!deferred && usage == MemoryUsage::Upload && size <= region_size) {
         return GetStreamBuffer(size);
     }
     return GetStagingBuffer(size, usage, deferred);
@@ -101,7 +114,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
     used_iterator = iterator;
     free_iterator = std::max(free_iterator, iterator + size);
 
-    if (iterator + size >= STREAM_BUFFER_SIZE) {
+    if (iterator + size >= stream_buffer_size) {
         std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
                   current_tick);
         used_iterator = 0;
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 5f69f08b1..d3deb9072 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -90,6 +90,9 @@ private:
     void ReleaseCache(MemoryUsage usage);
 
     void ReleaseLevel(StagingBuffersCache& cache, size_t log2);
+    size_t Region(size_t iter) const noexcept {
+        return static_cast<size_t>(iter / region_size); // region_size-wide slot holding `iter`
+    }
 
     const Device& device;
     MemoryAllocator& memory_allocator;
@@ -97,6 +100,8 @@ private:
 
     vk::Buffer stream_buffer;
     std::span<u8> stream_pointer;
+    VkDeviceSize stream_buffer_size;
+    VkDeviceSize region_size;
 
     size_t iterator = 0;
     size_t used_iterator = 0;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index cdc41816f..80efd9517 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -238,6 +238,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
         return any_r ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
     case PixelFormat::D16_UNORM:
     case PixelFormat::D32_FLOAT:
+    case PixelFormat::X8_D24_UNORM:
         return VK_IMAGE_ASPECT_DEPTH_BIT;
     case PixelFormat::S8_UINT:
         return VK_IMAGE_ASPECT_STENCIL_BIT;
@@ -1550,15 +1551,15 @@ bool Image::IsRescaled() const noexcept {
 }
 
 bool Image::ScaleUp(bool ignore) {
+    const auto& resolution = runtime->resolution;
+    if (!resolution.active) {
+        return false;
+    }
     if (True(flags & ImageFlagBits::Rescaled)) {
         return false;
     }
     ASSERT(info.type != ImageType::Linear);
     flags |= ImageFlagBits::Rescaled;
-    const auto& resolution = runtime->resolution;
-    if (!resolution.active) {
-        return false;
-    }
     has_scaled = true;
     if (!scaled_image) {
         const bool is_2d = info.type == ImageType::e2D;
@@ -1587,15 +1588,15 @@ bool Image::ScaleUp(bool ignore) {
 }
 
 bool Image::ScaleDown(bool ignore) {
+    const auto& resolution = runtime->resolution;
+    if (!resolution.active) {
+        return false;
+    }
     if (False(flags & ImageFlagBits::Rescaled)) {
         return false;
     }
     ASSERT(info.type != ImageType::Linear);
     flags &= ~ImageFlagBits::Rescaled;
-    const auto& resolution = runtime->resolution;
-    if (!resolution.active) {
-        return false;
-    }
     current_image = *original_image;
     if (ignore) {
         return true;
@@ -2033,4 +2034,36 @@ void TextureCacheRuntime::AccelerateImageUpload(
     ASSERT(false);
 }
 
+/// Records a pipeline barrier that moves `image` from VK_IMAGE_LAYOUT_UNDEFINED to
+/// VK_IMAGE_LAYOUT_GENERAL for all of its mip levels and array layers. Nothing is recorded when
+/// image.ExchangeInitialization() returns true (presumably: the image was already initialized).
+void TextureCacheRuntime::TransitionImageLayout(Image& image) {
+    if (image.ExchangeInitialization()) {
+        return;
+    }
+    const VkImageMemoryBarrier layout_barrier{
+        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+        .pNext = nullptr,
+        .srcAccessMask = VK_ACCESS_NONE,
+        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+        .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = image.Handle(),
+        .subresourceRange{
+            .aspectMask = image.AspectMask(),
+            .baseMipLevel = 0,
+            .levelCount = VK_REMAINING_MIP_LEVELS,
+            .baseArrayLayer = 0,
+            .layerCount = VK_REMAINING_ARRAY_LAYERS,
+        },
+    };
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([barrier = layout_barrier](vk::CommandBuffer cmdbuf) {
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                               VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier);
+    });
+}
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index d6c5a15cc..7a0807709 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -92,6 +92,8 @@ public:
 
     void InsertUploadMemoryBarrier() {}
 
+    void TransitionImageLayout(Image& image);
+
     bool HasBrokenTextureViewFormats() const noexcept {
         // No known Vulkan driver has broken image views
         return false;
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index e16cd5e73..5b3c7aa5a 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -85,6 +85,8 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
         return PixelFormat::S8_UINT;
     case Tegra::DepthFormat::Z32_FLOAT_X24S8_UINT:
         return PixelFormat::D32_FLOAT_S8_UINT;
+    case Tegra::DepthFormat::X8Z24_UNORM:
+        return PixelFormat::X8_D24_UNORM;
     default:
         UNIMPLEMENTED_MSG("Unimplemented format={}", format);
         return PixelFormat::S8_UINT_D24_UNORM;
@@ -202,6 +204,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
 PixelFormat PixelFormatFromGPUPixelFormat(Service::android::PixelFormat format) {
     switch (format) {
     case Service::android::PixelFormat::Rgba8888:
+    case Service::android::PixelFormat::Rgbx8888:
         return PixelFormat::A8B8G8R8_UNORM;
     case Service::android::PixelFormat::Rgb565:
         return PixelFormat::R5G6B5_UNORM;
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 9b9c4d9bc..a5e8e2f62 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -115,6 +115,7 @@ enum class PixelFormat {
     // Depth formats
     D32_FLOAT = MaxColorFormat,
     D16_UNORM,
+    X8_D24_UNORM,
 
     MaxDepthFormat,
 
@@ -251,6 +252,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
     1,  // E5B9G9R9_FLOAT
     1,  // D32_FLOAT
     1,  // D16_UNORM
+    1,  // X8_D24_UNORM
     1,  // S8_UINT
     1,  // D24_UNORM_S8_UINT
     1,  // S8_UINT_D24_UNORM
@@ -360,6 +362,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
     1,  // E5B9G9R9_FLOAT
     1,  // D32_FLOAT
     1,  // D16_UNORM
+    1,  // X8_D24_UNORM
     1,  // S8_UINT
     1,  // D24_UNORM_S8_UINT
     1,  // S8_UINT_D24_UNORM
@@ -469,6 +472,7 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
     32,  // E5B9G9R9_FLOAT
     32,  // D32_FLOAT
     16,  // D16_UNORM
+    32,  // X8_D24_UNORM
     8,   // S8_UINT
     32,  // D24_UNORM_S8_UINT
     32,  // S8_UINT_D24_UNORM
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 56307d030..8c774f512 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -138,10 +138,16 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
         return PixelFormat::E5B9G9R9_FLOAT;
     case Hash(TextureFormat::Z32, FLOAT):
         return PixelFormat::D32_FLOAT;
+    case Hash(TextureFormat::Z32, FLOAT, UINT, UINT, UINT, LINEAR):
+        return PixelFormat::D32_FLOAT;
     case Hash(TextureFormat::Z16, UNORM):
         return PixelFormat::D16_UNORM;
     case Hash(TextureFormat::Z16, UNORM, UINT, UINT, UINT, LINEAR):
         return PixelFormat::D16_UNORM;
+    case Hash(TextureFormat::X8Z24, UNORM):
+        return PixelFormat::X8_D24_UNORM;
+    case Hash(TextureFormat::X8Z24, UNORM, UINT, UINT, UINT, LINEAR):
+        return PixelFormat::X8_D24_UNORM;
     case Hash(TextureFormat::Z24S8, UINT, UNORM, UNORM, UNORM, LINEAR):
         return PixelFormat::S8_UINT_D24_UNORM;
     case Hash(TextureFormat::Z24S8, UINT, UNORM, UINT, UINT, LINEAR):
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index 6279d8e9e..2b7e0df72 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -10,19 +10,23 @@
 #include "video_core/texture_cache/image_info.h"
 #include "video_core/texture_cache/image_view_base.h"
 #include "video_core/texture_cache/render_targets.h"
+#include "video_core/texture_cache/samples_helper.h"
 
 namespace VideoCommon {
 
 std::string Name(const ImageBase& image) {
     const GPUVAddr gpu_addr = image.gpu_addr;
     const ImageInfo& info = image.info;
-    const u32 width = info.size.width;
-    const u32 height = info.size.height;
+    u32 width = info.size.width;
+    u32 height = info.size.height;
     const u32 depth = info.size.depth;
     const u32 num_layers = image.info.resources.layers;
     const u32 num_levels = image.info.resources.levels;
     std::string resource;
     if (image.info.num_samples > 1) {
+        const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(image.info.num_samples);
+        width >>= samples_x;
+        height >>= samples_y;
         resource += fmt::format(":{}xMSAA", image.info.num_samples);
     }
     if (num_layers > 1) {
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index 9ee57a076..cabbfcb2d 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -211,6 +211,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
                 return "D32_FLOAT";
             case PixelFormat::D16_UNORM:
                 return "D16_UNORM";
+            case PixelFormat::X8_D24_UNORM:
+                return "X8_D24_UNORM";
             case PixelFormat::S8_UINT:
                 return "S8_UINT";
             case PixelFormat::D24_UNORM_S8_UINT:
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index 0c5f4450d..18b9250f9 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -85,6 +85,7 @@ bool ImageViewBase::SupportsAnisotropy() const noexcept {
     // Depth formats
     case PixelFormat::D32_FLOAT:
     case PixelFormat::D16_UNORM:
+    case PixelFormat::X8_D24_UNORM:
     // Stencil formats
     case PixelFormat::S8_UINT:
     // DepthStencil formats
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h
index 203ac1b11..2ee2f8312 100644
--- a/src/video_core/texture_cache/samples_helper.h
+++ b/src/video_core/texture_cache/samples_helper.h
@@ -24,7 +24,7 @@ namespace VideoCommon {
         return {2, 2};
     }
     ASSERT_MSG(false, "Invalid number of samples={}", num_samples);
-    return {1, 1};
+    return {0, 0};
 }
 
 [[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 1bdb0def5..d575c57ca 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1016,6 +1016,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
 
     if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) {
         LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
+        runtime.TransitionImageLayout(image);
         return;
     }
     if (True(image.flags & ImageFlagBits::AsynchronousDecode)) {
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 0a86ce139..15596c925 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -68,6 +68,7 @@ struct LevelInfo {
     Extent2D tile_size;
     u32 bpp_log2;
     u32 tile_width_spacing;
+    u32 num_levels;
 };
 
 [[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) {
@@ -118,11 +119,11 @@ template <u32 GOB_EXTENT>
 }
 
 [[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size,
-                                                    u32 level) {
+                                                    u32 level, u32 num_levels) {
     return {
         .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
         .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
-        .depth = level == 0
+        .depth = level == 0 && num_levels == 1
                      ? block_size.depth
                      : AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
     };
@@ -166,7 +167,7 @@ template <u32 GOB_EXTENT>
 }
 
 [[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
-    if (level == 0) {
+    if (level == 0 && info.num_levels == 1) {
         return Extent3D{
             .width = info.block.width,
             .height = info.block.height,
@@ -257,7 +258,7 @@ template <u32 GOB_EXTENT>
 }
 
 [[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
-                                                u32 tile_width_spacing) {
+                                                u32 tile_width_spacing, u32 num_levels) {
     const u32 bytes_per_block = BytesPerBlock(format);
     return {
         .size =
@@ -270,16 +271,18 @@ template <u32 GOB_EXTENT>
         .tile_size = DefaultBlockSize(format),
         .bpp_log2 = BytesPerBlockLog2(bytes_per_block),
         .tile_width_spacing = tile_width_spacing,
+        .num_levels = num_levels,
     };
 }
 
 [[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) {
-    return MakeLevelInfo(info.format, info.size, info.block, info.tile_width_spacing);
+    return MakeLevelInfo(info.format, info.size, info.block, info.tile_width_spacing,
+                         info.resources.levels);
 }
 
 [[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
                                                  u32 tile_width_spacing, u32 level) {
-    const LevelInfo info = MakeLevelInfo(format, size, block, tile_width_spacing);
+    const LevelInfo info = MakeLevelInfo(format, size, block, tile_width_spacing, level);
     u32 offset = 0;
     for (u32 current_level = 0; current_level < level; ++current_level) {
         offset += CalculateLevelSize(info, current_level);
@@ -466,7 +469,7 @@ template <u32 GOB_EXTENT>
     };
     const u32 bpp_log2 = BytesPerBlockLog2(info.format);
     const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing);
-    const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0);
+    const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0, info.resources.levels);
     return Extent3D{
         .width = Common::AlignUpLog2(num_tiles.width, alignment),
         .height = Common::AlignUpLog2(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height),
@@ -533,7 +536,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
     UNIMPLEMENTED_IF(copy.image_extent != level_size);
 
     const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
-    const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
+    const Extent3D block =
+        AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels);
 
     size_t host_offset = copy.buffer_offset;
 
@@ -698,7 +702,7 @@ u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) {
     const Extent2D tile_size = DefaultBlockSize(info.format);
     const Extent3D level_size = AdjustMipSize(info.size, level);
     const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
-    const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level);
+    const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level, info.resources.levels);
     const u32 bpp_log2 = BytesPerBlockLog2(info.format);
     return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing);
 }
@@ -887,7 +891,8 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
             .image_extent = level_size,
         };
         const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
-        const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
+        const Extent3D block =
+            AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels);
         const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2);
         size_t guest_layer_offset = 0;
 
@@ -1041,7 +1046,7 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
     const Extent2D tile_size = DefaultBlockSize(info.format);
     const Extent3D level_size = AdjustMipSize(info.size, level);
     const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
-    return AdjustMipBlockSize(num_tiles, level_info.block, level);
+    return AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels);
 }
 
 boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) {
@@ -1063,7 +1068,8 @@ boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const I
     for (s32 level = 0; level < num_levels; ++level) {
         const Extent3D level_size = AdjustMipSize(size, level);
         const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
-        const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
+        const Extent3D block =
+            AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels);
         params[level] = SwizzleParameters{
             .num_tiles = num_tiles,
             .block = block,
@@ -1292,11 +1298,11 @@ u32 MapSizeBytes(const ImageBase& image) {
     }
 }
 
-static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0}, 0) ==
+static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0, 1}, 0) ==
               0x7f8000);
-static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x40000);
+static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0, 1}, 0) == 0x40000);
 
-static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0}, 0) == 0x40000);
+static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0, 1}, 0) == 0x40000);
 
 static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) ==
               0x2afc00);
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 3960b135a..876cec2e8 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -84,9 +84,12 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{
 } // namespace Alternatives
 
 enum class NvidiaArchitecture {
-    AmpereOrNewer,
+    KeplerOrOlder, // Enumerators run oldest -> newest; code compares them with <= and >=.
+    Maxwell,
+    Pascal,
+    Volta,
     Turing,
-    VoltaOrOlder,
+    AmpereOrNewer,
 };
 
 template <typename T>
@@ -200,6 +203,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
         VK_FORMAT_BC7_UNORM_BLOCK,
         VK_FORMAT_D16_UNORM,
         VK_FORMAT_D16_UNORM_S8_UINT,
+        VK_FORMAT_X8_D24_UNORM_PACK32,
         VK_FORMAT_D24_UNORM_S8_UINT,
         VK_FORMAT_D32_SFLOAT,
         VK_FORMAT_D32_SFLOAT_S8_UINT,
@@ -321,13 +325,38 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
         physical.GetProperties2(physical_properties);
         if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) {
             // Only Ampere and newer support this feature
+            // TODO: Find a way to differentiate Ampere and Ada
             return NvidiaArchitecture::AmpereOrNewer;
         }
-    }
-    if (exts.contains(VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) {
         return NvidiaArchitecture::Turing;
     }
-    return NvidiaArchitecture::VoltaOrOlder;
+
+    if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) {
+        VkPhysicalDeviceBlendOperationAdvancedPropertiesEXT advanced_blending_props{};
+        advanced_blending_props.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BLEND_OPERATION_ADVANCED_PROPERTIES_EXT;
+        VkPhysicalDeviceProperties2 physical_properties{};
+        physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+        physical_properties.pNext = &advanced_blending_props;
+        physical.GetProperties2(physical_properties);
+        if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) {
+            return NvidiaArchitecture::Maxwell;
+        }
+
+        if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) {
+            VkPhysicalDeviceConservativeRasterizationPropertiesEXT conservative_raster_props{};
+            conservative_raster_props.sType =
+                VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT;
+            physical_properties.pNext = &conservative_raster_props;
+            physical.GetProperties2(physical_properties);
+            if (conservative_raster_props.degenerateLinesRasterized) {
+                return NvidiaArchitecture::Volta;
+            }
+            return NvidiaArchitecture::Pascal;
+        }
+    }
+
+    return NvidiaArchitecture::KeplerOrOlder;
 }
 
 std::vector<const char*> ExtensionListForVulkan(
@@ -504,19 +533,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
     if (is_nvidia) {
         const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff;
         const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
-        switch (arch) {
-        case NvidiaArchitecture::AmpereOrNewer:
+        if (arch >= NvidiaArchitecture::AmpereOrNewer) {
             LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math");
             features.shader_float16_int8.shaderFloat16 = false;
-            break;
-        case NvidiaArchitecture::Turing:
-            break;
-        case NvidiaArchitecture::VoltaOrOlder:
+        } else if (arch <= NvidiaArchitecture::Volta) {
             if (nv_major_version < 527) {
                 LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor");
                 RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
             }
-            break;
         }
         if (nv_major_version >= 510) {
             LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits");
@@ -661,7 +685,15 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
                         "ANV drivers 22.3.0 to 23.1.0 have broken VK_KHR_push_descriptor");
             RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
         }
+    } else if (extensions.push_descriptor && is_nvidia) {
+        const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
+        if (arch <= NvidiaArchitecture::Pascal) {
+            LOG_WARNING(Render_Vulkan,
+                        "Pascal and older architectures have broken VK_KHR_push_descriptor");
+            RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
+        }
     }
+
     if (is_mvk) {
         LOG_WARNING(Render_Vulkan,
                     "MVK driver breaks when using more than 16 vertex attributes/bindings");
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index 3ef381a38..8dd1667f3 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -9,6 +9,7 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "common/literals.h"
 #include "common/logging/log.h"
 #include "common/polyfill_ranges.h"
 #include "video_core/vulkan_common/vma.h"
@@ -65,12 +66,12 @@ struct Range {
     switch (usage) {
     case MemoryUsage::Upload:
     case MemoryUsage::Stream:
-        return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+        return VMA_ALLOCATION_CREATE_MAPPED_BIT |
+               VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
     case MemoryUsage::Download:
-        return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
+        return VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
     case MemoryUsage::DeviceLocal:
-        return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
-               VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT;
+        return {};
     }
     return {};
 }
@@ -212,7 +213,28 @@ MemoryAllocator::MemoryAllocator(const Device& device_)
     : device{device_}, allocator{device.GetAllocator()},
       properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
       buffer_image_granularity{
-          device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {}
+          device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {
+    // GPUs without Resizable BAR may only expose a small (256 MiB or less) heap that is both
+    // device local and host visible. In that case, opening 2 RenderDoc captures side-by-side is
+    // not possible due to the heap running out of memory. With a debugging tool attached, keep
+    // such heaps for the stream buffer by masking their memory types out for all other buffers.
+    if (device.HasDebuggingToolAttached()) {
+        using namespace Common::Literals;
+        constexpr VkMemoryPropertyFlags device_host_flags =
+            VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+        // valid_memory_types is passed as VmaAllocationCreateInfo::memoryTypeBits, which holds
+        // one bit per memory *type*; clear the type's own bit, not the bit of its heap index.
+        for (u32 type = 0; type < properties.memoryTypeCount; ++type) {
+            const VkMemoryType& memory_type = properties.memoryTypes[type];
+            if ((memory_type.propertyFlags & device_host_flags) != device_host_flags) {
+                continue;
+            }
+            if (properties.memoryHeaps[memory_type.heapIndex].size <= 256_MiB) {
+                valid_memory_types &= ~(1u << type);
+            }
+        }
+    }
+}
 
 MemoryAllocator::~MemoryAllocator() = default;
 
@@ -239,12 +253,11 @@ vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
 
 vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const {
     const VmaAllocationCreateInfo alloc_ci = {
-        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT |
-                 MemoryUsageVmaFlags(usage),
+        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
         .usage = MemoryUsageVma(usage),
         .requiredFlags = 0,
         .preferredFlags = MemoryUsagePreferedVmaFlags(usage),
-        .memoryTypeBits = 0,
+        .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
         .pool = VK_NULL_HANDLE,
         .pUserData = nullptr,
         .priority = 0.f,
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index f449bc8d0..38a182bcb 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -7,6 +7,7 @@
 #include <span>
 #include <vector>
 #include "common/common_types.h"
+#include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 VK_DEFINE_HANDLE(VmaAllocator)
@@ -26,6 +27,24 @@ enum class MemoryUsage {
     Stream,      ///< Requests device local host visible buffer, falling back host memory.
 };
 
+/// Calls f(heap_index, heap) once for every memory type of the physical device that is both
+/// device local and host visible. The first argument is the heap index of that memory type (not
+/// the memory type index), so a heap shared by several matching types is reported once per type.
+/// `heap` refers to a local copy of the queried memory properties.
+template <typename F>
+void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) {
+    constexpr VkMemoryPropertyFlags required_flags =
+        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+    auto props = device.GetPhysical().GetMemoryProperties().memoryProperties;
+    for (size_t type = 0; type < props.memoryTypeCount; ++type) {
+        auto& memory_type = props.memoryTypes[type];
+        if ((memory_type.propertyFlags & required_flags) != required_flags) {
+            continue;
+        }
+        f(memory_type.heapIndex, props.memoryHeaps[memory_type.heapIndex]);
+    }
+}
+
 /// Ownership handle of a memory commitment.
 /// Points to a subregion of a memory allocation.
 class MemoryCommit {
@@ -124,6 +137,7 @@ private:
     std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
     VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
                                            // and optimal images
+    u32 valid_memory_types{~0u};
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 1e3c0fa64..0487cd3b6 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -117,6 +117,9 @@ public:
     virtual ~Exception() = default;
 
     const char* what() const noexcept override;
+    VkResult GetResult() const noexcept {
+        return result; // The VkResult value stored in this exception.
+    }
 
 private:
     VkResult result;