diff options
Diffstat (limited to 'src/video_core')
37 files changed, 323 insertions, 159 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9e90c587c..9b2698fad 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -544,7 +544,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { it++; } - boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; + boost::container::small_vector<std::pair<BufferCopy, BufferId>, 16> downloads; u64 total_size_bytes = 0; u64 largest_copy = 0; for (const IntervalSet& intervals : committed_ranges) { @@ -914,6 +914,11 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { const u32 offset = buffer.Offset(binding.cpu_addr); const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; + + if (is_written) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); + } + if constexpr (NEEDS_BIND_STORAGE_INDEX) { runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written); ++binding_index; @@ -931,6 +936,11 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); + const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0; + if (is_written) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); + } + const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { @@ -962,6 +972,8 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); + const u32 offset = buffer.Offset(binding.cpu_addr); host_bindings.buffers.push_back(&buffer); host_bindings.offsets.push_back(offset); @@ -1011,6 +1023,11 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { const u32 offset = buffer.Offset(binding.cpu_addr); const bool is_written = ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; + + if (is_written) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); + } + if constexpr (NEEDS_BIND_STORAGE_INDEX) { runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written); ++binding_index; @@ -1028,6 +1045,12 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); + const bool is_written = + ((channel_state->written_compute_texture_buffers >> index) & 1) != 0; + if (is_written) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); + } + const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { @@ -1201,16 +1224,11 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) { template <class P> void BufferCache<P>::UpdateStorageBuffers(size_t stage) { - const u32 written_mask = channel_state->written_storage_buffers[stage]; ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { // Resolve buffer Binding& binding = channel_state->storage_buffers[stage][index]; const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); binding.buffer_id = buffer_id; - // Mark buffer as written if needed - if (((written_mask >> index) & 1) != 0) { - MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); - } }); } @@ -1219,10 +1237,6 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) { ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { Binding& binding = channel_state->texture_buffers[stage][index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); - // Mark buffer as written if needed - if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) { - MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); - } }); } @@ -1252,7 +1266,6 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { .size = size, .buffer_id = buffer_id, }; - MarkWrittenBuffer(buffer_id, *cpu_addr, size); } template <class P> @@ -1279,10 +1292,6 @@ void BufferCache<P>::UpdateComputeStorageBuffers() { // Resolve buffer Binding& binding = channel_state->compute_storage_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); - // Mark as written if needed - if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); - } }); } @@ -1291,18 +1300,11 @@ void BufferCache<P>::UpdateComputeTextureBuffers() { ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { Binding& binding = channel_state->compute_texture_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); - // Mark as written if needed - if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); - } }); } template <class P> void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { - if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) { - SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size); - } memory_tracker.MarkRegionAsGpuModified(cpu_addr, size); const IntervalType base_interval{cpu_addr, cpu_addr + size}; diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index c4f6e8d12..eed267361 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -62,7 +62,11 @@ using BufferId = SlotId; using VideoCore::Surface::PixelFormat; using namespace Common::Literals; +#ifdef __APPLE__ +constexpr u32 NUM_VERTEX_BUFFERS = 16; +#else constexpr u32 NUM_VERTEX_BUFFERS = 32; +#endif constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index f34090791..d77ff455b 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -48,8 +48,14 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) { SetInlineIndexBuffer(regs.inline_index_4x8.index3); break; case MAXWELL3D_REG_INDEX(vertex_array_instance_first): + DrawArrayInstanced(regs.vertex_array_instance_first.topology.Value(), + regs.vertex_array_instance_first.start.Value(), + regs.vertex_array_instance_first.count.Value(), false); + break; case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): { - LOG_WARNING(HW_GPU, "(STUBBED) called"); + DrawArrayInstanced(regs.vertex_array_instance_subsequent.topology.Value(), + regs.vertex_array_instance_subsequent.start.Value(), + regs.vertex_array_instance_subsequent.count.Value(), true); break; } case MAXWELL3D_REG_INDEX(draw_texture.src_y0): { @@ -84,6 +90,22 @@ void DrawManager::DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 ve ProcessDraw(false, num_instances); } +void DrawManager::DrawArrayInstanced(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, + bool subsequent) { + draw_state.topology = topology; + draw_state.vertex_buffer.first = vertex_first; + draw_state.vertex_buffer.count = vertex_count; + + if (!subsequent) { + draw_state.instance_count = 1; + } + + draw_state.base_instance = draw_state.instance_count - 1; + draw_state.draw_mode = DrawMode::Instance; + draw_state.instance_count++; + ProcessDraw(false, 1); +} + void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, u32 base_instance, u32 num_instances) { const auto& regs{maxwell3d->regs}; diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 18d959143..cfc8127fc 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -66,6 +66,8 @@ public: void DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, u32 base_instance, u32 num_instances); + void DrawArrayInstanced(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, + bool subsequent); void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, u32 base_instance, u32 num_instances); diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index 8d7da50fc..dbcf508e5 100644 --- a/src/video_core/host1x/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp @@ -137,16 +137,6 @@ bool Codec::CreateGpuAvDevice() { break; } if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) { -#if defined(__unix__) - // Some linux decoding backends are reported to crash with this config method - // TODO(ameerj): Properly support this method - if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) { - // skip zero-copy decoders, we don't currently support them - LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.", - av_hwdevice_get_type_name(type), config->methods); - continue; - } -#endif LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); av_codec_ctx->pix_fmt = config->pix_fmt; return true; diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag index d33131d7c..b81a54056 100644 --- a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag +++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag @@ -3,16 +3,16 @@ #version 450 +precision mediump int; +precision highp float; + layout(binding = 0) uniform sampler2D depth_tex; -layout(binding = 1) uniform isampler2D stencil_tex; +layout(binding = 1) uniform usampler2D stencil_tex; layout(location = 0) out vec4 color; void main() { ivec2 coord = ivec2(gl_FragCoord.xy); - uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); - uint stencil = uint(textureLod(stencil_tex, coord, 0).r); - highp uint depth_val = uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0)); lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r; diff --git a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag index 31db7d426..6a457981d 100644 --- a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag +++ b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag @@ -3,16 +3,16 @@ #version 450 +precision mediump int; +precision highp float; + layout(binding = 0) uniform sampler2D depth_tex; -layout(binding = 1) uniform isampler2D stencil_tex; +layout(binding = 1) uniform usampler2D stencil_tex; layout(location = 0) out vec4 color; void main() { ivec2 coord = ivec2(gl_FragCoord.xy); - uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); - uint stencil = uint(textureLod(stencil_tex, coord, 0).r); - highp uint depth_val = uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0)); lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r; diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 3e12a8813..78ea5208b 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -89,9 +89,6 @@ public: void RequestScreenshot(void* data, std::function<void(bool)> callback, const Layout::FramebufferLayout& layout); - /// This is called to notify the rendering backend of a surface change - virtual void NotifySurfaceChanged() {} - protected: Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. std::unique_ptr<Core::Frontend::GraphicsContext> context; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 9cafd2983..512eef575 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1048,6 +1048,10 @@ void Image::Scale(bool up_scale) { } bool Image::ScaleUp(bool ignore) { + const auto& resolution = runtime->resolution; + if (!resolution.active) { + return false; + } if (True(flags & ImageFlagBits::Rescaled)) { return false; } @@ -1060,9 +1064,6 @@ bool Image::ScaleUp(bool ignore) { return false; } flags |= ImageFlagBits::Rescaled; - if (!runtime->resolution.active) { - return false; - } has_scaled = true; if (ignore) { current_texture = upscaled_backup.handle; @@ -1073,13 +1074,14 @@ bool Image::ScaleUp(bool ignore) { } bool Image::ScaleDown(bool ignore) { - if (False(flags & ImageFlagBits::Rescaled)) { + const auto& resolution = runtime->resolution; + if (!resolution.active) { return false; } - flags &= ~ImageFlagBits::Rescaled; - if (!runtime->resolution.active) { + if (False(flags & ImageFlagBits::Rescaled)) { return false; } + flags &= ~ImageFlagBits::Rescaled; if (ignore) { current_texture = texture.handle; return true; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 3676eaaa9..e71b87e99 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -118,6 +118,8 @@ public: void InsertUploadMemoryBarrier(); + void TransitionImageLayout(Image& image) {} + FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; bool HasNativeBgr() const noexcept { diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index c7dc7e0a1..5ea9e2378 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -116,6 +116,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT_24_8}, // X8_D24_UNORM {GL_STENCIL_INDEX8, GL_STENCIL, GL_UNSIGNED_BYTE}, // S8_UINT {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 1a40a4d05..c3db09424 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -627,6 +627,8 @@ void BlitImageHelper::ClearDepthStencil(const Framebuffer* dst_framebuffer, bool const VkPipelineLayout layout = *clear_color_pipeline_layout; scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf) { + constexpr std::array blend_constants{0.0f, 0.0f, 0.0f, 0.0f}; + cmdbuf.SetBlendConstants(blend_constants.data()); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); BindBlitState(cmdbuf, dst_region); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth); @@ -883,7 +885,7 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline( .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .depthTestEnable = VK_FALSE, + .depthTestEnable = key.depth_clear, .depthWriteEnable = key.depth_clear, .depthCompareOp = VK_COMPARE_OP_ALWAYS, .depthBoundsTestEnable = VK_FALSE, diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 208e88533..a08f2f67f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -214,8 +214,9 @@ struct FormatTuple { {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT // Depth formats - {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT - {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM + {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT + {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM + {VK_FORMAT_X8_D24_UNORM_PACK32, Attachable}, // X8_D24_UNORM // Stencil formats {VK_FORMAT_S8_UINT, Attachable}, // S8_UINT diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 590bc1c64..14e257cf7 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -56,10 +56,6 @@ public: return device.GetDriverName(); } - void NotifySurfaceChanged() override { - present_manager.NotifySurfaceChanged(); - } - private: void Report() const; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 31928bb94..52fc142d1 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -96,6 +96,7 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { switch (framebuffer.pixel_format) { case Service::android::PixelFormat::Rgba8888: + case Service::android::PixelFormat::Rgbx8888: return VK_FORMAT_A8B8G8R8_UNORM_PACK32; case Service::android::PixelFormat::Rgb565: return VK_FORMAT_R5G6B5_UNORM_PACK16; diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp index d681bd22a..2ef36583b 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp @@ -103,8 +103,7 @@ PresentManager::PresentManager(const vk::Instance& instance_, surface{surface_}, blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())}, use_present_thread{Settings::values.async_presentation.GetValue()}, - image_count{swapchain.GetImageCount()}, last_render_surface{ - render_window_.GetWindowInfo().render_surface} { + image_count{swapchain.GetImageCount()} { auto& dld = device.GetLogical(); cmdpool = dld.CreateCommandPool({ @@ -289,44 +288,36 @@ void PresentManager::PresentThread(std::stop_token token) { } } -void PresentManager::NotifySurfaceChanged() { -#ifdef ANDROID - std::scoped_lock lock{recreate_surface_mutex}; - recreate_surface_cv.notify_one(); -#endif +void PresentManager::RecreateSwapchain(Frame* frame) { + swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb); + image_count = swapchain.GetImageCount(); } void PresentManager::CopyToSwapchain(Frame* frame) { - MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain); - - const auto recreate_swapchain = [&] { - swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb); - image_count = swapchain.GetImageCount(); - }; - -#ifdef ANDROID - std::unique_lock lock{recreate_surface_mutex}; - - const auto needs_recreation = [&] { - if (last_render_surface != render_window.GetWindowInfo().render_surface) { - return true; - } - if (swapchain.NeedsRecreation(frame->is_srgb)) { - return true; + bool requires_recreation = false; + + while (true) { + try { + // Recreate surface and swapchain if needed. + if (requires_recreation) { + surface = CreateSurface(instance, render_window.GetWindowInfo()); + RecreateSwapchain(frame); + } + + // Draw to swapchain. + return CopyToSwapchainImpl(frame); + } catch (const vk::Exception& except) { + if (except.GetResult() != VK_ERROR_SURFACE_LOST_KHR) { + throw; + } + + requires_recreation = true; } - return false; - }; - - recreate_surface_cv.wait_for(lock, std::chrono::milliseconds(400), - [&]() { return !needs_recreation(); }); - - // If the frontend recreated the surface, recreate the renderer surface and swapchain. - if (last_render_surface != render_window.GetWindowInfo().render_surface) { - last_render_surface = render_window.GetWindowInfo().render_surface; - surface = CreateSurface(instance, render_window.GetWindowInfo()); - recreate_swapchain(); } -#endif +} + +void PresentManager::CopyToSwapchainImpl(Frame* frame) { + MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain); // If the size or colorspace of the incoming frames has changed, recreate the swapchain // to account for that. @@ -334,11 +325,11 @@ void PresentManager::CopyToSwapchain(Frame* frame) { const bool size_changed = swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height; if (srgb_changed || size_changed) { - recreate_swapchain(); + RecreateSwapchain(frame); } while (swapchain.AcquireNextImage()) { - recreate_swapchain(); + RecreateSwapchain(frame); } const vk::CommandBuffer cmdbuf{frame->cmdbuf}; @@ -488,4 +479,4 @@ void PresentManager::CopyToSwapchain(Frame* frame) { swapchain.Present(render_semaphore); } -} // namespace Vulkan
\ No newline at end of file +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h index 83e859416..a3d825fe6 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.h +++ b/src/video_core/renderer_vulkan/vk_present_manager.h @@ -54,14 +54,15 @@ public: /// Waits for the present thread to finish presenting all queued frames. void WaitPresent(); - /// This is called to notify the rendering backend of a surface change - void NotifySurfaceChanged(); - private: void PresentThread(std::stop_token token); void CopyToSwapchain(Frame* frame); + void CopyToSwapchainImpl(Frame* frame); + + void RecreateSwapchain(Frame* frame); + private: const vk::Instance& instance; Core::Frontend::EmuWindow& render_window; @@ -76,16 +77,13 @@ private: std::queue<Frame*> free_queue; std::condition_variable_any frame_cv; std::condition_variable free_cv; - std::condition_variable recreate_surface_cv; std::mutex swapchain_mutex; - std::mutex recreate_surface_mutex; std::mutex queue_mutex; std::mutex free_mutex; std::jthread present_thread; bool blit_supported; bool use_present_thread; std::size_t image_count{}; - void* last_render_surface{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 2edaafa7e..66c03bf17 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -1436,6 +1436,7 @@ void QueryCacheRuntime::Barriers(bool is_prebarrier) { .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, }; + impl->scheduler.RequestOutsideRenderPassOperationContext(); if (is_prebarrier) { impl->scheduler.Record([](vk::CommandBuffer cmdbuf) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 1628d76d6..61d03daae 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -422,7 +422,8 @@ void RasterizerVulkan::Clear(u32 layer_count) { return; } - if (use_stencil && regs.stencil_front_mask != 0xFF && regs.stencil_front_mask != 0) { + if (use_stencil && framebuffer->HasAspectStencilBit() && regs.stencil_front_mask != 0xFF && + regs.stencil_front_mask != 0) { Region2D dst_region = { Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, Offset2D{.x = clear_rect.rect.offset.x + static_cast<s32>(clear_rect.rect.extent.width), @@ -974,6 +975,19 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs if (!state_tracker.TouchScissors()) { return; } + if (!regs.viewport_scale_offset_enabled) { + const auto x = static_cast<float>(regs.surface_clip.x); + const auto y = static_cast<float>(regs.surface_clip.y); + const auto width = static_cast<float>(regs.surface_clip.width); + const auto height = static_cast<float>(regs.surface_clip.height); + VkRect2D scissor; + scissor.offset.x = static_cast<u32>(x); + scissor.offset.y = static_cast<u32>(y); + scissor.extent.width = static_cast<u32>(width != 0.0f ? width : 1.0f); + scissor.extent.height = static_cast<u32>(height != 0.0f ? height : 1.0f); + scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissor); }); + return; + } u32 up_scale = 1; u32 down_shift = 0; if (texture_cache.IsRescaling()) { diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index ae9f1de64..7746a88d3 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -19,7 +19,7 @@ VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat VkSampleCountFlagBits samples) { using MaxwellToVK::SurfaceFormat; return { - .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .flags = {}, .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, .samples = samples, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index ce92f66ab..b278614e6 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -24,25 +24,38 @@ using namespace Common::Literals; // Maximum potential alignment of a Vulkan buffer constexpr VkDeviceSize MAX_ALIGNMENT = 256; -// Maximum size to put elements in the stream buffer -constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; // Stream buffer size in bytes -constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; -constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; +constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB; -size_t Region(size_t iterator) noexcept { - return iterator / REGION_SIZE; +size_t GetStreamBufferSize(const Device& device) { + VkDeviceSize size{0}; + if (device.HasDebuggingToolAttached()) { + ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) { + size = std::max(size, heap.size); + }); + // If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be + // loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue + // as the heap will be much larger. + if (size <= 256_MiB) { + size = size * 40 / 100; + } + } else { + size = MAX_STREAM_BUFFER_SIZE; + } + return std::min(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE); } } // Anonymous namespace StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_) - : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { + : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, + stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size / + StagingBufferPool::NUM_SYNCS} { VkBufferCreateInfo stream_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .size = STREAM_BUFFER_SIZE, + .size = stream_buffer_size, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, @@ -63,7 +76,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem StagingBufferPool::~StagingBufferPool() = default; StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) { - if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { + if (!deferred && usage == MemoryUsage::Upload && size <= region_size) { return GetStreamBuffer(size); } return GetStagingBuffer(size, usage, deferred); @@ -101,7 +114,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { used_iterator = iterator; free_iterator = std::max(free_iterator, iterator + size); - if (iterator + size >= STREAM_BUFFER_SIZE) { + if (iterator + size >= stream_buffer_size) { std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, current_tick); used_iterator = 0; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 5f69f08b1..d3deb9072 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -90,6 +90,9 @@ private: void ReleaseCache(MemoryUsage usage); void ReleaseLevel(StagingBuffersCache& cache, size_t log2); + size_t Region(size_t iter) const noexcept { + return iter / region_size; + } const Device& device; MemoryAllocator& memory_allocator; @@ -97,6 +100,8 @@ private: vk::Buffer stream_buffer; std::span<u8> stream_pointer; + VkDeviceSize stream_buffer_size; + VkDeviceSize region_size; size_t iterator = 0; size_t used_iterator = 0; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index cdc41816f..80efd9517 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -238,6 +238,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { return any_r ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; case PixelFormat::D16_UNORM: case PixelFormat::D32_FLOAT: + case PixelFormat::X8_D24_UNORM: return VK_IMAGE_ASPECT_DEPTH_BIT; case PixelFormat::S8_UINT: return VK_IMAGE_ASPECT_STENCIL_BIT; @@ -1550,15 +1551,15 @@ bool Image::IsRescaled() const noexcept { } bool Image::ScaleUp(bool ignore) { + const auto& resolution = runtime->resolution; + if (!resolution.active) { + return false; + } if (True(flags & ImageFlagBits::Rescaled)) { return false; } ASSERT(info.type != ImageType::Linear); flags |= ImageFlagBits::Rescaled; - const auto& resolution = runtime->resolution; - if (!resolution.active) { - return false; - } has_scaled = true; if (!scaled_image) { const bool is_2d = info.type == ImageType::e2D; @@ -1587,15 +1588,15 @@ bool Image::ScaleUp(bool ignore) { } bool Image::ScaleDown(bool ignore) { + const auto& resolution = runtime->resolution; + if (!resolution.active) { + return false; + } if (False(flags & ImageFlagBits::Rescaled)) { return false; } ASSERT(info.type != ImageType::Linear); flags &= ~ImageFlagBits::Rescaled; - const auto& resolution = runtime->resolution; - if (!resolution.active) { - return false; - } current_image = *original_image; if (ignore) { return true; @@ -2033,4 +2034,32 @@ void TextureCacheRuntime::AccelerateImageUpload( ASSERT(false); } +void TextureCacheRuntime::TransitionImageLayout(Image& image) { + if (!image.ExchangeInitialization()) { + VkImageMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_NONE, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image.Handle(), + .subresourceRange{ + .aspectMask = image.AspectMask(), + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([barrier = barrier](vk::CommandBuffer cmdbuf) { + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier); + }); + } +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index d6c5a15cc..7a0807709 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -92,6 +92,8 @@ public: void InsertUploadMemoryBarrier() {} + void TransitionImageLayout(Image& image); + bool HasBrokenTextureViewFormats() const noexcept { // No known Vulkan driver has broken image views return false; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index e16cd5e73..5b3c7aa5a 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -85,6 +85,8 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { return PixelFormat::S8_UINT; case Tegra::DepthFormat::Z32_FLOAT_X24S8_UINT: return PixelFormat::D32_FLOAT_S8_UINT; + case Tegra::DepthFormat::X8Z24_UNORM: + return PixelFormat::X8_D24_UNORM; default: UNIMPLEMENTED_MSG("Unimplemented format={}", format); return PixelFormat::S8_UINT_D24_UNORM; @@ -202,6 +204,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) PixelFormat PixelFormatFromGPUPixelFormat(Service::android::PixelFormat format) { switch (format) { case Service::android::PixelFormat::Rgba8888: + case Service::android::PixelFormat::Rgbx8888: return PixelFormat::A8B8G8R8_UNORM; case Service::android::PixelFormat::Rgb565: return PixelFormat::R5G6B5_UNORM; diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 9b9c4d9bc..a5e8e2f62 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -115,6 +115,7 @@ enum class PixelFormat { // Depth formats D32_FLOAT = MaxColorFormat, D16_UNORM, + X8_D24_UNORM, MaxDepthFormat, @@ -251,6 +252,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{ 1, // E5B9G9R9_FLOAT 1, // D32_FLOAT 1, // D16_UNORM + 1, // X8_D24_UNORM 1, // S8_UINT 1, // D24_UNORM_S8_UINT 1, // S8_UINT_D24_UNORM @@ -360,6 +362,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{ 1, // E5B9G9R9_FLOAT 1, // D32_FLOAT 1, // D16_UNORM + 1, // X8_D24_UNORM 1, // S8_UINT 1, // D24_UNORM_S8_UINT 1, // S8_UINT_D24_UNORM @@ -469,6 +472,7 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{ 32, // E5B9G9R9_FLOAT 32, // D32_FLOAT 16, // D16_UNORM + 32, // X8_D24_UNORM 8, // S8_UINT 32, // D24_UNORM_S8_UINT 32, // S8_UINT_D24_UNORM diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 56307d030..8c774f512 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -138,10 +138,16 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, return PixelFormat::E5B9G9R9_FLOAT; case Hash(TextureFormat::Z32, FLOAT): return PixelFormat::D32_FLOAT; + case Hash(TextureFormat::Z32, FLOAT, UINT, UINT, UINT, LINEAR): + return PixelFormat::D32_FLOAT; case Hash(TextureFormat::Z16, UNORM): return PixelFormat::D16_UNORM; case Hash(TextureFormat::Z16, UNORM, UINT, UINT, UINT, LINEAR): return PixelFormat::D16_UNORM; + case Hash(TextureFormat::X8Z24, UNORM): + return PixelFormat::X8_D24_UNORM; + case Hash(TextureFormat::X8Z24, UNORM, UINT, UINT, UINT, LINEAR): + return PixelFormat::X8_D24_UNORM; case Hash(TextureFormat::Z24S8, UINT, UNORM, UNORM, UNORM, LINEAR): return PixelFormat::S8_UINT_D24_UNORM; case Hash(TextureFormat::Z24S8, UINT, UNORM, UINT, UINT, LINEAR): diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index 6279d8e9e..2b7e0df72 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -10,19 +10,23 @@ #include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/render_targets.h" +#include "video_core/texture_cache/samples_helper.h" namespace VideoCommon { std::string Name(const ImageBase& image) { const GPUVAddr gpu_addr = image.gpu_addr; const ImageInfo& info = image.info; - const u32 width = info.size.width; - const u32 height = info.size.height; + u32 width = info.size.width; + u32 height = info.size.height; const u32 depth = info.size.depth; const u32 num_layers = image.info.resources.layers; const u32 num_levels = image.info.resources.levels; std::string resource; if (image.info.num_samples > 1) { + const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(image.info.num_samples); + width >>= samples_x; + height >>= samples_y; resource += fmt::format(":{}xMSAA", image.info.num_samples); } if (num_layers > 1) { diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index 9ee57a076..cabbfcb2d 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -211,6 +211,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str return "D32_FLOAT"; case PixelFormat::D16_UNORM: return "D16_UNORM"; + case PixelFormat::X8_D24_UNORM: + return "X8_D24_UNORM"; case PixelFormat::S8_UINT: return "S8_UINT"; case PixelFormat::D24_UNORM_S8_UINT: diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index 0c5f4450d..18b9250f9 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -85,6 +85,7 @@ bool ImageViewBase::SupportsAnisotropy() const noexcept { // Depth formats case PixelFormat::D32_FLOAT: case PixelFormat::D16_UNORM: + case PixelFormat::X8_D24_UNORM: // Stencil formats case PixelFormat::S8_UINT: // DepthStencil formats diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h index 203ac1b11..2ee2f8312 100644 --- a/src/video_core/texture_cache/samples_helper.h +++ b/src/video_core/texture_cache/samples_helper.h @@ -24,7 +24,7 @@ namespace VideoCommon { return {2, 2}; } ASSERT_MSG(false, "Invalid number of samples={}", num_samples); - return {1, 1}; + return {0, 0}; } [[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1bdb0def5..d575c57ca 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1016,6 +1016,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); + runtime.TransitionImageLayout(image); return; } if (True(image.flags & ImageFlagBits::AsynchronousDecode)) { diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 0a86ce139..15596c925 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -68,6 +68,7 @@ struct LevelInfo { Extent2D tile_size; u32 bpp_log2; u32 tile_width_spacing; + u32 num_levels; }; [[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) { @@ -118,11 +119,11 @@ template <u32 GOB_EXTENT> } [[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size, - u32 level) { + u32 level, u32 num_levels) { return { .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level), .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level), - .depth = level == 0 + .depth = level == 0 && num_levels == 1 ? block_size.depth : AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level), }; @@ -166,7 +167,7 @@ template <u32 GOB_EXTENT> } [[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) { - if (level == 0) { + if (level == 0 && info.num_levels == 1) { return Extent3D{ .width = info.block.width, .height = info.block.height, @@ -257,7 +258,7 @@ template <u32 GOB_EXTENT> } [[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block, - u32 tile_width_spacing) { + u32 tile_width_spacing, u32 num_levels) { const u32 bytes_per_block = BytesPerBlock(format); return { .size = @@ -270,16 +271,18 @@ template <u32 GOB_EXTENT> .tile_size = DefaultBlockSize(format), .bpp_log2 = BytesPerBlockLog2(bytes_per_block), .tile_width_spacing = tile_width_spacing, + .num_levels = num_levels, }; } [[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) { - return MakeLevelInfo(info.format, info.size, info.block, info.tile_width_spacing); + return MakeLevelInfo(info.format, info.size, info.block, info.tile_width_spacing, + info.resources.levels); } [[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block, u32 tile_width_spacing, u32 level) { - const LevelInfo info = MakeLevelInfo(format, size, block, tile_width_spacing); + const LevelInfo info = MakeLevelInfo(format, size, block, tile_width_spacing, level); u32 offset = 0; for (u32 current_level = 0; current_level < level; ++current_level) { offset += CalculateLevelSize(info, current_level); @@ -466,7 +469,7 @@ template <u32 GOB_EXTENT> }; const u32 bpp_log2 = BytesPerBlockLog2(info.format); const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing); - const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0); + const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0, info.resources.levels); return Extent3D{ .width = Common::AlignUpLog2(num_tiles.width, alignment), .height = Common::AlignUpLog2(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height), @@ -533,7 +536,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr UNIMPLEMENTED_IF(copy.image_extent != level_size); const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); - const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + const Extent3D block = + AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels); size_t host_offset = copy.buffer_offset; @@ -698,7 +702,7 @@ u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) { const Extent2D tile_size = DefaultBlockSize(info.format); const Extent3D level_size = AdjustMipSize(info.size, level); const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); - const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level); + const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level, info.resources.levels); const u32 bpp_log2 = BytesPerBlockLog2(info.format); return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing); } @@ -887,7 +891,8 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory .image_extent = level_size, }; const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); - const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + const Extent3D block = + AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels); const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2); size_t guest_layer_offset = 0; @@ -1041,7 +1046,7 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) { const Extent2D tile_size = DefaultBlockSize(info.format); const Extent3D level_size = AdjustMipSize(info.size, level); const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); - return AdjustMipBlockSize(num_tiles, level_info.block, level); + return AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels); } boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) { @@ -1063,7 +1068,8 @@ boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const I for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); - const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + const Extent3D block = + AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels); params[level] = SwizzleParameters{ .num_tiles = num_tiles, .block = block, @@ -1292,11 +1298,11 @@ u32 MapSizeBytes(const ImageBase& image) { } } -static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == +static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0, 1}, 0) == 0x7f8000); -static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x40000); +static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0, 1}, 0) == 0x40000); -static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0}, 0) == 0x40000); +static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0, 1}, 0) == 0x40000); static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) == 0x2afc00); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 3960b135a..876cec2e8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -84,9 +84,12 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{ } // namespace Alternatives enum class NvidiaArchitecture { - AmpereOrNewer, + KeplerOrOlder, + Maxwell, + Pascal, + Volta, Turing, - VoltaOrOlder, + AmpereOrNewer, }; template <typename T> @@ -200,6 +203,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica VK_FORMAT_BC7_UNORM_BLOCK, VK_FORMAT_D16_UNORM, VK_FORMAT_D16_UNORM_S8_UINT, + VK_FORMAT_X8_D24_UNORM_PACK32, VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D32_SFLOAT, VK_FORMAT_D32_SFLOAT_S8_UINT, @@ -321,13 +325,38 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, physical.GetProperties2(physical_properties); if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { // Only Ampere and newer support this feature + // TODO: Find a way to differentiate Ampere and Ada return NvidiaArchitecture::AmpereOrNewer; } - } - if (exts.contains(VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) { return NvidiaArchitecture::Turing; } - return NvidiaArchitecture::VoltaOrOlder; + + if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) { + VkPhysicalDeviceBlendOperationAdvancedPropertiesEXT advanced_blending_props{}; + advanced_blending_props.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BLEND_OPERATION_ADVANCED_PROPERTIES_EXT; + VkPhysicalDeviceProperties2 physical_properties{}; + physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + physical_properties.pNext = &advanced_blending_props; + physical.GetProperties2(physical_properties); + if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) { + return NvidiaArchitecture::Maxwell; + } + + if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) { + VkPhysicalDeviceConservativeRasterizationPropertiesEXT conservative_raster_props{}; + conservative_raster_props.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT; + physical_properties.pNext = &conservative_raster_props; + physical.GetProperties2(physical_properties); + if (conservative_raster_props.degenerateLinesRasterized) { + return NvidiaArchitecture::Volta; + } + return NvidiaArchitecture::Pascal; + } + } + + return NvidiaArchitecture::KeplerOrOlder; } std::vector<const char*> ExtensionListForVulkan( @@ -504,19 +533,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR if (is_nvidia) { const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; const auto arch = GetNvidiaArchitecture(physical, supported_extensions); - switch (arch) { - case NvidiaArchitecture::AmpereOrNewer: + if (arch >= NvidiaArchitecture::AmpereOrNewer) { LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math"); features.shader_float16_int8.shaderFloat16 = false; - break; - case NvidiaArchitecture::Turing: - break; - case NvidiaArchitecture::VoltaOrOlder: + } else if (arch <= NvidiaArchitecture::Volta) { if (nv_major_version < 527) { LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor"); RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); } - break; } if (nv_major_version >= 510) { LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits"); @@ -661,7 +685,15 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR "ANV drivers 22.3.0 to 23.1.0 have broken VK_KHR_push_descriptor"); RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); } + } else if (extensions.push_descriptor && is_nvidia) { + const auto arch = GetNvidiaArchitecture(physical, supported_extensions); + if (arch <= NvidiaArchitecture::Pascal) { + LOG_WARNING(Render_Vulkan, + "Pascal and older architectures have broken VK_KHR_push_descriptor"); + RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); + } } + if (is_mvk) { LOG_WARNING(Render_Vulkan, "MVK driver breaks when using more than 16 vertex attributes/bindings"); diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 3ef381a38..8dd1667f3 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -9,6 +9,7 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" +#include "common/literals.h" #include "common/logging/log.h" #include "common/polyfill_ranges.h" #include "video_core/vulkan_common/vma.h" @@ -65,12 +66,12 @@ struct Range { switch (usage) { case MemoryUsage::Upload: case MemoryUsage::Stream: - return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + return VMA_ALLOCATION_CREATE_MAPPED_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; case MemoryUsage::Download: - return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + return VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; case MemoryUsage::DeviceLocal: - return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | - VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT; + return {}; } return {}; } @@ -212,7 +213,20 @@ MemoryAllocator::MemoryAllocator(const Device& device_) : device{device_}, allocator{device.GetAllocator()}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, buffer_image_granularity{ - device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} + device_.GetPhysical().GetProperties().limits.bufferImageGranularity} { + // GPUs not supporting rebar may only have a region with less than 256MB host visible/device + // local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to + // the heap running out of memory. With RenderDoc attached and only a small host/device region, + // only allow the stream buffer in this memory heap. + if (device.HasDebuggingToolAttached()) { + using namespace Common::Literals; + ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) { + if (heap.size <= 256_MiB) { + valid_memory_types &= ~(1u << index); + } + }); + } +} MemoryAllocator::~MemoryAllocator() = default; @@ -239,12 +253,11 @@ vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const { vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const { const VmaAllocationCreateInfo alloc_ci = { - .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT | - MemoryUsageVmaFlags(usage), + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage), .usage = MemoryUsageVma(usage), .requiredFlags = 0, .preferredFlags = MemoryUsagePreferedVmaFlags(usage), - .memoryTypeBits = 0, + .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, .pool = VK_NULL_HANDLE, .pUserData = nullptr, .priority = 0.f, diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index f449bc8d0..38a182bcb 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h @@ -7,6 +7,7 @@ #include <span> #include <vector> #include "common/common_types.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" VK_DEFINE_HANDLE(VmaAllocator) @@ -26,6 +27,18 @@ enum class MemoryUsage { Stream, ///< Requests device local host visible buffer, falling back host memory. }; +template <typename F> +void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) { + auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties; + for (size_t i = 0; i < memory_props.memoryTypeCount; i++) { + auto& memory_type = memory_props.memoryTypes[i]; + if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && + (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { + f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]); + } + } +} + /// Ownership handle of a memory commitment. /// Points to a subregion of a memory allocation. class MemoryCommit { @@ -124,6 +137,7 @@ private: std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers // and optimal images + u32 valid_memory_types{~0u}; }; } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 1e3c0fa64..0487cd3b6 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -117,6 +117,9 @@ public: virtual ~Exception() = default; const char* what() const noexcept override; + VkResult GetResult() const noexcept { + return result; + } private: VkResult result; |