summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h39
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h5
-rw-r--r--src/video_core/compatible_formats.cpp6
-rw-r--r--src/video_core/fence_manager.h2
-rw-r--r--src/video_core/gpu.cpp12
-rw-r--r--src/video_core/gpu.h4
-rw-r--r--src/video_core/gpu_thread.cpp6
-rw-r--r--src/video_core/rasterizer_interface.h4
-rw-r--r--src/video_core/renderer_null/null_rasterizer.cpp5
-rw-r--r--src/video_core/renderer_null/null_rasterizer.h3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp35
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp34
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp45
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h5
-rw-r--r--src/video_core/shader_cache.cpp2
-rw-r--r--src/video_core/shader_cache.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h16
-rw-r--r--src/video_core/texture_cache/types.h1
-rw-r--r--src/video_core/texture_cache/util.cpp9
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp43
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h14
23 files changed, 238 insertions, 60 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 58a45ab67..b5ed3380f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -115,7 +115,34 @@ void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
template <class P>
void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
- memory_tracker.CachedCpuWrite(cpu_addr, size);
+ const bool is_dirty = IsRegionRegistered(cpu_addr, size);
+ if (!is_dirty) {
+ return;
+ }
+ VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE);
+ VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE);
+ if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
+ WriteMemory(cpu_addr, size);
+ return;
+ }
+
+ tmp_buffer.resize_destructive(size);
+ cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size);
+
+ InlineMemoryImplementation(cpu_addr, size, tmp_buffer);
+}
+
+template <class P>
+bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) {
+ const bool is_dirty = IsRegionRegistered(cpu_addr, size);
+ if (!is_dirty) {
+ return false;
+ }
+ if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
+ return true;
+ }
+ WriteMemory(cpu_addr, size);
+ return false;
}
template <class P>
@@ -1553,6 +1580,14 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
return false;
}
+ InlineMemoryImplementation(dest_address, copy_size, inlined_buffer);
+
+ return true;
+}
+
+template <class P>
+void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
+ std::span<const u8> inlined_buffer) {
const IntervalType subtract_interval{dest_address, dest_address + copy_size};
ClearDownload(subtract_interval);
common_ranges.subtract(subtract_interval);
@@ -1574,8 +1609,6 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
} else {
buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
}
-
- return true;
}
template <class P>
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index fe6068cfe..460fc7551 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -245,6 +245,8 @@ public:
void CachedWriteMemory(VAddr cpu_addr, u64 size);
+ bool OnCPUWrite(VAddr cpu_addr, u64 size);
+
void DownloadMemory(VAddr cpu_addr, u64 size);
std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size);
@@ -543,6 +545,9 @@ private:
void ClearDownload(IntervalType subtract_interval);
+ void InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
+ std::span<const u8> inlined_buffer);
+
VideoCore::RasterizerInterface& rasterizer;
Core::Memory::Memory& cpu_memory;
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
index ab4f4d407..87d69ebc5 100644
--- a/src/video_core/compatible_formats.cpp
+++ b/src/video_core/compatible_formats.cpp
@@ -272,6 +272,9 @@ constexpr Table MakeNonNativeBgrCopyTable() {
bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views,
bool native_bgr) {
+ if (format_a == format_b) {
+ return true;
+ }
if (broken_views) {
// If format views are broken, only accept formats that are identical.
return format_a == format_b;
@@ -282,6 +285,9 @@ bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_vi
}
bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b, bool native_bgr) {
+ if (format_a == format_b) {
+ return true;
+ }
static constexpr Table BGR_TABLE = MakeNativeBgrCopyTable();
static constexpr Table NO_BGR_TABLE = MakeNonNativeBgrCopyTable();
return IsSupported(native_bgr ? BGR_TABLE : NO_BGR_TABLE, format_a, format_b);
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 35d699bbf..ab20ff30f 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -69,7 +69,6 @@ public:
}
void SignalFence(std::function<void()>&& func) {
- rasterizer.InvalidateGPUCache();
bool delay_fence = Settings::IsGPULevelHigh();
if constexpr (!can_async_check) {
TryReleasePendingFences<false>();
@@ -96,6 +95,7 @@ public:
guard.unlock();
cv.notify_all();
}
+ rasterizer.InvalidateGPUCache();
}
void SignalSyncPoint(u32 value) {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index db385076d..c192e33b2 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -95,7 +95,9 @@ struct GPU::Impl {
/// Synchronizes CPU writes with Host GPU memory.
void InvalidateGPUCache() {
- rasterizer->InvalidateGPUCache();
+ std::function<void(VAddr, size_t)> callback_writes(
+ [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
+ system.GatherGPUDirtyMemory(callback_writes);
}
/// Signal the ending of command list.
@@ -299,6 +301,10 @@ struct GPU::Impl {
gpu_thread.InvalidateRegion(addr, size);
}
+ bool OnCPUWrite(VAddr addr, u64 size) {
+ return rasterizer->OnCPUWrite(addr, size);
+ }
+
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
@@ -561,6 +567,10 @@ void GPU::InvalidateRegion(VAddr addr, u64 size) {
impl->InvalidateRegion(addr, size);
}
+bool GPU::OnCPUWrite(VAddr addr, u64 size) {
+ return impl->OnCPUWrite(addr, size);
+}
+
void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
impl->FlushAndInvalidateRegion(addr, size);
}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index e49c40cf2..ba2838b89 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -250,6 +250,10 @@ public:
/// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(VAddr addr, u64 size);
+ /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is
+ /// sensible, false otherwise
+ bool OnCPUWrite(VAddr addr, u64 size);
+
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(VAddr addr, u64 size);
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 889144f38..2f0f9f593 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -47,7 +47,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
rasterizer->FlushRegion(flush->addr, flush->size);
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
- rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
+ rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size);
} else {
ASSERT(false);
}
@@ -102,12 +102,12 @@ void ThreadManager::TickGPU() {
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
- rasterizer->OnCPUWrite(addr, size);
+ rasterizer->OnCacheInvalidation(addr, size);
}
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
- rasterizer->OnCPUWrite(addr, size);
+ rasterizer->OnCacheInvalidation(addr, size);
}
u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 7566a8c4e..cb8029a4f 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -109,7 +109,9 @@ public:
}
/// Notify rasterizer that any caches of the specified region are desync with guest
- virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
+ virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0;
+
+ virtual bool OnCPUWrite(VAddr addr, u64 size) = 0;
/// Sync memory between guest and host.
virtual void InvalidateGPUCache() = 0;
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp
index bf2ce4c49..92ecf6682 100644
--- a/src/video_core/renderer_null/null_rasterizer.cpp
+++ b/src/video_core/renderer_null/null_rasterizer.cpp
@@ -47,7 +47,10 @@ bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheTyp
return false;
}
void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
-void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {}
+bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {
+ return false;
+}
+void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {}
VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) {
VideoCore::RasterizerDownloadArea new_area{
.start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE),
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h
index a8d35d2c1..93b9a6971 100644
--- a/src/video_core/renderer_null/null_rasterizer.h
+++ b/src/video_core/renderer_null/null_rasterizer.h
@@ -53,7 +53,8 @@ public:
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
- void OnCPUWrite(VAddr addr, u64 size) override;
+ void OnCacheInvalidation(VAddr addr, u64 size) override;
+ bool OnCPUWrite(VAddr addr, u64 size) override;
VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index edf527f2d..aadd6967c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -485,12 +485,33 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
}
}
-void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
+bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
+ MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+ if (addr == 0 || size == 0) {
+ return false;
+ }
+
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ if (buffer_cache.OnCPUWrite(addr, size)) {
+ return true;
+ }
+ }
+
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.WriteMemory(addr, size);
+ }
+
+ shader_cache.InvalidateRegion(addr, size);
+ return false;
+}
+
+void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
return;
}
- shader_cache.OnCPUWrite(addr, size);
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
@@ -499,15 +520,11 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.CachedWriteMemory(addr, size);
}
+ shader_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::InvalidateGPUCache() {
- MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- shader_cache.SyncGuestHost();
- {
- std::scoped_lock lock{buffer_cache.mutex};
- buffer_cache.FlushCachedWrites();
- }
+ gpu.InvalidateGPUCache();
}
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
@@ -519,7 +536,7 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
- shader_cache.OnCPUWrite(addr, size);
+ shader_cache.OnCacheInvalidation(addr, size);
}
void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index a73ad15c1..8eda2ddba 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -98,7 +98,8 @@ public:
VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
- void OnCPUWrite(VAddr addr, u64 size) override;
+ void OnCacheInvalidation(VAddr addr, u64 size) override;
+ bool OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f7c0d939a..456bb040e 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -566,11 +566,32 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s
}
}
-void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
+bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
+ if (addr == 0 || size == 0) {
+ return false;
+ }
+
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ if (buffer_cache.OnCPUWrite(addr, size)) {
+ return true;
+ }
+ }
+
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.WriteMemory(addr, size);
+ }
+
+ pipeline_cache.InvalidateRegion(addr, size);
+ return false;
+}
+
+void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- pipeline_cache.OnCPUWrite(addr, size);
+
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
@@ -579,14 +600,11 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.CachedWriteMemory(addr, size);
}
+ pipeline_cache.InvalidateRegion(addr, size);
}
void RasterizerVulkan::InvalidateGPUCache() {
- pipeline_cache.SyncGuestHost();
- {
- std::scoped_lock lock{buffer_cache.mutex};
- buffer_cache.FlushCachedWrites();
- }
+ gpu.InvalidateGPUCache();
}
void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
@@ -598,7 +616,7 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
- pipeline_cache.OnCPUWrite(addr, size);
+ pipeline_cache.OnCacheInvalidation(addr, size);
}
void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index b39710b3c..73257d964 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -96,7 +96,8 @@ public:
void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
- void OnCPUWrite(VAddr addr, u64 size) override;
+ void OnCacheInvalidation(VAddr addr, u64 size) override;
+ bool OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 8385b5509..3aac3cfab 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -36,8 +36,10 @@ using VideoCommon::ImageFlagBits;
using VideoCommon::ImageInfo;
using VideoCommon::ImageType;
using VideoCommon::SubresourceRange;
+using VideoCore::Surface::BytesPerBlock;
using VideoCore::Surface::IsPixelFormatASTC;
using VideoCore::Surface::IsPixelFormatInteger;
+using VideoCore::Surface::SurfaceType;
namespace {
constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
@@ -130,7 +132,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const Device& device, const ImageInfo& info) {
const PixelFormat format = StorageFormat(info.format);
const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, format);
- VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+ VkImageCreateFlags flags{};
if (info.type == ImageType::e2D && info.resources.layers >= 6 &&
info.size.width == info.size.height && !device.HasBrokenCubeImageCompability()) {
flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
@@ -163,11 +165,24 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
}
[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
- const ImageInfo& info) {
+ const ImageInfo& info, std::span<const VkFormat> view_formats) {
if (info.type == ImageType::Buffer) {
return vk::Image{};
}
- return allocator.CreateImage(MakeImageCreateInfo(device, info));
+ VkImageCreateInfo image_ci = MakeImageCreateInfo(device, info);
+ const VkImageFormatListCreateInfo image_format_list = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO,
+ .pNext = nullptr,
+ .viewFormatCount = static_cast<u32>(view_formats.size()),
+ .pViewFormats = view_formats.data(),
+ };
+ if (view_formats.size() > 1) {
+ image_ci.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+ if (device.IsKhrImageFormatListSupported()) {
+ image_ci.pNext = &image_format_list;
+ }
+ }
+ return allocator.CreateImage(image_ci);
}
[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
@@ -806,6 +821,23 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool,
compute_pass_descriptor_queue, memory_allocator);
}
+ if (!device.IsKhrImageFormatListSupported()) {
+ return;
+ }
+ for (size_t index_a = 0; index_a < VideoCore::Surface::MaxPixelFormat; index_a++) {
+ const auto image_format = static_cast<PixelFormat>(index_a);
+ if (IsPixelFormatASTC(image_format) && !device.IsOptimalAstcSupported()) {
+ view_formats[index_a].push_back(VK_FORMAT_A8B8G8R8_UNORM_PACK32);
+ }
+ for (size_t index_b = 0; index_b < VideoCore::Surface::MaxPixelFormat; index_b++) {
+ const auto view_format = static_cast<PixelFormat>(index_b);
+ if (VideoCore::Surface::IsViewCompatible(image_format, view_format, false, true)) {
+ const auto view_info =
+ MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, true, view_format);
+ view_formats[index_a].push_back(view_info.format);
+ }
+ }
+ }
}
void TextureCacheRuntime::Finish() {
@@ -1265,8 +1297,8 @@ void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
- runtime{&runtime_},
- original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info)),
+ runtime{&runtime_}, original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info,
+ runtime->ViewFormats(info.format))),
aspect_mask(ImageAspectMask(info.format)) {
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
if (Settings::values.async_astc.GetValue()) {
@@ -1471,7 +1503,8 @@ bool Image::ScaleUp(bool ignore) {
auto scaled_info = info;
scaled_info.size.width = scaled_width;
scaled_info.size.height = scaled_height;
- scaled_image = MakeImage(runtime->device, runtime->memory_allocator, scaled_info);
+ scaled_image = MakeImage(runtime->device, runtime->memory_allocator, scaled_info,
+ runtime->ViewFormats(info.format));
ignore = false;
}
current_image = *scaled_image;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 220943116..6621210ea 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -103,6 +103,10 @@ public:
[[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
+ std::span<const VkFormat> ViewFormats(PixelFormat format) {
+ return view_formats[static_cast<std::size_t>(format)];
+ }
+
void BarrierFeedbackLoop();
const Device& device;
@@ -113,6 +117,7 @@ public:
RenderPassCache& render_pass_cache;
std::optional<ASTCDecoderPass> astc_decoder_pass;
const Settings::ResolutionScalingInfo& resolution;
+ std::array<std::vector<VkFormat>, VideoCore::Surface::MaxPixelFormat> view_formats;
static constexpr size_t indexing_slots = 8 * sizeof(size_t);
std::array<vk::Buffer, indexing_slots> buffers{};
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index 4db948b6d..01701201d 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -24,7 +24,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
RemovePendingShaders();
}
-void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
+void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) {
std::scoped_lock lock{invalidation_mutex};
InvalidatePagesInRegion(addr, size);
}
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index f3cc4c70b..de8e08002 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -62,7 +62,7 @@ public:
/// @brief Unmarks a memory region as cached and marks it for removal
/// @param addr Start address of the CPU write operation
/// @param size Number of bytes of the CPU write operation
- void OnCPUWrite(VAddr addr, size_t size);
+ void OnCacheInvalidation(VAddr addr, size_t size);
/// @brief Flushes delayed removal operations
void SyncGuestHost();
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8190f3ba1..3a859139c 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -598,6 +598,10 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
[&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
+ if (True(image.flags & ImageFlagBits::CpuModified)) {
+ continue;
+ }
+ image.flags |= ImageFlagBits::CpuModified;
if (True(image.flags & ImageFlagBits::Remapped)) {
continue;
}
@@ -865,11 +869,15 @@ void TextureCache<P>::PopAsyncFlushes() {
template <class P>
ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, bool is_upload) {
const ImageInfo dst_info(operand);
- const ImageId image_id = FindDMAImage(dst_info, operand.address);
- if (!image_id) {
+ const ImageId dst_id = FindDMAImage(dst_info, operand.address);
+ if (!dst_id) {
+ return NULL_IMAGE_ID;
+ }
+ auto& image = slot_images[dst_id];
+ if (False(image.flags & ImageFlagBits::GpuModified)) {
+ // No need to waste time on an image that's synced with guest
return NULL_IMAGE_ID;
}
- auto& image = slot_images[image_id];
if (image.info.type == ImageType::e3D) {
// Don't accelerate 3D images.
return NULL_IMAGE_ID;
@@ -883,7 +891,7 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo
if (!base) {
return NULL_IMAGE_ID;
}
- return image_id;
+ return dst_id;
}
template <class P>
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index a0e10643f..0453456b4 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -54,7 +54,6 @@ enum class RelaxedOptions : u32 {
Format = 1 << 1,
Samples = 1 << 2,
ForceBrokenViews = 1 << 3,
- FormatBpp = 1 << 4,
};
DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions)
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 9a618a57a..0de6ed09d 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -1201,8 +1201,7 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const
// Format checking is relaxed, but we still have to check for matching bytes per block.
// This avoids creating a view for blits on UE4 titles where formats with different bytes
// per block are aliased.
- if (BytesPerBlock(existing.format) != BytesPerBlock(candidate.format) &&
- False(options & RelaxedOptions::FormatBpp)) {
+ if (BytesPerBlock(existing.format) != BytesPerBlock(candidate.format)) {
return std::nullopt;
}
} else {
@@ -1233,11 +1232,7 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const
}
const bool strict_size = False(options & RelaxedOptions::Size);
if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) {
- if (False(options & RelaxedOptions::FormatBpp)) {
- return std::nullopt;
- } else if (!IsBlockLinearSizeCompatibleBPPRelaxed(existing, candidate, base->level, 0)) {
- return std::nullopt;
- }
+ return std::nullopt;
}
// TODO: compare block sizes
return base;
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 421e71e5a..e04852e01 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -485,7 +485,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
}
}
- if (extensions.extended_dynamic_state2 && (is_radv || is_qualcomm)) {
+ if (extensions.extended_dynamic_state2 && is_radv) {
const u32 version = (properties.properties.driverVersion << 3) >> 3;
if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) {
LOG_WARNING(
@@ -498,6 +498,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
}
}
+ if (extensions.extended_dynamic_state2 && is_qualcomm) {
+ const u32 version = (properties.properties.driverVersion << 3) >> 3;
+ if (version >= VK_MAKE_API_VERSION(0, 0, 676, 0) &&
+ version < VK_MAKE_API_VERSION(0, 0, 680, 0)) {
+ // Qualcomm Adreno 7xx drivers do not properly support extended_dynamic_state2.
+ LOG_WARNING(Render_Vulkan,
+ "Qualcomm Adreno 7xx drivers have broken VK_EXT_extended_dynamic_state2");
+ features.extended_dynamic_state2.extendedDynamicState2 = false;
+ features.extended_dynamic_state2.extendedDynamicState2LogicOp = false;
+ features.extended_dynamic_state2.extendedDynamicState2PatchControlPoints = false;
+ extensions.extended_dynamic_state2 = false;
+ loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
+ }
+ }
if (extensions.extended_dynamic_state3 && is_radv) {
LOG_WARNING(Render_Vulkan, "RADV has broken extendedDynamicState3ColorBlendEquation");
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
@@ -512,8 +526,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
dynamic_state3_enables = false;
}
}
- if (extensions.vertex_input_dynamic_state && (is_radv || is_qualcomm)) {
- // Qualcomm S8gen2 drivers do not properly support vertex_input_dynamic_state.
+ if (extensions.vertex_input_dynamic_state && is_radv) {
// TODO(ameerj): Blacklist only offending driver versions
// TODO(ameerj): Confirm if RDNA1 is affected
const bool is_rdna2 =
@@ -526,6 +539,19 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
loaded_extensions.erase(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
}
}
+ if (extensions.vertex_input_dynamic_state && is_qualcomm) {
+ const u32 version = (properties.properties.driverVersion << 3) >> 3;
+ if (version >= VK_MAKE_API_VERSION(0, 0, 676, 0) &&
+ version < VK_MAKE_API_VERSION(0, 0, 680, 0)) {
+ // Qualcomm Adreno 7xx drivers do not properly support vertex_input_dynamic_state.
+ LOG_WARNING(
+ Render_Vulkan,
+ "Qualcomm Adreno 7xx drivers have broken VK_EXT_vertex_input_dynamic_state");
+ features.vertex_input_dynamic_state.vertexInputDynamicState = false;
+ extensions.vertex_input_dynamic_state = false;
+ loaded_extensions.erase(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
+ }
+ }
sets_per_pool = 64;
if (extensions.extended_dynamic_state3 && is_amd_driver &&
@@ -774,6 +800,17 @@ bool Device::ShouldBoostClocks() const {
return validated_driver && !is_steam_deck && !is_debugging;
}
+bool Device::HasTimelineSemaphore() const {
+ if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
+ GetDriverID() == VK_DRIVER_ID_MESA_TURNIP) {
+ // Timeline semaphores do not work properly on all Qualcomm drivers.
+ // They generally work properly with Turnip drivers, but are problematic on some devices
+ // (e.g. ZTE handsets with Snapdragon 870).
+ return false;
+ }
+ return features.timeline_semaphore.timelineSemaphore;
+}
+
bool Device::GetSuitability(bool requires_swapchain) {
// Assume we will be suitable.
bool suitable = true;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 1f17265d5..be3ed45ff 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -77,6 +77,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
EXTENSION(KHR, SPIRV_1_4, spirv_1_4) \
EXTENSION(KHR, SWAPCHAIN, swapchain) \
EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \
+ EXTENSION(KHR, IMAGE_FORMAT_LIST, image_format_list) \
EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \
EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \
EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \
@@ -408,6 +409,11 @@ public:
return extensions.workgroup_memory_explicit_layout;
}
+ /// Returns true if the device supports VK_KHR_image_format_list.
+ bool IsKhrImageFormatListSupported() const {
+ return extensions.image_format_list || instance_version >= VK_API_VERSION_1_2;
+ }
+
/// Returns true if the device supports VK_EXT_primitive_topology_list_restart.
bool IsTopologyListPrimitiveRestartSupported() const {
return features.primitive_topology_list_restart.primitiveTopologyListRestart;
@@ -522,13 +528,7 @@ public:
return extensions.shader_atomic_int64;
}
- bool HasTimelineSemaphore() const {
- if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
- // Timeline semaphores do not work properly on all Qualcomm drivers.
- return false;
- }
- return features.timeline_semaphore.timelineSemaphore;
- }
+ bool HasTimelineSemaphore() const;
/// Returns the minimum supported version of SPIR-V.
u32 SupportedSpirvVersion() const {