From 38165fb7e3e486b5099cfa76f5a09ec9f3201acd Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Jun 2021 15:52:27 +0200 Subject: Texture Cache: Initial Implementation of Sparse Textures. --- src/video_core/texture_cache/image_base.cpp | 3 + src/video_core/texture_cache/image_base.h | 8 + src/video_core/texture_cache/texture_cache.h | 248 +++++++++++++++++++++++++-- src/video_core/texture_cache/types.h | 1 + src/video_core/texture_cache/util.cpp | 41 ++++- src/video_core/texture_cache/util.h | 6 +- 6 files changed, 285 insertions(+), 22 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index ad69d32d1..2aae338b6 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_ } } +ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_) + : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {} + std::optional ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { if (other_addr < gpu_addr) { // Subresource address can't be lower than the base diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index e326cab71..004ec23e4 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -57,6 +57,12 @@ struct ImageBase { return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; } + [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept { + const VAddr overlap_end = overlap_gpu_addr + overlap_size; + const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes; + return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end; + } + void CheckBadOverlapState(); void CheckAliasState(); @@ -84,6 +90,8 @@ struct ImageBase { std::vector aliased_images; std::vector overlapping_images; + ImageMapId map_view_id{}; + bool is_sparse{}; }; struct ImageAllocBase { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d8dbd3824..9f6410d58 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -152,6 +152,9 @@ public: /// Remove images in a region void UnmapMemory(VAddr cpu_addr, size_t size); + /// Remove images in a region + void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); + /// Blit an image with the given parameters void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, @@ -190,7 +193,22 @@ public: private: /// Iterate over all page indices in a range template - static void ForEachPage(VAddr addr, size_t size, Func&& func) { + static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; + const u64 page_end = (addr + size - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); + } + } + } + + template + static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; const u64 page_end = (addr + size - 1) >> PAGE_BITS; for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { @@ -269,6 +287,13 @@ private: template void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); + template + void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); + + /// Iterates over all the images in a region calling func + template + void ForEachSparseSegment(ImageBase& image, Func&& func); + /// Find or create an image view in the given image with the passed parameters [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); @@ -340,7 +365,8 @@ private: std::unordered_map samplers; std::unordered_map framebuffers; - std::unordered_map, IdentityHash> page_table; + std::unordered_map, IdentityHash> page_table; + std::unordered_map, IdentityHash> gpu_page_table; bool has_deleted_images = false; u64 total_used_memory = 0; @@ -349,6 +375,7 @@ private: u64 critical_memory; SlotVector slot_images; + SlotVector slot_map_views; SlotVector slot_image_views; SlotVector slot_image_allocs; SlotVector slot_samplers; @@ -702,6 +729,21 @@ void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { } } +template +void TextureCache

::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { + std::vector deleted_images; + ForEachImageInRegionGPU(gpu_addr, size, + [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image); + } + UnregisterImage(id); + DeleteImage(id); + } +} + template void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, @@ -833,9 +875,10 @@ typename P::ImageView* TextureCache

::TryFindFramebufferImageView(VAddr cpu_ad if (it == page_table.end()) { return nullptr; } - const auto& image_ids = it->second; - for (const ImageId image_id : image_ids) { - const ImageBase& image = slot_images[image_id]; + const auto& image_map_ids = it->second; + for (const ImageMapId map_id : image_map_ids) { + const ImageMapView& map = slot_map_views[map_id]; + const ImageBase& image = slot_images[map.image_id]; if (image.cpu_addr != cpu_addr) { continue; } @@ -958,7 +1001,7 @@ void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) template ImageViewId TextureCache

::FindImageView(const TICEntry& config) { - if (!IsValidAddress(gpu_memory, config)) { + if (!IsValidEntry(gpu_memory, config)) { return NULL_IMAGE_VIEW_ID; } const auto [pair, is_new] = image_views.try_emplace(config); @@ -1026,7 +1069,7 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, } return false; }; - ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); + ForEachImageInRegionGPU(gpu_addr, CalculateGuestSizeInBytes(info), lambda); return image_id; } @@ -1056,7 +1099,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA std::vector left_aliased_ids; std::vector right_aliased_ids; std::vector bad_overlap_ids; - ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { + ForEachImageInRegionGPU(gpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { if (info.type == ImageType::Linear) { if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { // Alias linear images with the same pitch @@ -1091,6 +1134,24 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; + new_image.is_sparse = false; + if (new_image.info.type != ImageType::Linear && new_image.info.type != ImageType::Buffer) { + const LevelArray offsets = CalculateMipLevelOffsets(new_image.info); + size_t level; + const size_t levels = static_cast(new_image.info.resources.levels); + VAddr n_cpu_addr = new_image.cpu_addr; + GPUVAddr n_gpu_addr = new_image.gpu_addr; + for (level = 0; level < levels; level++) { + n_gpu_addr += offsets[level]; + n_cpu_addr += offsets[level]; + std::optional cpu_addr_opt = gpu_memory.GpuToCpuAddress(n_gpu_addr); + if (!cpu_addr_opt || *cpu_addr_opt == 0 || n_cpu_addr != *cpu_addr_opt) { + new_image.is_sparse = true; + break; + } + } + } + // TODO: Only upload what we need RefreshContents(new_image); @@ -1239,7 +1300,8 @@ void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f using FuncReturn = typename std::invoke_result::type; static constexpr bool BOOL_BREAK = std::is_same_v; boost::container::small_vector images; - ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { + boost::container::small_vector maps; + ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { const auto it = page_table.find(page); if (it == page_table.end()) { if constexpr (BOOL_BREAK) { @@ -1248,12 +1310,63 @@ void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f return; } } + for (const ImageMapId map_id : it->second) { + ImageMapView& map = slot_map_views[map_id]; + if (map.picked) { + continue; + } + if (!map.Overlaps(cpu_addr, size)) { + continue; + } + map.picked = true; + maps.push_back(map_id); + Image& image = slot_images[map.image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(map.image_id); + if constexpr (BOOL_BREAK) { + if (func(map.image_id, image)) { + return true; + } + } else { + func(map.image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } + for (const ImageMapId map_id : maps) { + slot_map_views[map_id].picked = false; + } +} + +template +template +void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = gpu_page_table.find(page); + if (it == gpu_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } for (const ImageId image_id : it->second) { Image& image = slot_images[image_id]; if (True(image.flags & ImageFlagBits::Picked)) { continue; } - if (!image.Overlaps(cpu_addr, size)) { + if (!image.OverlapsGPU(gpu_addr, size)) { continue; } image.flags |= ImageFlagBits::Picked; @@ -1275,6 +1388,30 @@ void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f } } +template +template +void TextureCache

::ForEachSparseSegment(ImageBase& image, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + GPUVAddr gpu_addr = image.gpu_addr; + const size_t levels = image.info.resources.levels; + const auto mipmap_sizes = CalculateMipLevelSizes(image.info); + for (size_t level = 0; level < levels; level++) { + const size_t size = mipmap_sizes[level]; + std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (cpu_addr && *cpu_addr != 0) { + if constexpr (BOOL_BREAK) { + if (func(gpu_addr, *cpu_addr, size)) { + return true; + } + } else { + func(gpu_addr, *cpu_addr, size); + } + } + gpu_addr += size; + } +} + template ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { Image& image = slot_images[image_id]; @@ -1292,8 +1429,6 @@ void TextureCache

::RegisterImage(ImageId image_id) { ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Trying to register an already registered image"); image.flags |= ImageFlagBits::Registered; - ForEachPage(image.cpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { page_table[page].push_back(image_id); }); u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); if ((IsPixelFormatASTC(image.info.format) && True(image.flags & ImageFlagBits::AcceleratedUpload)) || @@ -1301,6 +1436,21 @@ void TextureCache

::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); + if (!image.is_sparse) { + auto map_id = + slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + image.map_view_id = map_id; + return; + } + ForEachSparseSegment(image, [this, image_id](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); + ForEachCPUPage(cpu_addr, size, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + }); } template @@ -1317,9 +1467,9 @@ void TextureCache

::UnregisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); - ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { + const auto page_it = gpu_page_table.find(page); + if (page_it == gpu_page_table.end()) { UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); return; } @@ -1331,20 +1481,84 @@ void TextureCache

::UnregisterImage(ImageId image_id) { } image_ids.erase(vector_it); }); + if (!image.is_sparse) { + const auto map_id = image.map_view_id; + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_map_ids = page_it->second; + const auto vector_it = std::ranges::find(image_map_ids, map_id); + if (vector_it == image_map_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", + page << PAGE_BITS); + return; + } + image_map_ids.erase(vector_it); + }); + slot_map_views.erase(map_id); + return; + } + boost::container::small_vector maps_to_delete; + ForEachSparseSegment( + image, [this, image_id, &maps_to_delete]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, + size_t size) { + ForEachCPUPage(cpu_addr, size, [this, image_id, &maps_to_delete](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_map_ids = page_it->second; + auto vector_it = image_map_ids.begin(); + while (vector_it != image_map_ids.end()) { + ImageMapView& map = slot_map_views[*vector_it]; + if (map.image_id != image_id) { + vector_it++; + continue; + } + if (!map.picked) { + maps_to_delete.push_back(*vector_it); + map.picked = true; + } + vector_it = image_map_ids.erase(vector_it); + } + }); + }); + + for (const ImageMapId map_id : maps_to_delete) { + slot_map_views.erase(map_id); + } } template void TextureCache

::TrackImage(ImageBase& image) { ASSERT(False(image.flags & ImageFlagBits::Tracked)); image.flags |= ImageFlagBits::Tracked; - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + if (!image.is_sparse) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + return; + } + ForEachSparseSegment(image, + [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + }); } template void TextureCache

::UntrackImage(ImageBase& image) { ASSERT(True(image.flags & ImageFlagBits::Tracked)); image.flags &= ~ImageFlagBits::Tracked; - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + if (!image.is_sparse) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + return; + } + ForEachSparseSegment(image, + [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + }); } template diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index c9571f7e4..9fbdc1ac6 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14; constexpr SlotId CORRUPT_ID{0xfffffffe}; using ImageId = SlotId; +using ImageMapId = SlotId; using ImageViewId = SlotId; using ImageAllocId = SlotId; using SamplerId = SlotId; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 4efe042b6..96bf8f8d9 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept { return offsets; } +LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { + const u32 num_levels = info.resources.levels; + const LevelInfo level_info = MakeLevelInfo(info); + LevelArray sizes{}; + for (u32 level = 0; level < num_levels; ++level) { + sizes[level] = CalculateLevelSize(level_info, level); + } + return sizes; +} + std::vector CalculateSliceOffsets(const ImageInfo& info) { ASSERT(info.type == ImageType::e3D); std::vector offsets; @@ -776,14 +786,37 @@ std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn return copies; } -bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { - if (config.Address() == 0) { +bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr) { + if (gpu_addr == 0) { return false; } - if (config.Address() > (u64(1) << 48)) { + if (gpu_addr > (u64(1) << 48)) { return false; } - return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); + const auto cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + return cpu_addr.has_value() && *cpu_addr != 0; +} + +bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { + const GPUVAddr gpu_addr = config.Address(); + if (IsValidAddress(gpu_memory, gpu_addr)) { + return true; + } + if (!config.IsBlockLinear()) { + return false; + } + const size_t levels = config.max_mip_level + 1; + if (levels <= 1) { + return false; + } + const ImageInfo info{config}; + const LevelArray offsets = CalculateMipLevelOffsets(info); + for (size_t level = 1; level < levels; level++) { + if (IsValidAddress(gpu_memory, static_cast(gpu_addr + offsets[level]))) { + return true; + } + } + return false; } std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index cdc5cbc75..b73361484 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -40,6 +40,8 @@ struct OverlapResult { [[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept; +[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; + [[nodiscard]] std::vector CalculateSliceOffsets(const ImageInfo& info); [[nodiscard]] std::vector CalculateSliceSubresources(const ImageInfo& info); @@ -55,7 +57,9 @@ struct OverlapResult { const ImageInfo& src, SubresourceBase base); -[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); +[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr); + +[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); [[nodiscard]] std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, -- cgit v1.2.3