diff options
author | Fernando Sahmkow <fsahmkow27@gmail.com> | 2021-11-14 20:55:52 +0100 |
---|---|---|
committer | Fernando Sahmkow <fsahmkow27@gmail.com> | 2022-10-06 21:00:52 +0200 |
commit | feb49c822d9cabc5bc7be9eab1f2bf4ba460176a (patch) | |
tree | 678c58e7fb8e41f5a04e309df9d196320d11de99 | |
parent | NVDRV: Update copyright notices. (diff) | |
download | yuzu-feb49c822d9cabc5bc7be9eab1f2bf4ba460176a.tar yuzu-feb49c822d9cabc5bc7be9eab1f2bf4ba460176a.tar.gz yuzu-feb49c822d9cabc5bc7be9eab1f2bf4ba460176a.tar.bz2 yuzu-feb49c822d9cabc5bc7be9eab1f2bf4ba460176a.tar.lz yuzu-feb49c822d9cabc5bc7be9eab1f2bf4ba460176a.tar.xz yuzu-feb49c822d9cabc5bc7be9eab1f2bf4ba460176a.tar.zst yuzu-feb49c822d9cabc5bc7be9eab1f2bf4ba460176a.zip |
-rw-r--r-- | src/common/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/common/address_space.cpp | 11 | ||||
-rw-r--r-- | src/common/address_space.h | 134 | ||||
-rw-r--r-- | src/common/address_space.inc | 338 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 460 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 163 | ||||
-rw-r--r-- | src/video_core/memory_manager.cpp | 10 | ||||
-rw-r--r-- | src/video_core/memory_manager.h | 3 |
8 files changed, 882 insertions, 239 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2db414819..a02696873 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -17,6 +17,8 @@ endif () include(GenerateSCMRev) add_library(common STATIC + address_space.cpp + address_space.h algorithm.h alignment.h announce_multiplayer_room.h diff --git a/src/common/address_space.cpp b/src/common/address_space.cpp new file mode 100644 index 000000000..6db85be87 --- /dev/null +++ b/src/common/address_space.cpp @@ -0,0 +1,11 @@ +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) +// Licensed under GPLv3 or any later version +// Refer to the license.txt file included. + +#include "common/address_space.inc" + +namespace Common { + +template class Common::FlatAllocator<u32, 0, 32>; + +} diff --git a/src/common/address_space.h b/src/common/address_space.h new file mode 100644 index 000000000..fd2f32b7d --- /dev/null +++ b/src/common/address_space.h @@ -0,0 +1,134 @@ +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) +// Licensed under GPLv3 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <concepts> +#include <functional> +#include <mutex> +#include <vector> + +#include "common/common_types.h" + +namespace Common { +template <typename VaType, size_t AddressSpaceBits> +concept AddressSpaceValid = std::is_unsigned_v<VaType> && sizeof(VaType) * 8 >= AddressSpaceBits; + +struct EmptyStruct {}; + +/** + * @brief FlatAddressSpaceMap provides a generic VA->PA mapping implementation using a sorted vector + */ +template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, + bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo = EmptyStruct> +requires AddressSpaceValid<VaType, AddressSpaceBits> class FlatAddressSpaceMap { +private: + std::function<void(VaType, VaType)> + unmapCallback{}; //!< Callback called when the mappings in an region have changed + +protected: + /** + * @brief Represents a block of memory in the AS, the physical mapping is contiguous until + * another block with a different phys address is hit + */ + struct Block { + VaType virt{UnmappedVa}; //!< VA of the block + PaType phys{UnmappedPa}; //!< PA of the block, will increase 1-1 with VA until a new block + //!< is encountered + [[no_unique_address]] ExtraBlockInfo extraInfo; + + Block() = default; + + Block(VaType virt, PaType phys, ExtraBlockInfo extraInfo) + : virt(virt), phys(phys), extraInfo(extraInfo) {} + + constexpr bool Valid() { + return virt != UnmappedVa; + } + + constexpr bool Mapped() { + return phys != UnmappedPa; + } + + constexpr bool Unmapped() { + return phys == UnmappedPa; + } + + bool operator<(const VaType& pVirt) const { + return virt < pVirt; + } + }; + + std::mutex blockMutex; + std::vector<Block> blocks{Block{}}; + + /** + * @brief Maps a PA range into the given AS region + * @note blockMutex MUST be locked when calling this + */ + void MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo); + + /** + * @brief Unmaps the given range and merges it with other unmapped regions + * 
@note blockMutex MUST be locked when calling this + */ + void UnmapLocked(VaType virt, VaType size); + +public: + static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) + + ((1ULL << (AddressSpaceBits - 1)) - + 1)}; //!< The maximum VA that this AS can technically reach + + VaType vaLimit{VaMaximum}; //!< A soft limit on the maximum VA of the AS + + FlatAddressSpaceMap(VaType vaLimit, std::function<void(VaType, VaType)> unmapCallback = {}); + + FlatAddressSpaceMap() = default; + + void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo = {}) { + std::scoped_lock lock(blockMutex); + MapLocked(virt, phys, size, extraInfo); + } + + void Unmap(VaType virt, VaType size) { + std::scoped_lock lock(blockMutex); + UnmapLocked(virt, size); + } +}; + +/** + * @brief FlatMemoryManager specialises FlatAddressSpaceMap to work as an allocator, with an + * initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block + */ +template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> +requires AddressSpaceValid<VaType, AddressSpaceBits> class FlatAllocator + : public FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits> { +private: + using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>; + + VaType currentLinearAllocEnd; //!< The end address for the initial linear allocation pass, once + //!< this reaches the AS limit the slower allocation path will be + //!< used + +public: + VaType vaStart; //!< The base VA of the allocator, no allocations will be below this + + FlatAllocator(VaType vaStart, VaType vaLimit = Base::VaMaximum); + + /** + * @brief Allocates a region in the AS of the given size and returns its address + */ + VaType Allocate(VaType size); + + /** + * @brief Marks the given region in the AS as allocated + */ + void AllocateFixed(VaType virt, VaType size); + + /** + * @brief Frees an AS region so it can be used again + */ + void 
Free(VaType virt, VaType size); +}; +} // namespace Common diff --git a/src/common/address_space.inc b/src/common/address_space.inc new file mode 100644 index 000000000..907c55d88 --- /dev/null +++ b/src/common/address_space.inc @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPLv3 or later +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include "common/address_space.h" +#include "common/assert.h" + +#define MAP_MEMBER(returnType) \ + template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, \ + bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> \ + requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap< \ + VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo> +#define MAP_MEMBER_CONST() \ + template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, \ + bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> \ + requires AddressSpaceValid<VaType, AddressSpaceBits> FlatAddressSpaceMap< \ + VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo> + +#define MM_MEMBER(returnType) \ + template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \ + requires AddressSpaceValid<VaType, AddressSpaceBits> returnType \ + FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits> + +#define ALLOC_MEMBER(returnType) \ + template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \ + requires AddressSpaceValid<VaType, AddressSpaceBits> returnType \ + FlatAllocator<VaType, UnmappedVa, AddressSpaceBits> +#define ALLOC_MEMBER_CONST() \ + template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \ + requires AddressSpaceValid<VaType, AddressSpaceBits> \ + FlatAllocator<VaType, UnmappedVa, AddressSpaceBits> + +namespace Common { +MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType vaLimit, + std::function<void(VaType, VaType)> unmapCallback) + 
: unmapCallback(std::move(unmapCallback)), vaLimit(vaLimit) { + if (vaLimit > VaMaximum) + UNREACHABLE_MSG("Invalid VA limit!"); +} + +MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo) { + VaType virtEnd{virt + size}; + + if (virtEnd > vaLimit) + UNREACHABLE_MSG("Trying to map a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}", + virtEnd, vaLimit); + + auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)}; + if (blockEndSuccessor == blocks.begin()) + UNREACHABLE_MSG("Trying to map a block before the VA start: virtEnd: 0x{:X}", virtEnd); + + auto blockEndPredecessor{std::prev(blockEndSuccessor)}; + + if (blockEndSuccessor != blocks.end()) { + // We have blocks in front of us, if one is directly in front then we don't have to add a + // tail + if (blockEndSuccessor->virt != virtEnd) { + PaType tailPhys{[&]() -> PaType { + if constexpr (!PaContigSplit) { + return blockEndPredecessor + ->phys; // Always propagate unmapped regions rather than calculating offset + } else { + if (blockEndPredecessor->Unmapped()) + return blockEndPredecessor->phys; // Always propagate unmapped regions + // rather than calculating offset + else + return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt; + } + }()}; + + if (blockEndPredecessor->virt >= virt) { + // If this block's start would be overlapped by the map then reuse it as a tail + // block + blockEndPredecessor->virt = virtEnd; + blockEndPredecessor->phys = tailPhys; + blockEndPredecessor->extraInfo = blockEndPredecessor->extraInfo; + + // No longer predecessor anymore + blockEndSuccessor = blockEndPredecessor--; + } else { + // Else insert a new one and we're done + blocks.insert(blockEndSuccessor, + {Block(virt, phys, extraInfo), + Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)}); + if (unmapCallback) + unmapCallback(virt, size); + + return; + } + } + } else { + // blockEndPredecessor will always be unmapped as blocks has to 
be terminated by an unmapped + // chunk + if (blockEndPredecessor != blocks.begin() && blockEndPredecessor->virt >= virt) { + // Move the unmapped block start backwards + blockEndPredecessor->virt = virtEnd; + + // No longer predecessor anymore + blockEndSuccessor = blockEndPredecessor--; + } else { + // Else insert a new one and we're done + blocks.insert(blockEndSuccessor, + {Block(virt, phys, extraInfo), Block(virtEnd, UnmappedPa, {})}); + if (unmapCallback) + unmapCallback(virt, size); + + return; + } + } + + auto blockStartSuccessor{blockEndSuccessor}; + + // Walk the block vector to find the start successor as this is more efficient than another + // binary search in most scenarios + while (std::prev(blockStartSuccessor)->virt >= virt) + blockStartSuccessor--; + + // Check that the start successor is either the end block or something in between + if (blockStartSuccessor->virt > virtEnd) { + UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt); + } else if (blockStartSuccessor->virt == virtEnd) { + // We need to create a new block as there are none spare that we would overwrite + blocks.insert(blockStartSuccessor, Block(virt, phys, extraInfo)); + } else { + // Erase overwritten blocks + if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor) + blocks.erase(eraseStart, blockEndSuccessor); + + // Reuse a block that would otherwise be overwritten as a start block + blockStartSuccessor->virt = virt; + blockStartSuccessor->phys = phys; + blockStartSuccessor->extraInfo = extraInfo; + } + + if (unmapCallback) + unmapCallback(virt, size); +} + +MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) { + VaType virtEnd{virt + size}; + + if (virtEnd > vaLimit) + UNREACHABLE_MSG("Trying to map a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}", + virtEnd, vaLimit); + + auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)}; + if (blockEndSuccessor == blocks.begin()) + 
UNREACHABLE_MSG("Trying to unmap a block before the VA start: virtEnd: 0x{:X}", virtEnd); + + auto blockEndPredecessor{std::prev(blockEndSuccessor)}; + + auto walkBackToPredecessor{[&](auto iter) { + while (iter->virt >= virt) + iter--; + + return iter; + }}; + + auto eraseBlocksWithEndUnmapped{[&](auto unmappedEnd) { + auto blockStartPredecessor{walkBackToPredecessor(unmappedEnd)}; + auto blockStartSuccessor{std::next(blockStartPredecessor)}; + + auto eraseEnd{[&]() { + if (blockStartPredecessor->Unmapped()) { + // If the start predecessor is unmapped then we can erase everything in our region + // and be done + return std::next(unmappedEnd); + } else { + // Else reuse the end predecessor as the start of our unmapped region then erase all + // up to it + unmappedEnd->virt = virt; + return unmappedEnd; + } + }()}; + + // We can't have two unmapped regions after each other + if (eraseEnd != blocks.end() && + (eraseEnd == blockStartSuccessor || + (blockStartPredecessor->Unmapped() && eraseEnd->Unmapped()))) + UNREACHABLE_MSG("Multiple contiguous unmapped regions are unsupported!"); + + blocks.erase(blockStartSuccessor, eraseEnd); + }}; + + // We can avoid any splitting logic if these are the case + if (blockEndPredecessor->Unmapped()) { + if (blockEndPredecessor->virt > virt) + eraseBlocksWithEndUnmapped(blockEndPredecessor); + + if (unmapCallback) + unmapCallback(virt, size); + + return; // The region is unmapped, bail out early + } else if (blockEndSuccessor->virt == virtEnd && blockEndSuccessor->Unmapped()) { + eraseBlocksWithEndUnmapped(blockEndSuccessor); + + if (unmapCallback) + unmapCallback(virt, size); + + return; // The region is unmapped here and doesn't need splitting, bail out early + } else if (blockEndSuccessor == blocks.end()) { + // This should never happen as the end should always follow an unmapped block + UNREACHABLE_MSG("Unexpected Memory Manager state!"); + } else if (blockEndSuccessor->virt != virtEnd) { + // If one block is directly in front 
then we don't have to add a tail + + // The previous block is mapped so we will need to add a tail with an offset + PaType tailPhys{[&]() { + if constexpr (PaContigSplit) + return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt; + else + return blockEndPredecessor->phys; + }()}; + + if (blockEndPredecessor->virt >= virt) { + // If this block's start would be overlapped by the unmap then reuse it as a tail block + blockEndPredecessor->virt = virtEnd; + blockEndPredecessor->phys = tailPhys; + + // No longer predecessor anymore + blockEndSuccessor = blockEndPredecessor--; + } else { + blocks.insert(blockEndSuccessor, + {Block(virt, UnmappedPa, {}), + Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)}); + if (unmapCallback) + unmapCallback(virt, size); + + return; // The previous block is mapped and ends before + } + } + + // Walk the block vector to find the start predecessor as this is more efficient than another + // binary search in most scenarios + auto blockStartPredecessor{walkBackToPredecessor(blockEndSuccessor)}; + auto blockStartSuccessor{std::next(blockStartPredecessor)}; + + if (blockStartSuccessor->virt > virtEnd) { + UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt); + } else if (blockStartSuccessor->virt == virtEnd) { + // There are no blocks between the start and the end that would let us skip inserting a new + // one for head + + // The previous block is may be unmapped, if so we don't need to insert any unmaps after it + if (blockStartPredecessor->Mapped()) + blocks.insert(blockStartSuccessor, Block(virt, UnmappedPa, {})); + } else if (blockStartPredecessor->Unmapped()) { + // If the previous block is unmapped + blocks.erase(blockStartSuccessor, blockEndPredecessor); + } else { + // Erase overwritten blocks, skipping the first one as we have written the unmapped start + // block there + if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor) + 
blocks.erase(eraseStart, blockEndSuccessor); + + // Add in the unmapped block header + blockStartSuccessor->virt = virt; + blockStartSuccessor->phys = UnmappedPa; + } + + if (unmapCallback) + unmapCallback(virt, size); +} + +ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart, VaType vaLimit) + : Base(vaLimit), currentLinearAllocEnd(vaStart), vaStart(vaStart) {} + +ALLOC_MEMBER(VaType)::Allocate(VaType size) { + std::scoped_lock lock(this->blockMutex); + + VaType allocStart{UnmappedVa}; + VaType allocEnd{currentLinearAllocEnd + size}; + + // Avoid searching backwards in the address space if possible + if (allocEnd >= currentLinearAllocEnd && allocEnd <= this->vaLimit) { + auto allocEndSuccessor{ + std::lower_bound(this->blocks.begin(), this->blocks.end(), allocEnd)}; + if (allocEndSuccessor == this->blocks.begin()) + UNREACHABLE_MSG("First block in AS map is invalid!"); + + auto allocEndPredecessor{std::prev(allocEndSuccessor)}; + if (allocEndPredecessor->virt <= currentLinearAllocEnd) { + allocStart = currentLinearAllocEnd; + } else { + // Skip over fixed any mappings in front of us + while (allocEndSuccessor != this->blocks.end()) { + if (allocEndSuccessor->virt - allocEndPredecessor->virt < size || + allocEndPredecessor->Mapped()) { + allocStart = allocEndPredecessor->virt; + break; + } + + allocEndPredecessor = allocEndSuccessor++; + + // Use the VA limit to calculate if we can fit in the final block since it has no + // successor + if (allocEndSuccessor == this->blocks.end()) { + allocEnd = allocEndPredecessor->virt + size; + + if (allocEnd >= allocEndPredecessor->virt && allocEnd <= this->vaLimit) + allocStart = allocEndPredecessor->virt; + } + } + } + } + + if (allocStart != UnmappedVa) { + currentLinearAllocEnd = allocStart + size; + } else { // If linear allocation overflows the AS then find a gap + if (this->blocks.size() <= 2) + UNREACHABLE_MSG("Unexpected allocator state!"); + + auto searchPredecessor{this->blocks.begin()}; + auto 
searchSuccessor{std::next(searchPredecessor)}; + + while (searchSuccessor != this->blocks.end() && + (searchSuccessor->virt - searchPredecessor->virt < size || + searchPredecessor->Mapped())) { + searchPredecessor = searchSuccessor++; + } + + if (searchSuccessor != this->blocks.end()) + allocStart = searchPredecessor->virt; + else + return {}; // AS is full + } + + this->MapLocked(allocStart, true, size, {}); + return allocStart; +} + +ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) { + this->Map(virt, true, size); +} + +ALLOC_MEMBER(void)::Free(VaType virt, VaType size) { + this->Unmap(virt, size); +} +} // namespace Common diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 5c70c9a57..344ddfc90 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -6,6 +6,7 @@ #include <cstring> #include <utility> +#include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" @@ -21,8 +22,8 @@ namespace Service::Nvidia::Devices { nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core) - : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, - gmmu{std::make_shared<Tegra::MemoryManager>(system)} {} + : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, vm{}, + gmmu{} {} nvhost_as_gpu::~nvhost_as_gpu() = default; NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, @@ -89,12 +90,49 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>& IoctlAllocAsEx params{}; std::memcpy(&params, input.data(), input.size()); - LOG_WARNING(Service_NVDRV, "(STUBBED) called, big_page_size=0x{:X}", params.big_page_size); - if (params.big_page_size == 0) { - params.big_page_size = DEFAULT_BIG_PAGE_SIZE; + LOG_DEBUG(Service_NVDRV, "called, 
big_page_size=0x{:X}", params.big_page_size); + + std::scoped_lock lock(mutex); + + if (vm.initialised) { + UNREACHABLE_MSG("Cannot initialise an address space twice!"); + return NvResult::InvalidState; } - big_page_size = params.big_page_size; + if (params.big_page_size) { + if (!std::has_single_bit(params.big_page_size)) { + LOG_ERROR(Service_NVDRV, "Non power-of-2 big page size: 0x{:X}!", params.big_page_size); + return NvResult::BadValue; + } + + if (!(params.big_page_size & VM::SUPPORTED_BIG_PAGE_SIZES)) { + LOG_ERROR(Service_NVDRV, "Unsupported big page size: 0x{:X}!", params.big_page_size); + return NvResult::BadValue; + } + + vm.big_page_size = params.big_page_size; + vm.big_page_size_bits = static_cast<u32>(std::countr_zero(params.big_page_size)); + + vm.va_range_start = params.big_page_size << VM::VA_START_SHIFT; + } + + // If this is unspecified then default values should be used + if (params.va_range_start) { + vm.va_range_start = params.va_range_start; + vm.va_range_split = params.va_range_split; + vm.va_range_end = params.va_range_end; + } + + const u64 start_pages{vm.va_range_start >> VM::PAGE_SIZE_BITS}; + const u64 end_pages{vm.va_range_split >> VM::PAGE_SIZE_BITS}; + vm.small_page_allocator = std::make_shared<VM::Allocator>(start_pages, end_pages); + + const u64 start_big_pages{vm.va_range_split >> vm.big_page_size_bits}; + const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits}; + vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages); + + gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, VM::PAGE_SIZE_BITS); + vm.initialised = true; return NvResult::Success; } @@ -106,21 +144,73 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector< LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, params.page_size, params.flags); - const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; - if 
((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) { - params.offset = *(gmmu->AllocateFixed(params.offset, size)); + std::scoped_lock lock(mutex); + + if (!vm.initialised) { + return NvResult::BadValue; + } + + if (params.page_size != VM::YUZU_PAGESIZE && params.page_size != vm.big_page_size) { + return NvResult::BadValue; + } + + if (params.page_size != vm.big_page_size && + ((params.flags & MappingFlags::Sparse) != MappingFlags::None)) { + UNIMPLEMENTED_MSG("Sparse small pages are not implemented!"); + return NvResult::NotImplemented; + } + + const u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS + : vm.big_page_size_bits}; + + auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator + : *vm.big_page_allocator}; + + if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) { + allocator.AllocateFixed(static_cast<u32>(params.offset >> page_size_bits), params.pages); } else { - params.offset = gmmu->Allocate(size, params.align); + params.offset = static_cast<u64>(allocator.Allocate(params.pages)) << page_size_bits; + if (!params.offset) { + UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!"); + return NvResult::InsufficientMemory; + } } - auto result = NvResult::Success; - if (!params.offset) { - LOG_CRITICAL(Service_NVDRV, "allocation failed for size {}", size); - result = NvResult::InsufficientMemory; + u64 size{static_cast<u64>(params.pages) * params.page_size}; + + if ((params.flags & MappingFlags::Sparse) != MappingFlags::None) { + gmmu->MapSparse(params.offset, size); } + allocation_map[params.offset] = { + .size = size, + .page_size = params.page_size, + .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None, + }; + std::memcpy(output.data(), &params, output.size()); - return result; + return NvResult::Success; +} + +void nvhost_as_gpu::FreeMappingLocked(u64 offset) { + auto mapping{mapping_map.at(offset)}; + + if (!mapping->fixed) { + auto& 
allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; + u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; + + allocator.Free(static_cast<u32>(mapping->offset >> page_size_bits), + static_cast<u32>(mapping->size >> page_size_bits)); + } + + // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state + // Only FreeSpace can unmap them fully + if (mapping->sparse_alloc) + gmmu->MapSparse(offset, mapping->size); + else + gmmu->Unmap(offset, mapping->size); + + mapping_map.erase(offset); } NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& output) { @@ -130,7 +220,40 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset, params.pages, params.page_size); - gmmu->Unmap(params.offset, static_cast<std::size_t>(params.pages) * params.page_size); + std::scoped_lock lock(mutex); + + if (!vm.initialised) { + return NvResult::BadValue; + } + + try { + auto allocation{allocation_map[params.offset]}; + + if (allocation.page_size != params.page_size || + allocation.size != (static_cast<u64>(params.pages) * params.page_size)) { + return NvResult::BadValue; + } + + for (const auto& mapping : allocation.mappings) { + FreeMappingLocked(mapping->offset); + } + + // Unset sparse flag if required + if (allocation.sparse) { + gmmu->Unmap(params.offset, allocation.size); + } + + auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator + : *vm.big_page_allocator}; + u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? 
VM::PAGE_SIZE_BITS + : vm.big_page_size_bits}; + + allocator.Free(static_cast<u32>(params.offset >> page_size_bits), + static_cast<u32>(allocation.size >> page_size_bits)); + allocation_map.erase(params.offset); + } catch ([[maybe_unused]] const std::out_of_range& e) { + return NvResult::BadValue; + } std::memcpy(output.data(), &params, output.size()); return NvResult::Success; @@ -141,43 +264,51 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries); - auto result = NvResult::Success; std::vector<IoctlRemapEntry> entries(num_entries); std::memcpy(entries.data(), input.data(), input.size()); + std::scoped_lock lock(mutex); + + if (!vm.initialised) { + return NvResult::BadValue; + } + for (const auto& entry : entries) { - LOG_DEBUG(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", - entry.offset, entry.nvmap_handle, entry.pages); - - if (entry.nvmap_handle == 0) { - // If nvmap handle is null, we should unmap instead. 
- const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10}; - const auto size{static_cast<u64>(entry.pages) << 0x10}; - gmmu->Unmap(offset, size); - continue; + GPUVAddr virtual_address{static_cast<u64>(entry.as_offset_big_pages) + << vm.big_page_size_bits}; + u64 size{static_cast<u64>(entry.big_pages) << vm.big_page_size_bits}; + + auto alloc{allocation_map.upper_bound(virtual_address)}; + + if (alloc-- == allocation_map.begin() || + (virtual_address - alloc->first) + size > alloc->second.size) { + LOG_WARNING(Service_NVDRV, "Cannot remap into an unallocated region!"); + return NvResult::BadValue; } - const auto object{nvmap.GetHandle(entry.nvmap_handle)}; - if (!object) { - LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", entry.nvmap_handle); - result = NvResult::InvalidState; - break; + if (!alloc->second.sparse) { + LOG_WARNING(Service_NVDRV, "Cannot remap a non-sparse mapping!"); + return NvResult::BadValue; } - const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10}; - const auto size{static_cast<u64>(entry.pages) << 0x10}; - const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10}; - const auto addr{gmmu->Map(object->address + map_offset, offset, size)}; + if (!entry.handle) { + gmmu->MapSparse(virtual_address, size); + } else { + auto handle{nvmap.GetHandle(entry.handle)}; + if (!handle) { + return NvResult::BadValue; + } - if (!addr) { - LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!"); - result = NvResult::InvalidState; - break; + VAddr cpu_address{static_cast<VAddr>( + handle->address + + (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; + + gmmu->Map(virtual_address, cpu_address, size); } } std::memcpy(output.data(), entries.data(), output.size()); - return result; + return NvResult::Success; } NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { @@ -187,75 +318,96 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, 
std::vector<u8 LOG_DEBUG(Service_NVDRV, "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}" ", offset={}", - params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size, + params.flags, params.handle, params.buffer_offset, params.mapping_size, params.offset); - if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) { - if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) { - const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)}; - const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)}; + std::scoped_lock lock(mutex); + + if (!vm.initialised) { + return NvResult::BadValue; + } - if (!gmmu->Map(cpu_addr, gpu_addr, params.mapping_size)) { - LOG_CRITICAL(Service_NVDRV, - "remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, " - "mapping_size = {}, offset={}", - params.flags, params.nvmap_handle, params.buffer_offset, - params.mapping_size, params.offset); + // Remaps a subregion of an existing mapping to a different PA + if ((params.flags & MappingFlags::Remap) != MappingFlags::None) { + try { + auto mapping{mapping_map.at(params.offset)}; - std::memcpy(output.data(), &params, output.size()); - return NvResult::InvalidState; + if (mapping->size < params.mapping_size) { + LOG_WARNING(Service_NVDRV, + "Cannot remap a partially mapped GPU address space region: 0x{:X}", + params.offset); + return NvResult::BadValue; } - std::memcpy(output.data(), &params, output.size()); - return NvResult::Success; - } else { - LOG_CRITICAL(Service_NVDRV, "address not mapped offset={}", params.offset); + u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)}; + VAddr cpu_address{mapping->ptr + params.buffer_offset}; + + gmmu->Map(gpu_address, cpu_address, params.mapping_size); - std::memcpy(output.data(), &params, output.size()); - return NvResult::InvalidState; + return NvResult::Success; + } catch ([[maybe_unused]] const std::out_of_range& e) 
{ + LOG_WARNING(Service_NVDRV, "Cannot remap an unmapped GPU address space region: 0x{:X}", + params.offset); + return NvResult::BadValue; } } - const auto object{nvmap.GetHandle(params.nvmap_handle)}; - if (!object) { - LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", params.nvmap_handle); - std::memcpy(output.data(), &params, output.size()); - return NvResult::InvalidState; + auto handle{nvmap.GetHandle(params.handle)}; + if (!handle) { + return NvResult::BadValue; } - // The real nvservices doesn't make a distinction between handles and ids, and - // object can only have one handle and it will be the same as its id. Assert that this is the - // case to prevent unexpected behavior. - ASSERT(object->id == params.nvmap_handle); + VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)}; + u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; - u64 page_size{params.page_size}; - if (!page_size) { - page_size = object->align; - } + if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) { + auto alloc{allocation_map.upper_bound(params.offset)}; - const auto physical_address{object->address + params.buffer_offset}; - u64 size{params.mapping_size}; - if (!size) { - size = object->size; - } + if (alloc-- == allocation_map.begin() || + (params.offset - alloc->first) + size > alloc->second.size) { + UNREACHABLE_MSG("Cannot perform a fixed mapping into an unallocated region!"); + return NvResult::BadValue; + } - const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None}; - if (is_alloc) { - params.offset = gmmu->MapAllocate(physical_address, size, page_size); - } else { - params.offset = gmmu->Map(physical_address, params.offset, size); - } + gmmu->Map(params.offset, cpu_address, size); - auto result = NvResult::Success; - if (!params.offset) { - LOG_CRITICAL(Service_NVDRV, "failed to map size={}", size); - result = NvResult::InvalidState; + auto 
mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, false, + alloc->second.sparse)}; + alloc->second.mappings.push_back(mapping); + mapping_map[params.offset] = mapping; } else { - AddBufferMap(params.offset, size, physical_address, is_alloc); + bool big_page{[&]() { + if (Common::IsAligned(handle->align, vm.big_page_size)) + return true; + else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE)) + return false; + else { + UNREACHABLE(); + return false; + } + }()}; + + auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; + u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; + u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; + + params.offset = static_cast<u64>(allocator.Allocate( + static_cast<u32>(Common::AlignUp(size, page_size) >> page_size_bits))) + << page_size_bits; + if (!params.offset) { + UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!"); + return NvResult::InsufficientMemory; + } + + gmmu->Map(params.offset, cpu_address, size); + + auto mapping{ + std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)}; + mapping_map[params.offset] = mapping; } std::memcpy(output.data(), &params, output.size()); - return result; + return NvResult::Success; } NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { @@ -264,13 +416,36 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8 LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset); - if (const auto size{RemoveBufferMap(params.offset)}; size) { - gmmu->Unmap(params.offset, *size); - } else { - LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset); + std::scoped_lock lock(mutex); + + if (!vm.initialised) { + return NvResult::BadValue; + } + + try { + auto mapping{mapping_map.at(params.offset)}; + + if (!mapping->fixed) { + auto& allocator{mapping->big_page ? 
*vm.big_page_allocator : *vm.small_page_allocator}; + u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; + + allocator.Free(static_cast<u32>(mapping->offset >> page_size_bits), + static_cast<u32>(mapping->size >> page_size_bits)); + } + + // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state + // Only FreeSpace can unmap them fully + if (mapping->sparse_alloc) { + gmmu->MapSparse(params.offset, mapping->size); + } else { + gmmu->Unmap(params.offset, mapping->size); + } + + mapping_map.erase(params.offset); + } catch ([[maybe_unused]] const std::out_of_range& e) { + LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset); } - std::memcpy(output.data(), &params, output.size()); return NvResult::Success; } @@ -284,28 +459,37 @@ NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8 return NvResult::Success; } +void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) { + params.buf_size = 2 * sizeof(VaRegion); + + params.regions = std::array<VaRegion, 2>{ + VaRegion{ + .offset = vm.small_page_allocator->vaStart << VM::PAGE_SIZE_BITS, + .page_size = VM::YUZU_PAGESIZE, + .pages = vm.small_page_allocator->vaLimit - vm.small_page_allocator->vaStart, + }, + VaRegion{ + .offset = vm.big_page_allocator->vaStart << vm.big_page_size_bits, + .page_size = vm.big_page_size, + .pages = vm.big_page_allocator->vaLimit - vm.big_page_allocator->vaStart, + }, + }; +} + NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) { IoctlGetVaRegions params{}; std::memcpy(&params, input.data(), input.size()); - LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr, - params.buf_size); - - params.buf_size = 0x30; + LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr, + params.buf_size); - params.small = IoctlVaRegion{ - .offset = 0x04000000, - .page_size = 
DEFAULT_SMALL_PAGE_SIZE, - .pages = 0x3fbfff, - }; + std::scoped_lock lock(mutex); - params.big = IoctlVaRegion{ - .offset = 0x04000000, - .page_size = big_page_size, - .pages = 0x1bffff, - }; + if (!vm.initialised) { + return NvResult::BadValue; + } - // TODO(ogniK): This probably can stay stubbed but should add support way way later + GetVARegionsImpl(params); std::memcpy(output.data(), &params, output.size()); return NvResult::Success; @@ -316,64 +500,24 @@ NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u IoctlGetVaRegions params{}; std::memcpy(&params, input.data(), input.size()); - LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr, - params.buf_size); + LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr, + params.buf_size); - params.buf_size = 0x30; - - params.small = IoctlVaRegion{ - .offset = 0x04000000, - .page_size = 0x1000, - .pages = 0x3fbfff, - }; + std::scoped_lock lock(mutex); - params.big = IoctlVaRegion{ - .offset = 0x04000000, - .page_size = big_page_size, - .pages = 0x1bffff, - }; + if (!vm.initialised) { + return NvResult::BadValue; + } - // TODO(ogniK): This probably can stay stubbed but should add support way way later + GetVARegionsImpl(params); std::memcpy(output.data(), &params, output.size()); - std::memcpy(inline_output.data(), &params.small, sizeof(IoctlVaRegion)); - std::memcpy(inline_output.data() + sizeof(IoctlVaRegion), &params.big, sizeof(IoctlVaRegion)); + std::memcpy(inline_output.data(), &params.regions[0], sizeof(VaRegion)); + std::memcpy(inline_output.data() + sizeof(VaRegion), &params.regions[1], sizeof(VaRegion)); return NvResult::Success; } -std::optional<nvhost_as_gpu::BufferMap> nvhost_as_gpu::FindBufferMap(GPUVAddr gpu_addr) const { - const auto end{buffer_mappings.upper_bound(gpu_addr)}; - for (auto iter{buffer_mappings.begin()}; iter != end; ++iter) { - if (gpu_addr >= iter->second.StartAddr() && gpu_addr < iter->second.EndAddr()) { - return 
iter->second; - } - } - - return std::nullopt; -} - -void nvhost_as_gpu::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, - bool is_allocated) { - buffer_mappings[gpu_addr] = {gpu_addr, size, cpu_addr, is_allocated}; -} - -std::optional<std::size_t> nvhost_as_gpu::RemoveBufferMap(GPUVAddr gpu_addr) { - if (const auto iter{buffer_mappings.find(gpu_addr)}; iter != buffer_mappings.end()) { - std::size_t size{}; - - if (iter->second.IsAllocated()) { - size = iter->second.Size(); - } - - buffer_mappings.erase(iter); - - return size; - } - - return std::nullopt; -} - Kernel::KEvent* nvhost_as_gpu::QueryEvent(u32 event_id) { LOG_CRITICAL(Service_NVDRV, "Unknown AS GPU Event {}", event_id); return nullptr; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index f5fb33ba7..1d27739e2 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h @@ -5,14 +5,19 @@ #pragma once +#include <bit> +#include <list> #include <map> #include <memory> +#include <mutex> #include <optional> #include <vector> +#include "common/address_space.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "common/swap.h" +#include "core/hle/service/nvdrv/core/nvmap.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" namespace Tegra { @@ -30,17 +35,13 @@ class NvMap; namespace Service::Nvidia::Devices { -constexpr u32 DEFAULT_BIG_PAGE_SIZE = 1 << 16; -constexpr u32 DEFAULT_SMALL_PAGE_SIZE = 1 << 12; - -class nvmap; - -enum class AddressSpaceFlags : u32 { - None = 0x0, - FixedOffset = 0x1, - Remap = 0x100, +enum class MappingFlags : u32 { + None = 0, + Fixed = 1 << 0, + Sparse = 1 << 1, + Remap = 1 << 8, }; -DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags); +DECLARE_ENUM_FLAG_OPERATORS(MappingFlags); class nvhost_as_gpu final : public nvdevice { public: @@ -59,46 +60,15 @@ public: Kernel::KEvent* QueryEvent(u32 event_id) override; 
-private: - class BufferMap final { - public: - constexpr BufferMap() = default; - - constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_) - : start_addr{start_addr_}, end_addr{start_addr_ + size_} {} - - constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_, VAddr cpu_addr_, - bool is_allocated_) - : start_addr{start_addr_}, end_addr{start_addr_ + size_}, cpu_addr{cpu_addr_}, - is_allocated{is_allocated_} {} - - constexpr VAddr StartAddr() const { - return start_addr; - } - - constexpr VAddr EndAddr() const { - return end_addr; - } - - constexpr std::size_t Size() const { - return end_addr - start_addr; - } - - constexpr VAddr CpuAddr() const { - return cpu_addr; - } - - constexpr bool IsAllocated() const { - return is_allocated; - } - - private: - GPUVAddr start_addr{}; - GPUVAddr end_addr{}; - VAddr cpu_addr{}; - bool is_allocated{}; + struct VaRegion { + u64 offset; + u32 page_size; + u32 _pad0_; + u64 pages; }; + static_assert(sizeof(VaRegion) == 0x18); +private: struct IoctlAllocAsEx { u32_le flags{}; // usually passes 1 s32_le as_fd{}; // ignored; passes 0 @@ -113,7 +83,7 @@ private: struct IoctlAllocSpace { u32_le pages{}; u32_le page_size{}; - AddressSpaceFlags flags{}; + MappingFlags flags{}; INSERT_PADDING_WORDS(1); union { u64_le offset; @@ -130,19 +100,19 @@ private: static_assert(sizeof(IoctlFreeSpace) == 16, "IoctlFreeSpace is incorrect size"); struct IoctlRemapEntry { - u16_le flags{}; - u16_le kind{}; - u32_le nvmap_handle{}; - u32_le map_offset{}; - u32_le offset{}; - u32_le pages{}; + u16 flags; + u16 kind; + NvCore::NvMap::Handle::Id handle; + u32 handle_offset_big_pages; + u32 as_offset_big_pages; + u32 big_pages; }; static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size"); struct IoctlMapBufferEx { - AddressSpaceFlags flags{}; // bit0: fixed_offset, bit2: cacheable - u32_le kind{}; // -1 is default - u32_le nvmap_handle{}; + MappingFlags flags{}; // bit0: fixed_offset, bit2: cacheable + u32_le kind{}; // 
-1 is default + NvCore::NvMap::Handle::Id handle; u32_le page_size{}; // 0 means don't care s64_le buffer_offset{}; u64_le mapping_size{}; @@ -160,27 +130,15 @@ private: }; static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size"); - struct IoctlVaRegion { - u64_le offset{}; - u32_le page_size{}; - INSERT_PADDING_WORDS(1); - u64_le pages{}; - }; - static_assert(sizeof(IoctlVaRegion) == 24, "IoctlVaRegion is incorrect size"); - struct IoctlGetVaRegions { u64_le buf_addr{}; // (contained output user ptr on linux, ignored) u32_le buf_size{}; // forced to 2*sizeof(struct va_region) u32_le reserved{}; - IoctlVaRegion small{}; - IoctlVaRegion big{}; + std::array<VaRegion, 2> regions{}; }; - static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2, + static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2, "IoctlGetVaRegions is incorrect size"); - s32 channel{}; - u32 big_page_size{DEFAULT_BIG_PAGE_SIZE}; - NvResult AllocAsEx(const std::vector<u8>& input, std::vector<u8>& output); NvResult AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output); NvResult Remap(const std::vector<u8>& input, std::vector<u8>& output); @@ -189,23 +147,74 @@ private: NvResult FreeSpace(const std::vector<u8>& input, std::vector<u8>& output); NvResult BindChannel(const std::vector<u8>& input, std::vector<u8>& output); + void GetVARegionsImpl(IoctlGetVaRegions& params); NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output); NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output, std::vector<u8>& inline_output); - std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const; - void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated); - std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr); + void FreeMappingLocked(u64 offset); Module& module; NvCore::Container& container; NvCore::NvMap& nvmap; + struct Mapping { + VAddr ptr; + u64 offset; + u64 
size; + bool fixed; + bool big_page; // Only valid if fixed == false + bool sparse_alloc; + + Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_) + : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_), + sparse_alloc(sparse_alloc_) {} + }; + + struct Allocation { + u64 size; + std::list<std::shared_ptr<Mapping>> mappings; + u32 page_size; + bool sparse; + }; + + std::map<u64, std::shared_ptr<Mapping>> + mapping_map; //!< This maps the base addresses of mapped buffers to their total sizes and + //!< mapping type, this is needed as what was originally a single buffer may + //!< have been split into multiple GPU side buffers with the remap flag. + std::map<u64, Allocation> allocation_map; //!< Holds allocations created by AllocSpace from + //!< which fixed buffers can be mapped into + std::mutex mutex; //!< Locks all AS operations + + struct VM { + static constexpr u32 YUZU_PAGESIZE{0x1000}; + static constexpr u32 PAGE_SIZE_BITS{std::countr_zero(YUZU_PAGESIZE)}; + + static constexpr u32 SUPPORTED_BIG_PAGE_SIZES{0x30000}; + static constexpr u32 DEFAULT_BIG_PAGE_SIZE{0x20000}; + u32 big_page_size{DEFAULT_BIG_PAGE_SIZE}; + u32 big_page_size_bits{std::countr_zero(DEFAULT_BIG_PAGE_SIZE)}; + + static constexpr u32 VA_START_SHIFT{10}; + static constexpr u64 DEFAULT_VA_SPLIT{1ULL << 34}; + static constexpr u64 DEFAULT_VA_RANGE{1ULL << 37}; + u64 va_range_start{DEFAULT_BIG_PAGE_SIZE << VA_START_SHIFT}; + u64 va_range_split{DEFAULT_VA_SPLIT}; + u64 va_range_end{DEFAULT_VA_RANGE}; + + using Allocator = Common::FlatAllocator<u32, 0, 32>; + + std::unique_ptr<Allocator> big_page_allocator; + std::shared_ptr<Allocator> + small_page_allocator; //! 
Shared as this is also used by nvhost::GpuChannel + + bool initialised{}; + } vm; std::shared_ptr<Tegra::MemoryManager> gmmu; - // This is expected to be ordered, therefore we must use a map, not unordered_map - std::map<GPUVAddr, BufferMap> buffer_mappings; + // s32 channel{}; + // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE}; }; } // namespace Service::Nvidia::Devices diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 9e946d448..fc68bcc73 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -71,18 +71,22 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) rasterizer = rasterizer_; } -GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) { +GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size); } +GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size) { + return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size); +} + GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) { - return Map(cpu_addr, *FindFreeRange(size, align), size); + return Map(*FindFreeRange(size, align), cpu_addr, size); } GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) { const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true); ASSERT(gpu_addr); - return Map(cpu_addr, *gpu_addr, size); + return Map(*gpu_addr, cpu_addr, size); } void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 0a763fd19..b8878476a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -88,7 +88,8 @@ public: std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const; - [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t 
size); + GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size); + GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size); [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size); |