diff options
Diffstat (limited to 'src/core')
49 files changed, 1774 insertions, 459 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 16ddb5e90..4ff2c1bb7 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -37,6 +37,8 @@ add_library(core STATIC debugger/gdbstub_arch.h debugger/gdbstub.cpp debugger/gdbstub.h + device_memory_manager.h + device_memory_manager.inc device_memory.cpp device_memory.h file_sys/fssystem/fs_i_storage.h @@ -609,6 +611,8 @@ add_library(core STATIC hle/service/ns/pdm_qry.h hle/service/nvdrv/core/container.cpp hle/service/nvdrv/core/container.h + hle/service/nvdrv/core/heap_mapper.cpp + hle/service/nvdrv/core/heap_mapper.h hle/service/nvdrv/core/nvmap.cpp hle/service/nvdrv/core/nvmap.h hle/service/nvdrv/core/syncpoint_manager.cpp diff --git a/src/core/core.cpp b/src/core/core.cpp index 461eea9c8..2392fe136 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -28,6 +28,7 @@ #include "core/file_sys/savedata_factory.h" #include "core/file_sys/vfs_concat.h" #include "core/file_sys/vfs_real.h" +#include "core/gpu_dirty_memory_manager.h" #include "core/hle/kernel/k_memory_manager.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_resource_limit.h" @@ -565,6 +566,9 @@ struct System::Impl { std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{}; std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{}; + std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> + gpu_dirty_memory_managers; + std::deque<std::vector<u8>> user_channel; }; @@ -651,8 +655,14 @@ size_t System::GetCurrentHostThreadID() const { return impl->kernel.GetCurrentHostThreadID(); } -void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) { - return this->ApplicationProcess()->GatherGPUDirtyMemory(callback); +std::span<GPUDirtyMemoryManager> System::GetGPUDirtyMemoryManager() { + return impl->gpu_dirty_memory_managers; +} + +void System::GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback) { + for (auto& manager : impl->gpu_dirty_memory_managers) { + manager.Gather(callback); + } } PerfStatsResults System::GetAndResetPerfStats() { diff --git a/src/core/core.h b/src/core/core.h index ba5add0dc..80446f385 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -8,6 +8,7 @@ #include <functional> #include <memory> #include <mutex> +#include <span> #include <string> #include <vector> @@ -116,6 +117,7 @@ class CpuManager; class Debugger; class DeviceMemory; class ExclusiveMonitor; +class GPUDirtyMemoryManager; class PerfStats; class Reporter; class SpeedLimiter; @@ -224,7 +226,9 @@ public: /// Prepare the core emulation for a reschedule void PrepareReschedule(u32 core_index); - void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback); + std::span<GPUDirtyMemoryManager> GetGPUDirtyMemoryManager(); + + void GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback); [[nodiscard]] size_t GetCurrentHostThreadID() const; diff --git a/src/core/device_memory.h b/src/core/device_memory.h index 13388b73e..11bf0e326 100644 --- a/src/core/device_memory.h +++ b/src/core/device_memory.h @@ -32,6 +32,12 @@ public: } template <typename T> + PAddr GetRawPhysicalAddr(const T* ptr) const { + return static_cast<PAddr>(reinterpret_cast<uintptr_t>(ptr) - + reinterpret_cast<uintptr_t>(buffer.BackingBasePointer())); + } + + template <typename T> T* GetPointer(Common::PhysicalAddress addr) { return reinterpret_cast<T*>(buffer.BackingBasePointer() + (GetInteger(addr) - DramMemoryMap::Base)); @@ -43,6 +49,16 @@ public: (GetInteger(addr) - DramMemoryMap::Base)); } + template <typename T> + T* GetPointerFromRaw(PAddr addr) { + return reinterpret_cast<T*>(buffer.BackingBasePointer() + addr); + } + + template <typename T> + const T* GetPointerFromRaw(PAddr addr) const { + return reinterpret_cast<T*>(buffer.BackingBasePointer() + addr); + } + Common::HostMemory buffer; }; diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h new file mode 100644 index 000000000..ffeed46cc --- /dev/null +++ b/src/core/device_memory_manager.h @@ -0,0 +1,211 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <array> +#include <atomic> +#include <deque> +#include <memory> +#include <mutex> + +#include "common/common_types.h" +#include "common/scratch_buffer.h" +#include "common/virtual_buffer.h" + +namespace Core { + +constexpr size_t DEVICE_PAGEBITS = 12ULL; +constexpr size_t DEVICE_PAGESIZE = 1ULL << DEVICE_PAGEBITS; +constexpr size_t DEVICE_PAGEMASK = DEVICE_PAGESIZE - 1ULL; + +class DeviceMemory; + +namespace Memory { +class Memory; +} + +template <typename DTraits> +struct DeviceMemoryManagerAllocator; + +struct Asid { + size_t id; +}; + +template <typename Traits> +class DeviceMemoryManager { + using DeviceInterface = typename Traits::DeviceInterface; + using DeviceMethods = typename Traits::DeviceMethods; + +public: + DeviceMemoryManager(const DeviceMemory& device_memory); + ~DeviceMemoryManager(); + + void BindInterface(DeviceInterface* device_inter); + + DAddr Allocate(size_t size); + void AllocateFixed(DAddr start, size_t size); + void Free(DAddr start, size_t size); + + void Map(DAddr address, VAddr virtual_address, size_t size, Asid asid, bool track = false); + + void Unmap(DAddr address, size_t size); + + void TrackContinuityImpl(DAddr address, VAddr virtual_address, size_t size, Asid asid); + void TrackContinuity(DAddr address, VAddr virtual_address, size_t size, Asid asid) { + std::scoped_lock lk(mapping_guard); + TrackContinuityImpl(address, virtual_address, size, asid); + } + + // Write / Read + template <typename T> + T* GetPointer(DAddr address); + + template <typename T> + const T* GetPointer(DAddr address) const; + + template <typename Func> + void ApplyOpOnPAddr(PAddr address, Common::ScratchBuffer<u32>& buffer, Func&& operation) { + DAddr subbits = static_cast<DAddr>(address & page_mask); + const u32 base = compressed_device_addr[(address >> page_bits)]; + if ((base >> MULTI_FLAG_BITS) == 0) [[likely]] { + const DAddr d_address = (static_cast<DAddr>(base) << page_bits) + subbits; + operation(d_address); + return; + } + InnerGatherDeviceAddresses(buffer, address); + for (u32 value : buffer) { + operation((static_cast<DAddr>(value) << page_bits) + subbits); + } + } + + template <typename Func> + void ApplyOpOnPointer(const u8* p, Common::ScratchBuffer<u32>& buffer, Func&& operation) { + PAddr address = GetRawPhysicalAddr<u8>(p); + ApplyOpOnPAddr(address, buffer, operation); + } + + PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const { + PAddr subbits = static_cast<PAddr>(address & page_mask); + auto paddr = compressed_physical_ptr[(address >> page_bits)]; + if (paddr == 0) { + return 0; + } + return (static_cast<PAddr>(paddr - 1) << page_bits) + subbits; + } + + template <typename T> + void Write(DAddr address, T value); + + template <typename T> + T Read(DAddr address) const; + + u8* GetSpan(const DAddr src_addr, const std::size_t size); + const u8* GetSpan(const DAddr src_addr, const std::size_t size) const; + + void ReadBlock(DAddr address, void* dest_pointer, size_t size); + void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size); + void WriteBlock(DAddr address, const void* src_pointer, size_t size); + void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size); + + Asid RegisterProcess(Memory::Memory* memory); + void UnregisterProcess(Asid id); + + void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta); + + static constexpr size_t AS_BITS = Traits::device_virtual_bits; + +private: + static constexpr size_t device_virtual_bits = Traits::device_virtual_bits; + static constexpr size_t device_as_size = 1ULL << device_virtual_bits; + static constexpr size_t physical_min_bits = 32; + static constexpr size_t physical_max_bits = 33; + static constexpr size_t page_bits = 12; + static constexpr size_t page_size = 1ULL << page_bits; + static constexpr size_t page_mask = page_size - 1ULL; + static constexpr u32 physical_address_base = 1U << page_bits; + static constexpr u32 MULTI_FLAG_BITS = 31; + static constexpr u32 MULTI_FLAG = 1U << MULTI_FLAG_BITS; + static constexpr u32 MULTI_MASK = ~MULTI_FLAG; + + template <typename T> + T* GetPointerFromRaw(PAddr addr) { + return reinterpret_cast<T*>(physical_base + addr); + } + + template <typename T> + const T* GetPointerFromRaw(PAddr addr) const { + return reinterpret_cast<T*>(physical_base + addr); + } + + template <typename T> + PAddr GetRawPhysicalAddr(const T* ptr) const { + return static_cast<PAddr>(reinterpret_cast<uintptr_t>(ptr) - physical_base); + } + + void WalkBlock(const DAddr addr, const std::size_t size, auto on_unmapped, auto on_memory, + auto increment); + + void InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer, PAddr address); + + std::unique_ptr<DeviceMemoryManagerAllocator<Traits>> impl; + + const uintptr_t physical_base; + DeviceInterface* device_inter; + Common::VirtualBuffer<u32> compressed_physical_ptr; + Common::VirtualBuffer<u32> compressed_device_addr; + Common::VirtualBuffer<u32> continuity_tracker; + + // Process memory interfaces + + std::deque<size_t> id_pool; + std::deque<Memory::Memory*> registered_processes; + + // Memory protection management + + static constexpr size_t guest_max_as_bits = 39; + static constexpr size_t guest_as_size = 1ULL << guest_max_as_bits; + static constexpr size_t guest_mask = guest_as_size - 1ULL; + static constexpr size_t asid_start_bit = guest_max_as_bits; + + std::pair<Asid, VAddr> ExtractCPUBacking(size_t page_index) { + auto content = cpu_backing_address[page_index]; + const VAddr address = content & guest_mask; + const Asid asid{static_cast<size_t>(content >> asid_start_bit)}; + return std::make_pair(asid, address); + } + + void InsertCPUBacking(size_t page_index, VAddr address, Asid asid) { + cpu_backing_address[page_index] = address | (asid.id << asid_start_bit); + } + + Common::VirtualBuffer<VAddr> cpu_backing_address; + static constexpr size_t subentries = 8 / sizeof(u8); + static constexpr size_t subentries_mask = subentries - 1; + class CounterEntry final { + public: + CounterEntry() = default; + + std::atomic_uint8_t& Count(std::size_t page) { + return values[page & subentries_mask]; + } + + const std::atomic_uint8_t& Count(std::size_t page) const { + return values[page & subentries_mask]; + } + + private: + std::array<std::atomic_uint8_t, subentries> values{}; + }; + static_assert(sizeof(CounterEntry) == subentries * sizeof(u8), + "CounterEntry should be 8 bytes!"); + + static constexpr size_t num_counter_entries = + (1ULL << (device_virtual_bits - page_bits)) / subentries; + using CachedPages = std::array<CounterEntry, num_counter_entries>; + std::unique_ptr<CachedPages> cached_pages; + std::mutex counter_guard; + std::mutex mapping_guard; +}; + +} // namespace Core diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc new file mode 100644 index 000000000..8ce122872 --- /dev/null +++ b/src/core/device_memory_manager.inc @@ -0,0 +1,582 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <atomic> +#include <limits> +#include <memory> +#include <type_traits> + +#include "common/address_space.h" +#include "common/address_space.inc" +#include "common/alignment.h" +#include "common/assert.h" +#include "common/div_ceil.h" +#include "common/scope_exit.h" +#include "common/settings.h" +#include "core/device_memory.h" +#include "core/device_memory_manager.h" +#include "core/memory.h" + +namespace Core { + +namespace { + +class MultiAddressContainer { +public: + MultiAddressContainer() = default; + ~MultiAddressContainer() = default; + + void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) { + buffer.resize(8); + buffer.resize(0); + size_t index = 0; + const auto add_value = [&](u32 value) { + buffer[index] = value; + index++; + buffer.resize(index); + }; + + u32 iter_entry = start_entry; + Entry* current = &storage[iter_entry - 1]; + add_value(current->value); + while (current->next_entry != 0) { + iter_entry = current->next_entry; + current = &storage[iter_entry - 1]; + add_value(current->value); + } + } + + u32 Register(u32 value) { + return RegisterImplementation(value); + } + + void Register(u32 value, u32 start_entry) { + auto entry_id = RegisterImplementation(value); + u32 iter_entry = start_entry; + Entry* current = &storage[iter_entry - 1]; + while (current->next_entry != 0) { + iter_entry = current->next_entry; + current = &storage[iter_entry - 1]; + } + current->next_entry = entry_id; + } + + std::pair<bool, u32> Unregister(u32 value, u32 start_entry) { + u32 iter_entry = start_entry; + Entry* previous{}; + Entry* current = &storage[iter_entry - 1]; + Entry* next{}; + bool more_than_one_remaining = false; + u32 result_start{start_entry}; + size_t count = 0; + while (current->value != value) { + count++; + previous = current; + iter_entry = current->next_entry; + current = &storage[iter_entry - 1]; + } + // Find next + u32 next_entry = current->next_entry; + if (next_entry != 0) { + next = &storage[next_entry - 1]; + more_than_one_remaining = next->next_entry != 0 || previous != nullptr; + } + if (previous) { + previous->next_entry = next_entry; + } else { + result_start = next_entry; + } + free_entries.emplace_back(iter_entry); + return std::make_pair(more_than_one_remaining || count > 1, result_start); + } + + u32 ReleaseEntry(u32 start_entry) { + Entry* current = &storage[start_entry - 1]; + free_entries.emplace_back(start_entry); + return current->value; + } + +private: + u32 RegisterImplementation(u32 value) { + auto entry_id = GetNewEntry(); + auto& entry = storage[entry_id - 1]; + entry.next_entry = 0; + entry.value = value; + return entry_id; + } + u32 GetNewEntry() { + if (!free_entries.empty()) { + u32 result = free_entries.front(); + free_entries.pop_front(); + return result; + } + storage.emplace_back(); + u32 new_entry = static_cast<u32>(storage.size()); + return new_entry; + } + + struct Entry { + u32 next_entry{}; + u32 value{}; + }; + + std::deque<Entry> storage; + std::deque<u32> free_entries; +}; + +struct EmptyAllocator { + EmptyAllocator([[maybe_unused]] DAddr address) {} +}; + +} // namespace + +template <typename DTraits> +struct DeviceMemoryManagerAllocator { + static constexpr size_t device_virtual_bits = DTraits::device_virtual_bits; + static constexpr DAddr first_address = 1ULL << Memory::YUZU_PAGEBITS; + static constexpr DAddr max_device_area = 1ULL << device_virtual_bits; + + DeviceMemoryManagerAllocator() : main_allocator(first_address) {} + + Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator; + MultiAddressContainer multi_dev_address; + + /// Returns true when vaddr -> vaddr+size is fully contained in the buffer + template <bool pin_area> + [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { + return addr >= 0 && addr + size <= max_device_area; + } + + DAddr Allocate(size_t size) { + return main_allocator.Allocate(size); + } + + void AllocateFixed(DAddr b_address, size_t b_size) { + main_allocator.AllocateFixed(b_address, b_size); + } + + void Free(DAddr b_address, size_t b_size) { + main_allocator.Free(b_address, b_size); + } +}; + +template <typename Traits> +DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memory_) + : physical_base{reinterpret_cast<const uintptr_t>(device_memory_.buffer.BackingBasePointer())}, + device_inter{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS), + compressed_device_addr(1ULL << ((Settings::values.memory_layout_mode.GetValue() == + Settings::MemoryLayout::Memory_4Gb + ? physical_min_bits + : physical_max_bits) - + Memory::YUZU_PAGEBITS)), + continuity_tracker(device_as_size >> Memory::YUZU_PAGEBITS), + cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) { + impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); + cached_pages = std::make_unique<CachedPages>(); + + const size_t total_virtual = device_as_size >> Memory::YUZU_PAGEBITS; + for (size_t i = 0; i < total_virtual; i++) { + compressed_physical_ptr[i] = 0; + continuity_tracker[i] = 1; + cpu_backing_address[i] = 0; + } + const size_t total_phys = 1ULL << ((Settings::values.memory_layout_mode.GetValue() == + Settings::MemoryLayout::Memory_4Gb + ? physical_min_bits + : physical_max_bits) - + Memory::YUZU_PAGEBITS); + for (size_t i = 0; i < total_phys; i++) { + compressed_device_addr[i] = 0; + } +} + +template <typename Traits> +DeviceMemoryManager<Traits>::~DeviceMemoryManager() = default; + +template <typename Traits> +void DeviceMemoryManager<Traits>::BindInterface(DeviceInterface* device_inter_) { + device_inter = device_inter_; +} + +template <typename Traits> +DAddr DeviceMemoryManager<Traits>::Allocate(size_t size) { + return impl->Allocate(size); +} + +template <typename Traits> +void DeviceMemoryManager<Traits>::AllocateFixed(DAddr start, size_t size) { + return impl->AllocateFixed(start, size); +} + +template <typename Traits> +void DeviceMemoryManager<Traits>::Free(DAddr start, size_t size) { + impl->Free(start, size); +} + +template <typename Traits> +void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size, + Asid asid, bool track) { + Core::Memory::Memory* process_memory = registered_processes[asid.id]; + size_t start_page_d = address >> Memory::YUZU_PAGEBITS; + size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; + std::scoped_lock lk(mapping_guard); + for (size_t i = 0; i < num_pages; i++) { + const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE; + auto* ptr = process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress)); + if (ptr == nullptr) [[unlikely]] { + compressed_physical_ptr[start_page_d + i] = 0; + continue; + } + auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U; + compressed_physical_ptr[start_page_d + i] = phys_addr; + InsertCPUBacking(start_page_d + i, new_vaddress, asid); + const u32 base_dev = compressed_device_addr[phys_addr - 1U]; + const u32 new_dev = static_cast<u32>(start_page_d + i); + if (base_dev == 0) [[likely]] { + compressed_device_addr[phys_addr - 1U] = new_dev; + continue; + } + u32 start_id = base_dev & MULTI_MASK; + if ((base_dev >> MULTI_FLAG_BITS) == 0) { + start_id = impl->multi_dev_address.Register(base_dev); + compressed_device_addr[phys_addr - 1U] = MULTI_FLAG | start_id; + } + impl->multi_dev_address.Register(new_dev, start_id); + } + if (track) { + TrackContinuityImpl(address, virtual_address, size, asid); + } +} + +template <typename Traits> +void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) { + size_t start_page_d = address >> Memory::YUZU_PAGEBITS; + size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; + device_inter->InvalidateRegion(address, size); + std::scoped_lock lk(mapping_guard); + for (size_t i = 0; i < num_pages; i++) { + auto phys_addr = compressed_physical_ptr[start_page_d + i]; + compressed_physical_ptr[start_page_d + i] = 0; + cpu_backing_address[start_page_d + i] = 0; + if (phys_addr != 0) [[likely]] { + const u32 base_dev = compressed_device_addr[phys_addr - 1U]; + if ((base_dev >> MULTI_FLAG_BITS) == 0) [[likely]] { + compressed_device_addr[phys_addr - 1] = 0; + continue; + } + const auto [more_entries, new_start] = impl->multi_dev_address.Unregister( + static_cast<u32>(start_page_d + i), base_dev & MULTI_MASK); + if (!more_entries) { + compressed_device_addr[phys_addr - 1] = + impl->multi_dev_address.ReleaseEntry(new_start); + continue; + } + compressed_device_addr[phys_addr - 1] = new_start | MULTI_FLAG; + } + } +} +template <typename Traits> +void DeviceMemoryManager<Traits>::TrackContinuityImpl(DAddr address, VAddr virtual_address, + size_t size, Asid asid) { + Core::Memory::Memory* process_memory = registered_processes[asid.id]; + size_t start_page_d = address >> Memory::YUZU_PAGEBITS; + size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; + uintptr_t last_ptr = 0; + size_t page_count = 1; + for (size_t i = num_pages; i > 0; i--) { + size_t index = i - 1; + const VAddr new_vaddress = virtual_address + index * Memory::YUZU_PAGESIZE; + const uintptr_t new_ptr = reinterpret_cast<uintptr_t>( + process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress))); + if (new_ptr + page_size == last_ptr) { + page_count++; + } else { + page_count = 1; + } + last_ptr = new_ptr; + continuity_tracker[start_page_d + index] = static_cast<u32>(page_count); + } +} +template <typename Traits> +u8* DeviceMemoryManager<Traits>::GetSpan(const DAddr src_addr, const std::size_t size) { + size_t page_index = src_addr >> page_bits; + size_t subbits = src_addr & page_mask; + if ((static_cast<size_t>(continuity_tracker[page_index]) << page_bits) >= size + subbits) { + return GetPointer<u8>(src_addr); + } + return nullptr; +} + +template <typename Traits> +const u8* DeviceMemoryManager<Traits>::GetSpan(const DAddr src_addr, const std::size_t size) const { + size_t page_index = src_addr >> page_bits; + size_t subbits = src_addr & page_mask; + if ((static_cast<size_t>(continuity_tracker[page_index]) << page_bits) >= size + subbits) { + return GetPointer<u8>(src_addr); + } + return nullptr; +} + +template <typename Traits> +void DeviceMemoryManager<Traits>::InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer, + PAddr address) { + size_t phys_addr = address >> page_bits; + std::scoped_lock lk(mapping_guard); + u32 backing = compressed_device_addr[phys_addr]; + if ((backing >> MULTI_FLAG_BITS) != 0) { + impl->multi_dev_address.GatherValues(backing & MULTI_MASK, buffer); + return; + } + buffer.resize(1); + buffer[0] = backing; +} + +template <typename Traits> +template <typename T> +T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) { + const size_t index = address >> Memory::YUZU_PAGEBITS; + const size_t offset = address & Memory::YUZU_PAGEMASK; + auto phys_addr = compressed_physical_ptr[index]; + if (phys_addr == 0) [[unlikely]] { + return nullptr; + } + return GetPointerFromRaw<T>((static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) + + offset); +} + +template <typename Traits> +template <typename T> +const T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) const { + const size_t index = address >> Memory::YUZU_PAGEBITS; + const size_t offset = address & Memory::YUZU_PAGEMASK; + auto phys_addr = compressed_physical_ptr[index]; + if (phys_addr == 0) [[unlikely]] { + return nullptr; + } + return GetPointerFromRaw<T>((static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) + + offset); +} + +template <typename Traits> +template <typename T> +void DeviceMemoryManager<Traits>::Write(DAddr address, T value) { + T* ptr = GetPointer<T>(address); + if (!ptr) [[unlikely]] { + return; + } + std::memcpy(ptr, &value, sizeof(T)); +} + +template <typename Traits> +template <typename T> +T DeviceMemoryManager<Traits>::Read(DAddr address) const { + const T* ptr = GetPointer<T>(address); + T result{}; + if (!ptr) [[unlikely]] { + return result; + } + std::memcpy(&result, ptr, sizeof(T)); + return result; +} + +template <typename Traits> +void DeviceMemoryManager<Traits>::WalkBlock(DAddr addr, std::size_t size, auto on_unmapped, + auto on_memory, auto increment) { + std::size_t remaining_size = size; + std::size_t page_index = addr >> Memory::YUZU_PAGEBITS; + std::size_t page_offset = addr & Memory::YUZU_PAGEMASK; + + while (remaining_size) { + const size_t next_pages = static_cast<std::size_t>(continuity_tracker[page_index]); + const std::size_t copy_amount = + std::min((next_pages << Memory::YUZU_PAGEBITS) - page_offset, remaining_size); + const auto current_vaddr = + static_cast<u64>((page_index << Memory::YUZU_PAGEBITS) + page_offset); + SCOPE_EXIT({ + page_index += next_pages; + page_offset = 0; + increment(copy_amount); + remaining_size -= copy_amount; + }); + + auto phys_addr = compressed_physical_ptr[page_index]; + if (phys_addr == 0) { + on_unmapped(copy_amount, current_vaddr); + continue; + } + auto* mem_ptr = GetPointerFromRaw<u8>( + (static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) + page_offset); + on_memory(copy_amount, mem_ptr); + } +} + +template <typename Traits> +void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) { + device_inter->FlushRegion(address, size); + WalkBlock( + address, size, + [&](size_t copy_amount, DAddr current_vaddr) { + LOG_ERROR( + HW_Memory, + "Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", + current_vaddr, address, size); + std::memset(dest_pointer, 0, copy_amount); + }, + [&](size_t copy_amount, const u8* const src_ptr) { + std::memcpy(dest_pointer, src_ptr, copy_amount); + }, + [&](const std::size_t copy_amount) { + dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount; + }); +} + +template <typename Traits> +void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, const void* src_pointer, size_t size) { + WalkBlock( + address, size, + [&](size_t copy_amount, DAddr current_vaddr) { + LOG_ERROR( + HW_Memory, + "Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", + current_vaddr, address, size); + }, + [&](size_t copy_amount, u8* const dst_ptr) { + std::memcpy(dst_ptr, src_pointer, copy_amount); + }, + [&](const std::size_t copy_amount) { + src_pointer = static_cast<const u8*>(src_pointer) + copy_amount; + }); + device_inter->InvalidateRegion(address, size); +} + +template <typename Traits> +void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size) { + WalkBlock( + address, size, + [&](size_t copy_amount, DAddr current_vaddr) { + LOG_ERROR( + HW_Memory, + "Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", + current_vaddr, address, size); + std::memset(dest_pointer, 0, copy_amount); + }, + [&](size_t copy_amount, const u8* const src_ptr) { + std::memcpy(dest_pointer, src_ptr, copy_amount); + }, + [&](const std::size_t copy_amount) { + dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount; + }); +} + +template <typename Traits> +void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer, + size_t size) { + WalkBlock( + address, size, + [&](size_t copy_amount, DAddr current_vaddr) { + LOG_ERROR( + HW_Memory, + "Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", + current_vaddr, address, size); + }, + [&](size_t copy_amount, u8* const dst_ptr) { + std::memcpy(dst_ptr, src_pointer, copy_amount); + }, + [&](const std::size_t copy_amount) { + src_pointer = static_cast<const u8*>(src_pointer) + copy_amount; + }); +} + +template <typename Traits> +Asid DeviceMemoryManager<Traits>::RegisterProcess(Memory::Memory* memory_device_inter) { + size_t new_id{}; + if (!id_pool.empty()) { + new_id = id_pool.front(); + id_pool.pop_front(); + registered_processes[new_id] = memory_device_inter; + } else { + registered_processes.emplace_back(memory_device_inter); + new_id = registered_processes.size() - 1U; + } + return Asid{new_id}; +} + +template <typename Traits> +void DeviceMemoryManager<Traits>::UnregisterProcess(Asid asid) { + registered_processes[asid.id] = nullptr; + id_pool.push_front(asid.id); +} + +template <typename Traits> +void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) { + std::unique_lock<std::mutex> lk(counter_guard, std::defer_lock); + const auto Lock = [&] { + if (!lk) { + lk.lock(); + } + }; + u64 uncache_begin = 0; + u64 cache_begin = 0; + u64 uncache_bytes = 0; + u64 cache_bytes = 0; + const auto MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching; + + std::atomic_thread_fence(std::memory_order_acquire); + const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE); + size_t page = addr >> Memory::YUZU_PAGEBITS; + auto [asid, base_vaddress] = ExtractCPUBacking(page); + size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS; + auto* memory_device_inter = registered_processes[asid.id]; + for (; page != page_end; ++page) { + std::atomic_uint8_t& count = cached_pages->at(page >> 3).Count(page); + + if (delta > 0) { + ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits<u8>::max(), + "Count may overflow!"); + } else if (delta < 0) { + ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); + } else { + ASSERT_MSG(false, "Delta must be non-zero!"); + } + + // Adds or subtracts 1, as count is a unsigned 8-bit value + count.fetch_add(static_cast<u8>(delta), std::memory_order_release); + + // Assume delta is either -1 or 1 + if (count.load(std::memory_order::relaxed) == 0) { + if (uncache_bytes == 0) { + uncache_begin = vpage; + } + uncache_bytes += Memory::YUZU_PAGESIZE; + } else if (uncache_bytes > 0) { + Lock(); + MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, + uncache_bytes, false); + uncache_bytes = 0; + } + if (count.load(std::memory_order::relaxed) == 1 && delta > 0) { + if (cache_bytes == 0) { + cache_begin = vpage; + } + cache_bytes += Memory::YUZU_PAGESIZE; + } else if (cache_bytes > 0) { + Lock(); + MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, + true); + cache_bytes = 0; + } + vpage++; + } + if (uncache_bytes > 0) { + Lock(); + MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, + false); + } + if (cache_bytes > 0) { + Lock(); + MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, + true); + } +} + +} // namespace Core diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h index 9687531e8..cc8fc176f 100644 --- a/src/core/gpu_dirty_memory_manager.h +++ b/src/core/gpu_dirty_memory_manager.h @@ -10,7 +10,7 @@ #include <utility> #include <vector> -#include "core/memory.h" +#include "core/device_memory_manager.h" namespace Core { @@ -23,7 +23,7 @@ public: ~GPUDirtyMemoryManager() = default; - void Collect(VAddr address, size_t size) { + void Collect(PAddr address, size_t size) { TransformAddress t = BuildTransform(address, size); TransformAddress tmp, original; do { @@ -47,7 +47,7 @@ public: std::memory_order_relaxed)); } - void Gather(std::function<void(VAddr, size_t)>& callback) { + void Gather(std::function<void(PAddr, size_t)>& callback) { { std::scoped_lock lk(guard); TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed); @@ -65,7 +65,7 @@ public: mask = mask >> empty_bits; const size_t continuous_bits = std::countr_one(mask); - callback((static_cast<VAddr>(transform.address) << page_bits) + offset, + callback((static_cast<PAddr>(transform.address) << page_bits) + offset, continuous_bits << align_bits); mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; offset += continuous_bits << align_bits; @@ -80,7 +80,7 @@ private: u32 mask; }; - constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1; + constexpr static size_t page_bits = DEVICE_PAGEBITS - 1; constexpr static size_t page_size = 1ULL << page_bits; constexpr static size_t page_mask = page_size - 1; @@ -89,7 +89,7 @@ private: constexpr static size_t align_mask = align_size - 1; constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U}; - bool IsValid(VAddr address) { + bool IsValid(PAddr address) { return address < (1ULL << 39); } @@ -103,7 +103,7 @@ private: return mask; } - TransformAddress BuildTransform(VAddr address, size_t size) { + TransformAddress BuildTransform(PAddr address, size_t size) { const size_t minor_address = address & page_mask; const size_t minor_bit = minor_address >> align_bits; const size_t top_bit = (minor_address + size + align_mask) >> align_bits; diff --git a/src/core/guest_memory.h b/src/core/guest_memory.h new file mode 100644 index 000000000..7ee18c126 --- /dev/null +++ b/src/core/guest_memory.h @@ -0,0 +1,214 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <iterator> +#include <memory> +#include <optional> +#include <span> +#include <vector> + +#include "common/assert.h" +#include "common/scratch_buffer.h" + +namespace Core::Memory { + +enum GuestMemoryFlags : u32 { + Read = 1 << 0, + Write = 1 << 1, + Safe = 1 << 2, + Cached = 1 << 3, + + SafeRead = Read | Safe, + SafeWrite = Write | Safe, + SafeReadWrite = SafeRead | SafeWrite, + SafeReadCachedWrite = SafeReadWrite | Cached, + + UnsafeRead = Read, + UnsafeWrite = Write, + UnsafeReadWrite = UnsafeRead | UnsafeWrite, + UnsafeReadCachedWrite = UnsafeReadWrite | Cached, +}; + +namespace { +template <typename M, typename T, GuestMemoryFlags FLAGS> +class GuestMemory { + using iterator = T*; + using const_iterator = const T*; + using value_type = T; + using element_type = T; + using iterator_category = std::contiguous_iterator_tag; + +public: + GuestMemory() = delete; + explicit GuestMemory(M& memory, u64 addr, std::size_t size, + Common::ScratchBuffer<T>* backup = nullptr) + : m_memory{memory}, m_addr{addr}, m_size{size} { + static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write); + if constexpr (FLAGS & GuestMemoryFlags::Read) { + Read(addr, size, backup); + } + } + + ~GuestMemory() = default; + + T* data() noexcept { + return m_data_span.data(); + } + + const T* data() const noexcept { + return m_data_span.data(); + } + + size_t size() const noexcept { + return m_size; + } + + size_t size_bytes() const noexcept { + return this->size() * sizeof(T); + } + + [[nodiscard]] T* begin() noexcept { + return this->data(); + } + + [[nodiscard]] const T* begin() const noexcept { + return this->data(); + } + + [[nodiscard]] T* end() noexcept { + return this->data() + this->size(); + } + + [[nodiscard]] const T* end() const noexcept { + return this->data() + this->size(); + } + + T& operator[](size_t index) noexcept { + return m_data_span[index]; + } + + const T& operator[](size_t index) const noexcept { + return m_data_span[index]; + } + + void SetAddressAndSize(u64 addr, std::size_t size) noexcept { + m_addr = addr; + m_size = size; + m_addr_changed = true; + } + + std::span<T> Read(u64 addr, std::size_t size, + Common::ScratchBuffer<T>* backup = nullptr) noexcept { + m_addr = addr; + m_size = size; + if (m_size == 0) { + m_is_data_copy = true; + return {}; + } + + if (this->TrySetSpan()) { + if constexpr (FLAGS & GuestMemoryFlags::Safe) { + m_memory.FlushRegion(m_addr, this->size_bytes()); + } + } else { + if (backup) { + backup->resize_destructive(this->size()); + m_data_span = *backup; + } else { + m_data_copy.resize(this->size()); + m_data_span = std::span(m_data_copy); + } + m_is_data_copy = true; + m_span_valid = true; + if constexpr (FLAGS & GuestMemoryFlags::Safe) { + m_memory.ReadBlock(m_addr, this->data(), this->size_bytes()); + } else { + m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes()); + } + } + return m_data_span; + } + + void Write(std::span<T> write_data) noexcept { + if constexpr (FLAGS & GuestMemoryFlags::Cached) { + m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes()); + } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { + m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes()); + } else { + m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes()); + } + } + + bool TrySetSpan() noexcept { + if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) { + m_data_span = {reinterpret_cast<T*>(ptr), this->size()}; + m_span_valid = true; + return true; + } + return false; + } + +protected: + bool IsDataCopy() const noexcept { + return m_is_data_copy; + } + + bool AddressChanged() const noexcept { + return m_addr_changed; + } + + M& m_memory; + u64 m_addr{}; + size_t m_size{}; + std::span<T> m_data_span{}; + std::vector<T> m_data_copy{}; + bool m_span_valid{false}; + bool m_is_data_copy{false}; + bool m_addr_changed{false}; +}; + +template <typename M, typename T, GuestMemoryFlags FLAGS> +class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> { +public: + GuestMemoryScoped() = delete; + explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size, + Common::ScratchBuffer<T>* backup = nullptr) + : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) { + if constexpr (!(FLAGS & GuestMemoryFlags::Read)) { + if (!this->TrySetSpan()) { + if (backup) { + this->m_data_span = *backup; + this->m_span_valid = true; + this->m_is_data_copy = true; + } + } + } + } + + ~GuestMemoryScoped() { + if constexpr (FLAGS & GuestMemoryFlags::Write) { + if (this->size() == 0) [[unlikely]] { + return; + } + + if (this->AddressChanged() || this->IsDataCopy()) { + ASSERT(this->m_span_valid); + if constexpr (FLAGS & GuestMemoryFlags::Cached) { + this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes()); + } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { + this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes()); + } else { + this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes()); + } + } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) || + (FLAGS & GuestMemoryFlags::Cached)) { + this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes()); + } + } + } +}; +} // namespace + +} // namespace Core::Memory diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 53735a225..0b08e877e 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -5,6 +5,7 @@ #include "common/scope_exit.h" #include "common/settings.h" #include "core/core.h" +#include "core/gpu_dirty_memory_manager.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_scoped_resource_reservation.h" #include "core/hle/kernel/k_shared_memory.h" @@ -320,7 +321,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa // Ensure our memory is initialized. m_memory.SetCurrentPageTable(*this); - m_memory.SetGPUDirtyManagers(m_dirty_memory_managers); + m_memory.SetGPUDirtyManagers(m_kernel.System().GetGPUDirtyMemoryManager()); // Ensure we can insert the code region. R_UNLESS(m_page_table.CanContain(params.code_address, params.code_num_pages * PageSize, @@ -417,7 +418,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, // Ensure our memory is initialized. m_memory.SetCurrentPageTable(*this); - m_memory.SetGPUDirtyManagers(m_dirty_memory_managers); + m_memory.SetGPUDirtyManagers(m_kernel.System().GetGPUDirtyMemoryManager()); // Ensure we can insert the code region. R_UNLESS(m_page_table.CanContain(params.code_address, code_size, KMemoryState::Code), @@ -1141,8 +1142,7 @@ void KProcess::Switch(KProcess* cur_process, KProcess* next_process) {} KProcess::KProcess(KernelCore& kernel) : KAutoObjectWithSlabHeapAndContainer(kernel), m_page_table{kernel}, m_state_lock{kernel}, m_list_lock{kernel}, m_cond_var{kernel.System()}, m_address_arbiter{kernel.System()}, - m_handle_table{kernel}, m_dirty_memory_managers{}, - m_exclusive_monitor{}, m_memory{kernel.System()} {} + m_handle_table{kernel}, m_exclusive_monitor{}, m_memory{kernel.System()} {} KProcess::~KProcess() = default; Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, @@ -1324,10 +1324,4 @@ bool KProcess::RemoveWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointT return true; } -void KProcess::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) { - for (auto& manager : m_dirty_memory_managers) { - manager.Gather(callback); - } -} - } // namespace Kernel diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index 53c0e3316..ab1358a12 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -7,7 +7,6 @@ #include "core/arm/arm_interface.h" #include "core/file_sys/program_metadata.h" -#include "core/gpu_dirty_memory_manager.h" #include "core/hle/kernel/code_set.h" #include "core/hle/kernel/k_address_arbiter.h" #include "core/hle/kernel/k_capabilities.h" @@ -128,7 +127,6 @@ private: #ifdef HAS_NCE std::unordered_map<u64, u64> m_post_handlers{}; #endif - std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> m_dirty_memory_managers; std::unique_ptr<Core::ExclusiveMonitor> m_exclusive_monitor; Core::Memory::Memory m_memory; @@ -511,8 +509,6 @@ public: return m_memory; } - void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback); - Core::ExclusiveMonitor& GetExclusiveMonitor() const { return *m_exclusive_monitor; } diff --git a/src/core/hle/service/hle_ipc.cpp b/src/core/hle/service/hle_ipc.cpp index 3f38ceb03..e491dd260 100644 --- a/src/core/hle/service/hle_ipc.cpp +++ b/src/core/hle/service/hle_ipc.cpp @@ -12,6 +12,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "common/scratch_buffer.h" +#include "core/guest_memory.h" #include "core/hle/kernel/k_auto_object.h" #include "core/hle/kernel/k_handle_table.h" #include "core/hle/kernel/k_process.h" @@ -23,19 +24,6 @@ #include "core/hle/service/ipc_helpers.h" #include "core/memory.h" -namespace { -static thread_local std::array read_buffer_data_a{ - Common::ScratchBuffer<u8>(), - Common::ScratchBuffer<u8>(), - Common::ScratchBuffer<u8>(), -}; -static thread_local std::array read_buffer_data_x{ - Common::ScratchBuffer<u8>(), - Common::ScratchBuffer<u8>(), - Common::ScratchBuffer<u8>(), -}; -} // Anonymous namespace - namespace Service { SessionRequestHandler::SessionRequestHandler(Kernel::KernelCore& kernel_, const char* service_name_) @@ -343,48 +331,27 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons } std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const { - static thread_local std::array read_buffer_a{ - Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), - Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), - Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), - }; + Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); ASSERT_OR_EXECUTE_MSG( BufferDescriptorA().size() > buffer_index, { return {}; }, "BufferDescriptorA invalid buffer_index {}", buffer_index); - auto& read_buffer = read_buffer_a[buffer_index]; - return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), - BufferDescriptorA()[buffer_index].Size(), - &read_buffer_data_a[buffer_index]); + return gm.Read(BufferDescriptorA()[buffer_index].Address(), + BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]); } std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const { - static thread_local std::array read_buffer_x{ - Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), - Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), - Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), - }; + Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); ASSERT_OR_EXECUTE_MSG( BufferDescriptorX().size() > buffer_index, { return {}; }, "BufferDescriptorX invalid buffer_index {}", buffer_index); - auto& read_buffer = read_buffer_x[buffer_index]; - return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), - BufferDescriptorX()[buffer_index].Size(), - &read_buffer_data_x[buffer_index]); + return gm.Read(BufferDescriptorX()[buffer_index].Address(), + BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]); } std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { - static thread_local std::array read_buffer_a{ - Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), - Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), - Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), - }; - static thread_local std::array read_buffer_x{ - Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), - Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), - Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), - }; + Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && BufferDescriptorA()[buffer_index].Size()}; @@ -401,18 +368,14 @@ std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) cons ASSERT_OR_EXECUTE_MSG( BufferDescriptorA().size() > buffer_index, { return {}; }, "BufferDescriptorA invalid buffer_index {}", buffer_index); - auto& read_buffer = read_buffer_a[buffer_index]; - return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), - BufferDescriptorA()[buffer_index].Size(), - &read_buffer_data_a[buffer_index]); + return gm.Read(BufferDescriptorA()[buffer_index].Address(), + BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]); } else { ASSERT_OR_EXECUTE_MSG( BufferDescriptorX().size() > buffer_index, { return {}; }, "BufferDescriptorX invalid buffer_index {}", buffer_index); - auto& read_buffer = read_buffer_x[buffer_index]; - return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), - BufferDescriptorX()[buffer_index].Size(), - &read_buffer_data_x[buffer_index]); + return gm.Read(BufferDescriptorX()[buffer_index].Address(), + BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]); } } diff --git a/src/core/hle/service/hle_ipc.h b/src/core/hle/service/hle_ipc.h index 440737db5..8329d7265 100644 --- a/src/core/hle/service/hle_ipc.h +++ b/src/core/hle/service/hle_ipc.h @@ -41,6 +41,8 @@ class KernelCore; class KHandleTable; class KProcess; class KServerSession; +template <typename T> +class KScopedAutoObject; class KThread; } // namespace Kernel @@ -424,6 +426,9 @@ private: Kernel::KernelCore& kernel; Core::Memory::Memory& memory; + + mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_a{}; + mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_x{}; }; } // namespace Service diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp index 37ca24f5d..21ef57d27 100644 --- a/src/core/hle/service/nvdrv/core/container.cpp +++ b/src/core/hle/service/nvdrv/core/container.cpp @@ -2,27 +2,135 @@ // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors // SPDX-License-Identifier: GPL-3.0-or-later +#include <atomic> +#include <deque> +#include <mutex> + +#include "core/hle/kernel/k_process.h" #include "core/hle/service/nvdrv/core/container.h" +#include "core/hle/service/nvdrv/core/heap_mapper.h" #include "core/hle/service/nvdrv/core/nvmap.h" #include "core/hle/service/nvdrv/core/syncpoint_manager.h" +#include "core/memory.h" #include "video_core/host1x/host1x.h" namespace Service::Nvidia::NvCore { +Session::Session(SessionId id_, Kernel::KProcess* process_, Core::Asid asid_) + : id{id_}, process{process_}, asid{asid_}, has_preallocated_area{}, mapper{}, is_active{} {} + +Session::~Session() = default; + struct ContainerImpl { - explicit ContainerImpl(Tegra::Host1x::Host1x& host1x_) - : file{host1x_}, manager{host1x_}, device_file_data{} {} + explicit ContainerImpl(Container& core, Tegra::Host1x::Host1x& host1x_) + : host1x{host1x_}, file{core, host1x_}, manager{host1x_}, device_file_data{} {} + Tegra::Host1x::Host1x& host1x; NvMap file; SyncpointManager manager; Container::Host1xDeviceFileData device_file_data; + std::deque<Session> sessions; + size_t new_ids{}; + std::deque<size_t> id_pool; + std::mutex session_guard; }; Container::Container(Tegra::Host1x::Host1x& host1x_) { - impl = std::make_unique<ContainerImpl>(host1x_); + impl = std::make_unique<ContainerImpl>(*this, host1x_); } Container::~Container() = default; +SessionId Container::OpenSession(Kernel::KProcess* process) { + using namespace Common::Literals; + + std::scoped_lock lk(impl->session_guard); + for (auto& session : impl->sessions) { + if (!session.is_active) { + continue; + } + if (session.process == process) { + return session.id; + } + } + size_t new_id{}; + auto* memory_interface = &process->GetMemory(); + auto& smmu = impl->host1x.MemoryManager(); + auto asid = smmu.RegisterProcess(memory_interface); + if (!impl->id_pool.empty()) { + new_id = impl->id_pool.front(); + impl->id_pool.pop_front(); + impl->sessions[new_id] = Session{SessionId{new_id}, process, asid}; + } else { + new_id = impl->new_ids++; + impl->sessions.emplace_back(SessionId{new_id}, process, asid); + } + auto& session = impl->sessions[new_id]; + session.is_active = true; + // Optimization + if (process->IsApplication()) { + auto& page_table = process->GetPageTable().GetBasePageTable(); + auto heap_start = page_table.GetHeapRegionStart(); + + Kernel::KProcessAddress cur_addr = heap_start; + size_t region_size = 0; + VAddr region_start = 0; + while (true) { + Kernel::KMemoryInfo mem_info{}; + Kernel::Svc::PageInfo page_info{}; + R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info), + cur_addr)); + auto svc_mem_info = mem_info.GetSvcMemoryInfo(); + + // Check if this memory block is heap. + if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) { + if (svc_mem_info.size > region_size) { + region_size = svc_mem_info.size; + region_start = svc_mem_info.base_address; + } + } + + // Check if we're done. + const uintptr_t next_address = svc_mem_info.base_address + svc_mem_info.size; + if (next_address <= GetInteger(cur_addr)) { + break; + } + + cur_addr = next_address; + } + session.has_preallocated_area = false; + auto start_region = region_size >= 32_MiB ? smmu.Allocate(region_size) : 0; + if (start_region != 0) { + session.mapper = std::make_unique<HeapMapper>(region_start, start_region, region_size, + asid, impl->host1x); + smmu.TrackContinuity(start_region, region_start, region_size, asid); + session.has_preallocated_area = true; + LOG_DEBUG(Debug, "Preallocation created!"); + } + } + return SessionId{new_id}; +} + +void Container::CloseSession(SessionId session_id) { + std::scoped_lock lk(impl->session_guard); + auto& session = impl->sessions[session_id.id]; + auto& smmu = impl->host1x.MemoryManager(); + if (session.has_preallocated_area) { + const DAddr region_start = session.mapper->GetRegionStart(); + const size_t region_size = session.mapper->GetRegionSize(); + session.mapper.reset(); + smmu.Free(region_start, region_size); + session.has_preallocated_area = false; + } + session.is_active = false; + smmu.UnregisterProcess(impl->sessions[session_id.id].asid); + impl->id_pool.emplace_front(session_id.id); +} + +Session* Container::GetSession(SessionId session_id) { + std::atomic_thread_fence(std::memory_order_acquire); + return &impl->sessions[session_id.id]; +} + NvMap& Container::GetNvMapFile() { return impl->file; } diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h index b4b63ac90..b4d3938a8 100644 --- a/src/core/hle/service/nvdrv/core/container.h +++ b/src/core/hle/service/nvdrv/core/container.h @@ -8,24 +8,56 @@ #include <memory> #include <unordered_map> +#include "core/device_memory_manager.h" #include "core/hle/service/nvdrv/nvdata.h" +namespace Kernel { +class KProcess; +} + namespace Tegra::Host1x { class Host1x; } // namespace Tegra::Host1x namespace Service::Nvidia::NvCore { +class HeapMapper; class NvMap; class SyncpointManager; struct ContainerImpl; +struct SessionId { + size_t id; +}; + +struct Session { + Session(SessionId id_, Kernel::KProcess* process_, Core::Asid asid_); + ~Session(); + + Session(const Session&) = delete; + Session& operator=(const Session&) = delete; + Session(Session&&) = default; + Session& operator=(Session&&) = default; + + SessionId id; + Kernel::KProcess* process; + Core::Asid asid; + bool has_preallocated_area{}; + std::unique_ptr<HeapMapper> mapper{}; + bool is_active{}; +}; + class Container { public: explicit Container(Tegra::Host1x::Host1x& host1x); ~Container(); + SessionId OpenSession(Kernel::KProcess* process); + void CloseSession(SessionId id); + + Session* GetSession(SessionId id); + NvMap& GetNvMapFile(); const NvMap& GetNvMapFile() const; diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.cpp b/src/core/hle/service/nvdrv/core/heap_mapper.cpp new file mode 100644 index 000000000..096dc5deb --- /dev/null +++ b/src/core/hle/service/nvdrv/core/heap_mapper.cpp @@ -0,0 +1,175 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include <mutex> + +#include <boost/container/small_vector.hpp> +#define BOOST_NO_MT +#include <boost/pool/detail/mutex.hpp> +#undef BOOST_NO_MT +#include <boost/icl/interval.hpp> +#include <boost/icl/interval_base_set.hpp> +#include <boost/icl/interval_set.hpp> +#include <boost/icl/split_interval_map.hpp> +#include <boost/pool/pool.hpp> +#include <boost/pool/pool_alloc.hpp> +#include <boost/pool/poolfwd.hpp> + +#include "core/hle/service/nvdrv/core/heap_mapper.h" +#include "video_core/host1x/host1x.h" + +namespace boost { +template <typename T> +class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>; +} + +namespace Service::Nvidia::NvCore { + +using IntervalCompare = std::less<DAddr>; +using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>; +using IntervalAllocator = boost::fast_pool_allocator<DAddr>; +using IntervalSet = boost::icl::interval_set<DAddr>; +using IntervalType = typename IntervalSet::interval_type; + +template <typename Type> +struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> { + // types + typedef counter_add_functor<Type> type; + typedef boost::icl::identity_based_inplace_combine<Type> base_type; + + // public member functions + void operator()(Type& current, const Type& added) const { + current += added; + if (current < base_type::identity_element()) { + current = base_type::identity_element(); + } + } + + // public static functions + static void version(Type&){}; +}; + +using OverlapCombine = counter_add_functor<int>; +using OverlapSection = boost::icl::inter_section<int>; +using OverlapCounter = boost::icl::split_interval_map<DAddr, int>; + +struct HeapMapper::HeapMapperInternal { + HeapMapperInternal(Tegra::Host1x::Host1x& host1x) : device_memory{host1x.MemoryManager()} {} + ~HeapMapperInternal() = default; + + template <typename Func> + void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, + Func&& func) { + const DAddr start_address = cpu_addr; + const DAddr end_address = start_address + size; + const IntervalType search_interval{start_address, end_address}; + auto it = current_range.lower_bound(search_interval); + if (it == current_range.end()) { + return; + } + auto end_it = current_range.upper_bound(search_interval); + for (; it != end_it; it++) { + auto& inter = it->first; + DAddr inter_addr_end = inter.upper(); + DAddr inter_addr = inter.lower(); + if (inter_addr_end > end_address) { + inter_addr_end = end_address; + } + if (inter_addr < start_address) { + inter_addr = start_address; + } + func(inter_addr, inter_addr_end, it->second); + } + } + + void RemoveEachInOverlapCounter(OverlapCounter& current_range, + const IntervalType search_interval, int subtract_value) { + bool any_removals = false; + current_range.add(std::make_pair(search_interval, subtract_value)); + do { + any_removals = false; + auto it = current_range.lower_bound(search_interval); + if (it == current_range.end()) { + return; + } + auto end_it = current_range.upper_bound(search_interval); + for (; it != end_it; it++) { + if (it->second <= 0) { + any_removals = true; + current_range.erase(it); + break; + } + } + } while (any_removals); + } + + IntervalSet base_set; + OverlapCounter mapping_overlaps; + Tegra::MaxwellDeviceMemoryManager& device_memory; + std::mutex guard; +}; + +HeapMapper::HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, Core::Asid asid, + Tegra::Host1x::Host1x& host1x) + : m_vaddress{start_vaddress}, m_daddress{start_daddress}, m_size{size}, m_asid{asid} { + m_internal = std::make_unique<HeapMapperInternal>(host1x); +} + +HeapMapper::~HeapMapper() { + m_internal->device_memory.Unmap(m_daddress, m_size); +} + +DAddr HeapMapper::Map(VAddr start, size_t size) { + std::scoped_lock lk(m_internal->guard); + m_internal->base_set.clear(); + const IntervalType interval{start, start + size}; + m_internal->base_set.insert(interval); + m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size, + [this](VAddr start_addr, VAddr end_addr, int) { + const IntervalType other{start_addr, end_addr}; + m_internal->base_set.subtract(other); + }); + if (!m_internal->base_set.empty()) { + auto it = m_internal->base_set.begin(); + auto end_it = m_internal->base_set.end(); + for (; it != end_it; it++) { + const VAddr inter_addr_end = it->upper(); + const VAddr inter_addr = it->lower(); + const size_t offset = inter_addr - m_vaddress; + const size_t sub_size = inter_addr_end - inter_addr; + m_internal->device_memory.Map(m_daddress + offset, m_vaddress + offset, sub_size, + m_asid); + } + } + m_internal->mapping_overlaps += std::make_pair(interval, 1); + m_internal->base_set.clear(); + return m_daddress + (start - m_vaddress); +} + +void HeapMapper::Unmap(VAddr start, size_t size) { + std::scoped_lock lk(m_internal->guard); + m_internal->base_set.clear(); + m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size, + [this](VAddr start_addr, VAddr end_addr, int value) { + if (value <= 1) { + const IntervalType other{start_addr, end_addr}; + m_internal->base_set.insert(other); + } + }); + if (!m_internal->base_set.empty()) { + auto it = m_internal->base_set.begin(); + auto end_it = m_internal->base_set.end(); + for (; it != end_it; it++) { + const VAddr inter_addr_end = it->upper(); + const VAddr inter_addr = it->lower(); + const size_t offset = inter_addr - m_vaddress; + const size_t sub_size = inter_addr_end - inter_addr; + m_internal->device_memory.Unmap(m_daddress + offset, sub_size); + } + } + const IntervalType to_remove{start, start + size}; + m_internal->RemoveEachInOverlapCounter(m_internal->mapping_overlaps, to_remove, -1); + m_internal->base_set.clear(); +} + +} // namespace Service::Nvidia::NvCore diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.h b/src/core/hle/service/nvdrv/core/heap_mapper.h new file mode 100644 index 000000000..491a12e4f --- /dev/null +++ b/src/core/hle/service/nvdrv/core/heap_mapper.h @@ -0,0 +1,49 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include <memory> + +#include "common/common_types.h" +#include "core/device_memory_manager.h" + +namespace Tegra::Host1x { +class Host1x; +} // namespace Tegra::Host1x + +namespace Service::Nvidia::NvCore { + +class HeapMapper { +public: + HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, Core::Asid asid, + Tegra::Host1x::Host1x& host1x); + ~HeapMapper(); + + bool IsInBounds(VAddr start, size_t size) const { + VAddr end = start + size; + return start >= m_vaddress && end <= (m_vaddress + m_size); + } + + DAddr Map(VAddr start, size_t size); + + void Unmap(VAddr start, size_t size); + + DAddr GetRegionStart() const { + return m_daddress; + } + + size_t GetRegionSize() const { + return m_size; + } + +private: + struct HeapMapperInternal; + VAddr m_vaddress; + DAddr m_daddress; + size_t m_size; + Core::Asid m_asid; + std::unique_ptr<HeapMapperInternal> m_internal; +}; + +} // namespace Service::Nvidia::NvCore diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp index 0ca05257e..1b59c6b15 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.cpp +++ b/src/core/hle/service/nvdrv/core/nvmap.cpp @@ -2,14 +2,19 @@ // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors // SPDX-License-Identifier: GPL-3.0-or-later +#include <functional> + #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" +#include "core/hle/service/nvdrv/core/container.h" +#include "core/hle/service/nvdrv/core/heap_mapper.h" #include "core/hle/service/nvdrv/core/nvmap.h" #include "core/memory.h" #include "video_core/host1x/host1x.h" using Core::Memory::YUZU_PAGESIZE; +constexpr size_t BIG_PAGE_SIZE = YUZU_PAGESIZE * 16; namespace Service::Nvidia::NvCore { NvMap::Handle::Handle(u64 size_, Id id_) @@ -17,9 +22,9 @@ NvMap::Handle::Handle(u64 size_, Id id_) flags.raw = 0; } -NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) { +NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress, + NvCore::SessionId pSessionId) { std::scoped_lock lock(mutex); - // Handles cannot be allocated twice if (allocated) { return NvResult::AccessDenied; @@ -28,6 +33,7 @@ NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) flags = pFlags; kind = pKind; align = pAlign < YUZU_PAGESIZE ? YUZU_PAGESIZE : pAlign; + session_id = pSessionId; // This flag is only applicable for handles with an address passed if (pAddress) { @@ -63,7 +69,7 @@ NvResult NvMap::Handle::Duplicate(bool internal_session) { return NvResult::Success; } -NvMap::NvMap(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {} +NvMap::NvMap(Container& core_, Tegra::Host1x::Host1x& host1x_) : host1x{host1x_}, core{core_} {} void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) { std::scoped_lock lock(handles_lock); @@ -78,12 +84,30 @@ void NvMap::UnmapHandle(Handle& handle_description) { handle_description.unmap_queue_entry.reset(); } + // Free and unmap the handle from Host1x GMMU + if (handle_description.pin_virt_address) { + host1x.GMMU().Unmap(static_cast<GPUVAddr>(handle_description.pin_virt_address), + handle_description.aligned_size); + host1x.Allocator().Free(handle_description.pin_virt_address, + static_cast<u32>(handle_description.aligned_size)); + handle_description.pin_virt_address = 0; + } + // Free and unmap the handle from the SMMU - host1x.MemoryManager().Unmap(static_cast<GPUVAddr>(handle_description.pin_virt_address), - handle_description.aligned_size); - host1x.Allocator().Free(handle_description.pin_virt_address, - static_cast<u32>(handle_description.aligned_size)); - handle_description.pin_virt_address = 0; + const size_t map_size = handle_description.aligned_size; + if (!handle_description.in_heap) { + auto& smmu = host1x.MemoryManager(); + size_t aligned_up = Common::AlignUp(map_size, BIG_PAGE_SIZE); + smmu.Unmap(handle_description.d_address, map_size); + smmu.Free(handle_description.d_address, static_cast<size_t>(aligned_up)); + handle_description.d_address = 0; + return; + } + const VAddr vaddress = handle_description.address; + auto* session = core.GetSession(handle_description.session_id); + session->mapper->Unmap(vaddress, map_size); + handle_description.d_address = 0; + handle_description.in_heap = false; } bool NvMap::TryRemoveHandle(const Handle& handle_description) { @@ -124,22 +148,33 @@ std::shared_ptr<NvMap::Handle> NvMap::GetHandle(Handle::Id handle) { } } -VAddr NvMap::GetHandleAddress(Handle::Id handle) { +DAddr NvMap::GetHandleAddress(Handle::Id handle) { std::scoped_lock lock(handles_lock); try { - return handles.at(handle)->address; + return handles.at(handle)->d_address; } catch (std::out_of_range&) { return 0; } } -u32 NvMap::PinHandle(NvMap::Handle::Id handle) { +DAddr NvMap::PinHandle(NvMap::Handle::Id handle, bool low_area_pin) { auto handle_description{GetHandle(handle)}; if (!handle_description) [[unlikely]] { return 0; } std::scoped_lock lock(handle_description->mutex); + const auto map_low_area = [&] { + if (handle_description->pin_virt_address == 0) { + auto& gmmu_allocator = host1x.Allocator(); + auto& gmmu = host1x.GMMU(); + u32 address = + gmmu_allocator.Allocate(static_cast<u32>(handle_description->aligned_size)); + gmmu.Map(static_cast<GPUVAddr>(address), handle_description->d_address, + handle_description->aligned_size); + handle_description->pin_virt_address = address; + } + }; if (!handle_description->pins) { // If we're in the unmap queue we can just remove ourselves and return since we're already // mapped @@ -151,37 +186,58 @@ u32 NvMap::PinHandle(NvMap::Handle::Id handle) { unmap_queue.erase(*handle_description->unmap_queue_entry); handle_description->unmap_queue_entry.reset(); + if (low_area_pin) { + map_low_area(); + handle_description->pins++; + return static_cast<DAddr>(handle_description->pin_virt_address); + } + handle_description->pins++; - return handle_description->pin_virt_address; + return handle_description->d_address; } } + using namespace std::placeholders; // If not then allocate some space and map it - u32 address{}; - auto& smmu_allocator = host1x.Allocator(); - auto& smmu_memory_manager = host1x.MemoryManager(); - while ((address = smmu_allocator.Allocate( - static_cast<u32>(handle_description->aligned_size))) == 0) { - // Free handles until the allocation succeeds - std::scoped_lock queueLock(unmap_queue_lock); - if (auto freeHandleDesc{unmap_queue.front()}) { - // Handles in the unmap queue are guaranteed not to be pinned so don't bother - // checking if they are before unmapping - std::scoped_lock freeLock(freeHandleDesc->mutex); - if (handle_description->pin_virt_address) - UnmapHandle(*freeHandleDesc); - } else { - LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); + DAddr address{}; + auto& smmu = host1x.MemoryManager(); + auto* session = core.GetSession(handle_description->session_id); + const VAddr vaddress = handle_description->address; + const size_t map_size = handle_description->aligned_size; + if (session->has_preallocated_area && session->mapper->IsInBounds(vaddress, map_size)) { + handle_description->d_address = session->mapper->Map(vaddress, map_size); + handle_description->in_heap = true; + } else { + size_t aligned_up = Common::AlignUp(map_size, BIG_PAGE_SIZE); + while ((address = smmu.Allocate(aligned_up)) == 0) { + // Free handles until the allocation succeeds + std::scoped_lock queueLock(unmap_queue_lock); + if (auto freeHandleDesc{unmap_queue.front()}) { + // Handles in the unmap queue are guaranteed not to be pinned so don't bother + // checking if they are before unmapping + std::scoped_lock freeLock(freeHandleDesc->mutex); + if (handle_description->d_address) + UnmapHandle(*freeHandleDesc); + } else { + LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); + } } + + handle_description->d_address = address; + smmu.Map(address, vaddress, map_size, session->asid, true); + handle_description->in_heap = false; } + } - smmu_memory_manager.Map(static_cast<GPUVAddr>(address), handle_description->address, - handle_description->aligned_size); - handle_description->pin_virt_address = address; + if (low_area_pin) { + map_low_area(); } handle_description->pins++; - return handle_description->pin_virt_address; + if (low_area_pin) { + return static_cast<DAddr>(handle_description->pin_virt_address); + } + return handle_description->d_address; } void NvMap::UnpinHandle(Handle::Id handle) { @@ -232,7 +288,7 @@ std::optional<NvMap::FreeInfo> NvMap::FreeHandle(Handle::Id handle, bool interna LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!"); } else if (handle_description->dupes == 0) { // Force unmap the handle - if (handle_description->pin_virt_address) { + if (handle_description->d_address) { std::scoped_lock queueLock(unmap_queue_lock); UnmapHandle(*handle_description); } diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h index a8e573890..d7f695845 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.h +++ b/src/core/hle/service/nvdrv/core/nvmap.h @@ -14,6 +14,7 @@ #include "common/bit_field.h" #include "common/common_types.h" +#include "core/hle/service/nvdrv/core/container.h" #include "core/hle/service/nvdrv/nvdata.h" namespace Tegra { @@ -25,6 +26,8 @@ class Host1x; } // namespace Tegra namespace Service::Nvidia::NvCore { + +class Container; /** * @brief The nvmap core class holds the global state for nvmap and provides methods to manage * handles @@ -48,7 +51,7 @@ public: using Id = u32; Id id; //!< A globally unique identifier for this handle - s32 pins{}; + s64 pins{}; u32 pin_virt_address{}; std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{}; @@ -61,15 +64,18 @@ public: } flags{}; static_assert(sizeof(Flags) == sizeof(u32)); - u64 address{}; //!< The memory location in the guest's AS that this handle corresponds to, - //!< this can also be in the nvdrv tmem + VAddr address{}; //!< The memory location in the guest's AS that this handle corresponds to, + //!< this can also be in the nvdrv tmem bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC //!< call u8 kind{}; //!< Used for memory compression bool allocated{}; //!< If the handle has been allocated with `Alloc` + bool in_heap{}; + NvCore::SessionId session_id{}; - u64 dma_map_addr{}; //! remove me after implementing pinning. + DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds + //!< to, this can also be in the nvdrv tmem Handle(u64 size, Id id); @@ -77,7 +83,8 @@ public: * @brief Sets up the handle with the given memory config, can allocate memory from the tmem * if a 0 address is passed */ - [[nodiscard]] NvResult Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress); + [[nodiscard]] NvResult Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress, + NvCore::SessionId pSessionId); /** * @brief Increases the dupe counter of the handle for the given session @@ -108,7 +115,7 @@ public: bool can_unlock; //!< If the address region is ready to be unlocked }; - explicit NvMap(Tegra::Host1x::Host1x& host1x); + explicit NvMap(Container& core, Tegra::Host1x::Host1x& host1x); /** * @brief Creates an unallocated handle of the given size @@ -117,7 +124,7 @@ public: std::shared_ptr<Handle> GetHandle(Handle::Id handle); - VAddr GetHandleAddress(Handle::Id handle); + DAddr GetHandleAddress(Handle::Id handle); /** * @brief Maps a handle into the SMMU address space @@ -125,7 +132,7 @@ public: * number of calls to `UnpinHandle` * @return The SMMU virtual address that the handle has been mapped to */ - u32 PinHandle(Handle::Id handle); + DAddr PinHandle(Handle::Id handle, bool low_area_pin); /** * @brief When this has been called an equal number of times to `PinHandle` for the supplied @@ -172,5 +179,7 @@ private: * @return If the handle was removed from the map */ bool TryRemoveHandle(const Handle& handle_description); + + Container& core; }; } // namespace Service::Nvidia::NvCore diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h index a04538d5d..8adaddc60 100644 --- a/src/core/hle/service/nvdrv/devices/nvdevice.h +++ b/src/core/hle/service/nvdrv/devices/nvdevice.h @@ -7,6 +7,7 @@ #include <vector> #include "common/common_types.h" +#include "core/hle/service/nvdrv/core/container.h" #include "core/hle/service/nvdrv/nvdata.h" namespace Core { @@ -62,7 +63,7 @@ public: * Called once a device is opened * @param fd The device fd */ - virtual void OnOpen(DeviceFD fd) = 0; + virtual void OnOpen(NvCore::SessionId session_id, DeviceFD fd) = 0; /** * Called once a device is closed diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 05a43d8dc..c1ebbd62d 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -35,14 +35,14 @@ NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in return NvResult::NotImplemented; } -void nvdisp_disp0::OnOpen(DeviceFD fd) {} +void nvdisp_disp0::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} void nvdisp_disp0::OnClose(DeviceFD fd) {} void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height, u32 stride, android::BufferTransformFlags transform, const Common::Rectangle<int>& crop_rect, std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { - const VAddr addr = nvmap.GetHandleAddress(buffer_handle); + const DAddr addr = nvmap.GetHandleAddress(buffer_handle); LOG_TRACE(Service, "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", addr, offset, width, height, stride, format); diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index daee05fe8..5f13a50a2 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h @@ -32,7 +32,7 @@ public: NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, std::span<u8> inline_output) override; - void OnOpen(DeviceFD fd) override; + void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; void OnClose(DeviceFD fd) override; /// Performs a screen flip, drawing the buffer pointed to by the handle. diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 6b3639008..e6646ba04 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -86,7 +86,7 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i return NvResult::NotImplemented; } -void nvhost_as_gpu::OnOpen(DeviceFD fd) {} +void nvhost_as_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} void nvhost_as_gpu::OnClose(DeviceFD fd) {} NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) { @@ -206,6 +206,8 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) { static_cast<u32>(aligned_size >> page_size_bits)); } + nvmap.UnpinHandle(mapping->handle); + // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state // Only FreeSpace can unmap them fully if (mapping->sparse_alloc) { @@ -293,12 +295,12 @@ NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries) { return NvResult::BadValue; } - VAddr cpu_address{static_cast<VAddr>( - handle->address + - (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; + DAddr base = nvmap.PinHandle(entry.handle, false); + DAddr device_address{static_cast<DAddr>( + base + (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; - gmmu->Map(virtual_address, cpu_address, size, static_cast<Tegra::PTEKind>(entry.kind), - use_big_pages); + gmmu->Map(virtual_address, device_address, size, + static_cast<Tegra::PTEKind>(entry.kind), use_big_pages); } } @@ -331,9 +333,9 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { } u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)}; - VAddr cpu_address{mapping->ptr + params.buffer_offset}; + VAddr device_address{mapping->ptr + params.buffer_offset}; - gmmu->Map(gpu_address, cpu_address, params.mapping_size, + gmmu->Map(gpu_address, device_address, params.mapping_size, static_cast<Tegra::PTEKind>(params.kind), mapping->big_page); return NvResult::Success; @@ -349,7 +351,8 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { return NvResult::BadValue; } - VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)}; + DAddr device_address{ + static_cast<DAddr>(nvmap.PinHandle(params.handle, false) + params.buffer_offset)}; u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; bool big_page{[&]() { @@ -373,15 +376,14 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { } const bool use_big_pages = alloc->second.big_pages && big_page; - gmmu->Map(params.offset, cpu_address, size, static_cast<Tegra::PTEKind>(params.kind), + gmmu->Map(params.offset, device_address, size, static_cast<Tegra::PTEKind>(params.kind), use_big_pages); - auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, - use_big_pages, alloc->second.sparse)}; + auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size, + true, use_big_pages, alloc->second.sparse)}; alloc->second.mappings.push_back(mapping); mapping_map[params.offset] = mapping; } else { - auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; @@ -394,11 +396,11 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { return NvResult::InsufficientMemory; } - gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), + gmmu->Map(params.offset, device_address, Common::AlignUp(size, page_size), static_cast<Tegra::PTEKind>(params.kind), big_page); - auto mapping{ - std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)}; + auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size, + false, big_page, false)}; mapping_map[params.offset] = mapping; } @@ -433,6 +435,8 @@ NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params) { gmmu->Unmap(params.offset, mapping->size); } + nvmap.UnpinHandle(mapping->handle); + mapping_map.erase(params.offset); } catch (const std::out_of_range&) { LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index 79a21683d..7d0a99988 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h @@ -55,7 +55,7 @@ public: NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, std::span<u8> inline_output) override; - void OnOpen(DeviceFD fd) override; + void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; void OnClose(DeviceFD fd) override; Kernel::KEvent* QueryEvent(u32 event_id) override; @@ -159,16 +159,18 @@ private: NvCore::NvMap& nvmap; struct Mapping { - VAddr ptr; + NvCore::NvMap::Handle::Id handle; + DAddr ptr; u64 offset; u64 size; bool fixed; bool big_page; // Only valid if fixed == false bool sparse_alloc; - Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_) - : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_), - sparse_alloc(sparse_alloc_) {} + Mapping(NvCore::NvMap::Handle::Id handle_, DAddr ptr_, u64 offset_, u64 size_, bool fixed_, + bool big_page_, bool sparse_alloc_) + : handle(handle_), ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), + big_page(big_page_), sparse_alloc(sparse_alloc_) {} }; struct Allocation { @@ -212,9 +214,6 @@ private: bool initialised{}; } vm; std::shared_ptr<Tegra::MemoryManager> gmmu; - - // s32 channel{}; - // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE}; }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index b8dd34e24..250d01de3 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -76,7 +76,7 @@ NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inp return NvResult::NotImplemented; } -void nvhost_ctrl::OnOpen(DeviceFD fd) {} +void nvhost_ctrl::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} void nvhost_ctrl::OnClose(DeviceFD fd) {} diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index 992124b60..403f1a746 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h @@ -32,7 +32,7 @@ public: NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, std::span<u8> inline_output) override; - void OnOpen(DeviceFD fd) override; + void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; void OnClose(DeviceFD fd) override; Kernel::KEvent* QueryEvent(u32 event_id) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index 3e0c96456..ddd85678b 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp @@ -82,7 +82,7 @@ NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> return NvResult::NotImplemented; } -void nvhost_ctrl_gpu::OnOpen(DeviceFD fd) {} +void nvhost_ctrl_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {} NvResult nvhost_ctrl_gpu::GetCharacteristics1(IoctlCharacteristics& params) { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h index d170299bd..d2ab05b21 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h @@ -28,7 +28,7 @@ public: NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, std::span<u8> inline_output) override; - void OnOpen(DeviceFD fd) override; + void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; void OnClose(DeviceFD fd) override; Kernel::KEvent* QueryEvent(u32 event_id) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index b0395c2f0..bf12d69a5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -120,7 +120,7 @@ NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu return NvResult::NotImplemented; } -void nvhost_gpu::OnOpen(DeviceFD fd) {} +void nvhost_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} void nvhost_gpu::OnClose(DeviceFD fd) {} NvResult nvhost_gpu::SetNVMAPfd(IoctlSetNvmapFD& params) { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 88fd228ff..e34a978db 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -47,7 +47,7 @@ public: NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, std::span<u8> inline_output) override; - void OnOpen(DeviceFD fd) override; + void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; void OnClose(DeviceFD fd) override; Kernel::KEvent* QueryEvent(u32 event_id) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index f43914e1b..2c0ac2a46 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -35,7 +35,7 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in case 0x7: return WrapFixed(this, &nvhost_nvdec::SetSubmitTimeout, input, output); case 0x9: - return WrapFixedVariable(this, &nvhost_nvdec::MapBuffer, input, output); + return WrapFixedVariable(this, &nvhost_nvdec::MapBuffer, input, output, fd); case 0xa: return WrapFixedVariable(this, &nvhost_nvdec::UnmapBuffer, input, output); default: @@ -68,9 +68,10 @@ NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in return NvResult::NotImplemented; } -void nvhost_nvdec::OnOpen(DeviceFD fd) { +void nvhost_nvdec::OnOpen(NvCore::SessionId session_id, DeviceFD fd) { LOG_INFO(Service_NVDRV, "NVDEC video stream started"); system.SetNVDECActive(true); + sessions[fd] = session_id; } void nvhost_nvdec::OnClose(DeviceFD fd) { @@ -81,6 +82,10 @@ void nvhost_nvdec::OnClose(DeviceFD fd) { system.GPU().ClearCdmaInstance(iter->second); } system.SetNVDECActive(false); + auto it = sessions.find(fd); + if (it != sessions.end()) { + sessions.erase(it); + } } } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index ad2233c49..627686757 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -20,7 +20,7 @@ public: NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, std::span<u8> inline_output) override; - void OnOpen(DeviceFD fd) override; + void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; void OnClose(DeviceFD fd) override; }; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 74c701b95..a0a7bfa40 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -8,6 +8,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/hle/kernel/k_process.h" #include "core/hle/service/nvdrv/core/container.h" #include "core/hle/service/nvdrv/core/nvmap.h" #include "core/hle/service/nvdrv/core/syncpoint_manager.h" @@ -95,6 +96,8 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); auto& gpu = system.GPU(); + auto* session = core.GetSession(sessions[fd]); + if (gpu.UseNvdec()) { for (std::size_t i = 0; i < syncpt_increments.size(); i++) { const SyncptIncr& syncpt_incr = syncpt_increments[i]; @@ -106,8 +109,8 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De const auto object = nvmap.GetHandle(cmd_buffer.memory_id); ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); - system.ApplicationMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), - cmdlist.size() * sizeof(u32)); + session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), + cmdlist.size() * sizeof(u32)); gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); } // Some games expect command_buffers to be written back @@ -133,10 +136,12 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) { return NvResult::Success; } -NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries) { +NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, + DeviceFD fd) { const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); for (size_t i = 0; i < num_entries; i++) { - entries[i].map_address = nvmap.PinHandle(entries[i].map_handle); + DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, true); + entries[i].map_address = static_cast<u32>(pin_address); } return NvResult::Success; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index 7ce748e18..900db81d2 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -4,7 +4,9 @@ #pragma once #include <deque> +#include <unordered_map> #include <vector> + #include "common/common_types.h" #include "common/swap.h" #include "core/hle/service/nvdrv/core/syncpoint_manager.h" @@ -111,7 +113,7 @@ protected: NvResult Submit(IoctlSubmit& params, std::span<u8> input, DeviceFD fd); NvResult GetSyncpoint(IoctlGetSyncpoint& params); NvResult GetWaitbase(IoctlGetWaitbase& params); - NvResult MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries); + NvResult MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd); NvResult UnmapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries); NvResult SetSubmitTimeout(u32 timeout); @@ -125,6 +127,7 @@ protected: NvCore::NvMap& nvmap; NvCore::ChannelType channel_type; std::array<u32, MaxSyncPoints> device_syncpoints{}; + std::unordered_map<DeviceFD, NvCore::SessionId> sessions; }; }; // namespace Devices } // namespace Service::Nvidia diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp index 9e6b86458..f87d53f12 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp @@ -44,7 +44,7 @@ NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in return NvResult::NotImplemented; } -void nvhost_nvjpg::OnOpen(DeviceFD fd) {} +void nvhost_nvjpg::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} void nvhost_nvjpg::OnClose(DeviceFD fd) {} NvResult nvhost_nvjpg::SetNVMAPfd(IoctlSetNvmapFD& params) { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h index 790c97f6a..def9c254d 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h @@ -22,7 +22,7 @@ public: NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, std::span<u8> inline_output) override; - void OnOpen(DeviceFD fd) override; + void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; void OnClose(DeviceFD fd) override; private: diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 87f8d7c22..bf090f5eb 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -33,7 +33,7 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu case 0x3: return WrapFixed(this, &nvhost_vic::GetWaitbase, input, output); case 0x9: - return WrapFixedVariable(this, &nvhost_vic::MapBuffer, input, output); + return WrapFixedVariable(this, &nvhost_vic::MapBuffer, input, output, fd); case 0xa: return WrapFixedVariable(this, &nvhost_vic::UnmapBuffer, input, output); default: @@ -68,7 +68,9 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu return NvResult::NotImplemented; } -void nvhost_vic::OnOpen(DeviceFD fd) {} +void nvhost_vic::OnOpen(NvCore::SessionId session_id, DeviceFD fd) { + sessions[fd] = session_id; +} void nvhost_vic::OnClose(DeviceFD fd) { auto& host1x_file = core.Host1xDeviceFile(); @@ -76,6 +78,7 @@ void nvhost_vic::OnClose(DeviceFD fd) { if (iter != host1x_file.fd_to_id.end()) { system.GPU().ClearCdmaInstance(iter->second); } + sessions.erase(fd); } } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index cadbcb0a5..0cc04354a 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -19,7 +19,7 @@ public: NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, std::span<u8> inline_output) override; - void OnOpen(DeviceFD fd) override; + void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; void OnClose(DeviceFD fd) override; }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp index 71b2e62ec..da61a3bfe 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp @@ -36,9 +36,9 @@ NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, case 0x3: return WrapFixed(this, &nvmap::IocFromId, input, output); case 0x4: - return WrapFixed(this, &nvmap::IocAlloc, input, output); + return WrapFixed(this, &nvmap::IocAlloc, input, output, fd); case 0x5: - return WrapFixed(this, &nvmap::IocFree, input, output); + return WrapFixed(this, &nvmap::IocFree, input, output, fd); case 0x9: return WrapFixed(this, &nvmap::IocParam, input, output); case 0xe: @@ -67,8 +67,15 @@ NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, st return NvResult::NotImplemented; } -void nvmap::OnOpen(DeviceFD fd) {} -void nvmap::OnClose(DeviceFD fd) {} +void nvmap::OnOpen(NvCore::SessionId session_id, DeviceFD fd) { + sessions[fd] = session_id; +} +void nvmap::OnClose(DeviceFD fd) { + auto it = sessions.find(fd); + if (it != sessions.end()) { + sessions.erase(it); + } +} NvResult nvmap::IocCreate(IocCreateParams& params) { LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size); @@ -87,7 +94,7 @@ NvResult nvmap::IocCreate(IocCreateParams& params) { return NvResult::Success; } -NvResult nvmap::IocAlloc(IocAllocParams& params) { +NvResult nvmap::IocAlloc(IocAllocParams& params, DeviceFD fd) { LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address); if (!params.handle) { @@ -116,15 +123,15 @@ NvResult nvmap::IocAlloc(IocAllocParams& params) { return NvResult::InsufficientMemory; } - const auto result = - handle_description->Alloc(params.flags, params.align, params.kind, params.address); + const auto result = handle_description->Alloc(params.flags, params.align, params.kind, + params.address, sessions[fd]); if (result != NvResult::Success) { LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle); return result; } bool is_out_io{}; - ASSERT(system.ApplicationProcess() - ->GetPageTable() + auto process = container.GetSession(sessions[fd])->process; + ASSERT(process->GetPageTable() .LockForMapDeviceAddressSpace(&is_out_io, handle_description->address, handle_description->size, Kernel::KMemoryPermission::None, true, false) @@ -224,7 +231,7 @@ NvResult nvmap::IocParam(IocParamParams& params) { return NvResult::Success; } -NvResult nvmap::IocFree(IocFreeParams& params) { +NvResult nvmap::IocFree(IocFreeParams& params, DeviceFD fd) { LOG_DEBUG(Service_NVDRV, "called"); if (!params.handle) { @@ -233,9 +240,9 @@ NvResult nvmap::IocFree(IocFreeParams& params) { } if (auto freeInfo{file.FreeHandle(params.handle, false)}) { + auto process = container.GetSession(sessions[fd])->process; if (freeInfo->can_unlock) { - ASSERT(system.ApplicationProcess() - ->GetPageTable() + ASSERT(process->GetPageTable() .UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size) .IsSuccess()); } diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index 049c11028..d07d85f88 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h @@ -33,7 +33,7 @@ public: NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, std::span<u8> inline_output) override; - void OnOpen(DeviceFD fd) override; + void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; void OnClose(DeviceFD fd) override; enum class HandleParameterType : u32_le { @@ -100,11 +100,11 @@ public: static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); NvResult IocCreate(IocCreateParams& params); - NvResult IocAlloc(IocAllocParams& params); + NvResult IocAlloc(IocAllocParams& params, DeviceFD fd); NvResult IocGetId(IocGetIdParams& params); NvResult IocFromId(IocFromIdParams& params); NvResult IocParam(IocParamParams& params); - NvResult IocFree(IocFreeParams& params); + NvResult IocFree(IocFreeParams& params, DeviceFD fd); private: /// Id to use for the next handle that is created. @@ -115,6 +115,7 @@ private: NvCore::Container& container; NvCore::NvMap& file; + std::unordered_map<DeviceFD, NvCore::SessionId> sessions; }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 9e46ee8dd..cb256e5b4 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -45,13 +45,22 @@ void EventInterface::FreeEvent(Kernel::KEvent* event) { void LoopProcess(Nvnflinger::Nvnflinger& nvnflinger, Core::System& system) { auto server_manager = std::make_unique<ServerManager>(system); auto module = std::make_shared<Module>(system); - server_manager->RegisterNamedService("nvdrv", std::make_shared<NVDRV>(system, module, "nvdrv")); - server_manager->RegisterNamedService("nvdrv:a", - std::make_shared<NVDRV>(system, module, "nvdrv:a")); - server_manager->RegisterNamedService("nvdrv:s", - std::make_shared<NVDRV>(system, module, "nvdrv:s")); - server_manager->RegisterNamedService("nvdrv:t", - std::make_shared<NVDRV>(system, module, "nvdrv:t")); + const auto NvdrvInterfaceFactoryForApplication = [&, module] { + return std::make_shared<NVDRV>(system, module, "nvdrv"); + }; + const auto NvdrvInterfaceFactoryForApplets = [&, module] { + return std::make_shared<NVDRV>(system, module, "nvdrv:a"); + }; + const auto NvdrvInterfaceFactoryForSysmodules = [&, module] { + return std::make_shared<NVDRV>(system, module, "nvdrv:s"); + }; + const auto NvdrvInterfaceFactoryForTesting = [&, module] { + return std::make_shared<NVDRV>(system, module, "nvdrv:t"); + }; + server_manager->RegisterNamedService("nvdrv", NvdrvInterfaceFactoryForApplication); + server_manager->RegisterNamedService("nvdrv:a", NvdrvInterfaceFactoryForApplets); + server_manager->RegisterNamedService("nvdrv:s", NvdrvInterfaceFactoryForSysmodules); + server_manager->RegisterNamedService("nvdrv:t", NvdrvInterfaceFactoryForTesting); server_manager->RegisterNamedService("nvmemp", std::make_shared<NVMEMP>(system)); nvnflinger.SetNVDrvInstance(module); ServerManager::RunServer(std::move(server_manager)); @@ -113,7 +122,7 @@ NvResult Module::VerifyFD(DeviceFD fd) const { return NvResult::Success; } -DeviceFD Module::Open(const std::string& device_name) { +DeviceFD Module::Open(const std::string& device_name, NvCore::SessionId session_id) { auto it = builders.find(device_name); if (it == builders.end()) { LOG_ERROR(Service_NVDRV, "Trying to open unknown device {}", device_name); @@ -124,7 +133,7 @@ DeviceFD Module::Open(const std::string& device_name) { auto& builder = it->second; auto device = builder(fd)->second; - device->OnOpen(fd); + device->OnOpen(session_id, fd); return fd; } diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index d8622b3ca..c594f0e5e 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h @@ -77,7 +77,7 @@ public: NvResult VerifyFD(DeviceFD fd) const; /// Opens a device node and returns a file descriptor to it. - DeviceFD Open(const std::string& device_name); + DeviceFD Open(const std::string& device_name, NvCore::SessionId session_id); /// Sends an ioctl command to the specified file descriptor. NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output); @@ -93,6 +93,10 @@ public: NvResult QueryEvent(DeviceFD fd, u32 event_id, Kernel::KEvent*& event); + NvCore::Container& GetContainer() { + return container; + } + private: friend class EventInterface; friend class Service::Nvnflinger::Nvnflinger; diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp index c8a880e84..6e4825313 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp +++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp @@ -3,8 +3,10 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include "common/logging/log.h" +#include "common/scope_exit.h" #include "core/core.h" #include "core/hle/kernel/k_event.h" +#include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_readable_event.h" #include "core/hle/service/ipc_helpers.h" #include "core/hle/service/nvdrv/nvdata.h" @@ -37,7 +39,7 @@ void NVDRV::Open(HLERequestContext& ctx) { return; } - DeviceFD fd = nvdrv->Open(device_name); + DeviceFD fd = nvdrv->Open(device_name, session_id); rb.Push<DeviceFD>(fd); rb.PushEnum(fd != INVALID_NVDRV_FD ? NvResult::Success : NvResult::FileOperationFailed); @@ -150,12 +152,29 @@ void NVDRV::Close(HLERequestContext& ctx) { void NVDRV::Initialize(HLERequestContext& ctx) { LOG_WARNING(Service_NVDRV, "(STUBBED) called"); + IPC::ResponseBuilder rb{ctx, 3}; + SCOPE_EXIT({ + rb.Push(ResultSuccess); + rb.PushEnum(NvResult::Success); + }); - is_initialized = true; + if (is_initialized) { + // No need to initialize again + return; + } - IPC::ResponseBuilder rb{ctx, 3}; - rb.Push(ResultSuccess); - rb.PushEnum(NvResult::Success); + IPC::RequestParser rp{ctx}; + const auto process_handle{ctx.GetCopyHandle(0)}; + // The transfer memory is lent to nvdrv as a work buffer since nvdrv is + // unable to allocate as much memory on its own. For HLE it's unnecessary to handle it + [[maybe_unused]] const auto transfer_memory_handle{ctx.GetCopyHandle(1)}; + [[maybe_unused]] const auto transfer_memory_size = rp.Pop<u32>(); + + auto& container = nvdrv->GetContainer(); + auto process = ctx.GetObjectFromHandle<Kernel::KProcess>(process_handle); + session_id = container.OpenSession(process.GetPointerUnsafe()); + + is_initialized = true; } void NVDRV::QueryEvent(HLERequestContext& ctx) { @@ -242,6 +261,9 @@ NVDRV::NVDRV(Core::System& system_, std::shared_ptr<Module> nvdrv_, const char* RegisterHandlers(functions); } -NVDRV::~NVDRV() = default; +NVDRV::~NVDRV() { + auto& container = nvdrv->GetContainer(); + container.CloseSession(session_id); +} } // namespace Service::Nvidia diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h index 6e98115dc..f2195ae1e 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.h +++ b/src/core/hle/service/nvdrv/nvdrv_interface.h @@ -35,6 +35,7 @@ private: u64 pid{}; bool is_initialized{}; + NvCore::SessionId session_id{}; Common::ScratchBuffer<u8> output_buffer; Common::ScratchBuffer<u8> inline_output_buffer; }; diff --git a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp index 2fef6cc1a..86e272b41 100644 --- a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp +++ b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp @@ -87,19 +87,20 @@ Result CreateNvMapHandle(u32* out_nv_map_handle, Nvidia::Devices::nvmap& nvmap, R_SUCCEED(); } -Result FreeNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle) { +Result FreeNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Nvidia::DeviceFD nvmap_fd) { // Free the handle. Nvidia::Devices::nvmap::IocFreeParams free_params{ .handle = handle, }; - R_UNLESS(nvmap.IocFree(free_params) == Nvidia::NvResult::Success, VI::ResultOperationFailed); + R_UNLESS(nvmap.IocFree(free_params, nvmap_fd) == Nvidia::NvResult::Success, + VI::ResultOperationFailed); // We succeeded. R_SUCCEED(); } Result AllocNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Common::ProcessAddress buffer, - u32 size) { + u32 size, Nvidia::DeviceFD nvmap_fd) { // Assign the allocated memory to the handle. Nvidia::Devices::nvmap::IocAllocParams alloc_params{ .handle = handle, @@ -109,16 +110,16 @@ Result AllocNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Common::Proce .kind = 0, .address = GetInteger(buffer), }; - R_UNLESS(nvmap.IocAlloc(alloc_params) == Nvidia::NvResult::Success, VI::ResultOperationFailed); + R_UNLESS(nvmap.IocAlloc(alloc_params, nvmap_fd) == Nvidia::NvResult::Success, + VI::ResultOperationFailed); // We succeeded. R_SUCCEED(); } -Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv, +Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv, Nvidia::DeviceFD nvmap_fd, Common::ProcessAddress buffer, u32 size) { // Get the nvmap device. - auto nvmap_fd = nvdrv.Open("/dev/nvmap"); auto nvmap = nvdrv.GetDevice<Nvidia::Devices::nvmap>(nvmap_fd); ASSERT(nvmap != nullptr); @@ -127,11 +128,11 @@ Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv, // Ensure we maintain a clean state on failure. ON_RESULT_FAILURE { - ASSERT(R_SUCCEEDED(FreeNvMapHandle(*nvmap, *out_handle))); + ASSERT(R_SUCCEEDED(FreeNvMapHandle(*nvmap, *out_handle, nvmap_fd))); }; // Assign the allocated memory to the handle. - R_RETURN(AllocNvMapHandle(*nvmap, *out_handle, buffer, size)); + R_RETURN(AllocNvMapHandle(*nvmap, *out_handle, buffer, size, nvmap_fd)); } constexpr auto SharedBufferBlockLinearFormat = android::PixelFormat::Rgba8888; @@ -197,9 +198,13 @@ Result FbShareBufferManager::Initialize(u64* out_buffer_id, u64* out_layer_id, u std::addressof(m_buffer_page_group), m_system, SharedBufferSize)); + auto& container = m_nvdrv->GetContainer(); + m_session_id = container.OpenSession(m_system.ApplicationProcess()); + m_nvmap_fd = m_nvdrv->Open("/dev/nvmap", m_session_id); + // Create an nvmap handle for the buffer and assign the memory to it. - R_TRY(AllocateHandleForBuffer(std::addressof(m_buffer_nvmap_handle), *m_nvdrv, map_address, - SharedBufferSize)); + R_TRY(AllocateHandleForBuffer(std::addressof(m_buffer_nvmap_handle), *m_nvdrv, m_nvmap_fd, + map_address, SharedBufferSize)); // Record the display id. m_display_id = display_id; diff --git a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h index c809c01b4..033bf4bbe 100644 --- a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h +++ b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h @@ -4,6 +4,8 @@ #pragma once #include "common/math_util.h" +#include "core/hle/service/nvdrv/core/container.h" +#include "core/hle/service/nvdrv/nvdata.h" #include "core/hle/service/nvnflinger/nvnflinger.h" #include "core/hle/service/nvnflinger/ui/fence.h" @@ -53,7 +55,8 @@ private: u64 m_layer_id = 0; u32 m_buffer_nvmap_handle = 0; SharedMemoryPoolLayout m_pool_layout = {}; - + Nvidia::DeviceFD m_nvmap_fd = {}; + Nvidia::NvCore::SessionId m_session_id = {}; std::unique_ptr<Kernel::KPageGroup> m_buffer_page_group; std::mutex m_guard; diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp index af6591370..71d6fdb0c 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.cpp +++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp @@ -124,7 +124,7 @@ void Nvnflinger::ShutdownLayers() { void Nvnflinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { nvdrv = std::move(instance); - disp_fd = nvdrv->Open("/dev/nvdisp_disp0"); + disp_fd = nvdrv->Open("/dev/nvdisp_disp0", {}); } std::optional<u64> Nvnflinger::OpenDisplay(std::string_view name) { diff --git a/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp b/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp index ce70946ec..ede2a1193 100644 --- a/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp +++ b/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp @@ -22,11 +22,13 @@ GraphicBuffer::GraphicBuffer(Service::Nvidia::NvCore::NvMap& nvmap, : NvGraphicBuffer(GetBuffer(buffer)), m_nvmap(std::addressof(nvmap)) { if (this->BufferId() > 0) { m_nvmap->DuplicateHandle(this->BufferId(), true); + m_nvmap->PinHandle(this->BufferId(), false); } } GraphicBuffer::~GraphicBuffer() { if (m_nvmap != nullptr && this->BufferId() > 0) { + m_nvmap->UnpinHandle(this->BufferId()); m_nvmap->FreeHandle(this->BufferId(), true); } } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 8176a41be..1c218566f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -24,6 +24,8 @@ #include "core/hle/kernel/k_process.h" #include "core/memory.h" #include "video_core/gpu.h" +#include "video_core/host1x/gpu_device_memory_manager.h" +#include "video_core/host1x/host1x.h" #include "video_core/rasterizer_download_area.h" namespace Core::Memory { @@ -637,17 +639,6 @@ struct Memory::Impl { LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target), base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); - // During boot, current_page_table might not be set yet, in which case we need not flush - if (system.IsPoweredOn()) { - auto& gpu = system.GPU(); - for (u64 i = 0; i < size; i++) { - const auto page = base + i; - if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) { - gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE); - } - } - } - const auto end = base + size; ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", base + page_table.pointers.size()); @@ -811,21 +802,33 @@ struct Memory::Impl { return true; } - void HandleRasterizerDownload(VAddr address, size_t size) { + void HandleRasterizerDownload(VAddr v_address, size_t size) { + const auto* p = GetPointerImpl( + v_address, []() {}, []() {}); + if (!gpu_device_memory) [[unlikely]] { + gpu_device_memory = &system.Host1x().MemoryManager(); + } const size_t core = system.GetCurrentHostThreadID(); auto& current_area = rasterizer_read_areas[core]; - const VAddr end_address = address + size; - if (current_area.start_address <= address && end_address <= current_area.end_address) - [[likely]] { - return; - } - current_area = system.GPU().OnCPURead(address, size); + gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) { + const DAddr end_address = address + size; + if (current_area.start_address <= address && end_address <= current_area.end_address) + [[likely]] { + return; + } + current_area = system.GPU().OnCPURead(address, size); + }); } - void HandleRasterizerWrite(VAddr address, size_t size) { + void HandleRasterizerWrite(VAddr v_address, size_t size) { + const auto* p = GetPointerImpl( + v_address, []() {}, []() {}); constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; const size_t core = std::min(system.GetCurrentHostThreadID(), sys_core); // any other calls threads go to syscore. + if (!gpu_device_memory) [[unlikely]] { + gpu_device_memory = &system.Host1x().MemoryManager(); + } // Guard on sys_core; if (core == sys_core) [[unlikely]] { sys_core_guard.lock(); @@ -835,36 +838,53 @@ struct Memory::Impl { sys_core_guard.unlock(); } }); - auto& current_area = rasterizer_write_areas[core]; - VAddr subaddress = address >> YUZU_PAGEBITS; - bool do_collection = current_area.last_address == subaddress; - if (!do_collection) [[unlikely]] { - do_collection = system.GPU().OnCPUWrite(address, size); - if (!do_collection) { - return; + gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) { + auto& current_area = rasterizer_write_areas[core]; + PAddr subaddress = address >> YUZU_PAGEBITS; + bool do_collection = current_area.last_address == subaddress; + if (!do_collection) [[unlikely]] { + do_collection = system.GPU().OnCPUWrite(address, size); + if (!do_collection) { + return; + } + current_area.last_address = subaddress; } - current_area.last_address = subaddress; - } - gpu_dirty_managers[core].Collect(address, size); + gpu_dirty_managers[core].Collect(address, size); + }); } struct GPUDirtyState { - VAddr last_address; + PAddr last_address; }; - void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { - system.GPU().InvalidateRegion(GetInteger(dest_addr), size); - } - - void FlushRegion(Common::ProcessAddress dest_addr, size_t size) { - system.GPU().FlushRegion(GetInteger(dest_addr), size); + void InvalidateGPUMemory(u8* p, size_t size) { + constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; + const size_t core = std::min(system.GetCurrentHostThreadID(), + sys_core); // any other calls threads go to syscore. + if (!gpu_device_memory) [[unlikely]] { + gpu_device_memory = &system.Host1x().MemoryManager(); + } + // Guard on sys_core; + if (core == sys_core) [[unlikely]] { + sys_core_guard.lock(); + } + SCOPE_EXIT({ + if (core == sys_core) [[unlikely]] { + sys_core_guard.unlock(); + } + }); + auto& gpu = system.GPU(); + gpu_device_memory->ApplyOpOnPointer( + p, scratch_buffers[core], [&](DAddr address) { gpu.InvalidateRegion(address, size); }); } Core::System& system; + Tegra::MaxwellDeviceMemoryManager* gpu_device_memory{}; Common::PageTable* current_page_table = nullptr; std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_read_areas{}; std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; + std::array<Common::ScratchBuffer<u32>, Core::Hardware::NUM_CPU_CORES> scratch_buffers{}; std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers; std::mutex sys_core_guard; @@ -1059,14 +1079,6 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug) impl->MarkRegionDebug(GetInteger(vaddr), size, debug); } -void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { - impl->InvalidateRegion(dest_addr, size); -} - -void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) { - impl->FlushRegion(dest_addr, size); -} - bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { [[maybe_unused]] bool mapped = true; [[maybe_unused]] bool rasterizer = false; @@ -1078,10 +1090,10 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { GetInteger(vaddr)); mapped = false; }, - [&] { - impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size); - rasterizer = true; - }); + [&] { rasterizer = true; }); + if (rasterizer) { + impl->InvalidateGPUMemory(ptr, size); + } #ifdef __linux__ if (!rasterizer && mapped) { diff --git a/src/core/memory.h b/src/core/memory.h index dddfaf4a4..f7e6b297f 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -12,6 +12,7 @@ #include "common/scratch_buffer.h" #include "common/typed_address.h" +#include "core/guest_memory.h" #include "core/hle/result.h" namespace Common { @@ -486,10 +487,10 @@ public: void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); - void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); + bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size); + bool InvalidateSeparateHeap(void* fault_address); - void FlushRegion(Common::ProcessAddress dest_addr, size_t size); private: Core::System& system; @@ -498,209 +499,9 @@ private: std::unique_ptr<Impl> impl; }; -enum GuestMemoryFlags : u32 { - Read = 1 << 0, - Write = 1 << 1, - Safe = 1 << 2, - Cached = 1 << 3, - - SafeRead = Read | Safe, - SafeWrite = Write | Safe, - SafeReadWrite = SafeRead | SafeWrite, - SafeReadCachedWrite = SafeReadWrite | Cached, - - UnsafeRead = Read, - UnsafeWrite = Write, - UnsafeReadWrite = UnsafeRead | UnsafeWrite, - UnsafeReadCachedWrite = UnsafeReadWrite | Cached, -}; - -namespace { -template <typename M, typename T, GuestMemoryFlags FLAGS> -class GuestMemory { - using iterator = T*; - using const_iterator = const T*; - using value_type = T; - using element_type = T; - using iterator_category = std::contiguous_iterator_tag; - -public: - GuestMemory() = delete; - explicit GuestMemory(M& memory, u64 addr, std::size_t size, - Common::ScratchBuffer<T>* backup = nullptr) - : m_memory{memory}, m_addr{addr}, m_size{size} { - static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write); - if constexpr (FLAGS & GuestMemoryFlags::Read) { - Read(addr, size, backup); - } - } - - ~GuestMemory() = default; - - T* data() noexcept { - return m_data_span.data(); - } - - const T* data() const noexcept { - return m_data_span.data(); - } - - size_t size() const noexcept { - return m_size; - } - - size_t size_bytes() const noexcept { - return this->size() * sizeof(T); - } - - [[nodiscard]] T* begin() noexcept { - return this->data(); - } - - [[nodiscard]] const T* begin() const noexcept { - return this->data(); - } - - [[nodiscard]] T* end() noexcept { - return this->data() + this->size(); - } - - [[nodiscard]] const T* end() const noexcept { - return this->data() + this->size(); - } - - T& operator[](size_t index) noexcept { - return m_data_span[index]; - } - - const T& operator[](size_t index) const noexcept { - return m_data_span[index]; - } - - void SetAddressAndSize(u64 addr, std::size_t size) noexcept { - m_addr = addr; - m_size = size; - m_addr_changed = true; - } - - std::span<T> Read(u64 addr, std::size_t size, - Common::ScratchBuffer<T>* backup = nullptr) noexcept { - m_addr = addr; - m_size = size; - if (m_size == 0) { - m_is_data_copy = true; - return {}; - } - - if (this->TrySetSpan()) { - if constexpr (FLAGS & GuestMemoryFlags::Safe) { - m_memory.FlushRegion(m_addr, this->size_bytes()); - } - } else { - if (backup) { - backup->resize_destructive(this->size()); - m_data_span = *backup; - } else { - m_data_copy.resize(this->size()); - m_data_span = std::span(m_data_copy); - } - m_is_data_copy = true; - m_span_valid = true; - if constexpr (FLAGS & GuestMemoryFlags::Safe) { - m_memory.ReadBlock(m_addr, this->data(), this->size_bytes()); - } else { - m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes()); - } - } - return m_data_span; - } - - void Write(std::span<T> write_data) noexcept { - if constexpr (FLAGS & GuestMemoryFlags::Cached) { - m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes()); - } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { - m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes()); - } else { - m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes()); - } - } - - bool TrySetSpan() noexcept { - if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) { - m_data_span = {reinterpret_cast<T*>(ptr), this->size()}; - m_span_valid = true; - return true; - } - return false; - } - -protected: - bool IsDataCopy() const noexcept { - return m_is_data_copy; - } - - bool AddressChanged() const noexcept { - return m_addr_changed; - } - - M& m_memory; - u64 m_addr{}; - size_t m_size{}; - std::span<T> m_data_span{}; - std::vector<T> m_data_copy{}; - bool m_span_valid{false}; - bool m_is_data_copy{false}; - bool m_addr_changed{false}; -}; - -template <typename M, typename T, GuestMemoryFlags FLAGS> -class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> { -public: - GuestMemoryScoped() = delete; - explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size, - Common::ScratchBuffer<T>* backup = nullptr) - : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) { - if constexpr (!(FLAGS & GuestMemoryFlags::Read)) { - if (!this->TrySetSpan()) { - if (backup) { - this->m_data_span = *backup; - this->m_span_valid = true; - this->m_is_data_copy = true; - } - } - } - } - - ~GuestMemoryScoped() { - if constexpr (FLAGS & GuestMemoryFlags::Write) { - if (this->size() == 0) [[unlikely]] { - return; - } - - if (this->AddressChanged() || this->IsDataCopy()) { - ASSERT(this->m_span_valid); - if constexpr (FLAGS & GuestMemoryFlags::Cached) { - this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes()); - } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { - this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes()); - } else { - this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes()); - } - } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) || - (FLAGS & GuestMemoryFlags::Cached)) { - this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes()); - } - } - } -}; -} // namespace - template <typename T, GuestMemoryFlags FLAGS> -using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>; +using CpuGuestMemory = GuestMemory<Core::Memory::Memory, T, FLAGS>; template <typename T, GuestMemoryFlags FLAGS> -using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>; -template <typename T, GuestMemoryFlags FLAGS> -using GpuGuestMemory = GuestMemory<Tegra::MemoryManager, T, FLAGS>; -template <typename T, GuestMemoryFlags FLAGS> -using GpuGuestMemoryScoped = GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>; +using CpuGuestMemoryScoped = GuestMemoryScoped<Core::Memory::Memory, T, FLAGS>; + } // namespace Core::Memory |