diff options
Diffstat (limited to 'src')
49 files changed, 757 insertions, 321 deletions
diff --git a/src/core/file_sys/control_metadata.cpp b/src/core/file_sys/control_metadata.cpp index c8fa912bf..e065e592f 100644 --- a/src/core/file_sys/control_metadata.cpp +++ b/src/core/file_sys/control_metadata.cpp @@ -8,13 +8,23 @@ namespace FileSys { -const std::array<const char*, 15> LANGUAGE_NAMES = { - "AmericanEnglish", "BritishEnglish", "Japanese", - "French", "German", "LatinAmericanSpanish", - "Spanish", "Italian", "Dutch", - "CanadianFrench", "Portugese", "Russian", - "Korean", "Taiwanese", "Chinese", -}; +const std::array<const char*, 15> LANGUAGE_NAMES{{ + "AmericanEnglish", + "BritishEnglish", + "Japanese", + "French", + "German", + "LatinAmericanSpanish", + "Spanish", + "Italian", + "Dutch", + "CanadianFrench", + "Portuguese", + "Russian", + "Korean", + "Taiwanese", + "Chinese", +}}; std::string LanguageEntry::GetApplicationName() const { return Common::StringFromFixedZeroTerminatedBuffer(application_name.data(), diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp index a3f8f2f73..07c3af64a 100644 --- a/src/core/file_sys/registered_cache.cpp +++ b/src/core/file_sys/registered_cache.cpp @@ -381,22 +381,22 @@ std::vector<RegisteredCacheEntry> RegisteredCache::ListEntriesFilter( return out; } -static std::shared_ptr<NCA> GetNCAFromNSPForID(std::shared_ptr<NSP> nsp, const NcaID& id) { - const auto file = nsp->GetFile(fmt::format("{}.nca", Common::HexArrayToString(id, false))); +static std::shared_ptr<NCA> GetNCAFromNSPForID(const NSP& nsp, const NcaID& id) { + const auto file = nsp.GetFile(fmt::format("{}.nca", Common::HexArrayToString(id, false))); if (file == nullptr) return nullptr; return std::make_shared<NCA>(file); } -InstallResult RegisteredCache::InstallEntry(std::shared_ptr<XCI> xci, bool overwrite_if_exists, +InstallResult RegisteredCache::InstallEntry(const XCI& xci, bool overwrite_if_exists, const VfsCopyFunction& copy) { - return InstallEntry(xci->GetSecurePartitionNSP(), overwrite_if_exists, copy); + return InstallEntry(*xci.GetSecurePartitionNSP(), overwrite_if_exists, copy); } -InstallResult RegisteredCache::InstallEntry(std::shared_ptr<NSP> nsp, bool overwrite_if_exists, +InstallResult RegisteredCache::InstallEntry(const NSP& nsp, bool overwrite_if_exists, const VfsCopyFunction& copy) { - const auto& ncas = nsp->GetNCAsCollapsed(); - const auto& meta_iter = std::find_if(ncas.begin(), ncas.end(), [](std::shared_ptr<NCA> nca) { + const auto ncas = nsp.GetNCAsCollapsed(); + const auto meta_iter = std::find_if(ncas.begin(), ncas.end(), [](const auto& nca) { return nca->GetType() == NCAContentType::Meta; }); @@ -410,7 +410,7 @@ InstallResult RegisteredCache::InstallEntry(std::shared_ptr<NSP> nsp, bool overw const auto meta_id_raw = (*meta_iter)->GetName().substr(0, 32); const auto meta_id = Common::HexStringToArray<16>(meta_id_raw); - const auto res = RawInstallNCA(*meta_iter, copy, overwrite_if_exists, meta_id); + const auto res = RawInstallNCA(**meta_iter, copy, overwrite_if_exists, meta_id); if (res != InstallResult::Success) return res; @@ -422,7 +422,7 @@ InstallResult RegisteredCache::InstallEntry(std::shared_ptr<NSP> nsp, bool overw const auto nca = GetNCAFromNSPForID(nsp, record.nca_id); if (nca == nullptr) return InstallResult::ErrorCopyFailed; - const auto res2 = RawInstallNCA(nca, copy, overwrite_if_exists, record.nca_id); + const auto res2 = RawInstallNCA(*nca, copy, overwrite_if_exists, record.nca_id); if (res2 != InstallResult::Success) return res2; } @@ -431,21 +431,21 @@ InstallResult RegisteredCache::InstallEntry(std::shared_ptr<NSP> nsp, bool overw return InstallResult::Success; } -InstallResult RegisteredCache::InstallEntry(std::shared_ptr<NCA> nca, TitleType type, +InstallResult RegisteredCache::InstallEntry(const NCA& nca, TitleType type, bool overwrite_if_exists, const VfsCopyFunction& copy) { CNMTHeader header{ - nca->GetTitleId(), ///< Title ID - 0, ///< Ignore/Default title version - type, ///< Type - {}, ///< Padding - 0x10, ///< Default table offset - 1, ///< 1 Content Entry - 0, ///< No Meta Entries - {}, ///< Padding + nca.GetTitleId(), ///< Title ID + 0, ///< Ignore/Default title version + type, ///< Type + {}, ///< Padding + 0x10, ///< Default table offset + 1, ///< 1 Content Entry + 0, ///< No Meta Entries + {}, ///< Padding }; OptionalHeader opt_header{0, 0}; - ContentRecord c_rec{{}, {}, {}, GetCRTypeFromNCAType(nca->GetType()), {}}; - const auto& data = nca->GetBaseFile()->ReadBytes(0x100000); + ContentRecord c_rec{{}, {}, {}, GetCRTypeFromNCAType(nca.GetType()), {}}; + const auto& data = nca.GetBaseFile()->ReadBytes(0x100000); mbedtls_sha256(data.data(), data.size(), c_rec.hash.data(), 0); memcpy(&c_rec.nca_id, &c_rec.hash, 16); const CNMT new_cnmt(header, opt_header, {c_rec}, {}); @@ -454,10 +454,10 @@ InstallResult RegisteredCache::InstallEntry(std::shared_ptr<NCA> nca, TitleType return RawInstallNCA(nca, copy, overwrite_if_exists, c_rec.nca_id); } -InstallResult RegisteredCache::RawInstallNCA(std::shared_ptr<NCA> nca, const VfsCopyFunction& copy, +InstallResult RegisteredCache::RawInstallNCA(const NCA& nca, const VfsCopyFunction& copy, bool overwrite_if_exists, std::optional<NcaID> override_id) { - const auto in = nca->GetBaseFile(); + const auto in = nca.GetBaseFile(); Core::Crypto::SHA256Hash hash{}; // Calculate NcaID diff --git a/src/core/file_sys/registered_cache.h b/src/core/file_sys/registered_cache.h index 6b89db8de..3b77af4e0 100644 --- a/src/core/file_sys/registered_cache.h +++ b/src/core/file_sys/registered_cache.h @@ -6,7 +6,6 @@ #include <array> #include <functional> -#include <map> #include <memory> #include <string> #include <vector> @@ -104,17 +103,16 @@ public: // Raw copies all the ncas from the xci/nsp to the csache. Does some quick checks to make sure // there is a meta NCA and all of them are accessible. - InstallResult InstallEntry(std::shared_ptr<XCI> xci, bool overwrite_if_exists = false, + InstallResult InstallEntry(const XCI& xci, bool overwrite_if_exists = false, const VfsCopyFunction& copy = &VfsRawCopy); - InstallResult InstallEntry(std::shared_ptr<NSP> nsp, bool overwrite_if_exists = false, + InstallResult InstallEntry(const NSP& nsp, bool overwrite_if_exists = false, const VfsCopyFunction& copy = &VfsRawCopy); // Due to the fact that we must use Meta-type NCAs to determine the existance of files, this // poses quite a challenge. Instead of creating a new meta NCA for this file, yuzu will create a // dir inside the NAND called 'yuzu_meta' and store the raw CNMT there. // TODO(DarkLordZach): Author real meta-type NCAs and install those. - InstallResult InstallEntry(std::shared_ptr<NCA> nca, TitleType type, - bool overwrite_if_exists = false, + InstallResult InstallEntry(const NCA& nca, TitleType type, bool overwrite_if_exists = false, const VfsCopyFunction& copy = &VfsRawCopy); private: @@ -128,7 +126,7 @@ private: std::optional<NcaID> GetNcaIDFromMetadata(u64 title_id, ContentRecordType type) const; VirtualFile GetFileAtID(NcaID id) const; VirtualFile OpenFileOrDirectoryConcat(const VirtualDir& dir, std::string_view path) const; - InstallResult RawInstallNCA(std::shared_ptr<NCA> nca, const VfsCopyFunction& copy, + InstallResult RawInstallNCA(const NCA& nca, const VfsCopyFunction& copy, bool overwrite_if_exists, std::optional<NcaID> override_id = {}); bool RawInstallYuzuMeta(const CNMT& cnmt); diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 687dea409..e6b5171ee 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -282,7 +282,7 @@ static void FpuWrite(std::size_t id, u128 val, Kernel::Thread* thread = nullptr) if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { thread_context.vector_registers[id - UC_ARM64_REG_Q0] = val; } else if (id == FPCR_REGISTER) { - thread_context.fpcr = val[0]; + thread_context.fpcr = static_cast<u32>(val[0]); } } diff --git a/src/core/hle/kernel/resource_limit.h b/src/core/hle/kernel/resource_limit.h index bec065543..59dc11c22 100644 --- a/src/core/hle/kernel/resource_limit.h +++ b/src/core/hle/kernel/resource_limit.h @@ -14,7 +14,7 @@ namespace Kernel { class KernelCore; -enum class ResourceType { +enum class ResourceType : u32 { PhysicalMemory, Threads, Events, @@ -25,6 +25,10 @@ enum class ResourceType { ResourceTypeCount }; +constexpr bool IsValidResourceType(ResourceType type) { + return type < ResourceType::ResourceTypeCount; +} + class ResourceLimit final : public Object { public: /** diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index f287f7c97..1f19d5576 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -105,6 +105,38 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add return RESULT_SUCCESS; } + +enum class ResourceLimitValueType { + CurrentValue, + LimitValue, +}; + +ResultVal<s64> RetrieveResourceLimitValue(Handle resource_limit, u32 resource_type, + ResourceLimitValueType value_type) { + const auto type = static_cast<ResourceType>(resource_type); + if (!IsValidResourceType(type)) { + LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); + return ERR_INVALID_ENUM_VALUE; + } + + const auto& kernel = Core::System::GetInstance().Kernel(); + const auto* const current_process = kernel.CurrentProcess(); + ASSERT(current_process != nullptr); + + const auto resource_limit_object = + current_process->GetHandleTable().Get<ResourceLimit>(resource_limit); + if (!resource_limit_object) { + LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}", + resource_limit); + return ERR_INVALID_HANDLE; + } + + if (value_type == ResourceLimitValueType::CurrentValue) { + return MakeResult(resource_limit_object->GetCurrentResourceValue(type)); + } + + return MakeResult(resource_limit_object->GetMaxResourceValue(type)); +} } // Anonymous namespace /// Set the process heap to a given Size. It can both extend and shrink the heap. @@ -1346,6 +1378,87 @@ static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) { return RESULT_SUCCESS; } +static ResultCode CreateResourceLimit(Handle* out_handle) { + LOG_DEBUG(Kernel_SVC, "called"); + + auto& kernel = Core::System::GetInstance().Kernel(); + auto resource_limit = ResourceLimit::Create(kernel); + + auto* const current_process = kernel.CurrentProcess(); + ASSERT(current_process != nullptr); + + const auto handle = current_process->GetHandleTable().Create(std::move(resource_limit)); + if (handle.Failed()) { + return handle.Code(); + } + + *out_handle = *handle; + return RESULT_SUCCESS; +} + +static ResultCode GetResourceLimitLimitValue(u64* out_value, Handle resource_limit, + u32 resource_type) { + LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type); + + const auto limit_value = RetrieveResourceLimitValue(resource_limit, resource_type, + ResourceLimitValueType::LimitValue); + if (limit_value.Failed()) { + return limit_value.Code(); + } + + *out_value = static_cast<u64>(*limit_value); + return RESULT_SUCCESS; +} + +static ResultCode GetResourceLimitCurrentValue(u64* out_value, Handle resource_limit, + u32 resource_type) { + LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type); + + const auto current_value = RetrieveResourceLimitValue(resource_limit, resource_type, + ResourceLimitValueType::CurrentValue); + if (current_value.Failed()) { + return current_value.Code(); + } + + *out_value = static_cast<u64>(*current_value); + return RESULT_SUCCESS; +} + +static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource_type, u64 value) { + LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}, Value={}", resource_limit, + resource_type, value); + + const auto type = static_cast<ResourceType>(resource_type); + if (!IsValidResourceType(type)) { + LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); + return ERR_INVALID_ENUM_VALUE; + } + + auto& kernel = Core::System::GetInstance().Kernel(); + auto* const current_process = kernel.CurrentProcess(); + ASSERT(current_process != nullptr); + + auto resource_limit_object = + current_process->GetHandleTable().Get<ResourceLimit>(resource_limit); + if (!resource_limit_object) { + LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}", + resource_limit); + return ERR_INVALID_HANDLE; + } + + const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value)); + if (set_result.IsError()) { + LOG_ERROR( + Kernel_SVC, + "Attempted to lower resource limit ({}) for category '{}' below its current value ({})", + resource_limit_object->GetMaxResourceValue(type), resource_type, + resource_limit_object->GetCurrentResourceValue(type)); + return set_result; + } + + return RESULT_SUCCESS; +} + namespace { struct FunctionDef { using Func = void(); @@ -1405,8 +1518,8 @@ static const FunctionDef SVC_Table[] = { {0x2D, nullptr, "UnmapPhysicalMemory"}, {0x2E, nullptr, "GetFutureThreadInfo"}, {0x2F, nullptr, "GetLastThreadInfo"}, - {0x30, nullptr, "GetResourceLimitLimitValue"}, - {0x31, nullptr, "GetResourceLimitCurrentValue"}, + {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, + {0x31, SvcWrap<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"}, {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"}, {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"}, @@ -1482,8 +1595,8 @@ static const FunctionDef SVC_Table[] = { {0x7A, nullptr, "StartProcess"}, {0x7B, nullptr, "TerminateProcess"}, {0x7C, SvcWrap<GetProcessInfo>, "GetProcessInfo"}, - {0x7D, nullptr, "CreateResourceLimit"}, - {0x7E, nullptr, "SetResourceLimitLimitValue"}, + {0x7D, SvcWrap<CreateResourceLimit>, "CreateResourceLimit"}, + {0x7E, SvcWrap<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"}, {0x7F, nullptr, "CallSecureMonitor"}, }; diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 233a99fb0..fa1116624 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h @@ -43,6 +43,14 @@ void SvcWrap() { FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1))).raw); } +template <ResultCode func(u32*)> +void SvcWrap() { + u32 param = 0; + const u32 retval = func(¶m).raw; + Core::CurrentArmInterface().SetReg(1, param); + FuncReturn(retval); +} + template <ResultCode func(u32*, u32)> void SvcWrap() { u32 param_1 = 0; diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index c629f9357..9521431bb 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -21,17 +21,6 @@ namespace Service::Account { -// TODO: RE this structure -struct UserData { - INSERT_PADDING_WORDS(1); - u32 icon_id; - u8 bg_color_id; - INSERT_PADDING_BYTES(0x7); - INSERT_PADDING_BYTES(0x10); - INSERT_PADDING_BYTES(0x60); -}; -static_assert(sizeof(UserData) == 0x80, "UserData structure has incorrect size"); - // Smallest JPEG https://github.com/mathiasbynens/small/blob/master/jpeg.jpg // used as a backup should the one on disk not exist constexpr u32 backup_jpeg_size = 107; @@ -72,9 +61,11 @@ private: void Get(Kernel::HLERequestContext& ctx) { LOG_INFO(Service_ACC, "called user_id={}", user_id.Format()); ProfileBase profile_base{}; - std::array<u8, MAX_DATA> data{}; + ProfileData data{}; if (profile_manager.GetProfileBaseAndData(user_id, profile_base, data)) { - ctx.WriteBuffer(data); + std::array<u8, sizeof(ProfileData)> raw_data; + std::memcpy(raw_data.data(), &data, sizeof(ProfileData)); + ctx.WriteBuffer(raw_data); IPC::ResponseBuilder rb{ctx, 16}; rb.Push(RESULT_SUCCESS); rb.PushRaw(profile_base); diff --git a/src/core/hle/service/acc/profile_manager.cpp b/src/core/hle/service/acc/profile_manager.cpp index 968263846..1316d0b07 100644 --- a/src/core/hle/service/acc/profile_manager.cpp +++ b/src/core/hle/service/acc/profile_manager.cpp @@ -18,7 +18,7 @@ struct UserRaw { UUID uuid2; u64 timestamp; ProfileUsername username; - INSERT_PADDING_BYTES(0x80); + ProfileData extra_data; }; static_assert(sizeof(UserRaw) == 0xC8, "UserRaw has incorrect size."); @@ -346,7 +346,7 @@ void ProfileManager::ParseUserSaveFile() { continue; } - AddUser({user.uuid, user.username, user.timestamp, {}, false}); + AddUser({user.uuid, user.username, user.timestamp, user.extra_data, false}); } std::stable_partition(profiles.begin(), profiles.end(), @@ -361,6 +361,7 @@ void ProfileManager::WriteUserSaveFile() { raw.users[i].uuid2 = profiles[i].user_uuid; raw.users[i].uuid = profiles[i].user_uuid; raw.users[i].timestamp = profiles[i].creation_time; + raw.users[i].extra_data = profiles[i].data; } const auto raw_path = diff --git a/src/core/hle/service/acc/profile_manager.h b/src/core/hle/service/acc/profile_manager.h index d2d8e6c6b..c4ce2e0b3 100644 --- a/src/core/hle/service/acc/profile_manager.h +++ b/src/core/hle/service/acc/profile_manager.h @@ -13,7 +13,6 @@ namespace Service::Account { constexpr std::size_t MAX_USERS = 8; -constexpr std::size_t MAX_DATA = 128; constexpr u128 INVALID_UUID{{0, 0}}; struct UUID { @@ -50,9 +49,20 @@ static_assert(sizeof(UUID) == 16, "UUID is an invalid size!"); constexpr std::size_t profile_username_size = 32; using ProfileUsername = std::array<u8, profile_username_size>; -using ProfileData = std::array<u8, MAX_DATA>; using UserIDArray = std::array<UUID, MAX_USERS>; +/// Contains extra data related to a user. +/// TODO: RE this structure +struct ProfileData { + INSERT_PADDING_WORDS(1); + u32 icon_id; + u8 bg_color_id; + INSERT_PADDING_BYTES(0x7); + INSERT_PADDING_BYTES(0x10); + INSERT_PADDING_BYTES(0x60); +}; +static_assert(sizeof(ProfileData) == 0x80, "ProfileData structure has incorrect size"); + /// This holds general information about a users profile. This is where we store all the information /// based on a specific user struct ProfileInfo { diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index 46604887c..a089bf159 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -575,8 +575,8 @@ void Controller_NPad::AddNewControllerAt(NPadControllerType controller, u32 npad return; } - connected_controllers[npad_id] = {controller, true}; - InitNewlyAddedControler(npad_id); + connected_controllers[NPadIdToIndex(npad_id)] = {controller, true}; + InitNewlyAddedControler(NPadIdToIndex(npad_id)); } void Controller_NPad::ConnectNPad(u32 npad_id) { diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h index ea8057b80..abff6544d 100644 --- a/src/core/hle/service/hid/controllers/npad.h +++ b/src/core/hle/service/hid/controllers/npad.h @@ -80,9 +80,9 @@ public: struct LedPattern { explicit LedPattern(u64 light1, u64 light2, u64 light3, u64 light4) { position1.Assign(light1); - position1.Assign(light2); - position1.Assign(light3); - position1.Assign(light4); + position2.Assign(light2); + position3.Assign(light3); + position4.Assign(light4); } union { u64 raw{}; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 874d5e1c3..2e2b0ae1c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" #include "core/memory.h" -#include "video_core/command_processor.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -129,6 +128,12 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector< return 0; } +static void PushGPUEntries(Tegra::CommandList&& entries) { + auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()}; + dma_pusher.Push(std::move(entries)); + dma_pusher.DispatchCalls(); +} + u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { if (input.size() < sizeof(IoctlSubmitGpfifo)) { UNIMPLEMENTED(); @@ -142,11 +147,11 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp params.num_entries * sizeof(Tegra::CommandListHeader), "Incorrect input size"); - std::vector<Tegra::CommandListHeader> entries(params.num_entries); + Tegra::CommandList entries(params.num_entries); std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], params.num_entries * sizeof(Tegra::CommandListHeader)); - Core::System::GetInstance().GPU().ProcessCommandLists(entries); + PushGPUEntries(std::move(entries)); params.fence_out.id = 0; params.fence_out.value = 0; @@ -163,11 +168,11 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output) LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, params.num_entries, params.flags); - std::vector<Tegra::CommandListHeader> entries(params.num_entries); + Tegra::CommandList entries(params.num_entries); Memory::ReadBlock(params.address, entries.data(), params.num_entries * sizeof(Tegra::CommandListHeader)); - Core::System::GetInstance().GPU().ProcessCommandLists(entries); + PushGPUEntries(std::move(entries)); params.fence_out.id = 0; params.fence_out.value = 0; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3f906a517..0406fbcd9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,6 +1,6 @@ add_library(video_core STATIC - command_processor.cpp - command_processor.h + dma_pusher.cpp + dma_pusher.h debug_utils/debug_utils.cpp debug_utils/debug_utils.h engines/fermi_2d.cpp diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h deleted file mode 100644 index bd766e77a..000000000 --- a/src/video_core/command_processor.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <type_traits> -#include "common/bit_field.h" -#include "common/common_types.h" -#include "video_core/memory_manager.h" - -namespace Tegra { - -enum class SubmissionMode : u32 { - IncreasingOld = 0, - Increasing = 1, - NonIncreasingOld = 2, - NonIncreasing = 3, - Inline = 4, - IncreaseOnce = 5 -}; - -struct CommandListHeader { - u32 entry0; // gpu_va_lo - union { - u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F) - BitField<0, 8, u32> gpu_va_hi; - BitField<8, 2, u32> unk1; - BitField<10, 21, u32> sz; - BitField<31, 1, u32> unk2; - }; - - GPUVAddr Address() const { - return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0; - } -}; -static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size"); - -union CommandHeader { - u32 hex; - - BitField<0, 13, u32> method; - BitField<13, 3, u32> subchannel; - - BitField<16, 13, u32> arg_count; - BitField<16, 13, u32> inline_data; - - BitField<29, 3, SubmissionMode> mode; -}; -static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); -static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); - -} // namespace Tegra diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp new file mode 100644 index 000000000..63a958f11 --- /dev/null +++ b/src/video_core/dma_pusher.cpp @@ -0,0 +1,123 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/microprofile.h" +#include "core/core.h" +#include "core/memory.h" +#include "video_core/dma_pusher.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" + +namespace Tegra { + +DmaPusher::DmaPusher(GPU& gpu) : gpu(gpu) {} + +DmaPusher::~DmaPusher() = default; + +MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128, 128, 192)); + +void DmaPusher::DispatchCalls() { + MICROPROFILE_SCOPE(DispatchCalls); + + // On entering GPU code, assume all memory may be touched by the ARM core. + gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); + + dma_pushbuffer_subindex = 0; + + while (Core::System::GetInstance().IsPoweredOn()) { + if (!Step()) { + break; + } + } +} + +bool DmaPusher::Step() { + if (dma_get != dma_put) { + // Push buffer non-empty, read a word + const CommandHeader command_header{ + Memory::Read32(*gpu.MemoryManager().GpuToCpuAddress(dma_get))}; + + dma_get += sizeof(u32); + + if (!non_main) { + dma_mget = dma_get; + } + + // now, see if we're in the middle of a command + if (dma_state.length_pending) { + // Second word of long non-inc methods command - method count + dma_state.length_pending = 0; + dma_state.method_count = command_header.method_count_; + } else if (dma_state.method_count) { + // Data word of methods command + CallMethod(command_header.argument); + + if (!dma_state.non_incrementing) { + dma_state.method++; + } + + if (dma_increment_once) { + dma_state.non_incrementing = true; + } + + dma_state.method_count--; + } else { + // No command active - this is the first word of a new one + switch (command_header.mode) { + case SubmissionMode::Increasing: + SetState(command_header); + dma_state.non_incrementing = false; + dma_increment_once = false; + break; + case SubmissionMode::NonIncreasing: + SetState(command_header); + dma_state.non_incrementing = true; + dma_increment_once = false; + break; + case SubmissionMode::Inline: + dma_state.method = command_header.method; + dma_state.subchannel = command_header.subchannel; + CallMethod(command_header.arg_count); + dma_state.non_incrementing = true; + dma_increment_once = false; + break; + case SubmissionMode::IncreaseOnce: + SetState(command_header); + dma_state.non_incrementing = false; + dma_increment_once = true; + break; + } + } + } else if (ib_enable && !dma_pushbuffer.empty()) { + // Current pushbuffer empty, but we have more IB entries to read + const CommandList& command_list{dma_pushbuffer.front()}; + const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; + dma_get = command_list_header.addr; + dma_put = dma_get + command_list_header.size * sizeof(u32); + non_main = command_list_header.is_non_main; + + if (dma_pushbuffer_subindex >= command_list.size()) { + // We've gone through the current list, remove it from the queue + dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + } + } else { + // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do + return {}; + } + + return true; +} + +void DmaPusher::SetState(const CommandHeader& command_header) { + dma_state.method = command_header.method; + dma_state.subchannel = command_header.subchannel; + dma_state.method_count = command_header.method_count; +} + +void DmaPusher::CallMethod(u32 argument) const { + gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count}); +} + +} // namespace Tegra diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h new file mode 100644 index 000000000..16e0697c4 --- /dev/null +++ b/src/video_core/dma_pusher.h @@ -0,0 +1,99 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include <queue> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "video_core/memory_manager.h" + +namespace Tegra { + +enum class SubmissionMode : u32 { + IncreasingOld = 0, + Increasing = 1, + NonIncreasingOld = 2, + NonIncreasing = 3, + Inline = 4, + IncreaseOnce = 5 +}; + +struct CommandListHeader { + union { + u64 raw; + BitField<0, 40, GPUVAddr> addr; + BitField<41, 1, u64> is_non_main; + BitField<42, 21, u64> size; + }; +}; +static_assert(sizeof(CommandListHeader) == sizeof(u64), "CommandListHeader is incorrect size"); + +union CommandHeader { + u32 argument; + BitField<0, 13, u32> method; + BitField<0, 24, u32> method_count_; + BitField<13, 3, u32> subchannel; + BitField<16, 13, u32> arg_count; + BitField<16, 13, u32> method_count; + BitField<29, 3, SubmissionMode> mode; +}; +static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); +static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); + +class GPU; + +using CommandList = std::vector<Tegra::CommandListHeader>; + +/** + * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the + * emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled + * into a "command stream" consisting of 32-bit words that make up "commands". + * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for + * details on this implementation. + */ +class DmaPusher { +public: + explicit DmaPusher(GPU& gpu); + ~DmaPusher(); + + void Push(CommandList&& entries) { + dma_pushbuffer.push(std::move(entries)); + } + + void DispatchCalls(); + +private: + bool Step(); + + void SetState(const CommandHeader& command_header); + + void CallMethod(u32 argument) const; + + GPU& gpu; + + std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed + std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer + + struct DmaState { + u32 method; ///< Current method + u32 subchannel; ///< Current subchannel + u32 method_count; ///< Current method count + u32 length_pending; ///< Large NI command length pending + bool non_incrementing; ///< Current command’s NI flag + }; + + DmaState dma_state{}; + bool dma_increment_once{}; + + GPUVAddr dma_put{}; ///< pushbuffer current end address + GPUVAddr dma_get{}; ///< pushbuffer current read address + GPUVAddr dma_mget{}; ///< main pushbuffer last read address + bool ib_enable{true}; ///< IB mode enabled + bool non_main{}; ///< non-main pushbuffer active +}; + +} // namespace Tegra diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 8d0700d13..80f70e332 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -14,13 +14,13 @@ namespace Tegra::Engines { Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) : memory_manager(memory_manager), rasterizer{rasterizer} {} -void Fermi2D::WriteReg(u32 method, u32 value) { - ASSERT_MSG(method < Regs::NUM_REGS, +void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { + ASSERT_MSG(method_call.method < Regs::NUM_REGS, "Invalid Fermi2D register, increase the size of the Regs structure"); - regs.reg_array[method] = value; + regs.reg_array[method_call.method] = method_call.argument; - switch (method) { + switch (method_call.method) { case FERMI2D_REG_INDEX(trigger): { HandleSurfaceCopy(); break; @@ -73,13 +73,13 @@ void Fermi2D::HandleSurfaceCopy() { Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth, src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer, dst_buffer, true, regs.src.BlockHeight(), - regs.src.BlockDepth()); + regs.src.BlockDepth(), 0); } else { // If the input is linear and the output is tiled, swizzle the input and copy it over. Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth, src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer, src_buffer, false, regs.dst.BlockHeight(), - regs.dst.BlockDepth()); + regs.dst.BlockDepth(), 0); } } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 2a6e8bbbb..50009bf75 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -27,7 +27,7 @@ public: ~Fermi2D() = default; /// Write the value to the register identified by method. - void WriteReg(u32 method, u32 value); + void CallMethod(const GPU::MethodCall& method_call); struct Regs { static constexpr std::size_t NUM_REGS = 0x258; diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 2adbc9eaf..4880191fc 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -17,19 +17,19 @@ KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer, KeplerMemory::~KeplerMemory() = default; -void KeplerMemory::WriteReg(u32 method, u32 value) { - ASSERT_MSG(method < Regs::NUM_REGS, +void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { + ASSERT_MSG(method_call.method < Regs::NUM_REGS, "Invalid KeplerMemory register, increase the size of the Regs structure"); - regs.reg_array[method] = value; + regs.reg_array[method_call.method] = method_call.argument; - switch (method) { + switch (method_call.method) { case KEPLERMEMORY_REG_INDEX(exec): { state.write_offset = 0; break; } case KEPLERMEMORY_REG_INDEX(data): { - ProcessData(value); + ProcessData(method_call.argument); break; } } diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index bf4a13cff..fe9ebc5b9 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -9,6 +9,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/gpu.h" #include "video_core/memory_manager.h" namespace VideoCore { @@ -26,7 +27,7 @@ public: ~KeplerMemory(); /// Write the value to the register identified by method. - void WriteReg(u32 method, u32 value); + void CallMethod(const GPU::MethodCall& method_call); struct Regs { static constexpr size_t NUM_REGS = 0x7F; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index f0a5470b9..b19b3a75a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -97,71 +97,74 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { macro_interpreter.Execute(search->second, std::move(parameters)); } -void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { +void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); // It is an error to write to a register other than the current macro's ARG register before it // has finished execution. if (executing_macro != 0) { - ASSERT(method == executing_macro + 1); + ASSERT(method_call.method == executing_macro + 1); } // Methods after 0xE00 are special, they're actually triggers for some microcode that was // uploaded to the GPU during initialization. - if (method >= MacroRegistersStart) { + if (method_call.method >= MacroRegistersStart) { // We're trying to execute a macro if (executing_macro == 0) { // A macro call must begin by writing the macro method's register, not its argument. - ASSERT_MSG((method % 2) == 0, + ASSERT_MSG((method_call.method % 2) == 0, "Can't start macro execution by writing to the ARGS register"); - executing_macro = method; + executing_macro = method_call.method; } - macro_params.push_back(value); + macro_params.push_back(method_call.argument); // Call the macro when there are no more parameters in the command buffer - if (remaining_params == 0) { + if (method_call.IsLastCall()) { CallMacroMethod(executing_macro, std::move(macro_params)); } return; } - ASSERT_MSG(method < Regs::NUM_REGS, + ASSERT_MSG(method_call.method < Regs::NUM_REGS, "Invalid Maxwell3D register, increase the size of the Regs structure"); if (debug_context) { debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); } - if (regs.reg_array[method] != value) { - regs.reg_array[method] = value; + if (regs.reg_array[method_call.method] != method_call.argument) { + regs.reg_array[method_call.method] = method_call.argument; // Vertex format - if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && - method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { + if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && + method_call.method < + MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { dirty_flags.vertex_attrib_format = true; } // Vertex buffer - if (method >= MAXWELL3D_REG_INDEX(vertex_array) && - method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { - dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); - } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && - method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { + if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) && + method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { dirty_flags.vertex_array |= - 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); - } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && - method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { - dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays)); + 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); + } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && + method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { + dirty_flags.vertex_array |= + 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); + } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) && + method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { + dirty_flags.vertex_array |= + 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays)); } } - switch (method) { + switch (method_call.method) { case MAXWELL3D_REG_INDEX(macros.data): { - ProcessMacroUpload(value); + ProcessMacroUpload(method_call.argument); break; } case MAXWELL3D_REG_INDEX(macros.bind): { - ProcessMacroBind(value); + ProcessMacroBind(method_call.argument); break; } case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): @@ -180,7 +183,7 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { - ProcessCBData(value); + ProcessCBData(method_call.argument); break; } case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 5ee383764..d3b3ed1f0 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -915,8 +915,15 @@ public: u32 viewport_transform_enabled; - INSERT_PADDING_WORDS(0x25); + INSERT_PADDING_WORDS(0x3); + + union { + BitField<0, 1, u32> depth_range_0_1; + BitField<3, 1, u32> depth_clamp_near; + BitField<4, 1, u32> depth_clamp_far; + } view_volume_clip_control; + INSERT_PADDING_WORDS(0x21); struct { u32 enable; LogicOperation operation; @@ -1093,7 +1100,7 @@ public: u32 GetRegisterValue(u32 method) const; /// Write the value to the register identified by method. - void WriteReg(u32 method, u32 value, u32 remaining_params); + void CallMethod(const GPU::MethodCall& method_call); /// Returns a list of enabled textures for the specified shader stage. std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; @@ -1238,6 +1245,7 @@ ASSERT_REG_POSITION(instanced_arrays, 0x620); ASSERT_REG_POSITION(cull, 0x646); ASSERT_REG_POSITION(pixel_center_integer, 0x649); ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); +ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); ASSERT_REG_POSITION(logic_op, 0x671); ASSERT_REG_POSITION(clear_buffers, 0x674); ASSERT_REG_POSITION(color_mask, 0x680); diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp index 8b5f08351..656db6a61 100644 --- a/src/video_core/engines/maxwell_compute.cpp +++ b/src/video_core/engines/maxwell_compute.cpp @@ -8,13 +8,13 @@ namespace Tegra::Engines { -void MaxwellCompute::WriteReg(u32 method, u32 value) { - ASSERT_MSG(method < Regs::NUM_REGS, +void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) { + ASSERT_MSG(method_call.method < Regs::NUM_REGS, "Invalid MaxwellCompute register, increase the size of the Regs structure"); - regs.reg_array[method] = value; + regs.reg_array[method_call.method] = method_call.argument; - switch (method) { + switch (method_call.method) { case MAXWELL_COMPUTE_REG_INDEX(compute): { LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented"); UNREACHABLE(); diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/maxwell_compute.h index 6ea934fb9..1d71f11bd 100644 --- a/src/video_core/engines/maxwell_compute.h +++ b/src/video_core/engines/maxwell_compute.h @@ -9,6 +9,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/gpu.h" namespace Tegra::Engines { @@ -42,7 +43,7 @@ public: "MaxwellCompute Regs has wrong size"); /// Write the value to the register identified by method. - void WriteReg(u32 method, u32 value); + void CallMethod(const GPU::MethodCall& method_call); }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index a34e884fe..06462f570 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -14,16 +14,16 @@ namespace Tegra::Engines { MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) : memory_manager(memory_manager), rasterizer{rasterizer} {} -void MaxwellDMA::WriteReg(u32 method, u32 value) { - ASSERT_MSG(method < Regs::NUM_REGS, +void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { + ASSERT_MSG(method_call.method < Regs::NUM_REGS, "Invalid MaxwellDMA register, increase the size of the Regs structure"); - regs.reg_array[method] = value; + regs.reg_array[method_call.method] = method_call.argument; #define MAXWELLDMA_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32)) - switch (method) { + switch (method_call.method) { case MAXWELLDMA_REG_INDEX(exec): { HandleCopy(); break; diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 5f3704f05..1f8cd65d2 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -24,7 +24,7 @@ public: ~MaxwellDMA() = default; /// Write the value to the register identified by method. - void WriteReg(u32 method, u32 value); + void CallMethod(const GPU::MethodCall& method_call); struct Regs { static constexpr std::size_t NUM_REGS = 0x1D6; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 51b3904f6..6c81dee64 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -26,6 +26,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { memory_manager = std::make_unique<Tegra::MemoryManager>(); + dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); @@ -51,6 +52,14 @@ const MemoryManager& GPU::MemoryManager() const { return *memory_manager; } +DmaPusher& GPU::DmaPusher() { + return *dma_pusher; +} + +const DmaPusher& GPU::DmaPusher() const { + return *dma_pusher; +} + u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { ASSERT(format != RenderTargetFormat::NONE); @@ -113,4 +122,48 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) { } } +enum class BufferMethods { + BindObject = 0, + CountBufferMethods = 0x40, +}; + +void GPU::CallMethod(const MethodCall& method_call) { + LOG_TRACE(HW_GPU, + "Processing method {:08X} on subchannel {} value " + "{:08X} remaining params {}", + MethCall.method, MethCall.subchannel, value, remaining_params); + + ASSERT(method_call.subchannel < bound_engines.size()); + + if (method_call.method == static_cast<u32>(BufferMethods::BindObject)) { + // Bind the current subchannel to the desired engine id. + LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, + method_call.argument); + bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument); + return; + } + + const EngineID engine = bound_engines[method_call.subchannel]; + + switch (engine) { + case EngineID::FERMI_TWOD_A: + fermi_2d->CallMethod(method_call); + break; + case EngineID::MAXWELL_B: + maxwell_3d->CallMethod(method_call); + break; + case EngineID::MAXWELL_COMPUTE_B: + maxwell_compute->CallMethod(method_call); + break; + case EngineID::MAXWELL_DMA_COPY_A: + maxwell_dma->CallMethod(method_call); + break; + case EngineID::KEPLER_INLINE_TO_MEMORY_B: + kepler_memory->CallMethod(method_call); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented engine"); + } +} + } // namespace Tegra diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 5cc1e19ca..af5ccd1e9 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -9,6 +9,7 @@ #include <vector> #include "common/common_types.h" #include "core/hle/service/nvflinger/buffer_queue.h" +#include "video_core/dma_pusher.h" #include "video_core/memory_manager.h" namespace VideoCore { @@ -119,8 +120,23 @@ public: explicit GPU(VideoCore::RasterizerInterface& rasterizer); ~GPU(); - /// Processes a command list stored at the specified address in GPU memory. - void ProcessCommandLists(const std::vector<CommandListHeader>& commands); + struct MethodCall { + u32 method{}; + u32 argument{}; + u32 subchannel{}; + u32 method_count{}; + + bool IsLastCall() const { + return method_count <= 1; + } + + MethodCall(u32 method, u32 argument, u32 subchannel = 0, u32 method_count = 0) + : method(method), argument(argument), subchannel(subchannel), + method_count(method_count) {} + }; + + /// Calls a GPU method. + void CallMethod(const MethodCall& method_call); /// Returns a reference to the Maxwell3D GPU engine. Engines::Maxwell3D& Maxwell3D(); @@ -134,7 +150,14 @@ public: /// Returns a const reference to the GPU memory manager. const Tegra::MemoryManager& MemoryManager() const; + /// Returns a reference to the GPU DMA pusher. + Tegra::DmaPusher& DmaPusher(); + + /// Returns a const reference to the GPU DMA pusher. + const Tegra::DmaPusher& DmaPusher() const; + private: + std::unique_ptr<Tegra::DmaPusher> dma_pusher; std::unique_ptr<Tegra::MemoryManager> memory_manager; /// Mapping of command subchannels to their bound engine ids. diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index 2b0dea5cd..9c55e9f1e 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -250,7 +250,7 @@ void MacroInterpreter::SetMethodAddress(u32 address) { } void MacroInterpreter::Send(u32 value) { - maxwell3d.WriteReg(method_address.address, value, 0); + maxwell3d.CallMethod({method_address.address, value}); // Increment the method address by the method increment. method_address.address.Assign(method_address.address.Value() + method_address.increment.Value()); diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index f14abba7d..a310491a8 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -16,12 +16,12 @@ namespace VideoCore { using Surface::GetBytesPerPixel; using Surface::PixelFormat; -using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr); +using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, std::size_t, VAddr); using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; template <bool morton_to_linear, PixelFormat format> static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, - u8* buffer, std::size_t buffer_size, VAddr addr) { + u32 tile_width_spacing, u8* buffer, std::size_t buffer_size, VAddr addr) { constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual @@ -31,12 +31,13 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth if constexpr (morton_to_linear) { Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, - stride, height, depth, block_height, block_depth); + stride, height, depth, block_height, block_depth, + tile_width_spacing); } else { - Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, - (height + tile_size_y - 1) / tile_size_y, depth, - bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), - buffer, false, block_height, block_depth); + Tegra::Texture::CopySwizzledData( + (stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y, + depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false, + block_height, block_depth, tile_width_spacing); } } @@ -183,13 +184,14 @@ static constexpr ConversionArray linear_to_morton_fns = { // clang-format on }; -constexpr MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { +static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { switch (mode) { case MortonSwizzleMode::MortonToLinear: return morton_to_linear_fns[static_cast<std::size_t>(format)]; case MortonSwizzleMode::LinearToMorton: return linear_to_morton_fns[static_cast<std::size_t>(format)]; } + UNREACHABLE(); } /// 8x8 Z-Order coordinate from 2D coordinates @@ -325,11 +327,11 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) { } void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, - u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer, - std::size_t buffer_size, VAddr addr) { + u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, + u8* buffer, std::size_t buffer_size, VAddr addr) { - GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, buffer, - buffer_size, addr); + GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, + tile_width_spacing, buffer, buffer_size, addr); } void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, @@ -350,4 +352,4 @@ void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_ } } -} // namespace VideoCore
\ No newline at end of file +} // namespace VideoCore diff --git a/src/video_core/morton.h b/src/video_core/morton.h index b9b9eca86..065f59ce3 100644 --- a/src/video_core/morton.h +++ b/src/video_core/morton.h @@ -12,10 +12,10 @@ namespace VideoCore { enum class MortonSwizzleMode { MortonToLinear, LinearToMorton }; void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, - u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer, - std::size_t buffer_size, VAddr addr); + u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, + u8* buffer, std::size_t buffer_size, VAddr addr); void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data, bool morton_to_linear); -} // namespace VideoCore
\ No newline at end of file +} // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4f608f69e..a44bbfae8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -113,10 +113,24 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); + CheckExtensions(); } RasterizerOpenGL::~RasterizerOpenGL() {} +void RasterizerOpenGL::CheckExtensions() { + if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { + LOG_WARNING( + Render_OpenGL, + "Anisotropic filter is not supported! This can cause graphical issues in some games."); + } + if (!GLAD_GL_ARB_buffer_storage) { + LOG_WARNING( + Render_OpenGL, + "Buffer storage control is not supported! This can cause performance degradation."); + } +} + void RasterizerOpenGL::SetupVertexFormat() { auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& regs = gpu.regs; @@ -669,7 +683,7 @@ void RasterizerOpenGL::DrawArrays() { bool invalidate = buffer_cache.Map(buffer_size); if (invalidate) { // As all cached buffers are invalidated, we need to recheck their state. - gpu.dirty_flags.vertex_attrib_format = 0xFFFFFFFF; + gpu.dirty_flags.vertex_array = 0xFFFFFFFF; } SetupVertexFormat(); @@ -1001,6 +1015,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { viewport.depth_range_far = regs.viewports[i].depth_range_far; viewport.depth_range_near = regs.viewports[i].depth_range_near; } + state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0; + state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0; } void RasterizerOpenGL::SyncClipEnabled() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index dfb4616f2..7ec9746b1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -189,6 +189,10 @@ private: /// Check asserts for alpha testing. void CheckAlphaTests(); + /// Check for extension that are not strictly required + /// but are needed for correct emulation + void CheckExtensions(); + bool has_ARB_direct_state_access = false; bool has_ARB_multi_bind = false; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index d458f77e4..dde2f468d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -97,6 +97,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, + params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1; params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), params.srgb_conversion); @@ -162,6 +163,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.block_width = 1 << config.memory_layout.block_width; params.block_height = 1 << config.memory_layout.block_height; params.block_depth = 1 << config.memory_layout.block_depth; + params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; @@ -197,6 +199,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.block_width = 1 << std::min(block_width, 5U); params.block_height = 1 << std::min(block_height, 5U); params.block_depth = 1 << std::min(block_depth, 5U); + params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromDepthFormat(format); params.component_type = ComponentTypeFromDepthFormat(format); params.type = GetFormatType(params.pixel_format); @@ -223,6 +226,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, + params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; @@ -387,8 +391,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, for (u32 i = 0; i < params.depth; i++) { MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), params.MipBlockHeight(mip_level), params.MipHeight(mip_level), - params.MipBlockDepth(mip_level), 1, gl_buffer.data() + offset_gl, gl_size, - params.addr + offset); + params.MipBlockDepth(mip_level), params.tile_width_spacing, 1, + gl_buffer.data() + offset_gl, gl_size, params.addr + offset); offset += layer_size; offset_gl += gl_size; } @@ -396,8 +400,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, const u64 offset = params.GetMipmapLevelOffset(mip_level); MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), params.MipBlockHeight(mip_level), params.MipHeight(mip_level), - params.MipBlockDepth(mip_level), depth, gl_buffer.data(), gl_buffer.size(), - params.addr + offset); + params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, + gl_buffer.data(), gl_buffer.size(), params.addr + offset); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 9ac79c5a4..c710aa245 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -196,9 +196,15 @@ struct SurfaceParams { /// Checks if surfaces are compatible for caching bool IsCompatibleSurface(const SurfaceParams& other) const { - return std::tie(pixel_format, type, width, height, target, depth) == - std::tie(other.pixel_format, other.type, other.width, other.height, other.target, - other.depth); + if (std::tie(pixel_format, type, width, height, target, depth, is_tiled) == + std::tie(other.pixel_format, other.type, other.width, other.height, other.target, + other.depth, other.is_tiled)) { + if (!is_tiled) + return true; + return std::tie(block_height, block_depth, tile_width_spacing) == + std::tie(other.block_height, other.block_depth, other.tile_width_spacing); + } + return false; } /// Initializes parameters for caching, should be called after everything has been initialized @@ -208,6 +214,7 @@ struct SurfaceParams { u32 block_width; u32 block_height; u32 block_depth; + u32 tile_width_spacing; PixelFormat pixel_format; ComponentType component_type; SurfaceType type; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 7c0935a4e..0c4524d5c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -520,7 +520,7 @@ public: switch (attribute) { case Attribute::Index::ClipDistances0123: case Attribute::Index::ClipDistances4567: { - const u64 index = attribute == Attribute::Index::ClipDistances4567 ? 4 : 0 + elem; + const u64 index = (attribute == Attribute::Index::ClipDistances4567 ? 4 : 0) + elem; UNIMPLEMENTED_IF_MSG( ((header.vtg.clip_distances >> index) & 1) == 0, "Shader is setting gl_ClipDistance{} without enabling it in the header", index); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index b3bfad6a0..dc0a5ed5e 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -92,7 +92,8 @@ OpenGLState::OpenGLState() { point.size = 1; fragment_color_clamp.enabled = false; - + depth_clamp.far_plane = false; + depth_clamp.near_plane = false; polygon_offset.fill_enable = false; polygon_offset.line_enable = false; polygon_offset.point_enable = false; @@ -147,7 +148,7 @@ void OpenGLState::ApplyCulling() const { } void OpenGLState::ApplyColorMask() const { - if (GLAD_GL_ARB_viewport_array && independant_blend.enabled) { + if (independant_blend.enabled) { for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { const auto& updated = color_mask[i]; const auto& current = cur_state.color_mask[i]; @@ -264,7 +265,7 @@ void OpenGLState::EmulateViewportWithScissor() { } void OpenGLState::ApplyViewport() const { - if (GLAD_GL_ARB_viewport_array && geometry_shaders.enabled) { + if (geometry_shaders.enabled) { for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports); i++) { const auto& current = cur_state.viewports[i]; @@ -525,6 +526,21 @@ void OpenGLState::ApplyVertexBufferState() const { } } +void OpenGLState::ApplyDepthClamp() const { + if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && + depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { + return; + } + if (depth_clamp.far_plane != depth_clamp.near_plane) { + UNIMPLEMENTED_MSG("Unimplemented Depth Clamp Separation!"); + } + if (depth_clamp.far_plane || depth_clamp.near_plane) { + glEnable(GL_DEPTH_CLAMP); + } else { + glDisable(GL_DEPTH_CLAMP); + } +} + void OpenGLState::Apply() const { ApplyFramebufferState(); ApplyVertexBufferState(); @@ -556,11 +572,9 @@ void OpenGLState::Apply() const { if (point.size != cur_state.point.size) { glPointSize(point.size); } - if (GLAD_GL_ARB_color_buffer_float) { - if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) { - glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, - fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); - } + if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) { + glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, + fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); } if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) { if (multisample_control.alpha_to_coverage) { @@ -576,7 +590,7 @@ void OpenGLState::Apply() const { glDisable(GL_SAMPLE_ALPHA_TO_ONE); } } - + ApplyDepthClamp(); ApplyColorMask(); ApplyViewport(); ApplyStencilTest(); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index ad29aade6..439bfbc98 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -49,6 +49,11 @@ public: } fragment_color_clamp; struct { + bool far_plane; + bool near_plane; + } depth_clamp; // GL_DEPTH_CLAMP + + struct { bool enabled; // viewports arrays are only supported when geometry shaders are enabled. } geometry_shaders; @@ -235,6 +240,7 @@ private: void ApplyLogicOp() const; void ApplyTextures() const; void ApplySamplers() const; + void ApplyDepthClamp() const; void ApplyPolygonOffset() const; }; diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 7eabd34f1..bbae9285f 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -127,7 +127,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const template <bool fast> void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, - const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth) { + const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth, + const u32 width_spacing) { auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; const u32 stride_x = width * out_bytes_per_pixel; const u32 layer_z = height * stride_x; @@ -137,7 +138,8 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool const u32 block_x_elements = gob_elements_x; const u32 block_y_elements = gob_elements_y * block_height; const u32 block_z_elements = gob_elements_z * block_depth; - const u32 blocks_on_x = div_ceil(width, block_x_elements); + const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing); + const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); const u32 blocks_on_y = div_ceil(height, block_y_elements); const u32 blocks_on_z = div_ceil(depth, block_z_elements); const u32 xy_block_size = gob_size * block_height; @@ -169,13 +171,15 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, - bool unswizzle, u32 block_height, u32 block_depth) { + bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth); + bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, + width_spacing); } else { SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth); + bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, + width_spacing); } } @@ -228,19 +232,19 @@ u32 BytesPerPixel(TextureFormat format) { void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, - u32 block_depth) { + u32 block_depth, u32 width_spacing) { CopySwizzledData((width + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true, - block_height, block_depth); + block_height, block_depth, width_spacing); } std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, - u32 block_height, u32 block_depth) { + u32 block_height, u32 block_depth, u32 width_spacing) { std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, - width, height, depth, block_height, block_depth); + width, height, depth, block_height, block_depth, width_spacing); return unswizzled_data; } diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index f4ef7c73e..85b7e9f7b 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -22,19 +22,20 @@ inline std::size_t GetGOBSize() { void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height = TICEntry::DefaultBlockHeight, - u32 block_depth = TICEntry::DefaultBlockHeight); + u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); /** * Unswizzles a swizzled texture without changing its format. */ std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height = TICEntry::DefaultBlockHeight, - u32 block_depth = TICEntry::DefaultBlockHeight); + u32 block_depth = TICEntry::DefaultBlockHeight, + u32 width_spacing = 0); /// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, - bool unswizzle, u32 block_height, u32 block_depth); + bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); /** * Decodes an unswizzled texture into a A8R8G8B8 texture. diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index ffa08f5c1..e7c78bee2 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -166,6 +166,8 @@ struct TICEntry { BitField<3, 3, u32> block_height; BitField<6, 3, u32> block_depth; + BitField<10, 3, u32> tile_width_spacing; + // High 16 bits of the pitch value BitField<0, 16, u32> pitch_high; BitField<26, 1, u32> use_header_opt_control; diff --git a/src/yuzu/configuration/configure_input.cpp b/src/yuzu/configuration/configure_input.cpp index 7ee572761..ec46dc4e3 100644 --- a/src/yuzu/configuration/configure_input.cpp +++ b/src/yuzu/configuration/configure_input.cpp @@ -4,11 +4,9 @@ #include <algorithm> #include <memory> -#include <utility> -#include <QMenu> -#include <QMessageBox> + #include <QTimer> -#include "common/param_package.h" + #include "configuration/configure_touchscreen_advanced.h" #include "core/core.h" #include "core/hle/service/am/am.h" @@ -16,16 +14,25 @@ #include "core/hle/service/am/applet_oe.h" #include "core/hle/service/hid/controllers/npad.h" #include "core/hle/service/sm/sm.h" -#include "input_common/main.h" #include "ui_configure_input.h" #include "ui_configure_input_player.h" -#include "ui_configure_mouse_advanced.h" -#include "ui_configure_touchscreen_advanced.h" -#include "yuzu/configuration/config.h" #include "yuzu/configuration/configure_input.h" #include "yuzu/configuration/configure_input_player.h" #include "yuzu/configuration/configure_mouse_advanced.h" +namespace { +template <typename Dialog, typename... Args> +void CallConfigureDialog(ConfigureInput& parent, Args&&... args) { + parent.applyConfiguration(); + Dialog dialog(&parent, std::forward<Args>(args)...); + + const auto res = dialog.exec(); + if (res == QDialog::Accepted) { + dialog.applyConfiguration(); + } +} +} // Anonymous namespace + ConfigureInput::ConfigureInput(QWidget* parent) : QWidget(parent), ui(std::make_unique<Ui::ConfigureInput>()) { ui->setupUi(this); @@ -65,31 +72,20 @@ ConfigureInput::ConfigureInput(QWidget* parent) for (std::size_t i = 0; i < players_configure.size(); ++i) { connect(players_configure[i], &QPushButton::pressed, this, - [this, i]() { CallConfigureDialog<ConfigureInputPlayer>(i, false); }); + [this, i] { CallConfigureDialog<ConfigureInputPlayer>(*this, i, false); }); } connect(ui->handheld_configure, &QPushButton::pressed, this, - [this]() { CallConfigureDialog<ConfigureInputPlayer>(8, false); }); + [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 8, false); }); connect(ui->debug_configure, &QPushButton::pressed, this, - [this]() { CallConfigureDialog<ConfigureInputPlayer>(9, true); }); + [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 9, true); }); connect(ui->mouse_advanced, &QPushButton::pressed, this, - [this]() { CallConfigureDialog<ConfigureMouseAdvanced>(); }); + [this] { CallConfigureDialog<ConfigureMouseAdvanced>(*this); }); connect(ui->touchscreen_advanced, &QPushButton::pressed, this, - [this]() { CallConfigureDialog<ConfigureTouchscreenAdvanced>(); }); -} - -template <typename Dialog, typename... Args> -void ConfigureInput::CallConfigureDialog(Args&&... args) { - this->applyConfiguration(); - Dialog dialog(this, std::forward<Args>(args)...); - - const auto res = dialog.exec(); - if (res == QDialog::Accepted) { - dialog.applyConfiguration(); - } + [this] { CallConfigureDialog<ConfigureTouchscreenAdvanced>(*this); }); } void ConfigureInput::OnDockedModeChanged(bool last_state, bool new_state) { diff --git a/src/yuzu/configuration/configure_input.h b/src/yuzu/configuration/configure_input.h index 29a8a03f8..e8723dfcb 100644 --- a/src/yuzu/configuration/configure_input.h +++ b/src/yuzu/configuration/configure_input.h @@ -5,20 +5,12 @@ #pragma once #include <array> -#include <functional> #include <memory> -#include <optional> -#include <string> -#include <unordered_map> #include <QKeyEvent> #include <QWidget> -#include "common/param_package.h" -#include "core/settings.h" -#include "input_common/main.h" #include "ui_configure_input.h" -#include "yuzu/configuration/config.h" class QPushButton; class QString; @@ -40,9 +32,6 @@ public: private: void updateUIEnabled(); - template <typename Dialog, typename... Args> - void CallConfigureDialog(Args&&... args); - void OnDockedModeChanged(bool last_state, bool new_state); /// Load configuration settings. diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index ba6e09368..7dadd83c1 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -25,13 +25,6 @@ const std::array<std::string, ConfigureInputPlayer::ANALOG_SUB_BUTTONS_NUM> "modifier", }}; -static void MoveGridElement(QGridLayout* grid, int row_old, int column_old, int row_new, - int column_new) { - const auto item = grid->itemAtPosition(row_old, column_old); - // grid->removeItem(item); - grid->addItem(item, row_new, column_new); -} - static void LayerGridElements(QGridLayout* grid, QWidget* item, QWidget* onTopOf) { const int index1 = grid->indexOf(item); const int index2 = grid->indexOf(onTopOf); @@ -111,11 +104,10 @@ static QString AnalogToText(const Common::ParamPackage& param, const std::string } }; -ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, u8 player_index, bool debug) - : QDialog(parent), ui(std::make_unique<Ui::ConfigureInputPlayer>()), - timeout_timer(std::make_unique<QTimer>()), poll_timer(std::make_unique<QTimer>()), - player_index(player_index), debug(debug) { - +ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_index, bool debug) + : QDialog(parent), ui(std::make_unique<Ui::ConfigureInputPlayer>()), player_index(player_index), + debug(debug), timeout_timer(std::make_unique<QTimer>()), + poll_timer(std::make_unique<QTimer>()) { ui->setupUi(this); setFocusPolicy(Qt::ClickFocus); @@ -315,7 +307,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, u8 player_index, boo for (std::size_t i = 0; i < controller_color_buttons.size(); ++i) { connect(controller_color_buttons[i], &QPushButton::clicked, this, - std::bind(&ConfigureInputPlayer::OnControllerButtonClick, this, i)); + [this, i] { OnControllerButtonClick(static_cast<int>(i)); }); } this->loadConfiguration(); diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h index b0e5550c5..7a53f6715 100644 --- a/src/yuzu/configuration/configure_input_player.h +++ b/src/yuzu/configuration/configure_input_player.h @@ -9,9 +9,10 @@ #include <memory> #include <optional> #include <string> -#include <unordered_map> + #include <QDialog> #include <QKeyEvent> + #include "common/param_package.h" #include "core/settings.h" #include "input_common/main.h" @@ -29,16 +30,39 @@ class ConfigureInputPlayer : public QDialog { Q_OBJECT public: - explicit ConfigureInputPlayer(QWidget* parent, u8 player_index, bool debug = false); + explicit ConfigureInputPlayer(QWidget* parent, std::size_t player_index, bool debug = false); ~ConfigureInputPlayer() override; /// Save all button configurations to settings file void applyConfiguration(); private: + void OnControllerButtonClick(int i); + + /// Load configuration settings. + void loadConfiguration(); + /// Restore all buttons to their default values. + void restoreDefaults(); + /// Clear all input configuration + void ClearAll(); + + /// Update UI to reflect current configuration. + void updateButtonLabels(); + + /// Called when the button was pressed. + void handleClick(QPushButton* button, + std::function<void(const Common::ParamPackage&)> new_input_setter, + InputCommon::Polling::DeviceType type); + + /// Finish polling and configure input using the input_setter + void setPollingResult(const Common::ParamPackage& params, bool abort); + + /// Handle key press events. + void keyPressEvent(QKeyEvent* event) override; + std::unique_ptr<Ui::ConfigureInputPlayer> ui; - u8 player_index; + std::size_t player_index; bool debug; std::unique_ptr<QTimer> timeout_timer; @@ -77,27 +101,4 @@ private: std::array<QPushButton*, 4> controller_color_buttons; std::array<QColor, 4> controller_colors; - - void OnControllerButtonClick(int i); - - /// Load configuration settings. - void loadConfiguration(); - /// Restore all buttons to their default values. - void restoreDefaults(); - /// Clear all input configuration - void ClearAll(); - - /// Update UI to reflect current configuration. - void updateButtonLabels(); - - /// Called when the button was pressed. - void handleClick(QPushButton* button, - std::function<void(const Common::ParamPackage&)> new_input_setter, - InputCommon::Polling::DeviceType type); - - /// Finish polling and configure input using the input_setter - void setPollingResult(const Common::ParamPackage& params, bool abort); - - /// Handle key press events. - void keyPressEvent(QKeyEvent* event) override; }; diff --git a/src/yuzu/configuration/configure_mouse_advanced.cpp b/src/yuzu/configuration/configure_mouse_advanced.cpp index dab58fbaa..ef857035e 100644 --- a/src/yuzu/configuration/configure_mouse_advanced.cpp +++ b/src/yuzu/configuration/configure_mouse_advanced.cpp @@ -4,11 +4,11 @@ #include <algorithm> #include <memory> -#include <utility> + #include <QKeyEvent> #include <QMenu> -#include <QMessageBox> #include <QTimer> + #include "common/assert.h" #include "common/param_package.h" #include "input_common/main.h" diff --git a/src/yuzu/configuration/configure_mouse_advanced.h b/src/yuzu/configuration/configure_mouse_advanced.h index 218df2bda..e04da4bf2 100644 --- a/src/yuzu/configuration/configure_mouse_advanced.h +++ b/src/yuzu/configuration/configure_mouse_advanced.h @@ -7,7 +7,7 @@ #include <memory> #include <optional> #include <QDialog> -#include <QWidget> + #include "core/settings.h" class QCheckBox; @@ -28,23 +28,6 @@ public: void applyConfiguration(); private: - std::unique_ptr<Ui::ConfigureMouseAdvanced> ui; - - /// This will be the the setting function when an input is awaiting configuration. - std::optional<std::function<void(const Common::ParamPackage&)>> input_setter; - - std::array<QPushButton*, Settings::NativeMouseButton::NumMouseButtons> button_map; - std::array<Common::ParamPackage, Settings::NativeMouseButton::NumMouseButtons> buttons_param; - - std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> device_pollers; - - std::unique_ptr<QTimer> timeout_timer; - std::unique_ptr<QTimer> poll_timer; - - /// A flag to indicate if keyboard keys are okay when configuring an input. If this is false, - /// keyboard events are ignored. - bool want_keyboard_keys = false; - /// Load configuration settings. void loadConfiguration(); /// Restore all buttons to their default values. @@ -65,4 +48,21 @@ private: /// Handle key press events. void keyPressEvent(QKeyEvent* event) override; + + std::unique_ptr<Ui::ConfigureMouseAdvanced> ui; + + /// This will be the the setting function when an input is awaiting configuration. + std::optional<std::function<void(const Common::ParamPackage&)>> input_setter; + + std::array<QPushButton*, Settings::NativeMouseButton::NumMouseButtons> button_map; + std::array<Common::ParamPackage, Settings::NativeMouseButton::NumMouseButtons> buttons_param; + + std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> device_pollers; + + std::unique_ptr<QTimer> timeout_timer; + std::unique_ptr<QTimer> poll_timer; + + /// A flag to indicate if keyboard keys are okay when configuring an input. If this is false, + /// keyboard events are ignored. + bool want_keyboard_keys = false; }; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 9c6d150a5..93bf117c8 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -1105,14 +1105,14 @@ void GMainWindow::OnMenuInstallToNAND() { return; } const auto res = - Service::FileSystem::GetUserNANDContents()->InstallEntry(nsp, false, qt_raw_copy); + Service::FileSystem::GetUserNANDContents()->InstallEntry(*nsp, false, qt_raw_copy); if (res == FileSys::InstallResult::Success) { success(); } else { if (res == FileSys::InstallResult::ErrorAlreadyExists) { if (overwrite()) { const auto res2 = Service::FileSystem::GetUserNANDContents()->InstallEntry( - nsp, true, qt_raw_copy); + *nsp, true, qt_raw_copy); if (res2 == FileSys::InstallResult::Success) { success(); } else { @@ -1167,10 +1167,10 @@ void GMainWindow::OnMenuInstallToNAND() { FileSys::InstallResult res; if (index >= static_cast<size_t>(FileSys::TitleType::Application)) { res = Service::FileSystem::GetUserNANDContents()->InstallEntry( - nca, static_cast<FileSys::TitleType>(index), false, qt_raw_copy); + *nca, static_cast<FileSys::TitleType>(index), false, qt_raw_copy); } else { res = Service::FileSystem::GetSystemNANDContents()->InstallEntry( - nca, static_cast<FileSys::TitleType>(index), false, qt_raw_copy); + *nca, static_cast<FileSys::TitleType>(index), false, qt_raw_copy); } if (res == FileSys::InstallResult::Success) { @@ -1178,7 +1178,7 @@ void GMainWindow::OnMenuInstallToNAND() { } else if (res == FileSys::InstallResult::ErrorAlreadyExists) { if (overwrite()) { const auto res2 = Service::FileSystem::GetUserNANDContents()->InstallEntry( - nca, static_cast<FileSys::TitleType>(index), true, qt_raw_copy); + *nca, static_cast<FileSys::TitleType>(index), true, qt_raw_copy); if (res2 == FileSys::InstallResult::Success) { success(); } else { |