diff options
author | comex <comexk@gmail.com> | 2023-07-02 00:01:11 +0200 |
---|---|---|
committer | comex <comexk@gmail.com> | 2023-07-02 00:01:11 +0200 |
commit | 98685d48e3cb9f25f6919f004ec62cadf33afad2 (patch) | |
tree | 9df2ce7f57370641589bfae7196c77b090bcbe0f /src/common | |
parent | PR feedback + constification (diff) | |
parent | Update translations (2023-07-01) (#10972) (diff) | |
download | yuzu-98685d48e3cb9f25f6919f004ec62cadf33afad2.tar yuzu-98685d48e3cb9f25f6919f004ec62cadf33afad2.tar.gz yuzu-98685d48e3cb9f25f6919f004ec62cadf33afad2.tar.bz2 yuzu-98685d48e3cb9f25f6919f004ec62cadf33afad2.tar.lz yuzu-98685d48e3cb9f25f6919f004ec62cadf33afad2.tar.xz yuzu-98685d48e3cb9f25f6919f004ec62cadf33afad2.tar.zst yuzu-98685d48e3cb9f25f6919f004ec62cadf33afad2.zip |
Diffstat (limited to 'src/common')
-rw-r--r-- | src/common/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/common/fs/fs.cpp | 14 | ||||
-rw-r--r-- | src/common/fs/fs_types.h | 2 | ||||
-rw-r--r-- | src/common/input.h | 45 | ||||
-rw-r--r-- | src/common/ring_buffer.h | 3 | ||||
-rw-r--r-- | src/common/scratch_buffer.h | 9 | ||||
-rw-r--r-- | src/common/settings.cpp | 19 | ||||
-rw-r--r-- | src/common/settings.h | 12 | ||||
-rw-r--r-- | src/common/steady_clock.cpp | 5 | ||||
-rw-r--r-- | src/common/telemetry.cpp | 1 | ||||
-rw-r--r-- | src/common/thread.h | 2 | ||||
-rw-r--r-- | src/common/wall_clock.cpp | 77 | ||||
-rw-r--r-- | src/common/wall_clock.h | 89 | ||||
-rw-r--r-- | src/common/x64/cpu_detect.cpp | 4 | ||||
-rw-r--r-- | src/common/x64/cpu_detect.h | 1 | ||||
-rw-r--r-- | src/common/x64/cpu_wait.cpp | 70 | ||||
-rw-r--r-- | src/common/x64/native_clock.cpp | 166 | ||||
-rw-r--r-- | src/common/x64/native_clock.h | 59 | ||||
-rw-r--r-- | src/common/x64/rdtsc.cpp | 39 | ||||
-rw-r--r-- | src/common/x64/rdtsc.h | 37 |
20 files changed, 353 insertions, 303 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index efc4a9fe9..3adf13a3f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -172,6 +172,8 @@ if(ARCHITECTURE_x86_64) x64/cpu_wait.h x64/native_clock.cpp x64/native_clock.h + x64/rdtsc.cpp + x64/rdtsc.h x64/xbyak_abi.h x64/xbyak_util.h ) diff --git a/src/common/fs/fs.cpp b/src/common/fs/fs.cpp index 6d66c926d..36e67c145 100644 --- a/src/common/fs/fs.cpp +++ b/src/common/fs/fs.cpp @@ -436,7 +436,7 @@ void IterateDirEntries(const std::filesystem::path& path, const DirEntryCallable if (True(filter & DirEntryFilter::File) && entry.status().type() == fs::file_type::regular) { - if (!callback(entry.path())) { + if (!callback(entry)) { callback_error = true; break; } @@ -444,7 +444,7 @@ void IterateDirEntries(const std::filesystem::path& path, const DirEntryCallable if (True(filter & DirEntryFilter::Directory) && entry.status().type() == fs::file_type::directory) { - if (!callback(entry.path())) { + if (!callback(entry)) { callback_error = true; break; } @@ -493,7 +493,7 @@ void IterateDirEntriesRecursively(const std::filesystem::path& path, if (True(filter & DirEntryFilter::File) && entry.status().type() == fs::file_type::regular) { - if (!callback(entry.path())) { + if (!callback(entry)) { callback_error = true; break; } @@ -501,7 +501,7 @@ void IterateDirEntriesRecursively(const std::filesystem::path& path, if (True(filter & DirEntryFilter::Directory) && entry.status().type() == fs::file_type::directory) { - if (!callback(entry.path())) { + if (!callback(entry)) { callback_error = true; break; } @@ -605,6 +605,12 @@ fs::file_type GetEntryType(const fs::path& path) { } u64 GetSize(const fs::path& path) { +#ifdef ANDROID + if (Android::IsContentUri(path)) { + return Android::GetSize(path); + } +#endif + std::error_code ec; const auto file_size = fs::file_size(path, ec); diff --git a/src/common/fs/fs_types.h b/src/common/fs/fs_types.h index 5a4090c19..900f85d24 100644 --- a/src/common/fs/fs_types.h +++ b/src/common/fs/fs_types.h @@ -66,6 +66,6 @@ DECLARE_ENUM_FLAG_OPERATORS(DirEntryFilter); * @returns A boolean value. * Return true to indicate whether the callback is successful, false otherwise. */ -using DirEntryCallable = std::function<bool(const std::filesystem::path& path)>; +using DirEntryCallable = std::function<bool(const std::filesystem::directory_entry& entry)>; } // namespace Common::FS diff --git a/src/common/input.h b/src/common/input.h index 66fb15f0a..2c4ccea22 100644 --- a/src/common/input.h +++ b/src/common/input.h @@ -75,8 +75,10 @@ enum class DriverResult { ErrorWritingData, NoDeviceDetected, InvalidHandle, + InvalidParameters, NotSupported, Disabled, + Delayed, Unknown, }; @@ -86,7 +88,7 @@ enum class NfcState { NewAmiibo, WaitingForAmiibo, AmiiboRemoved, - NotAnAmiibo, + InvalidTagType, NotSupported, WrongDeviceState, WriteFailed, @@ -218,8 +220,22 @@ struct CameraStatus { }; struct NfcStatus { - NfcState state{}; - std::vector<u8> data{}; + NfcState state{NfcState::Unknown}; + u8 uuid_length; + u8 protocol; + u8 tag_type; + std::array<u8, 10> uuid; +}; + +struct MifareData { + u8 command; + u8 sector; + std::array<u8, 0x6> key; + std::array<u8, 0x10> data; +}; + +struct MifareRequest { + std::array<MifareData, 0x10> data; }; // List of buttons to be passed to Qt that can be translated @@ -294,7 +310,7 @@ struct CallbackStatus { BatteryStatus battery_status{}; VibrationStatus vibration_status{}; CameraFormat camera_status{CameraFormat::None}; - NfcState nfc_status{NfcState::Unknown}; + NfcStatus nfc_status{}; std::vector<u8> raw_data{}; }; @@ -356,9 +372,30 @@ public: return NfcState::NotSupported; } + virtual NfcState StartNfcPolling() { + return NfcState::NotSupported; + } + + virtual NfcState StopNfcPolling() { + return NfcState::NotSupported; + } + + virtual NfcState ReadAmiiboData([[maybe_unused]] std::vector<u8>& out_data) { + return NfcState::NotSupported; + } + virtual NfcState WriteNfcData([[maybe_unused]] const std::vector<u8>& data) { return NfcState::NotSupported; } + + virtual NfcState ReadMifareData([[maybe_unused]] const MifareRequest& request, + [[maybe_unused]] MifareRequest& out_data) { + return NfcState::NotSupported; + } + + virtual NfcState WriteMifareData([[maybe_unused]] const MifareRequest& request) { + return NfcState::NotSupported; + } }; /// An abstract class template for a factory that can create input devices. diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h index 4c328ab44..416680d44 100644 --- a/src/common/ring_buffer.h +++ b/src/common/ring_buffer.h @@ -9,6 +9,7 @@ #include <cstddef> #include <cstring> #include <new> +#include <span> #include <type_traits> #include <vector> @@ -53,7 +54,7 @@ public: return push_count; } - std::size_t Push(const std::vector<T>& input) { + std::size_t Push(const std::span<T> input) { return Push(input.data(), input.size()); } diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h index a69a5a7af..6fe907953 100644 --- a/src/common/scratch_buffer.h +++ b/src/common/scratch_buffer.h @@ -3,6 +3,9 @@ #pragma once +#include <iterator> + +#include "common/concepts.h" #include "common/make_unique_for_overwrite.h" namespace Common { @@ -16,6 +19,12 @@ namespace Common { template <typename T> class ScratchBuffer { public: + using iterator = T*; + using const_iterator = const T*; + using value_type = T; + using element_type = T; + using iterator_category = std::contiguous_iterator_tag; + ScratchBuffer() = default; explicit ScratchBuffer(size_t initial_capacity) diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 66dffc9bf..6cbbea1b2 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -1,8 +1,11 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include <version> #if __cpp_lib_chrono >= 201907L #include <chrono> +#include <exception> +#include <stdexcept> #endif #include <string_view> @@ -25,9 +28,19 @@ std::string GetTimeZoneString() { if (time_zone_index == 0) { // Auto #if __cpp_lib_chrono >= 201907L const struct std::chrono::tzdb& time_zone_data = std::chrono::get_tzdb(); - const std::chrono::time_zone* current_zone = time_zone_data.current_zone(); - std::string_view current_zone_name = current_zone->name(); - location_name = current_zone_name; + try { + const std::chrono::time_zone* current_zone = time_zone_data.current_zone(); + std::string_view current_zone_name = current_zone->name(); + location_name = current_zone_name; + } catch (std::runtime_error& runtime_error) { + // VCRUNTIME will throw a runtime_error if the operating system's selected time zone + // cannot be found + location_name = Common::TimeZone::FindSystemTimeZone(); + LOG_WARNING(Common, + "Error occurred when trying to determine system time zone:\n{}\nFalling " + "back to hour offset \"{}\"", + runtime_error.what(), location_name); + } #else location_name = Common::TimeZone::FindSystemTimeZone(); #endif diff --git a/src/common/settings.h b/src/common/settings.h index 9682281b0..ae5ed93d8 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -483,6 +483,7 @@ struct Values { AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3, "astc_recompression"}; SwitchableSetting<bool> use_video_framerate{false, "use_video_framerate"}; + SwitchableSetting<bool> barrier_feedback_loops{true, "barrier_feedback_loops"}; SwitchableSetting<u8> bg_red{0, "bg_red"}; SwitchableSetting<u8> bg_green{0, "bg_green"}; @@ -524,9 +525,16 @@ struct Values { Setting<bool> tas_loop{false, "tas_loop"}; Setting<bool> mouse_panning{false, "mouse_panning"}; - Setting<u8, true> mouse_panning_sensitivity{50, 1, 100, "mouse_panning_sensitivity"}; - Setting<bool> mouse_enabled{false, "mouse_enabled"}; + Setting<u8, true> mouse_panning_x_sensitivity{50, 1, 100, "mouse_panning_x_sensitivity"}; + Setting<u8, true> mouse_panning_y_sensitivity{50, 1, 100, "mouse_panning_y_sensitivity"}; + Setting<u8, true> mouse_panning_deadzone_x_counterweight{ + 0, 0, 100, "mouse_panning_deadzone_x_counterweight"}; + Setting<u8, true> mouse_panning_deadzone_y_counterweight{ + 0, 0, 100, "mouse_panning_deadzone_y_counterweight"}; + Setting<u8, true> mouse_panning_decay_strength{22, 0, 100, "mouse_panning_decay_strength"}; + Setting<u8, true> mouse_panning_min_decay{5, 0, 100, "mouse_panning_min_decay"}; + Setting<bool> mouse_enabled{false, "mouse_enabled"}; Setting<bool> emulate_analog_keyboard{false, "emulate_analog_keyboard"}; Setting<bool> keyboard_enabled{false, "keyboard_enabled"}; diff --git a/src/common/steady_clock.cpp b/src/common/steady_clock.cpp index 782859196..9415eed29 100644 --- a/src/common/steady_clock.cpp +++ b/src/common/steady_clock.cpp @@ -28,13 +28,12 @@ static s64 GetSystemTimeNS() { // GetSystemTimePreciseAsFileTime returns the file time in 100ns units. static constexpr s64 Multiplier = 100; // Convert Windows epoch to Unix epoch. - static constexpr s64 WindowsEpochToUnixEpochNS = 0x19DB1DED53E8000LL; + static constexpr s64 WindowsEpochToUnixEpoch = 0x19DB1DED53E8000LL; FILETIME filetime; GetSystemTimePreciseAsFileTime(&filetime); return Multiplier * ((static_cast<s64>(filetime.dwHighDateTime) << 32) + - static_cast<s64>(filetime.dwLowDateTime)) - - WindowsEpochToUnixEpochNS; + static_cast<s64>(filetime.dwLowDateTime) - WindowsEpochToUnixEpoch); } #endif diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index 91352912d..929ed67e4 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp @@ -93,6 +93,7 @@ void AppendCPUInfo(FieldCollection& fc) { add_field("CPU_Extension_x64_GFNI", caps.gfni); add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc); add_field("CPU_Extension_x64_LZCNT", caps.lzcnt); + add_field("CPU_Extension_x64_MONITORX", caps.monitorx); add_field("CPU_Extension_x64_MOVBE", caps.movbe); add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); add_field("CPU_Extension_x64_POPCNT", caps.popcnt); diff --git a/src/common/thread.h b/src/common/thread.h index 8ae169b4e..c6976fb6c 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -55,7 +55,7 @@ public: is_set = false; } - [[nodiscard]] bool IsSet() { + [[nodiscard]] bool IsSet() const { return is_set; } diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 817e71d52..dc0dcbd68 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -2,88 +2,75 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/steady_clock.h" -#include "common/uint128.h" #include "common/wall_clock.h" #ifdef ARCHITECTURE_x86_64 #include "common/x64/cpu_detect.h" #include "common/x64/native_clock.h" +#include "common/x64/rdtsc.h" #endif namespace Common { class StandardWallClock final : public WallClock { public: - explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) - : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false}, - start_time{SteadyClock::Now()} {} + explicit StandardWallClock() : start_time{SteadyClock::Now()} {} - std::chrono::nanoseconds GetTimeNS() override { + std::chrono::nanoseconds GetTimeNS() const override { return SteadyClock::Now() - start_time; } - std::chrono::microseconds GetTimeUS() override { - return std::chrono::duration_cast<std::chrono::microseconds>(GetTimeNS()); + std::chrono::microseconds GetTimeUS() const override { + return static_cast<std::chrono::microseconds>(GetHostTicksElapsed() / NsToUsRatio::den); } - std::chrono::milliseconds GetTimeMS() override { - return std::chrono::duration_cast<std::chrono::milliseconds>(GetTimeNS()); + std::chrono::milliseconds GetTimeMS() const override { + return static_cast<std::chrono::milliseconds>(GetHostTicksElapsed() / NsToMsRatio::den); } - u64 GetClockCycles() override { - const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency); - return Common::Divide128On32(temp, NS_RATIO).first; + u64 GetCNTPCT() const override { + return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; } - u64 GetCPUCycles() override { - const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency); - return Common::Divide128On32(temp, NS_RATIO).first; + u64 GetGPUTick() const override { + return GetHostTicksElapsed() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; } - void Pause([[maybe_unused]] bool is_paused) override { - // Do nothing in this clock type. + u64 GetHostTicksNow() const override { + return static_cast<u64>(SteadyClock::Now().time_since_epoch().count()); + } + + u64 GetHostTicksElapsed() const override { + return static_cast<u64>(GetTimeNS().count()); + } + + bool IsNative() const override { + return false; } private: SteadyClock::time_point start_time; }; +std::unique_ptr<WallClock> CreateOptimalClock() { #ifdef ARCHITECTURE_x86_64 - -std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { const auto& caps = GetCPUCaps(); - u64 rtsc_frequency = 0; - if (caps.invariant_tsc) { - rtsc_frequency = caps.tsc_frequency ? caps.tsc_frequency : EstimateRDTSCFrequency(); - } - // Fallback to StandardWallClock if the hardware TSC does not have the precision greater than: - // - A nanosecond - // - The emulated CPU frequency - // - The emulated clock counter frequency (CNTFRQ) - if (rtsc_frequency <= WallClock::NS_RATIO || rtsc_frequency <= emulated_cpu_frequency || - rtsc_frequency <= emulated_clock_frequency) { - return std::make_unique<StandardWallClock>(emulated_cpu_frequency, - emulated_clock_frequency); + if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::GPUTickFreq) { + return std::make_unique<X64::NativeClock>(caps.tsc_frequency); } else { - return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency, - rtsc_frequency); + // Fallback to StandardWallClock if the hardware TSC + // - Is not invariant + // - Is not more precise than GPUTickFreq + return std::make_unique<StandardWallClock>(); } -} - #else - -std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { - return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); -} - + return std::make_unique<StandardWallClock>(); #endif +} -std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { - return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); +std::unique_ptr<WallClock> CreateStandardWallClock() { + return std::make_unique<StandardWallClock>(); } } // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 157ec5eae..f45d3d8c5 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -5,6 +5,7 @@ #include <chrono> #include <memory> +#include <ratio> #include "common/common_types.h" @@ -12,50 +13,82 @@ namespace Common { class WallClock { public: - static constexpr u64 NS_RATIO = 1'000'000'000; - static constexpr u64 US_RATIO = 1'000'000; - static constexpr u64 MS_RATIO = 1'000; + static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz + static constexpr u64 GPUTickFreq = 614'400'000; // GM20B GPU Tick Frequency = 614.4 MHz + static constexpr u64 CPUTickFreq = 1'020'000'000; // T210/4 A57 CPU Tick Frequency = 1020.0 MHz virtual ~WallClock() = default; - /// Returns current wall time in nanoseconds - [[nodiscard]] virtual std::chrono::nanoseconds GetTimeNS() = 0; + /// @returns The time in nanoseconds since the construction of this clock. + virtual std::chrono::nanoseconds GetTimeNS() const = 0; - /// Returns current wall time in microseconds - [[nodiscard]] virtual std::chrono::microseconds GetTimeUS() = 0; + /// @returns The time in microseconds since the construction of this clock. + virtual std::chrono::microseconds GetTimeUS() const = 0; - /// Returns current wall time in milliseconds - [[nodiscard]] virtual std::chrono::milliseconds GetTimeMS() = 0; + /// @returns The time in milliseconds since the construction of this clock. + virtual std::chrono::milliseconds GetTimeMS() const = 0; - /// Returns current wall time in emulated clock cycles - [[nodiscard]] virtual u64 GetClockCycles() = 0; + /// @returns The guest CNTPCT ticks since the construction of this clock. + virtual u64 GetCNTPCT() const = 0; - /// Returns current wall time in emulated cpu cycles - [[nodiscard]] virtual u64 GetCPUCycles() = 0; + /// @returns The guest GPU ticks since the construction of this clock. + virtual u64 GetGPUTick() const = 0; - virtual void Pause(bool is_paused) = 0; + /// @returns The raw host timer ticks since an indeterminate epoch. + virtual u64 GetHostTicksNow() const = 0; - /// Tells if the wall clock, uses the host CPU's hardware clock - [[nodiscard]] bool IsNative() const { - return is_native; + /// @returns The raw host timer ticks since the construction of this clock. + virtual u64 GetHostTicksElapsed() const = 0; + + /// @returns Whether the clock directly uses the host's hardware clock. + virtual bool IsNative() const = 0; + + static inline u64 NSToCNTPCT(u64 ns) { + return ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; + } + + static inline u64 NSToGPUTick(u64 ns) { + return ns * NsToGPUTickRatio::num / NsToGPUTickRatio::den; + } + + // Cycle Timing + + static inline u64 CPUTickToNS(u64 cpu_tick) { + return cpu_tick * CPUTickToNsRatio::num / CPUTickToNsRatio::den; + } + + static inline u64 CPUTickToUS(u64 cpu_tick) { + return cpu_tick * CPUTickToUsRatio::num / CPUTickToUsRatio::den; + } + + static inline u64 CPUTickToCNTPCT(u64 cpu_tick) { + return cpu_tick * CPUTickToCNTPCTRatio::num / CPUTickToCNTPCTRatio::den; + } + + static inline u64 CPUTickToGPUTick(u64 cpu_tick) { + return cpu_tick * CPUTickToGPUTickRatio::num / CPUTickToGPUTickRatio::den; } protected: - explicit WallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, bool is_native_) - : emulated_cpu_frequency{emulated_cpu_frequency_}, - emulated_clock_frequency{emulated_clock_frequency_}, is_native{is_native_} {} + using NsRatio = std::nano; + using UsRatio = std::micro; + using MsRatio = std::milli; + + using NsToUsRatio = std::ratio_divide<std::nano, std::micro>; + using NsToMsRatio = std::ratio_divide<std::nano, std::milli>; + using NsToCNTPCTRatio = std::ratio<CNTFRQ, std::nano::den>; + using NsToGPUTickRatio = std::ratio<GPUTickFreq, std::nano::den>; - u64 emulated_cpu_frequency; - u64 emulated_clock_frequency; + // Cycle Timing -private: - bool is_native; + using CPUTickToNsRatio = std::ratio<std::nano::den, CPUTickFreq>; + using CPUTickToUsRatio = std::ratio<std::micro::den, CPUTickFreq>; + using CPUTickToCNTPCTRatio = std::ratio<CNTFRQ, CPUTickFreq>; + using CPUTickToGPUTickRatio = std::ratio<GPUTickFreq, CPUTickFreq>; }; -[[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency); +std::unique_ptr<WallClock> CreateOptimalClock(); -[[nodiscard]] std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency); +std::unique_ptr<WallClock> CreateStandardWallClock(); } // namespace Common diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index 72ed6e96c..780120a5b 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "common/x64/cpu_detect.h" +#include "common/x64/rdtsc.h" #ifdef _WIN32 #include <windows.h> @@ -167,6 +168,7 @@ static CPUCaps Detect() { __cpuid(cpu_id, 0x80000001); caps.lzcnt = Common::Bit<5>(cpu_id[2]); caps.fma4 = Common::Bit<16>(cpu_id[2]); + caps.monitorx = Common::Bit<29>(cpu_id[2]); } if (max_ex_fn >= 0x80000007) { @@ -187,6 +189,8 @@ static CPUCaps Detect() { caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) * caps.tsc_crystal_ratio_numerator / caps.tsc_crystal_ratio_denominator; + } else { + caps.tsc_frequency = X64::EstimateRDTSCFrequency(); } } diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 8253944d6..756459417 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -63,6 +63,7 @@ struct CPUCaps { bool gfni : 1; bool invariant_tsc : 1; bool lzcnt : 1; + bool monitorx : 1; bool movbe : 1; bool pclmulqdq : 1; bool popcnt : 1; diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index cfeef6a3d..41d385f59 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp @@ -9,58 +9,64 @@ #include "common/x64/cpu_detect.h" #include "common/x64/cpu_wait.h" +#include "common/x64/rdtsc.h" namespace Common::X64 { +namespace { + +// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. +// For reference: +// At 1 GHz, 100K cycles is 100us +// At 2 GHz, 100K cycles is 50us +// At 4 GHz, 100K cycles is 25us +constexpr auto PauseCycles = 100'000U; + +} // Anonymous namespace + #ifdef _MSC_VER -__forceinline static u64 FencedRDTSC() { - _mm_lfence(); - _ReadWriteBarrier(); - const u64 result = __rdtsc(); - _mm_lfence(); - _ReadWriteBarrier(); - return result; +__forceinline static void TPAUSE() { + static constexpr auto RequestC02State = 0U; + _tpause(RequestC02State, FencedRDTSC() + PauseCycles); } -__forceinline static void TPAUSE() { - // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. - // For reference: - // At 1 GHz, 100K cycles is 100us - // At 2 GHz, 100K cycles is 50us - // At 4 GHz, 100K cycles is 25us - static constexpr auto PauseCycles = 100'000; - _tpause(0, FencedRDTSC() + PauseCycles); +__forceinline static void MWAITX() { + static constexpr auto EnableWaitTimeFlag = 1U << 1; + static constexpr auto RequestC1State = 0U; + + // monitor_var should be aligned to a cache line. + alignas(64) u64 monitor_var{}; + _mm_monitorx(&monitor_var, 0, 0); + _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles); } #else -static u64 FencedRDTSC() { - u64 eax; - u64 edx; - asm volatile("lfence\n\t" - "rdtsc\n\t" - "lfence\n\t" - : "=a"(eax), "=d"(edx)); - return (edx << 32) | eax; -} - static void TPAUSE() { - // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. - // For reference: - // At 1 GHz, 100K cycles is 100us - // At 2 GHz, 100K cycles is 50us - // At 4 GHz, 100K cycles is 25us - static constexpr auto PauseCycles = 100'000; + static constexpr auto RequestC02State = 0U; const auto tsc = FencedRDTSC() + PauseCycles; const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); const auto edx = static_cast<u32>(tsc >> 32); - asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); + asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); +} + +static void MWAITX() { + static constexpr auto EnableWaitTimeFlag = 1U << 1; + static constexpr auto RequestC1State = 0U; + + // monitor_var should be aligned to a cache line. + alignas(64) u64 monitor_var{}; + asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0)); + asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag)); } #endif void MicroSleep() { static const bool has_waitpkg = GetCPUCaps().waitpkg; + static const bool has_monitorx = GetCPUCaps().monitorx; if (has_waitpkg) { TPAUSE(); + } else if (has_monitorx) { + MWAITX(); } else { std::this_thread::yield(); } diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 277b00662..7d2a26bd9 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -1,164 +1,50 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include <array> -#include <chrono> -#include <thread> - -#include "common/atomic_ops.h" -#include "common/steady_clock.h" #include "common/uint128.h" #include "common/x64/native_clock.h" +#include "common/x64/rdtsc.h" -#ifdef _MSC_VER -#include <intrin.h> -#endif - -namespace Common { +namespace Common::X64 { -#ifdef _MSC_VER -__forceinline static u64 FencedRDTSC() { - _mm_lfence(); - _ReadWriteBarrier(); - const u64 result = __rdtsc(); - _mm_lfence(); - _ReadWriteBarrier(); - return result; -} -#else -static u64 FencedRDTSC() { - u64 eax; - u64 edx; - asm volatile("lfence\n\t" - "rdtsc\n\t" - "lfence\n\t" - : "=a"(eax), "=d"(edx)); - return (edx << 32) | eax; -} -#endif +NativeClock::NativeClock(u64 rdtsc_frequency_) + : start_ticks{FencedRDTSC()}, rdtsc_frequency{rdtsc_frequency_}, + ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)}, + us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)}, + ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)}, + cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)}, + gputick_rdtsc_factor{GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency)} {} -template <u64 Nearest> -static u64 RoundToNearest(u64 value) { - const auto mod = value % Nearest; - return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +std::chrono::nanoseconds NativeClock::GetTimeNS() const { + return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)}; } -u64 EstimateRDTSCFrequency() { - // Discard the first result measuring the rdtsc. - FencedRDTSC(); - std::this_thread::sleep_for(std::chrono::milliseconds{1}); - FencedRDTSC(); - - // Get the current time. - const auto start_time = Common::RealTimeClock::Now(); - const u64 tsc_start = FencedRDTSC(); - // Wait for 250 milliseconds. - std::this_thread::sleep_for(std::chrono::milliseconds{250}); - const auto end_time = Common::RealTimeClock::Now(); - const u64 tsc_end = FencedRDTSC(); - // Calculate differences. - const u64 timer_diff = static_cast<u64>( - std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); - const u64 tsc_diff = tsc_end - tsc_start; - const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); - return RoundToNearest<1000>(tsc_freq); +std::chrono::microseconds NativeClock::GetTimeUS() const { + return std::chrono::microseconds{MultiplyHigh(GetHostTicksElapsed(), us_rdtsc_factor)}; } -namespace X64 { -NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, - u64 rtsc_frequency_) - : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ - rtsc_frequency_} { - // Thread to re-adjust the RDTSC frequency after 10 seconds has elapsed. - time_sync_thread = std::jthread{[this](std::stop_token token) { - // Get the current time. - const auto start_time = Common::RealTimeClock::Now(); - const u64 tsc_start = FencedRDTSC(); - // Wait for 10 seconds. - if (!Common::StoppableTimedWait(token, std::chrono::seconds{10})) { - return; - } - const auto end_time = Common::RealTimeClock::Now(); - const u64 tsc_end = FencedRDTSC(); - // Calculate differences. - const u64 timer_diff = static_cast<u64>( - std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); - const u64 tsc_diff = tsc_end - tsc_start; - const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); - rtsc_frequency = tsc_freq; - CalculateAndSetFactors(); - }}; - - time_point.inner.last_measure = FencedRDTSC(); - time_point.inner.accumulated_ticks = 0U; - CalculateAndSetFactors(); +std::chrono::milliseconds NativeClock::GetTimeMS() const { + return std::chrono::milliseconds{MultiplyHigh(GetHostTicksElapsed(), ms_rdtsc_factor)}; } -u64 NativeClock::GetRTSC() { - TimePoint new_time_point{}; - TimePoint current_time_point{}; - - current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); - do { - const u64 current_measure = FencedRDTSC(); - u64 diff = current_measure - current_time_point.inner.last_measure; - diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) - new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure - ? current_measure - : current_time_point.inner.last_measure; - new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; - } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack, current_time_point.pack)); - return new_time_point.inner.accumulated_ticks; +u64 NativeClock::GetCNTPCT() const { + return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor); } -void NativeClock::Pause(bool is_paused) { - if (!is_paused) { - TimePoint current_time_point{}; - TimePoint new_time_point{}; - - current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); - do { - new_time_point.pack = current_time_point.pack; - new_time_point.inner.last_measure = FencedRDTSC(); - } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack, current_time_point.pack)); - } +u64 NativeClock::GetGPUTick() const { + return MultiplyHigh(GetHostTicksElapsed(), gputick_rdtsc_factor); } -std::chrono::nanoseconds NativeClock::GetTimeNS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; +u64 NativeClock::GetHostTicksNow() const { + return FencedRDTSC(); } -std::chrono::microseconds NativeClock::GetTimeUS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; +u64 NativeClock::GetHostTicksElapsed() const { + return FencedRDTSC() - start_ticks; } -std::chrono::milliseconds NativeClock::GetTimeMS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; +bool NativeClock::IsNative() const { + return true; } -u64 NativeClock::GetClockCycles() { - const u64 rtsc_value = GetRTSC(); - return MultiplyHigh(rtsc_value, clock_rtsc_factor); -} - -u64 NativeClock::GetCPUCycles() { - const u64 rtsc_value = GetRTSC(); - return MultiplyHigh(rtsc_value, cpu_rtsc_factor); -} - -void NativeClock::CalculateAndSetFactors() { - ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); - us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); - ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency); - clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); - cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); -} - -} // namespace X64 - -} // namespace Common +} // namespace Common::X64 diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 03ca291d8..334415eff 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -3,58 +3,39 @@ #pragma once -#include "common/polyfill_thread.h" #include "common/wall_clock.h" -namespace Common { +namespace Common::X64 { -namespace X64 { class NativeClock final : public WallClock { public: - explicit NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, - u64 rtsc_frequency_); + explicit NativeClock(u64 rdtsc_frequency_); - std::chrono::nanoseconds GetTimeNS() override; + std::chrono::nanoseconds GetTimeNS() const override; - std::chrono::microseconds GetTimeUS() override; + std::chrono::microseconds GetTimeUS() const override; - std::chrono::milliseconds GetTimeMS() override; + std::chrono::milliseconds GetTimeMS() const override; - u64 GetClockCycles() override; + u64 GetCNTPCT() const override; - u64 GetCPUCycles() override; + u64 GetGPUTick() const override; - void Pause(bool is_paused) override; + u64 GetHostTicksNow() const override; -private: - u64 GetRTSC(); - - void CalculateAndSetFactors(); - - union alignas(16) TimePoint { - TimePoint() : pack{} {} - u128 pack{}; - struct Inner { - u64 last_measure{}; - u64 accumulated_ticks{}; - } inner; - }; - - TimePoint time_point; + u64 GetHostTicksElapsed() const override; - // factors - u64 clock_rtsc_factor{}; - u64 cpu_rtsc_factor{}; - u64 ns_rtsc_factor{}; - u64 us_rtsc_factor{}; - u64 ms_rtsc_factor{}; + bool IsNative() const override; - u64 rtsc_frequency; - - std::jthread time_sync_thread; +private: + u64 start_ticks; + u64 rdtsc_frequency; + + u64 ns_rdtsc_factor; + u64 us_rdtsc_factor; + u64 ms_rdtsc_factor; + u64 cntpct_rdtsc_factor; + u64 gputick_rdtsc_factor; }; -} // namespace X64 - -u64 EstimateRDTSCFrequency(); -} // namespace Common +} // namespace Common::X64 diff --git a/src/common/x64/rdtsc.cpp b/src/common/x64/rdtsc.cpp new file mode 100644 index 000000000..9273274a3 --- /dev/null +++ b/src/common/x64/rdtsc.cpp @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <thread> + +#include "common/steady_clock.h" +#include "common/uint128.h" +#include "common/x64/rdtsc.h" + +namespace Common::X64 { + +template <u64 Nearest> +static u64 RoundToNearest(u64 value) { + const auto mod = value % Nearest; + return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +} + +u64 EstimateRDTSCFrequency() { + // Discard the first result measuring the rdtsc. + FencedRDTSC(); + std::this_thread::sleep_for(std::chrono::milliseconds{1}); + FencedRDTSC(); + + // Get the current time. + const auto start_time = RealTimeClock::Now(); + const u64 tsc_start = FencedRDTSC(); + // Wait for 100 milliseconds. + std::this_thread::sleep_for(std::chrono::milliseconds{100}); + const auto end_time = RealTimeClock::Now(); + const u64 tsc_end = FencedRDTSC(); + // Calculate differences. + const u64 timer_diff = static_cast<u64>( + std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); + const u64 tsc_diff = tsc_end - tsc_start; + const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); + return RoundToNearest<100'000>(tsc_freq); +} + +} // namespace Common::X64 diff --git a/src/common/x64/rdtsc.h b/src/common/x64/rdtsc.h new file mode 100644 index 000000000..0ec4f52f9 --- /dev/null +++ b/src/common/x64/rdtsc.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +#include "common/common_types.h" + +namespace Common::X64 { + +#ifdef _MSC_VER +__forceinline static u64 FencedRDTSC() { + _mm_lfence(); + _ReadWriteBarrier(); + const u64 result = __rdtsc(); + _mm_lfence(); + _ReadWriteBarrier(); + return result; +} +#else +static inline u64 FencedRDTSC() { + u64 eax; + u64 edx; + asm volatile("lfence\n\t" + "rdtsc\n\t" + "lfence\n\t" + : "=a"(eax), "=d"(edx)); + return (edx << 32) | eax; +} +#endif + +u64 EstimateRDTSCFrequency(); + +} // namespace Common::X64 |