diff options
27 files changed, 714 insertions, 156 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 65a4922ea..f8ec8fea8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -82,6 +82,7 @@ else() -Werror=missing-declarations -Werror=missing-field-initializers -Werror=reorder + -Werror=sign-compare -Werror=switch -Werror=uninitialized -Werror=unused-function diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index d25a1a645..090dd19b1 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -51,9 +51,6 @@ if (NOT MSVC) target_compile_options(audio_core PRIVATE -Werror=conversion -Werror=ignored-qualifiers - -Werror=implicit-fallthrough - -Werror=reorder - -Werror=sign-compare -Werror=shadow -Werror=unused-parameter -Werror=unused-variable diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index ccd5ca6cc..7dba739b4 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp @@ -29,10 +29,9 @@ namespace { (static_cast<float>(r_channel) * r_mix_amount))); } -[[nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(s16 fl_channel, s16 fr_channel, - s16 fc_channel, - [[maybe_unused]] s16 lf_channel, - s16 bl_channel, s16 br_channel) { +[[maybe_unused, nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2( + s16 fl_channel, s16 fr_channel, s16 fc_channel, [[maybe_unused]] s16 lf_channel, s16 bl_channel, + s16 br_channel) { // Front channels are mixed 36.94%, Center channels are mixed to be 26.12% & the back channels // are mixed to be 36.94% @@ -57,11 +56,11 @@ namespace { const std::array<float_le, 4>& coeff) { const auto left = static_cast<float>(fl_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] + - static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[0]; + static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[3]; const auto right = static_cast<float>(fr_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] + - static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[0]; + static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[3]; return {ClampToS16(static_cast<s32>(left)), ClampToS16(static_cast<s32>(right))}; } @@ -241,7 +240,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { const auto channel_count = buffer_offsets.size(); const auto& final_mix = mix_context.GetFinalMixInfo(); const auto& in_params = final_mix.GetInParams(); - std::vector<s32*> mix_buffers(channel_count); + std::vector<std::span<s32>> mix_buffers(channel_count); for (std::size_t i = 0; i < channel_count; i++) { mix_buffers[i] = command_generator.GetMixBuffer(in_params.buffer_offset + buffer_offsets[i]); @@ -294,18 +293,11 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { buffer[i * stream_channel_count + 0] = Mix2To1(fl_sample, fr_sample); } else if (stream_channel_count == 2) { // Mix all channels into 2 channels - if (sink_context.HasDownMixingCoefficients()) { - const auto [left, right] = Mix6To2WithCoefficients( - fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample, - sink_context.GetDownmixCoefficients()); - buffer[i * stream_channel_count + 0] = left; - buffer[i * stream_channel_count + 1] = right; - } else { - const auto [left, right] = Mix6To2(fl_sample, fr_sample, fc_sample, - lf_sample, bl_sample, br_sample); - buffer[i * stream_channel_count + 0] = left; - buffer[i * stream_channel_count + 1] = right; - } + const auto [left, right] = Mix6To2WithCoefficients( + fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample, + sink_context.GetDownmixCoefficients()); + buffer[i * stream_channel_count + 0] = left; + buffer[i * stream_channel_count + 1] = right; } else if (stream_channel_count == 6) { // Pass through buffer[i * stream_channel_count + 0] = fl_sample; diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp index b3250be09..b99d0fc91 100644 --- a/src/audio_core/command_generator.cpp +++ b/src/audio_core/command_generator.cpp @@ -31,7 +31,7 @@ constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{ 0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f}; template <std::size_t N> -void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) { +void ApplyMix(std::span<s32> output, std::span<const s32> input, s32 gain, s32 sample_count) { for (std::size_t i = 0; i < static_cast<std::size_t>(sample_count); i += N) { for (std::size_t j = 0; j < N; j++) { output[i + j] += @@ -40,7 +40,8 @@ void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) { } } -s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sample_count) { +s32 ApplyMixRamp(std::span<s32> output, std::span<const s32> input, float gain, float delta, + s32 sample_count) { s32 x = 0; for (s32 i = 0; i < sample_count; i++) { x = static_cast<s32>(static_cast<float>(input[i]) * gain); @@ -50,20 +51,22 @@ s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sam return x; } -void ApplyGain(s32* output, const s32* input, s32 gain, s32 delta, s32 sample_count) { +void ApplyGain(std::span<s32> output, std::span<const s32> input, s32 gain, s32 delta, + s32 sample_count) { for (s32 i = 0; i < sample_count; i++) { output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15); gain += delta; } } -void ApplyGainWithoutDelta(s32* output, const s32* input, s32 gain, s32 sample_count) { +void ApplyGainWithoutDelta(std::span<s32> output, std::span<const s32> input, s32 gain, + s32 sample_count) { for (s32 i = 0; i < sample_count; i++) { output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15); } } -s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) { +s32 ApplyMixDepop(std::span<s32> output, s32 first_sample, s32 delta, s32 sample_count) { const bool positive = first_sample > 0; auto final_sample = std::abs(first_sample); for (s32 i = 0; i < sample_count; i++) { @@ -128,10 +131,10 @@ constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 3, 3, 3}; template <std::size_t CHANNEL_COUNT> -void ApplyReverbGeneric(I3dl2ReverbState& state, - const std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT>& input, - const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output, - s32 sample_count) { +void ApplyReverbGeneric( + I3dl2ReverbState& state, + const std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT>& input, + const std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT>& output, s32 sample_count) { auto GetTapLookup = []() { if constexpr (CHANNEL_COUNT == 1) { @@ -457,8 +460,8 @@ void CommandGenerator::GenerateBiquadFilterCommand([[maybe_unused]] s32 mix_buff "input_mix_buffer={}, output_mix_buffer={}", node_id, input_offset, output_offset); } - const auto* input = GetMixBuffer(input_offset); - auto* output = GetMixBuffer(output_offset); + std::span<const s32> input = GetMixBuffer(input_offset); + std::span<s32> output = GetMixBuffer(output_offset); // Biquad filter parameters const auto [n0, n1, n2] = params.numerator; @@ -551,8 +554,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E return; } - std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{}; - std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{}; + std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT> input{}; + std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT> output{}; const auto status = params.status; for (s32 i = 0; i < channel_count; i++) { @@ -587,7 +590,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E for (s32 i = 0; i < channel_count; i++) { // Only copy if the buffer input and output do not match! if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) { - std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32)); + std::memcpy(output[i].data(), input[i].data(), + worker_params.sample_count * sizeof(s32)); } } } @@ -603,8 +607,8 @@ void CommandGenerator::GenerateBiquadFilterEffectCommand(s32 mix_buffer_offset, for (s32 i = 0; i < channel_count; i++) { // TODO(ogniK): Actually implement biquad filter if (params.input[i] != params.output[i]) { - const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]); - auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]); + std::span<const s32> input = GetMixBuffer(mix_buffer_offset + params.input[i]); + std::span<s32> output = GetMixBuffer(mix_buffer_offset + params.output[i]); ApplyMix<1>(output, input, 32768, worker_params.sample_count); } } @@ -643,14 +647,15 @@ void CommandGenerator::GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* inf if (samples_read != static_cast<int>(worker_params.sample_count) && samples_read <= params.sample_count) { - std::memset(GetMixBuffer(output_index), 0, params.sample_count - samples_read); + std::memset(GetMixBuffer(output_index).data(), 0, + params.sample_count - samples_read); } } else { AuxInfoDSP empty{}; memory.WriteBlock(aux->GetSendInfo(), &empty, sizeof(AuxInfoDSP)); memory.WriteBlock(aux->GetRecvInfo(), &empty, sizeof(AuxInfoDSP)); if (output_index != input_index) { - std::memcpy(GetMixBuffer(output_index), GetMixBuffer(input_index), + std::memcpy(GetMixBuffer(output_index).data(), GetMixBuffer(input_index).data(), worker_params.sample_count * sizeof(s32)); } } @@ -668,7 +673,7 @@ ServerSplitterDestinationData* CommandGenerator::GetDestinationData(s32 splitter } s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, - const s32* data, u32 sample_count, u32 write_offset, + std::span<const s32> data, u32 sample_count, u32 write_offset, u32 write_count) { if (max_samples == 0) { return 0; @@ -678,14 +683,14 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3 return 0; } - std::size_t data_offset{}; + s32 data_offset{}; u32 remaining = sample_count; while (remaining > 0) { // Get position in buffer const auto base = send_buffer + (offset * sizeof(u32)); const auto samples_to_grab = std::min(max_samples - offset, remaining); // Write to output - memory.WriteBlock(base, (data + data_offset), samples_to_grab * sizeof(u32)); + memory.WriteBlock(base, (data.data() + data_offset), samples_to_grab * sizeof(u32)); offset = (offset + samples_to_grab) % max_samples; remaining -= samples_to_grab; data_offset += samples_to_grab; @@ -698,7 +703,7 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3 } s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, - s32* out_data, u32 sample_count, u32 read_offset, + std::span<s32> out_data, u32 sample_count, u32 read_offset, u32 read_count) { if (max_samples == 0) { return 0; @@ -710,15 +715,16 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3 } u32 remaining = sample_count; + s32 data_offset{}; while (remaining > 0) { const auto base = recv_buffer + (offset * sizeof(u32)); const auto samples_to_grab = std::min(max_samples - offset, remaining); std::vector<s32> buffer(samples_to_grab); memory.ReadBlock(base, buffer.data(), buffer.size() * sizeof(u32)); - std::memcpy(out_data, buffer.data(), buffer.size() * sizeof(u32)); - out_data += samples_to_grab; + std::memcpy(out_data.data() + data_offset, buffer.data(), buffer.size() * sizeof(u32)); offset = (offset + samples_to_grab) % max_samples; remaining -= samples_to_grab; + data_offset += samples_to_grab; } if (read_count != 0) { @@ -965,8 +971,8 @@ void CommandGenerator::GenerateMixCommand(std::size_t output_offset, std::size_t node_id, input_offset, output_offset, volume); } - auto* output = GetMixBuffer(output_offset); - const auto* input = GetMixBuffer(input_offset); + std::span<s32> output = GetMixBuffer(output_offset); + std::span<const s32> input = GetMixBuffer(input_offset); const s32 gain = static_cast<s32>(volume * 32768.0f); // Mix with loop unrolling @@ -1172,12 +1178,14 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s return samples_processed; } -s32* CommandGenerator::GetMixBuffer(std::size_t index) { - return mix_buffer.data() + (index * worker_params.sample_count); +std::span<s32> CommandGenerator::GetMixBuffer(std::size_t index) { + return std::span<s32>(mix_buffer.data() + (index * worker_params.sample_count), + worker_params.sample_count); } -const s32* CommandGenerator::GetMixBuffer(std::size_t index) const { - return mix_buffer.data() + (index * worker_params.sample_count); +std::span<const s32> CommandGenerator::GetMixBuffer(std::size_t index) const { + return std::span<const s32>(mix_buffer.data() + (index * worker_params.sample_count), + worker_params.sample_count); } std::size_t CommandGenerator::GetMixChannelBufferOffset(s32 channel) const { @@ -1188,15 +1196,15 @@ std::size_t CommandGenerator::GetTotalMixBufferCount() const { return worker_params.mix_buffer_count + AudioCommon::MAX_CHANNEL_COUNT; } -s32* CommandGenerator::GetChannelMixBuffer(s32 channel) { +std::span<s32> CommandGenerator::GetChannelMixBuffer(s32 channel) { return GetMixBuffer(worker_params.mix_buffer_count + channel); } -const s32* CommandGenerator::GetChannelMixBuffer(s32 channel) const { +std::span<const s32> CommandGenerator::GetChannelMixBuffer(s32 channel) const { return GetMixBuffer(worker_params.mix_buffer_count + channel); } -void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, +void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output, VoiceState& dsp_state, s32 channel, s32 target_sample_rate, s32 sample_count, s32 node_id) { @@ -1208,7 +1216,7 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o node_id, channel, in_params.sample_format, sample_count, in_params.sample_rate, in_params.mix_id, in_params.splitter_info_id); } - ASSERT_OR_EXECUTE(output != nullptr, { return; }); + ASSERT_OR_EXECUTE(output.data() != nullptr, { return; }); const auto resample_rate = static_cast<s32>( static_cast<float>(in_params.sample_rate) / static_cast<float>(target_sample_rate) * @@ -1225,6 +1233,7 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o } std::size_t temp_mix_offset{}; + s32 samples_output{}; auto samples_remaining = sample_count; while (samples_remaining > 0) { const auto samples_to_output = std::min(samples_remaining, min_required_samples); @@ -1328,20 +1337,21 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o if (in_params.behavior_flags.is_pitch_and_src_skipped.Value()) { // No need to resample - std::memcpy(output, sample_buffer.data(), samples_read * sizeof(s32)); + std::memcpy(output.data() + samples_output, sample_buffer.data(), + samples_read * sizeof(s32)); } else { std::fill(sample_buffer.begin() + temp_mix_offset, sample_buffer.begin() + temp_mix_offset + (samples_to_read - samples_read), 0); - AudioCore::Resample(output, sample_buffer.data(), resample_rate, dsp_state.fraction, - samples_to_output); + AudioCore::Resample(output.data() + samples_output, sample_buffer.data(), resample_rate, + dsp_state.fraction, samples_to_output); // Resample for (std::size_t i = 0; i < AudioCommon::MAX_SAMPLE_HISTORY; i++) { dsp_state.sample_history[i] = sample_buffer[samples_to_read + i]; } } - output += samples_to_output; samples_remaining -= samples_to_output; + samples_output += samples_to_output; } } diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h index f310d7317..59a33ba76 100644 --- a/src/audio_core/command_generator.h +++ b/src/audio_core/command_generator.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <span> #include "audio_core/common.h" #include "audio_core/voice_context.h" #include "common/common_types.h" @@ -41,10 +42,10 @@ public: void PreCommand(); void PostCommand(); - [[nodiscard]] s32* GetChannelMixBuffer(s32 channel); - [[nodiscard]] const s32* GetChannelMixBuffer(s32 channel) const; - [[nodiscard]] s32* GetMixBuffer(std::size_t index); - [[nodiscard]] const s32* GetMixBuffer(std::size_t index) const; + [[nodiscard]] std::span<s32> GetChannelMixBuffer(s32 channel); + [[nodiscard]] std::span<const s32> GetChannelMixBuffer(s32 channel) const; + [[nodiscard]] std::span<s32> GetMixBuffer(std::size_t index); + [[nodiscard]] std::span<const s32> GetMixBuffer(std::size_t index) const; [[nodiscard]] std::size_t GetMixChannelBufferOffset(s32 channel) const; [[nodiscard]] std::size_t GetTotalMixBufferCount() const; @@ -77,10 +78,11 @@ private: void GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* info, bool enabled); [[nodiscard]] ServerSplitterDestinationData* GetDestinationData(s32 splitter_id, s32 index); - s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, const s32* data, - u32 sample_count, u32 write_offset, u32 write_count); - s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data, - u32 sample_count, u32 read_offset, u32 read_count); + s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, + std::span<const s32> data, u32 sample_count, u32 write_offset, + u32 write_count); + s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, + std::span<s32> out_data, u32 sample_count, u32 read_offset, u32 read_count); void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, std::vector<u8>& work_buffer); @@ -91,8 +93,9 @@ private: s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset); s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset, s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset); - void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, VoiceState& dsp_state, - s32 channel, s32 target_sample_rate, s32 sample_count, s32 node_id); + void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output, + VoiceState& dsp_state, s32 channel, s32 target_sample_rate, + s32 sample_count, s32 node_id); AudioCommon::AudioRendererParameter& worker_params; VoiceContext& voice_context; diff --git a/src/audio_core/sink_context.cpp b/src/audio_core/sink_context.cpp index a69543696..cc55b290c 100644 --- a/src/audio_core/sink_context.cpp +++ b/src/audio_core/sink_context.cpp @@ -15,10 +15,17 @@ std::size_t SinkContext::GetCount() const { void SinkContext::UpdateMainSink(const SinkInfo::InParams& in) { ASSERT(in.type == SinkTypes::Device); - has_downmix_coefs = in.device.down_matrix_enabled; - if (has_downmix_coefs) { + if (in.device.down_matrix_enabled) { downmix_coefficients = in.device.down_matrix_coef; + } else { + downmix_coefficients = { + 1.0f, // front + 0.707f, // center + 0.0f, // lfe + 0.707f, // back + }; } + in_use = in.in_use; use_count = in.device.input_count; buffers = in.device.input; @@ -34,10 +41,6 @@ std::vector<u8> SinkContext::OutputBuffers() const { return buffer_ret; } -bool SinkContext::HasDownMixingCoefficients() const { - return has_downmix_coefs; -} - const DownmixCoefficients& SinkContext::GetDownmixCoefficients() const { return downmix_coefficients; } diff --git a/src/audio_core/sink_context.h b/src/audio_core/sink_context.h index 9e2b69785..254961fe2 100644 --- a/src/audio_core/sink_context.h +++ b/src/audio_core/sink_context.h @@ -84,7 +84,6 @@ public: [[nodiscard]] bool InUse() const; [[nodiscard]] std::vector<u8> OutputBuffers() const; - [[nodiscard]] bool HasDownMixingCoefficients() const; [[nodiscard]] const DownmixCoefficients& GetDownmixCoefficients() const; private: @@ -92,7 +91,6 @@ private: s32 use_count{}; std::array<u8, AudioCommon::MAX_CHANNEL_COUNT> buffers{}; std::size_t sink_count{}; - bool has_downmix_coefs{false}; DownmixCoefficients downmix_coefficients{}; }; } // namespace AudioCore diff --git a/src/common/fs/file.cpp b/src/common/fs/file.cpp index 077f34995..274f57659 100644 --- a/src/common/fs/file.cpp +++ b/src/common/fs/file.cpp @@ -306,9 +306,9 @@ bool IOFile::Flush() const { errno = 0; #ifdef _WIN32 - const auto flush_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0; + const auto flush_result = std::fflush(file) == 0; #else - const auto flush_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0; + const auto flush_result = std::fflush(file) == 0; #endif if (!flush_result) { @@ -320,6 +320,28 @@ bool IOFile::Flush() const { return flush_result; } +bool IOFile::Commit() const { + if (!IsOpen()) { + return false; + } + + errno = 0; + +#ifdef _WIN32 + const auto commit_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0; +#else + const auto commit_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0; +#endif + + if (!commit_result) { + const auto ec = std::error_code{errno, std::generic_category()}; + LOG_ERROR(Common_Filesystem, "Failed to commit the file at path={}, ec_message={}", + PathToUTF8String(file_path), ec.message()); + } + + return commit_result; +} + bool IOFile::SetSize(u64 size) const { if (!IsOpen()) { return false; @@ -347,6 +369,9 @@ u64 IOFile::GetSize() const { return 0; } + // Flush any unwritten buffered data into the file prior to retrieving the file size. + std::fflush(file); + std::error_code ec; const auto file_size = fs::file_size(file_path, ec); diff --git a/src/common/fs/file.h b/src/common/fs/file.h index 588fe619d..2c4ab4332 100644 --- a/src/common/fs/file.h +++ b/src/common/fs/file.h @@ -396,13 +396,22 @@ public: [[nodiscard]] size_t WriteString(std::span<const char> string) const; /** - * Attempts to flush any unwritten buffered data into the file and flush the file into the disk. + * Attempts to flush any unwritten buffered data into the file. * * @returns True if the flush was successful, false otherwise. */ bool Flush() const; /** + * Attempts to commit the file into the disk. + * Note that this is an expensive operation as this forces the operating system to write + * the contents of the file associated with the file descriptor into the disk. + * + * @returns True if the commit was successful, false otherwise. + */ + bool Commit() const; + + /** * Resizes the file to a given size. * If the file is resized to a smaller size, the remainder of the file is discarded. * If the file is resized to a larger size, the new area appears as if zero-filled. diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index b6fa4affb..61dddab3f 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -171,19 +171,22 @@ FileBackend::FileBackend(const std::filesystem::path& filename) { FileBackend::~FileBackend() = default; void FileBackend::Write(const Entry& entry) { + if (!file->IsOpen()) { + return; + } + using namespace Common::Literals; - // prevent logs from going over the maximum size (in case its spamming and the user doesn't - // know) + // Prevent logs from exceeding a set maximum size in the event that log entries are spammed. constexpr std::size_t MAX_BYTES_WRITTEN = 100_MiB; constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1_GiB; - if (!file->IsOpen()) { - return; - } + const bool write_limit_exceeded = + bytes_written > MAX_BYTES_WRITTEN_EXTENDED || + (bytes_written > MAX_BYTES_WRITTEN && !Settings::values.extended_logging); - if (Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN_EXTENDED) { - return; - } else if (!Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN) { + // Close the file after the write limit is exceeded. + if (write_limit_exceeded) { + file->Close(); return; } diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 19b970981..b2b0dbe05 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -667,8 +667,6 @@ else() target_compile_options(core PRIVATE -Werror=conversion -Werror=ignored-qualifiers - -Werror=implicit-fallthrough - -Werror=sign-compare -Werror=shadow $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess> diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 7583d68b2..b769fe959 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -290,7 +290,7 @@ private: IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); - rb.Push<u32>(1); + rb.Push<u32>(2); } // Should be similar to QueryAudioDeviceOutputEvent diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt index c3423c815..c4283a952 100644 --- a/src/input_common/CMakeLists.txt +++ b/src/input_common/CMakeLists.txt @@ -44,10 +44,7 @@ else() -Werror -Werror=conversion -Werror=ignored-qualifiers - -Werror=implicit-fallthrough - -Werror=reorder -Werror=shadow - -Werror=sign-compare $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> -Werror=unused-variable diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 7124c755c..d2b9d5f2b 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -69,11 +69,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } else { UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); } - // Flush and invalidate through the GPU interface, to be asynchronous if possible. - const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - rasterizer->UnmapMemory(*cpu_addr, size); + const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); + + for (const auto& map : submapped_ranges) { + // Flush and invalidate through the GPU interface, to be asynchronous if possible. + const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first); + ASSERT(cpu_addr); + + rasterizer->UnmapMemory(*cpu_addr, map.second); + } UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); } @@ -127,8 +132,14 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s //// Lock the new page // TryLockPage(page_entry, size); + auto& current_page = page_table[PageEntryIndex(gpu_addr)]; - page_table[PageEntryIndex(gpu_addr)] = page_entry; + if ((!current_page.IsValid() && page_entry.IsValid()) || + current_page.ToAddress() != page_entry.ToAddress()) { + rasterizer->ModifyGPUMemory(gpu_addr, size); + } + + current_page = page_entry; } std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align, @@ -174,6 +185,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { return page_entry.ToAddress() + (gpu_addr & page_mask); } +std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { + size_t page_index{addr >> page_bits}; + const size_t page_last{(addr + size + page_size - 1) >> page_bits}; + while (page_index < page_last) { + const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; + if (page_addr && *page_addr != 0) { + return page_addr; + } + ++page_index; + } + return std::nullopt; +} + template <typename T> T MemoryManager::Read(GPUVAddr addr) const { if (auto page_pointer{GetPointer(addr)}; page_pointer) { @@ -370,4 +394,79 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { return page <= Core::Memory::PAGE_SIZE; } +bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const { + size_t page_index{gpu_addr >> page_bits}; + const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; + std::optional<VAddr> old_page_addr{}; + while (page_index != page_last) { + const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; + if (!page_addr || *page_addr == 0) { + return false; + } + if (old_page_addr) { + if (*old_page_addr + page_size != *page_addr) { + return false; + } + } + old_page_addr = page_addr; + ++page_index; + } + return true; +} + +bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const { + size_t page_index{gpu_addr >> page_bits}; + const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; + while (page_index < page_last) { + if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) { + return false; + } + ++page_index; + } + return true; +} + +std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( + GPUVAddr gpu_addr, std::size_t size) const { + std::vector<std::pair<GPUVAddr, std::size_t>> result{}; + size_t page_index{gpu_addr >> page_bits}; + size_t remaining_size{size}; + size_t page_offset{gpu_addr & page_mask}; + std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{}; + std::optional<VAddr> old_page_addr{}; + const auto extend_size = [this, &last_segment, &page_index](std::size_t bytes) { + if (!last_segment) { + GPUVAddr new_base_addr = page_index << page_bits; + last_segment = {new_base_addr, bytes}; + } else { + last_segment->second += bytes; + } + }; + const auto split = [this, &last_segment, &result] { + if (last_segment) { + result.push_back(*last_segment); + last_segment = std::nullopt; + } + }; + while (remaining_size > 0) { + const size_t num_bytes{std::min(page_size - page_offset, remaining_size)}; + const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; + if (!page_addr) { + split(); + } else if (old_page_addr) { + if (*old_page_addr + page_size != *page_addr) { + split(); + } + extend_size(num_bytes); + } else { + extend_size(num_bytes); + } + ++page_index; + page_offset = 0; + remaining_size -= num_bytes; + } + split(); + return result; +} + } // namespace Tegra diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index b3538d503..99d13e7f6 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -76,6 +76,8 @@ public: [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; + [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; + template <typename T> [[nodiscard]] T Read(GPUVAddr addr) const; @@ -112,10 +114,28 @@ public: void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); /** - * IsGranularRange checks if a gpu region can be simply read with a pointer. + * Checks if a gpu region can be simply read with a pointer. */ [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; + /** + * Checks if a gpu region is mapped by a single range of cpu addresses. + */ + [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const; + + /** + * Checks if a gpu region is mapped entirely. + */ + [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; + + /** + * Returns a vector with all the subranges of cpu addresses mapped beneath. + * if the region is continous, a single pair will be returned. If it's unmapped, an empty vector + * will be returned; + */ + std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, + std::size_t size) const; + [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 07939432f..0cec4225b 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -87,6 +87,9 @@ public: /// Unmap memory range virtual void UnmapMemory(VAddr addr, u64 size) = 0; + /// Remap GPU memory range. This means underneath backing memory changed + virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index eb8bdaa85..07ad0e205 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -611,6 +611,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { shader_cache.OnCPUWrite(addr, size); } +void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) { + { + std::scoped_lock lock{texture_cache.mutex}; + texture_cache.UnmapGPUMemory(addr, size); + } +} + void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write<u32>(addr, value); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9995a563b..482efed7a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -80,6 +80,7 @@ public: void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; void UnmapMemory(VAddr addr, u64 size) override; + void ModifyGPUMemory(GPUVAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index abaf1ee6a..8fb5be393 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -261,9 +261,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(), - copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); + copy.src_subresource.base_level, GL_TRUE, 0, GL_READ_ONLY, GL_RG32UI); glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(), - copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); + copy.dst_subresource.base_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8UI); glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); } program_manager.RestoreGuestCompute(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 1c9120170..bd4d649cc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -557,6 +557,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { pipeline_cache.OnCPUWrite(addr, size); } +void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) { + { + std::scoped_lock lock{texture_cache.mutex}; + texture_cache.UnmapGPUMemory(addr, size); + } +} + void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write<u32>(addr, value); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index cb8c5c279..41459c5c5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -72,6 +72,7 @@ public: void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; void UnmapMemory(VAddr addr, u64 size) override; + void ModifyGPUMemory(GPUVAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index f22358c90..6052d148a 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_ } } +ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_) + : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {} + std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { if (other_addr < gpu_addr) { // Subresource address can't be lower than the base diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index e326cab71..ff1feda9b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -25,12 +25,14 @@ enum class ImageFlagBits : u32 { Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked + Remapped = 1 << 8, ///< Image has been remapped. + Sparse = 1 << 9, ///< Image has non continous submemory. // Garbage Collection Flags - BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher - ///< garbage collection priority - Alias = 1 << 9, ///< This image has aliases and has priority on garbage - ///< collection + BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher + ///< garbage collection priority + Alias = 1 << 11, ///< This image has aliases and has priority on garbage + ///< collection }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -57,6 +59,12 @@ struct ImageBase { return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; } + [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept { + const VAddr overlap_end = overlap_gpu_addr + overlap_size; + const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes; + return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end; + } + void CheckBadOverlapState(); void CheckAliasState(); @@ -84,6 +92,29 @@ struct ImageBase { std::vector<AliasedImage> aliased_images; std::vector<ImageId> overlapping_images; + ImageMapId map_view_id{}; +}; + +struct ImageMapView { + explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id); + + [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { + const VAddr overlap_end = overlap_cpu_addr + overlap_size; + const VAddr cpu_addr_end = cpu_addr + size; + return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; + } + + [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept { + const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size; + const GPUVAddr gpu_addr_end = gpu_addr + size; + return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end; + } + + GPUVAddr gpu_addr; + VAddr cpu_addr; + size_t size; + ImageId image_id; + bool picked{}; }; struct ImageAllocBase { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d8dbd3824..e3542301e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -13,6 +13,7 @@ #include <span> #include <type_traits> #include <unordered_map> +#include <unordered_set> #include <utility> #include <vector> @@ -152,6 +153,9 @@ public: /// Remove images in a region void UnmapMemory(VAddr cpu_addr, size_t size); + /// Remove images in a region + void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); + /// Blit an image with the given parameters void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, @@ -190,7 +194,22 @@ public: private: /// Iterate over all page indices in a range template <typename Func> - static void ForEachPage(VAddr addr, size_t size, Func&& func) { + static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; + const u64 page_end = (addr + size - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); + } + } + } + + template <typename Func> + static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; const u64 page_end = (addr + size - 1) >> PAGE_BITS; for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { @@ -220,7 +239,7 @@ private: FramebufferId GetFramebufferId(const RenderTargets& key); /// Refresh the contents (pixel data) of an image - void RefreshContents(Image& image); + void RefreshContents(Image& image, ImageId image_id); /// Upload data from guest to an image template <typename StagingBuffer> @@ -269,6 +288,16 @@ private: template <typename Func> void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); + template <typename Func> + void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); + + template <typename Func> + void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); + + /// Iterates over all the images in a region calling func + template <typename Func> + void ForEachSparseSegment(ImageBase& image, Func&& func); + /// Find or create an image view in the given image with the passed parameters [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); @@ -279,10 +308,10 @@ private: void UnregisterImage(ImageId image); /// Track CPU reads and writes for image - void TrackImage(ImageBase& image); + void TrackImage(ImageBase& image, ImageId image_id); /// Stop tracking CPU reads and writes for image - void UntrackImage(ImageBase& image); + void UntrackImage(ImageBase& image, ImageId image_id); /// Delete image from the cache void DeleteImage(ImageId image); @@ -340,7 +369,13 @@ private: std::unordered_map<TSCEntry, SamplerId> samplers; std::unordered_map<RenderTargets, FramebufferId> framebuffers; - std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; + std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; + std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; + std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table; + + std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; + + VAddr virtual_invalid_space{}; bool has_deleted_images = false; u64 total_used_memory = 0; @@ -349,6 +384,7 @@ private: u64 critical_memory; SlotVector<Image> slot_images; + SlotVector<ImageMapView> slot_map_views; SlotVector<ImageView> slot_image_views; SlotVector<ImageAlloc> slot_image_allocs; SlotVector<Sampler> slot_samplers; @@ -459,7 +495,7 @@ void TextureCache<P>::RunGarbageCollector() { } } if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image); + UntrackImage(*image, image_id); } UnregisterImage(image_id); DeleteImage(image_id); @@ -658,7 +694,9 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { return; } image.flags |= ImageFlagBits::CpuModified; - UntrackImage(image); + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } }); } @@ -695,7 +733,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { for (const ImageId id : deleted_images) { Image& image = slot_images[id]; if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image); + UntrackImage(image, id); } UnregisterImage(id); DeleteImage(id); @@ -703,6 +741,23 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { } template <class P> +void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { + std::vector<ImageId> deleted_images; + ForEachImageInRegionGPU(gpu_addr, size, + [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Remapped)) { + continue; + } + image.flags |= ImageFlagBits::Remapped; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } + } +} + +template <class P> void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy, @@ -833,9 +888,10 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad if (it == page_table.end()) { return nullptr; } - const auto& image_ids = it->second; - for (const ImageId image_id : image_ids) { - const ImageBase& image = slot_images[image_id]; + const auto& image_map_ids = it->second; + for (const ImageMapId map_id : image_map_ids) { + const ImageMapView& map = slot_map_views[map_id]; + const ImageBase& image = slot_images[map.image_id]; if (image.cpu_addr != cpu_addr) { continue; } @@ -915,13 +971,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { } template <class P> -void TextureCache<P>::RefreshContents(Image& image) { +void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { if (False(image.flags & ImageFlagBits::CpuModified)) { // Only upload modified images return; } image.flags &= ~ImageFlagBits::CpuModified; - TrackImage(image); + TrackImage(image, image_id); if (image.info.num_samples > 1) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); @@ -958,7 +1014,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) template <class P> ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { - if (!IsValidAddress(gpu_memory, config)) { + if (!IsValidEntry(gpu_memory, config)) { return NULL_IMAGE_VIEW_ID; } const auto [pair, is_new] = image_views.try_emplace(config); @@ -1000,14 +1056,20 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a template <class P> ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { - return ImageId{}; + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); + if (!cpu_addr) { + return ImageId{}; + } } const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); ImageId image_id; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { + if (True(existing_image.flags & ImageFlagBits::Remapped)) { + return false; + } if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { const bool strict_size = False(options & RelaxedOptions::Size) && True(existing_image.flags & ImageFlagBits::Strong); @@ -1033,7 +1095,16 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, template <class P> ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + const auto size = CalculateGuestSizeInBytes(info); + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); + if (!cpu_addr) { + const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; + virtual_invalid_space += Common::AlignUp(size, 32); + cpu_addr = std::optional<VAddr>(fake_addr); + } + } ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); const Image& image = slot_images[image_id]; @@ -1053,10 +1124,16 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); std::vector<ImageId> overlap_ids; + std::unordered_set<ImageId> overlaps_found; std::vector<ImageId> left_aliased_ids; std::vector<ImageId> right_aliased_ids; + std::unordered_set<ImageId> ignore_textures; std::vector<ImageId> bad_overlap_ids; - ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { + const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + return; + } if (info.type == ImageType::Linear) { if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { // Alias linear images with the same pitch @@ -1064,6 +1141,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } return; } + overlaps_found.insert(overlap_id); static constexpr bool strict_size = true; const std::optional<OverlapResult> solution = ResolveOverlap( new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); @@ -1087,12 +1165,40 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA bad_overlap_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::BadOverlap; } - }); + }; + ForEachImageInRegion(cpu_addr, size_bytes, region_check); + const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { + if (!overlaps_found.contains(overlap_id)) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + } + if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { + ignore_textures.insert(overlap_id); + } + } + }; + ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; + if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { + new_image.flags |= ImageFlagBits::Sparse; + } + + for (const ImageId overlap_id : ignore_textures) { + Image& overlap = slot_images[overlap_id]; + if (True(overlap.flags & ImageFlagBits::GpuModified)) { + UNIMPLEMENTED(); + } + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + // TODO: Only upload what we need - RefreshContents(new_image); + RefreshContents(new_image, new_image_id); for (const ImageId overlap_id : overlap_ids) { Image& overlap = slot_images[overlap_id]; @@ -1104,7 +1210,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA runtime.CopyImage(new_image, overlap, copies); } if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap); + UntrackImage(overlap, overlap_id); } UnregisterImage(overlap_id); DeleteImage(overlap_id); @@ -1239,7 +1345,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; boost::container::small_vector<ImageId, 32> images; - ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { + boost::container::small_vector<ImageMapId, 32> maps; + ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { const auto it = page_table.find(page); if (it == page_table.end()) { if constexpr (BOOL_BREAK) { @@ -1248,12 +1355,105 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f return; } } + for (const ImageMapId map_id : it->second) { + ImageMapView& map = slot_map_views[map_id]; + if (map.picked) { + continue; + } + if (!map.Overlaps(cpu_addr, size)) { + continue; + } + map.picked = true; + maps.push_back(map_id); + Image& image = slot_images[map.image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(map.image_id); + if constexpr (BOOL_BREAK) { + if (func(map.image_id, image)) { + return true; + } + } else { + func(map.image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } + for (const ImageMapId map_id : maps) { + slot_map_views[map_id].picked = false; + } +} + +template <class P> +template <typename Func> +void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; + static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; + boost::container::small_vector<ImageId, 8> images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = gpu_page_table.find(page); + if (it == gpu_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} + +template <class P> +template <typename Func> +void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; + static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; + boost::container::small_vector<ImageId, 8> images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = sparse_page_table.find(page); + if (it == sparse_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } for (const ImageId image_id : it->second) { Image& image = slot_images[image_id]; if (True(image.flags & ImageFlagBits::Picked)) { continue; } - if (!image.Overlaps(cpu_addr, size)) { + if (!image.OverlapsGPU(gpu_addr, size)) { continue; } image.flags |= ImageFlagBits::Picked; @@ -1276,6 +1476,27 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f } template <class P> +template <typename Func> +void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; + static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; + const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); + for (auto& segment : segments) { + const auto gpu_addr = segment.first; + const auto size = segment.second; + std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + if constexpr (RETURNS_BOOL) { + if (func(gpu_addr, *cpu_addr, size)) { + break; + } + } else { + func(gpu_addr, *cpu_addr, size); + } + } +} + +template <class P> ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { Image& image = slot_images[image_id]; if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { @@ -1292,8 +1513,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Trying to register an already registered image"); image.flags |= ImageFlagBits::Registered; - ForEachPage(image.cpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { page_table[page].push_back(image_id); }); u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); if ((IsPixelFormatASTC(image.info.format) && True(image.flags & ImageFlagBits::AcceleratedUpload)) || @@ -1301,6 +1520,27 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + auto map_id = + slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + image.map_view_id = map_id; + return; + } + std::vector<ImageViewId> sparse_maps{}; + ForEachSparseSegment( + image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); + ForEachCPUPage(cpu_addr, size, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + sparse_maps.push_back(map_id); + }); + sparse_views.emplace(image_id, std::move(sparse_maps)); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); } template <class P> @@ -1317,34 +1557,125 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); - ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector<ImageId>& image_ids = page_it->second; - const auto vector_it = std::ranges::find(image_ids, image_id); - if (vector_it == image_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS); - return; - } - image_ids.erase(vector_it); + const auto& clear_page_table = + [this, image_id]( + u64 page, + std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) { + const auto page_it = selected_page_table.find(page); + if (page_it == selected_page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector<ImageId>& image_ids = page_it->second; + const auto vector_it = std::ranges::find(image_ids, image_id); + if (vector_it == image_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", + page << PAGE_BITS); + return; + } + image_ids.erase(vector_it); + }; + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + const auto map_id = image.map_view_id; + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector<ImageMapId>& image_map_ids = page_it->second; + const auto vector_it = std::ranges::find(image_map_ids, map_id); + if (vector_it == image_map_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", + page << PAGE_BITS); + return; + } + image_map_ids.erase(vector_it); + }); + slot_map_views.erase(map_id); + return; + } + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { + clear_page_table(page, sparse_page_table); }); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map_range = slot_map_views[map_view_id]; + const VAddr cpu_addr = map_range.cpu_addr; + const std::size_t size = map_range.size; + ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector<ImageMapId>& image_map_ids = page_it->second; + auto vector_it = image_map_ids.begin(); + while (vector_it != image_map_ids.end()) { + ImageMapView& map = slot_map_views[*vector_it]; + if (map.image_id != image_id) { + vector_it++; + continue; + } + if (!map.picked) { + map.picked = true; + } + vector_it = image_map_ids.erase(vector_it); + } + }); + slot_map_views.erase(map_view_id); + } + sparse_views.erase(it); } template <class P> -void TextureCache<P>::TrackImage(ImageBase& image) { +void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { ASSERT(False(image.flags & ImageFlagBits::Tracked)); image.flags |= ImageFlagBits::Tracked; - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + return; + } + if (True(image.flags & ImageFlagBits::Registered)) { + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + } + return; + } + ForEachSparseSegment(image, + [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + }); } template <class P> -void TextureCache<P>::UntrackImage(ImageBase& image) { +void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { ASSERT(True(image.flags & ImageFlagBits::Tracked)); image.flags &= ~ImageFlagBits::Tracked; - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + return; + } + ASSERT(True(image.flags & ImageFlagBits::Registered)); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + } } template <class P> @@ -1486,10 +1817,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool if (invalidate) { image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); if (False(image.flags & ImageFlagBits::Tracked)) { - TrackImage(image); + TrackImage(image, image_id); } } else { - RefreshContents(image); + RefreshContents(image, image_id); SynchronizeAliases(image_id); } if (is_modification) { diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index c9571f7e4..9fbdc1ac6 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14; constexpr SlotId CORRUPT_ID{0xfffffffe}; using ImageId = SlotId; +using ImageMapId = SlotId; using ImageViewId = SlotId; using ImageAllocId = SlotId; using SamplerId = SlotId; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 20794fa32..c872517b8 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept { return offsets; } +LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { + const u32 num_levels = info.resources.levels; + const LevelInfo level_info = MakeLevelInfo(info); + LevelArray sizes{}; + for (u32 level = 0; level < num_levels; ++level) { + sizes[level] = CalculateLevelSize(level_info, level); + } + return sizes; +} + std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { ASSERT(info.type == ImageType::e3D); std::vector<u32> offsets; @@ -776,14 +786,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn return copies; } -bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { - if (config.Address() == 0) { +bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { + const GPUVAddr address = config.Address(); + if (address == 0) { return false; } - if (config.Address() > (u64(1) << 48)) { + if (address > (1ULL << 48)) { return false; } - return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); + if (gpu_memory.GpuToCpuAddress(address).has_value()) { + return true; + } + const ImageInfo info{config}; + const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); + return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); } std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index cdc5cbc75..766502908 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -40,6 +40,8 @@ struct OverlapResult { [[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept; +[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; + [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); @@ -55,7 +57,7 @@ struct OverlapResult { const ImageInfo& src, SubresourceBase base); -[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); +[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |