diff options
-rw-r--r-- | .gitmodules | 8 | ||||
m--------- | externals/opus | 0 | ||||
-rw-r--r-- | externals/opus/CMakeLists.txt | 250 | ||||
m--------- | externals/opus/opus | 0 | ||||
-rw-r--r-- | src/core/file_sys/savedata_factory.cpp | 12 | ||||
-rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 12 | ||||
-rw-r--r-- | src/video_core/engines/kepler_compute.h | 7 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 61 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 8 | ||||
-rw-r--r-- | src/video_core/morton.cpp | 2 | ||||
-rw-r--r-- | src/video_core/rasterizer_accelerated.cpp | 63 | ||||
-rw-r--r-- | src/video_core/rasterizer_accelerated.h | 31 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 50 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 11 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 223 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 222 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 1 | ||||
-rw-r--r-- | src/video_core/surface.cpp | 8 | ||||
-rw-r--r-- | src/video_core/surface.h | 16 | ||||
-rw-r--r-- | src/video_core/textures/texture.h | 1 |
21 files changed, 562 insertions, 426 deletions
diff --git a/.gitmodules b/.gitmodules index 35e0d1240..ee0dc6c19 100644 --- a/.gitmodules +++ b/.gitmodules @@ -26,11 +26,11 @@ path = externals/mbedtls url = https://github.com/DarkLordZach/mbedtls [submodule "opus"] - path = externals/opus - url = https://github.com/ogniK5377/opus.git + path = externals/opus/opus + url = https://github.com/xiph/opus.git [submodule "soundtouch"] - path = externals/soundtouch - url = https://github.com/citra-emu/ext-soundtouch.git + path = externals/soundtouch + url = https://github.com/citra-emu/ext-soundtouch.git [submodule "libressl"] path = externals/libressl url = https://github.com/citra-emu/ext-libressl-portable.git diff --git a/externals/opus b/externals/opus deleted file mode 160000 -Subproject 562f8ba555c4181e1b57e82e496e4a959b9c019 diff --git a/externals/opus/CMakeLists.txt b/externals/opus/CMakeLists.txt new file mode 100644 index 000000000..cbb393272 --- /dev/null +++ b/externals/opus/CMakeLists.txt @@ -0,0 +1,250 @@ +cmake_minimum_required(VERSION 3.8) + +project(opus) + +option(OPUS_STACK_PROTECTOR "Use stack protection" OFF) +option(OPUS_USE_ALLOCA "Use alloca for stack arrays (on non-C99 compilers)" OFF) +option(OPUS_CUSTOM_MODES "Enable non-Opus modes, e.g. 44.1 kHz & 2^n frames" OFF) +option(OPUS_FIXED_POINT "Compile as fixed-point (for machines without a fast enough FPU)" OFF) +option(OPUS_ENABLE_FLOAT_API "Compile with the floating point API (for machines with float library" ON) + +include(opus/opus_functions.cmake) + +if(OPUS_STACK_PROTECTOR) + if(NOT MSVC) # GC on by default on MSVC + check_and_set_flag(STACK_PROTECTION_STRONG -fstack-protector-strong) + endif() +else() + if(MSVC) + check_and_set_flag(BUFFER_SECURITY_CHECK /GS-) + endif() +endif() + +add_library(opus STATIC + # CELT sources + opus/celt/bands.c + opus/celt/celt.c + opus/celt/celt_decoder.c + opus/celt/celt_encoder.c + opus/celt/celt_lpc.c + opus/celt/cwrs.c + opus/celt/entcode.c + opus/celt/entdec.c + opus/celt/entenc.c + opus/celt/kiss_fft.c + opus/celt/laplace.c + opus/celt/mathops.c + opus/celt/mdct.c + opus/celt/modes.c + opus/celt/pitch.c + opus/celt/quant_bands.c + opus/celt/rate.c + opus/celt/vq.c + + # SILK sources + opus/silk/A2NLSF.c + opus/silk/CNG.c + opus/silk/HP_variable_cutoff.c + opus/silk/LPC_analysis_filter.c + opus/silk/LPC_fit.c + opus/silk/LPC_inv_pred_gain.c + opus/silk/LP_variable_cutoff.c + opus/silk/NLSF2A.c + opus/silk/NLSF_VQ.c + opus/silk/NLSF_VQ_weights_laroia.c + opus/silk/NLSF_decode.c + opus/silk/NLSF_del_dec_quant.c + opus/silk/NLSF_encode.c + opus/silk/NLSF_stabilize.c + opus/silk/NLSF_unpack.c + opus/silk/NSQ.c + opus/silk/NSQ_del_dec.c + opus/silk/PLC.c + opus/silk/VAD.c + opus/silk/VQ_WMat_EC.c + opus/silk/ana_filt_bank_1.c + opus/silk/biquad_alt.c + opus/silk/bwexpander.c + opus/silk/bwexpander_32.c + opus/silk/check_control_input.c + opus/silk/code_signs.c + opus/silk/control_SNR.c + opus/silk/control_audio_bandwidth.c + opus/silk/control_codec.c + opus/silk/dec_API.c + opus/silk/decode_core.c + opus/silk/decode_frame.c + opus/silk/decode_indices.c + opus/silk/decode_parameters.c + opus/silk/decode_pitch.c + opus/silk/decode_pulses.c + opus/silk/decoder_set_fs.c + opus/silk/enc_API.c + opus/silk/encode_indices.c + opus/silk/encode_pulses.c + opus/silk/gain_quant.c + opus/silk/init_decoder.c + opus/silk/init_encoder.c + opus/silk/inner_prod_aligned.c + opus/silk/interpolate.c + opus/silk/lin2log.c + opus/silk/log2lin.c + opus/silk/pitch_est_tables.c + opus/silk/process_NLSFs.c + opus/silk/quant_LTP_gains.c + opus/silk/resampler.c + opus/silk/resampler_down2.c + opus/silk/resampler_down2_3.c + opus/silk/resampler_private_AR2.c + opus/silk/resampler_private_IIR_FIR.c + opus/silk/resampler_private_down_FIR.c + opus/silk/resampler_private_up2_HQ.c + opus/silk/resampler_rom.c + opus/silk/shell_coder.c + opus/silk/sigm_Q15.c + opus/silk/sort.c + opus/silk/stereo_LR_to_MS.c + opus/silk/stereo_MS_to_LR.c + opus/silk/stereo_decode_pred.c + opus/silk/stereo_encode_pred.c + opus/silk/stereo_find_predictor.c + opus/silk/stereo_quant_pred.c + opus/silk/sum_sqr_shift.c + opus/silk/table_LSF_cos.c + opus/silk/tables_LTP.c + opus/silk/tables_NLSF_CB_NB_MB.c + opus/silk/tables_NLSF_CB_WB.c + opus/silk/tables_gain.c + opus/silk/tables_other.c + opus/silk/tables_pitch_lag.c + opus/silk/tables_pulses_per_block.c + + # Opus sources + opus/src/analysis.c + opus/src/mapping_matrix.c + opus/src/mlp.c + opus/src/mlp_data.c + opus/src/opus.c + opus/src/opus_decoder.c + opus/src/opus_encoder.c + opus/src/opus_multistream.c + opus/src/opus_multistream_decoder.c + opus/src/opus_multistream_encoder.c + opus/src/opus_projection_decoder.c + opus/src/opus_projection_encoder.c + opus/src/repacketizer.c +) + +if (DEBUG) + target_sources(opus PRIVATE opus/silk/debug.c) +endif() + +if (OPUS_FIXED_POINT) + target_sources(opus PRIVATE + opus/silk/fixed/LTP_analysis_filter_FIX.c + opus/silk/fixed/LTP_scale_ctrl_FIX.c + opus/silk/fixed/apply_sine_window_FIX.c + opus/silk/fixed/autocorr_FIX.c + opus/silk/fixed/burg_modified_FIX.c + opus/silk/fixed/corrMatrix_FIX.c + opus/silk/fixed/encode_frame_FIX.c + opus/silk/fixed/find_LPC_FIX.c + opus/silk/fixed/find_LTP_FIX.c + opus/silk/fixed/find_pitch_lags_FIX.c + opus/silk/fixed/find_pred_coefs_FIX.c + opus/silk/fixed/k2a_FIX.c + opus/silk/fixed/k2a_Q16_FIX.c + opus/silk/fixed/noise_shape_analysis_FIX.c + opus/silk/fixed/pitch_analysis_core_FIX.c + opus/silk/fixed/prefilter_FIX.c + opus/silk/fixed/process_gains_FIX.c + opus/silk/fixed/regularize_correlations_FIX.c + opus/silk/fixed/residual_energy16_FIX.c + opus/silk/fixed/residual_energy_FIX.c + opus/silk/fixed/schur64_FIX.c + opus/silk/fixed/schur_FIX.c + opus/silk/fixed/solve_LS_FIX.c + opus/silk/fixed/vector_ops_FIX.c + opus/silk/fixed/warped_autocorrelation_FIX.c + ) +else() + target_sources(opus PRIVATE + opus/silk/float/LPC_analysis_filter_FLP.c + opus/silk/float/LPC_inv_pred_gain_FLP.c + opus/silk/float/LTP_analysis_filter_FLP.c + opus/silk/float/LTP_scale_ctrl_FLP.c + opus/silk/float/apply_sine_window_FLP.c + opus/silk/float/autocorrelation_FLP.c + opus/silk/float/burg_modified_FLP.c + opus/silk/float/bwexpander_FLP.c + opus/silk/float/corrMatrix_FLP.c + opus/silk/float/encode_frame_FLP.c + opus/silk/float/energy_FLP.c + opus/silk/float/find_LPC_FLP.c + opus/silk/float/find_LTP_FLP.c + opus/silk/float/find_pitch_lags_FLP.c + opus/silk/float/find_pred_coefs_FLP.c + opus/silk/float/inner_product_FLP.c + opus/silk/float/k2a_FLP.c + opus/silk/float/noise_shape_analysis_FLP.c + opus/silk/float/pitch_analysis_core_FLP.c + opus/silk/float/process_gains_FLP.c + opus/silk/float/regularize_correlations_FLP.c + opus/silk/float/residual_energy_FLP.c + opus/silk/float/scale_copy_vector_FLP.c + opus/silk/float/scale_vector_FLP.c + opus/silk/float/schur_FLP.c + opus/silk/float/sort_FLP.c + opus/silk/float/warped_autocorrelation_FLP.c + opus/silk/float/wrappers_FLP.c + ) +endif() + +target_compile_definitions(opus PRIVATE OPUS_BUILD ENABLE_HARDENING) + +if(NOT MSVC) + target_compile_definitions(opus PRIVATE _FORTIFY_SOURCE=2) +endif() + +# It is strongly recommended to uncomment one of these VAR_ARRAYS: Use C99 +# variable-length arrays for stack allocation USE_ALLOCA: Use alloca() for stack +# allocation If none is defined, then the fallback is a non-threadsafe global +# array +if(OPUS_USE_ALLOCA OR MSVC) + target_compile_definitions(opus PRIVATE USE_ALLOCA) +else() + target_compile_definitions(opus PRIVATE VAR_ARRAYS) +endif() + +if(OPUS_CUSTOM_MODES) + target_compile_definitions(opus PRIVATE CUSTOM_MODES) +endif() + +if(NOT OPUS_ENABLE_FLOAT_API) + target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API) +endif() + +target_compile_definitions(opus +PUBLIC + -DOPUS_VERSION="\\"1.3.1\\"" + +PRIVATE + # Use C99 intrinsics to speed up float-to-int conversion + HAVE_LRINTF +) + +if (FIXED_POINT) + target_compile_definitions(opus PRIVATE -DFIXED_POINT=1 -DDISABLE_FLOAT_API) +endif() + +target_include_directories(opus +PUBLIC + opus/include + +PRIVATE + opus/celt + opus/silk + opus/silk/fixed + opus/silk/float + opus/src +) diff --git a/externals/opus/opus b/externals/opus/opus new file mode 160000 +Subproject ad8fe90db79b7d2a135e3dfd2ed6631b0c5662a diff --git a/src/core/file_sys/savedata_factory.cpp b/src/core/file_sys/savedata_factory.cpp index fc8755c78..e2a7eaf7b 100644 --- a/src/core/file_sys/savedata_factory.cpp +++ b/src/core/file_sys/savedata_factory.cpp @@ -16,6 +16,7 @@ namespace FileSys { constexpr char SAVE_DATA_SIZE_FILENAME[] = ".yuzu_save_size"; namespace { + void PrintSaveDataDescriptorWarnings(SaveDataDescriptor meta) { if (meta.type == SaveDataType::SystemSaveData || meta.type == SaveDataType::SaveData) { if (meta.zero_1 != 0) { @@ -52,6 +53,13 @@ void PrintSaveDataDescriptorWarnings(SaveDataDescriptor meta) { meta.user_id[1], meta.user_id[0]); } } + +bool ShouldSaveDataBeAutomaticallyCreated(SaveDataSpaceId space, const SaveDataDescriptor& desc) { + return desc.type == SaveDataType::CacheStorage || desc.type == SaveDataType::TemporaryStorage || + (space == SaveDataSpaceId::NandUser && ///< Normal Save Data -- Current Title & User + desc.type == SaveDataType::SaveData && desc.title_id == 0 && desc.save_id == 0); +} + } // Anonymous namespace std::string SaveDataDescriptor::DebugInfo() const { @@ -96,6 +104,10 @@ ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, auto out = dir->GetDirectoryRelative(save_directory); + if (out == nullptr && ShouldSaveDataBeAutomaticallyCreated(space, meta)) { + return Create(space, meta); + } + // Return an error if the save data doesn't actually exist. if (out == nullptr) { // TODO(Subv): Find out correct error code. diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index cb6eda1b8..c911c6ec4 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -36,6 +36,8 @@ add_library(video_core STATIC memory_manager.h morton.cpp morton.h + rasterizer_accelerated.cpp + rasterizer_accelerated.h rasterizer_cache.cpp rasterizer_cache.h rasterizer_interface.h diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 91adef360..3a39aeabe 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -50,7 +50,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { } } -Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { +Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); ASSERT(cbuf_mask[regs.tex_cb_index]); @@ -61,13 +61,11 @@ Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) co ASSERT(address < texinfo.Address() + texinfo.size); const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)}; - return GetTextureInfo(tex_handle, offset); + return GetTextureInfo(tex_handle); } -Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHandle tex_handle, - std::size_t offset) const { - return Texture::FullTextureInfo{static_cast<u32>(offset), GetTICEntry(tex_handle.tic_id), - GetTSCEntry(tex_handle.tsc_id)}; +Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const { + return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; } u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { @@ -89,7 +87,7 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; - const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); + const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); return result; diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 8e7182727..b185c98c7 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -196,11 +196,10 @@ public: /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); - Tegra::Texture::FullTextureInfo GetTexture(std::size_t offset) const; + Texture::FullTextureInfo GetTexture(std::size_t offset) const; - /// Given a Texture Handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, - std::size_t offset) const; + /// Given a texture handle, returns the TSC and TIC entries. + Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 514ed93fa..2bed6cb38 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -760,61 +760,8 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { return tsc_entry; } -std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const { - std::vector<Texture::FullTextureInfo> textures; - - auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)]; - auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index]; - ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); - - GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size; - - // Offset into the texture constbuffer where the texture info begins. - static constexpr std::size_t TextureInfoOffset = 0x20; - - for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; - current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { - - const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)}; - - Texture::FullTextureInfo tex_info{}; - // TODO(Subv): Use the shader to determine which textures are actually accessed. - tex_info.index = - static_cast<u32>(current_texture - tex_info_buffer.address - TextureInfoOffset) / - sizeof(Texture::TextureHandle); - - // Load the TIC data. - auto tic_entry = GetTICEntry(tex_handle.tic_id); - // TODO(Subv): Workaround for BitField's move constructor being deleted. - std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry)); - - // Load the TSC data - auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); - // TODO(Subv): Workaround for BitField's move constructor being deleted. - std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry)); - - textures.push_back(tex_info); - } - - return textures; -} - -Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle, - std::size_t offset) const { - Texture::FullTextureInfo tex_info{}; - tex_info.index = static_cast<u32>(offset); - - // Load the TIC data. - auto tic_entry = GetTICEntry(tex_handle.tic_id); - // TODO(Subv): Workaround for BitField's move constructor being deleted. - std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry)); - - // Load the TSC data - auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); - // TODO(Subv): Workaround for BitField's move constructor being deleted. - std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry)); - - return tex_info; +Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const { + return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; } Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, @@ -830,7 +777,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; - return GetTextureInfo(tex_handle, offset); + return GetTextureInfo(tex_handle); } u32 Maxwell3D::GetRegisterValue(u32 method) const { @@ -867,7 +814,7 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b const GPUVAddr tex_info_address = tex_info_buffer.address + offset; const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; - const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); + const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); return result; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 987ad77b2..8cc842684 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1250,12 +1250,8 @@ public: void FlushMMEInlineDraw(); - /// Given a Texture Handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, - std::size_t offset) const; - - /// Returns a list of enabled textures for the specified shader stage. - std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; + /// Given a texture handle, returns the TSC and TIC entries. + Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index fe5f08ace..2f2fe6859 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -112,6 +112,7 @@ static constexpr ConversionArray morton_to_linear_fns = { MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_6X5>, MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>, + MortonCopy<true, PixelFormat::E5B9G9R9F>, MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z24S8>, @@ -192,6 +193,7 @@ static constexpr ConversionArray linear_to_morton_fns = { nullptr, nullptr, nullptr, + MortonCopy<false, PixelFormat::E5B9G9R9F>, MortonCopy<false, PixelFormat::Z32F>, MortonCopy<false, PixelFormat::Z16>, MortonCopy<false, PixelFormat::Z24S8>, diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp new file mode 100644 index 000000000..b230dcc18 --- /dev/null +++ b/src/video_core/rasterizer_accelerated.cpp @@ -0,0 +1,63 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <mutex> + +#include <boost/icl/interval_map.hpp> + +#include "common/assert.h" +#include "common/common_types.h" +#include "core/memory.h" +#include "video_core/rasterizer_accelerated.h" + +namespace VideoCore { + +namespace { + +template <typename Map, typename Interval> +constexpr auto RangeFromInterval(Map& map, const Interval& interval) { + return boost::make_iterator_range(map.equal_range(interval)); +} + +} // Anonymous namespace + +RasterizerAccelerated::RasterizerAccelerated() = default; + +RasterizerAccelerated::~RasterizerAccelerated() = default; + +void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { + std::lock_guard lock{pages_mutex}; + const u64 page_start{addr >> Memory::PAGE_BITS}; + const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = CachedPageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) { + cached_pages.add({pages_interval, delta}); + } + + for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u64 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) { + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + } else if (delta < 0 && count == -delta) { + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + } else { + ASSERT(count >= 0); + } + } + + if (delta < 0) { + cached_pages.add({pages_interval, delta}); + } +} + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h new file mode 100644 index 000000000..8f7e3547e --- /dev/null +++ b/src/video_core/rasterizer_accelerated.h @@ -0,0 +1,31 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <mutex> + +#include <boost/icl/interval_map.hpp> + +#include "common/common_types.h" +#include "video_core/rasterizer_interface.h" + +namespace VideoCore { + +/// Implements the shared part in GPU accelerated rasterizers in RasterizerInterface. +class RasterizerAccelerated : public RasterizerInterface { +public: + explicit RasterizerAccelerated(); + ~RasterizerAccelerated() override; + + void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; + +private: + using CachedPageMap = boost::icl::interval_map<u64, int>; + CachedPageMap cached_pages; + + std::mutex pages_mutex; +}; + +} // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 9431d64ac..6a4d2c83a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -68,8 +68,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind ScreenInfo& info) : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} { - OpenGLState::ApplyDefaultState(); - shader_program_manager = std::make_unique<GLShader::ProgramManager>(); state.draw.shader_program = 0; state.Apply(); @@ -342,42 +340,6 @@ std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); } -template <typename Map, typename Interval> -static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { - return boost::make_iterator_range(map.equal_range(interval)); -} - -void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { - std::lock_guard lock{pages_mutex}; - const u64 page_start{addr >> Memory::PAGE_BITS}; - const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; - - // Interval maps will erase segments if count reaches 0, so if delta is negative we have to - // subtract after iterating - const auto pages_interval = CachedPageMap::interval_type::right_open(page_start, page_end); - if (delta > 0) - cached_pages.add({pages_interval, delta}); - - for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { - const auto interval = pair.first & pages_interval; - const int count = pair.second; - - const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; - const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; - const u64 interval_size = interval_end_addr - interval_start_addr; - - if (delta > 0 && count == delta) - Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); - else if (delta < 0 && count == -delta) - Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); - else - ASSERT(count >= 0); - } - - if (delta < 0) - cached_pages.add({pages_interval, delta}); -} - void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { shader_cache.LoadDiskCache(stop_loading, callback); @@ -969,7 +931,7 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto texture = [&]() { + const auto texture = [&] { if (!entry.IsBindless()) { return maxwell3d.GetStageTexture(stage, entry.GetOffset()); } @@ -977,7 +939,7 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag Tegra::Texture::TextureHandle tex_handle; Tegra::Engines::ShaderType shader_type = static_cast<Tegra::Engines::ShaderType>(stage); tex_handle.raw = maxwell3d.AccessConstBuffer32(shader_type, cbuf.first, cbuf.second); - return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); + return maxwell3d.GetTextureInfo(tex_handle); }(); if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) { @@ -1000,7 +962,7 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto texture = [&]() { + const auto texture = [&] { if (!entry.IsBindless()) { return compute.GetTexture(entry.GetOffset()); } @@ -1008,7 +970,7 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) Tegra::Texture::TextureHandle tex_handle; tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second); - return compute.GetTextureInfo(tex_handle, entry.GetOffset()); + return compute.GetTextureInfo(tex_handle); }(); if (SetupTexture(bindpoint, texture, entry)) { @@ -1046,7 +1008,7 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { const auto& entries = shader->GetShaderEntries().images; for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto tic = [&]() { + const auto tic = [&] { if (!entry.IsBindless()) { return compute.GetTexture(entry.GetOffset()).tic; } @@ -1054,7 +1016,7 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { Tegra::Texture::TextureHandle tex_handle; tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second); - return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; + return compute.GetTextureInfo(tex_handle).tic; }(); SetupImage(bindpoint, tic, entry); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index c24a02d71..bd6fe5c3a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -9,17 +9,16 @@ #include <cstddef> #include <map> #include <memory> -#include <mutex> #include <optional> #include <tuple> #include <utility> -#include <boost/icl/interval_map.hpp> #include <glad/glad.h> #include "common/common_types.h" #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" @@ -52,7 +51,7 @@ namespace OpenGL { struct ScreenInfo; struct DrawParameters; -class RasterizerOpenGL : public VideoCore::RasterizerInterface { +class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { public: explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info); @@ -73,7 +72,6 @@ public: const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; @@ -228,11 +226,6 @@ private: AccelDraw accelerate_draw = AccelDraw::Disabled; OGLFramebuffer clear_framebuffer; - - using CachedPageMap = boost::icl::interval_map<u64, int>; - CachedPageMap cached_pages; - - std::mutex pages_mutex; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index bf86b5a0b..f25148362 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <iterator> #include <glad/glad.h> #include "common/assert.h" @@ -69,147 +70,29 @@ void Enable(GLenum cap, GLuint index, bool enable) { } void Enable(GLenum cap, bool& current_value, bool new_value) { - if (UpdateValue(current_value, new_value)) + if (UpdateValue(current_value, new_value)) { Enable(cap, new_value); + } } void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) { - if (UpdateValue(current_value, new_value)) + if (UpdateValue(current_value, new_value)) { Enable(cap, index, new_value); -} - -} // namespace - -OpenGLState::OpenGLState() { - // These all match default OpenGL values - framebuffer_srgb.enabled = false; - - multisample_control.alpha_to_coverage = false; - multisample_control.alpha_to_one = false; - - cull.enabled = false; - cull.mode = GL_BACK; - cull.front_face = GL_CCW; - - depth.test_enabled = false; - depth.test_func = GL_LESS; - depth.write_mask = GL_TRUE; - - primitive_restart.enabled = false; - primitive_restart.index = 0; - - for (auto& item : color_mask) { - item.red_enabled = GL_TRUE; - item.green_enabled = GL_TRUE; - item.blue_enabled = GL_TRUE; - item.alpha_enabled = GL_TRUE; } +} - const auto ResetStencil = [](auto& config) { - config.test_func = GL_ALWAYS; - config.test_ref = 0; - config.test_mask = 0xFFFFFFFF; - config.write_mask = 0xFFFFFFFF; - config.action_depth_fail = GL_KEEP; - config.action_depth_pass = GL_KEEP; - config.action_stencil_fail = GL_KEEP; - }; - stencil.test_enabled = false; - ResetStencil(stencil.front); - ResetStencil(stencil.back); - - for (auto& item : viewports) { - item.x = 0; - item.y = 0; - item.width = 0; - item.height = 0; - item.depth_range_near = 0.0f; - item.depth_range_far = 1.0f; - item.scissor.enabled = false; - item.scissor.x = 0; - item.scissor.y = 0; - item.scissor.width = 0; - item.scissor.height = 0; - } - - for (auto& item : blend) { - item.enabled = true; - item.rgb_equation = GL_FUNC_ADD; - item.a_equation = GL_FUNC_ADD; - item.src_rgb_func = GL_ONE; - item.dst_rgb_func = GL_ZERO; - item.src_a_func = GL_ONE; - item.dst_a_func = GL_ZERO; - } - - independant_blend.enabled = false; - - blend_color.red = 0.0f; - blend_color.green = 0.0f; - blend_color.blue = 0.0f; - blend_color.alpha = 0.0f; - - logic_op.enabled = false; - logic_op.operation = GL_COPY; - - draw.read_framebuffer = 0; - draw.draw_framebuffer = 0; - draw.vertex_array = 0; - draw.shader_program = 0; - draw.program_pipeline = 0; - - clip_distance = {}; - - point.size = 1; - - fragment_color_clamp.enabled = false; - - depth_clamp.far_plane = false; - depth_clamp.near_plane = false; - - polygon_offset.fill_enable = false; - polygon_offset.line_enable = false; - polygon_offset.point_enable = false; - polygon_offset.factor = 0.0f; - polygon_offset.units = 0.0f; - polygon_offset.clamp = 0.0f; +} // Anonymous namespace - alpha_test.enabled = false; - alpha_test.func = GL_ALWAYS; - alpha_test.ref = 0.0f; -} +OpenGLState::OpenGLState() = default; void OpenGLState::SetDefaultViewports() { - for (auto& item : viewports) { - item.x = 0; - item.y = 0; - item.width = 0; - item.height = 0; - item.depth_range_near = 0.0f; - item.depth_range_far = 1.0f; - item.scissor.enabled = false; - item.scissor.x = 0; - item.scissor.y = 0; - item.scissor.width = 0; - item.scissor.height = 0; - } + viewports.fill(Viewport{}); depth_clamp.far_plane = false; depth_clamp.near_plane = false; } -void OpenGLState::ApplyDefaultState() { - glEnable(GL_BLEND); - glDisable(GL_FRAMEBUFFER_SRGB); - glDisable(GL_CULL_FACE); - glDisable(GL_DEPTH_TEST); - glDisable(GL_PRIMITIVE_RESTART); - glDisable(GL_STENCIL_TEST); - glDisable(GL_COLOR_LOGIC_OP); - glDisable(GL_SCISSOR_TEST); -} - -void OpenGLState::ApplyFramebufferState() const { +void OpenGLState::ApplyFramebufferState() { if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) { glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); } @@ -218,52 +101,52 @@ void OpenGLState::ApplyFramebufferState() const { } } -void OpenGLState::ApplyVertexArrayState() const { +void OpenGLState::ApplyVertexArrayState() { if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) { glBindVertexArray(draw.vertex_array); } } -void OpenGLState::ApplyShaderProgram() const { +void OpenGLState::ApplyShaderProgram() { if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) { glUseProgram(draw.shader_program); } } -void OpenGLState::ApplyProgramPipeline() const { +void OpenGLState::ApplyProgramPipeline() { if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) { glBindProgramPipeline(draw.program_pipeline); } } -void OpenGLState::ApplyClipDistances() const { +void OpenGLState::ApplyClipDistances() { for (std::size_t i = 0; i < clip_distance.size(); ++i) { Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i], clip_distance[i]); } } -void OpenGLState::ApplyPointSize() const { +void OpenGLState::ApplyPointSize() { if (UpdateValue(cur_state.point.size, point.size)) { glPointSize(point.size); } } -void OpenGLState::ApplyFragmentColorClamp() const { +void OpenGLState::ApplyFragmentColorClamp() { if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) { glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); } } -void OpenGLState::ApplyMultisample() const { +void OpenGLState::ApplyMultisample() { Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage, multisample_control.alpha_to_coverage); Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one, multisample_control.alpha_to_one); } -void OpenGLState::ApplyDepthClamp() const { +void OpenGLState::ApplyDepthClamp() { if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { return; @@ -276,7 +159,7 @@ void OpenGLState::ApplyDepthClamp() const { Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane); } -void OpenGLState::ApplySRgb() const { +void OpenGLState::ApplySRgb() { if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled) return; cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled; @@ -287,7 +170,7 @@ void OpenGLState::ApplySRgb() const { } } -void OpenGLState::ApplyCulling() const { +void OpenGLState::ApplyCulling() { Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled); if (UpdateValue(cur_state.cull.mode, cull.mode)) { @@ -299,7 +182,12 @@ void OpenGLState::ApplyCulling() const { } } -void OpenGLState::ApplyColorMask() const { +void OpenGLState::ApplyColorMask() { + if (!dirty.color_mask) { + return; + } + dirty.color_mask = false; + for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { const auto& updated = color_mask[i]; auto& current = cur_state.color_mask[i]; @@ -314,7 +202,7 @@ void OpenGLState::ApplyColorMask() const { } } -void OpenGLState::ApplyDepth() const { +void OpenGLState::ApplyDepth() { Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled); if (cur_state.depth.test_func != depth.test_func) { @@ -328,7 +216,7 @@ void OpenGLState::ApplyDepth() const { } } -void OpenGLState::ApplyPrimitiveRestart() const { +void OpenGLState::ApplyPrimitiveRestart() { Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled); if (cur_state.primitive_restart.index != primitive_restart.index) { @@ -337,7 +225,12 @@ void OpenGLState::ApplyPrimitiveRestart() const { } } -void OpenGLState::ApplyStencilTest() const { +void OpenGLState::ApplyStencilTest() { + if (!dirty.stencil_state) { + return; + } + dirty.stencil_state = false; + Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled); const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) { @@ -366,7 +259,7 @@ void OpenGLState::ApplyStencilTest() const { ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back); } -void OpenGLState::ApplyViewport() const { +void OpenGLState::ApplyViewport() { for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) { const auto& updated = viewports[i]; auto& current = cur_state.viewports[i]; @@ -403,7 +296,7 @@ void OpenGLState::ApplyViewport() const { } } -void OpenGLState::ApplyGlobalBlending() const { +void OpenGLState::ApplyGlobalBlending() { const Blend& updated = blend[0]; Blend& current = cur_state.blend[0]; @@ -427,7 +320,7 @@ void OpenGLState::ApplyGlobalBlending() const { } } -void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { +void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) { const Blend& updated = blend[target]; Blend& current = cur_state.blend[target]; @@ -451,7 +344,12 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { } } -void OpenGLState::ApplyBlending() const { +void OpenGLState::ApplyBlending() { + if (!dirty.blend_state) { + return; + } + dirty.blend_state = false; + if (independant_blend.enabled) { const bool force = independant_blend.enabled != cur_state.independant_blend.enabled; for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) { @@ -470,7 +368,7 @@ void OpenGLState::ApplyBlending() const { } } -void OpenGLState::ApplyLogicOp() const { +void OpenGLState::ApplyLogicOp() { Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled); if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) { @@ -478,7 +376,12 @@ void OpenGLState::ApplyLogicOp() const { } } -void OpenGLState::ApplyPolygonOffset() const { +void OpenGLState::ApplyPolygonOffset() { + if (!dirty.polygon_offset) { + return; + } + dirty.polygon_offset = false; + Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable, polygon_offset.fill_enable); Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable, @@ -499,7 +402,7 @@ void OpenGLState::ApplyPolygonOffset() const { } } -void OpenGLState::ApplyAlphaTest() const { +void OpenGLState::ApplyAlphaTest() { Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled); if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref), std::tie(alpha_test.func, alpha_test.ref))) { @@ -507,19 +410,19 @@ void OpenGLState::ApplyAlphaTest() const { } } -void OpenGLState::ApplyTextures() const { +void OpenGLState::ApplyTextures() { if (const auto update = UpdateArray(cur_state.textures, textures)) { glBindTextures(update->first, update->second, textures.data() + update->first); } } -void OpenGLState::ApplySamplers() const { +void OpenGLState::ApplySamplers() { if (const auto update = UpdateArray(cur_state.samplers, samplers)) { glBindSamplers(update->first, update->second, samplers.data() + update->first); } } -void OpenGLState::ApplyImages() const { +void OpenGLState::ApplyImages() { if (const auto update = UpdateArray(cur_state.images, images)) { glBindImageTextures(update->first, update->second, images.data() + update->first); } @@ -535,32 +438,20 @@ void OpenGLState::Apply() { ApplyPointSize(); ApplyFragmentColorClamp(); ApplyMultisample(); - if (dirty.color_mask) { - ApplyColorMask(); - dirty.color_mask = false; - } + ApplyColorMask(); ApplyDepthClamp(); ApplyViewport(); - if (dirty.stencil_state) { - ApplyStencilTest(); - dirty.stencil_state = false; - } + ApplyStencilTest(); ApplySRgb(); ApplyCulling(); ApplyDepth(); ApplyPrimitiveRestart(); - if (dirty.blend_state) { - ApplyBlending(); - dirty.blend_state = false; - } + ApplyBlending(); ApplyLogicOp(); ApplyTextures(); ApplySamplers(); ApplyImages(); - if (dirty.polygon_offset) { - ApplyPolygonOffset(); - dirty.polygon_offset = false; - } + ApplyPolygonOffset(); ApplyAlphaTest(); } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index c358d3b38..cca25206b 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -5,168 +5,146 @@ #pragma once #include <array> +#include <type_traits> #include <glad/glad.h> #include "video_core/engines/maxwell_3d.h" namespace OpenGL { -namespace TextureUnits { - -struct TextureUnit { - GLint id; - constexpr GLenum Enum() const { - return static_cast<GLenum>(GL_TEXTURE0 + id); - } -}; - -constexpr TextureUnit MaxwellTexture(int unit) { - return TextureUnit{unit}; -} - -constexpr TextureUnit LightingLUT{3}; -constexpr TextureUnit FogLUT{4}; -constexpr TextureUnit ProcTexNoiseLUT{5}; -constexpr TextureUnit ProcTexColorMap{6}; -constexpr TextureUnit ProcTexAlphaMap{7}; -constexpr TextureUnit ProcTexLUT{8}; -constexpr TextureUnit ProcTexDiffLUT{9}; - -} // namespace TextureUnits - class OpenGLState { public: struct { - bool enabled; // GL_FRAMEBUFFER_SRGB + bool enabled = false; // GL_FRAMEBUFFER_SRGB } framebuffer_srgb; struct { - bool alpha_to_coverage; // GL_ALPHA_TO_COVERAGE - bool alpha_to_one; // GL_ALPHA_TO_ONE + bool alpha_to_coverage = false; // GL_ALPHA_TO_COVERAGE + bool alpha_to_one = false; // GL_ALPHA_TO_ONE } multisample_control; struct { - bool enabled; // GL_CLAMP_FRAGMENT_COLOR_ARB + bool enabled = false; // GL_CLAMP_FRAGMENT_COLOR_ARB } fragment_color_clamp; struct { - bool far_plane; - bool near_plane; + bool far_plane = false; + bool near_plane = false; } depth_clamp; // GL_DEPTH_CLAMP struct { - bool enabled; // GL_CULL_FACE - GLenum mode; // GL_CULL_FACE_MODE - GLenum front_face; // GL_FRONT_FACE + bool enabled = false; // GL_CULL_FACE + GLenum mode = GL_BACK; // GL_CULL_FACE_MODE + GLenum front_face = GL_CCW; // GL_FRONT_FACE } cull; struct { - bool test_enabled; // GL_DEPTH_TEST - GLenum test_func; // GL_DEPTH_FUNC - GLboolean write_mask; // GL_DEPTH_WRITEMASK + bool test_enabled = false; // GL_DEPTH_TEST + GLboolean write_mask = GL_TRUE; // GL_DEPTH_WRITEMASK + GLenum test_func = GL_LESS; // GL_DEPTH_FUNC } depth; struct { - bool enabled; - GLuint index; + bool enabled = false; + GLuint index = 0; } primitive_restart; // GL_PRIMITIVE_RESTART struct ColorMask { - GLboolean red_enabled; - GLboolean green_enabled; - GLboolean blue_enabled; - GLboolean alpha_enabled; + GLboolean red_enabled = GL_TRUE; + GLboolean green_enabled = GL_TRUE; + GLboolean blue_enabled = GL_TRUE; + GLboolean alpha_enabled = GL_TRUE; }; std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_mask; // GL_COLOR_WRITEMASK struct { - bool test_enabled; // GL_STENCIL_TEST + bool test_enabled = false; // GL_STENCIL_TEST struct { - GLenum test_func; // GL_STENCIL_FUNC - GLint test_ref; // GL_STENCIL_REF - GLuint test_mask; // GL_STENCIL_VALUE_MASK - GLuint write_mask; // GL_STENCIL_WRITEMASK - GLenum action_stencil_fail; // GL_STENCIL_FAIL - GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL - GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS + GLenum test_func = GL_ALWAYS; // GL_STENCIL_FUNC + GLint test_ref = 0; // GL_STENCIL_REF + GLuint test_mask = 0xFFFFFFFF; // GL_STENCIL_VALUE_MASK + GLuint write_mask = 0xFFFFFFFF; // GL_STENCIL_WRITEMASK + GLenum action_stencil_fail = GL_KEEP; // GL_STENCIL_FAIL + GLenum action_depth_fail = GL_KEEP; // GL_STENCIL_PASS_DEPTH_FAIL + GLenum action_depth_pass = GL_KEEP; // GL_STENCIL_PASS_DEPTH_PASS } front, back; } stencil; struct Blend { - bool enabled; // GL_BLEND - GLenum rgb_equation; // GL_BLEND_EQUATION_RGB - GLenum a_equation; // GL_BLEND_EQUATION_ALPHA - GLenum src_rgb_func; // GL_BLEND_SRC_RGB - GLenum dst_rgb_func; // GL_BLEND_DST_RGB - GLenum src_a_func; // GL_BLEND_SRC_ALPHA - GLenum dst_a_func; // GL_BLEND_DST_ALPHA + bool enabled = false; // GL_BLEND + GLenum rgb_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_RGB + GLenum a_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_ALPHA + GLenum src_rgb_func = GL_ONE; // GL_BLEND_SRC_RGB + GLenum dst_rgb_func = GL_ZERO; // GL_BLEND_DST_RGB + GLenum src_a_func = GL_ONE; // GL_BLEND_SRC_ALPHA + GLenum dst_a_func = GL_ZERO; // GL_BLEND_DST_ALPHA }; std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend; struct { - bool enabled; + bool enabled = false; } independant_blend; struct { - GLclampf red; - GLclampf green; - GLclampf blue; - GLclampf alpha; + GLclampf red = 0.0f; + GLclampf green = 0.0f; + GLclampf blue = 0.0f; + GLclampf alpha = 0.0f; } blend_color; // GL_BLEND_COLOR struct { - bool enabled; // GL_LOGIC_OP_MODE - GLenum operation; + bool enabled = false; // GL_LOGIC_OP_MODE + GLenum operation = GL_COPY; } logic_op; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures{}; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers{}; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images{}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures = {}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers = {}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images = {}; struct { - GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING - GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING - GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING - GLuint shader_program; // GL_CURRENT_PROGRAM - GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING + GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING + GLuint draw_framebuffer = 0; // GL_DRAW_FRAMEBUFFER_BINDING + GLuint vertex_array = 0; // GL_VERTEX_ARRAY_BINDING + GLuint shader_program = 0; // GL_CURRENT_PROGRAM + GLuint program_pipeline = 0; // GL_PROGRAM_PIPELINE_BINDING } draw; - struct viewport { - GLint x; - GLint y; - GLint width; - GLint height; - GLfloat depth_range_near; // GL_DEPTH_RANGE - GLfloat depth_range_far; // GL_DEPTH_RANGE + struct Viewport { + GLint x = 0; + GLint y = 0; + GLint width = 0; + GLint height = 0; + GLfloat depth_range_near = 0.0f; // GL_DEPTH_RANGE + GLfloat depth_range_far = 1.0f; // GL_DEPTH_RANGE struct { - bool enabled; // GL_SCISSOR_TEST - GLint x; - GLint y; - GLsizei width; - GLsizei height; + bool enabled = false; // GL_SCISSOR_TEST + GLint x = 0; + GLint y = 0; + GLsizei width = 0; + GLsizei height = 0; } scissor; }; - std::array<viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports; + std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports; struct { - float size; // GL_POINT_SIZE + float size = 1.0f; // GL_POINT_SIZE } point; struct { - bool point_enable; - bool line_enable; - bool fill_enable; - GLfloat units; - GLfloat factor; - GLfloat clamp; + bool point_enable = false; + bool line_enable = false; + bool fill_enable = false; + GLfloat units = 0.0f; + GLfloat factor = 0.0f; + GLfloat clamp = 0.0f; } polygon_offset; struct { - bool enabled; // GL_ALPHA_TEST - GLenum func; // GL_ALPHA_TEST_FUNC - GLfloat ref; // GL_ALPHA_TEST_REF + bool enabled = false; // GL_ALPHA_TEST + GLenum func = GL_ALWAYS; // GL_ALPHA_TEST_FUNC + GLfloat ref = 0.0f; // GL_ALPHA_TEST_REF } alpha_test; - std::array<bool, 8> clip_distance; // GL_CLIP_DISTANCE + std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE OpenGLState(); @@ -179,34 +157,31 @@ public: /// Apply this state as the current OpenGL state void Apply(); - void ApplyFramebufferState() const; - void ApplyVertexArrayState() const; - void ApplyShaderProgram() const; - void ApplyProgramPipeline() const; - void ApplyClipDistances() const; - void ApplyPointSize() const; - void ApplyFragmentColorClamp() const; - void ApplyMultisample() const; - void ApplySRgb() const; - void ApplyCulling() const; - void ApplyColorMask() const; - void ApplyDepth() const; - void ApplyPrimitiveRestart() const; - void ApplyStencilTest() const; - void ApplyViewport() const; - void ApplyTargetBlending(std::size_t target, bool force) const; - void ApplyGlobalBlending() const; - void ApplyBlending() const; - void ApplyLogicOp() const; - void ApplyTextures() const; - void ApplySamplers() const; - void ApplyImages() const; - void ApplyDepthClamp() const; - void ApplyPolygonOffset() const; - void ApplyAlphaTest() const; - - /// Set the initial OpenGL state - static void ApplyDefaultState(); + void ApplyFramebufferState(); + void ApplyVertexArrayState(); + void ApplyShaderProgram(); + void ApplyProgramPipeline(); + void ApplyClipDistances(); + void ApplyPointSize(); + void ApplyFragmentColorClamp(); + void ApplyMultisample(); + void ApplySRgb(); + void ApplyCulling(); + void ApplyColorMask(); + void ApplyDepth(); + void ApplyPrimitiveRestart(); + void ApplyStencilTest(); + void ApplyViewport(); + void ApplyTargetBlending(std::size_t target, bool force); + void ApplyGlobalBlending(); + void ApplyBlending(); + void ApplyLogicOp(); + void ApplyTextures(); + void ApplySamplers(); + void ApplyImages(); + void ApplyDepthClamp(); + void ApplyPolygonOffset(); + void ApplyAlphaTest(); /// Resets any references to the given resource OpenGLState& UnbindTexture(GLuint handle); @@ -253,5 +228,6 @@ private: bool color_mask; } dirty{}; }; +static_assert(std::is_trivially_copyable_v<OpenGLState>); } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 2f9bfd7e4..55b3e58b2 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -131,6 +131,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6_SRGB {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, ComponentType::Float, false}, // E5B9G9R9F // Depth formats {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 9a3c05288..621136b6e 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -315,6 +315,14 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, break; } break; + case Tegra::Texture::TextureFormat::E5B9G9R9_SHAREDEXP: + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::E5B9G9R9F; + default: + break; + } + break; case Tegra::Texture::TextureFormat::ZF32: return PixelFormat::Z32F; case Tegra::Texture::TextureFormat::Z16: diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 97668f802..d3bcd38c5 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -86,19 +86,20 @@ enum class PixelFormat { ASTC_2D_8X6_SRGB = 68, ASTC_2D_6X5 = 69, ASTC_2D_6X5_SRGB = 70, + E5B9G9R9F = 71, MaxColorFormat, // Depth formats - Z32F = 71, - Z16 = 72, + Z32F = 72, + Z16 = 73, MaxDepthFormat, // DepthStencil formats - Z24S8 = 73, - S8Z24 = 74, - Z32FS8 = 75, + Z24S8 = 74, + S8Z24 = 75, + Z32FS8 = 76, MaxDepthStencilFormat, @@ -207,6 +208,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ 2, // ASTC_2D_8X6_SRGB 2, // ASTC_2D_6X5 2, // ASTC_2D_6X5_SRGB + 0, // E5B9G9R9F 0, // Z32F 0, // Z16 0, // Z24S8 @@ -302,6 +304,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ 8, // ASTC_2D_8X6_SRGB 6, // ASTC_2D_6X5 6, // ASTC_2D_6X5_SRGB + 1, // E5B9G9R9F 1, // Z32F 1, // Z16 1, // Z24S8 @@ -389,6 +392,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ 6, // ASTC_2D_8X6_SRGB 5, // ASTC_2D_6X5 5, // ASTC_2D_6X5_SRGB + 1, // E5B9G9R9F 1, // Z32F 1, // Z16 1, // Z24S8 @@ -476,6 +480,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 128, // ASTC_2D_8X6_SRGB 128, // ASTC_2D_6X5 128, // ASTC_2D_6X5_SRGB + 32, // E5B9G9R9F 32, // Z32F 16, // Z16 32, // Z24S8 @@ -578,6 +583,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table SurfaceCompression::Converted, // ASTC_2D_8X6_SRGB SurfaceCompression::Converted, // ASTC_2D_6X5 SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB + SurfaceCompression::None, // E5B9G9R9F SurfaceCompression::None, // Z32F SurfaceCompression::None, // Z16 SurfaceCompression::None, // Z24S8 diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index e36bc2c04..0429af9c1 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -354,7 +354,6 @@ struct TSCEntry { static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); struct FullTextureInfo { - u32 index; TICEntry tic; TSCEntry tsc; }; |