153 files changed, 5260 insertions, 1465 deletions
diff --git a/src/common/lz4_compression.h b/src/common/lz4_compression.h
index fe2231a6c..4c16f6e03 100644
--- a/src/common/lz4_compression.h
+++ b/src/common/lz4_compression.h
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#pragma once
+
 #include <vector>
 
 #include "common/common_types.h"
diff --git a/src/common/scope_exit.h b/src/common/scope_exit.h
index baf1f1c9e..1176a72b1 100644
--- a/src/common/scope_exit.h
+++ b/src/common/scope_exit.h
@@ -20,7 +20,7 @@ struct ScopeExitHelper {
 
 template <typename Func>
 ScopeExitHelper<Func> ScopeExit(Func&& func) {
-    return ScopeExitHelper<Func>(std::move(func));
+    return ScopeExitHelper<Func>(std::forward<Func>(func));
 }
 } // namespace detail
 
diff --git a/src/common/swap.h b/src/common/swap.h
index b3eab1324..71932c2bb 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -21,11 +21,6 @@
 
 #if defined(_MSC_VER)
 #include <cstdlib>
-#elif defined(__linux__)
-#include <byteswap.h>
-#elif defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) ||                     \
-    defined(__NetBSD__) || defined(__OpenBSD__)
-#include <sys/endian.h>
 #endif
 #include <cstring>
 #include "common/common_types.h"
@@ -62,86 +57,49 @@
 namespace Common {
 
 #ifdef _MSC_VER
-inline u16 swap16(u16 _data) {
-    return _byteswap_ushort(_data);
+[[nodiscard]] inline u16 swap16(u16 data) noexcept {
+    return _byteswap_ushort(data);
 }
-inline u32 swap32(u32 _data) {
-    return _byteswap_ulong(_data);
+[[nodiscard]] inline u32 swap32(u32 data) noexcept {
+    return _byteswap_ulong(data);
 }
-inline u64 swap64(u64 _data) {
-    return _byteswap_uint64(_data);
+[[nodiscard]] inline u64 swap64(u64 data) noexcept {
+    return _byteswap_uint64(data);
 }
-#elif defined(ARCHITECTURE_ARM) && (__ARM_ARCH >= 6)
-inline u16 swap16(u16 _data) {
-    u32 data = _data;
-    __asm__("rev16 %0, %1\n" : "=l"(data) : "l"(data));
-    return (u16)data;
-}
-inline u32 swap32(u32 _data) {
-    __asm__("rev %0, %1\n" : "=l"(_data) : "l"(_data));
-    return _data;
-}
-inline u64 swap64(u64 _data) {
-    return ((u64)swap32(_data) << 32) | swap32(_data >> 32);
-}
-#elif __linux__
-inline u16 swap16(u16 _data) {
-    return bswap_16(_data);
-}
-inline u32 swap32(u32 _data) {
-    return bswap_32(_data);
-}
-inline u64 swap64(u64 _data) {
-    return bswap_64(_data);
-}
-#elif __APPLE__
-inline __attribute__((always_inline)) u16 swap16(u16 _data) {
-    return (_data >> 8) | (_data << 8);
-}
-inline __attribute__((always_inline)) u32 swap32(u32 _data) {
-    return __builtin_bswap32(_data);
-}
-inline __attribute__((always_inline)) u64 swap64(u64 _data) {
-    return __builtin_bswap64(_data);
-}
-#elif defined(__Bitrig__) || defined(__OpenBSD__)
+#elif defined(__clang__) || defined(__GNUC__)
+#if defined(__Bitrig__) || defined(__OpenBSD__)
 // redefine swap16, swap32, swap64 as inline functions
 #undef swap16
 #undef swap32
 #undef swap64
-inline u16 swap16(u16 _data) {
-    return __swap16(_data);
-}
-inline u32 swap32(u32 _data) {
-    return __swap32(_data);
-}
-inline u64 swap64(u64 _data) {
-    return __swap64(_data);
-}
-#elif defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__)
-inline u16 swap16(u16 _data) {
-    return bswap16(_data);
+#endif
+[[nodiscard]] inline u16 swap16(u16 data) noexcept {
+    return __builtin_bswap16(data);
 }
-inline u32 swap32(u32 _data) {
-    return bswap32(_data);
+[[nodiscard]] inline u32 swap32(u32 data) noexcept {
+    return __builtin_bswap32(data);
 }
-inline u64 swap64(u64 _data) {
-    return bswap64(_data);
+[[nodiscard]] inline u64 swap64(u64 data) noexcept {
+    return __builtin_bswap64(data);
 }
 #else
-// Slow generic implementation.
-inline u16 swap16(u16 data) {
+// Generic implementation.
+[[nodiscard]] inline u16 swap16(u16 data) noexcept {
     return (data >> 8) | (data << 8);
 }
-inline u32 swap32(u32 data) {
-    return (swap16(data) << 16) | swap16(data >> 16);
+[[nodiscard]] inline u32 swap32(u32 data) noexcept {
+    return ((data & 0xFF000000U) >> 24) | ((data & 0x00FF0000U) >> 8) |
+           ((data & 0x0000FF00U) << 8) | ((data & 0x000000FFU) << 24);
 }
-inline u64 swap64(u64 data) {
-    return ((u64)swap32(data) << 32) | swap32(data >> 32);
+[[nodiscard]] inline u64 swap64(u64 data) noexcept {
+    return ((data & 0xFF00000000000000ULL) >> 56) | ((data & 0x00FF000000000000ULL) >> 40) |
+           ((data & 0x0000FF0000000000ULL) >> 24) | ((data & 0x000000FF00000000ULL) >> 8) |
+           ((data & 0x00000000FF000000ULL) << 8) | ((data & 0x0000000000FF0000ULL) << 24) |
+           ((data & 0x000000000000FF00ULL) << 40) | ((data & 0x00000000000000FFULL) << 56);
 }
 #endif
 
-inline float swapf(float f) {
+[[nodiscard]] inline float swapf(float f) noexcept {
     static_assert(sizeof(u32) == sizeof(float), "float must be the same size as uint32_t.");
 
     u32 value;
@@ -153,7 +111,7 @@ inline float swapf(float f) {
     return f;
 }
 
-inline double swapd(double f) {
+[[nodiscard]] inline double swapd(double f) noexcept {
     static_assert(sizeof(u64) == sizeof(double), "double must be the same size as uint64_t.");
 
     u64 value;
diff --git a/src/common/zstd_compression.h b/src/common/zstd_compression.h
index e0a64b035..e9de941c8 100644
--- a/src/common/zstd_compression.h
+++ b/src/common/zstd_compression.h
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#pragma once
+
 #include <vector>
 
 #include "common/common_types.h"
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 4dfd41b43..978b1518f 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -7,6 +7,10 @@
 #include <array>
 #include "common/common_types.h"
 
+namespace Common {
+struct PageTable;
+}
+
 namespace Kernel {
 enum class VMAPermission : u8;
 }
@@ -49,8 +53,14 @@ public:
     /// Clear all instruction cache
     virtual void ClearInstructionCache() = 0;
 
-    /// Notify CPU emulation that page tables have changed
-    virtual void PageTableChanged() = 0;
+    /// Notifies CPU emulation that the current page table has changed.
+    ///
+    /// @param new_page_table                 The new page table.
+    /// @param new_address_space_size_in_bits The new usable size of the address space in bits.
+    ///                                       This can be either 32, 36, or 39 on official software.
+    ///
+    virtual void PageTableChanged(Common::PageTable& new_page_table,
+                                  std::size_t new_address_space_size_in_bits) = 0;
 
     /**
      * Set the Program Counter to an address
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 49145911b..44307fa19 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -99,7 +99,7 @@ public:
     }
 
     void CallSVC(u32 swi) override {
-        Kernel::CallSVC(swi);
+        Kernel::CallSVC(parent.system, swi);
     }
 
     void AddTicks(u64 ticks) override {
@@ -112,14 +112,14 @@ public:
         // Always execute at least one tick.
         amortized_ticks = std::max<u64>(amortized_ticks, 1);
 
-        parent.core_timing.AddTicks(amortized_ticks);
+        parent.system.CoreTiming().AddTicks(amortized_ticks);
         num_interpreted_instructions = 0;
     }
     u64 GetTicksRemaining() override {
-        return std::max(parent.core_timing.GetDowncount(), 0);
+        return std::max(parent.system.CoreTiming().GetDowncount(), 0);
     }
     u64 GetCNTPCT() override {
-        return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
+        return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
     }
 
     ARM_Dynarmic& parent;
@@ -128,18 +128,16 @@ public:
     u64 tpidr_el0 = 0;
 };
 
-std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
-    auto* current_process = Core::CurrentProcess();
-    auto** const page_table = current_process->VMManager().page_table.pointers.data();
-
+std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& page_table,
+                                                          std::size_t address_space_bits) const {
     Dynarmic::A64::UserConfig config;
 
     // Callbacks
     config.callbacks = cb.get();
 
     // Memory
-    config.page_table = reinterpret_cast<void**>(page_table);
-    config.page_table_address_space_bits = current_process->VMManager().GetAddressSpaceWidth();
+    config.page_table = reinterpret_cast<void**>(page_table.pointers.data());
+    config.page_table_address_space_bits = address_space_bits;
     config.silently_mirror_page_table = false;
 
     // Multi-process state
@@ -171,16 +169,11 @@ void ARM_Dynarmic::Step() {
     cb->InterpreterFallback(jit->GetPC(), 1);
 }
 
-ARM_Dynarmic::ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
+ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor,
                            std::size_t core_index)
-    : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{core_timing},
-      core_index{core_index}, core_timing{core_timing},
-      exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {
-    ThreadContext ctx{};
-    inner_unicorn.SaveContext(ctx);
-    PageTableChanged();
-    LoadContext(ctx);
-}
+    : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system},
+      core_index{core_index}, system{system},
+      exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
 
 ARM_Dynarmic::~ARM_Dynarmic() = default;
 
@@ -275,8 +268,9 @@ void ARM_Dynarmic::ClearExclusiveState() {
     jit->ClearExclusiveState();
 }
 
-void ARM_Dynarmic::PageTableChanged() {
-    jit = MakeJit();
+void ARM_Dynarmic::PageTableChanged(Common::PageTable& page_table,
+                                    std::size_t new_address_space_size_in_bits) {
+    jit = MakeJit(page_table, new_address_space_size_in_bits);
 }
 
 DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {}
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index d867c2a50..b701e97a3 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,19 +12,15 @@
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"
 
-namespace Core::Timing {
-class CoreTiming;
-}
-
 namespace Core {
 
 class ARM_Dynarmic_Callbacks;
 class DynarmicExclusiveMonitor;
+class System;
 
 class ARM_Dynarmic final : public ARM_Interface {
 public:
-    ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
-                 std::size_t core_index);
+    ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
     ~ARM_Dynarmic() override;
 
     void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
@@ -52,10 +48,12 @@ public:
     void ClearExclusiveState() override;
 
     void ClearInstructionCache() override;
-    void PageTableChanged() override;
+    void PageTableChanged(Common::PageTable& new_page_table,
+                          std::size_t new_address_space_size_in_bits) override;
 
 private:
-    std::unique_ptr<Dynarmic::A64::Jit> MakeJit() const;
+    std::unique_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table,
+                                                std::size_t address_space_bits) const;
 
     friend class ARM_Dynarmic_Callbacks;
     std::unique_ptr<ARM_Dynarmic_Callbacks> cb;
@@ -63,7 +61,7 @@ private:
     ARM_Unicorn inner_unicorn;
 
     std::size_t core_index;
-    Timing::CoreTiming& core_timing;
+    System& system;
     DynarmicExclusiveMonitor& exclusive_monitor;
 };
 
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 27309280c..4e07fe8b5 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -10,7 +10,6 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/hle/kernel/svc.h"
-#include "core/memory.h"
 
 namespace Core {
 
@@ -49,20 +48,6 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_
     }
 }
 
-static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) {
-    u32 esr{};
-    CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
-
-    auto ec = esr >> 26;
-    auto iss = esr & 0xFFFFFF;
-
-    switch (ec) {
-    case 0x15: // SVC
-        Kernel::CallSVC(iss);
-        break;
-    }
-}
-
 static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
                                void* user_data) {
     ARM_Interface::ThreadContext ctx{};
@@ -72,7 +57,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
     return {};
 }
 
-ARM_Unicorn::ARM_Unicorn(Timing::CoreTiming& core_timing) : core_timing{core_timing} {
+ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
     CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));
 
     auto fpv = 3 << 20;
@@ -177,7 +162,7 @@ void ARM_Unicorn::Run() {
     if (GDBStub::IsServerEnabled()) {
         ExecuteInstructions(std::max(4000000, 0));
     } else {
-        ExecuteInstructions(std::max(core_timing.GetDowncount(), 0));
+        ExecuteInstructions(std::max(system.CoreTiming().GetDowncount(), 0));
     }
 }
 
@@ -190,7 +175,7 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
 void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
     MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
     CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
-    core_timing.AddTicks(num_instructions);
+    system.CoreTiming().AddTicks(num_instructions);
     if (GDBStub::IsServerEnabled()) {
         if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) {
             uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
@@ -273,4 +258,20 @@ void ARM_Unicorn::RecordBreak(GDBStub::BreakpointAddress bkpt) {
     last_bkpt_hit = true;
 }
 
+void ARM_Unicorn::InterruptHook(uc_engine* uc, u32 int_no, void* user_data) {
+    u32 esr{};
+    CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
+
+    const auto ec = esr >> 26;
+    const auto iss = esr & 0xFFFFFF;
+
+    auto* const arm_instance = static_cast<ARM_Unicorn*>(user_data);
+
+    switch (ec) {
+    case 0x15: // SVC
+        Kernel::CallSVC(arm_instance->system, iss);
+        break;
+    }
+}
+
 } // namespace Core
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 1e44f0736..34e974b4d 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -9,15 +9,13 @@
 #include "core/arm/arm_interface.h"
 #include "core/gdbstub/gdbstub.h"
 
-namespace Core::Timing {
-class CoreTiming;
-}
-
 namespace Core {
 
+class System;
+
 class ARM_Unicorn final : public ARM_Interface {
 public:
-    explicit ARM_Unicorn(Timing::CoreTiming& core_timing);
+    explicit ARM_Unicorn(System& system);
     ~ARM_Unicorn() override;
 
     void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
@@ -43,12 +41,14 @@ public:
     void Run() override;
     void Step() override;
     void ClearInstructionCache() override;
-    void PageTableChanged() override{};
+    void PageTableChanged(Common::PageTable&, std::size_t) override {}
     void RecordBreak(GDBStub::BreakpointAddress bkpt);
 
 private:
+    static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data);
+
     uc_engine* uc{};
-    Timing::CoreTiming& core_timing;
+    System& system;
     GDBStub::BreakpointAddress last_bkpt{};
     bool last_bkpt_hit = false;
 };
diff --git a/src/core/core.cpp b/src/core/core.cpp
index bc9e887b6..175a5f2ea 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -3,9 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <array>
-#include <map>
 #include <memory>
-#include <thread>
 #include <utility>
 
 #include "common/file_util.h"
@@ -38,8 +36,6 @@
 #include "frontend/applets/software_keyboard.h"
 #include "frontend/applets/web_browser.h"
 #include "video_core/debug_utils/debug_utils.h"
-#include "video_core/gpu_asynch.h"
-#include "video_core/gpu_synch.h"
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"
 
@@ -81,7 +77,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
     return vfs->OpenFile(path, FileSys::Mode::Read);
 }
 struct System::Impl {
-    explicit Impl(System& system) : kernel{system} {}
+    explicit Impl(System& system) : kernel{system}, cpu_core_manager{system} {}
 
     Cpu& CurrentCpuCore() {
         return cpu_core_manager.GetCurrentCore();
@@ -99,6 +95,7 @@ struct System::Impl {
         LOG_DEBUG(HW_Memory, "initialized OK");
 
         core_timing.Initialize();
+        cpu_core_manager.Initialize();
         kernel.Initialize();
 
         const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
@@ -120,9 +117,6 @@ struct System::Impl {
         if (web_browser == nullptr)
             web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>();
 
-        auto main_process = Kernel::Process::Create(system, "main");
-        kernel.MakeCurrentProcess(main_process.get());
-
         telemetry_session = std::make_unique<Core::TelemetrySession>();
         service_manager = std::make_shared<Service::SM::ServiceManager>();
 
@@ -134,15 +128,9 @@ struct System::Impl {
             return ResultStatus::ErrorVideoCore;
         }
 
-        is_powered_on = true;
-
-        if (Settings::values.use_asynchronous_gpu_emulation) {
-            gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
-        } else {
-            gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
-        }
+        gpu_core = VideoCore::CreateGPU(system);
 
-        cpu_core_manager.Initialize(system);
+        is_powered_on = true;
 
         LOG_DEBUG(Core, "Initialized OK");
 
@@ -179,7 +167,8 @@ struct System::Impl {
             return init_result;
         }
 
-        const Loader::ResultStatus load_result{app_loader->Load(*kernel.CurrentProcess())};
+        auto main_process = Kernel::Process::Create(system, "main");
+        const auto [load_result, load_parameters] = app_loader->Load(*main_process);
         if (load_result != Loader::ResultStatus::Success) {
             LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", static_cast<int>(load_result));
             Shutdown();
@@ -187,6 +176,16 @@ struct System::Impl {
             return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) +
                                              static_cast<u32>(load_result));
         }
+        kernel.MakeCurrentProcess(main_process.get());
+
+        // Main process has been loaded and been made current.
+        // Begin GPU and CPU execution.
+        gpu_core->Start();
+        cpu_core_manager.StartThreads();
+
+        // All threads are started, begin main process execution, now that we're in the clear.
+        main_process->Run(load_parameters->main_thread_priority,
+                          load_parameters->main_thread_stack_size);
 
         status = ResultStatus::Success;
         return status;
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index e75741db0..ba63c3e61 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -55,13 +55,13 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba
     : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
     if (Settings::values.use_cpu_jit) {
 #ifdef ARCHITECTURE_x86_64
-        arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
+        arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index);
 #else
-        arm_interface = std::make_unique<ARM_Unicorn>();
+        arm_interface = std::make_unique<ARM_Unicorn>(system);
         LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
 #endif
     } else {
-        arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
+        arm_interface = std::make_unique<ARM_Unicorn>(system);
     }
 
     scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp
index 93bc5619c..8fcb4eeb1 100644
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -19,17 +19,19 @@ void RunCpuCore(const System& system, Cpu& cpu_state) {
 }
 } // Anonymous namespace
 
-CpuCoreManager::CpuCoreManager() = default;
+CpuCoreManager::CpuCoreManager(System& system) : system{system} {}
 CpuCoreManager::~CpuCoreManager() = default;
 
-void CpuCoreManager::Initialize(System& system) {
+void CpuCoreManager::Initialize() {
     barrier = std::make_unique<CpuBarrier>();
     exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
 
     for (std::size_t index = 0; index < cores.size(); ++index) {
         cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
     }
+}
 
+void CpuCoreManager::StartThreads() {
     // Create threads for CPU cores 1-3, and build thread_to_cpu map
     // CPU core 0 is run on the main thread
     thread_to_cpu[std::this_thread::get_id()] = cores[0].get();
diff --git a/src/core/cpu_core_manager.h b/src/core/cpu_core_manager.h
index a4d70ec56..2cbbf8216 100644
--- a/src/core/cpu_core_manager.h
+++ b/src/core/cpu_core_manager.h
@@ -18,7 +18,7 @@ class System;
 
 class CpuCoreManager {
 public:
-    CpuCoreManager();
+    explicit CpuCoreManager(System& system);
     CpuCoreManager(const CpuCoreManager&) = delete;
     CpuCoreManager(CpuCoreManager&&) = delete;
 
@@ -27,7 +27,8 @@ public:
     CpuCoreManager& operator=(const CpuCoreManager&) = delete;
     CpuCoreManager& operator=(CpuCoreManager&&) = delete;
 
-    void Initialize(System& system);
+    void Initialize();
+    void StartThreads();
     void Shutdown();
 
     Cpu& GetCore(std::size_t index);
@@ -54,6 +55,8 @@ private:
 
     /// Map of guest threads to CPU cores
     std::map<std::thread::id, Cpu*> thread_to_cpu;
+
+    System& system;
 };
 
 } // namespace Core
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index d0bcb4660..70a522556 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -13,6 +13,23 @@
 namespace Core::Frontend {
 
 /**
+ * Represents a graphics context that can be used for background computation or drawing. If the
+ * graphics backend doesn't require the context, then the implementation of these methods can be
+ * stubs
+ */
+class GraphicsContext {
+public:
+    /// Makes the graphics context current for the caller thread
+    virtual void MakeCurrent() = 0;
+
+    /// Releases (dunno if this is the "right" word) the context from the caller thread
+    virtual void DoneCurrent() = 0;
+
+    /// Swap buffers to display the next frame
+    virtual void SwapBuffers() = 0;
+};
+
+/**
  * Abstraction class used to provide an interface between emulation code and the frontend
  * (e.g. SDL, QGLWidget, GLFW, etc...).
  *
@@ -30,7 +47,7 @@ namespace Core::Frontend {
  * - DO NOT TREAT THIS CLASS AS A GUI TOOLKIT ABSTRACTION LAYER. That's not what it is. Please
  *   re-read the upper points again and think about it if you don't see this.
  */
-class EmuWindow {
+class EmuWindow : public GraphicsContext {
 public:
     /// Data structure to store emuwindow configuration
     struct WindowConfig {
@@ -40,17 +57,21 @@ public:
         std::pair<unsigned, unsigned> min_client_area_size;
     };
 
-    /// Swap buffers to display the next frame
-    virtual void SwapBuffers() = 0;
-
     /// Polls window events
     virtual void PollEvents() = 0;
 
-    /// Makes the graphics context current for the caller thread
-    virtual void MakeCurrent() = 0;
-
-    /// Releases (dunno if this is the "right" word) the GLFW context from the caller thread
-    virtual void DoneCurrent() = 0;
+    /**
+     * Returns a GraphicsContext that the frontend provides that is shared with the emu window. This
+     * context can be used from other threads for background graphics computation. If the frontend
+     * is using a graphics backend that doesn't need anything specific to run on a different thread,
+     * then it can use a stubbed implemenation for GraphicsContext.
+     *
+     * If the return value is null, then the core should assume that the frontend cannot provide a
+     * Shared Context
+     */
+    virtual std::unique_ptr<GraphicsContext> CreateSharedContext() const {
+        return nullptr;
+    }
 
     /**
      * Signal that a touch pressed event has occurred (e.g. mouse click pressed)
diff --git a/src/core/hle/kernel/client_port.h b/src/core/hle/kernel/client_port.h
index 6cd607206..4921ad4f0 100644
--- a/src/core/hle/kernel/client_port.h
+++ b/src/core/hle/kernel/client_port.h
@@ -25,7 +25,7 @@ public:
         return name;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::ClientPort;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ClientPort;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h
index b1f39aad7..09cdff588 100644
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -29,7 +29,7 @@ public:
         return name;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::ClientSession;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ClientSession;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 4d58e7c69..8539fabe4 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -182,7 +182,12 @@ void KernelCore::AppendNewProcess(SharedPtr<Process> process) {
 
 void KernelCore::MakeCurrentProcess(Process* process) {
     impl->current_process = process;
-    Memory::SetCurrentPageTable(&process->VMManager().page_table);
+
+    if (process == nullptr) {
+        return;
+    }
+
+    Memory::SetCurrentPageTable(*process);
 }
 
 Process* KernelCore::CurrentProcess() {
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 4e94048da..6d7a7e754 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -28,21 +28,20 @@ namespace {
  *
  * @param owner_process The parent process for the main thread
  * @param kernel The kernel instance to create the main thread under.
- * @param entry_point The address at which the thread should start execution
  * @param priority The priority to give the main thread
  */
-void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
-    // Initialize new "main" thread
-    const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
+void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) {
+    const auto& vm_manager = owner_process.VMManager();
+    const VAddr entry_point = vm_manager.GetCodeRegionBaseAddress();
+    const VAddr stack_top = vm_manager.GetTLSIORegionEndAddress();
     auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0,
                                      owner_process.GetIdealCore(), stack_top, owner_process);
 
     SharedPtr<Thread> thread = std::move(thread_res).Unwrap();
 
     // Register 1 must be a handle to the main thread
-    const Handle guest_handle = owner_process.GetHandleTable().Create(thread).Unwrap();
-    thread->SetGuestHandle(guest_handle);
-    thread->GetContext().cpu_registers[1] = guest_handle;
+    const Handle thread_handle = owner_process.GetHandleTable().Create(thread).Unwrap();
+    thread->GetContext().cpu_registers[1] = thread_handle;
 
     // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires
     thread->ResumeFromWait();
@@ -106,8 +105,6 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
     is_64bit_process = metadata.Is64BitProgram();
 
     vm_manager.Reset(metadata.GetAddressSpaceType());
-    // Ensure that the potentially resized page table is seen by CPU backends.
-    Memory::SetCurrentPageTable(&vm_manager.page_table);
 
     const auto& caps = metadata.GetKernelCapabilities();
     const auto capability_init_result =
@@ -119,7 +116,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
     return handle_table.SetSize(capabilities.GetHandleTableSize());
 }
 
-void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) {
+void Process::Run(s32 main_thread_priority, u64 stack_size) {
     // The kernel always ensures that the given stack size is page aligned.
     main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
 
@@ -135,7 +132,7 @@ void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) {
     vm_manager.LogLayout();
     ChangeStatus(ProcessStatus::Running);
 
-    SetupMainThread(*this, kernel, entry_point, main_thread_priority);
+    SetupMainThread(*this, kernel, main_thread_priority);
 }
 
 void Process::PrepareForTermination() {
@@ -242,9 +239,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
     MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
 
     code_memory_size += module_.memory.size();
-
-    // Clear instruction cache in CPU JIT
-    system.InvalidateCpuInstructionCaches();
 }
 
 Process::Process(Core::System& system)
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index f060f2a3b..bf3b7eef3 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -85,7 +85,7 @@ public:
         return name;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::Process;
+    static constexpr HandleType HANDLE_TYPE = HandleType::Process;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
@@ -225,9 +225,12 @@ public:
     ResultCode LoadFromMetadata(const FileSys::ProgramMetadata& metadata);
 
     /**
-     * Applies address space changes and launches the process main thread.
+     * Starts the main application thread for this process.
+     *
+     * @param main_thread_priority The priority for the main thread.
+     * @param stack_size           The stack size for the main thread in bytes.
      */
-    void Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size);
+    void Run(s32 main_thread_priority, u64 stack_size);
 
     /**
      * Prepares a process for termination by stopping all of its threads
diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h
index 2eb9dcbb7..84215f572 100644
--- a/src/core/hle/kernel/readable_event.h
+++ b/src/core/hle/kernel/readable_event.h
@@ -31,7 +31,7 @@ public:
         return reset_type;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::ReadableEvent;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ReadableEvent;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
diff --git a/src/core/hle/kernel/resource_limit.h b/src/core/hle/kernel/resource_limit.h
index 70e09858a..2613a6bb5 100644
--- a/src/core/hle/kernel/resource_limit.h
+++ b/src/core/hle/kernel/resource_limit.h
@@ -41,7 +41,7 @@ public:
         return GetTypeName();
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::ResourceLimit;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ResourceLimit;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h
index fef573b71..dc88a1ebd 100644
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -43,7 +43,7 @@ public:
         return name;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::ServerPort;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ServerPort;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index 09b835ff8..738df30f8 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -46,7 +46,7 @@ public:
         return name;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::ServerSession;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ServerSession;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index 37e18c443..c2b6155e1 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -76,7 +76,7 @@ public:
         return name;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::SharedMemory;
+    static constexpr HandleType HANDLE_TYPE = HandleType::SharedMemory;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 2fd07ab34..4c763b288 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -131,16 +131,15 @@ enum class ResourceLimitValueType {
     LimitValue,
 };
 
-ResultVal<s64> RetrieveResourceLimitValue(Handle resource_limit, u32 resource_type,
-                                          ResourceLimitValueType value_type) {
+ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit,
+                                          u32 resource_type, ResourceLimitValueType value_type) {
     const auto type = static_cast<ResourceType>(resource_type);
     if (!IsValidResourceType(type)) {
         LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
         return ERR_INVALID_ENUM_VALUE;
     }
 
-    const auto& kernel = Core::System::GetInstance().Kernel();
-    const auto* const current_process = kernel.CurrentProcess();
+    const auto* const current_process = system.Kernel().CurrentProcess();
     ASSERT(current_process != nullptr);
 
     const auto resource_limit_object =
@@ -160,7 +159,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Handle resource_limit, u32 resource_ty
 } // Anonymous namespace
 
 /// Set the process heap to a given Size. It can both extend and shrink the heap.
-static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
+static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_size) {
     LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size);
 
     // Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB.
@@ -175,7 +174,7 @@ static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
         return ERR_INVALID_SIZE;
     }
 
-    auto& vm_manager = Core::System::GetInstance().Kernel().CurrentProcess()->VMManager();
+    auto& vm_manager = system.Kernel().CurrentProcess()->VMManager();
     const auto alloc_result = vm_manager.SetHeapSize(heap_size);
     if (alloc_result.Failed()) {
         return alloc_result.Code();
@@ -185,7 +184,7 @@ static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
     return RESULT_SUCCESS;
 }
 
-static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
+static ResultCode SetMemoryPermission(Core::System& system, VAddr addr, u64 size, u32 prot) {
     LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot);
 
     if (!Common::Is4KBAligned(addr)) {
@@ -217,7 +216,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
         return ERR_INVALID_MEMORY_PERMISSIONS;
     }
 
-    auto* const current_process = Core::CurrentProcess();
+    auto* const current_process = system.Kernel().CurrentProcess();
     auto& vm_manager = current_process->VMManager();
 
     if (!vm_manager.IsWithinAddressSpace(addr, size)) {
@@ -242,7 +241,8 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
     return vm_manager.ReprotectRange(addr, size, converted_permissions);
 }
 
-static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attribute) {
+static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 size, u32 mask,
+                                     u32 attribute) {
     LOG_DEBUG(Kernel_SVC,
               "called, address=0x{:016X}, size=0x{:X}, mask=0x{:08X}, attribute=0x{:08X}", address,
               size, mask, attribute);
@@ -280,7 +280,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
         return ERR_INVALID_COMBINATION;
     }
 
-    auto& vm_manager = Core::CurrentProcess()->VMManager();
+    auto& vm_manager = system.Kernel().CurrentProcess()->VMManager();
     if (!vm_manager.IsWithinAddressSpace(address, size)) {
         LOG_ERROR(Kernel_SVC,
                   "Given address (0x{:016X}) is outside the bounds of the address space.", address);
@@ -291,11 +291,11 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
 }
 
 /// Maps a memory range into a different range.
-static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
+static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) {
     LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
               src_addr, size);
 
-    auto& vm_manager = Core::CurrentProcess()->VMManager();
+    auto& vm_manager = system.Kernel().CurrentProcess()->VMManager();
     const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
 
     if (result.IsError()) {
@@ -306,11 +306,11 @@ static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
 }
 
 /// Unmaps a region that was previously mapped with svcMapMemory
-static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
+static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) {
     LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
               src_addr, size);
 
-    auto& vm_manager = Core::CurrentProcess()->VMManager();
+    auto& vm_manager = system.Kernel().CurrentProcess()->VMManager();
     const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
 
     if (result.IsError()) {
@@ -321,7 +321,8 @@ static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
 }
 
 /// Connect to an OS service given the port name, returns the handle to the port to out
-static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address) {
+static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
+                                     VAddr port_name_address) {
     if (!Memory::IsValidVirtualAddress(port_name_address)) {
         LOG_ERROR(Kernel_SVC,
                   "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}",
@@ -340,8 +341,8 @@ static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address
 
     LOG_TRACE(Kernel_SVC, "called port_name={}", port_name);
 
-    auto& kernel = Core::System::GetInstance().Kernel();
-    auto it = kernel.FindNamedPort(port_name);
+    auto& kernel = system.Kernel();
+    const auto it = kernel.FindNamedPort(port_name);
     if (!kernel.IsValidNamedPort(it)) {
         LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name);
         return ERR_NOT_FOUND;
@@ -353,14 +354,14 @@ static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address
     CASCADE_RESULT(client_session, client_port->Connect());
 
     // Return the client session
-    auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
     CASCADE_RESULT(*out_handle, handle_table.Create(client_session));
     return RESULT_SUCCESS;
 }
 
 /// Makes a blocking IPC call to an OS service.
-static ResultCode SendSyncRequest(Handle handle) {
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     SharedPtr<ClientSession> session = handle_table.Get<ClientSession>(handle);
     if (!session) {
         LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle);
@@ -369,18 +370,18 @@ static ResultCode SendSyncRequest(Handle handle) {
 
     LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());
 
-    Core::System::GetInstance().PrepareReschedule();
+    system.PrepareReschedule();
 
     // TODO(Subv): svcSendSyncRequest should put the caller thread to sleep while the server
     // responds and cause a reschedule.
-    return session->SendSyncRequest(GetCurrentThread());
+    return session->SendSyncRequest(system.CurrentScheduler().GetCurrentThread());
 }
 
 /// Get the ID for the specified thread.
-static ResultCode GetThreadId(u64* thread_id, Handle thread_handle) {
+static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle thread_handle) {
     LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", thread_handle);
@@ -392,10 +393,10 @@ static ResultCode GetThreadId(u64* thread_id, Handle thread_handle) {
 }
 
 /// Gets the ID of the specified process or a specified thread's owning process.
-static ResultCode GetProcessId(u64* process_id, Handle handle) {
+static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle handle) {
     LOG_DEBUG(Kernel_SVC, "called handle=0x{:08X}", handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Process> process = handle_table.Get<Process>(handle);
     if (process) {
         *process_id = process->GetProcessID();
@@ -437,8 +438,8 @@ static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, SharedPtr<Thr
 };
 
 /// Wait for the given handles to synchronize, timeout after the specified nanoseconds
-static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64 handle_count,
-                                      s64 nano_seconds) {
+static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address,
+                                      u64 handle_count, s64 nano_seconds) {
     LOG_TRACE(Kernel_SVC, "called handles_address=0x{:X}, handle_count={}, nano_seconds={}",
               handles_address, handle_count, nano_seconds);
 
@@ -457,11 +458,11 @@ static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64
         return ERR_OUT_OF_RANGE;
     }
 
-    auto* const thread = GetCurrentThread();
+    auto* const thread = system.CurrentScheduler().GetCurrentThread();
 
     using ObjectPtr = Thread::ThreadWaitObjects::value_type;
     Thread::ThreadWaitObjects objects(handle_count);
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
 
     for (u64 i = 0; i < handle_count; ++i) {
         const Handle handle = Memory::Read32(handles_address + i * sizeof(Handle));
@@ -507,16 +508,16 @@ static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64
     thread->WakeAfterDelay(nano_seconds);
     thread->SetWakeupCallback(DefaultThreadWakeupCallback);
 
-    Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
+    system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
 
     return RESULT_TIMEOUT;
 }
 
 /// Resumes a thread waiting on WaitSynchronization
-static ResultCode CancelSynchronization(Handle thread_handle) {
+static ResultCode CancelSynchronization(Core::System& system, Handle thread_handle) {
     LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -531,8 +532,8 @@ static ResultCode CancelSynchronization(Handle thread_handle) {
 }
 
 /// Attempts to locks a mutex, creating it if it does not already exist
-static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr,
-                                Handle requesting_thread_handle) {
+static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_handle,
+                                VAddr mutex_addr, Handle requesting_thread_handle) {
     LOG_TRACE(Kernel_SVC,
               "called holding_thread_handle=0x{:08X}, mutex_addr=0x{:X}, "
               "requesting_current_thread_handle=0x{:08X}",
@@ -549,13 +550,13 @@ static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr,
         return ERR_INVALID_ADDRESS;
     }
 
-    auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
+    auto* const current_process = system.Kernel().CurrentProcess();
     return current_process->GetMutex().TryAcquire(mutex_addr, holding_thread_handle,
                                                   requesting_thread_handle);
 }
 
 /// Unlock a mutex
-static ResultCode ArbitrateUnlock(VAddr mutex_addr) {
+static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {
     LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr);
 
     if (Memory::IsKernelVirtualAddress(mutex_addr)) {
@@ -569,7 +570,7 @@ static ResultCode ArbitrateUnlock(VAddr mutex_addr) {
         return ERR_INVALID_ADDRESS;
     }
 
-    auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
+    auto* const current_process = system.Kernel().CurrentProcess();
     return current_process->GetMutex().Release(mutex_addr);
 }
 
@@ -592,7 +593,7 @@ struct BreakReason {
 };
 
 /// Break program execution
-static void Break(u32 reason, u64 info1, u64 info2) {
+static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
     BreakReason break_reason{reason};
     bool has_dumped_buffer{};
 
@@ -670,22 +671,24 @@ static void Break(u32 reason, u64 info1, u64 info2) {
             Debug_Emulated,
             "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}",
             reason, info1, info2);
+
         handle_debug_buffer(info1, info2);
-        Core::System::GetInstance()
-            .ArmInterface(static_cast<std::size_t>(GetCurrentThread()->GetProcessorID()))
-            .LogBacktrace();
+
+        auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
+        const auto thread_processor_id = current_thread->GetProcessorID();
+        system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace();
         ASSERT(false);
 
-        Core::CurrentProcess()->PrepareForTermination();
+        system.Kernel().CurrentProcess()->PrepareForTermination();
 
         // Kill the current thread
-        GetCurrentThread()->Stop();
-        Core::System::GetInstance().PrepareReschedule();
+        current_thread->Stop();
+        system.PrepareReschedule();
     }
 }
 
 /// Used to output a message on a debug hardware unit - does nothing on a retail unit
-static void OutputDebugString(VAddr address, u64 len) {
+static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr address, u64 len) {
     if (len == 0) {
         return;
     }
@@ -696,7 +699,8 @@ static void OutputDebugString(VAddr address, u64 len) {
 }
 
 /// Gets system/memory information for the current process
-static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id) {
+static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 handle,
+                          u64 info_sub_id) {
     LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id,
               info_sub_id, handle);
 
@@ -754,7 +758,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
             return ERR_INVALID_ENUM_VALUE;
         }
 
-        const auto& current_process_handle_table = Core::CurrentProcess()->GetHandleTable();
+        const auto& current_process_handle_table =
+            system.Kernel().CurrentProcess()->GetHandleTable();
         const auto process = current_process_handle_table.Get<Process>(static_cast<Handle>(handle));
         if (!process) {
             return ERR_INVALID_HANDLE;
@@ -844,7 +849,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
             return ERR_INVALID_COMBINATION;
         }
 
-        Process* const current_process = Core::CurrentProcess();
+        Process* const current_process = system.Kernel().CurrentProcess();
         HandleTable& handle_table = current_process->GetHandleTable();
         const auto resource_limit = current_process->GetResourceLimit();
         if (!resource_limit) {
@@ -875,7 +880,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
             return ERR_INVALID_COMBINATION;
         }
 
-        *result = Core::CurrentProcess()->GetRandomEntropy(info_sub_id);
+        *result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id);
         return RESULT_SUCCESS;
 
     case GetInfoType::PrivilegedProcessId:
@@ -892,15 +897,14 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
             return ERR_INVALID_COMBINATION;
         }
 
-        const auto thread =
-            Core::CurrentProcess()->GetHandleTable().Get<Thread>(static_cast<Handle>(handle));
+        const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<Thread>(
+            static_cast<Handle>(handle));
         if (!thread) {
             LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}",
                       static_cast<Handle>(handle));
             return ERR_INVALID_HANDLE;
         }
 
-        const auto& system = Core::System::GetInstance();
         const auto& core_timing = system.CoreTiming();
         const auto& scheduler = system.CurrentScheduler();
         const auto* const current_thread = scheduler.GetCurrentThread();
@@ -927,13 +931,13 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
 }
 
 /// Sets the thread activity
-static ResultCode SetThreadActivity(Handle handle, u32 activity) {
+static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
     LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);
     if (activity > static_cast<u32>(ThreadActivity::Paused)) {
         return ERR_INVALID_ENUM_VALUE;
     }
 
-    const auto* current_process = Core::CurrentProcess();
+    const auto* current_process = system.Kernel().CurrentProcess();
     const SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle);
@@ -950,7 +954,7 @@ static ResultCode SetThreadActivity(Handle handle, u32 activity) {
         return ERR_INVALID_HANDLE;
     }
 
-    if (thread == GetCurrentThread()) {
+    if (thread == system.CurrentScheduler().GetCurrentThread()) {
         LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread");
         return ERR_BUSY;
     }
@@ -960,10 +964,10 @@ static ResultCode SetThreadActivity(Handle handle, u32 activity) {
 }
 
 /// Gets the thread context
-static ResultCode GetThreadContext(VAddr thread_context, Handle handle) {
+static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, Handle handle) {
     LOG_DEBUG(Kernel_SVC, "called, context=0x{:08X}, thread=0x{:X}", thread_context, handle);
 
-    const auto* current_process = Core::CurrentProcess();
+    const auto* current_process = system.Kernel().CurrentProcess();
     const SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle);
@@ -980,7 +984,7 @@ static ResultCode GetThreadContext(VAddr thread_context, Handle handle) {
         return ERR_INVALID_HANDLE;
     }
 
-    if (thread == GetCurrentThread()) {
+    if (thread == system.CurrentScheduler().GetCurrentThread()) {
         LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread");
         return ERR_BUSY;
     }
@@ -1001,10 +1005,10 @@ static ResultCode GetThreadContext(VAddr thread_context, Handle handle) {
 }
 
 /// Gets the priority for the specified thread
-static ResultCode GetThreadPriority(u32* priority, Handle handle) {
+static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle handle) {
     LOG_TRACE(Kernel_SVC, "called");
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Thread> thread = handle_table.Get<Thread>(handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle);
@@ -1016,7 +1020,7 @@ static ResultCode GetThreadPriority(u32* priority, Handle handle) {
 }
 
 /// Sets the priority for the specified thread
-static ResultCode SetThreadPriority(Handle handle, u32 priority) {
+static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 priority) {
     LOG_TRACE(Kernel_SVC, "called");
 
     if (priority > THREADPRIO_LOWEST) {
@@ -1027,7 +1031,7 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
         return ERR_INVALID_THREAD_PRIORITY;
     }
 
-    const auto* const current_process = Core::CurrentProcess();
+    const auto* const current_process = system.Kernel().CurrentProcess();
 
     SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle);
     if (!thread) {
@@ -1037,18 +1041,18 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
 
     thread->SetPriority(priority);
 
-    Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
+    system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
     return RESULT_SUCCESS;
 }
 
 /// Get which CPU core is executing the current thread
-static u32 GetCurrentProcessorNumber() {
+static u32 GetCurrentProcessorNumber(Core::System& system) {
     LOG_TRACE(Kernel_SVC, "called");
-    return GetCurrentThread()->GetProcessorID();
+    return system.CurrentScheduler().GetCurrentThread()->GetProcessorID();
 }
 
-static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size,
-                                  u32 permissions) {
+static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr,
+                                  u64 size, u32 permissions) {
     LOG_TRACE(Kernel_SVC,
               "called, shared_memory_handle=0x{:X}, addr=0x{:X}, size=0x{:X}, permissions=0x{:08X}",
               shared_memory_handle, addr, size, permissions);
@@ -1082,7 +1086,7 @@ static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 s
         return ERR_INVALID_MEMORY_PERMISSIONS;
     }
 
-    auto* const current_process = Core::CurrentProcess();
+    auto* const current_process = system.Kernel().CurrentProcess();
     auto shared_memory = current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle);
     if (!shared_memory) {
         LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}",
@@ -1100,7 +1104,8 @@ static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 s
     return shared_memory->Map(*current_process, addr, permissions_type, MemoryPermission::DontCare);
 }
 
-static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size) {
+static ResultCode UnmapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr,
+                                    u64 size) {
     LOG_WARNING(Kernel_SVC, "called, shared_memory_handle=0x{:08X}, addr=0x{:X}, size=0x{:X}",
                 shared_memory_handle, addr, size);
 
@@ -1125,7 +1130,7 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64
         return ERR_INVALID_ADDRESS_STATE;
     }
 
-    auto* const current_process = Core::CurrentProcess();
+    auto* const current_process = system.Kernel().CurrentProcess();
     auto shared_memory = current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle);
     if (!shared_memory) {
         LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}",
@@ -1143,10 +1148,11 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64
     return shared_memory->Unmap(*current_process, addr, size);
 }
 
-static ResultCode QueryProcessMemory(VAddr memory_info_address, VAddr page_info_address,
-                                     Handle process_handle, VAddr address) {
+static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_address,
+                                     VAddr page_info_address, Handle process_handle,
+                                     VAddr address) {
     LOG_TRACE(Kernel_SVC, "called process=0x{:08X} address={:X}", process_handle, address);
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     SharedPtr<Process> process = handle_table.Get<Process>(process_handle);
     if (!process) {
         LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
@@ -1172,20 +1178,156 @@ static ResultCode QueryProcessMemory(VAddr memory_info_address, VAddr page_info_
     return RESULT_SUCCESS;
 }
 
-static ResultCode QueryMemory(VAddr memory_info_address, VAddr page_info_address,
-                              VAddr query_address) {
+static ResultCode QueryMemory(Core::System& system, VAddr memory_info_address,
+                              VAddr page_info_address, VAddr query_address) {
     LOG_TRACE(Kernel_SVC,
               "called, memory_info_address=0x{:016X}, page_info_address=0x{:016X}, "
               "query_address=0x{:016X}",
               memory_info_address, page_info_address, query_address);
 
-    return QueryProcessMemory(memory_info_address, page_info_address, CurrentProcess,
+    return QueryProcessMemory(system, memory_info_address, page_info_address, CurrentProcess,
                               query_address);
 }
 
+static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address,
+                                       u64 src_address, u64 size) {
+    LOG_DEBUG(Kernel_SVC,
+              "called. process_handle=0x{:08X}, dst_address=0x{:016X}, "
+              "src_address=0x{:016X}, size=0x{:016X}",
+              process_handle, dst_address, src_address, size);
+
+    if (!Common::Is4KBAligned(src_address)) {
+        LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
+                  src_address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (!Common::Is4KBAligned(dst_address)) {
+        LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
+                  dst_address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!IsValidAddressRange(dst_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Destination address range overflows the address space (dst_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  dst_address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!IsValidAddressRange(src_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Source address range overflows the address space (src_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  src_address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+    auto process = handle_table.Get<Process>(process_handle);
+    if (!process) {
+        LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
+                  process_handle);
+        return ERR_INVALID_HANDLE;
+    }
+
+    auto& vm_manager = process->VMManager();
+    if (!vm_manager.IsWithinAddressSpace(src_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Source address range is not within the address space (src_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  src_address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!vm_manager.IsWithinASLRRegion(dst_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  dst_address, size);
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return vm_manager.MapCodeMemory(dst_address, src_address, size);
+}
+
+ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address,
+                                  u64 src_address, u64 size) {
+    LOG_DEBUG(Kernel_SVC,
+              "called. process_handle=0x{:08X}, dst_address=0x{:016X}, src_address=0x{:016X}, "
+              "size=0x{:016X}",
+              process_handle, dst_address, src_address, size);
+
+    if (!Common::Is4KBAligned(dst_address)) {
+        LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
+                  dst_address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (!Common::Is4KBAligned(src_address)) {
+        LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
+                  src_address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!IsValidAddressRange(dst_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Destination address range overflows the address space (dst_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  dst_address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!IsValidAddressRange(src_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Source address range overflows the address space (src_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  src_address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+    auto process = handle_table.Get<Process>(process_handle);
+    if (!process) {
+        LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
+                  process_handle);
+        return ERR_INVALID_HANDLE;
+    }
+
+    auto& vm_manager = process->VMManager();
+    if (!vm_manager.IsWithinAddressSpace(src_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Source address range is not within the address space (src_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  src_address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!vm_manager.IsWithinASLRRegion(dst_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  dst_address, size);
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return vm_manager.UnmapCodeMemory(dst_address, src_address, size);
+}
+
 /// Exits the current process
-static void ExitProcess() {
-    auto* current_process = Core::CurrentProcess();
+static void ExitProcess(Core::System& system) {
+    auto* current_process = system.Kernel().CurrentProcess();
 
     LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID());
     ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running,
@@ -1194,20 +1336,20 @@ static void ExitProcess() {
     current_process->PrepareForTermination();
 
     // Kill the current thread
-    GetCurrentThread()->Stop();
+    system.CurrentScheduler().GetCurrentThread()->Stop();
 
-    Core::System::GetInstance().PrepareReschedule();
+    system.PrepareReschedule();
 }
 
 /// Creates a new thread
-static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, VAddr stack_top,
-                               u32 priority, s32 processor_id) {
+static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr entry_point, u64 arg,
+                               VAddr stack_top, u32 priority, s32 processor_id) {
     LOG_TRACE(Kernel_SVC,
               "called entrypoint=0x{:08X}, arg=0x{:08X}, stacktop=0x{:08X}, "
               "threadpriority=0x{:08X}, processorid=0x{:08X} : created handle=0x{:08X}",
               entry_point, arg, stack_top, priority, processor_id, *out_handle);
 
-    auto* const current_process = Core::CurrentProcess();
+    auto* const current_process = system.Kernel().CurrentProcess();
 
     if (processor_id == THREADPROCESSORID_IDEAL) {
         // Set the target CPU to the one specified by the process.
@@ -1238,31 +1380,33 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
         return ERR_INVALID_THREAD_PRIORITY;
     }
 
-    const std::string name = fmt::format("thread-{:X}", entry_point);
-    auto& kernel = Core::System::GetInstance().Kernel();
+    auto& kernel = system.Kernel();
     CASCADE_RESULT(SharedPtr<Thread> thread,
-                   Thread::Create(kernel, name, entry_point, priority, arg, processor_id, stack_top,
+                   Thread::Create(kernel, "", entry_point, priority, arg, processor_id, stack_top,
                                   *current_process));
 
-    const auto new_guest_handle = current_process->GetHandleTable().Create(thread);
-    if (new_guest_handle.Failed()) {
+    const auto new_thread_handle = current_process->GetHandleTable().Create(thread);
+    if (new_thread_handle.Failed()) {
         LOG_ERROR(Kernel_SVC, "Failed to create handle with error=0x{:X}",
-                  new_guest_handle.Code().raw);
-        return new_guest_handle.Code();
+                  new_thread_handle.Code().raw);
+        return new_thread_handle.Code();
     }
-    thread->SetGuestHandle(*new_guest_handle);
-    *out_handle = *new_guest_handle;
+    *out_handle = *new_thread_handle;
+
+    // Set the thread name for debugging purposes.
+    thread->SetName(
+        fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle));
 
-    Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
+    system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
 
     return RESULT_SUCCESS;
 }
 
 /// Starts the thread for the provided handle
-static ResultCode StartThread(Handle thread_handle) {
+static ResultCode StartThread(Core::System& system, Handle thread_handle) {
     LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -1275,16 +1419,14 @@ static ResultCode StartThread(Handle thread_handle) {
     thread->ResumeFromWait();
 
     if (thread->GetStatus() == ThreadStatus::Ready) {
-        Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
+        system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
     }
 
     return RESULT_SUCCESS;
 }
 
 /// Called when a thread exits
-static void ExitThread() {
-    auto& system = Core::System::GetInstance();
-
+static void ExitThread(Core::System& system) {
     LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
 
     auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
@@ -1294,7 +1436,7 @@ static void ExitThread() {
 }
 
 /// Sleep the current thread
-static void SleepThread(s64 nanoseconds) {
+static void SleepThread(Core::System& system, s64 nanoseconds) {
     LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
 
     enum class SleepType : s64 {
@@ -1303,7 +1445,6 @@ static void SleepThread(s64 nanoseconds) {
         YieldAndWaitForLoadBalancing = -2,
     };
 
-    auto& system = Core::System::GetInstance();
     auto& scheduler = system.CurrentScheduler();
     auto* const current_thread = scheduler.GetCurrentThread();
 
@@ -1332,8 +1473,9 @@ static void SleepThread(s64 nanoseconds) {
 }
 
 /// Wait process wide key atomic
-static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_variable_addr,
-                                           Handle thread_handle, s64 nano_seconds) {
+static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_addr,
+                                           VAddr condition_variable_addr, Handle thread_handle,
+                                           s64 nano_seconds) {
     LOG_TRACE(
         Kernel_SVC,
         "called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}",
@@ -1353,7 +1495,7 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var
         return ERR_INVALID_ADDRESS;
     }
 
-    auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
+    auto* const current_process = system.Kernel().CurrentProcess();
     const auto& handle_table = current_process->GetHandleTable();
     SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
     ASSERT(thread);
@@ -1363,7 +1505,7 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var
         return release_result;
     }
 
-    SharedPtr<Thread> current_thread = GetCurrentThread();
+    SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
     current_thread->SetCondVarWaitAddress(condition_variable_addr);
     current_thread->SetMutexWaitAddress(mutex_addr);
     current_thread->SetWaitHandle(thread_handle);
@@ -1374,19 +1516,20 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var
 
     // Note: Deliberately don't attempt to inherit the lock owner's priority.
 
-    Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
+    system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
     return RESULT_SUCCESS;
 }
 
 /// Signal process wide key
-static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target) {
+static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr,
+                                       s32 target) {
     LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
               condition_variable_addr, target);
 
-    const auto RetrieveWaitingThreads = [](std::size_t core_index,
-                                           std::vector<SharedPtr<Thread>>& waiting_threads,
-                                           VAddr condvar_addr) {
-        const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
+    const auto RetrieveWaitingThreads = [&system](std::size_t core_index,
+                                                  std::vector<SharedPtr<Thread>>& waiting_threads,
+                                                  VAddr condvar_addr) {
+        const auto& scheduler = system.Scheduler(core_index);
         const auto& thread_list = scheduler.GetThreadList();
 
         for (const auto& thread : thread_list) {
@@ -1425,9 +1568,8 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
         // liberate Cond Var Thread.
         thread->SetCondVarWaitAddress(0);
 
-        std::size_t current_core = Core::System::GetInstance().CurrentCoreIndex();
-
-        auto& monitor = Core::System::GetInstance().Monitor();
+        const std::size_t current_core = system.CurrentCoreIndex();
+        auto& monitor = system.Monitor();
 
         // Atomically read the value of the mutex.
         u32 mutex_val = 0;
@@ -1456,7 +1598,7 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
             thread->SetLockOwner(nullptr);
             thread->SetMutexWaitAddress(0);
             thread->SetWaitHandle(0);
-            Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
+            system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
         } else {
             // Atomically signal that the mutex now has a waiting thread.
             do {
@@ -1472,7 +1614,7 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
 
             // The mutex is already owned by some other thread, make this thread wait on it.
             const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
-            const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+            const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
             auto owner = handle_table.Get<Thread>(owner_handle);
             ASSERT(owner);
             ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
@@ -1487,14 +1629,17 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
 }
 
 // Wait for an address (via Address Arbiter)
-static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout) {
+static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
+                                 s64 timeout) {
     LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
                 address, type, value, timeout);
+
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Memory::IsKernelVirtualAddress(address)) {
         LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
         return ERR_INVALID_ADDRESS_STATE;
     }
+
     // If the address is not properly aligned to 4 bytes, return invalid address.
     if (!Common::IsWordAligned(address)) {
         LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address);
@@ -1502,20 +1647,22 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout
     }
 
     const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type);
-    auto& address_arbiter =
-        Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
+    auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
     return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
 }
 
 // Signals to an address (via Address Arbiter)
-static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to_wake) {
+static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
+                                  s32 num_to_wake) {
     LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
                 address, type, value, num_to_wake);
+
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Memory::IsKernelVirtualAddress(address)) {
         LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
         return ERR_INVALID_ADDRESS_STATE;
     }
+
     // If the address is not properly aligned to 4 bytes, return invalid address.
     if (!Common::IsWordAligned(address)) {
         LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address);
@@ -1523,16 +1670,15 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
     }
 
     const auto signal_type = static_cast<AddressArbiter::SignalType>(type);
-    auto& address_arbiter =
-        Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
+    auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
     return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
 }
 
 /// This returns the total CPU ticks elapsed since the CPU was powered-on
-static u64 GetSystemTick() {
+static u64 GetSystemTick(Core::System& system) {
     LOG_TRACE(Kernel_SVC, "called");
 
-    auto& core_timing = Core::System::GetInstance().CoreTiming();
+    auto& core_timing = system.CoreTiming();
     const u64 result{core_timing.GetTicks()};
 
     // Advance time to defeat dumb games that busy-wait for the frame to end.
@@ -1542,18 +1688,18 @@ static u64 GetSystemTick() {
 }
 
 /// Close a handle
-static ResultCode CloseHandle(Handle handle) {
+static ResultCode CloseHandle(Core::System& system, Handle handle) {
     LOG_TRACE(Kernel_SVC, "Closing handle 0x{:08X}", handle);
 
-    auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     return handle_table.Close(handle);
 }
 
 /// Clears the signaled state of an event or process.
-static ResultCode ResetSignal(Handle handle) {
+static ResultCode ResetSignal(Core::System& system, Handle handle) {
     LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
 
     auto event = handle_table.Get<ReadableEvent>(handle);
     if (event) {
@@ -1570,7 +1716,8 @@ static ResultCode ResetSignal(Handle handle) {
 }
 
 /// Creates a TransferMemory object
-static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32 permissions) {
+static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAddr addr, u64 size,
+                                       u32 permissions) {
     LOG_DEBUG(Kernel_SVC, "called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size,
               permissions);
 
@@ -1598,7 +1745,7 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32
         return ERR_INVALID_MEMORY_PERMISSIONS;
     }
 
-    auto& kernel = Core::System::GetInstance().Kernel();
+    auto& kernel = system.Kernel();
     auto transfer_mem_handle = TransferMemory::Create(kernel, addr, size, perms);
 
     auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
@@ -1611,7 +1758,8 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32
     return RESULT_SUCCESS;
 }
 
-static ResultCode MapTransferMemory(Handle handle, VAddr address, u64 size, u32 permission_raw) {
+static ResultCode MapTransferMemory(Core::System& system, Handle handle, VAddr address, u64 size,
+                                    u32 permission_raw) {
     LOG_DEBUG(Kernel_SVC,
               "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}, permissions=0x{:08X}",
               handle, address, size, permission_raw);
@@ -1645,7 +1793,7 @@ static ResultCode MapTransferMemory(Handle handle, VAddr address, u64 size, u32
         return ERR_INVALID_STATE;
     }
 
-    const auto& kernel = Core::System::GetInstance().Kernel();
+    const auto& kernel = system.Kernel();
     const auto* const current_process = kernel.CurrentProcess();
     const auto& handle_table = current_process->GetHandleTable();
 
@@ -1667,7 +1815,8 @@ static ResultCode MapTransferMemory(Handle handle, VAddr address, u64 size, u32
     return transfer_memory->MapMemory(address, size, permissions);
 }
 
-static ResultCode UnmapTransferMemory(Handle handle, VAddr address, u64 size) {
+static ResultCode UnmapTransferMemory(Core::System& system, Handle handle, VAddr address,
+                                      u64 size) {
     LOG_DEBUG(Kernel_SVC, "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}", handle,
               address, size);
 
@@ -1692,7 +1841,7 @@ static ResultCode UnmapTransferMemory(Handle handle, VAddr address, u64 size) {
         return ERR_INVALID_ADDRESS_STATE;
     }
 
-    const auto& kernel = Core::System::GetInstance().Kernel();
+    const auto& kernel = system.Kernel();
     const auto* const current_process = kernel.CurrentProcess();
     const auto& handle_table = current_process->GetHandleTable();
 
@@ -1714,10 +1863,11 @@ static ResultCode UnmapTransferMemory(Handle handle, VAddr address, u64 size) {
     return transfer_memory->UnmapMemory(address, size);
 }
 
-static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) {
+static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, u32* core,
+                                    u64* mask) {
     LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -1731,11 +1881,12 @@ static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask)
     return RESULT_SUCCESS;
 }
 
-static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
+static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core,
+                                    u64 mask) {
     LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:016X}, core=0x{:X}", thread_handle,
               mask, core);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -1780,8 +1931,8 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
     return RESULT_SUCCESS;
 }
 
-static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permissions,
-                                     u32 remote_permissions) {
+static ResultCode CreateSharedMemory(Core::System& system, Handle* handle, u64 size,
+                                     u32 local_permissions, u32 remote_permissions) {
     LOG_TRACE(Kernel_SVC, "called, size=0x{:X}, localPerms=0x{:08X}, remotePerms=0x{:08X}", size,
               local_permissions, remote_permissions);
     if (size == 0) {
@@ -1817,7 +1968,7 @@ static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permiss
         return ERR_INVALID_MEMORY_PERMISSIONS;
     }
 
-    auto& kernel = Core::System::GetInstance().Kernel();
+    auto& kernel = system.Kernel();
     auto process = kernel.CurrentProcess();
     auto& handle_table = process->GetHandleTable();
     auto shared_mem_handle = SharedMemory::Create(kernel, process, size, local_perms, remote_perms);
@@ -1826,10 +1977,10 @@ static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permiss
     return RESULT_SUCCESS;
 }
 
-static ResultCode CreateEvent(Handle* write_handle, Handle* read_handle) {
+static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle* read_handle) {
     LOG_DEBUG(Kernel_SVC, "called");
 
-    auto& kernel = Core::System::GetInstance().Kernel();
+    auto& kernel = system.Kernel();
     const auto [readable_event, writable_event] =
         WritableEvent::CreateEventPair(kernel, ResetType::Sticky, "CreateEvent");
 
@@ -1854,10 +2005,10 @@ static ResultCode CreateEvent(Handle* write_handle, Handle* read_handle) {
     return RESULT_SUCCESS;
 }
 
-static ResultCode ClearEvent(Handle handle) {
+static ResultCode ClearEvent(Core::System& system, Handle handle) {
     LOG_TRACE(Kernel_SVC, "called, event=0x{:08X}", handle);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
 
     auto writable_event = handle_table.Get<WritableEvent>(handle);
     if (writable_event) {
@@ -1875,10 +2026,10 @@ static ResultCode ClearEvent(Handle handle) {
     return ERR_INVALID_HANDLE;
 }
 
-static ResultCode SignalEvent(Handle handle) {
+static ResultCode SignalEvent(Core::System& system, Handle handle) {
     LOG_DEBUG(Kernel_SVC, "called. Handle=0x{:08X}", handle);
 
-    HandleTable& handle_table = Core::CurrentProcess()->GetHandleTable();
+    HandleTable& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     auto writable_event = handle_table.Get<WritableEvent>(handle);
 
     if (!writable_event) {
@@ -1890,7 +2041,7 @@ static ResultCode SignalEvent(Handle handle) {
     return RESULT_SUCCESS;
 }
 
-static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) {
+static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_handle, u32 type) {
     LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, type=0x{:X}", process_handle, type);
 
     // This function currently only allows retrieving a process' status.
@@ -1898,7 +2049,7 @@ static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) {
         Status,
     };
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const auto process = handle_table.Get<Process>(process_handle);
     if (!process) {
         LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
@@ -1916,10 +2067,10 @@ static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) {
     return RESULT_SUCCESS;
 }
 
-static ResultCode CreateResourceLimit(Handle* out_handle) {
+static ResultCode CreateResourceLimit(Core::System& system, Handle* out_handle) {
     LOG_DEBUG(Kernel_SVC, "called");
 
-    auto& kernel = Core::System::GetInstance().Kernel();
+    auto& kernel = system.Kernel();
     auto resource_limit = ResourceLimit::Create(kernel);
 
     auto* const current_process = kernel.CurrentProcess();
@@ -1934,11 +2085,11 @@ static ResultCode CreateResourceLimit(Handle* out_handle) {
     return RESULT_SUCCESS;
 }
 
-static ResultCode GetResourceLimitLimitValue(u64* out_value, Handle resource_limit,
-                                             u32 resource_type) {
+static ResultCode GetResourceLimitLimitValue(Core::System& system, u64* out_value,
+                                             Handle resource_limit, u32 resource_type) {
     LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type);
 
-    const auto limit_value = RetrieveResourceLimitValue(resource_limit, resource_type,
+    const auto limit_value = RetrieveResourceLimitValue(system, resource_limit, resource_type,
                                                         ResourceLimitValueType::LimitValue);
     if (limit_value.Failed()) {
         return limit_value.Code();
@@ -1948,11 +2099,11 @@ static ResultCode GetResourceLimitLimitValue(u64* out_value, Handle resource_lim
     return RESULT_SUCCESS;
 }
 
-static ResultCode GetResourceLimitCurrentValue(u64* out_value, Handle resource_limit,
-                                               u32 resource_type) {
+static ResultCode GetResourceLimitCurrentValue(Core::System& system, u64* out_value,
+                                               Handle resource_limit, u32 resource_type) {
     LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type);
 
-    const auto current_value = RetrieveResourceLimitValue(resource_limit, resource_type,
+    const auto current_value = RetrieveResourceLimitValue(system, resource_limit, resource_type,
                                                           ResourceLimitValueType::CurrentValue);
     if (current_value.Failed()) {
         return current_value.Code();
@@ -1962,7 +2113,8 @@ static ResultCode GetResourceLimitCurrentValue(u64* out_value, Handle resource_l
     return RESULT_SUCCESS;
 }
 
-static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource_type, u64 value) {
+static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resource_limit,
+                                             u32 resource_type, u64 value) {
     LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}, Value={}", resource_limit,
               resource_type, value);
 
@@ -1972,8 +2124,7 @@ static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource
         return ERR_INVALID_ENUM_VALUE;
     }
 
-    auto& kernel = Core::System::GetInstance().Kernel();
-    auto* const current_process = kernel.CurrentProcess();
+    auto* const current_process = system.Kernel().CurrentProcess();
     ASSERT(current_process != nullptr);
 
     auto resource_limit_object =
@@ -1997,8 +2148,8 @@ static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource
     return RESULT_SUCCESS;
 }
 
-static ResultCode GetProcessList(u32* out_num_processes, VAddr out_process_ids,
-                                 u32 out_process_ids_size) {
+static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
+                                 VAddr out_process_ids, u32 out_process_ids_size) {
     LOG_DEBUG(Kernel_SVC, "called. out_process_ids=0x{:016X}, out_process_ids_size={}",
               out_process_ids, out_process_ids_size);
 
@@ -2010,7 +2161,7 @@ static ResultCode GetProcessList(u32* out_num_processes, VAddr out_process_ids,
         return ERR_OUT_OF_RANGE;
     }
 
-    const auto& kernel = Core::System::GetInstance().Kernel();
+    const auto& kernel = system.Kernel();
     const auto& vm_manager = kernel.CurrentProcess()->VMManager();
     const auto total_copy_size = out_process_ids_size * sizeof(u64);
 
@@ -2034,8 +2185,8 @@ static ResultCode GetProcessList(u32* out_num_processes, VAddr out_process_ids,
     return RESULT_SUCCESS;
 }
 
-ResultCode GetThreadList(u32* out_num_threads, VAddr out_thread_ids, u32 out_thread_ids_size,
-                         Handle debug_handle) {
+ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids,
+                         u32 out_thread_ids_size, Handle debug_handle) {
     // TODO: Handle this case when debug events are supported.
     UNIMPLEMENTED_IF(debug_handle != InvalidHandle);
 
@@ -2049,7 +2200,7 @@ ResultCode GetThreadList(u32* out_num_threads, VAddr out_thread_ids, u32 out_thr
         return ERR_OUT_OF_RANGE;
     }
 
-    const auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
+    const auto* const current_process = system.Kernel().CurrentProcess();
     const auto& vm_manager = current_process->VMManager();
     const auto total_copy_size = out_thread_ids_size * sizeof(u64);
 
@@ -2076,7 +2227,7 @@ ResultCode GetThreadList(u32* out_num_threads, VAddr out_thread_ids, u32 out_thr
 
 namespace {
 struct FunctionDef {
-    using Func = void();
+    using Func = void(Core::System&);
 
     u32 id;
     Func* func;
@@ -2139,7 +2290,7 @@ static const FunctionDef SVC_Table[] = {
     {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"},
     {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"},
     {0x35, SvcWrap<SignalToAddress>, "SignalToAddress"},
-    {0x36, nullptr, "Unknown"},
+    {0x36, nullptr, "SynchronizePreemptionState"},
     {0x37, nullptr, "Unknown"},
     {0x38, nullptr, "Unknown"},
     {0x39, nullptr, "Unknown"},
@@ -2204,8 +2355,8 @@ static const FunctionDef SVC_Table[] = {
     {0x74, nullptr, "MapProcessMemory"},
     {0x75, nullptr, "UnmapProcessMemory"},
     {0x76, SvcWrap<QueryProcessMemory>, "QueryProcessMemory"},
-    {0x77, nullptr, "MapProcessCodeMemory"},
-    {0x78, nullptr, "UnmapProcessCodeMemory"},
+    {0x77, SvcWrap<MapProcessCodeMemory>, "MapProcessCodeMemory"},
+    {0x78, SvcWrap<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"},
     {0x79, nullptr, "CreateProcess"},
     {0x7A, nullptr, "StartProcess"},
     {0x7B, nullptr, "TerminateProcess"},
@@ -2225,7 +2376,7 @@ static const FunctionDef* GetSVCInfo(u32 func_num) {
 
 MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
 
-void CallSVC(u32 immediate) {
+void CallSVC(Core::System& system, u32 immediate) {
     MICROPROFILE_SCOPE(Kernel_SVC);
 
     // Lock the global kernel mutex when we enter the kernel HLE.
@@ -2234,7 +2385,7 @@ void CallSVC(u32 immediate) {
     const FunctionDef* info = GetSVCInfo(immediate);
     if (info) {
         if (info->func) {
-            info->func();
+            info->func(system);
         } else {
             LOG_CRITICAL(Kernel_SVC, "Unimplemented SVC function {}(..)", info->name);
         }
diff --git a/src/core/hle/kernel/svc.h b/src/core/hle/kernel/svc.h
index c37ae0f98..c5539ac1c 100644
--- a/src/core/hle/kernel/svc.h
+++ b/src/core/hle/kernel/svc.h
@@ -6,8 +6,12 @@
 
 #include "common/common_types.h"
 
+namespace Core {
+class System;
+}
+
 namespace Kernel {
 
-void CallSVC(u32 immediate);
+void CallSVC(Core::System& system, u32 immediate);
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index b3733680f..865473c6f 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -11,278 +11,319 @@
 
 namespace Kernel {
 
-static inline u64 Param(int n) {
-    return Core::CurrentArmInterface().GetReg(n);
+static inline u64 Param(const Core::System& system, int n) {
+    return system.CurrentArmInterface().GetReg(n);
 }
 
 /**
  * HLE a function return from the current ARM userland process
- * @param res Result to return
+ * @param system System context
+ * @param result Result to return
  */
-static inline void FuncReturn(u64 res) {
-    Core::CurrentArmInterface().SetReg(0, res);
+static inline void FuncReturn(Core::System& system, u64 result) {
+    system.CurrentArmInterface().SetReg(0, result);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Function wrappers that return type ResultCode
 
-template <ResultCode func(u64)>
-void SvcWrap() {
-    FuncReturn(func(Param(0)).raw);
+template <ResultCode func(Core::System&, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0)).raw);
 }
 
-template <ResultCode func(u32)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0))).raw);
+template <ResultCode func(Core::System&, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
 }
 
-template <ResultCode func(u32, u32)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1))).raw);
+template <ResultCode func(Core::System&, u32, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(
+        system,
+        func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw);
+}
+
+template <ResultCode func(Core::System&, u32, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
+                            Param(system, 2), Param(system, 3))
+                           .raw);
 }
 
-template <ResultCode func(u32*)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*)>
+void SvcWrap(Core::System& system) {
     u32 param = 0;
-    const u32 retval = func(&param).raw;
-    Core::CurrentArmInterface().SetReg(1, param);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param).raw;
+    system.CurrentArmInterface().SetReg(1, param);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u32*, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u32)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
-    u32 retval = func(&param_1, static_cast<u32>(Param(1))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw;
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u32*, u32*)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u32*)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
     u32 param_2 = 0;
-    const u32 retval = func(&param_1, &param_2).raw;
+    const u32 retval = func(system, &param_1, &param_2).raw;
 
-    auto& arm_interface = Core::CurrentArmInterface();
+    auto& arm_interface = system.CurrentArmInterface();
     arm_interface.SetReg(1, param_1);
     arm_interface.SetReg(2, param_2);
 
-    FuncReturn(retval);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u32*, u64)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u64)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
-    const u32 retval = func(&param_1, Param(1)).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1)).raw;
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u32*, u64, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u64, u32)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
-    const u32 retval = func(&param_1, Param(1), static_cast<u32>(Param(2))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval =
+        func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2))).raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u64*, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64*, u32)>
+void SvcWrap(Core::System& system) {
     u64 param_1 = 0;
-    const u32 retval = func(&param_1, static_cast<u32>(Param(1))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u64, s32)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), static_cast<s32>(Param(1))).raw);
+template <ResultCode func(Core::System&, u64, s32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), static_cast<s32>(Param(system, 1))).raw);
 }
 
-template <ResultCode func(u64, u32)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), static_cast<u32>(Param(1))).raw);
+template <ResultCode func(Core::System&, u64, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw);
 }
 
-template <ResultCode func(u64*, u64)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64*, u64)>
+void SvcWrap(Core::System& system) {
     u64 param_1 = 0;
-    u32 retval = func(&param_1, Param(1)).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1)).raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u64*, u32, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64*, u32, u32)>
+void SvcWrap(Core::System& system) {
     u64 param_1 = 0;
-    u32 retval = func(&param_1, static_cast<u32>(Param(1)), static_cast<u32>(Param(2))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1)),
+                            static_cast<u32>(Param(system, 2)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u32, u64)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), Param(1)).raw);
+template <ResultCode func(Core::System&, u32, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1)).raw);
 }
 
-template <ResultCode func(u32, u32, u64)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1)), Param(2)).raw);
+template <ResultCode func(Core::System&, u32, u32, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)),
+                            static_cast<u32>(Param(system, 1)), Param(system, 2))
+                           .raw);
 }
 
-template <ResultCode func(u32, u32*, u64*)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32, u32*, u64*)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
     u64 param_2 = 0;
-    ResultCode retval = func(static_cast<u32>(Param(2)), &param_1, &param_2);
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    Core::CurrentArmInterface().SetReg(2, param_2);
-    FuncReturn(retval.raw);
-}
+    const ResultCode retval = func(system, static_cast<u32>(Param(system, 2)), &param_1, &param_2);
 
-template <ResultCode func(u64, u64, u32, u32)>
-void SvcWrap() {
-    FuncReturn(
-        func(Param(0), Param(1), static_cast<u32>(Param(2)), static_cast<u32>(Param(3))).raw);
+    system.CurrentArmInterface().SetReg(1, param_1);
+    system.CurrentArmInterface().SetReg(2, param_2);
+    FuncReturn(system, retval.raw);
 }
 
-template <ResultCode func(u64, u64, u32, u64)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), Param(1), static_cast<u32>(Param(2)), Param(3)).raw);
+template <ResultCode func(Core::System&, u64, u64, u32, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
+                            static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3)))
+                           .raw);
 }
 
-template <ResultCode func(u32, u64, u32)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), Param(1), static_cast<u32>(Param(2))).raw);
+template <ResultCode func(Core::System&, u64, u64, u32, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
+                            static_cast<u32>(Param(system, 2)), Param(system, 3))
+                           .raw);
 }
 
-template <ResultCode func(u64, u64, u64)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), Param(1), Param(2)).raw);
+template <ResultCode func(Core::System&, u32, u64, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
+                            static_cast<u32>(Param(system, 2)))
+                           .raw);
 }
 
-template <ResultCode func(u64, u64, u32)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), Param(1), static_cast<u32>(Param(2))).raw);
+template <ResultCode func(Core::System&, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1), Param(system, 2)).raw);
 }
 
-template <ResultCode func(u32, u64, u64, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64, u64, u32)>
+void SvcWrap(Core::System& system) {
     FuncReturn(
-        func(static_cast<u32>(Param(0)), Param(1), Param(2), static_cast<u32>(Param(3))).raw);
+        system,
+        func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw);
 }
 
-template <ResultCode func(u32, u64, u64)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), Param(1), Param(2)).raw);
+template <ResultCode func(Core::System&, u32, u64, u64, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
+                            Param(system, 2), static_cast<u32>(Param(system, 3)))
+                           .raw);
 }
 
-template <ResultCode func(u32*, u64, u64, s64)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32, u64, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(
+        system,
+        func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw);
+}
+
+template <ResultCode func(Core::System&, u32*, u64, u64, s64)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
-    ResultCode retval =
-        func(&param_1, Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3)));
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval.raw);
+    const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
+                            static_cast<s64>(Param(system, 3)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u64, u64, u32, s64)>
-void SvcWrap() {
-    FuncReturn(
-        func(Param(0), Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3))).raw);
+template <ResultCode func(Core::System&, u64, u64, u32, s64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
+                            static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
+                           .raw);
 }
 
-template <ResultCode func(u64*, u64, u64, u64)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64*, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
     u64 param_1 = 0;
-    u32 retval = func(&param_1, Param(1), Param(2), Param(3)).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval =
+        func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3)).raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u32*, u64, u64, u64, u32, s32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u64, u64, u64, u32, s32)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
-    u32 retval = func(&param_1, Param(1), Param(2), Param(3), static_cast<u32>(Param(4)),
-                      static_cast<s32>(Param(5)))
-                     .raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3),
+                            static_cast<u32>(Param(system, 4)), static_cast<s32>(Param(system, 5)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u32*, u64, u64, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u64, u64, u32)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
-    u32 retval = func(&param_1, Param(1), Param(2), static_cast<u32>(Param(3))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2),
+                            static_cast<u32>(Param(system, 3)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(Handle*, u64, u32, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, Handle*, u64, u32, u32)>
+void SvcWrap(Core::System& system) {
     u32 param_1 = 0;
-    u32 retval =
-        func(&param_1, Param(1), static_cast<u32>(Param(2)), static_cast<u32>(Param(3))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
+                            static_cast<u32>(Param(system, 3)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }
 
-template <ResultCode func(u64, u32, s32, s64)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)),
-                    static_cast<s64>(Param(3)))
-                   .raw);
+template <ResultCode func(Core::System&, u64, u32, s32, s64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
+                            static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
+                           .raw);
 }
 
-template <ResultCode func(u64, u32, s32, s32)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)),
-                    static_cast<s32>(Param(3)))
-                   .raw);
+template <ResultCode func(Core::System&, u64, u32, s32, s32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
+                            static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
+                           .raw);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Function wrappers that return type u32
 
-template <u32 func()>
-void SvcWrap() {
-    FuncReturn(func());
+template <u32 func(Core::System&)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system));
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Function wrappers that return type u64
 
-template <u64 func()>
-void SvcWrap() {
-    FuncReturn(func());
+template <u64 func(Core::System&)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system));
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 /// Function wrappers that return type void
 
-template <void func()>
-void SvcWrap() {
-    func();
+template <void func(Core::System&)>
+void SvcWrap(Core::System& system) {
+    func(system);
 }
 
-template <void func(s64)>
-void SvcWrap() {
-    func(static_cast<s64>(Param(0)));
+template <void func(Core::System&, s64)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<s64>(Param(system, 0)));
 }
 
-template <void func(u64, u64 len)>
-void SvcWrap() {
-    func(Param(0), Param(1));
+template <void func(Core::System&, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, Param(system, 0), Param(system, 1));
 }
 
-template <void func(u64, u64, u64)>
-void SvcWrap() {
-    func(Param(0), Param(1), Param(2));
+template <void func(Core::System&, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, Param(system, 0), Param(system, 1), Param(system, 2));
 }
 
-template <void func(u32, u64, u64)>
-void SvcWrap() {
-    func(static_cast<u32>(Param(0)), Param(1), Param(2));
+template <void func(Core::System&, u32, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2));
 }
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 1b891f632..ca52267b2 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -220,11 +220,6 @@ void Thread::SetPriority(u32 priority) {
     UpdatePriority();
 }
 
-void Thread::BoostPriority(u32 priority) {
-    scheduler->SetThreadPriority(this, priority);
-    current_priority = priority;
-}
-
 void Thread::SetWaitSynchronizationResult(ResultCode result) {
     context.cpu_registers[0] = result.raw;
 }
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 73e5d1bb4..411a73b49 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -102,11 +102,16 @@ public:
     std::string GetName() const override {
         return name;
     }
+
+    void SetName(std::string new_name) {
+        name = std::move(new_name);
+    }
+
     std::string GetTypeName() const override {
         return "Thread";
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::Thread;
+    static constexpr HandleType HANDLE_TYPE = HandleType::Thread;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
@@ -136,12 +141,6 @@ public:
      */
     void SetPriority(u32 priority);
 
-    /**
-     * Temporarily boosts the thread's priority until the next time it is scheduled
-     * @param priority The new priority
-     */
-    void BoostPriority(u32 priority);
-
     /// Adds a thread to the list of threads that are waiting for a lock held by this thread.
     void AddMutexWaiter(SharedPtr<Thread> thread);
 
@@ -345,10 +344,6 @@ public:
         arb_wait_address = address;
     }
 
-    void SetGuestHandle(Handle handle) {
-        guest_handle = handle;
-    }
-
     bool HasWakeupCallback() const {
         return wakeup_callback != nullptr;
     }
@@ -442,9 +437,6 @@ private:
     /// If waiting for an AddressArbiter, this is the address being waited on.
     VAddr arb_wait_address{0};
 
-    /// Handle used by guest emulated application to access this thread
-    Handle guest_handle = 0;
-
     /// Handle used as userdata to reference this object when inserting into the CoreTiming queue.
     Handle callback_handle = 0;
 
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index ec0a480ce..f0c0c12fc 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -302,6 +302,86 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
     return MakeResult<VAddr>(heap_region_base);
 }
 
+ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) {
+    constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped;
+    const auto src_check_result = CheckRangeState(
+        src_address, size, MemoryState::All, MemoryState::Heap, VMAPermission::All,
+        VMAPermission::ReadWrite, MemoryAttribute::Mask, MemoryAttribute::None, ignore_attribute);
+
+    if (src_check_result.Failed()) {
+        return src_check_result.Code();
+    }
+
+    const auto mirror_result =
+        MirrorMemory(dst_address, src_address, size, MemoryState::ModuleCode);
+    if (mirror_result.IsError()) {
+        return mirror_result;
+    }
+
+    // Ensure we lock the source memory region.
+    const auto src_vma_result = CarveVMARange(src_address, size);
+    if (src_vma_result.Failed()) {
+        return src_vma_result.Code();
+    }
+    auto src_vma_iter = *src_vma_result;
+    src_vma_iter->second.attribute = MemoryAttribute::Locked;
+    Reprotect(src_vma_iter, VMAPermission::Read);
+
+    // The destination memory region is fine as is, however we need to make it read-only.
+    return ReprotectRange(dst_address, size, VMAPermission::Read);
+}
+
+ResultCode VMManager::UnmapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) {
+    constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped;
+    const auto src_check_result = CheckRangeState(
+        src_address, size, MemoryState::All, MemoryState::Heap, VMAPermission::None,
+        VMAPermission::None, MemoryAttribute::Mask, MemoryAttribute::Locked, ignore_attribute);
+
+    if (src_check_result.Failed()) {
+        return src_check_result.Code();
+    }
+
+    // Yes, the kernel only checks the first page of the region.
+    const auto dst_check_result =
+        CheckRangeState(dst_address, Memory::PAGE_SIZE, MemoryState::FlagModule,
+                        MemoryState::FlagModule, VMAPermission::None, VMAPermission::None,
+                        MemoryAttribute::Mask, MemoryAttribute::None, ignore_attribute);
+
+    if (dst_check_result.Failed()) {
+        return dst_check_result.Code();
+    }
+
+    const auto dst_memory_state = std::get<MemoryState>(*dst_check_result);
+    const auto dst_contiguous_check_result = CheckRangeState(
+        dst_address, size, MemoryState::All, dst_memory_state, VMAPermission::None,
+        VMAPermission::None, MemoryAttribute::Mask, MemoryAttribute::None, ignore_attribute);
+
+    if (dst_contiguous_check_result.Failed()) {
+        return dst_contiguous_check_result.Code();
+    }
+
+    const auto unmap_result = UnmapRange(dst_address, size);
+    if (unmap_result.IsError()) {
+        return unmap_result;
+    }
+
+    // With the mirrored portion unmapped, restore the original region's traits.
+    const auto src_vma_result = CarveVMARange(src_address, size);
+    if (src_vma_result.Failed()) {
+        return src_vma_result.Code();
+    }
+    auto src_vma_iter = *src_vma_result;
+    src_vma_iter->second.state = MemoryState::Heap;
+    src_vma_iter->second.attribute = MemoryAttribute::None;
+    Reprotect(src_vma_iter, VMAPermission::ReadWrite);
+
+    if (dst_memory_state == MemoryState::ModuleCode) {
+        Core::System::GetInstance().InvalidateCpuInstructionCaches();
+    }
+
+    return unmap_result;
+}
+
 MemoryInfo VMManager::QueryMemory(VAddr address) const {
     const auto vma = FindVMA(address);
     MemoryInfo memory_info{};
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 6f484b7bf..288eb9450 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -43,6 +43,9 @@ enum class VMAPermission : u8 {
     ReadExecute = Read | Execute,
     WriteExecute = Write | Execute,
     ReadWriteExecute = Read | Write | Execute,
+
+    // Used as a wildcard when checking permissions across memory ranges
+    All = 0xFF,
 };
 
 constexpr VMAPermission operator|(VMAPermission lhs, VMAPermission rhs) {
@@ -152,6 +155,9 @@ enum class MemoryState : u32 {
     FlagUncached                    = 1U << 24,
     FlagCodeMemory                  = 1U << 25,
 
+    // Wildcard used in range checking to indicate all states.
+    All                             = 0xFFFFFFFF,
+
     // Convenience flag sets to reduce repetition
     IPCFlags = FlagIPC0 | FlagIPC3 | FlagIPC1,
 
@@ -415,6 +421,49 @@ public:
     ///
     ResultVal<VAddr> SetHeapSize(u64 size);
 
+    /// Maps a region of memory as code memory.
+    ///
+    /// @param dst_address The base address of the region to create the aliasing memory region.
+    /// @param src_address The base address of the region to be aliased.
+    /// @param size        The total amount of memory to map in bytes.
+    ///
+    /// @pre Both memory regions lie within the actual addressable address space.
+    ///
+    /// @post After this function finishes execution, assuming success, then the address range
+    ///       [dst_address, dst_address+size) will alias the memory region,
+    ///       [src_address, src_address+size).
+    ///       <p>
+    ///       What this also entails is as follows:
+    ///          1. The aliased region gains the Locked memory attribute.
+    ///          2. The aliased region becomes read-only.
+    ///          3. The aliasing region becomes read-only.
+    ///          4. The aliasing region is created with a memory state of MemoryState::CodeModule.
+    ///
+    ResultCode MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size);
+
+    /// Unmaps a region of memory designated as code module memory.
+    ///
+    /// @param dst_address The base address of the memory region aliasing the source memory region.
+    /// @param src_address The base address of the memory region being aliased.
+    /// @param size        The size of the memory region to unmap in bytes.
+    ///
+    /// @pre Both memory ranges lie within the actual addressable address space.
+    ///
+    /// @pre The memory region being unmapped has been previously been mapped
+    ///      by a call to MapCodeMemory.
+    ///
+    /// @post After execution of the function, if successful. the aliasing memory region
+    ///       will be unmapped and the aliased region will have various traits about it
+    ///       restored to what they were prior to the original mapping call preceding
+    ///       this function call.
+    ///       <p>
+    ///       What this also entails is as follows:
+    ///           1. The state of the memory region will now indicate a general heap region.
+    ///           2. All memory attributes for the memory region are cleared.
+    ///           3. Memory permissions for the region are restored to user read/write.
+    ///
+    ResultCode UnmapCodeMemory(VAddr dst_address, VAddr src_address, u64 size);
+
     /// Queries the memory manager for information about the given address.
     ///
     /// @param address The address to query the memory manager about for information.
diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp
index 90580ed93..c8eaf9488 100644
--- a/src/core/hle/kernel/wait_object.cpp
+++ b/src/core/hle/kernel/wait_object.cpp
@@ -30,7 +30,7 @@ void WaitObject::RemoveWaitingThread(Thread* thread) {
         waiting_threads.erase(itr);
 }
 
-SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() {
+SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() const {
     Thread* candidate = nullptr;
     u32 candidate_priority = THREADPRIO_LOWEST + 1;
 
diff --git a/src/core/hle/kernel/wait_object.h b/src/core/hle/kernel/wait_object.h
index 04464a51a..3271a30a7 100644
--- a/src/core/hle/kernel/wait_object.h
+++ b/src/core/hle/kernel/wait_object.h
@@ -54,7 +54,7 @@ public:
     void WakeupWaitingThread(SharedPtr<Thread> thread);
 
     /// Obtains the highest priority thread that is ready to run from this object's waiting list.
-    SharedPtr<Thread> GetHighestPriorityReadyThread();
+    SharedPtr<Thread> GetHighestPriorityReadyThread() const;
 
     /// Get a const reference to the waiting threads list for debug use
     const std::vector<SharedPtr<Thread>>& GetWaitingThreads() const;
diff --git a/src/core/hle/kernel/writable_event.h b/src/core/hle/kernel/writable_event.h
index c9068dd3d..d00c92a6b 100644
--- a/src/core/hle/kernel/writable_event.h
+++ b/src/core/hle/kernel/writable_event.h
@@ -37,7 +37,7 @@ public:
         return name;
     }
 
-    static const HandleType HANDLE_TYPE = HandleType::WritableEvent;
+    static constexpr HandleType HANDLE_TYPE = HandleType::WritableEvent;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
     }
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index 1f8ed265e..ba7d7acbd 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -137,6 +137,7 @@ private:
 class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
 public:
     IManagerForApplication() : ServiceFramework("IManagerForApplication") {
+        // clang-format off
         static const FunctionInfo functions[] = {
             {0, &IManagerForApplication::CheckAvailability, "CheckAvailability"},
             {1, &IManagerForApplication::GetAccountId, "GetAccountId"},
@@ -145,7 +146,10 @@ public:
             {130, nullptr, "GetNintendoAccountUserResourceCacheForApplication"},
             {150, nullptr, "CreateAuthorizationRequest"},
             {160, nullptr, "StoreOpenContext"},
+            {170, nullptr, "LoadNetworkServiceLicenseKindAsync"},
         };
+        // clang-format on
+
         RegisterHandlers(functions);
     }
 
diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp
index 5e2030355..d66233cad 100644
--- a/src/core/hle/service/acc/acc_su.cpp
+++ b/src/core/hle/service/acc/acc_su.cpp
@@ -8,6 +8,7 @@ namespace Service::Account {
 
 ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> profile_manager)
     : Module::Interface(std::move(module), std::move(profile_manager), "acc:su") {
+    // clang-format off
     static const FunctionInfo functions[] = {
         {0, &ACC_SU::GetUserCount, "GetUserCount"},
         {1, &ACC_SU::GetUserExistence, "GetUserExistence"},
@@ -19,6 +20,7 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
         {50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
         {51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
         {60, nullptr, "ListOpenContextStoredUsers"},
+        {99, nullptr, "DebugActivateOpenContextRetention"},
         {100, nullptr, "GetUserRegistrationNotifier"},
         {101, nullptr, "GetUserStateChangeNotifier"},
         {102, nullptr, "GetBaasAccountManagerForSystemService"},
@@ -29,6 +31,8 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
         {111, nullptr, "ClearSaveDataThumbnail"},
         {112, nullptr, "LoadSaveDataThumbnail"},
         {113, nullptr, "GetSaveDataThumbnailExistence"},
+        {130, nullptr, "ActivateOpenContextRetention"},
+        {140, nullptr, "ListQualifiedUsers"},
         {190, nullptr, "GetUserLastOpenedApplication"},
         {191, nullptr, "ActivateOpenContextHolder"},
         {200, nullptr, "BeginUserRegistration"},
@@ -48,6 +52,8 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
         {998, nullptr, "DebugSetUserStateClose"},
         {999, nullptr, "DebugSetUserStateOpen"},
     };
+    // clang-format on
+
     RegisterHandlers(functions);
 }
 
diff --git a/src/core/hle/service/acc/acc_u0.cpp b/src/core/hle/service/acc/acc_u0.cpp
index a4d705b45..182f7c7e5 100644
--- a/src/core/hle/service/acc/acc_u0.cpp
+++ b/src/core/hle/service/acc/acc_u0.cpp
@@ -8,6 +8,7 @@ namespace Service::Account {
 
 ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> profile_manager)
     : Module::Interface(std::move(module), std::move(profile_manager), "acc:u0") {
+    // clang-format off
     static const FunctionInfo functions[] = {
         {0, &ACC_U0::GetUserCount, "GetUserCount"},
         {1, &ACC_U0::GetUserExistence, "GetUserExistence"},
@@ -19,6 +20,7 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
         {50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
         {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
         {60, nullptr, "ListOpenContextStoredUsers"},
+        {99, nullptr, "DebugActivateOpenContextRetention"},
         {100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"},
         {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"},
         {102, nullptr, "AuthenticateApplicationAsync"},
@@ -27,7 +29,13 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
         {111, nullptr, "ClearSaveDataThumbnail"},
         {120, nullptr, "CreateGuestLoginRequest"},
         {130, nullptr, "LoadOpenContext"},
+        {131, nullptr, "ListOpenContextStoredUsers"},
+        {140, nullptr, "InitializeApplicationInfo"},
+        {141, nullptr, "ListQualifiedUsers"},
+        {150, nullptr, "IsUserAccountSwitchLocked"},
     };
+    // clang-format on
+
     RegisterHandlers(functions);
 }
 
diff --git a/src/core/hle/service/acc/acc_u1.cpp b/src/core/hle/service/acc/acc_u1.cpp
index 8fffc93b5..2dd17d935 100644
--- a/src/core/hle/service/acc/acc_u1.cpp
+++ b/src/core/hle/service/acc/acc_u1.cpp
@@ -8,6 +8,7 @@ namespace Service::Account {
 
 ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> profile_manager)
     : Module::Interface(std::move(module), std::move(profile_manager), "acc:u1") {
+    // clang-format off
     static const FunctionInfo functions[] = {
         {0, &ACC_U1::GetUserCount, "GetUserCount"},
         {1, &ACC_U1::GetUserExistence, "GetUserExistence"},
@@ -19,6 +20,7 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
         {50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
         {51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
         {60, nullptr, "ListOpenContextStoredUsers"},
+        {99, nullptr, "DebugActivateOpenContextRetention"},
         {100, nullptr, "GetUserRegistrationNotifier"},
         {101, nullptr, "GetUserStateChangeNotifier"},
         {102, nullptr, "GetBaasAccountManagerForSystemService"},
@@ -29,12 +31,16 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
         {111, nullptr, "ClearSaveDataThumbnail"},
         {112, nullptr, "LoadSaveDataThumbnail"},
         {113, nullptr, "GetSaveDataThumbnailExistence"},
+        {130, nullptr, "ActivateOpenContextRetention"},
+        {140, nullptr, "ListQualifiedUsers"},
         {190, nullptr, "GetUserLastOpenedApplication"},
         {191, nullptr, "ActivateOpenContextHolder"},
         {997, nullptr, "DebugInvalidateTokenCacheForUser"},
         {998, nullptr, "DebugSetUserStateClose"},
         {999, nullptr, "DebugSetUserStateOpen"},
     };
+    // clang-format on
+
     RegisterHandlers(functions);
 }
 
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 85271d418..1aa4ce1ac 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -224,6 +224,7 @@ IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} {
         {20, nullptr, "InvalidateTransitionLayer"},
         {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"},
         {40, nullptr, "GetAppletResourceUsageInfo"},
+        {41, nullptr, "SetCpuBoostModeForApplet"},
     };
     // clang-format on
 
@@ -256,6 +257,7 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
         {40, &ISelfController::CreateManagedDisplayLayer, "CreateManagedDisplayLayer"},
         {41, nullptr, "IsSystemBufferSharingEnabled"},
         {42, nullptr, "GetSystemSharedLayerHandle"},
+        {43, nullptr, "GetSystemSharedBufferHandle"},
         {50, &ISelfController::SetHandlesRequestToDisplay, "SetHandlesRequestToDisplay"},
         {51, nullptr, "ApproveToDisplay"},
         {60, nullptr, "OverrideAutoSleepTimeAndDimmingTime"},
@@ -269,9 +271,11 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
         {68, nullptr, "SetAutoSleepDisabled"},
         {69, nullptr, "IsAutoSleepDisabled"},
         {70, nullptr, "ReportMultimediaError"},
+        {71, nullptr, "GetCurrentIlluminanceEx"},
         {80, nullptr, "SetWirelessPriorityMode"},
         {90, nullptr, "GetAccumulatedSuspendedTickValue"},
         {91, nullptr, "GetAccumulatedSuspendedTickChangedEvent"},
+        {100, nullptr, "SetAlbumImageTakenNotificationEnabled"},
         {1000, nullptr, "GetDebugStorageChannel"},
     };
     // clang-format on
@@ -516,11 +520,20 @@ ICommonStateGetter::ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_q
         {50, nullptr, "IsVrModeEnabled"},
         {51, nullptr, "SetVrModeEnabled"},
         {52, nullptr, "SwitchLcdBacklight"},
+        {53, nullptr, "BeginVrModeEx"},
+        {54, nullptr, "EndVrModeEx"},
         {55, nullptr, "IsInControllerFirmwareUpdateSection"},
         {60, &ICommonStateGetter::GetDefaultDisplayResolution, "GetDefaultDisplayResolution"},
         {61, &ICommonStateGetter::GetDefaultDisplayResolutionChangeEvent, "GetDefaultDisplayResolutionChangeEvent"},
         {62, nullptr, "GetHdcpAuthenticationState"},
         {63, nullptr, "GetHdcpAuthenticationStateChangeEvent"},
+        {64, nullptr, "SetTvPowerStateMatchingMode"},
+        {65, nullptr, "GetApplicationIdByContentActionName"},
+        {66, nullptr, "SetCpuBoostMode"},
+        {80, nullptr, "PerformSystemButtonPressingIfInFocus"},
+        {90, nullptr, "SetPerformanceConfigurationChangedNotification"},
+        {91, nullptr, "GetCurrentPerformanceConfiguration"},
+        {200, nullptr, "GetOperationModeSystemInfo"},
     };
     // clang-format on
 
@@ -960,6 +973,8 @@ IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationF
         {11, nullptr, "CreateApplicationAndPushAndRequestToStartForQuest"},
         {12, nullptr, "CreateApplicationAndRequestToStart"},
         {13, &IApplicationFunctions::CreateApplicationAndRequestToStartForQuest, "CreateApplicationAndRequestToStartForQuest"},
+        {14, nullptr, "CreateApplicationWithAttributeAndPushAndRequestToStartForQuest"},
+        {15, nullptr, "CreateApplicationWithAttributeAndRequestToStartForQuest"},
         {20, &IApplicationFunctions::EnsureSaveData, "EnsureSaveData"},
         {21, &IApplicationFunctions::GetDesiredLanguage, "GetDesiredLanguage"},
         {22, &IApplicationFunctions::SetTerminateResult, "SetTerminateResult"},
@@ -1233,6 +1248,7 @@ IGlobalStateController::IGlobalStateController() : ServiceFramework("IGlobalStat
         {2, nullptr, "StartSleepSequence"},
         {3, nullptr, "StartShutdownSequence"},
         {4, nullptr, "StartRebootSequence"},
+        {9, nullptr, "IsAutoPowerDownRequested"},
         {10, nullptr, "LoadAndApplyIdlePolicySettings"},
         {11, nullptr, "NotifyCecSettingsChanged"},
         {12, nullptr, "SetDefaultHomeButtonLongPressTime"},
diff --git a/src/core/hle/service/am/applet_ae.cpp b/src/core/hle/service/am/applet_ae.cpp
index b888f861d..488add8e7 100644
--- a/src/core/hle/service/am/applet_ae.cpp
+++ b/src/core/hle/service/am/applet_ae.cpp
@@ -16,6 +16,7 @@ public:
                                  std::shared_ptr<AppletMessageQueue> msg_queue)
         : ServiceFramework("ILibraryAppletProxy"), nvflinger(std::move(nvflinger)),
           msg_queue(std::move(msg_queue)) {
+        // clang-format off
         static const FunctionInfo functions[] = {
             {0, &ILibraryAppletProxy::GetCommonStateGetter, "GetCommonStateGetter"},
             {1, &ILibraryAppletProxy::GetSelfController, "GetSelfController"},
@@ -25,8 +26,11 @@ public:
             {10, &ILibraryAppletProxy::GetProcessWindingController, "GetProcessWindingController"},
             {11, &ILibraryAppletProxy::GetLibraryAppletCreator, "GetLibraryAppletCreator"},
             {20, &ILibraryAppletProxy::GetApplicationFunctions, "GetApplicationFunctions"},
+            {21, nullptr, "GetAppletCommonFunctions"},
             {1000, &ILibraryAppletProxy::GetDebugFunctions, "GetDebugFunctions"},
         };
+        // clang-format on
+
         RegisterHandlers(functions);
     }
 
@@ -113,6 +117,7 @@ public:
                                 std::shared_ptr<AppletMessageQueue> msg_queue)
         : ServiceFramework("ISystemAppletProxy"), nvflinger(std::move(nvflinger)),
           msg_queue(std::move(msg_queue)) {
+        // clang-format off
         static const FunctionInfo functions[] = {
             {0, &ISystemAppletProxy::GetCommonStateGetter, "GetCommonStateGetter"},
             {1, &ISystemAppletProxy::GetSelfController, "GetSelfController"},
@@ -124,8 +129,11 @@ public:
             {20, &ISystemAppletProxy::GetHomeMenuFunctions, "GetHomeMenuFunctions"},
             {21, &ISystemAppletProxy::GetGlobalStateController, "GetGlobalStateController"},
             {22, &ISystemAppletProxy::GetApplicationCreator, "GetApplicationCreator"},
+            {23, nullptr, "GetAppletCommonFunctions"},
             {1000, &ISystemAppletProxy::GetDebugFunctions, "GetDebugFunctions"},
         };
+        // clang-format on
+
         RegisterHandlers(functions);
     }
 
diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp
index 2d768d9fc..51d8c26b4 100644
--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -50,6 +50,7 @@ static std::vector<u64> AccumulateAOCTitleIDs() {
 }
 
 AOC_U::AOC_U() : ServiceFramework("aoc:u"), add_on_content(AccumulateAOCTitleIDs()) {
+    // clang-format off
     static const FunctionInfo functions[] = {
         {0, nullptr, "CountAddOnContentByApplicationId"},
         {1, nullptr, "ListAddOnContentByApplicationId"},
@@ -60,7 +61,10 @@ AOC_U::AOC_U() : ServiceFramework("aoc:u"), add_on_content(AccumulateAOCTitleIDs
         {6, nullptr, "PrepareAddOnContentByApplicationId"},
         {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"},
         {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"},
+        {100, nullptr, "CreateEcPurchasedEventManager"},
     };
+    // clang-format on
+
     RegisterHandlers(functions);
 
     auto& kernel = Core::System::GetInstance().Kernel();
diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp
index fcacbab72..d058c0245 100644
--- a/src/core/hle/service/apm/interface.cpp
+++ b/src/core/hle/service/apm/interface.cpp
@@ -87,6 +87,8 @@ APM_Sys::APM_Sys() : ServiceFramework{"apm:sys"} {
         {3, nullptr, "GetLastThrottlingState"},
         {4, nullptr, "ClearLastThrottlingState"},
         {5, nullptr, "LoadAndApplySettings"},
+        {6, nullptr, "SetCpuBoostMode"},
+        {7, nullptr, "GetCurrentPerformanceConfiguration"},
     };
     // clang-format on
 
diff --git a/src/core/hle/service/audio/audctl.cpp b/src/core/hle/service/audio/audctl.cpp
index b6b71f966..f43e512e9 100644
--- a/src/core/hle/service/audio/audctl.cpp
+++ b/src/core/hle/service/audio/audctl.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/logging/log.h"
+#include "core/hle/ipc_helpers.h"
 #include "core/hle/service/audio/audctl.h"
 
 namespace Service::Audio {
@@ -11,8 +13,8 @@ AudCtl::AudCtl() : ServiceFramework{"audctl"} {
     static const FunctionInfo functions[] = {
         {0, nullptr, "GetTargetVolume"},
         {1, nullptr, "SetTargetVolume"},
-        {2, nullptr, "GetTargetVolumeMin"},
-        {3, nullptr, "GetTargetVolumeMax"},
+        {2, &AudCtl::GetTargetVolumeMin, "GetTargetVolumeMin"},
+        {3, &AudCtl::GetTargetVolumeMax, "GetTargetVolumeMax"},
         {4, nullptr, "IsTargetMute"},
         {5, nullptr, "SetTargetMute"},
         {6, nullptr, "IsTargetConnected"},
@@ -44,4 +46,28 @@ AudCtl::AudCtl() : ServiceFramework{"audctl"} {
 
 AudCtl::~AudCtl() = default;
 
+void AudCtl::GetTargetVolumeMin(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Audio, "called.");
+
+    // This service function is currently hardcoded on the
+    // actual console to this value (as of 6.0.0).
+    constexpr s32 target_min_volume = 0;
+
+    IPC::ResponseBuilder rb{ctx, 3};
+    rb.Push(RESULT_SUCCESS);
+    rb.Push(target_min_volume);
+}
+
+void AudCtl::GetTargetVolumeMax(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Audio, "called.");
+
+    // This service function is currently hardcoded on the
+    // actual console to this value (as of 6.0.0).
+    constexpr s32 target_max_volume = 15;
+
+    IPC::ResponseBuilder rb{ctx, 3};
+    rb.Push(RESULT_SUCCESS);
+    rb.Push(target_max_volume);
+}
+
 } // namespace Service::Audio
diff --git a/src/core/hle/service/audio/audctl.h b/src/core/hle/service/audio/audctl.h
index 9d2d9e83b..c7fafc02e 100644
--- a/src/core/hle/service/audio/audctl.h
+++ b/src/core/hle/service/audio/audctl.h
@@ -12,6 +12,10 @@ class AudCtl final : public ServiceFramework<AudCtl> {
 public:
     explicit AudCtl();
     ~AudCtl() override;
+
+private:
+    void GetTargetVolumeMin(Kernel::HLERequestContext& ctx);
+    void GetTargetVolumeMax(Kernel::HLERequestContext& ctx);
 };
 
 } // namespace Service::Audio
diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp
index e5daefdde..d7f1d348d 100644
--- a/src/core/hle/service/audio/audin_u.cpp
+++ b/src/core/hle/service/audio/audin_u.cpp
@@ -25,6 +25,7 @@ public:
             {11, nullptr, "GetAudioInBufferCount"},
             {12, nullptr, "SetAudioInDeviceGain"},
             {13, nullptr, "GetAudioInDeviceGain"},
+            {14, nullptr, "FlushAudioInBuffers"},
         };
         // clang-format on
 
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 39acb7b23..12875fb42 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -44,7 +44,7 @@ public:
               std::string&& unique_name)
         : ServiceFramework("IAudioOut"), audio_core(audio_core),
           device_name(std::move(device_name)), audio_params(audio_params) {
-
+        // clang-format off
         static const FunctionInfo functions[] = {
             {0, &IAudioOut::GetAudioOutState, "GetAudioOutState"},
             {1, &IAudioOut::StartAudioOut, "StartAudioOut"},
@@ -58,7 +58,10 @@ public:
             {9, &IAudioOut::GetAudioOutBufferCount, "GetAudioOutBufferCount"},
             {10, nullptr, "GetAudioOutPlayedSampleCount"},
             {11, nullptr, "FlushAudioOutBuffers"},
+            {12, nullptr, "SetAudioOutVolume"},
+            {13, nullptr, "GetAudioOutVolume"},
         };
+        // clang-format on
         RegisterHandlers(functions);
 
         // This is the event handle used to check if the audio buffer was released
diff --git a/src/core/hle/service/btdrv/btdrv.cpp b/src/core/hle/service/btdrv/btdrv.cpp
index 59ef603e1..974ff8e1a 100644
--- a/src/core/hle/service/btdrv/btdrv.cpp
+++ b/src/core/hle/service/btdrv/btdrv.cpp
@@ -154,7 +154,8 @@ public:
             {96, nullptr, "GetLeHidEventInfo"},
             {97, nullptr, "RegisterBleHidEvent"},
             {98, nullptr, "SetLeScanParameter"},
-            {256, nullptr, "GetIsManufacturingMode"}
+            {256, nullptr, "GetIsManufacturingMode"},
+            {257, nullptr, "EmulateBluetoothCrash"},
         };
         // clang-format on
 
diff --git a/src/core/hle/service/caps/caps.cpp b/src/core/hle/service/caps/caps.cpp
index ae7b0720b..907f464ab 100644
--- a/src/core/hle/service/caps/caps.cpp
+++ b/src/core/hle/service/caps/caps.cpp
@@ -15,32 +15,41 @@ public:
     explicit CAPS_A() : ServiceFramework{"caps:a"} {
         // clang-format off
         static const FunctionInfo functions[] = {
-            {0, nullptr, "Unknown1"},
-            {1, nullptr, "Unknown2"},
-            {2, nullptr, "Unknown3"},
-            {3, nullptr, "Unknown4"},
-            {4, nullptr, "Unknown5"},
-            {5, nullptr, "Unknown6"},
-            {6, nullptr, "Unknown7"},
-            {7, nullptr, "Unknown8"},
-            {8, nullptr, "Unknown9"},
-            {9, nullptr, "Unknown10"},
-            {10, nullptr, "Unknown11"},
-            {11, nullptr, "Unknown12"},
-            {12, nullptr, "Unknown13"},
-            {13, nullptr, "Unknown14"},
-            {14, nullptr, "Unknown15"},
-            {301, nullptr, "Unknown16"},
-            {401, nullptr, "Unknown17"},
-            {501, nullptr, "Unknown18"},
-            {1001, nullptr, "Unknown19"},
-            {1002, nullptr, "Unknown20"},
-            {8001, nullptr, "Unknown21"},
-            {8002, nullptr, "Unknown22"},
-            {8011, nullptr, "Unknown23"},
-            {8012, nullptr, "Unknown24"},
-            {8021, nullptr, "Unknown25"},
-            {10011, nullptr, "Unknown26"},
+            {0, nullptr, "GetAlbumFileCount"},
+            {1, nullptr, "GetAlbumFileList"},
+            {2, nullptr, "LoadAlbumFile"},
+            {3, nullptr, "DeleteAlbumFile"},
+            {4, nullptr, "StorageCopyAlbumFile"},
+            {5, nullptr, "IsAlbumMounted"},
+            {6, nullptr, "GetAlbumUsage"},
+            {7, nullptr, "GetAlbumFileSize"},
+            {8, nullptr, "LoadAlbumFileThumbnail"},
+            {9, nullptr, "LoadAlbumScreenShotImage"},
+            {10, nullptr, "LoadAlbumScreenShotThumbnailImage"},
+            {11, nullptr, "GetAlbumEntryFromApplicationAlbumEntry"},
+            {12, nullptr, "Unknown12"},
+            {13, nullptr, "Unknown13"},
+            {14, nullptr, "Unknown14"},
+            {15, nullptr, "Unknown15"},
+            {16, nullptr, "Unknown16"},
+            {17, nullptr, "Unknown17"},
+            {18, nullptr, "Unknown18"},
+            {202, nullptr, "SaveEditedScreenShot"},
+            {301, nullptr, "GetLastThumbnail"},
+            {401, nullptr, "GetAutoSavingStorage"},
+            {501, nullptr, "GetRequiredStorageSpaceSizeToCopyAll"},
+            {1001, nullptr, "Unknown1001"},
+            {1002, nullptr, "Unknown1002"},
+            {1003, nullptr, "Unknown1003"},
+            {8001, nullptr, "ForceAlbumUnmounted"},
+            {8002, nullptr, "ResetAlbumMountStatus"},
+            {8011, nullptr, "RefreshAlbumCache"},
+            {8012, nullptr, "GetAlbumCache"},
+            {8013, nullptr, "Unknown8013"},
+            {8021, nullptr, "GetAlbumEntryFromApplicationAlbumEntryAruid"},
+            {10011, nullptr, "SetInternalErrorConversionEnabled"},
+            {50000, nullptr, "Unknown50000"},
+            {60002, nullptr, "Unknown60002"},
         };
         // clang-format on
 
@@ -53,16 +62,17 @@ public:
     explicit CAPS_C() : ServiceFramework{"caps:c"} {
         // clang-format off
         static const FunctionInfo functions[] = {
-            {2001, nullptr, "Unknown1"},
-            {2002, nullptr, "Unknown2"},
-            {2011, nullptr, "Unknown3"},
-            {2012, nullptr, "Unknown4"},
-            {2013, nullptr, "Unknown5"},
-            {2014, nullptr, "Unknown6"},
-            {2101, nullptr, "Unknown7"},
-            {2102, nullptr, "Unknown8"},
-            {2201, nullptr, "Unknown9"},
-            {2301, nullptr, "Unknown10"},
+            {33, nullptr, "Unknown33"},
+            {2001, nullptr, "Unknown2001"},
+            {2002, nullptr, "Unknown2002"},
+            {2011, nullptr, "Unknown2011"},
+            {2012, nullptr, "Unknown2012"},
+            {2013, nullptr, "Unknown2013"},
+            {2014, nullptr, "Unknown2014"},
+            {2101, nullptr, "Unknown2101"},
+            {2102, nullptr, "Unknown2102"},
+            {2201, nullptr, "Unknown2201"},
+            {2301, nullptr, "Unknown2301"},
         };
         // clang-format on
 
@@ -127,11 +137,18 @@ public:
     explicit CAPS_U() : ServiceFramework{"caps:u"} {
         // clang-format off
         static const FunctionInfo functions[] = {
+            {32, nullptr, "SetShimLibraryVersion"},
             {102, nullptr, "GetAlbumFileListByAruid"},
             {103, nullptr, "DeleteAlbumFileByAruid"},
             {104, nullptr, "GetAlbumFileSizeByAruid"},
+            {105, nullptr, "DeleteAlbumFileByAruidForDebug"},
             {110, nullptr, "LoadAlbumScreenShotImageByAruid"},
             {120, nullptr, "LoadAlbumScreenShotThumbnailImageByAruid"},
+            {130, nullptr, "PrecheckToCreateContentsByAruid"},
+            {140, nullptr, "GetAlbumFileList1AafeAruidDeprecated"},
+            {141, nullptr, "GetAlbumFileList2AafeUidAruidDeprecated"},
+            {142, nullptr, "GetAlbumFileList3AaeAruid"},
+            {143, nullptr, "GetAlbumFileList4AaeUidAruid"},
             {60002, nullptr, "OpenAccessorSessionForApplication"},
         };
         // clang-format on
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index 657baddb8..e7df8fd98 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -115,11 +115,12 @@ private:
 
     void Read(Kernel::HLERequestContext& ctx) {
         IPC::RequestParser rp{ctx};
-        const u64 unk = rp.Pop<u64>();
+        const u64 option = rp.Pop<u64>();
         const s64 offset = rp.Pop<s64>();
         const s64 length = rp.Pop<s64>();
 
-        LOG_DEBUG(Service_FS, "called, offset=0x{:X}, length={}", offset, length);
+        LOG_DEBUG(Service_FS, "called, option={}, offset=0x{:X}, length={}", option, offset,
+                  length);
 
         // Error checking
         if (length < 0) {
@@ -148,11 +149,12 @@ private:
 
     void Write(Kernel::HLERequestContext& ctx) {
         IPC::RequestParser rp{ctx};
-        const u64 unk = rp.Pop<u64>();
+        const u64 option = rp.Pop<u64>();
         const s64 offset = rp.Pop<s64>();
         const s64 length = rp.Pop<s64>();
 
-        LOG_DEBUG(Service_FS, "called, offset=0x{:X}, length={}", offset, length);
+        LOG_DEBUG(Service_FS, "called, option={}, offset=0x{:X}, length={}", option, offset,
+                  length);
 
         // Error checking
         if (length < 0) {
@@ -250,10 +252,7 @@ private:
     u64 next_entry_index = 0;
 
     void Read(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-        const u64 unk = rp.Pop<u64>();
-
-        LOG_DEBUG(Service_FS, "called, unk=0x{:X}", unk);
+        LOG_DEBUG(Service_FS, "called.");
 
         // Calculate how many entries we can fit in the output buffer
         const u64 count_entries = ctx.GetWriteBufferSize() / sizeof(FileSys::Entry);
@@ -665,10 +664,13 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
         {100, nullptr, "OpenImageDirectoryFileSystem"},
         {110, nullptr, "OpenContentStorageFileSystem"},
         {120, nullptr, "OpenCloudBackupWorkStorageFileSystem"},
+        {130, nullptr, "OpenCustomStorageFileSystem"},
         {200, &FSP_SRV::OpenDataStorageByCurrentProcess, "OpenDataStorageByCurrentProcess"},
         {201, nullptr, "OpenDataStorageByProgramId"},
         {202, &FSP_SRV::OpenDataStorageByDataId, "OpenDataStorageByDataId"},
         {203, &FSP_SRV::OpenPatchDataStorageByCurrentProcess, "OpenPatchDataStorageByCurrentProcess"},
+        {204, nullptr, "OpenDataFileSystemByProgramIndex"},
+        {205, nullptr, "OpenDataStorageByProgramIndex"},
         {400, nullptr, "OpenDeviceOperator"},
         {500, nullptr, "OpenSdCardDetectionEventNotifier"},
         {501, nullptr, "OpenGameCardDetectionEventNotifier"},
@@ -692,6 +694,7 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
         {614, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId"},
         {615, nullptr, "QuerySaveDataInternalStorageTotalSize"},
         {616, nullptr, "GetSaveDataCommitId"},
+        {617, nullptr, "UnregisterExternalKey"},
         {620, nullptr, "SetSdCardEncryptionSeed"},
         {630, nullptr, "SetSdCardAccessibility"},
         {631, nullptr, "IsSdCardAccessible"},
@@ -702,6 +705,7 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
         {710, nullptr, "ResolveAccessFailure"},
         {720, nullptr, "AbandonAccessFailure"},
         {800, nullptr, "GetAndClearFileSystemProxyErrorInfo"},
+        {810, nullptr, "RegisterProgramIndexMapInfo"},
         {1000, nullptr, "SetBisRootForHost"},
         {1001, nullptr, "SetSaveDataSize"},
         {1002, nullptr, "SetSaveDataRootPath"},
@@ -712,6 +716,8 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
         {1007, nullptr, "RegisterUpdatePartition"},
         {1008, nullptr, "OpenRegisteredUpdatePartition"},
         {1009, nullptr, "GetAndClearMemoryReportInfo"},
+        {1010, nullptr, "SetDataStorageRedirectTarget"},
+        {1011, nullptr, "OutputAccessLogToSdCard2"},
         {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"},
         {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"},
         {1200, nullptr, "OpenMultiCommitManager"},
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp
index d9225d624..5100e376c 100644
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -12,6 +12,7 @@ namespace Service::Friend {
 class IFriendService final : public ServiceFramework<IFriendService> {
 public:
     IFriendService() : ServiceFramework("IFriendService") {
+        // clang-format off
         static const FunctionInfo functions[] = {
             {0, nullptr, "GetCompletionEvent"},
             {1, nullptr, "Cancel"},
@@ -24,8 +25,7 @@ public:
             {10400, nullptr, "GetBlockedUserListIds"},
             {10500, nullptr, "GetProfileList"},
             {10600, nullptr, "DeclareOpenOnlinePlaySession"},
-            {10601, &IFriendService::DeclareCloseOnlinePlaySession,
-             "DeclareCloseOnlinePlaySession"},
+            {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"},
             {10610, &IFriendService::UpdateUserPresence, "UpdateUserPresence"},
             {10700, nullptr, "GetPlayHistoryRegistrationKey"},
             {10701, nullptr, "GetPlayHistoryRegistrationKeyWithNetworkServiceAccountId"},
@@ -88,6 +88,7 @@ public:
             {30830, nullptr, "ClearPlayLog"},
             {49900, nullptr, "DeleteNetworkServiceAccountCache"},
         };
+        // clang-format on
 
         RegisterHandlers(functions);
     }
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 63b55758b..a4ad95d96 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -210,6 +210,7 @@ Hid::Hid() : ServiceFramework("hid") {
         {131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"},
         {132, nullptr, "EnableUnintendedHomeButtonInputProtection"},
         {133, nullptr, "SetNpadJoyAssignmentModeSingleWithDestination"},
+        {134, nullptr, "SetNpadAnalogStickUseCenterClamp"},
         {200, &Hid::GetVibrationDeviceInfo, "GetVibrationDeviceInfo"},
         {201, &Hid::SendVibrationValue, "SendVibrationValue"},
         {202, &Hid::GetActualVibrationValue, "GetActualVibrationValue"},
@@ -221,6 +222,7 @@ Hid::Hid() : ServiceFramework("hid") {
         {208, nullptr, "GetActualVibrationGcErmCommand"},
         {209, &Hid::BeginPermitVibrationSession, "BeginPermitVibrationSession"},
         {210, &Hid::EndPermitVibrationSession, "EndPermitVibrationSession"},
+        {211, nullptr, "IsVibrationDeviceMounted"},
         {300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"},
         {301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"},
         {302, nullptr, "StopConsoleSixAxisSensor"},
@@ -265,6 +267,7 @@ Hid::Hid() : ServiceFramework("hid") {
         {523, nullptr, "SetIsPalmaPairedConnectable"},
         {524, nullptr, "PairPalma"},
         {525, &Hid::SetPalmaBoostMode, "SetPalmaBoostMode"},
+        {526, nullptr, "CancelWritePalmaWaveEntry"},
         {1000, nullptr, "SetNpadCommunicationMode"},
         {1001, nullptr, "GetNpadCommunicationMode"},
     };
@@ -797,12 +800,22 @@ public:
             {232, nullptr, "EnableShipmentMode"},
             {233, nullptr, "ClearPairingInfo"},
             {234, nullptr, "GetUniquePadDeviceTypeSetInternal"},
+            {235, nullptr, "EnableAnalogStickPower"},
             {301, nullptr, "GetAbstractedPadHandles"},
             {302, nullptr, "GetAbstractedPadState"},
             {303, nullptr, "GetAbstractedPadsState"},
             {321, nullptr, "SetAutoPilotVirtualPadState"},
             {322, nullptr, "UnsetAutoPilotVirtualPadState"},
             {323, nullptr, "UnsetAllAutoPilotVirtualPadState"},
+            {324, nullptr, "AttachHdlsWorkBuffer"},
+            {325, nullptr, "ReleaseHdlsWorkBuffer"},
+            {326, nullptr, "DumpHdlsNpadAssignmentState"},
+            {327, nullptr, "DumpHdlsStates"},
+            {328, nullptr, "ApplyHdlsNpadAssignmentState"},
+            {329, nullptr, "ApplyHdlsStateList"},
+            {330, nullptr, "AttachHdlsVirtualDevice"},
+            {331, nullptr, "DetachHdlsVirtualDevice"},
+            {332, nullptr, "SetHdlsState"},
             {350, nullptr, "AddRegisteredDevice"},
             {400, nullptr, "DisableExternalMcuOnNxDevice"},
             {401, nullptr, "DisableRailDeviceFiltering"},
@@ -825,6 +838,7 @@ public:
             {131, nullptr, "ActivateSleepButton"},
             {141, nullptr, "AcquireCaptureButtonEventHandle"},
             {151, nullptr, "ActivateCaptureButton"},
+            {161, nullptr, "GetPlatformConfig"},
             {210, nullptr, "AcquireNfcDeviceUpdateEventHandle"},
             {211, nullptr, "GetNpadsWithNfc"},
             {212, nullptr, "AcquireNfcActivateEventHandle"},
@@ -894,6 +908,7 @@ public:
             {827, nullptr, "IsAnalogStickButtonPressed"},
             {828, nullptr, "IsAnalogStickInReleasePosition"},
             {829, nullptr, "IsAnalogStickInCircumference"},
+            {830, nullptr, "SetNotificationLedPattern"},
             {850, nullptr, "IsUsbFullKeyControllerEnabled"},
             {851, nullptr, "EnableUsbFullKeyController"},
             {852, nullptr, "IsUsbConnected"},
diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp
index e250595e3..ed5059047 100644
--- a/src/core/hle/service/ldn/ldn.cpp
+++ b/src/core/hle/service/ldn/ldn.cpp
@@ -52,9 +52,11 @@ public:
     }
 };
 
-class ILocalCommunicationService final : public ServiceFramework<ILocalCommunicationService> {
+class ISystemLocalCommunicationService final
+    : public ServiceFramework<ISystemLocalCommunicationService> {
 public:
-    explicit ILocalCommunicationService(const char* name) : ServiceFramework{name} {
+    explicit ISystemLocalCommunicationService()
+        : ServiceFramework{"ISystemLocalCommunicationService"} {
         // clang-format off
         static const FunctionInfo functions[] = {
             {0, nullptr, "GetState"},
@@ -84,6 +86,50 @@ public:
             {304, nullptr, "Disconnect"},
             {400, nullptr, "InitializeSystem"},
             {401, nullptr, "FinalizeSystem"},
+            {402, nullptr, "SetOperationMode"},
+            {403, nullptr, "InitializeSystem2"},
+        };
+        // clang-format on
+
+        RegisterHandlers(functions);
+    }
+};
+
+class IUserLocalCommunicationService final
+    : public ServiceFramework<IUserLocalCommunicationService> {
+public:
+    explicit IUserLocalCommunicationService() : ServiceFramework{"IUserLocalCommunicationService"} {
+        // clang-format off
+        static const FunctionInfo functions[] = {
+            {0, nullptr, "GetState"},
+            {1, nullptr, "GetNetworkInfo"},
+            {2, nullptr, "GetIpv4Address"},
+            {3, nullptr, "GetDisconnectReason"},
+            {4, nullptr, "GetSecurityParameter"},
+            {5, nullptr, "GetNetworkConfig"},
+            {100, nullptr, "AttachStateChangeEvent"},
+            {101, nullptr, "GetNetworkInfoLatestUpdate"},
+            {102, nullptr, "Scan"},
+            {103, nullptr, "ScanPrivate"},
+            {104, nullptr, "SetWirelessControllerRestriction"},
+            {200, nullptr, "OpenAccessPoint"},
+            {201, nullptr, "CloseAccessPoint"},
+            {202, nullptr, "CreateNetwork"},
+            {203, nullptr, "CreateNetworkPrivate"},
+            {204, nullptr, "DestroyNetwork"},
+            {205, nullptr, "Reject"},
+            {206, nullptr, "SetAdvertiseData"},
+            {207, nullptr, "SetStationAcceptPolicy"},
+            {208, nullptr, "AddAcceptFilterEntry"},
+            {209, nullptr, "ClearAcceptFilter"},
+            {300, nullptr, "OpenStation"},
+            {301, nullptr, "CloseStation"},
+            {302, nullptr, "Connect"},
+            {303, nullptr, "ConnectPrivate"},
+            {304, nullptr, "Disconnect"},
+            {400, nullptr, "Initialize"},
+            {401, nullptr, "Finalize"},
+            {402, nullptr, "SetOperationMode"},
         };
         // clang-format on
 
@@ -108,7 +154,7 @@ public:
 
         IPC::ResponseBuilder rb{ctx, 2, 0, 1};
         rb.Push(RESULT_SUCCESS);
-        rb.PushIpcInterface<ILocalCommunicationService>("ISystemLocalCommunicationService");
+        rb.PushIpcInterface<ISystemLocalCommunicationService>();
     }
 };
 
@@ -129,7 +175,7 @@ public:
 
         IPC::ResponseBuilder rb{ctx, 2, 0, 1};
         rb.Push(RESULT_SUCCESS);
-        rb.PushIpcInterface<ILocalCommunicationService>("IUserLocalCommunicationService");
+        rb.PushIpcInterface<IUserLocalCommunicationService>();
     }
 };
 
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index d65693fc7..5af925515 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -86,6 +86,7 @@ public:
             {2, &RelocatableObject::LoadNrr, "LoadNrr"},
             {3, &RelocatableObject::UnloadNrr, "UnloadNrr"},
             {4, &RelocatableObject::Initialize, "Initialize"},
+            {10, nullptr, "LoadNrrEx"},
         };
         // clang-format on
 
@@ -93,12 +94,18 @@ public:
     }
 
     void LoadNrr(Kernel::HLERequestContext& ctx) {
+        struct Parameters {
+            u64_le process_id;
+            u64_le nrr_address;
+            u64_le nrr_size;
+        };
+
         IPC::RequestParser rp{ctx};
-        rp.Skip(2, false);
-        const VAddr nrr_addr{rp.Pop<VAddr>()};
-        const u64 nrr_size{rp.Pop<u64>()};
-        LOG_DEBUG(Service_LDR, "called with nrr_addr={:016X}, nrr_size={:016X}", nrr_addr,
-                  nrr_size);
+        const auto [process_id, nrr_address, nrr_size] = rp.PopRaw<Parameters>();
+
+        LOG_DEBUG(Service_LDR,
+                  "called with process_id={:016X}, nrr_address={:016X}, nrr_size={:016X}",
+                  process_id, nrr_address, nrr_size);
 
         if (!initialized) {
             LOG_ERROR(Service_LDR, "LDR:RO not initialized before use!");
@@ -116,24 +123,26 @@ public:
         }
 
         // NRR Address does not fall on 0x1000 byte boundary
-        if (!Common::Is4KBAligned(nrr_addr)) {
-            LOG_ERROR(Service_LDR, "NRR Address has invalid alignment (actual {:016X})!", nrr_addr);
+        if (!Common::Is4KBAligned(nrr_address)) {
+            LOG_ERROR(Service_LDR, "NRR Address has invalid alignment (actual {:016X})!",
+                      nrr_address);
             IPC::ResponseBuilder rb{ctx, 2};
             rb.Push(ERROR_INVALID_ALIGNMENT);
             return;
         }
 
         // NRR Size is zero or causes overflow
-        if (nrr_addr + nrr_size <= nrr_addr || nrr_size == 0 || !Common::Is4KBAligned(nrr_size)) {
+        if (nrr_address + nrr_size <= nrr_address || nrr_size == 0 ||
+            !Common::Is4KBAligned(nrr_size)) {
             LOG_ERROR(Service_LDR, "NRR Size is invalid! (nrr_address={:016X}, nrr_size={:016X})",
-                      nrr_addr, nrr_size);
+                      nrr_address, nrr_size);
             IPC::ResponseBuilder rb{ctx, 2};
             rb.Push(ERROR_INVALID_SIZE);
             return;
         }
         // Read NRR data from memory
         std::vector<u8> nrr_data(nrr_size);
-        Memory::ReadBlock(nrr_addr, nrr_data.data(), nrr_size);
+        Memory::ReadBlock(nrr_address, nrr_data.data(), nrr_size);
         NRRHeader header;
         std::memcpy(&header, nrr_data.data(), sizeof(NRRHeader));
 
@@ -174,7 +183,7 @@ public:
             hashes.emplace_back(hash);
         }
 
-        nrr.insert_or_assign(nrr_addr, std::move(hashes));
+        nrr.insert_or_assign(nrr_address, std::move(hashes));
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
@@ -188,23 +197,30 @@ public:
             return;
         }
 
+        struct Parameters {
+            u64_le process_id;
+            u64_le nrr_address;
+        };
+
         IPC::RequestParser rp{ctx};
-        rp.Skip(2, false);
-        const auto nrr_addr{rp.Pop<VAddr>()};
-        LOG_DEBUG(Service_LDR, "called with nrr_addr={:016X}", nrr_addr);
+        const auto [process_id, nrr_address] = rp.PopRaw<Parameters>();
+
+        LOG_DEBUG(Service_LDR, "called with process_id={:016X}, nrr_addr={:016X}", process_id,
+                  nrr_address);
 
-        if (!Common::Is4KBAligned(nrr_addr)) {
-            LOG_ERROR(Service_LDR, "NRR Address has invalid alignment (actual {:016X})!", nrr_addr);
+        if (!Common::Is4KBAligned(nrr_address)) {
+            LOG_ERROR(Service_LDR, "NRR Address has invalid alignment (actual {:016X})!",
+                      nrr_address);
             IPC::ResponseBuilder rb{ctx, 2};
             rb.Push(ERROR_INVALID_ALIGNMENT);
             return;
         }
 
-        const auto iter = nrr.find(nrr_addr);
+        const auto iter = nrr.find(nrr_address);
         if (iter == nrr.end()) {
             LOG_ERROR(Service_LDR,
                       "Attempting to unload NRR which has not been loaded! (addr={:016X})",
-                      nrr_addr);
+                      nrr_address);
             IPC::ResponseBuilder rb{ctx, 2};
             rb.Push(ERROR_INVALID_NRR_ADDRESS);
             return;
@@ -216,16 +232,22 @@ public:
     }
 
     void LoadNro(Kernel::HLERequestContext& ctx) {
+        struct Parameters {
+            u64_le process_id;
+            u64_le image_address;
+            u64_le image_size;
+            u64_le bss_address;
+            u64_le bss_size;
+        };
+
         IPC::RequestParser rp{ctx};
-        rp.Skip(2, false);
-        const VAddr nro_addr{rp.Pop<VAddr>()};
-        const u64 nro_size{rp.Pop<u64>()};
-        const VAddr bss_addr{rp.Pop<VAddr>()};
-        const u64 bss_size{rp.Pop<u64>()};
-        LOG_DEBUG(
-            Service_LDR,
-            "called with nro_addr={:016X}, nro_size={:016X}, bss_addr={:016X}, bss_size={:016X}",
-            nro_addr, nro_size, bss_addr, bss_size);
+        const auto [process_id, nro_address, nro_size, bss_address, bss_size] =
+            rp.PopRaw<Parameters>();
+
+        LOG_DEBUG(Service_LDR,
+                  "called with pid={:016X}, nro_addr={:016X}, nro_size={:016X}, bss_addr={:016X}, "
+                  "bss_size={:016X}",
+                  process_id, nro_address, nro_size, bss_address, bss_size);
 
         if (!initialized) {
             LOG_ERROR(Service_LDR, "LDR:RO not initialized before use!");
@@ -243,8 +265,9 @@ public:
         }
 
         // NRO Address does not fall on 0x1000 byte boundary
-        if (!Common::Is4KBAligned(nro_addr)) {
-            LOG_ERROR(Service_LDR, "NRO Address has invalid alignment (actual {:016X})!", nro_addr);
+        if (!Common::Is4KBAligned(nro_address)) {
+            LOG_ERROR(Service_LDR, "NRO Address has invalid alignment (actual {:016X})!",
+                      nro_address);
             IPC::ResponseBuilder rb{ctx, 2};
             rb.Push(ERROR_INVALID_ALIGNMENT);
             return;
@@ -252,15 +275,15 @@ public:
 
         // NRO Size or BSS Size is zero or causes overflow
         const auto nro_size_valid =
-            nro_size != 0 && nro_addr + nro_size > nro_addr && Common::Is4KBAligned(nro_size);
-        const auto bss_size_valid =
-            nro_size + bss_size >= nro_size && (bss_size == 0 || bss_addr + bss_size > bss_addr);
+            nro_size != 0 && nro_address + nro_size > nro_address && Common::Is4KBAligned(nro_size);
+        const auto bss_size_valid = nro_size + bss_size >= nro_size &&
+                                    (bss_size == 0 || bss_address + bss_size > bss_address);
 
         if (!nro_size_valid || !bss_size_valid) {
             LOG_ERROR(Service_LDR,
                       "NRO Size or BSS Size is invalid! (nro_address={:016X}, nro_size={:016X}, "
                       "bss_address={:016X}, bss_size={:016X})",
-                      nro_addr, nro_size, bss_addr, bss_size);
+                      nro_address, nro_size, bss_address, bss_size);
             IPC::ResponseBuilder rb{ctx, 2};
             rb.Push(ERROR_INVALID_SIZE);
             return;
@@ -268,7 +291,7 @@ public:
 
         // Read NRO data from memory
         std::vector<u8> nro_data(nro_size);
-        Memory::ReadBlock(nro_addr, nro_data.data(), nro_size);
+        Memory::ReadBlock(nro_address, nro_data.data(), nro_size);
 
         SHA256Hash hash{};
         mbedtls_sha256(nro_data.data(), nro_data.size(), hash.data(), 0);
@@ -318,17 +341,18 @@ public:
             return;
         }
 
-        ASSERT(vm_manager
-                   .MirrorMemory(*map_address, nro_addr, nro_size, Kernel::MemoryState::ModuleCode)
-                   .IsSuccess());
-        ASSERT(vm_manager.UnmapRange(nro_addr, nro_size).IsSuccess());
+        ASSERT(
+            vm_manager
+                .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode)
+                .IsSuccess());
+        ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess());
 
         if (bss_size > 0) {
             ASSERT(vm_manager
-                       .MirrorMemory(*map_address + nro_size, bss_addr, bss_size,
+                       .MirrorMemory(*map_address + nro_size, bss_address, bss_size,
                                      Kernel::MemoryState::ModuleCode)
                        .IsSuccess());
-            ASSERT(vm_manager.UnmapRange(bss_addr, bss_size).IsSuccess());
+            ASSERT(vm_manager.UnmapRange(bss_address, bss_size).IsSuccess());
         }
 
         vm_manager.ReprotectRange(*map_address, header.text_size,
@@ -348,13 +372,6 @@ public:
     }
 
     void UnloadNro(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-        rp.Skip(2, false);
-        const VAddr mapped_addr{rp.PopRaw<VAddr>()};
-        const VAddr heap_addr{rp.PopRaw<VAddr>()};
-        LOG_DEBUG(Service_LDR, "called with mapped_addr={:016X}, heap_addr={:016X}", mapped_addr,
-                  heap_addr);
-
         if (!initialized) {
             LOG_ERROR(Service_LDR, "LDR:RO not initialized before use!");
             IPC::ResponseBuilder rb{ctx, 2};
@@ -362,22 +379,30 @@ public:
             return;
         }
 
-        if (!Common::Is4KBAligned(mapped_addr) || !Common::Is4KBAligned(heap_addr)) {
-            LOG_ERROR(Service_LDR,
-                      "NRO/BSS Address has invalid alignment (actual nro_addr={:016X}, "
-                      "bss_addr={:016X})!",
-                      mapped_addr, heap_addr);
+        struct Parameters {
+            u64_le process_id;
+            u64_le nro_address;
+        };
+
+        IPC::RequestParser rp{ctx};
+        const auto [process_id, nro_address] = rp.PopRaw<Parameters>();
+        LOG_DEBUG(Service_LDR, "called with process_id={:016X}, nro_address=0x{:016X}", process_id,
+                  nro_address);
+
+        if (!Common::Is4KBAligned(nro_address)) {
+            LOG_ERROR(Service_LDR, "NRO address has invalid alignment (nro_address=0x{:016X})",
+                      nro_address);
             IPC::ResponseBuilder rb{ctx, 2};
             rb.Push(ERROR_INVALID_ALIGNMENT);
             return;
         }
 
-        const auto iter = nro.find(mapped_addr);
+        const auto iter = nro.find(nro_address);
         if (iter == nro.end()) {
             LOG_ERROR(Service_LDR,
-                      "The NRO attempting to unmap was not mapped or has an invalid address "
-                      "(actual {:016X})!",
-                      mapped_addr);
+                      "The NRO attempting to be unmapped was not mapped or has an invalid address "
+                      "(nro_address=0x{:016X})!",
+                      nro_address);
             IPC::ResponseBuilder rb{ctx, 2};
             rb.Push(ERROR_INVALID_NRO_ADDRESS);
             return;
@@ -386,10 +411,7 @@ public:
         auto& vm_manager = Core::CurrentProcess()->VMManager();
         const auto& nro_size = iter->second.size;
 
-        ASSERT(vm_manager
-                   .MirrorMemory(heap_addr, mapped_addr, nro_size, Kernel::MemoryState::ModuleCode)
-                   .IsSuccess());
-        ASSERT(vm_manager.UnmapRange(mapped_addr, nro_size).IsSuccess());
+        ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess());
 
         Core::System::GetInstance().InvalidateCpuInstructionCaches();
 
@@ -459,11 +481,10 @@ private:
     std::map<VAddr, NROInfo> nro;
     std::map<VAddr, std::vector<SHA256Hash>> nrr;
 
-    bool IsValidNROHash(const SHA256Hash& hash) {
-        return std::any_of(
-            nrr.begin(), nrr.end(), [&hash](const std::pair<VAddr, std::vector<SHA256Hash>>& p) {
-                return std::find(p.second.begin(), p.second.end(), hash) != p.second.end();
-            });
+    bool IsValidNROHash(const SHA256Hash& hash) const {
+        return std::any_of(nrr.begin(), nrr.end(), [&hash](const auto& p) {
+            return std::find(p.second.begin(), p.second.end(), hash) != p.second.end();
+        });
     }
 
     static bool IsValidNRO(const NROHeader& header, u64 nro_size, u64 bss_size) {
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index 60479bb45..f92571008 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -15,12 +15,16 @@ namespace Service::NIFM {
 class IScanRequest final : public ServiceFramework<IScanRequest> {
 public:
     explicit IScanRequest() : ServiceFramework("IScanRequest") {
+        // clang-format off
         static const FunctionInfo functions[] = {
             {0, nullptr, "Submit"},
             {1, nullptr, "IsProcessing"},
             {2, nullptr, "GetResult"},
             {3, nullptr, "GetSystemEventReadableHandle"},
+            {4, nullptr, "SetChannels"},
         };
+        // clang-format on
+
         RegisterHandlers(functions);
     }
 };
diff --git a/src/core/hle/service/npns/npns.cpp b/src/core/hle/service/npns/npns.cpp
index ccb6f9da9..8751522ca 100644
--- a/src/core/hle/service/npns/npns.cpp
+++ b/src/core/hle/service/npns/npns.cpp
@@ -45,7 +45,7 @@ public:
             {114, nullptr, "AttachJid"},
             {115, nullptr, "DetachJid"},
             {201, nullptr, "RequestChangeStateForceTimed"},
-            {102, nullptr, "RequestChangeStateForceAsync"},
+            {202, nullptr, "RequestChangeStateForceAsync"},
         };
         // clang-format on
 
@@ -73,6 +73,7 @@ public:
             {103, nullptr, "GetState"},
             {104, nullptr, "GetStatistics"},
             {111, nullptr, "GetJid"},
+            {120, nullptr, "CreateNotificationReceiver"},
         };
         // clang-format on
 
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index c7f5bbf28..3c5c53e24 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -21,12 +21,13 @@
 #include "core/hle/service/vi/display/vi_display.h"
 #include "core/hle/service/vi/layer/vi_layer.h"
 #include "core/perf_stats.h"
+#include "core/settings.h"
 #include "video_core/renderer_base.h"
 
 namespace Service::NVFlinger {
 
-constexpr std::size_t SCREEN_REFRESH_RATE = 60;
-constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
+constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
+constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 30);
 
 NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
     displays.emplace_back(0, "Default");
@@ -36,13 +37,15 @@ NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_t
     displays.emplace_back(4, "Null");
 
     // Schedule the screen composition events
-    composition_event =
-        core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) {
+    const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : frame_ticks;
+
+    composition_event = core_timing.RegisterEvent(
+        "ScreenComposition", [this, ticks](u64 userdata, s64 cycles_late) {
             Compose();
-            this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event);
+            this->core_timing.ScheduleEvent(ticks - cycles_late, composition_event);
         });
 
-    core_timing.ScheduleEvent(frame_ticks, composition_event);
+    core_timing.ScheduleEvent(ticks, composition_event);
 }
 
 NVFlinger::~NVFlinger() {
@@ -62,6 +65,7 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
     const auto itr =
         std::find_if(displays.begin(), displays.end(),
                      [&](const VI::Display& display) { return display.GetName() == name; });
+
     if (itr == displays.end()) {
         return {};
     }
diff --git a/src/core/hle/service/pctl/module.cpp b/src/core/hle/service/pctl/module.cpp
index 6081f41e1..c75b4ee34 100644
--- a/src/core/hle/service/pctl/module.cpp
+++ b/src/core/hle/service/pctl/module.cpp
@@ -12,10 +12,10 @@ namespace Service::PCTL {
 class IParentalControlService final : public ServiceFramework<IParentalControlService> {
 public:
     IParentalControlService() : ServiceFramework("IParentalControlService") {
+        // clang-format off
         static const FunctionInfo functions[] = {
             {1, &IParentalControlService::Initialize, "Initialize"},
-            {1001, &IParentalControlService::CheckFreeCommunicationPermission,
-             "CheckFreeCommunicationPermission"},
+            {1001, &IParentalControlService::CheckFreeCommunicationPermission, "CheckFreeCommunicationPermission"},
             {1002, nullptr, "ConfirmLaunchApplicationPermission"},
             {1003, nullptr, "ConfirmResumeApplicationPermission"},
             {1004, nullptr, "ConfirmSnsPostPermission"},
@@ -30,6 +30,7 @@ public:
             {1013, nullptr, "ConfirmStereoVisionPermission"},
             {1014, nullptr, "ConfirmPlayableApplicationVideoOld"},
             {1015, nullptr, "ConfirmPlayableApplicationVideo"},
+            {1016, nullptr, "ConfirmShowNewsPermission"},
             {1031, nullptr, "IsRestrictionEnabled"},
             {1032, nullptr, "GetSafetyLevel"},
             {1033, nullptr, "SetSafetyLevel"},
@@ -45,6 +46,7 @@ public:
             {1045, nullptr, "UpdateFreeCommunicationApplicationList"},
             {1046, nullptr, "DisableFeaturesForReset"},
             {1047, nullptr, "NotifyApplicationDownloadStarted"},
+            {1048, nullptr, "NotifyNetworkProfileCreated"},
             {1061, nullptr, "ConfirmStereoVisionRestrictionConfigurable"},
             {1062, nullptr, "GetStereoVisionRestriction"},
             {1063, nullptr, "SetStereoVisionRestriction"},
@@ -63,6 +65,7 @@ public:
             {1411, nullptr, "GetPairingAccountInfo"},
             {1421, nullptr, "GetAccountNickname"},
             {1424, nullptr, "GetAccountState"},
+            {1425, nullptr, "RequestPostEvents"},
             {1432, nullptr, "GetSynchronizationEvent"},
             {1451, nullptr, "StartPlayTimer"},
             {1452, nullptr, "StopPlayTimer"},
diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp
index 6b27dc4a3..ebcc41a43 100644
--- a/src/core/hle/service/pm/pm.cpp
+++ b/src/core/hle/service/pm/pm.cpp
@@ -42,15 +42,18 @@ private:
 class DebugMonitor final : public ServiceFramework<DebugMonitor> {
 public:
     explicit DebugMonitor() : ServiceFramework{"pm:dmnt"} {
+        // clang-format off
         static const FunctionInfo functions[] = {
-            {0, nullptr, "IsDebugMode"},
-            {1, nullptr, "GetDebugProcesses"},
-            {2, nullptr, "StartDebugProcess"},
-            {3, nullptr, "GetTitlePid"},
-            {4, nullptr, "EnableDebugForTitleId"},
-            {5, nullptr, "GetApplicationPid"},
-            {6, nullptr, "EnableDebugForApplication"},
+            {0, nullptr, "GetDebugProcesses"},
+            {1, nullptr, "StartDebugProcess"},
+            {2, nullptr, "GetTitlePid"},
+            {3, nullptr, "EnableDebugForTitleId"},
+            {4, nullptr, "GetApplicationPid"},
+            {5, nullptr, "EnableDebugForApplication"},
+            {6, nullptr, "DisableDebug"},
         };
+        // clang-format on
+
         RegisterHandlers(functions);
     }
 };
@@ -68,6 +71,7 @@ public:
 class Shell final : public ServiceFramework<Shell> {
 public:
     explicit Shell() : ServiceFramework{"pm:shell"} {
+        // clang-format off
         static const FunctionInfo functions[] = {
             {0, nullptr, "LaunchProcess"},
             {1, nullptr, "TerminateProcessByPid"},
@@ -77,7 +81,10 @@ public:
             {5, nullptr, "NotifyBootFinished"},
             {6, nullptr, "GetApplicationPid"},
             {7, nullptr, "BoostSystemMemoryResourceLimit"},
+            {8, nullptr, "EnableAdditionalSystemThreads"},
         };
+        // clang-format on
+
         RegisterHandlers(functions);
     }
 };
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp
index 1afc43f75..4ecb6bcef 100644
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -116,6 +116,7 @@ void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) {
 }
 
 SET::SET() : ServiceFramework("set") {
+    // clang-format off
     static const FunctionInfo functions[] = {
         {0, &SET::GetLanguageCode, "GetLanguageCode"},
         {1, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes"},
@@ -126,7 +127,10 @@ SET::SET() : ServiceFramework("set") {
         {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"},
         {7, nullptr, "GetKeyCodeMap"},
         {8, nullptr, "GetQuestFlag"},
+        {9, nullptr, "GetKeyCodeMap2"},
     };
+    // clang-format on
+
     RegisterHandlers(functions);
 }
 
diff --git a/src/core/hle/service/set/set_cal.cpp b/src/core/hle/service/set/set_cal.cpp
index 34654bb07..5981c575c 100644
--- a/src/core/hle/service/set/set_cal.cpp
+++ b/src/core/hle/service/set/set_cal.cpp
@@ -40,7 +40,7 @@ SET_CAL::SET_CAL() : ServiceFramework("set:cal") {
         {30, nullptr, "GetAmiiboEcqvBlsCertificate"},
         {31, nullptr, "GetAmiiboEcqvBlsRootCertificate"},
         {32, nullptr, "GetUsbTypeCPowerSourceCircuitVersion"},
-        {33, nullptr, "GetBatteryVersion"},
+        {41, nullptr, "GetBatteryVersion"},
     };
     RegisterHandlers(functions);
 }
diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp
index ecee554bf..98d0cfdfd 100644
--- a/src/core/hle/service/set/set_sys.cpp
+++ b/src/core/hle/service/set/set_sys.cpp
@@ -104,6 +104,7 @@ void SET_SYS::SetColorSetId(Kernel::HLERequestContext& ctx) {
 }
 
 SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
+    // clang-format off
     static const FunctionInfo functions[] = {
         {0, nullptr, "SetLanguageCode"},
         {1, nullptr, "SetNetworkSettings"},
@@ -252,7 +253,33 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
         {147, nullptr, "GetConsoleSixAxisSensorAngularAcceleration"},
         {148, nullptr, "SetConsoleSixAxisSensorAngularAcceleration"},
         {149, nullptr, "GetRebootlessSystemUpdateVersion"},
+        {150, nullptr, "GetDeviceTimeZoneLocationUpdatedTime"},
+        {151, nullptr, "SetDeviceTimeZoneLocationUpdatedTime"},
+        {152, nullptr, "GetUserSystemClockAutomaticCorrectionUpdatedTime"},
+        {153, nullptr, "SetUserSystemClockAutomaticCorrectionUpdatedTime"},
+        {154, nullptr, "GetAccountOnlineStorageSettings"},
+        {155, nullptr, "SetAccountOnlineStorageSettings"},
+        {156, nullptr, "GetPctlReadyFlag"},
+        {157, nullptr, "SetPctlReadyFlag"},
+        {162, nullptr, "GetPtmBatteryVersion"},
+        {163, nullptr, "SetPtmBatteryVersion"},
+        {164, nullptr, "GetUsb30HostEnableFlag"},
+        {165, nullptr, "SetUsb30HostEnableFlag"},
+        {166, nullptr, "GetUsb30DeviceEnableFlag"},
+        {167, nullptr, "SetUsb30DeviceEnableFlag"},
+        {168, nullptr, "GetThemeId"},
+        {169, nullptr, "SetThemeId"},
+        {170, nullptr, "GetChineseTraditionalInputMethod"},
+        {171, nullptr, "SetChineseTraditionalInputMethod"},
+        {172, nullptr, "GetPtmCycleCountReliability"},
+        {173, nullptr, "SetPtmCycleCountReliability"},
+        {175, nullptr, "GetThemeSettings"},
+        {176, nullptr, "SetThemeSettings"},
+        {177, nullptr, "GetThemeKey"},
+        {178, nullptr, "SetThemeKey"},
     };
+    // clang-format on
+
     RegisterHandlers(functions);
 }
 
diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp
index 4342f3b2d..884ad173b 100644
--- a/src/core/hle/service/sockets/bsd.cpp
+++ b/src/core/hle/service/sockets/bsd.cpp
@@ -73,6 +73,7 @@ void BSD::Close(Kernel::HLERequestContext& ctx) {
 }
 
 BSD::BSD(const char* name) : ServiceFramework(name) {
+    // clang-format off
     static const FunctionInfo functions[] = {
         {0, &BSD::RegisterClient, "RegisterClient"},
         {1, &BSD::StartMonitoring, "StartMonitoring"},
@@ -105,7 +106,11 @@ BSD::BSD(const char* name) : ServiceFramework(name) {
         {28, nullptr, "GetResourceStatistics"},
         {29, nullptr, "RecvMMsg"},
         {30, nullptr, "SendMMsg"},
+        {31, nullptr, "EventFd"},
+        {32, nullptr, "RegisterResourceStatisticsName"},
     };
+    // clang-format on
+
     RegisterHandlers(functions);
 }
 
diff --git a/src/core/hle/service/ssl/ssl.cpp b/src/core/hle/service/ssl/ssl.cpp
index f7f87a958..65040c077 100644
--- a/src/core/hle/service/ssl/ssl.cpp
+++ b/src/core/hle/service/ssl/ssl.cpp
@@ -103,6 +103,8 @@ public:
             {4, nullptr, "DebugIoctl"},
             {5, &SSL::SetInterfaceVersion, "SetInterfaceVersion"},
             {6, nullptr, "FlushSessionCache"},
+            {7, nullptr, "SetDebugOption"},
+            {8, nullptr, "GetDebugOption"},
         };
         // clang-format on
 
diff --git a/src/core/hle/service/time/interface.cpp b/src/core/hle/service/time/interface.cpp
index b3a196f65..8d122ae33 100644
--- a/src/core/hle/service/time/interface.cpp
+++ b/src/core/hle/service/time/interface.cpp
@@ -8,6 +8,7 @@ namespace Service::Time {
 
 Time::Time(std::shared_ptr<Module> time, const char* name)
     : Module::Interface(std::move(time), name) {
+    // clang-format off
     static const FunctionInfo functions[] = {
         {0, &Time::GetStandardUserSystemClock, "GetStandardUserSystemClock"},
         {1, &Time::GetStandardNetworkSystemClock, "GetStandardNetworkSystemClock"},
@@ -15,18 +16,23 @@ Time::Time(std::shared_ptr<Module> time, const char* name)
         {3, &Time::GetTimeZoneService, "GetTimeZoneService"},
         {4, &Time::GetStandardLocalSystemClock, "GetStandardLocalSystemClock"},
         {5, nullptr, "GetEphemeralNetworkSystemClock"},
+        {20, nullptr, "GetSharedMemoryNativeHandle"},
+        {30, nullptr, "GetStandardNetworkClockOperationEventReadableHandle"},
+        {31, nullptr, "GetEphemeralNetworkClockOperationEventReadableHandle"},
         {50, nullptr, "SetStandardSteadyClockInternalOffset"},
         {100, nullptr, "IsStandardUserSystemClockAutomaticCorrectionEnabled"},
         {101, nullptr, "SetStandardUserSystemClockAutomaticCorrectionEnabled"},
         {102, nullptr, "GetStandardUserSystemClockInitialYear"},
         {200, nullptr, "IsStandardNetworkSystemClockAccuracySufficient"},
+        {201, nullptr, "GetStandardUserSystemClockAutomaticCorrectionUpdatedTime"},
         {300, nullptr, "CalculateMonotonicSystemClockBaseTimePoint"},
         {400, &Time::GetClockSnapshot, "GetClockSnapshot"},
         {401, nullptr, "GetClockSnapshotFromSystemClockContext"},
-        {500, &Time::CalculateStandardUserSystemClockDifferenceByUser,
-         "CalculateStandardUserSystemClockDifferenceByUser"},
+        {500, &Time::CalculateStandardUserSystemClockDifferenceByUser, "CalculateStandardUserSystemClockDifferenceByUser"},
         {501, nullptr, "CalculateSpanBetween"},
     };
+    // clang-format on
+
     RegisterHandlers(functions);
 }
 
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp
index 07aa7a1cd..10b13fb1d 100644
--- a/src/core/loader/deconstructed_rom_directory.cpp
+++ b/src/core/loader/deconstructed_rom_directory.cpp
@@ -86,25 +86,29 @@ FileType AppLoader_DeconstructedRomDirectory::IdentifyType(const FileSys::Virtua
     return FileType::Error;
 }
 
-ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) {
+AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirectory::Load(
+    Kernel::Process& process) {
     if (is_loaded) {
-        return ResultStatus::ErrorAlreadyLoaded;
+        return {ResultStatus::ErrorAlreadyLoaded, {}};
     }
 
     if (dir == nullptr) {
-        if (file == nullptr)
-            return ResultStatus::ErrorNullFile;
+        if (file == nullptr) {
+            return {ResultStatus::ErrorNullFile, {}};
+        }
+
         dir = file->GetContainingDirectory();
     }
 
     // Read meta to determine title ID
     FileSys::VirtualFile npdm = dir->GetFile("main.npdm");
-    if (npdm == nullptr)
-        return ResultStatus::ErrorMissingNPDM;
+    if (npdm == nullptr) {
+        return {ResultStatus::ErrorMissingNPDM, {}};
+    }
 
-    ResultStatus result = metadata.Load(npdm);
+    const ResultStatus result = metadata.Load(npdm);
     if (result != ResultStatus::Success) {
-        return result;
+        return {result, {}};
     }
 
     if (override_update) {
@@ -114,23 +118,24 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process)
 
     // Reread in case PatchExeFS affected the main.npdm
     npdm = dir->GetFile("main.npdm");
-    if (npdm == nullptr)
-        return ResultStatus::ErrorMissingNPDM;
+    if (npdm == nullptr) {
+        return {ResultStatus::ErrorMissingNPDM, {}};
+    }
 
-    ResultStatus result2 = metadata.Load(npdm);
+    const ResultStatus result2 = metadata.Load(npdm);
     if (result2 != ResultStatus::Success) {
-        return result2;
+        return {result2, {}};
     }
     metadata.Print();
 
     const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()};
     if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit ||
         arch_bits == FileSys::ProgramAddressSpaceType::Is32BitNoMap) {
-        return ResultStatus::Error32BitISA;
+        return {ResultStatus::Error32BitISA, {}};
     }
 
     if (process.LoadFromMetadata(metadata).IsError()) {
-        return ResultStatus::ErrorUnableToParseKernelMetadata;
+        return {ResultStatus::ErrorUnableToParseKernelMetadata, {}};
     }
 
     const FileSys::PatchManager pm(metadata.GetTitleID());
@@ -150,7 +155,7 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process)
         const auto tentative_next_load_addr =
             AppLoader_NSO::LoadModule(process, *module_file, load_addr, should_pass_arguments, pm);
         if (!tentative_next_load_addr) {
-            return ResultStatus::ErrorLoadingNSO;
+            return {ResultStatus::ErrorLoadingNSO, {}};
         }
 
         next_load_addr = *tentative_next_load_addr;
@@ -159,8 +164,6 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process)
         GDBStub::RegisterModule(module, load_addr, next_load_addr - 1, false);
     }
 
-    process.Run(base_address, metadata.GetMainThreadPriority(), metadata.GetMainThreadStackSize());
-
     // Find the RomFS by searching for a ".romfs" file in this directory
     const auto& files = dir->GetFiles();
     const auto romfs_iter =
@@ -175,7 +178,8 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process)
     }
 
     is_loaded = true;
-    return ResultStatus::Success;
+    return {ResultStatus::Success,
+            LoadParameters{metadata.GetMainThreadPriority(), metadata.GetMainThreadStackSize()}};
 }
 
 ResultStatus AppLoader_DeconstructedRomDirectory::ReadRomFS(FileSys::VirtualFile& dir) {
diff --git a/src/core/loader/deconstructed_rom_directory.h b/src/core/loader/deconstructed_rom_directory.h
index 1615cb5a8..1a65c16a4 100644
--- a/src/core/loader/deconstructed_rom_directory.h
+++ b/src/core/loader/deconstructed_rom_directory.h
@@ -37,7 +37,7 @@ public:
         return IdentifyType(file);
     }
 
-    ResultStatus Load(Kernel::Process& process) override;
+    LoadResult Load(Kernel::Process& process) override;
 
     ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override;
     ResultStatus ReadIcon(std::vector<u8>& buffer) override;
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 46ac372f6..6d4b02375 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -382,13 +382,15 @@ FileType AppLoader_ELF::IdentifyType(const FileSys::VirtualFile& file) {
     return FileType::Error;
 }
 
-ResultStatus AppLoader_ELF::Load(Kernel::Process& process) {
-    if (is_loaded)
-        return ResultStatus::ErrorAlreadyLoaded;
+AppLoader_ELF::LoadResult AppLoader_ELF::Load(Kernel::Process& process) {
+    if (is_loaded) {
+        return {ResultStatus::ErrorAlreadyLoaded, {}};
+    }
 
     std::vector<u8> buffer = file->ReadAllBytes();
-    if (buffer.size() != file->GetSize())
-        return ResultStatus::ErrorIncorrectELFFileSize;
+    if (buffer.size() != file->GetSize()) {
+        return {ResultStatus::ErrorIncorrectELFFileSize, {}};
+    }
 
     const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
     ElfReader elf_reader(&buffer[0]);
@@ -396,10 +398,9 @@ ResultStatus AppLoader_ELF::Load(Kernel::Process& process) {
     const VAddr entry_point = codeset.entrypoint;
 
     process.LoadModule(std::move(codeset), entry_point);
-    process.Run(entry_point, 48, Memory::DEFAULT_STACK_SIZE);
 
     is_loaded = true;
-    return ResultStatus::Success;
+    return {ResultStatus::Success, LoadParameters{48, Memory::DEFAULT_STACK_SIZE}};
 }
 
 } // namespace Loader
diff --git a/src/core/loader/elf.h b/src/core/loader/elf.h
index a2d33021c..7ef7770a6 100644
--- a/src/core/loader/elf.h
+++ b/src/core/loader/elf.h
@@ -26,7 +26,7 @@ public:
         return IdentifyType(file);
     }
 
-    ResultStatus Load(Kernel::Process& process) override;
+    LoadResult Load(Kernel::Process& process) override;
 };
 
 } // namespace Loader
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index bb925f4a6..f7846db52 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -131,6 +131,12 @@ std::ostream& operator<<(std::ostream& os, ResultStatus status);
 /// Interface for loading an application
 class AppLoader : NonCopyable {
 public:
+    struct LoadParameters {
+        s32 main_thread_priority;
+        u64 main_thread_stack_size;
+    };
+    using LoadResult = std::pair<ResultStatus, std::optional<LoadParameters>>;
+
     explicit AppLoader(FileSys::VirtualFile file);
     virtual ~AppLoader();
 
@@ -145,7 +151,7 @@ public:
      * @param process The newly created process.
      * @return The status result of the operation.
      */
-    virtual ResultStatus Load(Kernel::Process& process) = 0;
+    virtual LoadResult Load(Kernel::Process& process) = 0;
 
     /**
      * Loads the system mode that this application needs.
diff --git a/src/core/loader/nax.cpp b/src/core/loader/nax.cpp
index 93a970d10..34efef09a 100644
--- a/src/core/loader/nax.cpp
+++ b/src/core/loader/nax.cpp
@@ -41,31 +41,37 @@ FileType AppLoader_NAX::GetFileType() const {
     return IdentifyTypeImpl(*nax);
 }
 
-ResultStatus AppLoader_NAX::Load(Kernel::Process& process) {
+AppLoader_NAX::LoadResult AppLoader_NAX::Load(Kernel::Process& process) {
     if (is_loaded) {
-        return ResultStatus::ErrorAlreadyLoaded;
+        return {ResultStatus::ErrorAlreadyLoaded, {}};
     }
 
-    if (nax->GetStatus() != ResultStatus::Success)
-        return nax->GetStatus();
+    const auto nax_status = nax->GetStatus();
+    if (nax_status != ResultStatus::Success) {
+        return {nax_status, {}};
+    }
 
     const auto nca = nax->AsNCA();
     if (nca == nullptr) {
-        if (!Core::Crypto::KeyManager::KeyFileExists(false))
-            return ResultStatus::ErrorMissingProductionKeyFile;
-        return ResultStatus::ErrorNAXInconvertibleToNCA;
+        if (!Core::Crypto::KeyManager::KeyFileExists(false)) {
+            return {ResultStatus::ErrorMissingProductionKeyFile, {}};
+        }
+
+        return {ResultStatus::ErrorNAXInconvertibleToNCA, {}};
     }
 
-    if (nca->GetStatus() != ResultStatus::Success)
-        return nca->GetStatus();
+    const auto nca_status = nca->GetStatus();
+    if (nca_status != ResultStatus::Success) {
+        return {nca_status, {}};
+    }
 
     const auto result = nca_loader->Load(process);
-    if (result != ResultStatus::Success)
+    if (result.first != ResultStatus::Success) {
         return result;
+    }
 
     is_loaded = true;
-
-    return ResultStatus::Success;
+    return result;
 }
 
 ResultStatus AppLoader_NAX::ReadRomFS(FileSys::VirtualFile& dir) {
diff --git a/src/core/loader/nax.h b/src/core/loader/nax.h
index f40079574..00f1659c1 100644
--- a/src/core/loader/nax.h
+++ b/src/core/loader/nax.h
@@ -33,7 +33,7 @@ public:
 
     FileType GetFileType() const override;
 
-    ResultStatus Load(Kernel::Process& process) override;
+    LoadResult Load(Kernel::Process& process) override;
 
     ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override;
     u64 ReadRomFSIVFCOffset() const override;
diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp
index ce8196fcf..b3f8f1083 100644
--- a/src/core/loader/nca.cpp
+++ b/src/core/loader/nca.cpp
@@ -30,36 +30,38 @@ FileType AppLoader_NCA::IdentifyType(const FileSys::VirtualFile& file) {
     return FileType::Error;
 }
 
-ResultStatus AppLoader_NCA::Load(Kernel::Process& process) {
+AppLoader_NCA::LoadResult AppLoader_NCA::Load(Kernel::Process& process) {
     if (is_loaded) {
-        return ResultStatus::ErrorAlreadyLoaded;
+        return {ResultStatus::ErrorAlreadyLoaded, {}};
     }
 
     const auto result = nca->GetStatus();
     if (result != ResultStatus::Success) {
-        return result;
+        return {result, {}};
     }
 
-    if (nca->GetType() != FileSys::NCAContentType::Program)
-        return ResultStatus::ErrorNCANotProgram;
+    if (nca->GetType() != FileSys::NCAContentType::Program) {
+        return {ResultStatus::ErrorNCANotProgram, {}};
+    }
 
     const auto exefs = nca->GetExeFS();
-
-    if (exefs == nullptr)
-        return ResultStatus::ErrorNoExeFS;
+    if (exefs == nullptr) {
+        return {ResultStatus::ErrorNoExeFS, {}};
+    }
 
     directory_loader = std::make_unique<AppLoader_DeconstructedRomDirectory>(exefs, true);
 
     const auto load_result = directory_loader->Load(process);
-    if (load_result != ResultStatus::Success)
+    if (load_result.first != ResultStatus::Success) {
         return load_result;
+    }
 
-    if (nca->GetRomFS() != nullptr && nca->GetRomFS()->GetSize() > 0)
+    if (nca->GetRomFS() != nullptr && nca->GetRomFS()->GetSize() > 0) {
         Service::FileSystem::RegisterRomFS(std::make_unique<FileSys::RomFSFactory>(*this));
+    }
 
     is_loaded = true;
-
-    return ResultStatus::Success;
+    return load_result;
 }
 
 ResultStatus AppLoader_NCA::ReadRomFS(FileSys::VirtualFile& dir) {
diff --git a/src/core/loader/nca.h b/src/core/loader/nca.h
index b9f077468..94f0ed677 100644
--- a/src/core/loader/nca.h
+++ b/src/core/loader/nca.h
@@ -33,7 +33,7 @@ public:
         return IdentifyType(file);
     }
 
-    ResultStatus Load(Kernel::Process& process) override;
+    LoadResult Load(Kernel::Process& process) override;
 
     ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override;
     u64 ReadRomFSIVFCOffset() const override;
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 31e4a0c84..6a0ca389b 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -201,25 +201,25 @@ bool AppLoader_NRO::LoadNro(Kernel::Process& process, const FileSys::VfsFile& fi
     return LoadNroImpl(process, file.ReadAllBytes(), file.GetName(), load_base);
 }
 
-ResultStatus AppLoader_NRO::Load(Kernel::Process& process) {
+AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::Process& process) {
     if (is_loaded) {
-        return ResultStatus::ErrorAlreadyLoaded;
+        return {ResultStatus::ErrorAlreadyLoaded, {}};
     }
 
     // Load NRO
     const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
 
     if (!LoadNro(process, *file, base_address)) {
-        return ResultStatus::ErrorLoadingNRO;
+        return {ResultStatus::ErrorLoadingNRO, {}};
     }
 
-    if (romfs != nullptr)
+    if (romfs != nullptr) {
         Service::FileSystem::RegisterRomFS(std::make_unique<FileSys::RomFSFactory>(*this));
-
-    process.Run(base_address, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);
+    }
 
     is_loaded = true;
-    return ResultStatus::Success;
+    return {ResultStatus::Success,
+            LoadParameters{Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE}};
 }
 
 ResultStatus AppLoader_NRO::ReadIcon(std::vector<u8>& buffer) {
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index 85b0ed644..1ffdae805 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -37,7 +37,7 @@ public:
         return IdentifyType(file);
     }
 
-    ResultStatus Load(Kernel::Process& process) override;
+    LoadResult Load(Kernel::Process& process) override;
 
     ResultStatus ReadIcon(std::vector<u8>& buffer) override;
     ResultStatus ReadProgramId(u64& out_program_id) override;
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index d7c47c197..a86653204 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -169,22 +169,21 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
     return load_base + image_size;
 }
 
-ResultStatus AppLoader_NSO::Load(Kernel::Process& process) {
+AppLoader_NSO::LoadResult AppLoader_NSO::Load(Kernel::Process& process) {
     if (is_loaded) {
-        return ResultStatus::ErrorAlreadyLoaded;
+        return {ResultStatus::ErrorAlreadyLoaded, {}};
     }
 
     // Load module
     const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
     if (!LoadModule(process, *file, base_address, true)) {
-        return ResultStatus::ErrorLoadingNSO;
+        return {ResultStatus::ErrorLoadingNSO, {}};
     }
     LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", file->GetName(), base_address);
 
-    process.Run(base_address, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);
-
     is_loaded = true;
-    return ResultStatus::Success;
+    return {ResultStatus::Success,
+            LoadParameters{Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE}};
 }
 
 } // namespace Loader
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index 4674c3724..fdce9191c 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -84,7 +84,7 @@ public:
                                            VAddr load_base, bool should_pass_arguments,
                                            std::optional<FileSys::PatchManager> pm = {});
 
-    ResultStatus Load(Kernel::Process& process) override;
+    LoadResult Load(Kernel::Process& process) override;
 };
 
 } // namespace Loader
diff --git a/src/core/loader/nsp.cpp b/src/core/loader/nsp.cpp
index 7da1f8960..ad56bbb38 100644
--- a/src/core/loader/nsp.cpp
+++ b/src/core/loader/nsp.cpp
@@ -72,37 +72,45 @@ FileType AppLoader_NSP::IdentifyType(const FileSys::VirtualFile& file) {
     return FileType::Error;
 }
 
-ResultStatus AppLoader_NSP::Load(Kernel::Process& process) {
+AppLoader_NSP::LoadResult AppLoader_NSP::Load(Kernel::Process& process) {
     if (is_loaded) {
-        return ResultStatus::ErrorAlreadyLoaded;
+        return {ResultStatus::ErrorAlreadyLoaded, {}};
     }
 
-    if (title_id == 0)
-        return ResultStatus::ErrorNSPMissingProgramNCA;
+    if (title_id == 0) {
+        return {ResultStatus::ErrorNSPMissingProgramNCA, {}};
+    }
 
-    if (nsp->GetStatus() != ResultStatus::Success)
-        return nsp->GetStatus();
+    const auto nsp_status = nsp->GetStatus();
+    if (nsp_status != ResultStatus::Success) {
+        return {nsp_status, {}};
+    }
 
-    if (nsp->GetProgramStatus(title_id) != ResultStatus::Success)
-        return nsp->GetProgramStatus(title_id);
+    const auto nsp_program_status = nsp->GetProgramStatus(title_id);
+    if (nsp_program_status != ResultStatus::Success) {
+        return {nsp_program_status, {}};
+    }
 
     if (nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) {
-        if (!Core::Crypto::KeyManager::KeyFileExists(false))
-            return ResultStatus::ErrorMissingProductionKeyFile;
-        return ResultStatus::ErrorNSPMissingProgramNCA;
+        if (!Core::Crypto::KeyManager::KeyFileExists(false)) {
+            return {ResultStatus::ErrorMissingProductionKeyFile, {}};
+        }
+
+        return {ResultStatus::ErrorNSPMissingProgramNCA, {}};
     }
 
     const auto result = secondary_loader->Load(process);
-    if (result != ResultStatus::Success)
+    if (result.first != ResultStatus::Success) {
         return result;
+    }
 
     FileSys::VirtualFile update_raw;
-    if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr)
+    if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) {
         Service::FileSystem::SetPackedUpdate(std::move(update_raw));
+    }
 
     is_loaded = true;
-
-    return ResultStatus::Success;
+    return result;
 }
 
 ResultStatus AppLoader_NSP::ReadRomFS(FileSys::VirtualFile& file) {
diff --git a/src/core/loader/nsp.h b/src/core/loader/nsp.h
index 953a1b508..85e870bdf 100644
--- a/src/core/loader/nsp.h
+++ b/src/core/loader/nsp.h
@@ -35,7 +35,7 @@ public:
         return IdentifyType(file);
     }
 
-    ResultStatus Load(Kernel::Process& process) override;
+    LoadResult Load(Kernel::Process& process) override;
 
     ResultStatus ReadRomFS(FileSys::VirtualFile& file) override;
     u64 ReadRomFSIVFCOffset() const override;
diff --git a/src/core/loader/xci.cpp b/src/core/loader/xci.cpp
index 89f7bbf77..1e285a053 100644
--- a/src/core/loader/xci.cpp
+++ b/src/core/loader/xci.cpp
@@ -48,31 +48,35 @@ FileType AppLoader_XCI::IdentifyType(const FileSys::VirtualFile& file) {
     return FileType::Error;
 }
 
-ResultStatus AppLoader_XCI::Load(Kernel::Process& process) {
+AppLoader_XCI::LoadResult AppLoader_XCI::Load(Kernel::Process& process) {
     if (is_loaded) {
-        return ResultStatus::ErrorAlreadyLoaded;
+        return {ResultStatus::ErrorAlreadyLoaded, {}};
     }
 
-    if (xci->GetStatus() != ResultStatus::Success)
-        return xci->GetStatus();
+    if (xci->GetStatus() != ResultStatus::Success) {
+        return {xci->GetStatus(), {}};
+    }
 
-    if (xci->GetProgramNCAStatus() != ResultStatus::Success)
-        return xci->GetProgramNCAStatus();
+    if (xci->GetProgramNCAStatus() != ResultStatus::Success) {
+        return {xci->GetProgramNCAStatus(), {}};
+    }
 
-    if (!xci->HasProgramNCA() && !Core::Crypto::KeyManager::KeyFileExists(false))
-        return ResultStatus::ErrorMissingProductionKeyFile;
+    if (!xci->HasProgramNCA() && !Core::Crypto::KeyManager::KeyFileExists(false)) {
+        return {ResultStatus::ErrorMissingProductionKeyFile, {}};
+    }
 
     const auto result = nca_loader->Load(process);
-    if (result != ResultStatus::Success)
+    if (result.first != ResultStatus::Success) {
         return result;
+    }
 
     FileSys::VirtualFile update_raw;
-    if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr)
+    if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) {
         Service::FileSystem::SetPackedUpdate(std::move(update_raw));
+    }
 
     is_loaded = true;
-
-    return ResultStatus::Success;
+    return result;
 }
 
 ResultStatus AppLoader_XCI::ReadRomFS(FileSys::VirtualFile& file) {
diff --git a/src/core/loader/xci.h b/src/core/loader/xci.h
index 436f7387c..ae7145b14 100644
--- a/src/core/loader/xci.h
+++ b/src/core/loader/xci.h
@@ -35,7 +35,7 @@ public:
         return IdentifyType(file);
     }
 
-    ResultStatus Load(Kernel::Process& process) override;
+    LoadResult Load(Kernel::Process& process) override;
 
     ResultStatus ReadRomFS(FileSys::VirtualFile& file) override;
     u64 ReadRomFSIVFCOffset() const override;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 4e0538bc2..f18f6226b 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -26,16 +26,16 @@ namespace Memory {
 
 static Common::PageTable* current_page_table = nullptr;
 
-void SetCurrentPageTable(Common::PageTable* page_table) {
-    current_page_table = page_table;
+void SetCurrentPageTable(Kernel::Process& process) {
+    current_page_table = &process.VMManager().page_table;
+
+    const std::size_t address_space_width = process.VMManager().GetAddressSpaceWidth();
 
     auto& system = Core::System::GetInstance();
-    if (system.IsPoweredOn()) {
-        system.ArmInterface(0).PageTableChanged();
-        system.ArmInterface(1).PageTableChanged();
-        system.ArmInterface(2).PageTableChanged();
-        system.ArmInterface(3).PageTableChanged();
-    }
+    system.ArmInterface(0).PageTableChanged(*current_page_table, address_space_width);
+    system.ArmInterface(1).PageTableChanged(*current_page_table, address_space_width);
+    system.ArmInterface(2).PageTableChanged(*current_page_table, address_space_width);
+    system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width);
 }
 
 static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
diff --git a/src/core/memory.h b/src/core/memory.h
index 6845f5fe1..b9fa18b1d 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -40,8 +40,9 @@ enum : VAddr {
     KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
 };
 
-/// Changes the currently active page table.
-void SetCurrentPageTable(Common::PageTable* page_table);
+/// Changes the currently active page table to that of
+/// the given process instance.
+void SetCurrentPageTable(Kernel::Process& process);
 
 /// Determines if the given VAddr is valid for the specified process.
 bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
diff --git a/src/core/settings.h b/src/core/settings.h
index d543eb32f..b84390745 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -393,6 +393,7 @@ struct Values {
     bool use_disk_shader_cache;
     bool use_accurate_gpu_emulation;
     bool use_asynchronous_gpu_emulation;
+    bool force_30fps_mode;
 
     float bg_red;
     float bg_green;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 5de3eb686..6821f275d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -48,6 +48,8 @@ add_library(video_core STATIC
     renderer_opengl/gl_rasterizer_cache.h
     renderer_opengl/gl_resource_manager.cpp
     renderer_opengl/gl_resource_manager.h
+    renderer_opengl/gl_sampler_cache.cpp
+    renderer_opengl/gl_sampler_cache.h
     renderer_opengl/gl_shader_cache.cpp
     renderer_opengl/gl_shader_cache.h
     renderer_opengl/gl_shader_decompiler.cpp
@@ -69,6 +71,8 @@ add_library(video_core STATIC
     renderer_opengl/renderer_opengl.h
     renderer_opengl/utils.cpp
     renderer_opengl/utils.h
+    sampler_cache.cpp
+    sampler_cache.h
     shader/decode/arithmetic.cpp
     shader/decode/arithmetic_immediate.cpp
     shader/decode/bfe.cpp
@@ -108,6 +112,8 @@ add_library(video_core STATIC
     textures/decoders.cpp
     textures/decoders.h
     textures/texture.h
+    texture_cache.cpp
+    texture_cache.h
     video_core.cpp
     video_core.h
 )
@@ -129,12 +135,14 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_sampler_cache.h
         renderer_vulkan/vk_scheduler.cpp
         renderer_vulkan/vk_scheduler.h
+        renderer_vulkan/vk_shader_decompiler.cpp
+        renderer_vulkan/vk_shader_decompiler.h
         renderer_vulkan/vk_stream_buffer.cpp
         renderer_vulkan/vk_stream_buffer.h
         renderer_vulkan/vk_swapchain.cpp
         renderer_vulkan/vk_swapchain.h)
 
-    target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
+    target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
     target_compile_definitions(video_core PRIVATE HAS_VULKAN)
 endif()
 
@@ -142,3 +150,6 @@ create_target_directory_groups(video_core)
 
 target_link_libraries(video_core PUBLIC common core)
 target_link_libraries(video_core PRIVATE glad)
+if (ENABLE_VULKAN)
+    target_link_libraries(video_core PRIVATE sirit)
+endif()
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 046d047cb..6674d9405 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -57,8 +57,8 @@ bool DmaPusher::Step() {
 
     // Push buffer non-empty, read a word
     command_headers.resize(command_list_header.size);
-    gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
-                                  command_list_header.size * sizeof(u32));
+    gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+                                        command_list_header.size * sizeof(u32));
 
     for (const CommandHeader& command_header : command_headers) {
 
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index cd51a31d7..7387886a3 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -10,6 +10,7 @@
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
+#include "video_core/textures/decoders.h"
 
 namespace Tegra::Engines {
 
@@ -27,30 +28,46 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
 
     switch (method_call.method) {
     case KEPLERMEMORY_REG_INDEX(exec): {
-        state.write_offset = 0;
+        ProcessExec();
         break;
     }
     case KEPLERMEMORY_REG_INDEX(data): {
-        ProcessData(method_call.argument);
+        ProcessData(method_call.argument, method_call.IsLastCall());
         break;
     }
     }
 }
 
-void KeplerMemory::ProcessData(u32 data) {
-    ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
-    ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
-
-    // We have to invalidate the destination region to evict any outdated surfaces from the cache.
-    // We do this before actually writing the new data because the destination address might
-    // contain a dirty surface that will have to be written back to memory.
-    const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
-    rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
-    memory_manager.Write<u32>(address, data);
-
-    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+void KeplerMemory::ProcessExec() {
+    state.write_offset = 0;
+    state.copy_size = regs.line_length_in * regs.line_count;
+    state.inner_buffer.resize(state.copy_size);
+}
 
-    state.write_offset++;
+void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
+    const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
+    std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
+    state.write_offset += sub_copy_size;
+    if (is_last_call) {
+        const GPUVAddr address{regs.dest.Address()};
+        if (regs.exec.linear != 0) {
+            memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
+        } else {
+            UNIMPLEMENTED_IF(regs.dest.z != 0);
+            UNIMPLEMENTED_IF(regs.dest.depth != 1);
+            UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
+            UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
+            const std::size_t dst_size = Tegra::Texture::CalculateSize(
+                true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
+            std::vector<u8> tmp_buffer(dst_size);
+            memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
+            Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
+                                          regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
+                                          state.inner_buffer.data(), tmp_buffer.data());
+            memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
+        }
+        system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+    }
 }
 
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 78b6c3e45..5f892ddad 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <cstddef>
+#include <vector>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
@@ -51,7 +52,11 @@ public:
                     u32 address_high;
                     u32 address_low;
                     u32 pitch;
-                    u32 block_dimensions;
+                    union {
+                        BitField<0, 4, u32> block_width;
+                        BitField<4, 4, u32> block_height;
+                        BitField<8, 4, u32> block_depth;
+                    };
                     u32 width;
                     u32 height;
                     u32 depth;
@@ -63,6 +68,18 @@ public:
                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
                                                      address_low);
                     }
+
+                    u32 BlockWidth() const {
+                        return 1U << block_width.Value();
+                    }
+
+                    u32 BlockHeight() const {
+                        return 1U << block_height.Value();
+                    }
+
+                    u32 BlockDepth() const {
+                        return 1U << block_depth.Value();
+                    }
                 } dest;
 
                 struct {
@@ -81,6 +98,8 @@ public:
 
     struct {
         u32 write_offset = 0;
+        u32 copy_size = 0;
+        std::vector<u8> inner_buffer;
     } state{};
 
 private:
@@ -88,7 +107,8 @@ private:
     VideoCore::RasterizerInterface& rasterizer;
     MemoryManager& memory_manager;
 
-    void ProcessData(u32 data);
+    void ProcessExec();
+    void ProcessData(u32 data, bool is_last_call);
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 74403eed4..9780417f2 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -418,7 +418,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
     const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
 
     Texture::TICEntry tic_entry;
-    memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
+    memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
 
     ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
                    tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -439,7 +439,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
     const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
 
     Texture::TSCEntry tsc_entry;
-    memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
+    memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
     return tsc_entry;
 }
 
@@ -482,19 +482,8 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
     return textures;
 }
 
-Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
-                                                    std::size_t offset) const {
-    auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
-    auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
-    ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
-
-    const GPUVAddr tex_info_address =
-        tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
-
-    ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
-
-    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
-
+Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle,
+                                                   std::size_t offset) const {
     Texture::FullTextureInfo tex_info{};
     tex_info.index = static_cast<u32>(offset);
 
@@ -511,6 +500,22 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
     return tex_info;
 }
 
+Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
+                                                    std::size_t offset) const {
+    const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
+    const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
+    ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
+
+    const GPUVAddr tex_info_address =
+        tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
+
+    ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
+
+    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+
+    return GetTextureInfo(tex_handle, offset);
+}
+
 u32 Maxwell3D::GetRegisterValue(u32 method) const {
     ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
     return regs.reg_array[method];
@@ -524,4 +529,12 @@ void Maxwell3D::ProcessClearBuffers() {
     rasterizer.Clear();
 }
 
+u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const {
+    const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
+    const auto& buffer = shader_stage.const_buffers[const_buffer];
+    u32 result;
+    std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
+    return result;
+}
+
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 321af3297..cc2424d38 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1131,12 +1131,18 @@ public:
     /// Write the value to the register identified by method.
     void CallMethod(const GPU::MethodCall& method_call);
 
+    /// Given a Texture Handle, returns the TSC and TIC entries.
+    Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
+                                            std::size_t offset) const;
+
     /// Returns a list of enabled textures for the specified shader stage.
     std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
 
     /// Returns the texture information for a specific texture in a specific shader stage.
     Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
 
+    u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const;
+
     /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
     /// we've seen used.
     using MacroMemory = std::array<u32, 0x40000>;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 2e1e96c81..e5b4eadea 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -387,6 +387,20 @@ enum class IpaSampleMode : u64 {
     Offset = 2,
 };
 
+enum class LmemLoadCacheManagement : u64 {
+    Default = 0,
+    LU = 1,
+    CI = 2,
+    CV = 3,
+};
+
+enum class LmemStoreCacheManagement : u64 {
+    Default = 0,
+    CG = 1,
+    CS = 2,
+    WT = 3,
+};
+
 struct IpaMode {
     IpaInterpMode interpolation_mode;
     IpaSampleMode sampling_mode;
@@ -782,7 +796,7 @@ union Instruction {
     } ld_l;
 
     union {
-        BitField<44, 2, u64> unknown;
+        BitField<44, 2, LmemStoreCacheManagement> cache_management;
     } st_l;
 
     union {
@@ -792,6 +806,12 @@ union Instruction {
     } ldg;
 
     union {
+        BitField<48, 3, UniformType> type;
+        BitField<46, 2, u64> cache_mode;
+        BitField<20, 24, s64> immediate_offset;
+    } stg;
+
+    union {
         BitField<0, 3, u64> pred0;
         BitField<3, 3, u64> pred3;
         BitField<7, 1, u64> abs_a;
@@ -917,21 +937,34 @@ union Instruction {
     } iset;
 
     union {
-        BitField<8, 2, Register::Size> dest_size;
-        BitField<10, 2, Register::Size> src_size;
-        BitField<12, 1, u64> is_output_signed;
-        BitField<13, 1, u64> is_input_signed;
-        BitField<41, 2, u64> selector;
+        BitField<41, 2, u64> selector; // i2i and i2f only
         BitField<45, 1, u64> negate_a;
         BitField<49, 1, u64> abs_a;
+        BitField<10, 2, Register::Size> src_size;
+        BitField<13, 1, u64> is_input_signed;
+        BitField<8, 2, Register::Size> dst_size;
+        BitField<12, 1, u64> is_output_signed;
+
+        union {
+            BitField<39, 2, u64> tab5cb8_2;
+        } i2f;
 
         union {
             BitField<39, 2, F2iRoundingOp> rounding;
         } f2i;
 
         union {
-            BitField<39, 4, F2fRoundingOp> rounding;
+            BitField<8, 2, Register::Size> src_size;
+            BitField<10, 2, Register::Size> dst_size;
+            BitField<39, 4, u64> rounding;
+            // H0, H1 extract for F16 missing
+            BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
+            F2fRoundingOp GetRoundingMode() const {
+                constexpr u64 rounding_mask = 0x0B;
+                return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask);
+            }
         } f2f;
+
     } conversion;
 
     union {
@@ -967,6 +1000,38 @@ union Instruction {
     } tex;
 
     union {
+        BitField<28, 1, u64> array;
+        BitField<29, 2, TextureType> texture_type;
+        BitField<31, 4, u64> component_mask;
+        BitField<49, 1, u64> nodep_flag;
+        BitField<50, 1, u64> dc_flag;
+        BitField<36, 1, u64> aoffi_flag;
+        BitField<37, 3, TextureProcessMode> process_mode;
+
+        bool IsComponentEnabled(std::size_t component) const {
+            return ((1ULL << component) & component_mask) != 0;
+        }
+
+        TextureProcessMode GetTextureProcessMode() const {
+            return process_mode;
+        }
+
+        bool UsesMiscMode(TextureMiscMode mode) const {
+            switch (mode) {
+            case TextureMiscMode::DC:
+                return dc_flag != 0;
+            case TextureMiscMode::NODEP:
+                return nodep_flag != 0;
+            case TextureMiscMode::AOFFI:
+                return aoffi_flag != 0;
+            default:
+                break;
+            }
+            return false;
+        }
+    } tex_b;
+
+    union {
         BitField<22, 6, TextureQueryType> query_type;
         BitField<31, 4, u64> component_mask;
         BitField<49, 1, u64> nodep_flag;
@@ -1312,7 +1377,9 @@ public:
         LDG, // Load from global memory
         STG, // Store in global memory
         TEX,
+        TEX_B,  // Texture Load Bindless
         TXQ,    // Texture Query
+        TXQ_B,  // Texture Query Bindless
         TEXS,   // Texture Fetch with scalar/non-vec4 source/destinations
         TLDS,   // Texture Load with scalar/non-vec4 source/destinations
         TLD4,   // Texture Load 4
@@ -1580,7 +1647,9 @@ private:
             INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
             INST("1110111011011---", Id::STG, Type::Memory, "STG"),
             INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
+            INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
             INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
+            INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
             INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
             INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
             INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
@@ -1678,7 +1747,7 @@ private:
             INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
             INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
             INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
-            INST("01110001-1000---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
+            INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
             INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"),
             INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"),
             INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"),
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index de30ea354..fe6628923 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -207,6 +207,11 @@ public:
         };
     } regs{};
 
+    /// Performs any additional setup necessary in order to begin GPU emulation.
+    /// This can be used to launch any necessary threads and register any necessary
+    /// core timing events.
+    virtual void Start() = 0;
+
     /// Push GPU command entries to be processed
     virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
 
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index db507cf04..d4e2553a9 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -9,10 +9,14 @@
 namespace VideoCommon {
 
 GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
-    : Tegra::GPU(system, renderer), gpu_thread{system, renderer, *dma_pusher} {}
+    : GPU(system, renderer), gpu_thread{system} {}
 
 GPUAsynch::~GPUAsynch() = default;
 
+void GPUAsynch::Start() {
+    gpu_thread.StartThread(renderer, *dma_pusher);
+}
+
 void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
     gpu_thread.SubmitList(std::move(entries));
 }
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 1dcc61a6c..30be74cba 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -13,16 +13,13 @@ class RendererBase;
 
 namespace VideoCommon {
 
-namespace GPUThread {
-class ThreadManager;
-} // namespace GPUThread
-
 /// Implementation of GPU interface that runs the GPU asynchronously
 class GPUAsynch : public Tegra::GPU {
 public:
     explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
     ~GPUAsynch() override;
 
+    void Start() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(
         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 2cfc900ed..45e43b1dc 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -8,10 +8,12 @@
 namespace VideoCommon {
 
 GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
-    : Tegra::GPU(system, renderer) {}
+    : GPU(system, renderer) {}
 
 GPUSynch::~GPUSynch() = default;
 
+void GPUSynch::Start() {}
+
 void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
     dma_pusher->Push(std::move(entries));
     dma_pusher->DispatchCalls();
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 766b5631c..3031fcf72 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -18,6 +18,7 @@ public:
     explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
     ~GPUSynch() override;
 
+    void Start() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(
         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index cc56cf467..c9a2077de 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -55,19 +55,24 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
     }
 }
 
-ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
-                             Tegra::DmaPusher& dma_pusher)
-    : system{system}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)} {
-    synchronization_event = system.CoreTiming().RegisterEvent(
-        "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
-}
+ThreadManager::ThreadManager(Core::System& system) : system{system} {}
 
 ThreadManager::~ThreadManager() {
+    if (!thread.joinable()) {
+        return;
+    }
+
     // Notify GPU thread that a shutdown is pending
     PushCommand(EndProcessingCommand());
     thread.join();
 }
 
+void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {
+    thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)};
+    synchronization_event = system.CoreTiming().RegisterEvent(
+        "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
+}
+
 void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
     const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
     const s64 synchronization_ticks{Core::Timing::usToCycles(9000)};
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 62bcea5bb..cc14527c7 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -138,10 +138,12 @@ struct SynchState final {
 /// Class used to manage the GPU thread
 class ThreadManager final {
 public:
-    explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
-                           Tegra::DmaPusher& dma_pusher);
+    explicit ThreadManager(Core::System& system);
     ~ThreadManager();
 
+    /// Creates and starts the GPU thread.
+    void StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+
     /// Push GPU command entries to be processed
     void SubmitList(Tegra::CommandList&& entries);
 
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 0f4e820aa..6c98c6701 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -199,7 +199,15 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
     return {};
 }
 
-void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const {
+bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) {
+    const GPUVAddr end = start + size;
+    const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
+    const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
+    const std::size_t range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
+    return range == size;
+}
+
+void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const {
     std::size_t remaining_size{size};
     std::size_t page_index{src_addr >> page_bits};
     std::size_t page_offset{src_addr & page_mask};
@@ -226,7 +234,30 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t
     }
 }
 
-void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
+void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
+                                    const std::size_t size) const {
+    std::size_t remaining_size{size};
+    std::size_t page_index{src_addr >> page_bits};
+    std::size_t page_offset{src_addr & page_mask};
+
+    while (remaining_size > 0) {
+        const std::size_t copy_amount{
+            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+        const u8* page_pointer = page_table.pointers[page_index];
+        if (page_pointer) {
+            const u8* src_ptr{page_pointer + page_offset};
+            std::memcpy(dest_buffer, src_ptr, copy_amount);
+        } else {
+            std::memset(dest_buffer, 0, copy_amount);
+        }
+        page_index++;
+        page_offset = 0;
+        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+        remaining_size -= copy_amount;
+    }
+}
+
+void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) {
     std::size_t remaining_size{size};
     std::size_t page_index{dest_addr >> page_bits};
     std::size_t page_offset{dest_addr & page_mask};
@@ -253,7 +284,28 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::
     }
 }
 
-void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
+void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
+                                     const std::size_t size) {
+    std::size_t remaining_size{size};
+    std::size_t page_index{dest_addr >> page_bits};
+    std::size_t page_offset{dest_addr & page_mask};
+
+    while (remaining_size > 0) {
+        const std::size_t copy_amount{
+            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+        u8* page_pointer = page_table.pointers[page_index];
+        if (page_pointer) {
+            u8* dest_ptr{page_pointer + page_offset};
+            std::memcpy(dest_ptr, src_buffer, copy_amount);
+        }
+        page_index++;
+        page_offset = 0;
+        src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+        remaining_size -= copy_amount;
+    }
+}
+
+void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
     std::size_t remaining_size{size};
     std::size_t page_index{src_addr >> page_bits};
     std::size_t page_offset{src_addr & page_mask};
@@ -281,6 +333,12 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t
     }
 }
 
+void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
+    std::vector<u8> tmp_buffer(size);
+    ReadBlockUnsafe(src_addr, tmp_buffer.data(), size);
+    WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
+}
+
 void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
                              VAddr backing_addr) {
     LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 647cbf93a..e4f0c4bd6 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -65,9 +65,32 @@ public:
     u8* GetPointer(GPUVAddr addr);
     const u8* GetPointer(GPUVAddr addr) const;
 
-    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
-    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
-    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
+    // Returns true if the block is continous in host memory, false otherwise
+    bool IsBlockContinous(const GPUVAddr start, const std::size_t size);
+
+    /**
+     * ReadBlock and WriteBlock are full read and write operations over virtual
+     * GPU Memory. It's important to use these when GPU memory may not be continous
+     * in the Host Memory counterpart. Note: This functions cause Host GPU Memory
+     * Flushes and Invalidations, respectively to each operation.
+     */
+    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
+    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
+    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
+
+    /**
+     * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
+     * WriteBlock respectively. In this versions, no flushing or invalidation is actually
+     * done and their performance is similar to a memcpy. This functions can be used
+     * on either of this 2 scenarios instead of their safe counterpart:
+     * - Memory which is sure to never be represented in the Host GPU.
+     * - Memory Managed by a Cache Manager. Example: Texture Flushing should use
+     * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
+     * being flushed.
+     */
+    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
+    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
+    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
 
 private:
     using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 8d9ee81f1..ea4a593af 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -14,28 +14,28 @@
 
 namespace OpenGL {
 
-CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
-    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} {
+CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size)
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size},
+      max_size{max_size} {
     buffer.Create();
-    // Bind and unbind the buffer so it gets allocated by the driver
-    glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
-    glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
     LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
 }
 
-void CachedGlobalRegion::Reload(u32 size_) {
-    constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);
+CachedGlobalRegion::~CachedGlobalRegion() = default;
 
+void CachedGlobalRegion::Reload(u32 size_) {
     size = size_;
     if (size > max_size) {
         size = max_size;
-        LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
+        LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_,
                      max_size);
     }
+    glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW);
+}
 
-    // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
-    glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
-    glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
+void CachedGlobalRegion::Flush() {
+    LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr);
+    glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr);
 }
 
 GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
@@ -46,14 +46,16 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr,
     return search->second;
 }
 
-GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
-                                                              u8* host_ptr) {
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr,
+                                                              u32 size) {
     GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
     if (!region) {
         // No reserved surface available, create a new one and reserve it
         auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
-        const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
-        region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
+        const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)};
+        ASSERT(cpu_addr);
+
+        region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size);
         ReserveGlobalRegion(region);
     }
     region->Reload(size);
@@ -65,7 +67,11 @@ void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
 }
 
 GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
-    : RasterizerCache{rasterizer} {}
+    : RasterizerCache{rasterizer} {
+    GLint max_ssbo_size_;
+    glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_);
+    max_ssbo_size = static_cast<u32>(max_ssbo_size_);
+}
 
 GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
     const GLShader::GlobalMemoryEntry& global_region,
@@ -73,7 +79,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
 
     auto& gpu{Core::System::GetInstance().GPU()};
     auto& memory_manager{gpu.MemoryManager()};
-    const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
+    const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
     const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
                     global_region.GetCbufOffset()};
     const auto actual_addr{memory_manager.Read<u64>(addr)};
@@ -85,7 +91,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
 
     if (!region) {
         // No global region found - create a new one
-        region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
+        region = GetUncachedGlobalRegion(actual_addr, host_ptr, size);
         Register(region);
     }
 
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 5a21ab66f..196e6e278 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -19,7 +19,7 @@ namespace OpenGL {
 
 namespace GLShader {
 class GlobalMemoryEntry;
-} // namespace GLShader
+}
 
 class RasterizerOpenGL;
 class CachedGlobalRegion;
@@ -27,7 +27,8 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
 
 class CachedGlobalRegion final : public RasterizerCacheObject {
 public:
-    explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
+    explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size);
+    ~CachedGlobalRegion();
 
     VAddr GetCpuAddr() const override {
         return cpu_addr;
@@ -45,14 +46,14 @@ public:
     /// Reloads the global region from guest memory
     void Reload(u32 size_);
 
-    // TODO(Rodrigo): When global memory is written (STG), implement flushing
-    void Flush() override {
-        UNIMPLEMENTED();
-    }
+    void Flush() override;
 
 private:
     VAddr cpu_addr{};
+    u8* host_ptr{};
     u32 size{};
+    u32 max_size{};
+
     OGLBuffer buffer;
 };
 
@@ -66,10 +67,11 @@ public:
 
 private:
     GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
-    GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
+    GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
     void ReserveGlobalRegion(GlobalRegion region);
 
     std::unordered_map<CacheAddr, GlobalRegion> reserve;
+    u32 max_ssbo_size{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ff5d05ced..9a088a503 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -101,12 +101,6 @@ struct FramebufferCacheKey {
 RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
     : res_cache{*this}, shader_cache{*this, system, device}, global_cache{*this}, system{system},
       screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
-    // Create sampler objects
-    for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
-        texture_samplers[i].Create();
-        state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
-    }
-
     OpenGLState::ApplyDefaultState();
 
     shader_program_manager = std::make_unique<GLShader::ProgramManager>();
@@ -580,9 +574,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
 }
 
 void RasterizerOpenGL::Clear() {
-    const auto prev_state{state};
-    SCOPE_EXIT({ prev_state.Apply(); });
-
     const auto& regs = system.GPU().Maxwell3D().regs;
     bool use_color{};
     bool use_depth{};
@@ -654,7 +645,10 @@ void RasterizerOpenGL::Clear() {
         clear_state.EmulateViewportWithScissor();
     }
 
-    clear_state.Apply();
+    clear_state.ApplyColorMask();
+    clear_state.ApplyDepth();
+    clear_state.ApplyStencilTest();
+    clear_state.ApplyViewport();
 
     if (use_color) {
         glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
@@ -755,6 +749,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
         return;
     }
     res_cache.FlushRegion(addr, size);
+    global_cache.FlushRegion(addr, size);
 }
 
 void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -811,92 +806,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
     return true;
 }
 
-void RasterizerOpenGL::SamplerInfo::Create() {
-    sampler.Create();
-    mag_filter = Tegra::Texture::TextureFilter::Linear;
-    min_filter = Tegra::Texture::TextureFilter::Linear;
-    wrap_u = Tegra::Texture::WrapMode::Wrap;
-    wrap_v = Tegra::Texture::WrapMode::Wrap;
-    wrap_p = Tegra::Texture::WrapMode::Wrap;
-    use_depth_compare = false;
-    depth_compare_func = Tegra::Texture::DepthCompareFunc::Never;
-
-    // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR
-    glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
-
-    // Other attributes have correct defaults
-}
-
-void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
-    const GLuint sampler_id = sampler.handle;
-    if (mag_filter != config.mag_filter) {
-        mag_filter = config.mag_filter;
-        glSamplerParameteri(
-            sampler_id, GL_TEXTURE_MAG_FILTER,
-            MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None));
-    }
-    if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) {
-        min_filter = config.min_filter;
-        mipmap_filter = config.mipmap_filter;
-        glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
-                            MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter));
-    }
-
-    if (wrap_u != config.wrap_u) {
-        wrap_u = config.wrap_u;
-        glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u));
-    }
-    if (wrap_v != config.wrap_v) {
-        wrap_v = config.wrap_v;
-        glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
-    }
-    if (wrap_p != config.wrap_p) {
-        wrap_p = config.wrap_p;
-        glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
-    }
-
-    if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) {
-        use_depth_compare = enabled;
-        glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
-                            use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
-    }
-
-    if (depth_compare_func != config.depth_compare_func) {
-        depth_compare_func = config.depth_compare_func;
-        glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
-                            MaxwellToGL::DepthCompareFunc(depth_compare_func));
-    }
-
-    if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) {
-        border_color = new_border_color;
-        glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data());
-    }
-
-    if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) {
-        max_anisotropic = anisotropic;
-        if (GLAD_GL_ARB_texture_filter_anisotropic) {
-            glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic);
-        } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
-            glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic);
-        }
-    }
-
-    if (const float min = config.GetMinLod(); min_lod != min) {
-        min_lod = min;
-        glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod);
-    }
-    if (const float max = config.GetMaxLod(); max_lod != max) {
-        max_lod = max;
-        glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod);
-    }
-
-    if (const float bias = config.GetLodBias(); lod_bias != bias) {
-        lod_bias = bias;
-        glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias);
-    }
-}
-
 void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
                                          const Shader& shader, GLuint program_handle,
                                          BaseBindings base_bindings) {
@@ -952,6 +861,9 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
     for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
         const auto& entry{entries[bindpoint]};
         const auto& region{global_cache.GetGlobalRegion(entry, stage)};
+        if (entry.IsWritten()) {
+            region->MarkAsModified(true, global_cache);
+        }
         bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
                                   static_cast<GLsizeiptr>(region->GetSizeInBytes()));
     }
@@ -969,10 +881,18 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
 
     for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
         const auto& entry = entries[bindpoint];
-        const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
+        Tegra::Texture::FullTextureInfo texture;
+        if (entry.IsBindless()) {
+            const auto cbuf = entry.GetBindlessCBuf();
+            Tegra::Texture::TextureHandle tex_handle;
+            tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second);
+            texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
+        } else {
+            texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
+        }
         const u32 current_bindpoint = base_bindings.sampler + bindpoint;
 
-        texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
+        state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc);
 
         if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
             state.texture_units[current_bindpoint].texture =
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index cbcce8597..71b9c5ead 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -26,6 +26,7 @@
 #include "video_core/renderer_opengl/gl_primitive_assembler.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_sampler_cache.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state.h"
@@ -72,39 +73,7 @@ public:
     static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
                   "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
 
-    static constexpr std::size_t MaxGlobalMemorySize = 0x10000;
-    static_assert(MaxGlobalMemorySize % sizeof(float) == 0,
-                  "The maximum size of a global memory must be a multiple of the size of float");
-
 private:
-    class SamplerInfo {
-    public:
-        OGLSampler sampler;
-
-        /// Creates the sampler object, initializing its state so that it's in sync with the
-        /// SamplerInfo struct.
-        void Create();
-        /// Syncs the sampler object with the config, updating any necessary state.
-        void SyncWithConfig(const Tegra::Texture::TSCEntry& info);
-
-    private:
-        Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest;
-        Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest;
-        Tegra::Texture::TextureMipmapFilter mipmap_filter =
-            Tegra::Texture::TextureMipmapFilter::None;
-        Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge;
-        Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge;
-        Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge;
-        bool use_depth_compare = false;
-        Tegra::Texture::DepthCompareFunc depth_compare_func =
-            Tegra::Texture::DepthCompareFunc::Always;
-        GLvec4 border_color = {};
-        float min_lod = 0.0f;
-        float max_lod = 16.0f;
-        float lod_bias = 0.0f;
-        float max_anisotropic = 1.0f;
-    };
-
     struct FramebufferConfigState {
         bool using_color_fb{};
         bool using_depth_fb{};
@@ -210,6 +179,7 @@ private:
     RasterizerCacheOpenGL res_cache;
     ShaderCacheOpenGL shader_cache;
     GlobalRegionCacheOpenGL global_cache;
+    SamplerCacheOpenGL sampler_cache;
 
     Core::System& system;
     ScreenInfo& screen_info;
@@ -224,8 +194,6 @@ private:
     FramebufferConfigState current_framebuffer_config_state;
     std::pair<bool, bool> current_depth_stencil_usage{};
 
-    std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers;
-
     static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
     OGLBufferCache buffer_cache;
     PrimitiveAssembler primitive_assembler{buffer_cache};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index aa6da1944..5a25f5b37 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -112,11 +112,26 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
     params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
                                                        params.srgb_conversion);
 
-    if (params.pixel_format == PixelFormat::R16U && config.tsc.depth_compare_enabled) {
+    if (config.tsc.depth_compare_enabled) {
         // Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled,
         // then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also
         // causes GetFormatType to properly return 'Depth' below).
-        params.pixel_format = PixelFormat::Z16;
+        if (GetFormatType(params.pixel_format) == SurfaceType::ColorTexture) {
+            switch (params.pixel_format) {
+            case PixelFormat::R16S:
+            case PixelFormat::R16U:
+            case PixelFormat::R16F:
+                params.pixel_format = PixelFormat::Z16;
+                break;
+            case PixelFormat::R32F:
+                params.pixel_format = PixelFormat::Z32F;
+                break;
+            default:
+                LOG_WARNING(HW_GPU, "Color texture format being used with depth compare: {}",
+                            static_cast<u32>(params.pixel_format));
+                break;
+            }
+        }
     }
 
     params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
@@ -266,6 +281,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
     params.component_type = ComponentTypeFromRenderTarget(config.format);
     params.type = GetFormatType(params.pixel_format);
     params.width = config.width;
+    params.pitch = config.pitch;
     params.height = config.height;
     params.unaligned_height = config.height;
     params.target = SurfaceTarget::Texture2D;
@@ -624,13 +640,16 @@ void CachedSurface::LoadGLBuffer() {
             SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
     } else {
         const u32 bpp = params.GetFormatBpp() / 8;
-        const u32 copy_size = params.width * bpp;
+        const u32 copy_size = (params.width * bpp + GetDefaultBlockWidth(params.pixel_format) - 1) /
+                              GetDefaultBlockWidth(params.pixel_format);
         if (params.pitch == copy_size) {
             std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
         } else {
+            const u32 height = (params.height + GetDefaultBlockHeight(params.pixel_format) - 1) /
+                               GetDefaultBlockHeight(params.pixel_format);
             const u8* start{params.host_ptr};
             u8* write_to = gl_buffer[0].data();
-            for (u32 h = params.height; h > 0; h--) {
+            for (u32 h = height; h > 0; h--) {
                 std::memcpy(write_to, start, copy_size);
                 start += params.pitch;
                 write_to += copy_size;
@@ -1175,10 +1194,16 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
         return new_surface;
     }
 
+    const bool old_compressed =
+        GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed;
+    const bool new_compressed =
+        GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed;
+    const bool compatible_formats =
+        GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) &&
+        !(old_compressed || new_compressed);
     // For compatible surfaces, we can just do fast glCopyImageSubData based copy
-    if (old_params.target == new_params.target && old_params.type == new_params.type &&
-        old_params.depth == new_params.depth && old_params.depth == 1 &&
-        GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format)) {
+    if (old_params.target == new_params.target && old_params.depth == new_params.depth &&
+        old_params.depth == 1 && compatible_formats) {
         FastCopySurface(old_surface, new_surface);
         return new_surface;
     }
@@ -1193,7 +1218,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
     case SurfaceTarget::TextureCubemap:
     case SurfaceTarget::Texture2DArray:
     case SurfaceTarget::TextureCubeArray:
-        if (old_params.pixel_format == new_params.pixel_format)
+        if (compatible_formats)
             FastLayeredCopySurface(old_surface, new_surface);
         else {
             AccurateCopySurface(old_surface, new_surface);
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
new file mode 100644
index 000000000..3ded5ecea
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
@@ -0,0 +1,52 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_sampler_cache.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
+
+namespace OpenGL {
+
+SamplerCacheOpenGL::SamplerCacheOpenGL() = default;
+
+SamplerCacheOpenGL::~SamplerCacheOpenGL() = default;
+
+OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
+    OGLSampler sampler;
+    sampler.Create();
+
+    const GLuint sampler_id{sampler.handle};
+    glSamplerParameteri(
+        sampler_id, GL_TEXTURE_MAG_FILTER,
+        MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None));
+    glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
+                        MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter));
+    glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u));
+    glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v));
+    glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p));
+    glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
+                        tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
+    glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
+                        MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func));
+    glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data());
+    glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod());
+    glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod());
+    glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias());
+    if (GLAD_GL_ARB_texture_filter_anisotropic) {
+        glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
+    } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
+        glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
+    } else if (tsc.GetMaxAnisotropy() != 1) {
+        LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
+    }
+
+    return sampler;
+}
+
+GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const {
+    return sampler.handle;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h
new file mode 100644
index 000000000..defbc2d81
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_sampler_cache.h
@@ -0,0 +1,25 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/sampler_cache.h"
+
+namespace OpenGL {
+
+class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> {
+public:
+    explicit SamplerCacheOpenGL();
+    ~SamplerCacheOpenGL();
+
+protected:
+    OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const;
+
+    GLuint ToSamplerType(const OGLSampler& sampler) const;
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4efeb9c3e..2a81b1169 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -38,13 +38,15 @@ GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
 }
 
 /// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(const u8* host_ptr) {
+ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
+                          const u8* host_ptr) {
     ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
     ASSERT_OR_EXECUTE(host_ptr != nullptr, {
         std::fill(program_code.begin(), program_code.end(), 0);
         return program_code;
     });
-    std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
+    memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
+                                   program_code.size() * sizeof(u64));
     return program_code;
 }
 
@@ -497,11 +499,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 
     if (!shader) {
         // No shader found - create a new one
-        ProgramCode program_code{GetShaderCode(host_ptr)};
+        ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
         ProgramCode program_code_b;
         if (program == Maxwell::ShaderProgram::VertexA) {
-            program_code_b = GetShaderCode(
-                memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
+            const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)};
+            program_code_b = GetShaderCode(memory_manager, program_addr_b,
+                                           memory_manager.GetPointer(program_addr_b));
         }
         const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
         const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index aee9939db..ef1a1995f 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -46,8 +46,6 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>;
 enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
 constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
     static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
-constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
-    static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
 
 class ShaderWriter {
 public:
@@ -122,14 +120,10 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
 
 /// Returns true if an object has to be treated as precise
 bool IsPrecise(Operation operand) {
-    const auto& meta = operand.GetMeta();
-
+    const auto& meta{operand.GetMeta()};
     if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
         return arithmetic->precise;
     }
-    if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) {
-        return half_arithmetic->precise;
-    }
     return false;
 }
 
@@ -210,8 +204,10 @@ public:
         for (const auto& sampler : ir.GetSamplers()) {
             entries.samplers.emplace_back(sampler);
         }
-        for (const auto& gmem : ir.GetGlobalMemoryBases()) {
-            entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
+        for (const auto& gmem_pair : ir.GetGlobalMemory()) {
+            const auto& [base, usage] = gmem_pair;
+            entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
+                                                       usage.is_read, usage.is_written);
         }
         entries.clip_distances = ir.GetClipDistances();
         entries.shader_length = ir.GetLength();
@@ -382,12 +378,22 @@ private:
     }
 
     void DeclareGlobalMemory() {
-        for (const auto& entry : ir.GetGlobalMemoryBases()) {
+        for (const auto& gmem : ir.GetGlobalMemory()) {
+            const auto& [base, usage] = gmem;
+
+            // Since we don't know how the shader will use the shader, hint the driver to disable as
+            // much optimizations as possible
+            std::string qualifier = "coherent volatile";
+            if (usage.is_read && !usage.is_written)
+                qualifier += " readonly";
+            else if (usage.is_written && !usage.is_read)
+                qualifier += " writeonly";
+
             const std::string binding =
-                fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset);
-            code.AddLine("layout (std430, binding = " + binding + ") buffer " +
-                         GetGlobalMemoryBlock(entry) + " {");
-            code.AddLine("    float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];");
+                fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset);
+            code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " +
+                         GetGlobalMemoryBlock(base) + " {");
+            code.AddLine("    float " + GetGlobalMemory(base) + "[];");
             code.AddLine("};");
             code.AddNewLine();
         }
@@ -619,28 +625,7 @@ private:
     }
 
     std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
-        std::string value = VisitOperand(operation, operand_index);
-        switch (type) {
-        case Type::HalfFloat: {
-            const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
-            if (!half_meta) {
-                value = "toHalf2(" + value + ')';
-            }
-
-            switch (half_meta->types.at(operand_index)) {
-            case Tegra::Shader::HalfType::H0_H1:
-                return "toHalf2(" + value + ')';
-            case Tegra::Shader::HalfType::F32:
-                return "vec2(" + value + ')';
-            case Tegra::Shader::HalfType::H0_H0:
-                return "vec2(toHalf2(" + value + ")[0])";
-            case Tegra::Shader::HalfType::H1_H1:
-                return "vec2(toHalf2(" + value + ")[1])";
-            }
-        }
-        default:
-            return CastOperand(value, type);
-        }
+        return CastOperand(VisitOperand(operation, operand_index), type);
     }
 
     std::string CastOperand(const std::string& value, Type type) const {
@@ -654,9 +639,7 @@ private:
         case Type::Uint:
             return "ftou(" + value + ')';
         case Type::HalfFloat:
-            // Can't be handled as a stand-alone value
-            UNREACHABLE();
-            return value;
+            return "toHalf2(" + value + ')';
         }
         UNREACHABLE();
         return value;
@@ -874,6 +857,12 @@ private:
         } else if (const auto lmem = std::get_if<LmemNode>(dest)) {
             target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]";
 
+        } else if (const auto gmem = std::get_if<GmemNode>(dest)) {
+            const std::string real = Visit(gmem->GetRealAddress());
+            const std::string base = Visit(gmem->GetBaseAddress());
+            const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
+            target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+
         } else {
             UNREACHABLE_MSG("Assign called without a proper target");
         }
@@ -1073,13 +1062,40 @@ private:
         return BitwiseCastResult(value, Type::HalfFloat);
     }
 
+    std::string HClamp(Operation operation) {
+        const std::string value = VisitOperand(operation, 0, Type::HalfFloat);
+        const std::string min = VisitOperand(operation, 1, Type::Float);
+        const std::string max = VisitOperand(operation, 2, Type::Float);
+        const std::string clamped = "clamp(" + value + ", vec2(" + min + "), vec2(" + max + "))";
+        return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat));
+    }
+
+    std::string HUnpack(Operation operation) {
+        const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)};
+        const auto value = [&]() -> std::string {
+            switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
+            case Tegra::Shader::HalfType::H0_H1:
+                return operand;
+            case Tegra::Shader::HalfType::F32:
+                return "vec2(fromHalf2(" + operand + "))";
+            case Tegra::Shader::HalfType::H0_H0:
+                return "vec2(" + operand + "[0])";
+            case Tegra::Shader::HalfType::H1_H1:
+                return "vec2(" + operand + "[1])";
+            }
+            UNREACHABLE();
+            return "0";
+        }();
+        return "fromHalf2(" + value + ')';
+    }
+
     std::string HMergeF32(Operation operation) {
         return "float(toHalf2(" + Visit(operation[0]) + ")[0])";
     }
 
     std::string HMergeH0(Operation operation) {
-        return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[1], toHalf2(" +
-               Visit(operation[1]) + ")[0]))";
+        return "fromHalf2(vec2(toHalf2(" + Visit(operation[1]) + ")[0], toHalf2(" +
+               Visit(operation[0]) + ")[1]))";
     }
 
     std::string HMergeH1(Operation operation) {
@@ -1179,34 +1195,46 @@ private:
         return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
     }
 
+    template <bool with_nan>
+    std::string GenerateHalfComparison(Operation operation, std::string compare_op) {
+        std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
+                                                  Type::HalfFloat, Type::HalfFloat)};
+        if constexpr (!with_nan) {
+            return comparison;
+        }
+        return "halfFloatNanComparison(" + comparison + ", " +
+               VisitOperand(operation, 0, Type::HalfFloat) + ", " +
+               VisitOperand(operation, 1, Type::HalfFloat) + ')';
+    }
+
+    template <bool with_nan>
     std::string Logical2HLessThan(Operation operation) {
-        return GenerateBinaryCall(operation, "lessThan", Type::Bool2, Type::HalfFloat,
-                                  Type::HalfFloat);
+        return GenerateHalfComparison<with_nan>(operation, "lessThan");
     }
 
+    template <bool with_nan>
     std::string Logical2HEqual(Operation operation) {
-        return GenerateBinaryCall(operation, "equal", Type::Bool2, Type::HalfFloat,
-                                  Type::HalfFloat);
+        return GenerateHalfComparison<with_nan>(operation, "equal");
     }
 
+    template <bool with_nan>
     std::string Logical2HLessEqual(Operation operation) {
-        return GenerateBinaryCall(operation, "lessThanEqual", Type::Bool2, Type::HalfFloat,
-                                  Type::HalfFloat);
+        return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
     }
 
+    template <bool with_nan>
     std::string Logical2HGreaterThan(Operation operation) {
-        return GenerateBinaryCall(operation, "greaterThan", Type::Bool2, Type::HalfFloat,
-                                  Type::HalfFloat);
+        return GenerateHalfComparison<with_nan>(operation, "greaterThan");
     }
 
+    template <bool with_nan>
     std::string Logical2HNotEqual(Operation operation) {
-        return GenerateBinaryCall(operation, "notEqual", Type::Bool2, Type::HalfFloat,
-                                  Type::HalfFloat);
+        return GenerateHalfComparison<with_nan>(operation, "notEqual");
     }
 
+    template <bool with_nan>
     std::string Logical2HGreaterEqual(Operation operation) {
-        return GenerateBinaryCall(operation, "greaterThanEqual", Type::Bool2, Type::HalfFloat,
-                                  Type::HalfFloat);
+        return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
     }
 
     std::string Texture(Operation operation) {
@@ -1495,6 +1523,8 @@ private:
         &GLSLDecompiler::Fma<Type::HalfFloat>,
         &GLSLDecompiler::Absolute<Type::HalfFloat>,
         &GLSLDecompiler::HNegate,
+        &GLSLDecompiler::HClamp,
+        &GLSLDecompiler::HUnpack,
         &GLSLDecompiler::HMergeF32,
         &GLSLDecompiler::HMergeH0,
         &GLSLDecompiler::HMergeH1,
@@ -1531,12 +1561,18 @@ private:
         &GLSLDecompiler::LogicalNotEqual<Type::Uint>,
         &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>,
 
-        &GLSLDecompiler::Logical2HLessThan,
-        &GLSLDecompiler::Logical2HEqual,
-        &GLSLDecompiler::Logical2HLessEqual,
-        &GLSLDecompiler::Logical2HGreaterThan,
-        &GLSLDecompiler::Logical2HNotEqual,
-        &GLSLDecompiler::Logical2HGreaterEqual,
+        &GLSLDecompiler::Logical2HLessThan<false>,
+        &GLSLDecompiler::Logical2HEqual<false>,
+        &GLSLDecompiler::Logical2HLessEqual<false>,
+        &GLSLDecompiler::Logical2HGreaterThan<false>,
+        &GLSLDecompiler::Logical2HNotEqual<false>,
+        &GLSLDecompiler::Logical2HGreaterEqual<false>,
+        &GLSLDecompiler::Logical2HLessThan<true>,
+        &GLSLDecompiler::Logical2HEqual<true>,
+        &GLSLDecompiler::Logical2HLessEqual<true>,
+        &GLSLDecompiler::Logical2HGreaterThan<true>,
+        &GLSLDecompiler::Logical2HNotEqual<true>,
+        &GLSLDecompiler::Logical2HGreaterEqual<true>,
 
         &GLSLDecompiler::Texture,
         &GLSLDecompiler::TextureLod,
@@ -1628,9 +1664,7 @@ private:
 
 std::string GetCommonDeclarations() {
     const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
-    const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
     return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
-           "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" +
            "#define ftoi floatBitsToInt\n"
            "#define ftou floatBitsToUint\n"
            "#define itof intBitsToFloat\n"
@@ -1640,6 +1674,12 @@ std::string GetCommonDeclarations() {
            "}\n\n"
            "vec2 toHalf2(float value) {\n"
            "    return unpackHalf2x16(ftou(value));\n"
+           "}\n\n"
+           "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {\n"
+           "    bvec2 is_nan1 = isnan(pair1);\n"
+           "    bvec2 is_nan2 = isnan(pair2);\n"
+           "    return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
+           "is_nan2.y);\n"
            "}\n";
 }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index fa6b0a10f..c1569e737 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -43,8 +43,9 @@ private:
 
 class GlobalMemoryEntry {
 public:
-    explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset)
-        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
+    explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, bool is_written)
+        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{
+                                                                                  is_written} {}
 
     u32 GetCbufIndex() const {
         return cbuf_index;
@@ -54,14 +55,25 @@ public:
         return cbuf_offset;
     }
 
+    bool IsRead() const {
+        return is_read;
+    }
+
+    bool IsWritten() const {
+        return is_written;
+    }
+
 private:
     u32 cbuf_index{};
     u32 cbuf_offset{};
+    bool is_read{};
+    bool is_written{};
 };
 
 struct ShaderEntries {
     std::vector<ConstBufferEntry> const_buffers;
     std::vector<SamplerEntry> samplers;
+    std::vector<SamplerEntry> bindless_samplers;
     std::vector<GlobalMemoryEntry> global_memory_entries;
     std::array<bool, Maxwell::NumClipDistances> clip_distances{};
     std::size_t shader_length{};
@@ -72,4 +84,4 @@ std::string GetCommonDeclarations();
 ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
                         Maxwell::ShaderStage stage, const std::string& suffix);
 
-} // namespace OpenGL::GLShader
-\ No newline at end of file
+} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 8a43eb157..53752b38d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -319,16 +319,19 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
         u32 type{};
         u8 is_array{};
         u8 is_shadow{};
+        u8 is_bindless{};
         if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
             file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
             file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
             file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
-            file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) {
+            file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) ||
+            file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) {
             return {};
         }
-        entry.entries.samplers.emplace_back(
-            static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
-            static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0);
+        entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
+                                            static_cast<std::size_t>(index),
+                                            static_cast<Tegra::Shader::TextureType>(type),
+                                            is_array != 0, is_shadow != 0, is_bindless != 0);
     }
 
     u32 global_memory_count{};
@@ -337,11 +340,16 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
     for (u32 i = 0; i < global_memory_count; ++i) {
         u32 cbuf_index{};
         u32 cbuf_offset{};
+        u8 is_read{};
+        u8 is_written{};
         if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
-            file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) {
+            file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) ||
+            file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) {
             return {};
         }
-        entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset);
+        entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
+                                                         is_written != 0);
     }
 
     for (auto& clip_distance : entry.entries.clip_distances) {
@@ -388,7 +396,8 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu
             file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
             file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
             file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
-            file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) {
+            file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1 ||
+            file.WriteObject(static_cast<u8>(sampler.IsBindless() ? 1 : 0)) != 1) {
             return false;
         }
     }
@@ -397,7 +406,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu
         return false;
     for (const auto& gmem : entries.global_memory_entries) {
         if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
-            file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) {
+            file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 ||
+            file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 ||
+            file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) {
             return false;
         }
     }
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index eaf3e03a0..05ab01dcb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,12 +2,44 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 
 namespace OpenGL::GLShader {
 
 using Tegra::Engines::Maxwell3D;
 
+ProgramManager::ProgramManager() {
+    pipeline.Create();
+}
+
+ProgramManager::~ProgramManager() = default;
+
+void ProgramManager::ApplyTo(OpenGLState& state) {
+    UpdatePipeline();
+    state.draw.shader_program = 0;
+    state.draw.program_pipeline = pipeline.handle;
+}
+
+void ProgramManager::UpdatePipeline() {
+    // Avoid updating the pipeline when values have no changed
+    if (old_state == current_state) {
+        return;
+    }
+
+    // Workaround for AMD bug
+    constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
+                                     GL_FRAGMENT_SHADER_BIT};
+    glUseProgramStages(pipeline.handle, all_used_stages, 0);
+
+    glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
+    glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
+    glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
+
+    old_state = current_state;
+}
+
 void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
     const auto& regs = maxwell.regs;
     const auto& state = maxwell.state;
@@ -16,7 +48,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shade
     viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
     viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
 
-    u32 func = static_cast<u32>(regs.alpha_test_func);
+    auto func{static_cast<u32>(regs.alpha_test_func)};
     // Normalize the gl variants of opCompare to be the same as the normal variants
     const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
     if (func >= op_gl_variant_base) {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 37dcfefdb..cec18a832 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -4,6 +4,8 @@
 
 #pragma once
 
+#include <cstddef>
+
 #include <glad/glad.h>
 
 #include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -38,55 +40,48 @@ static_assert(sizeof(MaxwellUniformData) < 16384,
 
 class ProgramManager {
 public:
-    ProgramManager() {
-        pipeline.Create();
-    }
+    explicit ProgramManager();
+    ~ProgramManager();
+
+    void ApplyTo(OpenGLState& state);
 
     void UseProgrammableVertexShader(GLuint program) {
-        vs = program;
+        current_state.vertex_shader = program;
     }
 
     void UseProgrammableGeometryShader(GLuint program) {
-        gs = program;
+        current_state.geometry_shader = program;
     }
 
     void UseProgrammableFragmentShader(GLuint program) {
-        fs = program;
+        current_state.fragment_shader = program;
     }
 
     void UseTrivialGeometryShader() {
-        gs = 0;
-    }
-
-    void ApplyTo(OpenGLState& state) {
-        UpdatePipeline();
-        state.draw.shader_program = 0;
-        state.draw.program_pipeline = pipeline.handle;
+        current_state.geometry_shader = 0;
     }
 
 private:
-    void UpdatePipeline() {
-        // Avoid updating the pipeline when values have no changed
-        if (old_vs == vs && old_fs == fs && old_gs == gs)
-            return;
-        // Workaround for AMD bug
-        glUseProgramStages(pipeline.handle,
-                           GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT,
-                           0);
-
-        glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vs);
-        glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, gs);
-        glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fs);
-
-        // Update the old values
-        old_vs = vs;
-        old_fs = fs;
-        old_gs = gs;
-    }
+    struct PipelineState {
+        bool operator==(const PipelineState& rhs) const {
+            return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader &&
+                   geometry_shader == rhs.geometry_shader;
+        }
+
+        bool operator!=(const PipelineState& rhs) const {
+            return !operator==(rhs);
+        }
+
+        GLuint vertex_shader{};
+        GLuint fragment_shader{};
+        GLuint geometry_shader{};
+    };
+
+    void UpdatePipeline();
 
     OGLPipeline pipeline;
-    GLuint vs{}, fs{}, gs{};
-    GLuint old_vs{}, old_fs{}, old_gs{};
+    PipelineState current_state;
+    PipelineState old_state;
 };
 
 } // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index ed3178f09..801826d3d 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -7,7 +7,6 @@
 #include <unordered_map>
 
 #include "common/assert.h"
-#include "common/cityhash.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/vk_sampler_cache.h"
@@ -28,39 +27,20 @@ static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4>
     }
 }
 
-std::size_t SamplerCacheKey::Hash() const {
-    static_assert(sizeof(raw) % sizeof(u64) == 0);
-    return static_cast<std::size_t>(
-        Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64)));
-}
-
-bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
-    return raw == rhs.raw;
-}
-
 VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {}
 
 VKSamplerCache::~VKSamplerCache() = default;
 
-vk::Sampler VKSamplerCache::GetSampler(const Tegra::Texture::TSCEntry& tsc) {
-    const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc});
-    auto& sampler = entry->second;
-    if (is_cache_miss) {
-        sampler = CreateSampler(tsc);
-    }
-    return *sampler;
-}
-
-UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) {
-    const float max_anisotropy = tsc.GetMaxAnisotropy();
-    const bool has_anisotropy = max_anisotropy > 1.0f;
+UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
+    const float max_anisotropy{tsc.GetMaxAnisotropy()};
+    const bool has_anisotropy{max_anisotropy > 1.0f};
 
-    const auto border_color = tsc.GetBorderColor();
-    const auto vk_border_color = TryConvertBorderColor(border_color);
+    const auto border_color{tsc.GetBorderColor()};
+    const auto vk_border_color{TryConvertBorderColor(border_color)};
     UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}",
                          border_color[0], border_color[1], border_color[2], border_color[3]);
 
-    constexpr bool unnormalized_coords = false;
+    constexpr bool unnormalized_coords{false};
 
     const vk::SamplerCreateInfo sampler_ci(
         {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
@@ -73,9 +53,13 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)
         tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
         unnormalized_coords);
 
-    const auto& dld = device.GetDispatchLoader();
-    const auto dev = device.GetLogical();
+    const auto& dld{device.GetDispatchLoader()};
+    const auto dev{device.GetLogical()};
     return dev.createSamplerUnique(sampler_ci, nullptr, dld);
 }
 
+vk::Sampler VKSamplerCache::ToSamplerType(const UniqueSampler& sampler) const {
+    return *sampler;
+}
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
index c6394dc87..771b05c73 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.h
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -8,49 +8,25 @@
 
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/sampler_cache.h"
 #include "video_core/textures/texture.h"
 
 namespace Vulkan {
 
 class VKDevice;
 
-struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
-    std::size_t Hash() const;
-
-    bool operator==(const SamplerCacheKey& rhs) const;
-
-    bool operator!=(const SamplerCacheKey& rhs) const {
-        return !operator==(rhs);
-    }
-};
-
-} // namespace Vulkan
-
-namespace std {
-
-template <>
-struct hash<Vulkan::SamplerCacheKey> {
-    std::size_t operator()(const Vulkan::SamplerCacheKey& k) const noexcept {
-        return k.Hash();
-    }
-};
-
-} // namespace std
-
-namespace Vulkan {
-
-class VKSamplerCache {
+class VKSamplerCache final : public VideoCommon::SamplerCache<vk::Sampler, UniqueSampler> {
 public:
     explicit VKSamplerCache(const VKDevice& device);
     ~VKSamplerCache();
 
-    vk::Sampler GetSampler(const Tegra::Texture::TSCEntry& tsc);
+protected:
+    UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const;
 
-private:
-    UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc);
+    vk::Sampler ToSamplerType(const UniqueSampler& sampler) const;
 
+private:
     const VKDevice& device;
-    std::unordered_map<SamplerCacheKey, UniqueSampler> cache;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
new file mode 100644
index 000000000..23d9b10db
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -0,0 +1,1396 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <functional>
+#include <map>
+#include <set>
+
+#include <fmt/format.h>
+
+#include <sirit/sirit.h>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/engines/shader_header.h"
+#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace Vulkan::VKShader {
+
+using Sirit::Id;
+using Tegra::Shader::Attribute;
+using Tegra::Shader::AttributeUse;
+using Tegra::Shader::Register;
+using namespace VideoCommon::Shader;
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
+using Operation = const OperationNode&;
+
+// TODO(Rodrigo): Use rasterizer's value
+constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000;
+constexpr u32 STAGE_BINDING_STRIDE = 0x100;
+
+enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
+
+struct SamplerImage {
+    Id image_type;
+    Id sampled_image_type;
+    Id sampler;
+};
+
+namespace {
+
+spv::Dim GetSamplerDim(const Sampler& sampler) {
+    switch (sampler.GetType()) {
+    case Tegra::Shader::TextureType::Texture1D:
+        return spv::Dim::Dim1D;
+    case Tegra::Shader::TextureType::Texture2D:
+        return spv::Dim::Dim2D;
+    case Tegra::Shader::TextureType::Texture3D:
+        return spv::Dim::Dim3D;
+    case Tegra::Shader::TextureType::TextureCube:
+        return spv::Dim::Cube;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented sampler type={}", static_cast<u32>(sampler.GetType()));
+        return spv::Dim::Dim2D;
+    }
+}
+
+/// Returns true if an attribute index is one of the 32 generic attributes
+constexpr bool IsGenericAttribute(Attribute::Index attribute) {
+    return attribute >= Attribute::Index::Attribute_0 &&
+           attribute <= Attribute::Index::Attribute_31;
+}
+
+/// Returns the location of a generic attribute
+constexpr u32 GetGenericAttributeLocation(Attribute::Index attribute) {
+    ASSERT(IsGenericAttribute(attribute));
+    return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0);
+}
+
+/// Returns true if an object has to be treated as precise
+bool IsPrecise(Operation operand) {
+    const auto& meta{operand.GetMeta()};
+    if (std::holds_alternative<MetaArithmetic>(meta)) {
+        return std::get<MetaArithmetic>(meta).precise;
+    }
+    return false;
+}
+
+} // namespace
+
+class SPIRVDecompiler : public Sirit::Module {
+public:
+    explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage)
+        : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} {
+        AddCapability(spv::Capability::Shader);
+        AddExtension("SPV_KHR_storage_buffer_storage_class");
+        AddExtension("SPV_KHR_variable_pointers");
+    }
+
+    void Decompile() {
+        AllocateBindings();
+        AllocateLabels();
+
+        DeclareVertex();
+        DeclareGeometry();
+        DeclareFragment();
+        DeclareRegisters();
+        DeclarePredicates();
+        DeclareLocalMemory();
+        DeclareInternalFlags();
+        DeclareInputAttributes();
+        DeclareOutputAttributes();
+        DeclareConstantBuffers();
+        DeclareGlobalBuffers();
+        DeclareSamplers();
+
+        execute_function =
+            Emit(OpFunction(t_void, spv::FunctionControlMask::Inline, TypeFunction(t_void)));
+        Emit(OpLabel());
+
+        const u32 first_address = ir.GetBasicBlocks().begin()->first;
+        const Id loop_label = OpLabel("loop");
+        const Id merge_label = OpLabel("merge");
+        const Id dummy_label = OpLabel();
+        const Id jump_label = OpLabel();
+        continue_label = OpLabel("continue");
+
+        std::vector<Sirit::Literal> literals;
+        std::vector<Id> branch_labels;
+        for (const auto& pair : labels) {
+            const auto [literal, label] = pair;
+            literals.push_back(literal);
+            branch_labels.push_back(label);
+        }
+
+        // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
+        // that shaders will use 20 nested SSYs and PBKs.
+        constexpr u32 FLOW_STACK_SIZE = 20;
+        const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));
+        jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint),
+                                 spv::StorageClass::Function, Constant(t_uint, first_address)));
+        flow_stack = Emit(OpVariable(TypePointer(spv::StorageClass::Function, flow_stack_type),
+                                     spv::StorageClass::Function, ConstantNull(flow_stack_type)));
+        flow_stack_top =
+            Emit(OpVariable(t_func_uint, spv::StorageClass::Function, Constant(t_uint, 0)));
+
+        Name(jmp_to, "jmp_to");
+        Name(flow_stack, "flow_stack");
+        Name(flow_stack_top, "flow_stack_top");
+
+        Emit(OpBranch(loop_label));
+        Emit(loop_label);
+        Emit(OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::Unroll));
+        Emit(OpBranch(dummy_label));
+
+        Emit(dummy_label);
+        const Id default_branch = OpLabel();
+        const Id jmp_to_load = Emit(OpLoad(t_uint, jmp_to));
+        Emit(OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone));
+        Emit(OpSwitch(jmp_to_load, default_branch, literals, branch_labels));
+
+        Emit(default_branch);
+        Emit(OpReturn());
+
+        for (const auto& pair : ir.GetBasicBlocks()) {
+            const auto& [address, bb] = pair;
+            Emit(labels.at(address));
+
+            VisitBasicBlock(bb);
+
+            const auto next_it = labels.lower_bound(address + 1);
+            const Id next_label = next_it != labels.end() ? next_it->second : default_branch;
+            Emit(OpBranch(next_label));
+        }
+
+        Emit(jump_label);
+        Emit(OpBranch(continue_label));
+        Emit(continue_label);
+        Emit(OpBranch(loop_label));
+        Emit(merge_label);
+        Emit(OpReturn());
+        Emit(OpFunctionEnd());
+    }
+
+    ShaderEntries GetShaderEntries() const {
+        ShaderEntries entries;
+        entries.const_buffers_base_binding = const_buffers_base_binding;
+        entries.global_buffers_base_binding = global_buffers_base_binding;
+        entries.samplers_base_binding = samplers_base_binding;
+        for (const auto& cbuf : ir.GetConstantBuffers()) {
+            entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
+        }
+        for (const auto& gmem_pair : ir.GetGlobalMemory()) {
+            const auto& [base, usage] = gmem_pair;
+            entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset);
+        }
+        for (const auto& sampler : ir.GetSamplers()) {
+            entries.samplers.emplace_back(sampler);
+        }
+        for (const auto& attr : ir.GetInputAttributes()) {
+            entries.attributes.insert(GetGenericAttributeLocation(attr.first));
+        }
+        entries.clip_distances = ir.GetClipDistances();
+        entries.shader_length = ir.GetLength();
+        entries.entry_function = execute_function;
+        entries.interfaces = interfaces;
+        return entries;
+    }
+
+private:
+    using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
+    using OperationDecompilersArray =
+        std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
+
+    static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
+    static constexpr u32 CBUF_STRIDE = 16;
+
+    void AllocateBindings() {
+        const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE;
+        u32 binding_iterator = binding_base;
+
+        const auto Allocate = [&binding_iterator](std::size_t count) {
+            const u32 current_binding = binding_iterator;
+            binding_iterator += static_cast<u32>(count);
+            return current_binding;
+        };
+        const_buffers_base_binding = Allocate(ir.GetConstantBuffers().size());
+        global_buffers_base_binding = Allocate(ir.GetGlobalMemory().size());
+        samplers_base_binding = Allocate(ir.GetSamplers().size());
+
+        ASSERT_MSG(binding_iterator - binding_base < STAGE_BINDING_STRIDE,
+                   "Stage binding stride is too small");
+    }
+
+    void AllocateLabels() {
+        for (const auto& pair : ir.GetBasicBlocks()) {
+            const u32 address = pair.first;
+            labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address)));
+        }
+    }
+
+    void DeclareVertex() {
+        if (stage != ShaderStage::Vertex)
+            return;
+
+        DeclareVertexRedeclarations();
+    }
+
+    void DeclareGeometry() {
+        if (stage != ShaderStage::Geometry)
+            return;
+
+        UNIMPLEMENTED();
+    }
+
+    void DeclareFragment() {
+        if (stage != ShaderStage::Fragment)
+            return;
+
+        for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) {
+            if (!IsRenderTargetUsed(rt)) {
+                continue;
+            }
+
+            const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output));
+            Name(id, fmt::format("frag_color{}", rt));
+            Decorate(id, spv::Decoration::Location, rt);
+
+            frag_colors[rt] = id;
+            interfaces.push_back(id);
+        }
+
+        if (header.ps.omap.depth) {
+            frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output));
+            Name(frag_depth, "frag_depth");
+            Decorate(frag_depth, spv::Decoration::BuiltIn,
+                     static_cast<u32>(spv::BuiltIn::FragDepth));
+
+            interfaces.push_back(frag_depth);
+        }
+
+        frag_coord = DeclareBuiltIn(spv::BuiltIn::FragCoord, spv::StorageClass::Input, t_in_float4,
+                                    "frag_coord");
+        front_facing = DeclareBuiltIn(spv::BuiltIn::FrontFacing, spv::StorageClass::Input,
+                                      t_in_bool, "front_facing");
+    }
+
+    void DeclareRegisters() {
+        for (const u32 gpr : ir.GetRegisters()) {
+            const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
+            Name(id, fmt::format("gpr_{}", gpr));
+            registers.emplace(gpr, AddGlobalVariable(id));
+        }
+    }
+
+    void DeclarePredicates() {
+        for (const auto pred : ir.GetPredicates()) {
+            const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
+            Name(id, fmt::format("pred_{}", static_cast<u32>(pred)));
+            predicates.emplace(pred, AddGlobalVariable(id));
+        }
+    }
+
+    void DeclareLocalMemory() {
+        if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) {
+            const auto element_count = static_cast<u32>(Common::AlignUp(local_memory_size, 4) / 4);
+            const Id type_array = TypeArray(t_float, Constant(t_uint, element_count));
+            const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array);
+            Name(type_pointer, "LocalMemory");
+
+            local_memory =
+                OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array));
+            AddGlobalVariable(Name(local_memory, "local_memory"));
+        }
+    }
+
+    void DeclareInternalFlags() {
+        constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
+                                                                         "overflow"};
+        for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
+            const auto flag_code = static_cast<InternalFlag>(flag);
+            const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
+            internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
+        }
+    }
+
+    void DeclareInputAttributes() {
+        for (const auto element : ir.GetInputAttributes()) {
+            const Attribute::Index index = element.first;
+            if (!IsGenericAttribute(index)) {
+                continue;
+            }
+
+            UNIMPLEMENTED_IF(stage == ShaderStage::Geometry);
+
+            const u32 location = GetGenericAttributeLocation(index);
+            const Id id = OpVariable(t_in_float4, spv::StorageClass::Input);
+            Name(AddGlobalVariable(id), fmt::format("in_attr{}", location));
+            input_attributes.emplace(index, id);
+            interfaces.push_back(id);
+
+            Decorate(id, spv::Decoration::Location, location);
+
+            if (stage != ShaderStage::Fragment) {
+                continue;
+            }
+            switch (header.ps.GetAttributeUse(location)) {
+            case AttributeUse::Constant:
+                Decorate(id, spv::Decoration::Flat);
+                break;
+            case AttributeUse::ScreenLinear:
+                Decorate(id, spv::Decoration::NoPerspective);
+                break;
+            case AttributeUse::Perspective:
+                // Default
+                break;
+            default:
+                UNREACHABLE_MSG("Unused attribute being fetched");
+            }
+        }
+    }
+
+    void DeclareOutputAttributes() {
+        for (const auto index : ir.GetOutputAttributes()) {
+            if (!IsGenericAttribute(index)) {
+                continue;
+            }
+            const auto location = GetGenericAttributeLocation(index);
+            const Id id = OpVariable(t_out_float4, spv::StorageClass::Output);
+            Name(AddGlobalVariable(id), fmt::format("out_attr{}", location));
+            output_attributes.emplace(index, id);
+            interfaces.push_back(id);
+
+            Decorate(id, spv::Decoration::Location, location);
+        }
+    }
+
+    void DeclareConstantBuffers() {
+        u32 binding = const_buffers_base_binding;
+        for (const auto& entry : ir.GetConstantBuffers()) {
+            const auto [index, size] = entry;
+            const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform);
+            AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
+
+            Decorate(id, spv::Decoration::Binding, binding++);
+            Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
+            constant_buffers.emplace(index, id);
+        }
+    }
+
+    void DeclareGlobalBuffers() {
+        u32 binding = global_buffers_base_binding;
+        for (const auto& entry : ir.GetGlobalMemory()) {
+            const auto [base, usage] = entry;
+            const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer);
+            AddGlobalVariable(
+                Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset)));
+
+            Decorate(id, spv::Decoration::Binding, binding++);
+            Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
+            global_buffers.emplace(base, id);
+        }
+    }
+
+    void DeclareSamplers() {
+        u32 binding = samplers_base_binding;
+        for (const auto& sampler : ir.GetSamplers()) {
+            const auto dim = GetSamplerDim(sampler);
+            const int depth = sampler.IsShadow() ? 1 : 0;
+            const int arrayed = sampler.IsArray() ? 1 : 0;
+            // TODO(Rodrigo): Sampled 1 indicates that the image will be used with a sampler. When
+            // SULD and SUST instructions are implemented, replace this value.
+            const int sampled = 1;
+            const Id image_type =
+                TypeImage(t_float, dim, depth, arrayed, false, sampled, spv::ImageFormat::Unknown);
+            const Id sampled_image_type = TypeSampledImage(image_type);
+            const Id pointer_type =
+                TypePointer(spv::StorageClass::UniformConstant, sampled_image_type);
+            const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
+            AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex())));
+
+            sampler_images.insert(
+                {static_cast<u32>(sampler.GetIndex()), {image_type, sampled_image_type, id}});
+
+            Decorate(id, spv::Decoration::Binding, binding++);
+            Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
+        }
+    }
+
+    void DeclareVertexRedeclarations() {
+        vertex_index = DeclareBuiltIn(spv::BuiltIn::VertexIndex, spv::StorageClass::Input,
+                                      t_in_uint, "vertex_index");
+        instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input,
+                                        t_in_uint, "instance_index");
+
+        bool is_point_size_declared = false;
+        bool is_clip_distances_declared = false;
+        for (const auto index : ir.GetOutputAttributes()) {
+            if (index == Attribute::Index::PointSize) {
+                is_point_size_declared = true;
+            } else if (index == Attribute::Index::ClipDistances0123 ||
+                       index == Attribute::Index::ClipDistances4567) {
+                is_clip_distances_declared = true;
+            }
+        }
+
+        std::vector<Id> members;
+        members.push_back(t_float4);
+        if (is_point_size_declared) {
+            members.push_back(t_float);
+        }
+        if (is_clip_distances_declared) {
+            members.push_back(TypeArray(t_float, Constant(t_uint, 8)));
+        }
+
+        const Id gl_per_vertex_struct = Name(TypeStruct(members), "PerVertex");
+        Decorate(gl_per_vertex_struct, spv::Decoration::Block);
+
+        u32 declaration_index = 0;
+        const auto MemberDecorateBuiltIn = [&](spv::BuiltIn builtin, std::string name,
+                                               bool condition) {
+            if (!condition)
+                return u32{};
+            MemberName(gl_per_vertex_struct, declaration_index, name);
+            MemberDecorate(gl_per_vertex_struct, declaration_index, spv::Decoration::BuiltIn,
+                           static_cast<u32>(builtin));
+            return declaration_index++;
+        };
+
+        position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true);
+        point_size_index =
+            MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared);
+        clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances",
+                                                     is_clip_distances_declared);
+
+        const Id type_pointer = TypePointer(spv::StorageClass::Output, gl_per_vertex_struct);
+        per_vertex = OpVariable(type_pointer, spv::StorageClass::Output);
+        AddGlobalVariable(Name(per_vertex, "per_vertex"));
+        interfaces.push_back(per_vertex);
+    }
+
+    void VisitBasicBlock(const NodeBlock& bb) {
+        for (const Node node : bb) {
+            static_cast<void>(Visit(node));
+        }
+    }
+
+    Id Visit(Node node) {
+        if (const auto operation = std::get_if<OperationNode>(node)) {
+            const auto operation_index = static_cast<std::size_t>(operation->GetCode());
+            const auto decompiler = operation_decompilers[operation_index];
+            if (decompiler == nullptr) {
+                UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
+            }
+            return (this->*decompiler)(*operation);
+
+        } else if (const auto gpr = std::get_if<GprNode>(node)) {
+            const u32 index = gpr->GetIndex();
+            if (index == Register::ZeroIndex) {
+                return Constant(t_float, 0.0f);
+            }
+            return Emit(OpLoad(t_float, registers.at(index)));
+
+        } else if (const auto immediate = std::get_if<ImmediateNode>(node)) {
+            return BitcastTo<Type::Float>(Constant(t_uint, immediate->GetValue()));
+
+        } else if (const auto predicate = std::get_if<PredicateNode>(node)) {
+            const auto value = [&]() -> Id {
+                switch (const auto index = predicate->GetIndex(); index) {
+                case Tegra::Shader::Pred::UnusedIndex:
+                    return v_true;
+                case Tegra::Shader::Pred::NeverExecute:
+                    return v_false;
+                default:
+                    return Emit(OpLoad(t_bool, predicates.at(index)));
+                }
+            }();
+            if (predicate->IsNegated()) {
+                return Emit(OpLogicalNot(t_bool, value));
+            }
+            return value;
+
+        } else if (const auto abuf = std::get_if<AbufNode>(node)) {
+            const auto attribute = abuf->GetIndex();
+            const auto element = abuf->GetElement();
+
+            switch (attribute) {
+            case Attribute::Index::Position:
+                if (stage != ShaderStage::Fragment) {
+                    UNIMPLEMENTED();
+                    break;
+                } else {
+                    if (element == 3) {
+                        return Constant(t_float, 1.0f);
+                    }
+                    return Emit(OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)));
+                }
+            case Attribute::Index::TessCoordInstanceIDVertexID:
+                // TODO(Subv): Find out what the values are for the first two elements when inside a
+                // vertex shader, and what's the value of the fourth element when inside a Tess Eval
+                // shader.
+                ASSERT(stage == ShaderStage::Vertex);
+                switch (element) {
+                case 2:
+                    return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, instance_index)));
+                case 3:
+                    return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, vertex_index)));
+                }
+                UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
+                return Constant(t_float, 0);
+            case Attribute::Index::FrontFacing:
+                // TODO(Subv): Find out what the values are for the other elements.
+                ASSERT(stage == ShaderStage::Fragment);
+                if (element == 3) {
+                    const Id is_front_facing = Emit(OpLoad(t_bool, front_facing));
+                    const Id true_value =
+                        BitcastTo<Type::Float>(Constant(t_int, static_cast<s32>(-1)));
+                    const Id false_value = BitcastTo<Type::Float>(Constant(t_int, 0));
+                    return Emit(OpSelect(t_float, is_front_facing, true_value, false_value));
+                }
+                UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
+                return Constant(t_float, 0.0f);
+            default:
+                if (IsGenericAttribute(attribute)) {
+                    const Id pointer =
+                        AccessElement(t_in_float, input_attributes.at(attribute), element);
+                    return Emit(OpLoad(t_float, pointer));
+                }
+                break;
+            }
+            UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
+
+        } else if (const auto cbuf = std::get_if<CbufNode>(node)) {
+            const Node offset = cbuf->GetOffset();
+            const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
+
+            Id buffer_index{};
+            Id buffer_element{};
+
+            if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
+                // Direct access
+                const u32 offset_imm = immediate->GetValue();
+                ASSERT(offset_imm % 4 == 0);
+                buffer_index = Constant(t_uint, offset_imm / 16);
+                buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
+
+            } else if (std::holds_alternative<OperationNode>(*offset)) {
+                // Indirect access
+                // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which
+                // emits sub-optimal code on GLSL from my testing).
+                const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
+                const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
+                const Id final_offset = Emit(
+                    OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1)));
+                buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
+                buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));
+
+            } else {
+                UNREACHABLE_MSG("Unmanaged offset node type");
+            }
+
+            const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
+                                                  buffer_index, buffer_element));
+            return Emit(OpLoad(t_float, pointer));
+
+        } else if (const auto gmem = std::get_if<GmemNode>(node)) {
+            const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
+            const Id real = BitcastTo<Type::Uint>(Visit(gmem->GetRealAddress()));
+            const Id base = BitcastTo<Type::Uint>(Visit(gmem->GetBaseAddress()));
+
+            Id offset = Emit(OpISub(t_uint, real, base));
+            offset = Emit(OpUDiv(t_uint, offset, Constant(t_uint, 4u)));
+            return Emit(OpLoad(t_float, Emit(OpAccessChain(t_gmem_float, gmem_buffer,
+                                                           Constant(t_uint, 0u), offset))));
+
+        } else if (const auto conditional = std::get_if<ConditionalNode>(node)) {
+            // It's invalid to call conditional on nested nodes, use an operation instead
+            const Id true_label = OpLabel();
+            const Id skip_label = OpLabel();
+            Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label));
+            Emit(true_label);
+
+            VisitBasicBlock(conditional->GetCode());
+
+            Emit(OpBranch(skip_label));
+            Emit(skip_label);
+            return {};
+
+        } else if (const auto comment = std::get_if<CommentNode>(node)) {
+            Name(Emit(OpUndef(t_void)), comment->GetText());
+            return {};
+        }
+
+        UNREACHABLE();
+        return {};
+    }
+
+    template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type>
+    Id Unary(Operation operation) {
+        const Id type_def = GetTypeDefinition(result_type);
+        const Id op_a = VisitOperand<type_a>(operation, 0);
+
+        const Id value = BitcastFrom<result_type>(Emit((this->*func)(type_def, op_a)));
+        if (IsPrecise(operation)) {
+            Decorate(value, spv::Decoration::NoContraction);
+        }
+        return value;
+    }
+
+    template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type,
+              Type type_b = type_a>
+    Id Binary(Operation operation) {
+        const Id type_def = GetTypeDefinition(result_type);
+        const Id op_a = VisitOperand<type_a>(operation, 0);
+        const Id op_b = VisitOperand<type_b>(operation, 1);
+
+        const Id value = BitcastFrom<result_type>(Emit((this->*func)(type_def, op_a, op_b)));
+        if (IsPrecise(operation)) {
+            Decorate(value, spv::Decoration::NoContraction);
+        }
+        return value;
+    }
+
+    template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type,
+              Type type_b = type_a, Type type_c = type_b>
+    Id Ternary(Operation operation) {
+        const Id type_def = GetTypeDefinition(result_type);
+        const Id op_a = VisitOperand<type_a>(operation, 0);
+        const Id op_b = VisitOperand<type_b>(operation, 1);
+        const Id op_c = VisitOperand<type_c>(operation, 2);
+
+        const Id value = BitcastFrom<result_type>(Emit((this->*func)(type_def, op_a, op_b, op_c)));
+        if (IsPrecise(operation)) {
+            Decorate(value, spv::Decoration::NoContraction);
+        }
+        return value;
+    }
+
+    template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type,
+              Type type_b = type_a, Type type_c = type_b, Type type_d = type_c>
+    Id Quaternary(Operation operation) {
+        const Id type_def = GetTypeDefinition(result_type);
+        const Id op_a = VisitOperand<type_a>(operation, 0);
+        const Id op_b = VisitOperand<type_b>(operation, 1);
+        const Id op_c = VisitOperand<type_c>(operation, 2);
+        const Id op_d = VisitOperand<type_d>(operation, 3);
+
+        const Id value =
+            BitcastFrom<result_type>(Emit((this->*func)(type_def, op_a, op_b, op_c, op_d)));
+        if (IsPrecise(operation)) {
+            Decorate(value, spv::Decoration::NoContraction);
+        }
+        return value;
+    }
+
+    Id Assign(Operation operation) {
+        const Node dest = operation[0];
+        const Node src = operation[1];
+
+        Id target{};
+        if (const auto gpr = std::get_if<GprNode>(dest)) {
+            if (gpr->GetIndex() == Register::ZeroIndex) {
+                // Writing to Register::ZeroIndex is a no op
+                return {};
+            }
+            target = registers.at(gpr->GetIndex());
+
+        } else if (const auto abuf = std::get_if<AbufNode>(dest)) {
+            target = [&]() -> Id {
+                switch (const auto attribute = abuf->GetIndex(); attribute) {
+                case Attribute::Index::Position:
+                    return AccessElement(t_out_float, per_vertex, position_index,
+                                         abuf->GetElement());
+                case Attribute::Index::PointSize:
+                    return AccessElement(t_out_float, per_vertex, point_size_index);
+                case Attribute::Index::ClipDistances0123:
+                    return AccessElement(t_out_float, per_vertex, clip_distances_index,
+                                         abuf->GetElement());
+                case Attribute::Index::ClipDistances4567:
+                    return AccessElement(t_out_float, per_vertex, clip_distances_index,
+                                         abuf->GetElement() + 4);
+                default:
+                    if (IsGenericAttribute(attribute)) {
+                        return AccessElement(t_out_float, output_attributes.at(attribute),
+                                             abuf->GetElement());
+                    }
+                    UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
+                                      static_cast<u32>(attribute));
+                    return {};
+                }
+            }();
+
+        } else if (const auto lmem = std::get_if<LmemNode>(dest)) {
+            Id address = BitcastTo<Type::Uint>(Visit(lmem->GetAddress()));
+            address = Emit(OpUDiv(t_uint, address, Constant(t_uint, 4)));
+            target = Emit(OpAccessChain(t_prv_float, local_memory, {address}));
+        }
+
+        Emit(OpStore(target, Visit(src)));
+        return {};
+    }
+
+    Id HNegate(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id HClamp(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id HUnpack(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id HMergeF32(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id HMergeH0(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id HMergeH1(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id HPack2(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id LogicalAssign(Operation operation) {
+        const Node dest = operation[0];
+        const Node src = operation[1];
+
+        Id target{};
+        if (const auto pred = std::get_if<PredicateNode>(dest)) {
+            ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
+
+            const auto index = pred->GetIndex();
+            switch (index) {
+            case Tegra::Shader::Pred::NeverExecute:
+            case Tegra::Shader::Pred::UnusedIndex:
+                // Writing to these predicates is a no-op
+                return {};
+            }
+            target = predicates.at(index);
+
+        } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) {
+            target = internal_flags.at(static_cast<u32>(flag->GetFlag()));
+        }
+
+        Emit(OpStore(target, Visit(src)));
+        return {};
+    }
+
+    Id LogicalPick2(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id LogicalAll2(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id LogicalAny2(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id GetTextureSampler(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        const auto entry = sampler_images.at(static_cast<u32>(meta->sampler.GetIndex()));
+        return Emit(OpLoad(entry.sampled_image_type, entry.sampler));
+    }
+
+    Id GetTextureImage(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        const auto entry = sampler_images.at(static_cast<u32>(meta->sampler.GetIndex()));
+        return Emit(OpImage(entry.image_type, GetTextureSampler(operation)));
+    }
+
+    Id GetTextureCoordinates(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        std::vector<Id> coords;
+        for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) {
+            coords.push_back(Visit(operation[i]));
+        }
+        if (meta->sampler.IsArray()) {
+            const Id array_integer = BitcastTo<Type::Int>(Visit(meta->array));
+            coords.push_back(Emit(OpConvertSToF(t_float, array_integer)));
+        }
+        if (meta->sampler.IsShadow()) {
+            coords.push_back(Visit(meta->depth_compare));
+        }
+
+        const std::array<Id, 4> t_float_lut = {nullptr, t_float2, t_float3, t_float4};
+        return coords.size() == 1
+                   ? coords[0]
+                   : Emit(OpCompositeConstruct(t_float_lut.at(coords.size() - 1), coords));
+    }
+
+    Id GetTextureElement(Operation operation, Id sample_value) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
+        return Emit(OpCompositeExtract(t_float, sample_value, meta->element));
+    }
+
+    Id Texture(Operation operation) {
+        const Id texture = Emit(OpImageSampleImplicitLod(t_float4, GetTextureSampler(operation),
+                                                         GetTextureCoordinates(operation)));
+        return GetTextureElement(operation, texture);
+    }
+
+    Id TextureLod(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        const Id texture = Emit(OpImageSampleExplicitLod(
+            t_float4, GetTextureSampler(operation), GetTextureCoordinates(operation),
+            spv::ImageOperandsMask::Lod, Visit(meta->lod)));
+        return GetTextureElement(operation, texture);
+    }
+
+    Id TextureGather(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        const auto coords = GetTextureCoordinates(operation);
+
+        Id texture;
+        if (meta->sampler.IsShadow()) {
+            texture = Emit(OpImageDrefGather(t_float4, GetTextureSampler(operation), coords,
+                                             Visit(meta->component)));
+        } else {
+            u32 component_value = 0;
+            if (meta->component) {
+                const auto component = std::get_if<ImmediateNode>(meta->component);
+                ASSERT_MSG(component, "Component is not an immediate value");
+                component_value = component->GetValue();
+            }
+            texture = Emit(OpImageGather(t_float4, GetTextureSampler(operation), coords,
+                                         Constant(t_uint, component_value)));
+        }
+
+        return GetTextureElement(operation, texture);
+    }
+
+    Id TextureQueryDimensions(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        const auto image_id = GetTextureImage(operation);
+        AddCapability(spv::Capability::ImageQuery);
+
+        if (meta->element == 3) {
+            return BitcastTo<Type::Float>(Emit(OpImageQueryLevels(t_int, image_id)));
+        }
+
+        const Id lod = VisitOperand<Type::Uint>(operation, 0);
+        const std::size_t coords_count = [&]() {
+            switch (const auto type = meta->sampler.GetType(); type) {
+            case Tegra::Shader::TextureType::Texture1D:
+                return 1;
+            case Tegra::Shader::TextureType::Texture2D:
+            case Tegra::Shader::TextureType::TextureCube:
+                return 2;
+            case Tegra::Shader::TextureType::Texture3D:
+                return 3;
+            default:
+                UNREACHABLE_MSG("Invalid texture type={}", static_cast<u32>(type));
+                return 2;
+            }
+        }();
+
+        if (meta->element >= coords_count) {
+            return Constant(t_float, 0.0f);
+        }
+
+        const std::array<Id, 3> types = {t_int, t_int2, t_int3};
+        const Id sizes = Emit(OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod));
+        const Id size = Emit(OpCompositeExtract(t_int, sizes, meta->element));
+        return BitcastTo<Type::Float>(size);
+    }
+
+    Id TextureQueryLod(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id TexelFetch(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id Branch(Operation operation) {
+        const auto target = std::get_if<ImmediateNode>(operation[0]);
+        UNIMPLEMENTED_IF(!target);
+
+        Emit(OpStore(jmp_to, Constant(t_uint, target->GetValue())));
+        BranchingOp([&]() { Emit(OpBranch(continue_label)); });
+        return {};
+    }
+
+    Id PushFlowStack(Operation operation) {
+        const auto target = std::get_if<ImmediateNode>(operation[0]);
+        ASSERT(target);
+
+        const Id current = Emit(OpLoad(t_uint, flow_stack_top));
+        const Id next = Emit(OpIAdd(t_uint, current, Constant(t_uint, 1)));
+        const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, current));
+
+        Emit(OpStore(access, Constant(t_uint, target->GetValue())));
+        Emit(OpStore(flow_stack_top, next));
+        return {};
+    }
+
+    Id PopFlowStack(Operation operation) {
+        const Id current = Emit(OpLoad(t_uint, flow_stack_top));
+        const Id previous = Emit(OpISub(t_uint, current, Constant(t_uint, 1)));
+        const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, previous));
+        const Id target = Emit(OpLoad(t_uint, access));
+
+        Emit(OpStore(flow_stack_top, previous));
+        Emit(OpStore(jmp_to, target));
+        BranchingOp([&]() { Emit(OpBranch(continue_label)); });
+        return {};
+    }
+
+    Id Exit(Operation operation) {
+        switch (stage) {
+        case ShaderStage::Vertex: {
+            // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't
+            // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it.
+            const Id position = AccessElement(t_float4, per_vertex, position_index);
+            Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2)));
+            depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f)));
+            depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f)));
+            Emit(OpStore(AccessElement(t_out_float, position, 2), depth));
+            break;
+        }
+        case ShaderStage::Fragment: {
+            const auto SafeGetRegister = [&](u32 reg) {
+                // TODO(Rodrigo): Replace with contains once C++20 releases
+                if (const auto it = registers.find(reg); it != registers.end()) {
+                    return Emit(OpLoad(t_float, it->second));
+                }
+                return Constant(t_float, 0.0f);
+            };
+
+            UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0,
+                                 "Sample mask write is unimplemented");
+
+            // TODO(Rodrigo): Alpha testing
+
+            // Write the color outputs using the data in the shader registers, disabled
+            // rendertargets/components are skipped in the register assignment.
+            u32 current_reg = 0;
+            for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
+                // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
+                for (u32 component = 0; component < 4; ++component) {
+                    if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
+                        Emit(OpStore(AccessElement(t_out_float, frag_colors.at(rt), component),
+                                     SafeGetRegister(current_reg)));
+                        ++current_reg;
+                    }
+                }
+            }
+            if (header.ps.omap.depth) {
+                // The depth output is always 2 registers after the last color output, and
+                // current_reg already contains one past the last color register.
+                Emit(OpStore(frag_depth, SafeGetRegister(current_reg + 1)));
+            }
+            break;
+        }
+        }
+
+        BranchingOp([&]() { Emit(OpReturn()); });
+        return {};
+    }
+
+    Id Discard(Operation operation) {
+        BranchingOp([&]() { Emit(OpKill()); });
+        return {};
+    }
+
+    Id EmitVertex(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id EndPrimitive(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id YNegate(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
+                      const std::string& name) {
+        const Id id = OpVariable(type, storage);
+        Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin));
+        AddGlobalVariable(Name(id, name));
+        interfaces.push_back(id);
+        return id;
+    }
+
+    bool IsRenderTargetUsed(u32 rt) const {
+        for (u32 component = 0; component < 4; ++component) {
+            if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    template <typename... Args>
+    Id AccessElement(Id pointer_type, Id composite, Args... elements_) {
+        std::vector<Id> members;
+        auto elements = {elements_...};
+        for (const auto element : elements) {
+            members.push_back(Constant(t_uint, element));
+        }
+
+        return Emit(OpAccessChain(pointer_type, composite, members));
+    }
+
+    template <Type type>
+    Id VisitOperand(Operation operation, std::size_t operand_index) {
+        const Id value = Visit(operation[operand_index]);
+
+        switch (type) {
+        case Type::Bool:
+        case Type::Bool2:
+        case Type::Float:
+            return value;
+        case Type::Int:
+            return Emit(OpBitcast(t_int, value));
+        case Type::Uint:
+            return Emit(OpBitcast(t_uint, value));
+        case Type::HalfFloat:
+            UNIMPLEMENTED();
+        }
+        UNREACHABLE();
+        return value;
+    }
+
+    template <Type type>
+    Id BitcastFrom(Id value) {
+        switch (type) {
+        case Type::Bool:
+        case Type::Bool2:
+        case Type::Float:
+            return value;
+        case Type::Int:
+        case Type::Uint:
+            return Emit(OpBitcast(t_float, value));
+        case Type::HalfFloat:
+            UNIMPLEMENTED();
+        }
+        UNREACHABLE();
+        return value;
+    }
+
+    template <Type type>
+    Id BitcastTo(Id value) {
+        switch (type) {
+        case Type::Bool:
+        case Type::Bool2:
+            UNREACHABLE();
+        case Type::Float:
+            return Emit(OpBitcast(t_float, value));
+        case Type::Int:
+            return Emit(OpBitcast(t_int, value));
+        case Type::Uint:
+            return Emit(OpBitcast(t_uint, value));
+        case Type::HalfFloat:
+            UNIMPLEMENTED();
+        }
+        UNREACHABLE();
+        return value;
+    }
+
+    Id GetTypeDefinition(Type type) {
+        switch (type) {
+        case Type::Bool:
+            return t_bool;
+        case Type::Bool2:
+            return t_bool2;
+        case Type::Float:
+            return t_float;
+        case Type::Int:
+            return t_int;
+        case Type::Uint:
+            return t_uint;
+        case Type::HalfFloat:
+            UNIMPLEMENTED();
+        }
+        UNREACHABLE();
+        return {};
+    }
+
+    void BranchingOp(std::function<void()> call) {
+        const Id true_label = OpLabel();
+        const Id skip_label = OpLabel();
+        Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::Flatten));
+        Emit(OpBranchConditional(v_true, true_label, skip_label, 1, 0));
+        Emit(true_label);
+        call();
+
+        Emit(skip_label);
+    }
+
+    static constexpr OperationDecompilersArray operation_decompilers = {
+        &SPIRVDecompiler::Assign,
+
+        &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
+                                  Type::Float>,
+
+        &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>,
+        &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
+        &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,
+
+        &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>,
+
+        &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>,
+        &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>,
+        &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>,
+
+        &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>,
+        &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>,
+        &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>,
+        &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>,
+        &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>,
+        &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>,
+
+        &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>,
+        &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>,
+        &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
+        &SPIRVDecompiler::HNegate,
+        &SPIRVDecompiler::HClamp,
+        &SPIRVDecompiler::HUnpack,
+        &SPIRVDecompiler::HMergeF32,
+        &SPIRVDecompiler::HMergeH0,
+        &SPIRVDecompiler::HMergeH1,
+        &SPIRVDecompiler::HPack2,
+
+        &SPIRVDecompiler::LogicalAssign,
+        &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>,
+        &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>,
+        &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
+        &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
+        &SPIRVDecompiler::LogicalPick2,
+        &SPIRVDecompiler::LogicalAll2,
+        &SPIRVDecompiler::LogicalAny2,
+
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpIsNan, Type::Bool>,
+
+        &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>,
+
+        &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>,
+
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>,
+        // TODO(Rodrigo): Should these use the OpFUnord* variants?
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>,
+
+        &SPIRVDecompiler::Texture,
+        &SPIRVDecompiler::TextureLod,
+        &SPIRVDecompiler::TextureGather,
+        &SPIRVDecompiler::TextureQueryDimensions,
+        &SPIRVDecompiler::TextureQueryLod,
+        &SPIRVDecompiler::TexelFetch,
+
+        &SPIRVDecompiler::Branch,
+        &SPIRVDecompiler::PushFlowStack,
+        &SPIRVDecompiler::PopFlowStack,
+        &SPIRVDecompiler::Exit,
+        &SPIRVDecompiler::Discard,
+
+        &SPIRVDecompiler::EmitVertex,
+        &SPIRVDecompiler::EndPrimitive,
+
+        &SPIRVDecompiler::YNegate,
+    };
+
+    const ShaderIR& ir;
+    const ShaderStage stage;
+    const Tegra::Shader::Header header;
+
+    const Id t_void = Name(TypeVoid(), "void");
+
+    const Id t_bool = Name(TypeBool(), "bool");
+    const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2");
+
+    const Id t_int = Name(TypeInt(32, true), "int");
+    const Id t_int2 = Name(TypeVector(t_int, 2), "int2");
+    const Id t_int3 = Name(TypeVector(t_int, 3), "int3");
+    const Id t_int4 = Name(TypeVector(t_int, 4), "int4");
+
+    const Id t_uint = Name(TypeInt(32, false), "uint");
+    const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2");
+    const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3");
+    const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4");
+
+    const Id t_float = Name(TypeFloat(32), "float");
+    const Id t_float2 = Name(TypeVector(t_float, 2), "float2");
+    const Id t_float3 = Name(TypeVector(t_float, 3), "float3");
+    const Id t_float4 = Name(TypeVector(t_float, 4), "float4");
+
+    const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool");
+    const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float");
+
+    const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint");
+
+    const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool");
+    const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint");
+    const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float");
+    const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4");
+
+    const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float");
+    const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");
+
+    const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
+    const Id t_cbuf_array =
+        Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"),
+                 spv::Decoration::ArrayStride, CBUF_STRIDE);
+    const Id t_cbuf_struct = MemberDecorate(
+        Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
+    const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct);
+
+    const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
+    const Id t_gmem_array =
+        Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4u), "GmemArray");
+    const Id t_gmem_struct = MemberDecorate(
+        Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
+    const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
+
+    const Id v_float_zero = Constant(t_float, 0.0f);
+    const Id v_true = ConstantTrue(t_bool);
+    const Id v_false = ConstantFalse(t_bool);
+
+    Id per_vertex{};
+    std::map<u32, Id> registers;
+    std::map<Tegra::Shader::Pred, Id> predicates;
+    Id local_memory{};
+    std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
+    std::map<Attribute::Index, Id> input_attributes;
+    std::map<Attribute::Index, Id> output_attributes;
+    std::map<u32, Id> constant_buffers;
+    std::map<GlobalMemoryBase, Id> global_buffers;
+    std::map<u32, SamplerImage> sampler_images;
+
+    Id instance_index{};
+    Id vertex_index{};
+    std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
+    Id frag_depth{};
+    Id frag_coord{};
+    Id front_facing{};
+
+    u32 position_index{};
+    u32 point_size_index{};
+    u32 clip_distances_index{};
+
+    std::vector<Id> interfaces;
+
+    u32 const_buffers_base_binding{};
+    u32 global_buffers_base_binding{};
+    u32 samplers_base_binding{};
+
+    Id execute_function{};
+    Id jmp_to{};
+    Id flow_stack_top{};
+    Id flow_stack{};
+    Id continue_label{};
+    std::map<u32, Id> labels;
+};
+
+DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) {
+    auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage);
+    decompiler->Decompile();
+    return {std::move(decompiler), decompiler->GetShaderEntries()};
+}
+
+} // namespace Vulkan::VKShader
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
new file mode 100644
index 000000000..329d8fa38
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -0,0 +1,80 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include <sirit/sirit.h>
+
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+class ShaderIR;
+}
+
+namespace Vulkan::VKShader {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+using SamplerEntry = VideoCommon::Shader::Sampler;
+
+constexpr u32 DESCRIPTOR_SET = 0;
+
+class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
+public:
+    explicit constexpr ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry, u32 index)
+        : VideoCommon::Shader::ConstBuffer{entry}, index{index} {}
+
+    constexpr u32 GetIndex() const {
+        return index;
+    }
+
+private:
+    u32 index{};
+};
+
+class GlobalBufferEntry {
+public:
+    explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset)
+        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
+
+    u32 GetCbufIndex() const {
+        return cbuf_index;
+    }
+
+    u32 GetCbufOffset() const {
+        return cbuf_offset;
+    }
+
+private:
+    u32 cbuf_index{};
+    u32 cbuf_offset{};
+};
+
+struct ShaderEntries {
+    u32 const_buffers_base_binding{};
+    u32 global_buffers_base_binding{};
+    u32 samplers_base_binding{};
+    std::vector<ConstBufferEntry> const_buffers;
+    std::vector<GlobalBufferEntry> global_buffers;
+    std::vector<SamplerEntry> samplers;
+    std::set<u32> attributes;
+    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
+    std::size_t shader_length{};
+    Sirit::Id entry_function{};
+    std::vector<Sirit::Id> interfaces;
+};
+
+using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
+
+DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage);
+
+} // namespace Vulkan::VKShader
diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp
new file mode 100644
index 000000000..53c7ef12d
--- /dev/null
+++ b/src/video_core/sampler_cache.cpp
@@ -0,0 +1,21 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/cityhash.h"
+#include "common/common_types.h"
+#include "video_core/sampler_cache.h"
+
+namespace VideoCommon {
+
+std::size_t SamplerCacheKey::Hash() const {
+    static_assert(sizeof(raw) % sizeof(u64) == 0);
+    return static_cast<std::size_t>(
+        Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64)));
+}
+
+bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
+    return raw == rhs.raw;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h
new file mode 100644
index 000000000..cbe3ad071
--- /dev/null
+++ b/src/video_core/sampler_cache.h
@@ -0,0 +1,60 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <unordered_map>
+
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
+    std::size_t Hash() const;
+
+    bool operator==(const SamplerCacheKey& rhs) const;
+
+    bool operator!=(const SamplerCacheKey& rhs) const {
+        return !operator==(rhs);
+    }
+};
+
+} // namespace VideoCommon
+
+namespace std {
+
+template <>
+struct hash<VideoCommon::SamplerCacheKey> {
+    std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept {
+        return k.Hash();
+    }
+};
+
+} // namespace std
+
+namespace VideoCommon {
+
+template <typename SamplerType, typename SamplerStorageType>
+class SamplerCache {
+public:
+    SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) {
+        const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc});
+        auto& sampler = entry->second;
+        if (is_cache_miss) {
+            sampler = CreateSampler(tsc);
+        }
+        return ToSamplerType(sampler);
+    }
+
+protected:
+    virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0;
+
+    virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0;
+
+private:
+    std::unordered_map<SamplerCacheKey, SamplerStorageType> cache;
+};
+
+} // namespace VideoCommon
+\ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index baee89107..9467f9417 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -18,7 +18,9 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
 
     if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
         opcode->get().GetId() == OpCode::Id::HADD2_R) {
-        UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
+        if (instr.alu_half.ftz != 0) {
+            LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+        }
     }
     UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
 
@@ -27,9 +29,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
     const bool negate_b =
         opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
 
-    const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a);
-
-    // instr.alu_half.type_a
+    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
+    op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);
 
     Node op_b = [&]() {
         switch (opcode->get().GetId()) {
@@ -44,17 +45,17 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
             return Immediate(0);
         }
     }();
+    op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b);
     op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
 
     Node value = [&]() {
-        MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}};
         switch (opcode->get().GetId()) {
         case OpCode::Id::HADD2_C:
         case OpCode::Id::HADD2_R:
-            return Operation(OperationCode::HAdd, meta, op_a, op_b);
+            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
         case OpCode::Id::HMUL2_C:
         case OpCode::Id::HMUL2_R:
-            return Operation(OperationCode::HMul, meta, op_a, op_b);
+            return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
         default:
             UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
             return Immediate(0);
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index c2164ba50..fbcd35b18 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -17,34 +17,33 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
-        UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
+        if (instr.alu_half_imm.ftz != 0) {
+            LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+        }
     } else {
         UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None);
     }
-    UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0,
-                         "Half float immediate saturation not implemented");
 
-    Node op_a = GetRegister(instr.gpr8);
+    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
     op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
 
     const Node op_b = UnpackHalfImmediate(instr, true);
 
     Node value = [&]() {
-        MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}};
         switch (opcode->get().GetId()) {
         case OpCode::Id::HADD2_IMM:
-            return Operation(OperationCode::HAdd, meta, op_a, op_b);
+            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
         case OpCode::Id::HMUL2_IMM:
-            return Operation(OperationCode::HMul, meta, op_a, op_b);
+            return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
         default:
             UNREACHABLE();
             return Immediate(0);
         }
     }();
-    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
 
+    value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate);
+    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
     SetRegister(bb, instr.gpr0, value);
-
     return pc;
 }
 
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 55a6fbbf2..ba15b1115 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -18,13 +18,29 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     switch (opcode->get().GetId()) {
-    case OpCode::Id::I2I_R: {
+    case OpCode::Id::I2I_R:
+    case OpCode::Id::I2I_C:
+    case OpCode::Id::I2I_IMM: {
         UNIMPLEMENTED_IF(instr.conversion.selector);
+        UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
+        UNIMPLEMENTED_IF(instr.alu.saturate_d);
 
         const bool input_signed = instr.conversion.is_input_signed;
         const bool output_signed = instr.conversion.is_output_signed;
 
-        Node value = GetRegister(instr.gpr20);
+        Node value = [&]() {
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::I2I_R:
+                return GetRegister(instr.gpr20);
+            case OpCode::Id::I2I_C:
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+            case OpCode::Id::I2I_IMM:
+                return Immediate(instr.alu.GetSignedImm20_20());
+            default:
+                UNREACHABLE();
+                return Immediate(0);
+            }
+        }();
         value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
 
         value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a,
@@ -38,17 +54,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::I2F_R:
-    case OpCode::Id::I2F_C: {
-        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
+    case OpCode::Id::I2F_C:
+    case OpCode::Id::I2F_IMM: {
+        UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
         UNIMPLEMENTED_IF(instr.conversion.selector);
         UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                              "Condition codes generation in I2F is not implemented");
 
         Node value = [&]() {
-            if (instr.is_b_gpr) {
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::I2F_R:
                 return GetRegister(instr.gpr20);
-            } else {
+            case OpCode::Id::I2F_C:
                 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+            case OpCode::Id::I2F_IMM:
+                return Immediate(instr.alu.GetSignedImm20_20());
+            default:
+                UNREACHABLE();
+                return Immediate(0);
             }
         }();
         const bool input_signed = instr.conversion.is_input_signed;
@@ -62,24 +85,31 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::F2F_R:
-    case OpCode::Id::F2F_C: {
-        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
-        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
+    case OpCode::Id::F2F_C:
+    case OpCode::Id::F2F_IMM: {
+        UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word);
+        UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word);
         UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                              "Condition codes generation in F2F is not implemented");
 
         Node value = [&]() {
-            if (instr.is_b_gpr) {
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::F2F_R:
                 return GetRegister(instr.gpr20);
-            } else {
+            case OpCode::Id::F2F_C:
                 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+            case OpCode::Id::F2F_IMM:
+                return GetImmediate19(instr);
+            default:
+                UNREACHABLE();
+                return Immediate(0);
             }
         }();
 
         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
 
         value = [&]() {
-            switch (instr.conversion.f2f.rounding) {
+            switch (instr.conversion.f2f.GetRoundingMode()) {
             case Tegra::Shader::F2fRoundingOp::None:
                 return value;
             case Tegra::Shader::F2fRoundingOp::Round:
@@ -102,15 +132,22 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::F2I_R:
-    case OpCode::Id::F2I_C: {
+    case OpCode::Id::F2I_C:
+    case OpCode::Id::F2I_IMM: {
         UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
         UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                              "Condition codes generation in F2I is not implemented");
         Node value = [&]() {
-            if (instr.is_b_gpr) {
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::F2I_R:
                 return GetRegister(instr.gpr20);
-            } else {
+            case OpCode::Id::F2I_C:
                 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+            case OpCode::Id::F2I_IMM:
+                return GetImmediate19(instr);
+            default:
+                UNREACHABLE();
+                return Immediate(0);
             }
         }();
 
@@ -134,7 +171,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
         }();
         const bool is_signed = instr.conversion.is_output_signed;
         value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
-        value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed);
+        value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed);
 
         SetRegister(bb, instr.gpr0, value);
         break;
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 748368555..1dd94bf9d 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -18,11 +18,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED_IF(instr.hset2.ftz != 0);
+    if (instr.hset2.ftz != 0) {
+        LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+    }
+
+    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
+    op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
 
-    // instr.hset2.type_a
-    // instr.hset2.type_b
-    Node op_a = GetRegister(instr.gpr8);
     Node op_b = [&]() {
         switch (opcode->get().GetId()) {
         case OpCode::Id::HSET2_R:
@@ -32,14 +34,12 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
             return Immediate(0);
         }
     }();
-
-    op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
+    op_b = UnpackHalfFloat(op_b, instr.hset2.type_b);
     op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
 
     const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
 
-    MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}};
-    const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b);
+    const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b);
 
     const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
 
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index e68512692..6e59eb650 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -19,10 +19,10 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
 
     UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);
 
-    Node op_a = GetRegister(instr.gpr8);
+    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
     op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
 
-    const Node op_b = [&]() {
+    Node op_b = [&]() {
         switch (opcode->get().GetId()) {
         case OpCode::Id::HSETP2_R:
             return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
@@ -32,6 +32,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
             return Immediate(0);
         }
     }();
+    op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b);
 
     // We can't use the constant predicate as destination.
     ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));
@@ -42,8 +43,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
     const OperationCode pair_combiner =
         instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
 
-    MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}};
-    const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
+    const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
     const Node first_pred = Operation(pair_combiner, comparison);
 
     // Set the primary predicate to the result of Predicate OP SecondPredicate
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index 7a07c5ec6..5c1becce5 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -27,10 +27,6 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
     }
 
     constexpr auto identity = HalfType::H0_H1;
-
-    const HalfType type_a = instr.hfma2.type_a;
-    const Node op_a = GetRegister(instr.gpr8);
-
     bool neg_b{}, neg_c{};
     auto [saturate, type_b, op_b, type_c,
           op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
@@ -62,11 +58,11 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
     }();
     UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
 
-    op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
-    op_c = GetOperandAbsNegHalf(op_c, false, neg_c);
+    const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
+    op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
+    op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
 
-    MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
-    Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
+    Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
     value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
 
     SetRegister(bb, instr.gpr0, value);
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ea3c71eed..ea1092db1 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -8,6 +8,7 @@
 
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/shader_ir.h"
 
@@ -18,6 +19,23 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
 
+namespace {
+u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
+    switch (uniform_type) {
+    case Tegra::Shader::UniformType::Single:
+        return 1;
+    case Tegra::Shader::UniformType::Double:
+        return 2;
+    case Tegra::Shader::UniformType::Quad:
+    case Tegra::Shader::UniformType::UnsignedQuad:
+        return 4;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
+        return 1;
+    }
+}
+} // namespace
+
 u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
@@ -85,8 +103,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::LD_L: {
-        UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
-                             static_cast<u32>(instr.ld_l.unknown.Value()));
+        LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}",
+                  static_cast<u64>(instr.ld_l.unknown.Value()));
 
         const auto GetLmem = [&](s32 offset) {
             ASSERT(offset % 4 == 0);
@@ -126,45 +144,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::LDG: {
-        const u32 count = [&]() {
-            switch (instr.ldg.type) {
-            case Tegra::Shader::UniformType::Single:
-                return 1;
-            case Tegra::Shader::UniformType::Double:
-                return 2;
-            case Tegra::Shader::UniformType::Quad:
-            case Tegra::Shader::UniformType::UnsignedQuad:
-                return 4;
-            default:
-                UNIMPLEMENTED_MSG("Unimplemented LDG size!");
-                return 1;
-            }
-        }();
-
-        const Node addr_register = GetRegister(instr.gpr8);
-        const Node base_address =
-            TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
-        const auto cbuf = std::get_if<CbufNode>(base_address);
-        ASSERT(cbuf != nullptr);
-        const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
-        ASSERT(cbuf_offset_imm != nullptr);
-        const auto cbuf_offset = cbuf_offset_imm->GetValue();
-
-        bb.push_back(Comment(
-            fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
-
-        const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
-        used_global_memory_bases.insert(descriptor);
-
-        const Node immediate_offset =
-            Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
-        const Node base_real_address =
-            Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
+        const auto [real_address_base, base_address, descriptor] =
+            TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
+                                    static_cast<u32>(instr.ldg.immediate_offset.Value()), false);
 
+        const u32 count = GetUniformTypeElementsCount(instr.ldg.type);
         for (u32 i = 0; i < count; ++i) {
             const Node it_offset = Immediate(i * 4);
             const Node real_address =
-                Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset);
+                Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
             const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
 
             SetTemporal(bb, i, gmem);
@@ -174,6 +162,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::STG: {
+        const auto [real_address_base, base_address, descriptor] =
+            TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
+                                    static_cast<u32>(instr.stg.immediate_offset.Value()), true);
+
+        // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
+        SetTemporal(bb, 0, real_address_base);
+
+        const u32 count = GetUniformTypeElementsCount(instr.stg.type);
+        for (u32 i = 0; i < count; ++i) {
+            SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
+        }
+        for (u32 i = 0; i < count; ++i) {
+            const Node it_offset = Immediate(i * 4);
+            const Node real_address =
+                Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
+            const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
+
+            bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
+        }
+        break;
+    }
     case OpCode::Id::ST_A: {
         UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                              "Indirect attribute loads are not supported");
@@ -205,8 +215,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::ST_L: {
-        UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
-                             static_cast<u32>(instr.st_l.unknown.Value()));
+        LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}",
+                  static_cast<u64>(instr.st_l.cache_management.Value()));
 
         const auto GetLmemAddr = [&](s32 offset) {
             ASSERT(offset % 4 == 0);
@@ -236,4 +246,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
+std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
+                                                                           Node addr_register,
+                                                                           u32 immediate_offset,
+                                                                           bool is_write) {
+    const Node base_address{
+        TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
+    const auto cbuf = std::get_if<CbufNode>(base_address);
+    ASSERT(cbuf != nullptr);
+    const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
+    ASSERT(cbuf_offset_imm != nullptr);
+    const auto cbuf_offset = cbuf_offset_imm->GetValue();
+
+    bb.push_back(
+        Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
+
+    const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
+    const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
+    auto& usage = entry->second;
+    if (is_write) {
+        usage.is_written = true;
+    } else {
+        usage.is_read = true;
+    }
+
+    const auto real_address =
+        Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
+
+    return {real_address, base_address, descriptor};
+}
+
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index a775b402b..fa65ac9a9 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -40,7 +40,7 @@ static std::size_t GetCoordCount(TextureType texture_type) {
 u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
-
+    bool is_bindless = false;
     switch (opcode->get().GetId()) {
     case OpCode::Id::TEX: {
         if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
@@ -54,7 +54,25 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         const auto process_mode = instr.tex.GetTextureProcessMode();
         WriteTexInstructionFloat(
             bb, instr,
-            GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
+            GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
+        break;
+    }
+    case OpCode::Id::TEX_B: {
+        UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+
+        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
+        }
+
+        const TextureType texture_type{instr.tex_b.texture_type};
+        const bool is_array = instr.tex_b.array != 0;
+        const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
+        const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
+        const auto process_mode = instr.tex_b.GetTextureProcessMode();
+        WriteTexInstructionFloat(bb, instr,
+                                 GetTexCode(instr, texture_type, process_mode, depth_compare,
+                                            is_array, is_aoffi, {instr.gpr20}));
         break;
     }
     case OpCode::Id::TEXS: {
@@ -134,6 +152,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         WriteTexsInstructionFloat(bb, instr, values);
         break;
     }
+    case OpCode::Id::TXQ_B:
+        is_bindless = true;
+        [[fallthrough]];
     case OpCode::Id::TXQ: {
         if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
             LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
@@ -143,7 +164,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         // Sadly, not all texture instructions specify the type of texture their sampler
         // uses. This must be fixed at a later instance.
         const auto& sampler =
-            GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
+            is_bindless
+                ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false,
+                                     false)
+                : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
 
         u32 indexer = 0;
         switch (instr.txq.query_type) {
@@ -154,7 +178,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                 }
                 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
                 const Node value =
-                    Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
+                    Operation(OperationCode::TextureQueryDimensions, meta,
+                              GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
                 SetTemporal(bb, indexer++, value);
             }
             for (u32 i = 0; i < indexer; ++i) {
@@ -168,6 +193,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::TMML_B:
+        is_bindless = true;
+        [[fallthrough]];
     case OpCode::Id::TMML: {
         UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
                              "NDV is not implemented");
@@ -178,7 +206,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
 
         auto texture_type = instr.tmml.texture_type.Value();
         const bool is_array = instr.tmml.array != 0;
-        const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+        const auto& sampler = is_bindless
+                                  ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false)
+                                  : GetSampler(instr.sampler, texture_type, is_array, false);
 
         std::vector<Node> coords;
 
@@ -199,17 +229,19 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
             coords.push_back(GetRegister(instr.gpr8.Value() + 1));
             texture_type = TextureType::Texture2D;
         }
-
+        u32 indexer = 0;
         for (u32 element = 0; element < 2; ++element) {
+            if (!instr.tmml.IsComponentEnabled(element)) {
+                continue;
+            }
             auto params = coords;
             MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
             const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
-            SetTemporal(bb, element, value);
+            SetTemporal(bb, indexer++, value);
         }
-        for (u32 element = 0; element < 2; ++element) {
-            SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
+        for (u32 i = 0; i < indexer; ++i) {
+            SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
         }
-
         break;
     }
     case OpCode::Id::TLDS: {
@@ -254,6 +286,34 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
     return *used_samplers.emplace(entry).first;
 }
 
+const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
+                                            bool is_array, bool is_shadow) {
+    const Node sampler_register = GetRegister(reg);
+    const Node base_sampler =
+        TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
+    const auto cbuf = std::get_if<CbufNode>(base_sampler);
+    const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
+    ASSERT(cbuf_offset_imm != nullptr);
+    const auto cbuf_offset = cbuf_offset_imm->GetValue();
+    const auto cbuf_index = cbuf->GetIndex();
+    const u64 cbuf_key = (cbuf_index << 32) | cbuf_offset;
+
+    // If this sampler has already been used, return the existing mapping.
+    const auto itr =
+        std::find_if(used_samplers.begin(), used_samplers.end(),
+                     [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; });
+    if (itr != used_samplers.end()) {
+        ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
+               itr->IsShadow() == is_shadow);
+        return *itr;
+    }
+
+    // Otherwise create a new mapping for this sampler
+    const std::size_t next_index = used_samplers.size();
+    const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow};
+    return *used_samplers.emplace(entry).first;
+}
+
 void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
     u32 dest_elem = 0;
     for (u32 elem = 0; elem < 4; ++elem) {
@@ -326,22 +386,27 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
 Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                                TextureProcessMode process_mode, std::vector<Node> coords,
                                Node array, Node depth_compare, u32 bias_offset,
-                               std::vector<Node> aoffi) {
+                               std::vector<Node> aoffi,
+                               std::optional<Tegra::Shader::Register> bindless_reg) {
     const bool is_array = array;
     const bool is_shadow = depth_compare;
+    const bool is_bindless = bindless_reg.has_value();
 
     UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
                              (texture_type == TextureType::TextureCube && is_array && is_shadow),
                          "This method is not supported.");
 
-    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
+    const auto& sampler = is_bindless
+                              ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow)
+                              : GetSampler(instr.sampler, texture_type, is_array, is_shadow);
 
     const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                             process_mode == TextureProcessMode::LL ||
                             process_mode == TextureProcessMode::LLA;
 
-    // LOD selection (either via bias or explicit textureLod) not supported in GL for
-    // sampler2DArrayShadow and samplerCubeArrayShadow.
+    // LOD selection (either via bias or explicit textureLod) not
+    // supported in GL for sampler2DArrayShadow and
+    // samplerCubeArrayShadow.
     const bool gl_lod_supported =
         !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
           (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
@@ -359,8 +424,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
             lod = Immediate(0.0f);
             break;
         case TextureProcessMode::LB:
-            // If present, lod or bias are always stored in the register indexed by the gpr20
-            // field with an offset depending on the usage of the other registers
+            // If present, lod or bias are always stored in the register
+            // indexed by the gpr20 field with an offset depending on the
+            // usage of the other registers
             bias = GetRegister(instr.gpr20.Value() + bias_offset);
             break;
         case TextureProcessMode::LL:
@@ -384,11 +450,18 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
 
 Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array,
-                           bool is_aoffi) {
+                           bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
     const bool lod_bias_enabled{
         (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
 
+    const bool is_bindless = bindless_reg.has_value();
+
     u64 parameter_register = instr.gpr20.Value();
+    if (is_bindless) {
+        ++parameter_register;
+    }
+
+    const u32 bias_lod_offset = (is_bindless ? 1 : 0);
     if (lod_bias_enabled) {
         ++parameter_register;
     }
@@ -423,7 +496,8 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
         dc = GetRegister(parameter_register++);
     }
 
-    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
+                          aoffi, bindless_reg);
 }
 
 Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -459,7 +533,8 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
         dc = GetRegister(depth_register);
     }
 
-    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {},
+                          {});
 }
 
 Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index ac5112d78..17f2f711c 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -189,7 +189,11 @@ Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
     const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
     const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
 
-    return Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, first_negate, second_negate);
+    return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate);
+}
+
+Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
+    return Operation(OperationCode::HUnpack, type, value);
 }
 
 Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
@@ -209,17 +213,26 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
 
 Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
     if (absolute) {
-        value = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, value);
+        value = Operation(OperationCode::HAbsolute, NO_PRECISE, value);
     }
     if (negate) {
-        value = Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, GetPredicate(true),
+        value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true),
                           GetPredicate(true));
     }
     return value;
 }
 
+Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
+    if (!saturate) {
+        return value;
+    }
+    const Node positive_zero = Immediate(std::copysignf(0, 1));
+    const Node positive_one = Immediate(1.0f);
+    return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one);
+}
+
 Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
-    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
+    const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
         {PredCondition::LessThan, OperationCode::LogicalFLessThan},
         {PredCondition::Equal, OperationCode::LogicalFEqual},
         {PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
@@ -255,7 +268,7 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
 
 Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
                                              Node op_b) {
-    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
+    const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
         {PredCondition::LessThan, OperationCode::LogicalILessThan},
         {PredCondition::Equal, OperationCode::LogicalIEqual},
         {PredCondition::LessEqual, OperationCode::LogicalILessEqual},
@@ -283,40 +296,32 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
     return predicate;
 }
 
-Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
-                                          const MetaHalfArithmetic& meta, Node op_a, Node op_b) {
-
-    UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
-                             condition == PredCondition::NotEqualWithNan ||
-                             condition == PredCondition::LessEqualWithNan ||
-                             condition == PredCondition::GreaterThanWithNan ||
-                             condition == PredCondition::GreaterEqualWithNan,
-                         "Unimplemented NaN comparison for half floats");
-
-    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
+Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
+                                          Node op_b) {
+    const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
         {PredCondition::LessThan, OperationCode::Logical2HLessThan},
         {PredCondition::Equal, OperationCode::Logical2HEqual},
         {PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
         {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
         {PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
         {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
-        {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan},
-        {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual},
-        {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual},
-        {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan},
-        {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}};
+        {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
+        {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
+        {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
+        {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
+        {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}};
 
     const auto comparison{PredicateComparisonTable.find(condition)};
     UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
                          "Unknown predicate comparison operation");
 
-    const Node predicate = Operation(comparison->second, meta, op_a, op_b);
+    const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
 
     return predicate;
 }
 
 OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
-    static const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = {
+    const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = {
         {PredOperation::And, OperationCode::LogicalAnd},
         {PredOperation::Or, OperationCode::LogicalOr},
         {PredOperation::Xor, OperationCode::LogicalXor},
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 4888998d3..81278fb33 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -109,11 +109,13 @@ enum class OperationCode {
     UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
     UBitCount,        /// (MetaArithmetic, uint) -> uint
 
-    HAdd,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
-    HMul,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
-    HFma,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
+    HAdd,      /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+    HMul,      /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+    HFma,      /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
     HAbsolute, /// (f16vec2 a) -> f16vec2
     HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
+    HClamp,    /// (f16vec2 src, float min, float max) -> f16vec2
+    HUnpack,   /// (Tegra::Shader::HalfType, T value) -> f16vec2
     HMergeF32, /// (f16vec2 src) -> float
     HMergeH0,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
     HMergeH1,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
@@ -150,12 +152,18 @@ enum class OperationCode {
     LogicalUNotEqual,     /// (uint a, uint b) -> bool
     LogicalUGreaterEqual, /// (uint a, uint b) -> bool
 
-    Logical2HLessThan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
-    Logical2HEqual,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
-    Logical2HLessEqual,    /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
-    Logical2HGreaterThan,  /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
-    Logical2HNotEqual,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
-    Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HLessThan,            /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HEqual,               /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HLessEqual,           /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HGreaterThan,         /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HNotEqual,            /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HGreaterEqual,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HLessThanWithNan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HEqualWithNan,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HLessEqualWithNan,    /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HGreaterThanWithNan,  /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HNotEqualWithNan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 
     Texture,                /// (MetaTexture, float[N] coords) -> float4
     TextureLod,             /// (MetaTexture, float[N] coords) -> float4
@@ -196,9 +204,23 @@ enum class ExitMethod {
 
 class Sampler {
 public:
+    // Use this constructor for bounded Samplers
     explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
                      bool is_array, bool is_shadow)
-        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
+        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
+          is_bindless{false} {}
+
+    // Use this constructor for bindless Samplers
+    explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
+                     Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
+        : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
+          is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {}
+
+    // Use this only for serialization/deserialization
+    explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
+                     bool is_array, bool is_shadow, bool is_bindless)
+        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
+          is_bindless{is_bindless} {}
 
     std::size_t GetOffset() const {
         return offset;
@@ -220,6 +242,14 @@ public:
         return is_shadow;
     }
 
+    bool IsBindless() const {
+        return is_bindless;
+    }
+
+    std::pair<u32, u32> GetBindlessCBuf() const {
+        return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
+    }
+
     bool operator<(const Sampler& rhs) const {
         return std::tie(offset, index, type, is_array, is_shadow) <
                std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow);
@@ -231,8 +261,9 @@ private:
     std::size_t offset{};
     std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
-    bool is_array{};  ///< Whether the texture is being sampled as an array texture or not.
-    bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
+    bool is_array{};    ///< Whether the texture is being sampled as an array texture or not.
+    bool is_shadow{};   ///< Whether the texture is being sampled as a depth texture or not.
+    bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
 };
 
 class ConstBuffer {
@@ -276,15 +307,13 @@ struct GlobalMemoryBase {
     }
 };
 
-struct MetaArithmetic {
-    bool precise{};
+struct GlobalMemoryUsage {
+    bool is_read{};
+    bool is_written{};
 };
 
-struct MetaHalfArithmetic {
+struct MetaArithmetic {
     bool precise{};
-    std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1,
-                                                    Tegra::Shader::HalfType::H0_H1,
-                                                    Tegra::Shader::HalfType::H0_H1};
 };
 
 struct MetaTexture {
@@ -298,11 +327,10 @@ struct MetaTexture {
     u32 element{};
 };
 
-constexpr MetaArithmetic PRECISE = {true};
-constexpr MetaArithmetic NO_PRECISE = {false};
-constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false};
+inline constexpr MetaArithmetic PRECISE = {true};
+inline constexpr MetaArithmetic NO_PRECISE = {false};
 
-using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>;
+using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
 
 /// Holds any kind of operation that can be done in the IR
 class OperationNode final {
@@ -578,8 +606,8 @@ public:
         return used_clip_distances;
     }
 
-    const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const {
-        return used_global_memory_bases;
+    const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const {
+        return used_global_memory;
     }
 
     std::size_t GetLength() const {
@@ -706,10 +734,14 @@ private:
 
     /// Unpacks a half immediate from an instruction
     Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
+    /// Unpacks a binary value into a half float pair with a type format
+    Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type);
     /// Merges a half pair into another value
     Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
     /// Conditionally absolute/negated half float pair. Absolute is applied first
     Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
+    /// Conditionally saturates a half float pair
+    Node GetSaturatedHalfFloat(Node value, bool saturate = true);
 
     /// Returns a predicate comparing two floats
     Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
@@ -717,8 +749,7 @@ private:
     Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
                                        Node op_a, Node op_b);
     /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared
-    Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
-                                    const MetaHalfArithmetic& meta, Node op_a, Node op_b);
+    Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
 
     /// Returns a predicate combiner operation
     OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
@@ -730,6 +761,11 @@ private:
     const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
                               Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
 
+    // Accesses a texture sampler for a bindless texture.
+    const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
+                                      Tegra::Shader::TextureType type, bool is_array,
+                                      bool is_shadow);
+
     /// Extracts a sequence of bits from a node
     Node BitfieldExtract(Node value, u32 offset, u32 bits);
 
@@ -743,7 +779,8 @@ private:
 
     Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
-                     bool is_array, bool is_aoffi);
+                     bool is_array, bool is_aoffi,
+                     std::optional<Tegra::Shader::Register> bindless_reg);
 
     Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                       Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
@@ -763,7 +800,8 @@ private:
 
     Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                          Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
-                         Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
+                         Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
+                         std::optional<Tegra::Shader::Register> bindless_reg);
 
     Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
                          u64 byte_height);
@@ -781,6 +819,11 @@ private:
 
     std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
 
+    std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
+                                                                     Node addr_register,
+                                                                     u32 immediate_offset,
+                                                                     bool is_write);
+
     template <typename... T>
     Node Operation(OperationCode code, const T*... operands) {
         return StoreNode(OperationNode(code, operands...));
@@ -834,7 +877,7 @@ private:
     std::map<u32, ConstBuffer> used_cbufs;
     std::set<Sampler> used_samplers;
     std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
-    std::set<GlobalMemoryBase> used_global_memory_bases;
+    std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
 
     Tegra::Shader::Header header;
 };
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index a7ac26d71..3b022a456 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -294,6 +294,8 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
         return PixelFormat::Z16;
     case Tegra::Texture::TextureFormat::Z24S8:
         return PixelFormat::Z24S8;
+    case Tegra::Texture::TextureFormat::ZF32_X24S8:
+        return PixelFormat::Z32FS8;
     case Tegra::Texture::TextureFormat::DXT1:
         return is_srgb ? PixelFormat::DXT1_SRGB : PixelFormat::DXT1;
     case Tegra::Texture::TextureFormat::DXT23:
diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp
new file mode 100644
index 000000000..e96eba7cc
--- /dev/null
+++ b/src/video_core/texture_cache.cpp
@@ -0,0 +1,386 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/cityhash.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache.h"
+#include "video_core/textures/decoders.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+using VideoCore::Surface::SurfaceTarget;
+
+using VideoCore::Surface::ComponentTypeFromDepthFormat;
+using VideoCore::Surface::ComponentTypeFromRenderTarget;
+using VideoCore::Surface::ComponentTypeFromTexture;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+using VideoCore::Surface::PixelFormatFromTextureFormat;
+using VideoCore::Surface::SurfaceTargetFromTextureType;
+
+constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
+    return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
+}
+
+SurfaceParams SurfaceParams::CreateForTexture(Core::System& system,
+                                              const Tegra::Texture::FullTextureInfo& config) {
+    SurfaceParams params;
+    params.is_tiled = config.tic.IsTiled();
+    params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
+    params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
+    params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
+    params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
+    params.pixel_format =
+        PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), false);
+    params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
+    params.type = GetFormatType(params.pixel_format);
+    params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
+    params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
+    params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
+    params.depth = config.tic.Depth();
+    if (params.target == SurfaceTarget::TextureCubemap ||
+        params.target == SurfaceTarget::TextureCubeArray) {
+        params.depth *= 6;
+    }
+    params.pitch = params.is_tiled ? 0 : config.tic.Pitch();
+    params.unaligned_height = config.tic.Height();
+    params.num_levels = config.tic.max_mip_level + 1;
+
+    params.CalculateCachedValues();
+    return params;
+}
+
+SurfaceParams SurfaceParams::CreateForDepthBuffer(
+    Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
+    u32 block_width, u32 block_height, u32 block_depth,
+    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
+    SurfaceParams params;
+    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
+    params.block_width = 1 << std::min(block_width, 5U);
+    params.block_height = 1 << std::min(block_height, 5U);
+    params.block_depth = 1 << std::min(block_depth, 5U);
+    params.tile_width_spacing = 1;
+    params.pixel_format = PixelFormatFromDepthFormat(format);
+    params.component_type = ComponentTypeFromDepthFormat(format);
+    params.type = GetFormatType(params.pixel_format);
+    params.width = zeta_width;
+    params.height = zeta_height;
+    params.unaligned_height = zeta_height;
+    params.target = SurfaceTarget::Texture2D;
+    params.depth = 1;
+    params.num_levels = 1;
+
+    params.CalculateCachedValues();
+    return params;
+}
+
+SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
+    const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
+    SurfaceParams params;
+    params.is_tiled =
+        config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
+    params.block_width = 1 << config.memory_layout.block_width;
+    params.block_height = 1 << config.memory_layout.block_height;
+    params.block_depth = 1 << config.memory_layout.block_depth;
+    params.tile_width_spacing = 1;
+    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+    params.component_type = ComponentTypeFromRenderTarget(config.format);
+    params.type = GetFormatType(params.pixel_format);
+    if (params.is_tiled) {
+        params.width = config.width;
+    } else {
+        const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
+        params.pitch = config.width;
+        params.width = params.pitch / bpp;
+    }
+    params.height = config.height;
+    params.depth = 1;
+    params.unaligned_height = config.height;
+    params.target = SurfaceTarget::Texture2D;
+    params.num_levels = 1;
+
+    params.CalculateCachedValues();
+    return params;
+}
+
+SurfaceParams SurfaceParams::CreateForFermiCopySurface(
+    const Tegra::Engines::Fermi2D::Regs::Surface& config) {
+    SurfaceParams params{};
+    params.is_tiled = !config.linear;
+    params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0,
+    params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
+    params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
+    params.tile_width_spacing = 1;
+    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+    params.component_type = ComponentTypeFromRenderTarget(config.format);
+    params.type = GetFormatType(params.pixel_format);
+    params.width = config.width;
+    params.height = config.height;
+    params.unaligned_height = config.height;
+    // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
+    params.target = SurfaceTarget::Texture2D;
+    params.depth = 1;
+    params.num_levels = 1;
+
+    params.CalculateCachedValues();
+    return params;
+}
+
+u32 SurfaceParams::GetMipWidth(u32 level) const {
+    return std::max(1U, width >> level);
+}
+
+u32 SurfaceParams::GetMipHeight(u32 level) const {
+    return std::max(1U, height >> level);
+}
+
+u32 SurfaceParams::GetMipDepth(u32 level) const {
+    return IsLayered() ? depth : std::max(1U, depth >> level);
+}
+
+bool SurfaceParams::IsLayered() const {
+    switch (target) {
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubeArray:
+    case SurfaceTarget::TextureCubemap:
+        return true;
+    default:
+        return false;
+    }
+}
+
+u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
+    // Auto block resizing algorithm from:
+    // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+    if (level == 0) {
+        return block_height;
+    }
+    const u32 height{GetMipHeight(level)};
+    const u32 default_block_height{GetDefaultBlockHeight(pixel_format)};
+    const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height};
+    u32 block_height = 16;
+    while (block_height > 1 && blocks_in_y <= block_height * 4) {
+        block_height >>= 1;
+    }
+    return block_height;
+}
+
+u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
+    if (level == 0)
+        return block_depth;
+    if (target != SurfaceTarget::Texture3D)
+        return 1;
+
+    const u32 depth{GetMipDepth(level)};
+    u32 block_depth = 32;
+    while (block_depth > 1 && depth * 2 <= block_depth) {
+        block_depth >>= 1;
+    }
+    if (block_depth == 32 && GetMipBlockHeight(level) >= 4) {
+        return 16;
+    }
+    return block_depth;
+}
+
+std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
+    std::size_t offset = 0;
+    for (u32 i = 0; i < level; i++) {
+        offset += GetInnerMipmapMemorySize(i, false, IsLayered(), false);
+    }
+    return offset;
+}
+
+std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
+    std::size_t offset = 0;
+    for (u32 i = 0; i < level; i++) {
+        offset += GetInnerMipmapMemorySize(i, true, false, false);
+    }
+    return offset;
+}
+
+std::size_t SurfaceParams::GetGuestLayerSize() const {
+    return GetInnerMemorySize(false, true, false);
+}
+
+std::size_t SurfaceParams::GetHostLayerSize(u32 level) const {
+    return GetInnerMipmapMemorySize(level, true, IsLayered(), false);
+}
+
+bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const {
+    if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) !=
+        std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format,
+                 view_params.component_type, view_params.type)) {
+        return false;
+    }
+
+    const SurfaceTarget view_target{view_params.target};
+    if (view_target == target) {
+        return true;
+    }
+
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture3D:
+        return false;
+    case SurfaceTarget::Texture1DArray:
+        return view_target == SurfaceTarget::Texture1D;
+    case SurfaceTarget::Texture2DArray:
+        return view_target == SurfaceTarget::Texture2D;
+    case SurfaceTarget::TextureCubemap:
+        return view_target == SurfaceTarget::Texture2D ||
+               view_target == SurfaceTarget::Texture2DArray;
+    case SurfaceTarget::TextureCubeArray:
+        return view_target == SurfaceTarget::Texture2D ||
+               view_target == SurfaceTarget::Texture2DArray ||
+               view_target == SurfaceTarget::TextureCubemap;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast<u32>(target));
+        return false;
+    }
+}
+
+bool SurfaceParams::IsPixelFormatZeta() const {
+    return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
+           pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
+}
+
+void SurfaceParams::CalculateCachedValues() {
+    guest_size_in_bytes = GetInnerMemorySize(false, false, false);
+
+    // ASTC is uncompressed in software, in emulated as RGBA8
+    if (IsPixelFormatASTC(pixel_format)) {
+        host_size_in_bytes = width * height * depth * 4;
+    } else {
+        host_size_in_bytes = GetInnerMemorySize(true, false, false);
+    }
+
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture3D:
+        num_layers = 1;
+        break;
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray:
+        num_layers = depth;
+        break;
+    default:
+        UNREACHABLE();
+    }
+}
+
+std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
+                                                    bool uncompressed) const {
+    const bool tiled{as_host_size ? false : is_tiled};
+    const u32 tile_x{GetDefaultBlockWidth(pixel_format)};
+    const u32 tile_y{GetDefaultBlockHeight(pixel_format)};
+    const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), tile_x)};
+    const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), tile_y)};
+    const u32 depth{layer_only ? 1U : GetMipDepth(level)};
+    return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(pixel_format), width, height,
+                                         depth, GetMipBlockHeight(level), GetMipBlockDepth(level));
+}
+
+std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only,
+                                              bool uncompressed) const {
+    std::size_t size = 0;
+    for (u32 level = 0; level < num_levels; ++level) {
+        size += GetInnerMipmapMemorySize(level, as_host_size, layer_only, uncompressed);
+    }
+    if (!as_host_size && is_tiled) {
+        size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth);
+    }
+    return size;
+}
+
+std::map<u64, std::pair<u32, u32>> SurfaceParams::CreateViewOffsetMap() const {
+    std::map<u64, std::pair<u32, u32>> view_offset_map;
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture3D: {
+        constexpr u32 layer = 0;
+        for (u32 level = 0; level < num_levels; ++level) {
+            const std::size_t offset{GetGuestMipmapLevelOffset(level)};
+            view_offset_map.insert({offset, {layer, level}});
+        }
+        break;
+    }
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray: {
+        const std::size_t layer_size{GetGuestLayerSize()};
+        for (u32 level = 0; level < num_levels; ++level) {
+            const std::size_t level_offset{GetGuestMipmapLevelOffset(level)};
+            for (u32 layer = 0; layer < num_layers; ++layer) {
+                const auto layer_offset{static_cast<std::size_t>(layer_size * layer)};
+                const std::size_t offset{level_offset + layer_offset};
+                view_offset_map.insert({offset, {layer, level}});
+            }
+        }
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast<u32>(target));
+    }
+    return view_offset_map;
+}
+
+bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const {
+    return IsDimensionValid(view_params, level) && IsDepthValid(view_params, level) &&
+           IsInBounds(view_params, layer, level);
+}
+
+bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const {
+    return view_params.width == GetMipWidth(level) && view_params.height == GetMipHeight(level);
+}
+
+bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const {
+    if (view_params.target != SurfaceTarget::Texture3D) {
+        return true;
+    }
+    return view_params.depth == GetMipDepth(level);
+}
+
+bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const {
+    return layer + view_params.num_layers <= num_layers &&
+           level + view_params.num_levels <= num_levels;
+}
+
+std::size_t HasheableSurfaceParams::Hash() const {
+    return static_cast<std::size_t>(
+        Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
+}
+
+bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const {
+    return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
+                    height, depth, pitch, unaligned_height, num_levels, pixel_format,
+                    component_type, type, target) ==
+           std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
+                    rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
+                    rhs.unaligned_height, rhs.num_levels, rhs.pixel_format, rhs.component_type,
+                    rhs.type, rhs.target);
+}
+
+std::size_t ViewKey::Hash() const {
+    return static_cast<std::size_t>(
+        Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
+}
+
+bool ViewKey::operator==(const ViewKey& rhs) const {
+    return std::tie(base_layer, num_layers, base_level, num_levels) ==
+           std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels);
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h
new file mode 100644
index 000000000..041551691
--- /dev/null
+++ b/src/video_core/texture_cache.h
@@ -0,0 +1,586 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <list>
+#include <memory>
+#include <set>
+#include <tuple>
+#include <type_traits>
+#include <unordered_map>
+
+#include <boost/icl/interval_map.hpp>
+#include <boost/range/iterator_range.hpp>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/memory.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/surface.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra::Texture {
+struct FullTextureInfo;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace VideoCommon {
+
+class HasheableSurfaceParams {
+public:
+    std::size_t Hash() const;
+
+    bool operator==(const HasheableSurfaceParams& rhs) const;
+
+protected:
+    // Avoid creation outside of a managed environment.
+    HasheableSurfaceParams() = default;
+
+    bool is_tiled;
+    u32 block_width;
+    u32 block_height;
+    u32 block_depth;
+    u32 tile_width_spacing;
+    u32 width;
+    u32 height;
+    u32 depth;
+    u32 pitch;
+    u32 unaligned_height;
+    u32 num_levels;
+    VideoCore::Surface::PixelFormat pixel_format;
+    VideoCore::Surface::ComponentType component_type;
+    VideoCore::Surface::SurfaceType type;
+    VideoCore::Surface::SurfaceTarget target;
+};
+
+class SurfaceParams final : public HasheableSurfaceParams {
+public:
+    /// Creates SurfaceCachedParams from a texture configuration.
+    static SurfaceParams CreateForTexture(Core::System& system,
+                                          const Tegra::Texture::FullTextureInfo& config);
+
+    /// Creates SurfaceCachedParams for a depth buffer configuration.
+    static SurfaceParams CreateForDepthBuffer(
+        Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
+        u32 block_width, u32 block_height, u32 block_depth,
+        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
+
+    /// Creates SurfaceCachedParams from a framebuffer configuration.
+    static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
+
+    /// Creates SurfaceCachedParams from a Fermi2D surface configuration.
+    static SurfaceParams CreateForFermiCopySurface(
+        const Tegra::Engines::Fermi2D::Regs::Surface& config);
+
+    bool IsTiled() const {
+        return is_tiled;
+    }
+
+    u32 GetBlockWidth() const {
+        return block_width;
+    }
+
+    u32 GetTileWidthSpacing() const {
+        return tile_width_spacing;
+    }
+
+    u32 GetWidth() const {
+        return width;
+    }
+
+    u32 GetHeight() const {
+        return height;
+    }
+
+    u32 GetDepth() const {
+        return depth;
+    }
+
+    u32 GetPitch() const {
+        return pitch;
+    }
+
+    u32 GetNumLevels() const {
+        return num_levels;
+    }
+
+    VideoCore::Surface::PixelFormat GetPixelFormat() const {
+        return pixel_format;
+    }
+
+    VideoCore::Surface::ComponentType GetComponentType() const {
+        return component_type;
+    }
+
+    VideoCore::Surface::SurfaceTarget GetTarget() const {
+        return target;
+    }
+
+    VideoCore::Surface::SurfaceType GetType() const {
+        return type;
+    }
+
+    std::size_t GetGuestSizeInBytes() const {
+        return guest_size_in_bytes;
+    }
+
+    std::size_t GetHostSizeInBytes() const {
+        return host_size_in_bytes;
+    }
+
+    u32 GetNumLayers() const {
+        return num_layers;
+    }
+
+    /// Returns the width of a given mipmap level.
+    u32 GetMipWidth(u32 level) const;
+
+    /// Returns the height of a given mipmap level.
+    u32 GetMipHeight(u32 level) const;
+
+    /// Returns the depth of a given mipmap level.
+    u32 GetMipDepth(u32 level) const;
+
+    /// Returns true if these parameters are from a layered surface.
+    bool IsLayered() const;
+
+    /// Returns the block height of a given mipmap level.
+    u32 GetMipBlockHeight(u32 level) const;
+
+    /// Returns the block depth of a given mipmap level.
+    u32 GetMipBlockDepth(u32 level) const;
+
+    /// Returns the offset in bytes in guest memory of a given mipmap level.
+    std::size_t GetGuestMipmapLevelOffset(u32 level) const;
+
+    /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
+    std::size_t GetHostMipmapLevelOffset(u32 level) const;
+
+    /// Returns the size of a layer in bytes in guest memory.
+    std::size_t GetGuestLayerSize() const;
+
+    /// Returns the size of a layer in bytes in host memory for a given mipmap level.
+    std::size_t GetHostLayerSize(u32 level) const;
+
+    /// Returns true if another surface can be familiar with this. This is a loosely defined term
+    /// that reflects the possibility of these two surface parameters potentially being part of a
+    /// bigger superset.
+    bool IsFamiliar(const SurfaceParams& view_params) const;
+
+    /// Returns true if the pixel format is a depth and/or stencil format.
+    bool IsPixelFormatZeta() const;
+
+    /// Creates a map that redirects an address difference to a layer and mipmap level.
+    std::map<u64, std::pair<u32, u32>> CreateViewOffsetMap() const;
+
+    /// Returns true if the passed surface view parameters is equal or a valid subset of this.
+    bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const;
+
+private:
+    /// Calculates values that can be deduced from HasheableSurfaceParams.
+    void CalculateCachedValues();
+
+    /// Returns the size of a given mipmap level.
+    std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
+                                         bool uncompressed) const;
+
+    /// Returns the size of all mipmap levels and aligns as needed.
+    std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const;
+
+    /// Returns true if the passed view width and height match the size of this params in a given
+    /// mipmap level.
+    bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const;
+
+    /// Returns true if the passed view depth match the size of this params in a given mipmap level.
+    bool IsDepthValid(const SurfaceParams& view_params, u32 level) const;
+
+    /// Returns true if the passed view layers and mipmap levels are in bounds.
+    bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const;
+
+    std::size_t guest_size_in_bytes;
+    std::size_t host_size_in_bytes;
+    u32 num_layers;
+};
+
+struct ViewKey {
+    std::size_t Hash() const;
+
+    bool operator==(const ViewKey& rhs) const;
+
+    u32 base_layer{};
+    u32 num_layers{};
+    u32 base_level{};
+    u32 num_levels{};
+};
+
+} // namespace VideoCommon
+
+namespace std {
+
+template <>
+struct hash<VideoCommon::SurfaceParams> {
+    std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
+        return k.Hash();
+    }
+};
+
+template <>
+struct hash<VideoCommon::ViewKey> {
+    std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept {
+        return k.Hash();
+    }
+};
+
+} // namespace std
+
+namespace VideoCommon {
+
+template <typename TView, typename TExecutionContext>
+class SurfaceBase {
+    static_assert(std::is_trivially_copyable_v<TExecutionContext>);
+
+public:
+    virtual void LoadBuffer() = 0;
+
+    virtual TExecutionContext FlushBuffer(TExecutionContext exctx) = 0;
+
+    virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0;
+
+    TView* TryGetView(VAddr view_addr, const SurfaceParams& view_params) {
+        if (view_addr < cpu_addr || !params.IsFamiliar(view_params)) {
+            // It can't be a view if it's in a prior address.
+            return {};
+        }
+
+        const auto relative_offset{static_cast<u64>(view_addr - cpu_addr)};
+        const auto it{view_offset_map.find(relative_offset)};
+        if (it == view_offset_map.end()) {
+            // Couldn't find an aligned view.
+            return {};
+        }
+        const auto [layer, level] = it->second;
+
+        if (!params.IsViewValid(view_params, layer, level)) {
+            return {};
+        }
+
+        return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels());
+    }
+
+    VAddr GetCpuAddr() const {
+        ASSERT(is_registered);
+        return cpu_addr;
+    }
+
+    u8* GetHostPtr() const {
+        ASSERT(is_registered);
+        return host_ptr;
+    }
+
+    CacheAddr GetCacheAddr() const {
+        ASSERT(is_registered);
+        return cache_addr;
+    }
+
+    std::size_t GetSizeInBytes() const {
+        return params.GetGuestSizeInBytes();
+    }
+
+    void MarkAsModified(bool is_modified_) {
+        is_modified = is_modified_;
+    }
+
+    const SurfaceParams& GetSurfaceParams() const {
+        return params;
+    }
+
+    TView* GetView(VAddr view_addr, const SurfaceParams& view_params) {
+        TView* view{TryGetView(view_addr, view_params)};
+        ASSERT(view != nullptr);
+        return view;
+    }
+
+    void Register(VAddr cpu_addr_, u8* host_ptr_) {
+        ASSERT(!is_registered);
+        is_registered = true;
+        cpu_addr = cpu_addr_;
+        host_ptr = host_ptr_;
+        cache_addr = ToCacheAddr(host_ptr_);
+    }
+
+    void Register(VAddr cpu_addr_) {
+        Register(cpu_addr_, Memory::GetPointer(cpu_addr_));
+    }
+
+    void Unregister() {
+        ASSERT(is_registered);
+        is_registered = false;
+    }
+
+    bool IsRegistered() const {
+        return is_registered;
+    }
+
+protected:
+    explicit SurfaceBase(const SurfaceParams& params)
+        : params{params}, view_offset_map{params.CreateViewOffsetMap()} {}
+
+    ~SurfaceBase() = default;
+
+    virtual std::unique_ptr<TView> CreateView(const ViewKey& view_key) = 0;
+
+    bool IsModified() const {
+        return is_modified;
+    }
+
+    const SurfaceParams params;
+
+private:
+    TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) {
+        const ViewKey key{base_layer, num_layers, base_level, num_levels};
+        const auto [entry, is_cache_miss] = views.try_emplace(key);
+        auto& view{entry->second};
+        if (is_cache_miss) {
+            view = CreateView(key);
+        }
+        return view.get();
+    }
+
+    const std::map<u64, std::pair<u32, u32>> view_offset_map;
+
+    VAddr cpu_addr{};
+    u8* host_ptr{};
+    CacheAddr cache_addr{};
+    bool is_modified{};
+    bool is_registered{};
+    std::unordered_map<ViewKey, std::unique_ptr<TView>> views;
+};
+
+template <typename TSurface, typename TView, typename TExecutionContext>
+class TextureCache {
+    static_assert(std::is_trivially_copyable_v<TExecutionContext>);
+    using ResultType = std::tuple<TView*, TExecutionContext>;
+    using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface*>>;
+    using IntervalType = typename IntervalMap::interval_type;
+
+public:
+    void InvalidateRegion(CacheAddr addr, std::size_t size) {
+        for (TSurface* surface : GetSurfacesInRegion(addr, size)) {
+            if (!surface->IsRegistered()) {
+                // Skip duplicates
+                continue;
+            }
+            Unregister(surface);
+        }
+    }
+
+    ResultType GetTextureSurface(TExecutionContext exctx,
+                                 const Tegra::Texture::FullTextureInfo& config) {
+        auto& memory_manager{system.GPU().MemoryManager()};
+        const auto cpu_addr{memory_manager.GpuToCpuAddress(config.tic.Address())};
+        if (!cpu_addr) {
+            return {{}, exctx};
+        }
+        const auto params{SurfaceParams::CreateForTexture(system, config)};
+        return GetSurfaceView(exctx, *cpu_addr, params, true);
+    }
+
+    ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) {
+        const auto& regs{system.GPU().Maxwell3D().regs};
+        if (!regs.zeta.Address() || !regs.zeta_enable) {
+            return {{}, exctx};
+        }
+
+        auto& memory_manager{system.GPU().MemoryManager()};
+        const auto cpu_addr{memory_manager.GpuToCpuAddress(regs.zeta.Address())};
+        if (!cpu_addr) {
+            return {{}, exctx};
+        }
+
+        const auto depth_params{SurfaceParams::CreateForDepthBuffer(
+            system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
+            regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
+            regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
+        return GetSurfaceView(exctx, *cpu_addr, depth_params, preserve_contents);
+    }
+
+    ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index,
+                                     bool preserve_contents) {
+        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
+
+        const auto& regs{system.GPU().Maxwell3D().regs};
+        if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
+            regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
+            return {{}, exctx};
+        }
+
+        auto& memory_manager{system.GPU().MemoryManager()};
+        const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
+        const auto cpu_addr{memory_manager.GpuToCpuAddress(
+            config.Address() + config.base_layer * config.layer_stride * sizeof(u32))};
+        if (!cpu_addr) {
+            return {{}, exctx};
+        }
+
+        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
+                              preserve_contents);
+    }
+
+    ResultType GetFermiSurface(TExecutionContext exctx,
+                               const Tegra::Engines::Fermi2D::Regs::Surface& config) {
+        const auto cpu_addr{system.GPU().MemoryManager().GpuToCpuAddress(config.Address())};
+        ASSERT(cpu_addr);
+        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFermiCopySurface(config),
+                              true);
+    }
+
+    TSurface* TryFindFramebufferSurface(const u8* host_ptr) const {
+        const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))};
+        return it != registered_surfaces.end() ? *it->second.begin() : nullptr;
+    }
+
+protected:
+    TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
+        : system{system}, rasterizer{rasterizer} {}
+
+    ~TextureCache() = default;
+
+    virtual ResultType TryFastGetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
+                                             const SurfaceParams& params, bool preserve_contents,
+                                             const std::vector<TSurface*>& overlaps) = 0;
+
+    virtual std::unique_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0;
+
+    void Register(TSurface* surface, VAddr cpu_addr, u8* host_ptr) {
+        surface->Register(cpu_addr, host_ptr);
+        registered_surfaces.add({GetSurfaceInterval(surface), {surface}});
+        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1);
+    }
+
+    void Unregister(TSurface* surface) {
+        registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}});
+        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1);
+        surface->Unregister();
+    }
+
+    TSurface* GetUncachedSurface(const SurfaceParams& params) {
+        if (TSurface* surface = TryGetReservedSurface(params); surface)
+            return surface;
+        // No reserved surface available, create a new one and reserve it
+        auto new_surface{CreateSurface(params)};
+        TSurface* surface{new_surface.get()};
+        ReserveSurface(params, std::move(new_surface));
+        return surface;
+    }
+
+    Core::System& system;
+
+private:
+    ResultType GetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, const SurfaceParams& params,
+                              bool preserve_contents) {
+        const auto host_ptr{Memory::GetPointer(cpu_addr)};
+        const auto cache_addr{ToCacheAddr(host_ptr)};
+        const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())};
+        if (overlaps.empty()) {
+            return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
+        }
+
+        if (overlaps.size() == 1) {
+            if (TView* view = overlaps[0]->TryGetView(cpu_addr, params); view)
+                return {view, exctx};
+        }
+
+        TView* fast_view;
+        std::tie(fast_view, exctx) =
+            TryFastGetSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents, overlaps);
+
+        for (TSurface* surface : overlaps) {
+            if (!fast_view) {
+                // Flush even when we don't care about the contents, to preserve memory not written
+                // by the new surface.
+                exctx = surface->FlushBuffer(exctx);
+            }
+            Unregister(surface);
+        }
+
+        if (fast_view) {
+            return {fast_view, exctx};
+        }
+
+        return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
+    }
+
+    ResultType LoadSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
+                               const SurfaceParams& params, bool preserve_contents) {
+        TSurface* new_surface{GetUncachedSurface(params)};
+        Register(new_surface, cpu_addr, host_ptr);
+        if (preserve_contents) {
+            exctx = LoadSurface(exctx, new_surface);
+        }
+        return {new_surface->GetView(cpu_addr, params), exctx};
+    }
+
+    TExecutionContext LoadSurface(TExecutionContext exctx, TSurface* surface) {
+        surface->LoadBuffer();
+        exctx = surface->UploadTexture(exctx);
+        surface->MarkAsModified(false);
+        return exctx;
+    }
+
+    std::vector<TSurface*> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const {
+        if (size == 0) {
+            return {};
+        }
+        const IntervalType interval{cache_addr, cache_addr + size};
+
+        std::vector<TSurface*> surfaces;
+        for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) {
+            surfaces.push_back(*pair.second.begin());
+        }
+        return surfaces;
+    }
+
+    void ReserveSurface(const SurfaceParams& params, std::unique_ptr<TSurface> surface) {
+        surface_reserve[params].push_back(std::move(surface));
+    }
+
+    TSurface* TryGetReservedSurface(const SurfaceParams& params) {
+        auto search{surface_reserve.find(params)};
+        if (search == surface_reserve.end()) {
+            return {};
+        }
+        for (auto& surface : search->second) {
+            if (!surface->IsRegistered()) {
+                return surface.get();
+            }
+        }
+        return {};
+    }
+
+    IntervalType GetSurfaceInterval(TSurface* surface) const {
+        return IntervalType::right_open(surface->GetCacheAddr(),
+                                        surface->GetCacheAddr() + surface->GetSizeInBytes());
+    }
+
+    VideoCore::RasterizerInterface& rasterizer;
+
+    IntervalMap registered_surfaces;
+
+    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
+    /// previously been used. This is to prevent surfaces from being constantly created and
+    /// destroyed when used with different surface parameters.
+    std::unordered_map<SurfaceParams, std::list<std::unique_ptr<TSurface>>> surface_reserve;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 995d0e068..217805386 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -288,6 +288,29 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
     }
 }
 
+void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
+                   const u32 block_height, const std::size_t copy_size, const u8* source_data,
+                   u8* swizzle_data) {
+    const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x};
+    std::size_t count = 0;
+    for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
+        const std::size_t gob_address_y =
+            (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
+            ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
+        const auto& table = legacy_swizzle_table[y % gob_size_y];
+        for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
+            const std::size_t gob_address =
+                gob_address_y + (x / gob_size_x) * gob_size * block_height;
+            const std::size_t swizzled_offset = gob_address + table[x % gob_size_x];
+            const u8* source_line = source_data + count;
+            u8* dest_addr = swizzle_data + swizzled_offset;
+            count++;
+
+            std::memcpy(dest_addr, source_line, 1);
+        }
+    }
+}
+
 std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
                               u32 height) {
     std::vector<u8> rgba_data;
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index e078fa274..e072d8401 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -51,4 +51,8 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
                       u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
                       u32 offset_x, u32 offset_y);
 
+void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
+                   const u32 block_height, const std::size_t copy_size, const u8* source_data,
+                   u8* swizzle_data);
+
 } // namespace Tegra::Texture
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index cb82ecf3f..60cda0ca3 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -5,6 +5,8 @@
 #include <memory>
 #include "core/core.h"
 #include "core/settings.h"
+#include "video_core/gpu_asynch.h"
+#include "video_core/gpu_synch.h"
 #include "video_core/renderer_base.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
 #include "video_core/video_core.h"
@@ -16,6 +18,14 @@ std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_wind
     return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
 }
 
+std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) {
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        return std::make_unique<VideoCommon::GPUAsynch>(system, system.Renderer());
+    }
+
+    return std::make_unique<VideoCommon::GPUSynch>(system, system.Renderer());
+}
+
 u16 GetResolutionScaleFactor(const RendererBase& renderer) {
     return static_cast<u16>(
         Settings::values.resolution_factor
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 3c583f195..b8e0ac372 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -14,6 +14,10 @@ namespace Core::Frontend {
 class EmuWindow;
 }
 
+namespace Tegra {
+class GPU;
+}
+
 namespace VideoCore {
 
 class RendererBase;
@@ -27,6 +31,9 @@ class RendererBase;
 std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
                                              Core::System& system);
 
+/// Creates an emulated GPU instance using the given system context.
+std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system);
+
 u16 GetResolutionScaleFactor(const RendererBase& renderer);
 
 } // namespace VideoCore
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 2eb86d6e5..31b65c04c 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -151,6 +151,12 @@ target_link_libraries(yuzu PRIVATE common core input_common video_core)
 target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets)
 target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads)
 
+target_compile_definitions(yuzu PRIVATE
+    # Use QStringBuilder for string concatenation to reduce
+    # the overall number of temporary strings created.
+    -DQT_USE_QSTRINGBUILDER
+)
+
 if (YUZU_ENABLE_COMPATIBILITY_REPORTING)
     target_compile_definitions(yuzu PRIVATE -DYUZU_ENABLE_COMPATIBILITY_REPORTING)
 endif()
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 7438fbc0a..7eed9fcf3 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -1,6 +1,13 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include <QApplication>
 #include <QHBoxLayout>
 #include <QKeyEvent>
+#include <QOffscreenSurface>
+#include <QOpenGLWindow>
+#include <QPainter>
 #include <QScreen>
 #include <QWindow>
 #include <fmt/format.h>
@@ -82,13 +89,36 @@ void EmuThread::run() {
     render_window->moveContext();
 }
 
+class GGLContext : public Core::Frontend::GraphicsContext {
+public:
+    explicit GGLContext(QOpenGLContext* shared_context)
+        : context{std::make_unique<QOpenGLContext>(shared_context)} {
+        surface.setFormat(shared_context->format());
+        surface.create();
+    }
+
+    void MakeCurrent() override {
+        context->makeCurrent(&surface);
+    }
+
+    void DoneCurrent() override {
+        context->doneCurrent();
+    }
+
+    void SwapBuffers() override {}
+
+private:
+    std::unique_ptr<QOpenGLContext> context;
+    QOffscreenSurface surface;
+};
+
 // This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL
 // context.
 // The corresponding functionality is handled in EmuThread instead
-class GGLWidgetInternal : public QGLWidget {
+class GGLWidgetInternal : public QOpenGLWindow {
 public:
-    GGLWidgetInternal(QGLFormat fmt, GRenderWindow* parent)
-        : QGLWidget(fmt, parent), parent(parent) {}
+    GGLWidgetInternal(GRenderWindow* parent, QOpenGLContext* shared_context)
+        : QOpenGLWindow(shared_context), parent(parent) {}
 
     void paintEvent(QPaintEvent* ev) override {
         if (do_painting) {
@@ -101,9 +131,51 @@ public:
         parent->OnFramebufferSizeChanged();
     }
 
+    void keyPressEvent(QKeyEvent* event) override {
+        InputCommon::GetKeyboard()->PressKey(event->key());
+    }
+
+    void keyReleaseEvent(QKeyEvent* event) override {
+        InputCommon::GetKeyboard()->ReleaseKey(event->key());
+    }
+
+    void mousePressEvent(QMouseEvent* event) override {
+        if (event->source() == Qt::MouseEventSynthesizedBySystem)
+            return; // touch input is handled in TouchBeginEvent
+
+        const auto pos{event->pos()};
+        if (event->button() == Qt::LeftButton) {
+            const auto [x, y] = parent->ScaleTouch(pos);
+            parent->TouchPressed(x, y);
+        } else if (event->button() == Qt::RightButton) {
+            InputCommon::GetMotionEmu()->BeginTilt(pos.x(), pos.y());
+        }
+    }
+
+    void mouseMoveEvent(QMouseEvent* event) override {
+        if (event->source() == Qt::MouseEventSynthesizedBySystem)
+            return; // touch input is handled in TouchUpdateEvent
+
+        const auto pos{event->pos()};
+        const auto [x, y] = parent->ScaleTouch(pos);
+        parent->TouchMoved(x, y);
+        InputCommon::GetMotionEmu()->Tilt(pos.x(), pos.y());
+    }
+
+    void mouseReleaseEvent(QMouseEvent* event) override {
+        if (event->source() == Qt::MouseEventSynthesizedBySystem)
+            return; // touch input is handled in TouchEndEvent
+
+        if (event->button() == Qt::LeftButton)
+            parent->TouchReleased();
+        else if (event->button() == Qt::RightButton)
+            InputCommon::GetMotionEmu()->EndTilt();
+    }
+
     void DisablePainting() {
         do_painting = false;
     }
+
     void EnablePainting() {
         do_painting = true;
     }
@@ -114,8 +186,7 @@ private:
 };
 
 GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread)
-    : QWidget(parent), child(nullptr), emu_thread(emu_thread) {
-
+    : QWidget(parent), emu_thread(emu_thread) {
     setWindowTitle(QStringLiteral("yuzu %1 | %2-%3")
                        .arg(Common::g_build_name, Common::g_scm_branch, Common::g_scm_desc));
     setAttribute(Qt::WA_AcceptTouchEvents);
@@ -137,19 +208,19 @@ void GRenderWindow::moveContext() {
     auto thread = (QThread::currentThread() == qApp->thread() && emu_thread != nullptr)
                       ? emu_thread
                       : qApp->thread();
-    child->context()->moveToThread(thread);
+    context->moveToThread(thread);
 }
 
 void GRenderWindow::SwapBuffers() {
-    // In our multi-threaded QGLWidget use case we shouldn't need to call `makeCurrent`,
+    // In our multi-threaded QWidget use case we shouldn't need to call `makeCurrent`,
     // since we never call `doneCurrent` in this thread.
     // However:
     // - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called
     // since the last time `swapBuffers` was executed;
     // - On macOS, if `makeCurrent` isn't called explicitely, resizing the buffer breaks.
-    child->makeCurrent();
+    context->makeCurrent(child);
 
-    child->swapBuffers();
+    context->swapBuffers(child);
     if (!first_frame) {
         emit FirstFrameDisplayed();
         first_frame = true;
@@ -157,11 +228,11 @@ void GRenderWindow::SwapBuffers() {
 }
 
 void GRenderWindow::MakeCurrent() {
-    child->makeCurrent();
+    context->makeCurrent(child);
 }
 
 void GRenderWindow::DoneCurrent() {
-    child->doneCurrent();
+    context->doneCurrent();
 }
 
 void GRenderWindow::PollEvents() {}
@@ -174,14 +245,26 @@ void GRenderWindow::PollEvents() {}
 void GRenderWindow::OnFramebufferSizeChanged() {
     // Screen changes potentially incur a change in screen DPI, hence we should update the
     // framebuffer size
-    qreal pixelRatio = windowPixelRatio();
+    qreal pixelRatio = GetWindowPixelRatio();
     unsigned width = child->QPaintDevice::width() * pixelRatio;
     unsigned height = child->QPaintDevice::height() * pixelRatio;
     UpdateCurrentFramebufferLayout(width, height);
 }
 
+void GRenderWindow::ForwardKeyPressEvent(QKeyEvent* event) {
+    if (child) {
+        child->keyPressEvent(event);
+    }
+}
+
+void GRenderWindow::ForwardKeyReleaseEvent(QKeyEvent* event) {
+    if (child) {
+        child->keyReleaseEvent(event);
+    }
+}
+
 void GRenderWindow::BackupGeometry() {
-    geometry = ((QGLWidget*)this)->saveGeometry();
+    geometry = ((QWidget*)this)->saveGeometry();
 }
 
 void GRenderWindow::RestoreGeometry() {
@@ -199,18 +282,18 @@ QByteArray GRenderWindow::saveGeometry() {
     // If we are a top-level widget, store the current geometry
     // otherwise, store the last backup
     if (parent() == nullptr)
-        return ((QGLWidget*)this)->saveGeometry();
+        return ((QWidget*)this)->saveGeometry();
     else
         return geometry;
 }
 
-qreal GRenderWindow::windowPixelRatio() const {
+qreal GRenderWindow::GetWindowPixelRatio() const {
     // windowHandle() might not be accessible until the window is displayed to screen.
     return windowHandle() ? windowHandle()->screen()->devicePixelRatio() : 1.0f;
 }
 
 std::pair<unsigned, unsigned> GRenderWindow::ScaleTouch(const QPointF pos) const {
-    const qreal pixel_ratio = windowPixelRatio();
+    const qreal pixel_ratio = GetWindowPixelRatio();
     return {static_cast<unsigned>(std::max(std::round(pos.x() * pixel_ratio), qreal{0.0})),
             static_cast<unsigned>(std::max(std::round(pos.y() * pixel_ratio), qreal{0.0}))};
 }
@@ -220,47 +303,6 @@ void GRenderWindow::closeEvent(QCloseEvent* event) {
     QWidget::closeEvent(event);
 }
 
-void GRenderWindow::keyPressEvent(QKeyEvent* event) {
-    InputCommon::GetKeyboard()->PressKey(event->key());
-}
-
-void GRenderWindow::keyReleaseEvent(QKeyEvent* event) {
-    InputCommon::GetKeyboard()->ReleaseKey(event->key());
-}
-
-void GRenderWindow::mousePressEvent(QMouseEvent* event) {
-    if (event->source() == Qt::MouseEventSynthesizedBySystem)
-        return; // touch input is handled in TouchBeginEvent
-
-    auto pos = event->pos();
-    if (event->button() == Qt::LeftButton) {
-        const auto [x, y] = ScaleTouch(pos);
-        this->TouchPressed(x, y);
-    } else if (event->button() == Qt::RightButton) {
-        InputCommon::GetMotionEmu()->BeginTilt(pos.x(), pos.y());
-    }
-}
-
-void GRenderWindow::mouseMoveEvent(QMouseEvent* event) {
-    if (event->source() == Qt::MouseEventSynthesizedBySystem)
-        return; // touch input is handled in TouchUpdateEvent
-
-    auto pos = event->pos();
-    const auto [x, y] = ScaleTouch(pos);
-    this->TouchMoved(x, y);
-    InputCommon::GetMotionEmu()->Tilt(pos.x(), pos.y());
-}
-
-void GRenderWindow::mouseReleaseEvent(QMouseEvent* event) {
-    if (event->source() == Qt::MouseEventSynthesizedBySystem)
-        return; // touch input is handled in TouchEndEvent
-
-    if (event->button() == Qt::LeftButton)
-        this->TouchReleased();
-    else if (event->button() == Qt::RightButton)
-        InputCommon::GetMotionEmu()->EndTilt();
-}
-
 void GRenderWindow::TouchBeginEvent(const QTouchEvent* event) {
     // TouchBegin always has exactly one touch point, so take the .first()
     const auto [x, y] = ScaleTouch(event->touchPoints().first().pos());
@@ -313,35 +355,60 @@ void GRenderWindow::OnClientAreaResized(unsigned width, unsigned height) {
     NotifyClientAreaSizeChanged(std::make_pair(width, height));
 }
 
+std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const {
+    return std::make_unique<GGLContext>(shared_context.get());
+}
+
 void GRenderWindow::InitRenderTarget() {
-    if (child) {
-        delete child;
-    }
+    shared_context.reset();
+    context.reset();
 
-    if (layout()) {
-        delete layout();
-    }
+    delete child;
+    child = nullptr;
+
+    delete container;
+    container = nullptr;
+
+    delete layout();
 
     first_frame = false;
 
     // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground,
     // WA_DontShowOnScreen, WA_DeleteOnClose
-    QGLFormat fmt;
+    QSurfaceFormat fmt;
     fmt.setVersion(4, 3);
-    fmt.setProfile(QGLFormat::CoreProfile);
+    fmt.setProfile(QSurfaceFormat::CoreProfile);
+    // TODO: expose a setting for buffer value (ie default/single/double/triple)
+    fmt.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);
+    shared_context = std::make_unique<QOpenGLContext>();
+    shared_context->setFormat(fmt);
+    shared_context->create();
+    context = std::make_unique<QOpenGLContext>();
+    context->setShareContext(shared_context.get());
+    context->setFormat(fmt);
+    context->create();
     fmt.setSwapInterval(false);
 
-    // Requests a forward-compatible context, which is required to get a 3.2+ context on OS X
-    fmt.setOption(QGL::NoDeprecatedFunctions);
+    child = new GGLWidgetInternal(this, shared_context.get());
+    container = QWidget::createWindowContainer(child, this);
 
-    child = new GGLWidgetInternal(fmt, this);
     QBoxLayout* layout = new QHBoxLayout(this);
-
-    resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
-    layout->addWidget(child);
+    layout->addWidget(container);
     layout->setMargin(0);
     setLayout(layout);
 
+    // Reset minimum size to avoid unwanted resizes when this function is called for a second time.
+    setMinimumSize(1, 1);
+
+    // Show causes the window to actually be created and the OpenGL context as well, but we don't
+    // want the widget to be shown yet, so immediately hide it.
+    show();
+    hide();
+
+    resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
+    child->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
+    container->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
+
     OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size);
 
     OnFramebufferSizeChanged();
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index 3183621bc..3df33aca1 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -7,10 +7,9 @@
 #include <atomic>
 #include <condition_variable>
 #include <mutex>
-#include <QGLWidget>
 #include <QImage>
 #include <QThread>
-#include "common/thread.h"
+#include <QWidget>
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
 
@@ -21,6 +20,8 @@ class QTouchEvent;
 class GGLWidgetInternal;
 class GMainWindow;
 class GRenderWindow;
+class QSurface;
+class QOpenGLContext;
 
 namespace VideoCore {
 enum class LoadCallbackStage;
@@ -121,25 +122,21 @@ public:
     void MakeCurrent() override;
     void DoneCurrent() override;
     void PollEvents() override;
+    std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
+
+    void ForwardKeyPressEvent(QKeyEvent* event);
+    void ForwardKeyReleaseEvent(QKeyEvent* event);
 
     void BackupGeometry();
     void RestoreGeometry();
     void restoreGeometry(const QByteArray& geometry); // overridden
     QByteArray saveGeometry();                        // overridden
 
-    qreal windowPixelRatio() const;
+    qreal GetWindowPixelRatio() const;
+    std::pair<unsigned, unsigned> ScaleTouch(const QPointF pos) const;
 
     void closeEvent(QCloseEvent* event) override;
-
-    void keyPressEvent(QKeyEvent* event) override;
-    void keyReleaseEvent(QKeyEvent* event) override;
-
-    void mousePressEvent(QMouseEvent* event) override;
-    void mouseMoveEvent(QMouseEvent* event) override;
-    void mouseReleaseEvent(QMouseEvent* event) override;
-
     bool event(QEvent* event) override;
-
     void focusOutEvent(QFocusEvent* event) override;
 
     void OnClientAreaResized(unsigned width, unsigned height);
@@ -161,7 +158,6 @@ signals:
     void FirstFrameDisplayed();
 
 private:
-    std::pair<unsigned, unsigned> ScaleTouch(const QPointF pos) const;
     void TouchBeginEvent(const QTouchEvent* event);
     void TouchUpdateEvent(const QTouchEvent* event);
     void TouchEndEvent();
@@ -169,11 +165,17 @@ private:
     void OnMinimalClientAreaChangeRequest(
         const std::pair<unsigned, unsigned>& minimal_size) override;
 
-    GGLWidgetInternal* child;
+    QWidget* container = nullptr;
+    GGLWidgetInternal* child = nullptr;
 
     QByteArray geometry;
 
     EmuThread* emu_thread;
+    // Context that backs the GGLWidgetInternal (and will be used by core to render)
+    std::unique_ptr<QOpenGLContext> context;
+    // Context that will be shared between all newly created contexts. This should never be made
+    // current
+    std::unique_ptr<QOpenGLContext> shared_context;
 
     /// Temporary storage of the screenshot taken
     QImage screenshot_image;
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 802db3945..8725a78dc 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -394,6 +394,7 @@ void Config::ReadValues() {
         ReadSetting("use_accurate_gpu_emulation", false).toBool();
     Settings::values.use_asynchronous_gpu_emulation =
         ReadSetting("use_asynchronous_gpu_emulation", false).toBool();
+    Settings::values.force_30fps_mode = ReadSetting("force_30fps_mode", false).toBool();
 
     Settings::values.bg_red = ReadSetting("bg_red", 0.0).toFloat();
     Settings::values.bg_green = ReadSetting("bg_green", 0.0).toFloat();
@@ -523,8 +524,8 @@ void Config::ReadValues() {
     qt_config->beginGroup("Paths");
     UISettings::values.roms_path = ReadSetting("romsPath").toString();
     UISettings::values.symbols_path = ReadSetting("symbolsPath").toString();
-    UISettings::values.gamedir = ReadSetting("gameListRootDir", ".").toString();
-    UISettings::values.gamedir_deepscan = ReadSetting("gameListDeepScan", false).toBool();
+    UISettings::values.game_directory_path = ReadSetting("gameListRootDir", ".").toString();
+    UISettings::values.game_directory_deepscan = ReadSetting("gameListDeepScan", false).toBool();
     UISettings::values.recent_files = ReadSetting("recentFiles").toStringList();
     qt_config->endGroup();
 
@@ -664,6 +665,7 @@ void Config::SaveValues() {
     WriteSetting("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation, false);
     WriteSetting("use_asynchronous_gpu_emulation", Settings::values.use_asynchronous_gpu_emulation,
                  false);
+    WriteSetting("force_30fps_mode", Settings::values.force_30fps_mode, false);
 
     // Cast to double because Qt's written float values are not human-readable
     WriteSetting("bg_red", (double)Settings::values.bg_red, 0.0);
@@ -768,8 +770,8 @@ void Config::SaveValues() {
     WriteSetting("romsPath", UISettings::values.roms_path);
     WriteSetting("symbolsPath", UISettings::values.symbols_path);
     WriteSetting("screenshotPath", UISettings::values.screenshot_path);
-    WriteSetting("gameListRootDir", UISettings::values.gamedir, ".");
-    WriteSetting("gameListDeepScan", UISettings::values.gamedir_deepscan, false);
+    WriteSetting("gameListRootDir", UISettings::values.game_directory_path, ".");
+    WriteSetting("gameListDeepScan", UISettings::values.game_directory_deepscan, false);
     WriteSetting("recentFiles", UISettings::values.recent_files);
     qt_config->endGroup();
 
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp
index 51bd1f121..a5218b051 100644
--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -12,7 +12,7 @@
 #include "yuzu/hotkeys.h"
 
 ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry)
-    : QDialog(parent), registry(registry), ui(new Ui::ConfigureDialog) {
+    : QDialog(parent), ui(new Ui::ConfigureDialog), registry(registry) {
     ui->setupUi(this);
     ui->hotkeysTab->Populate(registry);
     this->setConfiguration();
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index eeb038afb..e48f4f5a3 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -28,7 +28,7 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
 ConfigureGeneral::~ConfigureGeneral() = default;
 
 void ConfigureGeneral::setConfiguration() {
-    ui->toggle_deepscan->setChecked(UISettings::values.gamedir_deepscan);
+    ui->toggle_deepscan->setChecked(UISettings::values.game_directory_deepscan);
     ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing);
     ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot);
     ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme));
@@ -36,7 +36,7 @@ void ConfigureGeneral::setConfiguration() {
 }
 
 void ConfigureGeneral::applyConfiguration() {
-    UISettings::values.gamedir_deepscan = ui->toggle_deepscan->isChecked();
+    UISettings::values.game_directory_deepscan = ui->toggle_deepscan->isChecked();
     UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked();
     UISettings::values.select_user_on_boot = ui->toggle_user_on_boot->isChecked();
     UISettings::values.theme =
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index dd1d67488..0a9883d37 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -77,6 +77,8 @@ void ConfigureGraphics::setConfiguration() {
     ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
     ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
     ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
+    ui->force_30fps_mode->setEnabled(!Core::System::GetInstance().IsPoweredOn());
+    ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
     UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
                                                  Settings::values.bg_blue));
 }
@@ -90,6 +92,7 @@ void ConfigureGraphics::applyConfiguration() {
     Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
     Settings::values.use_asynchronous_gpu_emulation =
         ui->use_asynchronous_gpu_emulation->isChecked();
+    Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
     Settings::values.bg_red = static_cast<float>(bg_color.redF());
     Settings::values.bg_green = static_cast<float>(bg_color.greenF());
     Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index c6767e0ca..15ab18ecd 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -71,6 +71,13 @@
          </widget>
         </item>
         <item>
+         <widget class="QCheckBox" name="force_30fps_mode">
+          <property name="text">
+           <string>Force 30 FPS mode</string>
+          </property>
+         </widget>
+        </item>
+        <item>
          <layout class="QHBoxLayout" name="horizontalLayout">
           <item>
            <widget class="QLabel" name="label">
diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp
index bfb562535..a7a8752e5 100644
--- a/src/yuzu/configuration/configure_hotkeys.cpp
+++ b/src/yuzu/configuration/configure_hotkeys.cpp
@@ -66,20 +66,21 @@ void ConfigureHotkeys::Populate(const HotkeyRegistry& registry) {
 }
 
 void ConfigureHotkeys::Configure(QModelIndex index) {
-    if (index.parent() == QModelIndex())
+    if (!index.parent().isValid()) {
         return;
+    }
 
     index = index.sibling(index.row(), 1);
-    auto* model = ui->hotkey_list->model();
-    auto previous_key = model->data(index);
-
-    auto* hotkey_dialog = new SequenceDialog;
-    int return_code = hotkey_dialog->exec();
+    auto* const model = ui->hotkey_list->model();
+    const auto previous_key = model->data(index);
 
-    auto key_sequence = hotkey_dialog->GetSequence();
+    SequenceDialog hotkey_dialog{this};
 
-    if (return_code == QDialog::Rejected || key_sequence.isEmpty())
+    const int return_code = hotkey_dialog.exec();
+    const auto key_sequence = hotkey_dialog.GetSequence();
+    if (return_code == QDialog::Rejected || key_sequence.isEmpty()) {
         return;
+    }
 
     if (IsUsedKey(key_sequence) && key_sequence != QKeySequence(previous_key.toString())) {
         QMessageBox::critical(this, tr("Error in inputted key"),
@@ -90,7 +91,7 @@ void ConfigureHotkeys::Configure(QModelIndex index) {
     }
 }
 
-bool ConfigureHotkeys::IsUsedKey(QKeySequence key_sequence) {
+bool ConfigureHotkeys::IsUsedKey(QKeySequence key_sequence) const {
     return GetUsedKeyList().contains(key_sequence);
 }
 
diff --git a/src/yuzu/configuration/configure_hotkeys.h b/src/yuzu/configuration/configure_hotkeys.h
index cd203aad6..73fb8a175 100644
--- a/src/yuzu/configuration/configure_hotkeys.h
+++ b/src/yuzu/configuration/configure_hotkeys.h
@@ -6,7 +6,6 @@
 
 #include <memory>
 #include <QWidget>
-#include "core/settings.h"
 
 namespace Ui {
 class ConfigureHotkeys;
@@ -39,7 +38,7 @@ signals:
 
 private:
     void Configure(QModelIndex index);
-    bool IsUsedKey(QKeySequence key_sequence);
+    bool IsUsedKey(QKeySequence key_sequence) const;
     QList<QKeySequence> GetUsedKeyList() const;
 
     std::unique_ptr<Ui::ConfigureHotkeys> ui;
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index 4b67656ac..b0ca766ec 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -467,9 +467,10 @@ void GameList::LoadInterfaceLayout() {
 const QStringList GameList::supported_file_extensions = {"nso", "nro", "nca", "xci", "nsp"};
 
 void GameList::RefreshGameDirectory() {
-    if (!UISettings::values.gamedir.isEmpty() && current_worker != nullptr) {
+    if (!UISettings::values.game_directory_path.isEmpty() && current_worker != nullptr) {
         LOG_INFO(Frontend, "Change detected in the games directory. Reloading game list.");
         search_field->clear();
-        PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan);
+        PopulateAsync(UISettings::values.game_directory_path,
+                      UISettings::values.game_directory_deepscan);
     }
 }
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index 3db0e90da..2cf5c58a0 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -95,7 +95,7 @@ public:
             if (row2.isEmpty())
                 return row1;
 
-            return row1 + "\n    " + row2;
+            return QString(row1 + "\n    " + row2);
         }
 
         return GameListItem::data(role);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ca231d710..bdee44b04 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -213,7 +213,8 @@ GMainWindow::GMainWindow()
     OnReinitializeKeys(ReinitializeKeyBehavior::NoWarning);
 
     game_list->LoadCompatibilityList();
-    game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan);
+    game_list->PopulateAsync(UISettings::values.game_directory_path,
+                             UISettings::values.game_directory_deepscan);
 
     // Show one-time "callout" messages to the user
     ShowTelemetryCallout();
@@ -1278,8 +1279,8 @@ void GMainWindow::OnGameListOpenPerGameProperties(const std::string& file) {
 
         const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false);
         if (reload) {
-            game_list->PopulateAsync(UISettings::values.gamedir,
-                                     UISettings::values.gamedir_deepscan);
+            game_list->PopulateAsync(UISettings::values.game_directory_path,
+                                     UISettings::values.game_directory_deepscan);
         }
 
         config->Save();
@@ -1367,7 +1368,8 @@ void GMainWindow::OnMenuInstallToNAND() {
     const auto success = [this]() {
         QMessageBox::information(this, tr("Successfully Installed"),
                                  tr("The file was successfully installed."));
-        game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan);
+        game_list->PopulateAsync(UISettings::values.game_directory_path,
+                                 UISettings::values.game_directory_deepscan);
     };
 
     const auto failed = [this]() {
@@ -1494,8 +1496,8 @@ void GMainWindow::OnMenuInstallToNAND() {
 void GMainWindow::OnMenuSelectGameListRoot() {
     QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory"));
     if (!dir_path.isEmpty()) {
-        UISettings::values.gamedir = dir_path;
-        game_list->PopulateAsync(dir_path, UISettings::values.gamedir_deepscan);
+        UISettings::values.game_directory_path = dir_path;
+        game_list->PopulateAsync(dir_path, UISettings::values.game_directory_deepscan);
     }
 }
 
@@ -1517,7 +1519,8 @@ void GMainWindow::OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target)
                                                                       : FileUtil::UserPath::NANDDir,
                               dir_path.toStdString());
         Service::FileSystem::CreateFactories(*vfs);
-        game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan);
+        game_list->PopulateAsync(UISettings::values.game_directory_path,
+                                 UISettings::values.game_directory_deepscan);
     }
 }
 
@@ -1669,8 +1672,8 @@ void GMainWindow::OnConfigure() {
 
         const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false);
         if (reload) {
-            game_list->PopulateAsync(UISettings::values.gamedir,
-                                     UISettings::values.gamedir_deepscan);
+            game_list->PopulateAsync(UISettings::values.game_directory_path,
+                                     UISettings::values.game_directory_deepscan);
         }
 
         config->Save();
@@ -1920,7 +1923,8 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
     Service::FileSystem::CreateFactories(*vfs);
 
     if (behavior == ReinitializeKeyBehavior::Warning) {
-        game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan);
+        game_list->PopulateAsync(UISettings::values.game_directory_path,
+                                 UISettings::values.game_directory_deepscan);
     }
 }
 
@@ -2027,6 +2031,18 @@ void GMainWindow::dragMoveEvent(QDragMoveEvent* event) {
     event->acceptProposedAction();
 }
 
+void GMainWindow::keyPressEvent(QKeyEvent* event) {
+    if (render_window) {
+        render_window->ForwardKeyPressEvent(event);
+    }
+}
+
+void GMainWindow::keyReleaseEvent(QKeyEvent* event) {
+    if (render_window) {
+        render_window->ForwardKeyReleaseEvent(event);
+    }
+}
+
 bool GMainWindow::ConfirmChangeGame() {
     if (emu_thread == nullptr)
         return true;
@@ -2094,7 +2110,8 @@ int main(int argc, char* argv[]) {
     QCoreApplication::setOrganizationName("yuzu team");
     QCoreApplication::setApplicationName("yuzu");
 
-    QApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity);
+    // Enables the core to make the qt created contexts current on std::threads
+    QCoreApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity);
     QApplication app(argc, argv);
 
     // Qt changes the locale and causes issues in float conversion using std::to_string() when
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 85e3810f2..ce5045819 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -252,4 +252,8 @@ protected:
     void dropEvent(QDropEvent* event) override;
     void dragEnterEvent(QDragEnterEvent* event) override;
     void dragMoveEvent(QDragMoveEvent* event) override;
+
+    // Overrides used to forward signals to the render window when the focus moves out.
+    void keyPressEvent(QKeyEvent* event) override;
+    void keyReleaseEvent(QKeyEvent* event) override;
 };
diff --git a/src/yuzu/ui_settings.h b/src/yuzu/ui_settings.h
index 45e705b61..dbd318e20 100644
--- a/src/yuzu/ui_settings.h
+++ b/src/yuzu/ui_settings.h
@@ -55,8 +55,8 @@ struct Values {
     QString roms_path;
     QString symbols_path;
     QString screenshot_path;
-    QString gamedir;
-    bool gamedir_deepscan;
+    QString game_directory_path;
+    bool game_directory_deepscan;
     QStringList recent_files;
 
     QString theme;
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index de7a26e14..68a176032 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -19,6 +19,37 @@
 #include "input_common/sdl/sdl.h"
 #include "yuzu_cmd/emu_window/emu_window_sdl2.h"
 
+class SDLGLContext : public Core::Frontend::GraphicsContext {
+public:
+    explicit SDLGLContext() {
+        // create a hidden window to make the shared context against
+        window = SDL_CreateWindow("", SDL_WINDOWPOS_UNDEFINED, // x position
+                                  SDL_WINDOWPOS_UNDEFINED,     // y position
+                                  Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height,
+                                  SDL_WINDOW_OPENGL | SDL_WINDOW_HIDDEN);
+        context = SDL_GL_CreateContext(window);
+    }
+
+    ~SDLGLContext() {
+        SDL_GL_DeleteContext(context);
+        SDL_DestroyWindow(window);
+    }
+
+    void MakeCurrent() override {
+        SDL_GL_MakeCurrent(window, context);
+    }
+
+    void DoneCurrent() override {
+        SDL_GL_MakeCurrent(window, nullptr);
+    }
+
+    void SwapBuffers() override {}
+
+private:
+    SDL_Window* window;
+    SDL_GLContext context;
+};
+
 void EmuWindow_SDL2::OnMouseMotion(s32 x, s32 y) {
     TouchMoved((unsigned)std::max(x, 0), (unsigned)std::max(y, 0));
     InputCommon::GetMotionEmu()->Tilt(x, y);
@@ -153,6 +184,7 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
     SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8);
     SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
     SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0);
+    SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
 
     std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname,
                                            Common::g_scm_branch, Common::g_scm_desc);
@@ -171,7 +203,6 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
     if (fullscreen) {
         Fullscreen();
     }
-
     gl_context = SDL_GL_CreateContext(render_window);
 
     if (gl_context == nullptr) {
@@ -278,3 +309,7 @@ void EmuWindow_SDL2::OnMinimalClientAreaChangeRequest(
 
     SDL_SetWindowMinimumSize(render_window, minimal_size.first, minimal_size.second);
 }
+
+std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2::CreateSharedContext() const {
+    return std::make_unique<SDLGLContext>();
+}
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.h b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
index b0d4116cc..17e98227f 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
@@ -27,6 +27,8 @@ public:
     /// Releases the GL context from the caller thread
     void DoneCurrent() override;
 
+    std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
+
     /// Whether the window is still open, and a close request hasn't yet been sent
     bool IsOpen() const;