Diffstat (limited to 'src')
45 files changed, 1310 insertions, 362 deletions
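The first hunk below (audio_core/algorithm/interpolate.cpp) replaces the old offset-chasing loop with a resampler that keeps a rolling sample history in InterpolationState and advances a position/fraction pair per input frame. A minimal sketch of that bookkeeping, assuming a mono signal and a plain linear mix standing in for the 512-entry curve LUTs; all names here are illustrative, not the emulator's API:

#include <algorithm>
#include <array>
#include <cstdint>
#include <vector>

struct ResampleState {
    static constexpr std::size_t taps = 4;
    std::array<std::int16_t, taps> history{}; // history[0] is the newest sample
    double position = 0.0;                    // fractional input position
    std::int32_t fraction = 0;                // 15-bit phase accumulator
};

std::vector<std::int16_t> Resample(ResampleState& state,
                                   const std::vector<std::int16_t>& input, double ratio) {
    std::vector<std::int16_t> output;
    if (ratio <= 0.0) {
        return output; // the real code rejects nonsensical ratios up front
    }
    const std::int32_t step = static_cast<std::int32_t>(ratio * 0x8000);
    for (const std::int16_t sample : input) {
        // Shift the newest input sample into the front of the history window.
        std::rotate(state.history.begin(), state.history.end() - 1, state.history.end());
        state.history[0] = sample;
        // Emit zero or more output samples for this input frame.
        while (state.position <= 1.0) {
            const double mu = state.fraction / 32768.0;
            const double mixed = state.history[1] * (1.0 - mu) + state.history[0] * mu;
            output.push_back(static_cast<std::int16_t>(
                std::clamp(static_cast<int>(mixed), -32768, 32767)));
            // The real loop derives a LUT index from (fraction >> 8) instead of mixing directly.
            state.fraction = (state.fraction + step) & 0x7fff;
            state.position += ratio;
        }
        state.position -= 1.0;
    }
    return output;
}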
diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp index a58f24169..49ab9d3e1 100644 --- a/src/audio_core/algorithm/interpolate.cpp +++ b/src/audio_core/algorithm/interpolate.cpp @@ -8,13 +8,14 @@ #include <climits> #include <cmath> #include <vector> + #include "audio_core/algorithm/interpolate.h" #include "common/common_types.h" #include "common/logging/log.h" namespace AudioCore { -constexpr std::array<s16, 512> curve_lut0 = { +constexpr std::array<s16, 512> curve_lut0{ 6600, 19426, 6722, 3, 6479, 19424, 6845, 9, 6359, 19419, 6968, 15, 6239, 19412, 7093, 22, 6121, 19403, 7219, 28, 6004, 19391, 7345, 34, 5888, 19377, 7472, 41, 5773, 19361, 7600, 48, 5659, 19342, 7728, 55, 5546, 19321, 7857, @@ -56,7 +57,7 @@ constexpr std::array<s16, 512> curve_lut0 = { 19403, 6121, 22, 7093, 19412, 6239, 15, 6968, 19419, 6359, 9, 6845, 19424, 6479, 3, 6722, 19426, 6600}; -constexpr std::array<s16, 512> curve_lut1 = { +constexpr std::array<s16, 512> curve_lut1{ -68, 32639, 69, -5, -200, 32630, 212, -15, -328, 32613, 359, -26, -450, 32586, 512, -36, -568, 32551, 669, -47, -680, 32507, 832, -58, -788, 32454, 1000, -69, -891, 32393, 1174, -80, -990, 32323, 1352, -92, -1084, 32244, 1536, @@ -98,7 +99,7 @@ constexpr std::array<s16, 512> curve_lut1 = { 32551, -568, -36, 512, 32586, -450, -26, 359, 32613, -328, -15, 212, 32630, -200, -5, 69, 32639, -68}; -constexpr std::array<s16, 512> curve_lut2 = { +constexpr std::array<s16, 512> curve_lut2{ 3195, 26287, 3329, -32, 3064, 26281, 3467, -34, 2936, 26270, 3608, -38, 2811, 26253, 3751, -42, 2688, 26230, 3897, -46, 2568, 26202, 4046, -50, 2451, 26169, 4199, -54, 2338, 26130, 4354, -58, 2227, 26085, 4512, -63, 2120, 26035, 4673, @@ -146,10 +147,10 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, if (ratio <= 0) { LOG_CRITICAL(Audio, "Nonsensical interpolation ratio {}", ratio); - ratio = 1.0; + return input; } - const int step = static_cast<int>(ratio * 0x8000); + const s32 step{static_cast<s32>(ratio * 0x8000)}; const std::array<s16, 512>& lut = [step] { if (step > 0xaaaa) { return curve_lut0; @@ -160,28 +161,37 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, return curve_lut2; }(); - std::vector<s16> output(static_cast<std::size_t>(input.size() / ratio)); - int in_offset = 0; - for (std::size_t out_offset = 0; out_offset < output.size(); out_offset += 2) { - const int lut_index = (state.fraction >> 8) * 4; + const std::size_t num_frames{input.size() / 2}; + + std::vector<s16> output; + output.reserve(static_cast<std::size_t>(input.size() / ratio + InterpolationState::taps)); - const int l = input[(in_offset + 0) * 2 + 0] * lut[lut_index + 0] + - input[(in_offset + 1) * 2 + 0] * lut[lut_index + 1] + - input[(in_offset + 2) * 2 + 0] * lut[lut_index + 2] + - input[(in_offset + 3) * 2 + 0] * lut[lut_index + 3]; + for (std::size_t frame{}; frame < num_frames; ++frame) { + const std::size_t lut_index{(state.fraction >> 8) * InterpolationState::taps}; - const int r = input[(in_offset + 0) * 2 + 1] * lut[lut_index + 0] + - input[(in_offset + 1) * 2 + 1] * lut[lut_index + 1] + - input[(in_offset + 2) * 2 + 1] * lut[lut_index + 2] + - input[(in_offset + 3) * 2 + 1] * lut[lut_index + 3]; + std::rotate(state.history.begin(), state.history.end() - 1, state.history.end()); + state.history[0][0] = input[frame * 2 + 0]; + state.history[0][1] = input[frame * 2 + 1]; - const int new_offset = state.fraction + step; + while (state.position <= 1.0) { + const s32 
left{state.history[0][0] * lut[lut_index + 0] + + state.history[1][0] * lut[lut_index + 1] + + state.history[2][0] * lut[lut_index + 2] + + state.history[3][0] * lut[lut_index + 3]}; + const s32 right{state.history[0][1] * lut[lut_index + 0] + + state.history[1][1] * lut[lut_index + 1] + + state.history[2][1] * lut[lut_index + 2] + + state.history[3][1] * lut[lut_index + 3]}; + const s32 new_offset{state.fraction + step}; - in_offset += new_offset >> 15; - state.fraction = new_offset & 0x7fff; + state.fraction = new_offset & 0x7fff; - output[out_offset + 0] = static_cast<s16>(std::clamp(l >> 15, SHRT_MIN, SHRT_MAX)); - output[out_offset + 1] = static_cast<s16>(std::clamp(r >> 15, SHRT_MIN, SHRT_MAX)); + output.emplace_back(static_cast<s16>(std::clamp(left >> 15, SHRT_MIN, SHRT_MAX))); + output.emplace_back(static_cast<s16>(std::clamp(right >> 15, SHRT_MIN, SHRT_MAX))); + + state.position += ratio; + } + state.position -= 1.0; } return output; diff --git a/src/audio_core/algorithm/interpolate.h b/src/audio_core/algorithm/interpolate.h index 1b9831a75..ab1a31754 100644 --- a/src/audio_core/algorithm/interpolate.h +++ b/src/audio_core/algorithm/interpolate.h @@ -6,12 +6,17 @@ #include <array> #include <vector> + #include "common/common_types.h" namespace AudioCore { struct InterpolationState { - int fraction = 0; + static constexpr std::size_t taps{4}; + static constexpr std::size_t history_size{taps * 2 - 1}; + std::array<std::array<s16, 2>, history_size> history{}; + double position{}; + s32 fraction{}; }; /// Interpolates input signal to produce output signal. diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 54be7dc0c..b31a0328c 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -595,8 +595,12 @@ endif() if (ARCHITECTURE_x86_64) target_sources(core PRIVATE - arm/dynarmic/arm_dynarmic.cpp - arm/dynarmic/arm_dynarmic.h + arm/dynarmic/arm_dynarmic_32.cpp + arm/dynarmic/arm_dynarmic_32.h + arm/dynarmic/arm_dynarmic_64.cpp + arm/dynarmic/arm_dynarmic_64.h + arm/dynarmic/arm_dynarmic_cp15.cpp + arm/dynarmic/arm_dynarmic_cp15.h ) target_link_libraries(core PRIVATE dynarmic) endif() diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 47b964eb7..57eae839e 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -25,7 +25,20 @@ public: explicit ARM_Interface(System& system_) : system{system_} {} virtual ~ARM_Interface() = default; - struct ThreadContext { + struct ThreadContext32 { + std::array<u32, 16> cpu_registers; + u32 cpsr; + std::array<u8, 4> padding; + std::array<u64, 32> fprs; + u32 fpscr; + u32 fpexc; + u32 tpidr; + }; + // Internally within the kernel, it expects the AArch32 version of the + // thread context to be 344 bytes in size. + static_assert(sizeof(ThreadContext32) == 0x158); + + struct ThreadContext64 { std::array<u64, 31> cpu_registers; u64 sp; u64 pc; @@ -38,7 +51,7 @@ public: }; // Internally within the kernel, it expects the AArch64 version of the // thread context to be 800 bytes in size. 
- static_assert(sizeof(ThreadContext) == 0x320); + static_assert(sizeof(ThreadContext64) == 0x320); /// Runs the CPU until an event happens virtual void Run() = 0; @@ -130,17 +143,10 @@ public: */ virtual void SetTPIDR_EL0(u64 value) = 0; - /** - * Saves the current CPU context - * @param ctx Thread context to save - */ - virtual void SaveContext(ThreadContext& ctx) = 0; - - /** - * Loads a CPU context - * @param ctx Thread context to load - */ - virtual void LoadContext(const ThreadContext& ctx) = 0; + virtual void SaveContext(ThreadContext32& ctx) = 0; + virtual void SaveContext(ThreadContext64& ctx) = 0; + virtual void LoadContext(const ThreadContext32& ctx) = 0; + virtual void LoadContext(const ThreadContext64& ctx) = 0; /// Clears the exclusive monitor's state. virtual void ClearExclusiveState() = 0; diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp new file mode 100644 index 000000000..187a972ac --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -0,0 +1,208 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cinttypes> +#include <memory> +#include <dynarmic/A32/a32.h> +#include <dynarmic/A32/config.h> +#include <dynarmic/A32/context.h> +#include "common/microprofile.h" +#include "core/arm/dynarmic/arm_dynarmic_32.h" +#include "core/arm/dynarmic/arm_dynarmic_64.h" +#include "core/arm/dynarmic/arm_dynarmic_cp15.h" +#include "core/core.h" +#include "core/core_manager.h" +#include "core/core_timing.h" +#include "core/hle/kernel/svc.h" +#include "core/memory.h" + +namespace Core { + +class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks { +public: + explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent) : parent(parent) {} + + u8 MemoryRead8(u32 vaddr) override { + return parent.system.Memory().Read8(vaddr); + } + u16 MemoryRead16(u32 vaddr) override { + return parent.system.Memory().Read16(vaddr); + } + u32 MemoryRead32(u32 vaddr) override { + return parent.system.Memory().Read32(vaddr); + } + u64 MemoryRead64(u32 vaddr) override { + return parent.system.Memory().Read64(vaddr); + } + + void MemoryWrite8(u32 vaddr, u8 value) override { + parent.system.Memory().Write8(vaddr, value); + } + void MemoryWrite16(u32 vaddr, u16 value) override { + parent.system.Memory().Write16(vaddr, value); + } + void MemoryWrite32(u32 vaddr, u32 value) override { + parent.system.Memory().Write32(vaddr, value); + } + void MemoryWrite64(u32 vaddr, u64 value) override { + parent.system.Memory().Write64(vaddr, value); + } + + void InterpreterFallback(u32 pc, std::size_t num_instructions) override { + UNIMPLEMENTED(); + } + + void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override { + switch (exception) { + case Dynarmic::A32::Exception::UndefinedInstruction: + case Dynarmic::A32::Exception::UnpredictableInstruction: + break; + case Dynarmic::A32::Exception::Breakpoint: + break; + } + LOG_CRITICAL(HW_GPU, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", + static_cast<std::size_t>(exception), pc, MemoryReadCode(pc)); + UNIMPLEMENTED(); + } + + void CallSVC(u32 swi) override { + Kernel::CallSVC(parent.system, swi); + } + + void AddTicks(u64 ticks) override { + // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a + // rough approximation of the amount of executed ticks in the system, it may be thrown off + // if not all cores are doing a similar amount of work. 
Instead of doing this, we should + // device a way so that timing is consistent across all cores without increasing the ticks 4 + // times. + u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES; + // Always execute at least one tick. + amortized_ticks = std::max<u64>(amortized_ticks, 1); + + parent.system.CoreTiming().AddTicks(amortized_ticks); + num_interpreted_instructions = 0; + } + u64 GetTicksRemaining() override { + return std::max(parent.system.CoreTiming().GetDowncount(), {}); + } + + ARM_Dynarmic_32& parent; + std::size_t num_interpreted_instructions{}; + u64 tpidrro_el0{}; + u64 tpidr_el0{}; +}; + +std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, + std::size_t address_space_bits) const { + Dynarmic::A32::UserConfig config; + config.callbacks = cb.get(); + // TODO(bunnei): Implement page table for 32-bit + // config.page_table = &page_table.pointers; + config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]); + config.define_unpredictable_behaviour = true; + return std::make_unique<Dynarmic::A32::Jit>(config); +} + +MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_32, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); + +void ARM_Dynarmic_32::Run() { + MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_32); + jit->Run(); +} + +void ARM_Dynarmic_32::Step() { + cb->InterpreterFallback(jit->Regs()[15], 1); +} + +ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, + std::size_t core_index) + : ARM_Interface{system}, + cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index}, + exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} + +ARM_Dynarmic_32::~ARM_Dynarmic_32() = default; + +void ARM_Dynarmic_32::SetPC(u64 pc) { + jit->Regs()[15] = static_cast<u32>(pc); +} + +u64 ARM_Dynarmic_32::GetPC() const { + return jit->Regs()[15]; +} + +u64 ARM_Dynarmic_32::GetReg(int index) const { + return jit->Regs()[index]; +} + +void ARM_Dynarmic_32::SetReg(int index, u64 value) { + jit->Regs()[index] = static_cast<u32>(value); +} + +u128 ARM_Dynarmic_32::GetVectorReg(int index) const { + return {}; +} + +void ARM_Dynarmic_32::SetVectorReg(int index, u128 value) {} + +u32 ARM_Dynarmic_32::GetPSTATE() const { + return jit->Cpsr(); +} + +void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) { + jit->SetCpsr(cpsr); +} + +u64 ARM_Dynarmic_32::GetTlsAddress() const { + return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; +} + +void ARM_Dynarmic_32::SetTlsAddress(VAddr address) { + CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address); +} + +u64 ARM_Dynarmic_32::GetTPIDR_EL0() const { + return cb->tpidr_el0; +} + +void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) { + cb->tpidr_el0 = value; +} + +void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) { + Dynarmic::A32::Context context; + jit->SaveContext(context); + ctx.cpu_registers = context.Regs(); + ctx.cpsr = context.Cpsr(); +} + +void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) { + Dynarmic::A32::Context context; + context.Regs() = ctx.cpu_registers; + context.SetCpsr(ctx.cpsr); + jit->LoadContext(context); +} + +void ARM_Dynarmic_32::PrepareReschedule() { + jit->HaltExecution(); +} + +void ARM_Dynarmic_32::ClearInstructionCache() { + jit->ClearCache(); +} + +void ARM_Dynarmic_32::ClearExclusiveState() {} + +void ARM_Dynarmic_32::PageTableChanged(Common::PageTable& page_table, + std::size_t new_address_space_size_in_bits) { + auto key = 
std::make_pair(&page_table, new_address_space_size_in_bits); + auto iter = jit_cache.find(key); + if (iter != jit_cache.end()) { + jit = iter->second; + return; + } + jit = MakeJit(page_table, new_address_space_size_in_bits); + jit_cache.emplace(key, jit); +} + +} // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h new file mode 100644 index 000000000..143e46e4d --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -0,0 +1,77 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <unordered_map> + +#include <dynarmic/A32/a32.h> +#include <dynarmic/A64/a64.h> +#include <dynarmic/A64/exclusive_monitor.h> +#include "common/common_types.h" +#include "common/hash.h" +#include "core/arm/arm_interface.h" +#include "core/arm/exclusive_monitor.h" + +namespace Memory { +class Memory; +} + +namespace Core { + +class DynarmicCallbacks32; +class DynarmicExclusiveMonitor; +class System; + +class ARM_Dynarmic_32 final : public ARM_Interface { +public: + ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); + ~ARM_Dynarmic_32() override; + + void SetPC(u64 pc) override; + u64 GetPC() const override; + u64 GetReg(int index) const override; + void SetReg(int index, u64 value) override; + u128 GetVectorReg(int index) const override; + void SetVectorReg(int index, u128 value) override; + u32 GetPSTATE() const override; + void SetPSTATE(u32 pstate) override; + void Run() override; + void Step() override; + VAddr GetTlsAddress() const override; + void SetTlsAddress(VAddr address) override; + void SetTPIDR_EL0(u64 value) override; + u64 GetTPIDR_EL0() const override; + + void SaveContext(ThreadContext32& ctx) override; + void SaveContext(ThreadContext64& ctx) override {} + void LoadContext(const ThreadContext32& ctx) override; + void LoadContext(const ThreadContext64& ctx) override {} + + void PrepareReschedule() override; + void ClearExclusiveState() override; + + void ClearInstructionCache() override; + void PageTableChanged(Common::PageTable& new_page_table, + std::size_t new_address_space_size_in_bits) override; + +private: + std::shared_ptr<Dynarmic::A32::Jit> MakeJit(Common::PageTable& page_table, + std::size_t address_space_bits) const; + + using JitCacheKey = std::pair<Common::PageTable*, std::size_t>; + using JitCacheType = + std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>; + + friend class DynarmicCallbacks32; + std::unique_ptr<DynarmicCallbacks32> cb; + JitCacheType jit_cache; + std::shared_ptr<Dynarmic::A32::Jit> jit; + std::size_t core_index; + DynarmicExclusiveMonitor& exclusive_monitor; + std::array<u32, 84> CP15_regs{}; +}; + +} // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 29eaf74e5..a53a58ba0 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -8,7 +8,7 @@ #include <dynarmic/A64/config.h> #include "common/logging/log.h" #include "common/microprofile.h" -#include "core/arm/dynarmic/arm_dynarmic.h" +#include "core/arm/dynarmic/arm_dynarmic_64.h" #include "core/core.h" #include "core/core_manager.h" #include "core/core_timing.h" @@ -25,9 +25,9 @@ namespace Core { using Vector = Dynarmic::A64::Vector; -class ARM_Dynarmic_Callbacks : public Dynarmic::A64::UserCallbacks { +class DynarmicCallbacks64 : public 
Dynarmic::A64::UserCallbacks { public: - explicit ARM_Dynarmic_Callbacks(ARM_Dynarmic& parent) : parent(parent) {} + explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent) : parent(parent) {} u8 MemoryRead8(u64 vaddr) override { return parent.system.Memory().Read8(vaddr); @@ -68,7 +68,7 @@ public: LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc, num_instructions, MemoryReadCode(pc)); - ARM_Interface::ThreadContext ctx; + ARM_Interface::ThreadContext64 ctx; parent.SaveContext(ctx); parent.inner_unicorn.LoadContext(ctx); parent.inner_unicorn.ExecuteInstructions(num_instructions); @@ -90,7 +90,7 @@ public: parent.jit->HaltExecution(); parent.SetPC(pc); Kernel::Thread* const thread = parent.system.CurrentScheduler().GetCurrentThread(); - parent.SaveContext(thread->GetContext()); + parent.SaveContext(thread->GetContext64()); GDBStub::Break(); GDBStub::SendTrap(thread, 5); return; @@ -126,14 +126,14 @@ public: return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); } - ARM_Dynarmic& parent; + ARM_Dynarmic_64& parent; std::size_t num_interpreted_instructions = 0; u64 tpidrro_el0 = 0; u64 tpidr_el0 = 0; }; -std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& page_table, - std::size_t address_space_bits) const { +std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table, + std::size_t address_space_bits) const { Dynarmic::A64::UserConfig config; // Callbacks @@ -159,79 +159,79 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag // Unpredictable instructions config.define_unpredictable_behaviour = true; - return std::make_unique<Dynarmic::A64::Jit>(config); + return std::make_shared<Dynarmic::A64::Jit>(config); } -MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); +MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_64, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); -void ARM_Dynarmic::Run() { - MICROPROFILE_SCOPE(ARM_Jit_Dynarmic); +void ARM_Dynarmic_64::Run() { + MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_64); jit->Run(); } -void ARM_Dynarmic::Step() { +void ARM_Dynarmic_64::Step() { cb->InterpreterFallback(jit->GetPC(), 1); } -ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, - std::size_t core_index) +ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, + std::size_t core_index) : ARM_Interface{system}, - cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system}, + cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system}, core_index{core_index}, exclusive_monitor{ dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} -ARM_Dynarmic::~ARM_Dynarmic() = default; +ARM_Dynarmic_64::~ARM_Dynarmic_64() = default; -void ARM_Dynarmic::SetPC(u64 pc) { +void ARM_Dynarmic_64::SetPC(u64 pc) { jit->SetPC(pc); } -u64 ARM_Dynarmic::GetPC() const { +u64 ARM_Dynarmic_64::GetPC() const { return jit->GetPC(); } -u64 ARM_Dynarmic::GetReg(int index) const { +u64 ARM_Dynarmic_64::GetReg(int index) const { return jit->GetRegister(index); } -void ARM_Dynarmic::SetReg(int index, u64 value) { +void ARM_Dynarmic_64::SetReg(int index, u64 value) { jit->SetRegister(index, value); } -u128 ARM_Dynarmic::GetVectorReg(int index) const { +u128 ARM_Dynarmic_64::GetVectorReg(int index) const { return jit->GetVector(index); } -void ARM_Dynarmic::SetVectorReg(int index, u128 value) { +void ARM_Dynarmic_64::SetVectorReg(int index, u128 value) { jit->SetVector(index, value); } 
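MakeJit() now returns std::shared_ptr<Dynarmic::A64::Jit> so compiled JITs can be kept around, and PageTableChanged() (shown for the 32-bit core earlier, and for the 64-bit core further below) looks the JIT up by (page table, address-space bits) before recompiling. A standalone sketch of that cache, with stand-in PageTable/Jit types and a generic pair hash rather than the project's Common::PairHash:

#include <cstddef>
#include <memory>
#include <unordered_map>
#include <utility>

struct PageTable {};
struct Jit {};

struct PairHash {
    template <typename A, typename B>
    std::size_t operator()(const std::pair<A, B>& p) const {
        const std::size_t h1 = std::hash<A>{}(p.first);
        const std::size_t h2 = std::hash<B>{}(p.second);
        return h1 ^ (h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2)); // boost-style combine
    }
};

using JitCacheKey = std::pair<PageTable*, std::size_t>;
std::unordered_map<JitCacheKey, std::shared_ptr<Jit>, PairHash> jit_cache;

std::shared_ptr<Jit> GetOrMakeJit(PageTable& page_table, std::size_t address_space_bits) {
    const JitCacheKey key{&page_table, address_space_bits};
    if (const auto it = jit_cache.find(key); it != jit_cache.end()) {
        return it->second; // reuse the JIT already compiled for this page table / address space
    }
    auto jit = std::make_shared<Jit>(); // real code calls MakeJit(page_table, address_space_bits)
    jit_cache.emplace(key, jit);
    return jit;
}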
-u32 ARM_Dynarmic::GetPSTATE() const { +u32 ARM_Dynarmic_64::GetPSTATE() const { return jit->GetPstate(); } -void ARM_Dynarmic::SetPSTATE(u32 pstate) { +void ARM_Dynarmic_64::SetPSTATE(u32 pstate) { jit->SetPstate(pstate); } -u64 ARM_Dynarmic::GetTlsAddress() const { +u64 ARM_Dynarmic_64::GetTlsAddress() const { return cb->tpidrro_el0; } -void ARM_Dynarmic::SetTlsAddress(VAddr address) { +void ARM_Dynarmic_64::SetTlsAddress(VAddr address) { cb->tpidrro_el0 = address; } -u64 ARM_Dynarmic::GetTPIDR_EL0() const { +u64 ARM_Dynarmic_64::GetTPIDR_EL0() const { return cb->tpidr_el0; } -void ARM_Dynarmic::SetTPIDR_EL0(u64 value) { +void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) { cb->tpidr_el0 = value; } -void ARM_Dynarmic::SaveContext(ThreadContext& ctx) { +void ARM_Dynarmic_64::SaveContext(ThreadContext64& ctx) { ctx.cpu_registers = jit->GetRegisters(); ctx.sp = jit->GetSP(); ctx.pc = jit->GetPC(); @@ -242,7 +242,7 @@ void ARM_Dynarmic::SaveContext(ThreadContext& ctx) { ctx.tpidr = cb->tpidr_el0; } -void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) { +void ARM_Dynarmic_64::LoadContext(const ThreadContext64& ctx) { jit->SetRegisters(ctx.cpu_registers); jit->SetSP(ctx.sp); jit->SetPC(ctx.pc); @@ -253,25 +253,32 @@ void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) { SetTPIDR_EL0(ctx.tpidr); } -void ARM_Dynarmic::PrepareReschedule() { +void ARM_Dynarmic_64::PrepareReschedule() { jit->HaltExecution(); } -void ARM_Dynarmic::ClearInstructionCache() { +void ARM_Dynarmic_64::ClearInstructionCache() { jit->ClearCache(); } -void ARM_Dynarmic::ClearExclusiveState() { +void ARM_Dynarmic_64::ClearExclusiveState() { jit->ClearExclusiveState(); } -void ARM_Dynarmic::PageTableChanged(Common::PageTable& page_table, - std::size_t new_address_space_size_in_bits) { +void ARM_Dynarmic_64::PageTableChanged(Common::PageTable& page_table, + std::size_t new_address_space_size_in_bits) { + auto key = std::make_pair(&page_table, new_address_space_size_in_bits); + auto iter = jit_cache.find(key); + if (iter != jit_cache.end()) { + jit = iter->second; + return; + } jit = MakeJit(page_table, new_address_space_size_in_bits); + jit_cache.emplace(key, jit); } -DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count) - : monitor(core_count), memory{memory_} {} +DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count) + : monitor(core_count), memory{memory} {} DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default; diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index 9cd475cfb..e71240a96 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -5,9 +5,12 @@ #pragma once #include <memory> +#include <unordered_map> + #include <dynarmic/A64/a64.h> #include <dynarmic/A64/exclusive_monitor.h> #include "common/common_types.h" +#include "common/hash.h" #include "core/arm/arm_interface.h" #include "core/arm/exclusive_monitor.h" #include "core/arm/unicorn/arm_unicorn.h" @@ -18,14 +21,14 @@ class Memory; namespace Core { -class ARM_Dynarmic_Callbacks; +class DynarmicCallbacks64; class DynarmicExclusiveMonitor; class System; -class ARM_Dynarmic final : public ARM_Interface { +class ARM_Dynarmic_64 final : public ARM_Interface { public: - ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); - ~ARM_Dynarmic() override; + ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); + 
~ARM_Dynarmic_64() override; void SetPC(u64 pc) override; u64 GetPC() const override; @@ -42,8 +45,10 @@ public: void SetTPIDR_EL0(u64 value) override; u64 GetTPIDR_EL0() const override; - void SaveContext(ThreadContext& ctx) override; - void LoadContext(const ThreadContext& ctx) override; + void SaveContext(ThreadContext32& ctx) override {} + void SaveContext(ThreadContext64& ctx) override; + void LoadContext(const ThreadContext32& ctx) override {} + void LoadContext(const ThreadContext64& ctx) override; void PrepareReschedule() override; void ClearExclusiveState() override; @@ -53,12 +58,17 @@ public: std::size_t new_address_space_size_in_bits) override; private: - std::unique_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table, + std::shared_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table, std::size_t address_space_bits) const; - friend class ARM_Dynarmic_Callbacks; - std::unique_ptr<ARM_Dynarmic_Callbacks> cb; - std::unique_ptr<Dynarmic::A64::Jit> jit; + using JitCacheKey = std::pair<Common::PageTable*, std::size_t>; + using JitCacheType = + std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A64::Jit>, Common::PairHash>; + + friend class DynarmicCallbacks64; + std::unique_ptr<DynarmicCallbacks64> cb; + JitCacheType jit_cache; + std::shared_ptr<Dynarmic::A64::Jit> jit; ARM_Unicorn inner_unicorn; std::size_t core_index; @@ -67,7 +77,7 @@ private: class DynarmicExclusiveMonitor final : public ExclusiveMonitor { public: - explicit DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count); + explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count); ~DynarmicExclusiveMonitor() override; void SetExclusive(std::size_t core_index, VAddr addr) override; @@ -80,7 +90,7 @@ public: bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override; private: - friend class ARM_Dynarmic; + friend class ARM_Dynarmic_64; Dynarmic::A64::ExclusiveMonitor monitor; Memory::Memory& memory; }; diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp new file mode 100644 index 000000000..3fdcdebde --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp @@ -0,0 +1,80 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/arm/dynarmic/arm_dynarmic_cp15.h" + +using Callback = Dynarmic::A32::Coprocessor::Callback; +using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord; +using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords; + +std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1, + CoprocReg CRd, CoprocReg CRn, + CoprocReg CRm, unsigned opc2) { + return {}; +} + +CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, + CoprocReg CRm, unsigned opc2) { + // TODO(merry): Privileged CP15 registers + + if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) { + // This is a dummy write, we ignore the value written here. + return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)]; + } + + if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) { + switch (opc2) { + case 4: + // This is a dummy write, we ignore the value written here. + return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)]; + case 5: + // This is a dummy write, we ignore the value written here. 
+ return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)]; + default: + return {}; + } + } + + if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) { + return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; + } + + return {}; +} + +CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) { + return {}; +} + +CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, + CoprocReg CRm, unsigned opc2) { + // TODO(merry): Privileged CP15 registers + + if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) { + switch (opc2) { + case 2: + return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; + case 3: + return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; + default: + return {}; + } + } + + return {}; +} + +CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) { + return {}; +} + +std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, + std::optional<u8> option) { + return {}; +} + +std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, + std::optional<u8> option) { + return {}; +} diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h new file mode 100644 index 000000000..07bcde5f9 --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h @@ -0,0 +1,152 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <optional> + +#include <dynarmic/A32/coprocessor.h> +#include "common/common_types.h" + +enum class CP15Register { + // c0 - Information registers + CP15_MAIN_ID, + CP15_CACHE_TYPE, + CP15_TCM_STATUS, + CP15_TLB_TYPE, + CP15_CPU_ID, + CP15_PROCESSOR_FEATURE_0, + CP15_PROCESSOR_FEATURE_1, + CP15_DEBUG_FEATURE_0, + CP15_AUXILIARY_FEATURE_0, + CP15_MEMORY_MODEL_FEATURE_0, + CP15_MEMORY_MODEL_FEATURE_1, + CP15_MEMORY_MODEL_FEATURE_2, + CP15_MEMORY_MODEL_FEATURE_3, + CP15_ISA_FEATURE_0, + CP15_ISA_FEATURE_1, + CP15_ISA_FEATURE_2, + CP15_ISA_FEATURE_3, + CP15_ISA_FEATURE_4, + + // c1 - Control registers + CP15_CONTROL, + CP15_AUXILIARY_CONTROL, + CP15_COPROCESSOR_ACCESS_CONTROL, + + // c2 - Translation table registers + CP15_TRANSLATION_BASE_TABLE_0, + CP15_TRANSLATION_BASE_TABLE_1, + CP15_TRANSLATION_BASE_CONTROL, + CP15_DOMAIN_ACCESS_CONTROL, + CP15_RESERVED, + + // c5 - Fault status registers + CP15_FAULT_STATUS, + CP15_INSTR_FAULT_STATUS, + CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS, + CP15_INST_FSR, + + // c6 - Fault Address registers + CP15_FAULT_ADDRESS, + CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS, + CP15_WFAR, + CP15_IFAR, + + // c7 - Cache operation registers + CP15_WAIT_FOR_INTERRUPT, + CP15_PHYS_ADDRESS, + CP15_INVALIDATE_INSTR_CACHE, + CP15_INVALIDATE_INSTR_CACHE_USING_MVA, + CP15_INVALIDATE_INSTR_CACHE_USING_INDEX, + CP15_FLUSH_PREFETCH_BUFFER, + CP15_FLUSH_BRANCH_TARGET_CACHE, + CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY, + CP15_INVALIDATE_DATA_CACHE, + CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA, + CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX, + CP15_INVALIDATE_DATA_AND_INSTR_CACHE, + CP15_CLEAN_DATA_CACHE, + CP15_CLEAN_DATA_CACHE_LINE_USING_MVA, + CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX, + CP15_DATA_SYNC_BARRIER, + CP15_DATA_MEMORY_BARRIER, + CP15_CLEAN_AND_INVALIDATE_DATA_CACHE, + 
CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA, + CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX, + + // c8 - TLB operations + CP15_INVALIDATE_ITLB, + CP15_INVALIDATE_ITLB_SINGLE_ENTRY, + CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH, + CP15_INVALIDATE_ITLB_ENTRY_ON_MVA, + CP15_INVALIDATE_DTLB, + CP15_INVALIDATE_DTLB_SINGLE_ENTRY, + CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH, + CP15_INVALIDATE_DTLB_ENTRY_ON_MVA, + CP15_INVALIDATE_UTLB, + CP15_INVALIDATE_UTLB_SINGLE_ENTRY, + CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH, + CP15_INVALIDATE_UTLB_ENTRY_ON_MVA, + + // c9 - Data cache lockdown register + CP15_DATA_CACHE_LOCKDOWN, + + // c10 - TLB/Memory map registers + CP15_TLB_LOCKDOWN, + CP15_PRIMARY_REGION_REMAP, + CP15_NORMAL_REGION_REMAP, + + // c13 - Thread related registers + CP15_PID, + CP15_CONTEXT_ID, + CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write + CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W) + CP15_THREAD_PRW, // Thread ID register - Privileged R/W only. + + // c15 - Performance and TLB lockdown registers + CP15_PERFORMANCE_MONITOR_CONTROL, + CP15_CYCLE_COUNTER, + CP15_COUNT_0, + CP15_COUNT_1, + CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY, + CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY, + CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS, + CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS, + CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE, + CP15_TLB_DEBUG_CONTROL, + + // Skyeye defined + CP15_TLB_FAULT_ADDR, + CP15_TLB_FAULT_STATUS, + + // Not an actual register. + // All registers should be defined above this. + CP15_REGISTER_COUNT, +}; + +class DynarmicCP15 final : public Dynarmic::A32::Coprocessor { +public: + using CoprocReg = Dynarmic::A32::CoprocReg; + + explicit DynarmicCP15(u32* cp15) : CP15(cp15){}; + + std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, + CoprocReg CRn, CoprocReg CRm, + unsigned opc2) override; + CallbackOrAccessOneWord CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, + CoprocReg CRm, unsigned opc2) override; + CallbackOrAccessTwoWords CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) override; + CallbackOrAccessOneWord CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm, + unsigned opc2) override; + CallbackOrAccessTwoWords CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) override; + std::optional<Callback> CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, + std::optional<u8> option) override; + std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, + std::optional<u8> option) override; + +private: + u32* CP15{}; +}; diff --git a/src/core/arm/exclusive_monitor.cpp b/src/core/arm/exclusive_monitor.cpp index 94570e520..b32401e0b 100644 --- a/src/core/arm/exclusive_monitor.cpp +++ b/src/core/arm/exclusive_monitor.cpp @@ -3,7 +3,7 @@ // Refer to the license.txt file included. 
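For reference, the shape of the c13 thread-register dispatch implemented by CompileGetOneWord() in the arm_dynarmic_cp15 handler above, reduced to a standalone function; the Cp15Slot names and the plain unsigned-tuple signature are simplifications of the Dynarmic coprocessor interface:

#include <array>
#include <cstdint>
#include <optional>

enum class Cp15Slot : std::size_t { ThreadUprw = 0, ThreadUro = 1, Count };

std::array<std::uint32_t, static_cast<std::size_t>(Cp15Slot::Count)> cp15_regs{};

// Returns a pointer the JIT may read directly, or std::nullopt for unhandled encodings.
std::optional<std::uint32_t*> GetOneWord(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) {
    if (crn == 13 && opc1 == 0 && crm == 0) {
        switch (opc2) {
        case 2: // TPIDRURW: user read/write thread ID register
            return &cp15_regs[static_cast<std::size_t>(Cp15Slot::ThreadUprw)];
        case 3: // TPIDRURO: read-only TLS pointer, set via SetTlsAddress() in the 32-bit backend
            return &cp15_regs[static_cast<std::size_t>(Cp15Slot::ThreadUro)];
        }
    }
    return std::nullopt;
}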
#ifdef ARCHITECTURE_x86_64 -#include "core/arm/dynarmic/arm_dynarmic.h" +#include "core/arm/dynarmic/arm_dynarmic_64.h" #endif #include "core/arm/exclusive_monitor.h" #include "core/memory.h" diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index f99ad5802..8a9800a96 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -53,7 +53,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si void* user_data) { auto* const system = static_cast<System*>(user_data); - ARM_Interface::ThreadContext ctx{}; + ARM_Interface::ThreadContext64 ctx{}; system->CurrentArmInterface().SaveContext(ctx); ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, ctx.pc, ctx.cpu_registers[30]); @@ -179,7 +179,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) { } Kernel::Thread* const thread = system.CurrentScheduler().GetCurrentThread(); - SaveContext(thread->GetContext()); + SaveContext(thread->GetContext64()); if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) { last_bkpt_hit = false; GDBStub::Break(); @@ -188,7 +188,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) { } } -void ARM_Unicorn::SaveContext(ThreadContext& ctx) { +void ARM_Unicorn::SaveContext(ThreadContext64& ctx) { int uregs[32]; void* tregs[32]; @@ -215,7 +215,7 @@ void ARM_Unicorn::SaveContext(ThreadContext& ctx) { CHECKED(uc_reg_read_batch(uc, uregs, tregs, 32)); } -void ARM_Unicorn::LoadContext(const ThreadContext& ctx) { +void ARM_Unicorn::LoadContext(const ThreadContext64& ctx) { int uregs[32]; void* tregs[32]; diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 3c5b155f9..f30d13cb6 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h @@ -30,8 +30,6 @@ public: void SetTlsAddress(VAddr address) override; void SetTPIDR_EL0(u64 value) override; u64 GetTPIDR_EL0() const override; - void SaveContext(ThreadContext& ctx) override; - void LoadContext(const ThreadContext& ctx) override; void PrepareReschedule() override; void ClearExclusiveState() override; void ExecuteInstructions(std::size_t num_instructions); @@ -41,6 +39,11 @@ public: void PageTableChanged(Common::PageTable&, std::size_t) override {} void RecordBreak(GDBStub::BreakpointAddress bkpt); + void SaveContext(ThreadContext32& ctx) override {} + void SaveContext(ThreadContext64& ctx) override; + void LoadContext(const ThreadContext32& ctx) override {} + void LoadContext(const ThreadContext64& ctx) override; + private: static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data); diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp index 8eacf92dd..b6b797c80 100644 --- a/src/core/core_manager.cpp +++ b/src/core/core_manager.cpp @@ -6,9 +6,6 @@ #include <mutex> #include "common/logging/log.h" -#ifdef ARCHITECTURE_x86_64 -#include "core/arm/dynarmic/arm_dynarmic.h" -#endif #include "core/arm/exclusive_monitor.h" #include "core/arm/unicorn/arm_unicorn.h" #include "core/core.h" diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 67e95999d..e8d8871a7 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -217,7 +217,7 @@ static u64 RegRead(std::size_t id, Kernel::Thread* thread = nullptr) { return 0; } - const auto& thread_context = thread->GetContext(); + const auto& thread_context = thread->GetContext64(); if (id < SP_REGISTER) { 
return thread_context.cpu_registers[id]; @@ -239,7 +239,7 @@ static void RegWrite(std::size_t id, u64 val, Kernel::Thread* thread = nullptr) return; } - auto& thread_context = thread->GetContext(); + auto& thread_context = thread->GetContext64(); if (id < SP_REGISTER) { thread_context.cpu_registers[id] = val; @@ -259,7 +259,7 @@ static u128 FpuRead(std::size_t id, Kernel::Thread* thread = nullptr) { return u128{0}; } - auto& thread_context = thread->GetContext(); + auto& thread_context = thread->GetContext64(); if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { return thread_context.vector_registers[id - UC_ARM64_REG_Q0]; @@ -275,7 +275,7 @@ static void FpuWrite(std::size_t id, u128 val, Kernel::Thread* thread = nullptr) return; } - auto& thread_context = thread->GetContext(); + auto& thread_context = thread->GetContext64(); if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { thread_context.vector_registers[id - UC_ARM64_REG_Q0] = val; @@ -916,7 +916,7 @@ static void WriteRegister() { // Update ARM context, skipping scheduler - no running threads at this point Core::System::GetInstance() .ArmInterface(current_core) - .LoadContext(current_thread->GetContext()); + .LoadContext(current_thread->GetContext64()); SendReply("OK"); } @@ -947,7 +947,7 @@ static void WriteRegisters() { // Update ARM context, skipping scheduler - no running threads at this point Core::System::GetInstance() .ArmInterface(current_core) - .LoadContext(current_thread->GetContext()); + .LoadContext(current_thread->GetContext64()); SendReply("OK"); } @@ -1019,7 +1019,7 @@ static void Step() { // Update ARM context, skipping scheduler - no running threads at this point Core::System::GetInstance() .ArmInterface(current_core) - .LoadContext(current_thread->GetContext()); + .LoadContext(current_thread->GetContext64()); } step_loop = true; halt_loop = true; diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 9232f4d7e..e47f1deed 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -186,6 +186,10 @@ struct KernelCore::Impl { return; } + for (auto& core : cores) { + core.SetIs64Bit(process->Is64BitProcess()); + } + system.Memory().SetCurrentPageTable(*process); } diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp index 9303dd273..aa2787467 100644 --- a/src/core/hle/kernel/physical_core.cpp +++ b/src/core/hle/kernel/physical_core.cpp @@ -5,7 +5,8 @@ #include "common/logging/log.h" #include "core/arm/arm_interface.h" #ifdef ARCHITECTURE_x86_64 -#include "core/arm/dynarmic/arm_dynarmic.h" +#include "core/arm/dynarmic/arm_dynarmic_32.h" +#include "core/arm/dynarmic/arm_dynarmic_64.h" #endif #include "core/arm/exclusive_monitor.h" #include "core/arm/unicorn/arm_unicorn.h" @@ -20,13 +21,17 @@ PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor) : core_index{id} { #ifdef ARCHITECTURE_x86_64 - arm_interface = std::make_unique<Core::ARM_Dynarmic>(system, exclusive_monitor, core_index); + arm_interface_32 = + std::make_unique<Core::ARM_Dynarmic_32>(system, exclusive_monitor, core_index); + arm_interface_64 = + std::make_unique<Core::ARM_Dynarmic_64>(system, exclusive_monitor, core_index); + #else arm_interface = std::make_shared<Core::ARM_Unicorn>(system); LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); #endif - scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index); + scheduler = std::make_unique<Kernel::Scheduler>(system, core_index); } 
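The PhysicalCore constructor above now builds both the 32-bit and 64-bit JITs up front; SetIs64Bit() just below picks which one backs arm_interface, and kernel.cpp (earlier in this diff) calls it with Is64BitProcess() whenever the current process changes. A cut-down model of that selection, with stand-in types rather than ARM_Interface and the Dynarmic wrappers:

#include <memory>

struct CpuBackend {
    virtual ~CpuBackend() = default;
    virtual void Run() = 0;
};
struct Jit32 : CpuBackend { void Run() override {} };
struct Jit64 : CpuBackend { void Run() override {} };

class Core {
public:
    Core() : backend_32{std::make_unique<Jit32>()}, backend_64{std::make_unique<Jit64>()} {}

    // Called by the kernel with process->Is64BitProcess() when the current process changes.
    void SetIs64Bit(bool is_64_bit) {
        active = is_64_bit ? static_cast<CpuBackend*>(backend_64.get()) : backend_32.get();
    }

    void Run() { active->Run(); } // callers always go through the active backend

private:
    std::unique_ptr<Jit32> backend_32;
    std::unique_ptr<Jit64> backend_64;
    CpuBackend* active = nullptr; // mirrors PhysicalCore::arm_interface
};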
PhysicalCore::~PhysicalCore() = default; @@ -48,4 +53,12 @@ void PhysicalCore::Shutdown() { scheduler->Shutdown(); } +void PhysicalCore::SetIs64Bit(bool is_64_bit) { + if (is_64_bit) { + arm_interface = arm_interface_64.get(); + } else { + arm_interface = arm_interface_32.get(); + } +} + } // namespace Kernel diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h index 4c32c0f1b..3269166be 100644 --- a/src/core/hle/kernel/physical_core.h +++ b/src/core/hle/kernel/physical_core.h @@ -68,10 +68,14 @@ public: return *scheduler; } + void SetIs64Bit(bool is_64_bit); + private: std::size_t core_index; - std::unique_ptr<Core::ARM_Interface> arm_interface; + std::unique_ptr<Core::ARM_Interface> arm_interface_32; + std::unique_ptr<Core::ARM_Interface> arm_interface_64; std::unique_ptr<Kernel::Scheduler> scheduler; + Core::ARM_Interface* arm_interface{}; }; } // namespace Kernel diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 2fcb7326c..edc414d69 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -42,7 +42,8 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) { // Register 1 must be a handle to the main thread const Handle thread_handle = owner_process.GetHandleTable().Create(thread).Unwrap(); - thread->GetContext().cpu_registers[1] = thread_handle; + thread->GetContext32().cpu_registers[1] = thread_handle; + thread->GetContext64().cpu_registers[1] = thread_handle; // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires thread->ResumeFromWait(); diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index c65f82fb7..1140c72a3 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -383,8 +383,8 @@ void GlobalScheduler::Unlock() { // TODO(Blinkhawk): Setup the interrupts and change context on current core. } -Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, std::size_t core_id) - : system(system), cpu_core(cpu_core), core_id(core_id) {} +Scheduler::Scheduler(Core::System& system, std::size_t core_id) + : system{system}, core_id{core_id} {} Scheduler::~Scheduler() = default; @@ -422,9 +422,10 @@ void Scheduler::UnloadThread() { // Save context for previous thread if (previous_thread) { - cpu_core.SaveContext(previous_thread->GetContext()); + system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); + system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); // Save the TPIDR_EL0 system register in case it was modified. - previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); + previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0()); if (previous_thread->GetStatus() == ThreadStatus::Running) { // This is only the case when a reschedule is triggered without the current thread @@ -451,9 +452,10 @@ void Scheduler::SwitchContext() { // Save context for previous thread if (previous_thread) { - cpu_core.SaveContext(previous_thread->GetContext()); + system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); + system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); // Save the TPIDR_EL0 system register in case it was modified. 
- previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); + previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0()); if (previous_thread->GetStatus() == ThreadStatus::Running) { // This is only the case when a reschedule is triggered without the current thread @@ -481,9 +483,10 @@ void Scheduler::SwitchContext() { system.Kernel().MakeCurrentProcess(thread_owner_process); } - cpu_core.LoadContext(new_thread->GetContext()); - cpu_core.SetTlsAddress(new_thread->GetTLSAddress()); - cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); + system.ArmInterface(core_id).LoadContext(new_thread->GetContext32()); + system.ArmInterface(core_id).LoadContext(new_thread->GetContext64()); + system.ArmInterface(core_id).SetTlsAddress(new_thread->GetTLSAddress()); + system.ArmInterface(core_id).SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); } else { current_thread = nullptr; // Note: We do not reset the current process and current page table when idling because diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 1c93a838c..07df33f9c 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -181,7 +181,7 @@ private: class Scheduler final { public: - explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, std::size_t core_id); + explicit Scheduler(Core::System& system, std::size_t core_id); ~Scheduler(); /// Returns whether there are any threads that are ready to run. @@ -235,7 +235,6 @@ private: std::shared_ptr<Thread> selected_thread = nullptr; Core::System& system; - Core::ARM_Interface& cpu_core; u64 last_context_switch_time = 0; u64 idle_selection_count = 0; const std::size_t core_id; diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index fd91779a3..4ffc113c2 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -187,6 +187,13 @@ static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_s return RESULT_SUCCESS; } +static ResultCode SetHeapSize32(Core::System& system, u32* heap_addr, u32 heap_size) { + VAddr temp_heap_addr{}; + const ResultCode result{SetHeapSize(system, &temp_heap_addr, heap_size)}; + *heap_addr = static_cast<u32>(temp_heap_addr); + return result; +} + static ResultCode SetMemoryPermission(Core::System& system, VAddr addr, u64 size, u32 prot) { LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot); @@ -371,6 +378,12 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle, return RESULT_SUCCESS; } +static ResultCode ConnectToNamedPort32(Core::System& system, Handle* out_handle, + u32 port_name_address) { + + return ConnectToNamedPort(system, out_handle, port_name_address); +} + /// Makes a blocking IPC call to an OS service. static ResultCode SendSyncRequest(Core::System& system, Handle handle) { const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); @@ -390,6 +403,10 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) { return session->SendSyncRequest(SharedFrom(thread), system.Memory()); } +static ResultCode SendSyncRequest32(Core::System& system, Handle handle) { + return SendSyncRequest(system, handle); +} + /// Get the ID for the specified thread. 
static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle thread_handle) { LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle); @@ -405,6 +422,17 @@ static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle threa return RESULT_SUCCESS; } +static ResultCode GetThreadId32(Core::System& system, u32* thread_id_low, u32* thread_id_high, + Handle thread_handle) { + u64 thread_id{}; + const ResultCode result{GetThreadId(system, &thread_id, thread_handle)}; + + *thread_id_low = static_cast<u32>(thread_id >> 32); + *thread_id_high = static_cast<u32>(thread_id & std::numeric_limits<u32>::max()); + + return result; +} + /// Gets the ID of the specified process or a specified thread's owning process. static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle handle) { LOG_DEBUG(Kernel_SVC, "called handle=0x{:08X}", handle); @@ -479,6 +507,12 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr return result; } +static ResultCode WaitSynchronization32(Core::System& system, u32 timeout_low, u32 handles_address, + s32 handle_count, u32 timeout_high, Handle* index) { + const s64 nano_seconds{(static_cast<s64>(timeout_high) << 32) | static_cast<s64>(timeout_low)}; + return WaitSynchronization(system, index, handles_address, handle_count, nano_seconds); +} + /// Resumes a thread waiting on WaitSynchronization static ResultCode CancelSynchronization(Core::System& system, Handle thread_handle) { LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle); @@ -917,6 +951,18 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha } } +static ResultCode GetInfo32(Core::System& system, u32* result_low, u32* result_high, u32 sub_id_low, + u32 info_id, u32 handle, u32 sub_id_high) { + const u64 sub_id{static_cast<u64>(sub_id_low | (static_cast<u64>(sub_id_high) << 32))}; + u64 res_value{}; + + const ResultCode result{GetInfo(system, &res_value, info_id, handle, sub_id)}; + *result_high = static_cast<u32>(res_value >> 32); + *result_low = static_cast<u32>(res_value & std::numeric_limits<u32>::max()); + + return result; +} + /// Maps memory at a desired address static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); @@ -1058,7 +1104,7 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H return ERR_BUSY; } - Core::ARM_Interface::ThreadContext ctx = thread->GetContext(); + Core::ARM_Interface::ThreadContext64 ctx = thread->GetContext64(); // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. 
ctx.pstate &= 0xFF0FFE20; @@ -1088,6 +1134,10 @@ static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle return RESULT_SUCCESS; } +static ResultCode GetThreadPriority32(Core::System& system, u32* priority, Handle handle) { + return GetThreadPriority(system, priority, handle); +} + /// Sets the priority for the specified thread static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 priority) { LOG_TRACE(Kernel_SVC, "called"); @@ -1259,6 +1309,11 @@ static ResultCode QueryMemory(Core::System& system, VAddr memory_info_address, query_address); } +static ResultCode QueryMemory32(Core::System& system, u32 memory_info_address, + u32 page_info_address, u32 query_address) { + return QueryMemory(system, memory_info_address, page_info_address, query_address); +} + static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address, u64 src_address, u64 size) { LOG_DEBUG(Kernel_SVC, @@ -1675,6 +1730,10 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_ } } +static void SignalProcessWideKey32(Core::System& system, u32 condition_variable_addr, s32 target) { + SignalProcessWideKey(system, condition_variable_addr, target); +} + // Wait for an address (via Address Arbiter) static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, s64 timeout) { @@ -1760,6 +1819,10 @@ static ResultCode CloseHandle(Core::System& system, Handle handle) { return handle_table.Close(handle); } +static ResultCode CloseHandle32(Core::System& system, Handle handle) { + return CloseHandle(system, handle); +} + /// Clears the signaled state of an event or process. static ResultCode ResetSignal(Core::System& system, Handle handle) { LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle); @@ -2317,69 +2380,196 @@ struct FunctionDef { }; } // namespace -static const FunctionDef SVC_Table[] = { +static const FunctionDef SVC_Table_32[] = { {0x00, nullptr, "Unknown"}, - {0x01, SvcWrap<SetHeapSize>, "SetHeapSize"}, - {0x02, SvcWrap<SetMemoryPermission>, "SetMemoryPermission"}, - {0x03, SvcWrap<SetMemoryAttribute>, "SetMemoryAttribute"}, - {0x04, SvcWrap<MapMemory>, "MapMemory"}, - {0x05, SvcWrap<UnmapMemory>, "UnmapMemory"}, - {0x06, SvcWrap<QueryMemory>, "QueryMemory"}, - {0x07, SvcWrap<ExitProcess>, "ExitProcess"}, - {0x08, SvcWrap<CreateThread>, "CreateThread"}, - {0x09, SvcWrap<StartThread>, "StartThread"}, - {0x0A, SvcWrap<ExitThread>, "ExitThread"}, - {0x0B, SvcWrap<SleepThread>, "SleepThread"}, - {0x0C, SvcWrap<GetThreadPriority>, "GetThreadPriority"}, - {0x0D, SvcWrap<SetThreadPriority>, "SetThreadPriority"}, - {0x0E, SvcWrap<GetThreadCoreMask>, "GetThreadCoreMask"}, - {0x0F, SvcWrap<SetThreadCoreMask>, "SetThreadCoreMask"}, - {0x10, SvcWrap<GetCurrentProcessorNumber>, "GetCurrentProcessorNumber"}, - {0x11, SvcWrap<SignalEvent>, "SignalEvent"}, - {0x12, SvcWrap<ClearEvent>, "ClearEvent"}, - {0x13, SvcWrap<MapSharedMemory>, "MapSharedMemory"}, - {0x14, SvcWrap<UnmapSharedMemory>, "UnmapSharedMemory"}, - {0x15, SvcWrap<CreateTransferMemory>, "CreateTransferMemory"}, - {0x16, SvcWrap<CloseHandle>, "CloseHandle"}, - {0x17, SvcWrap<ResetSignal>, "ResetSignal"}, - {0x18, SvcWrap<WaitSynchronization>, "WaitSynchronization"}, - {0x19, SvcWrap<CancelSynchronization>, "CancelSynchronization"}, - {0x1A, SvcWrap<ArbitrateLock>, "ArbitrateLock"}, - {0x1B, SvcWrap<ArbitrateUnlock>, "ArbitrateUnlock"}, - {0x1C, SvcWrap<WaitProcessWideKeyAtomic>, "WaitProcessWideKeyAtomic"}, - {0x1D, 
SvcWrap<SignalProcessWideKey>, "SignalProcessWideKey"}, - {0x1E, SvcWrap<GetSystemTick>, "GetSystemTick"}, - {0x1F, SvcWrap<ConnectToNamedPort>, "ConnectToNamedPort"}, + {0x01, SvcWrap32<SetHeapSize32>, "SetHeapSize32"}, + {0x02, nullptr, "Unknown"}, + {0x03, nullptr, "SetMemoryAttribute32"}, + {0x04, nullptr, "MapMemory32"}, + {0x05, nullptr, "UnmapMemory32"}, + {0x06, SvcWrap32<QueryMemory32>, "QueryMemory32"}, + {0x07, nullptr, "ExitProcess32"}, + {0x08, nullptr, "CreateThread32"}, + {0x09, nullptr, "StartThread32"}, + {0x0a, nullptr, "ExitThread32"}, + {0x0b, nullptr, "SleepThread32"}, + {0x0c, SvcWrap32<GetThreadPriority32>, "GetThreadPriority32"}, + {0x0d, nullptr, "SetThreadPriority32"}, + {0x0e, nullptr, "GetThreadCoreMask32"}, + {0x0f, nullptr, "SetThreadCoreMask32"}, + {0x10, nullptr, "GetCurrentProcessorNumber32"}, + {0x11, nullptr, "SignalEvent32"}, + {0x12, nullptr, "ClearEvent32"}, + {0x13, nullptr, "MapSharedMemory32"}, + {0x14, nullptr, "UnmapSharedMemory32"}, + {0x15, nullptr, "CreateTransferMemory32"}, + {0x16, SvcWrap32<CloseHandle32>, "CloseHandle32"}, + {0x17, nullptr, "ResetSignal32"}, + {0x18, SvcWrap32<WaitSynchronization32>, "WaitSynchronization32"}, + {0x19, nullptr, "CancelSynchronization32"}, + {0x1a, nullptr, "ArbitrateLock32"}, + {0x1b, nullptr, "ArbitrateUnlock32"}, + {0x1c, nullptr, "WaitProcessWideKeyAtomic32"}, + {0x1d, SvcWrap32<SignalProcessWideKey32>, "SignalProcessWideKey32"}, + {0x1e, nullptr, "GetSystemTick32"}, + {0x1f, SvcWrap32<ConnectToNamedPort32>, "ConnectToNamedPort32"}, + {0x20, nullptr, "Unknown"}, + {0x21, SvcWrap32<SendSyncRequest32>, "SendSyncRequest32"}, + {0x22, nullptr, "SendSyncRequestWithUserBuffer32"}, + {0x23, nullptr, "Unknown"}, + {0x24, nullptr, "GetProcessId32"}, + {0x25, SvcWrap32<GetThreadId32>, "GetThreadId32"}, + {0x26, nullptr, "Break32"}, + {0x27, nullptr, "OutputDebugString32"}, + {0x28, nullptr, "Unknown"}, + {0x29, SvcWrap32<GetInfo32>, "GetInfo32"}, + {0x2a, nullptr, "Unknown"}, + {0x2b, nullptr, "Unknown"}, + {0x2c, nullptr, "MapPhysicalMemory32"}, + {0x2d, nullptr, "UnmapPhysicalMemory32"}, + {0x2e, nullptr, "Unknown"}, + {0x2f, nullptr, "Unknown"}, + {0x30, nullptr, "Unknown"}, + {0x31, nullptr, "Unknown"}, + {0x32, nullptr, "SetThreadActivity32"}, + {0x33, nullptr, "GetThreadContext32"}, + {0x34, nullptr, "WaitForAddress32"}, + {0x35, nullptr, "SignalToAddress32"}, + {0x36, nullptr, "Unknown"}, + {0x37, nullptr, "Unknown"}, + {0x38, nullptr, "Unknown"}, + {0x39, nullptr, "Unknown"}, + {0x3a, nullptr, "Unknown"}, + {0x3b, nullptr, "Unknown"}, + {0x3c, nullptr, "Unknown"}, + {0x3d, nullptr, "Unknown"}, + {0x3e, nullptr, "Unknown"}, + {0x3f, nullptr, "Unknown"}, + {0x40, nullptr, "CreateSession32"}, + {0x41, nullptr, "AcceptSession32"}, + {0x42, nullptr, "Unknown"}, + {0x43, nullptr, "ReplyAndReceive32"}, + {0x44, nullptr, "Unknown"}, + {0x45, nullptr, "CreateEvent32"}, + {0x46, nullptr, "Unknown"}, + {0x47, nullptr, "Unknown"}, + {0x48, nullptr, "Unknown"}, + {0x49, nullptr, "Unknown"}, + {0x4a, nullptr, "Unknown"}, + {0x4b, nullptr, "Unknown"}, + {0x4c, nullptr, "Unknown"}, + {0x4d, nullptr, "Unknown"}, + {0x4e, nullptr, "Unknown"}, + {0x4f, nullptr, "Unknown"}, + {0x50, nullptr, "Unknown"}, + {0x51, nullptr, "Unknown"}, + {0x52, nullptr, "Unknown"}, + {0x53, nullptr, "Unknown"}, + {0x54, nullptr, "Unknown"}, + {0x55, nullptr, "Unknown"}, + {0x56, nullptr, "Unknown"}, + {0x57, nullptr, "Unknown"}, + {0x58, nullptr, "Unknown"}, + {0x59, nullptr, "Unknown"}, + {0x5a, nullptr, "Unknown"}, + {0x5b, nullptr, "Unknown"}, 
+ {0x5c, nullptr, "Unknown"}, + {0x5d, nullptr, "Unknown"}, + {0x5e, nullptr, "Unknown"}, + {0x5F, nullptr, "FlushProcessDataCache32"}, + {0x60, nullptr, "Unknown"}, + {0x61, nullptr, "Unknown"}, + {0x62, nullptr, "Unknown"}, + {0x63, nullptr, "Unknown"}, + {0x64, nullptr, "Unknown"}, + {0x65, nullptr, "GetProcessList32"}, + {0x66, nullptr, "Unknown"}, + {0x67, nullptr, "Unknown"}, + {0x68, nullptr, "Unknown"}, + {0x69, nullptr, "Unknown"}, + {0x6A, nullptr, "Unknown"}, + {0x6B, nullptr, "Unknown"}, + {0x6C, nullptr, "Unknown"}, + {0x6D, nullptr, "Unknown"}, + {0x6E, nullptr, "Unknown"}, + {0x6f, nullptr, "GetSystemInfo32"}, + {0x70, nullptr, "CreatePort32"}, + {0x71, nullptr, "ManageNamedPort32"}, + {0x72, nullptr, "ConnectToPort32"}, + {0x73, nullptr, "SetProcessMemoryPermission32"}, + {0x74, nullptr, "Unknown"}, + {0x75, nullptr, "Unknown"}, + {0x76, nullptr, "Unknown"}, + {0x77, nullptr, "MapProcessCodeMemory32"}, + {0x78, nullptr, "UnmapProcessCodeMemory32"}, + {0x79, nullptr, "Unknown"}, + {0x7A, nullptr, "Unknown"}, + {0x7B, nullptr, "TerminateProcess32"}, +}; + +static const FunctionDef SVC_Table_64[] = { + {0x00, nullptr, "Unknown"}, + {0x01, SvcWrap64<SetHeapSize>, "SetHeapSize"}, + {0x02, SvcWrap64<SetMemoryPermission>, "SetMemoryPermission"}, + {0x03, SvcWrap64<SetMemoryAttribute>, "SetMemoryAttribute"}, + {0x04, SvcWrap64<MapMemory>, "MapMemory"}, + {0x05, SvcWrap64<UnmapMemory>, "UnmapMemory"}, + {0x06, SvcWrap64<QueryMemory>, "QueryMemory"}, + {0x07, SvcWrap64<ExitProcess>, "ExitProcess"}, + {0x08, SvcWrap64<CreateThread>, "CreateThread"}, + {0x09, SvcWrap64<StartThread>, "StartThread"}, + {0x0A, SvcWrap64<ExitThread>, "ExitThread"}, + {0x0B, SvcWrap64<SleepThread>, "SleepThread"}, + {0x0C, SvcWrap64<GetThreadPriority>, "GetThreadPriority"}, + {0x0D, SvcWrap64<SetThreadPriority>, "SetThreadPriority"}, + {0x0E, SvcWrap64<GetThreadCoreMask>, "GetThreadCoreMask"}, + {0x0F, SvcWrap64<SetThreadCoreMask>, "SetThreadCoreMask"}, + {0x10, SvcWrap64<GetCurrentProcessorNumber>, "GetCurrentProcessorNumber"}, + {0x11, SvcWrap64<SignalEvent>, "SignalEvent"}, + {0x12, SvcWrap64<ClearEvent>, "ClearEvent"}, + {0x13, SvcWrap64<MapSharedMemory>, "MapSharedMemory"}, + {0x14, SvcWrap64<UnmapSharedMemory>, "UnmapSharedMemory"}, + {0x15, SvcWrap64<CreateTransferMemory>, "CreateTransferMemory"}, + {0x16, SvcWrap64<CloseHandle>, "CloseHandle"}, + {0x17, SvcWrap64<ResetSignal>, "ResetSignal"}, + {0x18, SvcWrap64<WaitSynchronization>, "WaitSynchronization"}, + {0x19, SvcWrap64<CancelSynchronization>, "CancelSynchronization"}, + {0x1A, SvcWrap64<ArbitrateLock>, "ArbitrateLock"}, + {0x1B, SvcWrap64<ArbitrateUnlock>, "ArbitrateUnlock"}, + {0x1C, SvcWrap64<WaitProcessWideKeyAtomic>, "WaitProcessWideKeyAtomic"}, + {0x1D, SvcWrap64<SignalProcessWideKey>, "SignalProcessWideKey"}, + {0x1E, SvcWrap64<GetSystemTick>, "GetSystemTick"}, + {0x1F, SvcWrap64<ConnectToNamedPort>, "ConnectToNamedPort"}, {0x20, nullptr, "SendSyncRequestLight"}, - {0x21, SvcWrap<SendSyncRequest>, "SendSyncRequest"}, + {0x21, SvcWrap64<SendSyncRequest>, "SendSyncRequest"}, {0x22, nullptr, "SendSyncRequestWithUserBuffer"}, {0x23, nullptr, "SendAsyncRequestWithUserBuffer"}, - {0x24, SvcWrap<GetProcessId>, "GetProcessId"}, - {0x25, SvcWrap<GetThreadId>, "GetThreadId"}, - {0x26, SvcWrap<Break>, "Break"}, - {0x27, SvcWrap<OutputDebugString>, "OutputDebugString"}, + {0x24, SvcWrap64<GetProcessId>, "GetProcessId"}, + {0x25, SvcWrap64<GetThreadId>, "GetThreadId"}, + {0x26, SvcWrap64<Break>, "Break"}, + {0x27, SvcWrap64<OutputDebugString>, 
"OutputDebugString"}, {0x28, nullptr, "ReturnFromException"}, - {0x29, SvcWrap<GetInfo>, "GetInfo"}, + {0x29, SvcWrap64<GetInfo>, "GetInfo"}, {0x2A, nullptr, "FlushEntireDataCache"}, {0x2B, nullptr, "FlushDataCache"}, - {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"}, - {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"}, + {0x2C, SvcWrap64<MapPhysicalMemory>, "MapPhysicalMemory"}, + {0x2D, SvcWrap64<UnmapPhysicalMemory>, "UnmapPhysicalMemory"}, {0x2E, nullptr, "GetFutureThreadInfo"}, {0x2F, nullptr, "GetLastThreadInfo"}, - {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, - {0x31, SvcWrap<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"}, - {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"}, - {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, - {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"}, - {0x35, SvcWrap<SignalToAddress>, "SignalToAddress"}, + {0x30, SvcWrap64<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, + {0x31, SvcWrap64<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"}, + {0x32, SvcWrap64<SetThreadActivity>, "SetThreadActivity"}, + {0x33, SvcWrap64<GetThreadContext>, "GetThreadContext"}, + {0x34, SvcWrap64<WaitForAddress>, "WaitForAddress"}, + {0x35, SvcWrap64<SignalToAddress>, "SignalToAddress"}, {0x36, nullptr, "SynchronizePreemptionState"}, {0x37, nullptr, "Unknown"}, {0x38, nullptr, "Unknown"}, {0x39, nullptr, "Unknown"}, {0x3A, nullptr, "Unknown"}, {0x3B, nullptr, "Unknown"}, - {0x3C, SvcWrap<KernelDebug>, "KernelDebug"}, - {0x3D, SvcWrap<ChangeKernelTraceState>, "ChangeKernelTraceState"}, + {0x3C, SvcWrap64<KernelDebug>, "KernelDebug"}, + {0x3D, SvcWrap64<ChangeKernelTraceState>, "ChangeKernelTraceState"}, {0x3E, nullptr, "Unknown"}, {0x3F, nullptr, "Unknown"}, {0x40, nullptr, "CreateSession"}, @@ -2387,7 +2577,7 @@ static const FunctionDef SVC_Table[] = { {0x42, nullptr, "ReplyAndReceiveLight"}, {0x43, nullptr, "ReplyAndReceive"}, {0x44, nullptr, "ReplyAndReceiveWithUserBuffer"}, - {0x45, SvcWrap<CreateEvent>, "CreateEvent"}, + {0x45, SvcWrap64<CreateEvent>, "CreateEvent"}, {0x46, nullptr, "Unknown"}, {0x47, nullptr, "Unknown"}, {0x48, nullptr, "MapPhysicalMemoryUnsafe"}, @@ -2398,9 +2588,9 @@ static const FunctionDef SVC_Table[] = { {0x4D, nullptr, "SleepSystem"}, {0x4E, nullptr, "ReadWriteRegister"}, {0x4F, nullptr, "SetProcessActivity"}, - {0x50, SvcWrap<CreateSharedMemory>, "CreateSharedMemory"}, - {0x51, SvcWrap<MapTransferMemory>, "MapTransferMemory"}, - {0x52, SvcWrap<UnmapTransferMemory>, "UnmapTransferMemory"}, + {0x50, SvcWrap64<CreateSharedMemory>, "CreateSharedMemory"}, + {0x51, SvcWrap64<MapTransferMemory>, "MapTransferMemory"}, + {0x52, SvcWrap64<UnmapTransferMemory>, "UnmapTransferMemory"}, {0x53, nullptr, "CreateInterruptEvent"}, {0x54, nullptr, "QueryPhysicalAddress"}, {0x55, nullptr, "QueryIoMapping"}, @@ -2419,8 +2609,8 @@ static const FunctionDef SVC_Table[] = { {0x62, nullptr, "TerminateDebugProcess"}, {0x63, nullptr, "GetDebugEvent"}, {0x64, nullptr, "ContinueDebugEvent"}, - {0x65, SvcWrap<GetProcessList>, "GetProcessList"}, - {0x66, SvcWrap<GetThreadList>, "GetThreadList"}, + {0x65, SvcWrap64<GetProcessList>, "GetProcessList"}, + {0x66, SvcWrap64<GetThreadList>, "GetThreadList"}, {0x67, nullptr, "GetDebugThreadContext"}, {0x68, nullptr, "SetDebugThreadContext"}, {0x69, nullptr, "QueryDebugProcessMemory"}, @@ -2436,24 +2626,32 @@ static const FunctionDef SVC_Table[] = { {0x73, nullptr, "SetProcessMemoryPermission"}, {0x74, nullptr, "MapProcessMemory"}, {0x75, 
nullptr, "UnmapProcessMemory"}, - {0x76, SvcWrap<QueryProcessMemory>, "QueryProcessMemory"}, - {0x77, SvcWrap<MapProcessCodeMemory>, "MapProcessCodeMemory"}, - {0x78, SvcWrap<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"}, + {0x76, SvcWrap64<QueryProcessMemory>, "QueryProcessMemory"}, + {0x77, SvcWrap64<MapProcessCodeMemory>, "MapProcessCodeMemory"}, + {0x78, SvcWrap64<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"}, {0x79, nullptr, "CreateProcess"}, {0x7A, nullptr, "StartProcess"}, {0x7B, nullptr, "TerminateProcess"}, - {0x7C, SvcWrap<GetProcessInfo>, "GetProcessInfo"}, - {0x7D, SvcWrap<CreateResourceLimit>, "CreateResourceLimit"}, - {0x7E, SvcWrap<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"}, + {0x7C, SvcWrap64<GetProcessInfo>, "GetProcessInfo"}, + {0x7D, SvcWrap64<CreateResourceLimit>, "CreateResourceLimit"}, + {0x7E, SvcWrap64<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"}, {0x7F, nullptr, "CallSecureMonitor"}, }; -static const FunctionDef* GetSVCInfo(u32 func_num) { - if (func_num >= std::size(SVC_Table)) { +static const FunctionDef* GetSVCInfo32(u32 func_num) { + if (func_num >= std::size(SVC_Table_32)) { + LOG_ERROR(Kernel_SVC, "Unknown svc=0x{:02X}", func_num); + return nullptr; + } + return &SVC_Table_32[func_num]; +} + +static const FunctionDef* GetSVCInfo64(u32 func_num) { + if (func_num >= std::size(SVC_Table_64)) { LOG_ERROR(Kernel_SVC, "Unknown svc=0x{:02X}", func_num); return nullptr; } - return &SVC_Table[func_num]; + return &SVC_Table_64[func_num]; } MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); @@ -2464,7 +2662,8 @@ void CallSVC(Core::System& system, u32 immediate) { // Lock the global kernel mutex when we enter the kernel HLE. std::lock_guard lock{HLE::g_hle_lock}; - const FunctionDef* info = GetSVCInfo(immediate); + const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? 
GetSVCInfo64(immediate) + : GetSVCInfo32(immediate); if (info) { if (info->func) { info->func(system); diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 29a2cfa9d..7d735e3fa 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h @@ -15,6 +15,10 @@ static inline u64 Param(const Core::System& system, int n) { return system.CurrentArmInterface().GetReg(n); } +static inline u32 Param32(const Core::System& system, int n) { + return static_cast<u32>(system.CurrentArmInterface().GetReg(n)); +} + /** * HLE a function return from the current ARM userland process * @param system System context @@ -24,40 +28,44 @@ static inline void FuncReturn(Core::System& system, u64 result) { system.CurrentArmInterface().SetReg(0, result); } +static inline void FuncReturn32(Core::System& system, u32 result) { + system.CurrentArmInterface().SetReg(0, (u64)result); +} + //////////////////////////////////////////////////////////////////////////////////////////////////// // Function wrappers that return type ResultCode template <ResultCode func(Core::System&, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0)).raw); } template <ResultCode func(Core::System&, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw); } template <ResultCode func(Core::System&, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); } template <ResultCode func(Core::System&, u32, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn( system, func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw); } template <ResultCode func(Core::System&, u32, u64, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2), Param(system, 3)) .raw); } template <ResultCode func(Core::System&, u32*)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param = 0; const u32 retval = func(system, &param).raw; system.CurrentArmInterface().SetReg(1, param); @@ -65,7 +73,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32*, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw; system.CurrentArmInterface().SetReg(1, param_1); @@ -73,7 +81,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32*, u32*)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; u32 param_2 = 0; const u32 retval = func(system, &param_1, &param_2).raw; @@ -86,7 +94,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32*, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval = func(system, &param_1, Param(system, 1)).raw; system.CurrentArmInterface().SetReg(1, param_1); @@ -94,7 +102,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32*, u64, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval =
func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2))).raw; @@ -104,7 +112,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u64*, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u64 param_1 = 0; const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw; @@ -113,12 +121,12 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u64, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw); } template <ResultCode func(Core::System&, u64*, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u64 param_1 = 0; const u32 retval = func(system, &param_1, Param(system, 1)).raw; @@ -127,7 +135,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u64*, u32, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u64 param_1 = 0; const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1)), static_cast<u32>(Param(system, 2))) @@ -138,19 +146,19 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1)).raw); } template <ResultCode func(Core::System&, u32, u32, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1)), Param(system, 2)) .raw); } template <ResultCode func(Core::System&, u32, u32*, u64*)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; u64 param_2 = 0; const ResultCode retval = func(system, static_cast<u32>(Param(system, 2)), &param_1, &param_2); @@ -161,54 +169,54 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u64, u64, u32, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3))) .raw); } template <ResultCode func(Core::System&, u64, u64, u32, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2)), Param(system, 3)) .raw); } template <ResultCode func(Core::System&, u32, u64, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), static_cast<u32>(Param(system, 2))) .raw); } template <ResultCode func(Core::System&, u64, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), Param(system, 1), Param(system, 2)).raw); } template <ResultCode func(Core::System&, u64, u64, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn( system, func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw); } template <ResultCode func(Core::System&, u32, u64, u64, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system,
static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2), static_cast<u32>(Param(system, 3))) .raw); } template <ResultCode func(Core::System&, u32, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn( system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw); } template <ResultCode func(Core::System&, u32*, u64, u64, s64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) @@ -219,14 +227,14 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u64, u64, u32, s64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) .raw); } template <ResultCode func(Core::System&, u64*, u64, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u64 param_1 = 0; const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3)).raw; @@ -236,7 +244,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32*, u64, u64, u64, u32, s32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3), static_cast<u32>(Param(system, 4)), static_cast<s32>(Param(system, 5))) @@ -247,7 +255,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32*, u64, u64, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), static_cast<u32>(Param(system, 3))) @@ -258,7 +266,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, Handle*, u64, u32, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3))) @@ -269,14 +277,14 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u64, u32, s32, s64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) .raw); } template <ResultCode func(Core::System&, u64, u32, s32, s32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3))) .raw); @@ -286,7 +294,7 @@ void SvcWrap(Core::System& system) { // Function wrappers that return type u32 template <u32 func(Core::System&)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system)); } @@ -294,7 +302,7 @@ void SvcWrap(Core::System& system) { // Function wrappers that return type u64 template <u64 func(Core::System&)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system)); } @@ -302,44 +310,110 @@ void SvcWrap(Core::System& system) { ///
Function wrappers that return type void template <void func(Core::System&)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system); } template <void func(Core::System&, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, static_cast<u32>(Param(system, 0))); } template <void func(Core::System&, u32, u64, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2), Param(system, 3)); } template <void func(Core::System&, s64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, static_cast<s64>(Param(system, 0))); } template <void func(Core::System&, u64, s32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, Param(system, 0), static_cast<s32>(Param(system, 1))); } template <void func(Core::System&, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, Param(system, 0), Param(system, 1)); } template <void func(Core::System&, u64, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, Param(system, 0), Param(system, 1), Param(system, 2)); } template <void func(Core::System&, u32, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)); } +// Used by QueryMemory32 +template <ResultCode func(Core::System&, u32, u32, u32)> +void SvcWrap32(Core::System& system) { + FuncReturn32(system, + func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2)).raw); +} + +// Used by GetInfo32 +template <ResultCode func(Core::System&, u32*, u32*, u32, u32, u32, u32)> +void SvcWrap32(Core::System& system) { + u32 param_1 = 0; + u32 param_2 = 0; + + const u32 retval = func(system, &param_1, &param_2, Param32(system, 0), Param32(system, 1), + Param32(system, 2), Param32(system, 3)) + .raw; + + system.CurrentArmInterface().SetReg(1, param_1); + system.CurrentArmInterface().SetReg(2, param_2); + FuncReturn(system, retval); +} + +// Used by GetThreadPriority32, ConnectToNamedPort32 +template <ResultCode func(Core::System&, u32*, u32)> +void SvcWrap32(Core::System& system) { + u32 param_1 = 0; + const u32 retval = func(system, &param_1, Param32(system, 1)).raw; + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); +} + +// Used by GetThreadId32 +template <ResultCode func(Core::System&, u32*, u32*, u32)> +void SvcWrap32(Core::System& system) { + u32 param_1 = 0; + u32 param_2 = 0; + + const u32 retval = func(system, &param_1, &param_2, Param32(system, 1)).raw; + system.CurrentArmInterface().SetReg(1, param_1); + system.CurrentArmInterface().SetReg(2, param_2); + FuncReturn(system, retval); +} + +// Used by SignalProcessWideKey32 +template <void func(Core::System&, u32, s32)> +void SvcWrap32(Core::System& system) { + func(system, static_cast<u32>(Param(system, 0)), static_cast<s32>(Param(system, 1))); } + +// Used by SendSyncRequest32 +template <ResultCode func(Core::System&, u32)> +void SvcWrap32(Core::System& system) { + FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); } + +// Used by WaitSynchronization32 +template <ResultCode func(Core::System&, u32, u32, s32, u32, Handle*)> +void SvcWrap32(Core::System& system) { + u32 param_1 = 0; + const u32 retval = func(system, Param32(system, 0),
Param32(system, 1), Param32(system, 2), + Param32(system, 3), &param_1) + .raw; + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); +} + } // namespace Kernel diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index bf850e0b2..83e956036 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -133,15 +133,16 @@ void Thread::CancelWait() { ResumeFromWait(); } -/** - * Resets a thread context, making it ready to be scheduled and run by the CPU - * @param context Thread context to reset - * @param stack_top Address of the top of the stack - * @param entry_point Address of entry point for execution - * @param arg User argument for thread - */ -static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAddr stack_top, - VAddr entry_point, u64 arg) { +static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top, + u32 entry_point, u32 arg) { + context = {}; + context.cpu_registers[0] = arg; + context.cpu_registers[15] = entry_point; + context.cpu_registers[13] = stack_top; +} + +static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context, VAddr stack_top, + VAddr entry_point, u64 arg) { context = {}; context.cpu_registers[0] = arg; context.pc = entry_point; @@ -198,9 +199,9 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin thread->owner_process->RegisterThread(thread.get()); - // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used - // to initialize the context - ResetThreadContext(thread->context, stack_top, entry_point, arg); + ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top), + static_cast<u32>(entry_point), static_cast<u32>(arg)); + ResetThreadContext64(thread->context_64, stack_top, entry_point, arg); return MakeResult<std::shared_ptr<Thread>>(std::move(thread)); } @@ -213,11 +214,13 @@ void Thread::SetPriority(u32 priority) { } void Thread::SetWaitSynchronizationResult(ResultCode result) { - context.cpu_registers[0] = result.raw; + context_32.cpu_registers[0] = result.raw; + context_64.cpu_registers[0] = result.raw; } void Thread::SetWaitSynchronizationOutput(s32 output) { - context.cpu_registers[1] = output; + context_32.cpu_registers[1] = output; + context_64.cpu_registers[1] = output; } s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const { diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 129e7858a..23fdef8a4 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -102,7 +102,8 @@ public: using MutexWaitingThreads = std::vector<std::shared_ptr<Thread>>; - using ThreadContext = Core::ARM_Interface::ThreadContext; + using ThreadContext32 = Core::ARM_Interface::ThreadContext32; + using ThreadContext64 = Core::ARM_Interface::ThreadContext64; using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>; @@ -273,12 +274,20 @@ public: return status == ThreadStatus::WaitSynch; } - ThreadContext& GetContext() { - return context; + ThreadContext32& GetContext32() { + return context_32; } - const ThreadContext& GetContext() const { - return context; + const ThreadContext32& GetContext32() const { + return context_32; + } + + ThreadContext64& GetContext64() { + return context_64; + } + + const ThreadContext64& GetContext64() const { + return context_64; } ThreadStatus GetStatus() const { @@ -466,7 +475,8 @@ private: void
AdjustSchedulingOnPriority(u32 old_priority); void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core); - Core::ARM_Interface::ThreadContext context{}; + ThreadContext32 context_32{}; + ThreadContext64 context_64{}; u64 thread_id = 0; diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index d19c3623c..53559e8b1 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -129,12 +129,6 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect } metadata.Print(); - const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()}; - if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit || - arch_bits == FileSys::ProgramAddressSpaceType::Is32BitNoMap) { - return {ResultStatus::Error32BitISA, {}}; - } - if (process.LoadFromMetadata(metadata).IsError()) { return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; } diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index f95eee3b1..85ac81ef7 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -111,7 +111,7 @@ json GetProcessorStateDataAuto(Core::System& system) { const auto& vm_manager{process->VMManager()}; auto& arm{system.CurrentArmInterface()}; - Core::ARM_Interface::ThreadContext context{}; + Core::ARM_Interface::ThreadContext64 context{}; arm.SaveContext(context); return GetProcessorStateData(process->Is64BitProcess() ? "AArch64" : "AArch32", diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp index 2228571a6..e82ae7ef1 100644 --- a/src/input_common/udp/client.cpp +++ b/src/input_common/udp/client.cpp @@ -32,8 +32,16 @@ public: SocketCallback callback) : callback(std::move(callback)), timer(io_service), socket(io_service, udp::endpoint(udp::v4(), 0)), client_id(client_id), - pad_index(pad_index), - send_endpoint(udp::endpoint(boost::asio::ip::make_address_v4(host), port)) {} + pad_index(pad_index) { + boost::system::error_code ec{}; + auto ipv4 = boost::asio::ip::make_address_v4(host, ec); + if (ec.failed()) { + LOG_ERROR(Input, "Invalid IPv4 address \"{}\" provided to socket", host); + ipv4 = boost::asio::ip::address_v4{}; + } + + send_endpoint = {udp::endpoint(ipv4, port)}; + } void Stop() { io_service.stop(); @@ -85,17 +93,18 @@ private: } void HandleSend(const boost::system::error_code& error) { + boost::system::error_code _ignored{}; // Send a request for getting port info for the pad Request::PortInfo port_info{1, {pad_index, 0, 0, 0}}; const auto port_message = Request::Create(port_info, client_id); std::memcpy(&send_buffer1, &port_message, PORT_INFO_SIZE); - socket.send_to(boost::asio::buffer(send_buffer1), send_endpoint); + socket.send_to(boost::asio::buffer(send_buffer1), send_endpoint, {}, _ignored); // Send a request for getting pad data for the pad Request::PadData pad_data{Request::PadData::Flags::Id, pad_index, EMPTY_MAC_ADDRESS}; const auto pad_message = Request::Create(pad_data, client_id); std::memcpy(send_buffer2.data(), &pad_message, PAD_DATA_SIZE); - socket.send_to(boost::asio::buffer(send_buffer2), send_endpoint); + socket.send_to(boost::asio::buffer(send_buffer2), send_endpoint, {}, _ignored); StartSend(timer.expiry()); } diff --git a/src/input_common/udp/protocol.cpp b/src/input_common/udp/protocol.cpp index a982ac49d..5e50bd612 100644 --- a/src/input_common/udp/protocol.cpp +++ b/src/input_common/udp/protocol.cpp @@ -31,7 +31,6 @@ namespace Response { */ std::optional<Type> Validate(u8* 
data, std::size_t size) { if (size < sizeof(Header)) { - LOG_DEBUG(Input, "Invalid UDP packet received"); return std::nullopt; } Header header{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e1965fb21..3fcd319fd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -36,6 +36,7 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using Tegra::Engines::ShaderType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; @@ -56,8 +57,7 @@ namespace { template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - Tegra::Engines::ShaderType shader_type, - std::size_t index = 0) { + ShaderType shader_type, std::size_t index = 0) { if (entry.IsBindless()) { const Tegra::Texture::TextureHandle tex_handle = engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); @@ -910,15 +910,10 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& const auto& maxwell3d = system.GPU().Maxwell3D(); u32 binding = device.GetBaseBindings(stage_index).sampler; for (const auto& entry : shader->GetShaderEntries().samplers) { - const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); - if (!entry.IsIndexed()) { - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); + const auto shader_type = static_cast<ShaderType>(stage_index); + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); SetupTexture(binding++, texture, entry); - } else { - for (std::size_t i = 0; i < entry.Size(); ++i) { - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); - SetupTexture(binding++, texture, entry); - } } } } @@ -928,16 +923,9 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; for (const auto& entry : kernel->GetShaderEntries().samplers) { - if (!entry.IsIndexed()) { - const auto texture = - GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i); SetupTexture(binding++, texture, entry); - } else { - for (std::size_t i = 0; i < entry.Size(); ++i) { - const auto texture = - GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i); - SetupTexture(binding++, texture, entry); - } } } } diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 7ed505628..d3dea3659 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -92,8 +92,32 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { } case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_UNSIGNED_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: + case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return GL_UNSIGNED_SHORT; + 
default: + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); + return {}; + } + case Maxwell::VertexAttribute::Type::SignedScaled: + switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8: + case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return GL_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: + case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return GL_SHORT; default: LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); return {}; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 447f69d4d..a4340b502 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -353,9 +353,9 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { // Recreate the frame if the size of the window has changed if (layout.width != frame->width || layout.height != frame->height || - is_srgb != frame->is_srgb) { + screen_info.display_srgb != frame->is_srgb) { LOG_DEBUG(Render_OpenGL, "Reloading render frame"); - is_srgb = frame->is_srgb = screen_info.display_srgb; + frame->is_srgb = screen_info.display_srgb; frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height); } state.draw.draw_framebuffer = frame->render.handle; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 4107e10a9..d45e69cbc 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -111,9 +111,6 @@ private: Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; Common::Rectangle<int> framebuffer_crop_rect; - /// Represents if the final render frame is sRGB - bool is_srgb{}; - /// Frame presentation mailbox std::unique_ptr<FrameMailbox> frame_mailbox; }; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index ef66dd141..948d67d89 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -120,7 +120,7 @@ struct FormatTuple { {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI {vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U - {vk::Format::eA1R5G5B5UnormPack16, Attachable | Storage}, // A1B5G5R5U (flipped with swizzle) + {vk::Format::eA1R5G5B5UnormPack16, Attachable}, // A1B5G5R5U (flipped with swizzle) {vk::Format::eR8Unorm, Attachable | Storage}, // R8U {vk::Format::eR8Uint, Attachable | Storage}, // R8UI {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F @@ -371,8 +371,22 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr } case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (size) { + case Maxwell::VertexAttribute::Size::Size_8: + return vk::Format::eR8Uscaled; case Maxwell::VertexAttribute::Size::Size_8_8: return vk::Format::eR8G8Uscaled; + case Maxwell::VertexAttribute::Size::Size_8_8_8: + return vk::Format::eR8G8B8Uscaled; + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return vk::Format::eR8G8B8A8Uscaled; + case Maxwell::VertexAttribute::Size::Size_16: + return 
vk::Format::eR16Uscaled; + case Maxwell::VertexAttribute::Size::Size_16_16: + return vk::Format::eR16G16Uscaled; + case Maxwell::VertexAttribute::Size::Size_16_16_16: + return vk::Format::eR16G16B16Uscaled; + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return vk::Format::eR16G16B16A16Uscaled; default: break; } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 9d5b8de7a..60f57d83e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -73,7 +73,7 @@ UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; u32 binding = 0; u32 offset = 0; - FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries); + FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); if (template_entries.empty()) { // If the shader doesn't use descriptor sets, skip template creation. return UniqueDescriptorUpdateTemplate{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index b155dfb49..6a02403c1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -97,8 +97,7 @@ UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplat u32 offset = 0; for (const auto& stage : program) { if (stage) { - FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset, - template_entries); + FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); } } if (template_entries.empty()) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7ddf7d3ee..696e4b291 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -36,6 +36,13 @@ using Tegra::Engines::ShaderType; namespace { +// C++20's using enum +constexpr auto eUniformBuffer = vk::DescriptorType::eUniformBuffer; +constexpr auto eStorageBuffer = vk::DescriptorType::eStorageBuffer; +constexpr auto eUniformTexelBuffer = vk::DescriptorType::eUniformTexelBuffer; +constexpr auto eCombinedImageSampler = vk::DescriptorType::eCombinedImageSampler; +constexpr auto eStorageImage = vk::DescriptorType::eStorageImage; + constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ VideoCommon::Shader::CompileDepth::FullDecompile}; @@ -119,23 +126,32 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { } } +template <vk::DescriptorType descriptor_type, class Container> +void AddBindings(std::vector<vk::DescriptorSetLayoutBinding>& bindings, u32& binding, + vk::ShaderStageFlags stage_flags, const Container& container) { + const u32 num_entries = static_cast<u32>(std::size(container)); + for (std::size_t i = 0; i < num_entries; ++i) { + u32 count = 1; + if constexpr (descriptor_type == eCombinedImageSampler) { + // Combined image samplers can be arrayed. 
+ count = container[i].Size(); + } + bindings.emplace_back(binding++, descriptor_type, count, stage_flags, nullptr); + } +} + u32 FillDescriptorLayout(const ShaderEntries& entries, std::vector<vk::DescriptorSetLayoutBinding>& bindings, Maxwell::ShaderProgram program_type, u32 base_binding) { const ShaderType stage = GetStageFromProgram(program_type); - const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage); + const vk::ShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); u32 binding = base_binding; - const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { - for (std::size_t i = 0; i < num_entries; ++i) { - bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr); - } - }; - AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); - AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); - AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); - AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); - AddBindings(vk::DescriptorType::eStorageImage, entries.images.size()); + AddBindings<eUniformBuffer>(bindings, binding, flags, entries.const_buffers); + AddBindings<eStorageBuffer>(bindings, binding, flags, entries.global_buffers); + AddBindings<eUniformTexelBuffer>(bindings, binding, flags, entries.texel_buffers); + AddBindings<eCombinedImageSampler>(bindings, binding, flags, entries.samplers); + AddBindings<eStorageImage>(bindings, binding, flags, entries.images); return binding; } @@ -361,32 +377,45 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { return {std::move(program), std::move(bindings)}; } -void FillDescriptorUpdateTemplateEntries( - const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, - std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { - static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); - const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) { - const u32 count = static_cast<u32>(count_); - if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer && - device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { - // Nvidia has a bug where updating multiple uniform texels at once causes the driver to - // crash. 
- for (u32 i = 0; i < count; ++i) { - template_entries.emplace_back(binding + i, 0, 1, descriptor_type, - offset + i * entry_size, entry_size); - } - } else if (count != 0) { - template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); +template <vk::DescriptorType descriptor_type, class Container> +void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, u32& binding, + u32& offset, const Container& container) { + static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); + const u32 count = static_cast<u32>(std::size(container)); + + if constexpr (descriptor_type == eCombinedImageSampler) { + for (u32 i = 0; i < count; ++i) { + const u32 num_samplers = container[i].Size(); + template_entries.emplace_back(binding, 0, num_samplers, descriptor_type, offset, + entry_size); + ++binding; + offset += num_samplers * entry_size; } - offset += count * entry_size; - binding += count; - }; + return; + } - AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); - AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); - AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); - AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); - AddEntry(vk::DescriptorType::eStorageImage, entries.images.size()); + if constexpr (descriptor_type == eUniformTexelBuffer) { + // Nvidia has a bug where updating multiple uniform texels at once causes the driver to + // crash. + for (u32 i = 0; i < count; ++i) { + template_entries.emplace_back(binding + i, 0, 1, descriptor_type, + offset + i * entry_size, entry_size); + } + } else if (count > 0) { + template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); + } + offset += count * entry_size; + binding += count; +} + +void FillDescriptorUpdateTemplateEntries( + const ShaderEntries& entries, u32& binding, u32& offset, + std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { + AddEntry<eUniformBuffer>(template_entries, offset, binding, entries.const_buffers); + AddEntry<eStorageBuffer>(template_entries, offset, binding, entries.global_buffers); + AddEntry<eUniformTexelBuffer>(template_entries, offset, binding, entries.texel_buffers); + AddEntry<eCombinedImageSampler>(template_entries, offset, binding, entries.samplers); + AddEntry<eStorageImage>(template_entries, offset, binding, entries.images); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 8678fc9c3..92a670cc7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -194,7 +194,7 @@ private: }; void FillDescriptorUpdateTemplateEntries( - const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, + const ShaderEntries& entries, u32& binding, u32& offset, std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3bf86da87..3fe28c204 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -105,17 +105,20 @@ void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlag template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - 
std::size_t stage) { + std::size_t stage, std::size_t index = 0) { const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); if (entry.IsBindless()) { const Tegra::Texture::TextureHandle tex_handle = engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset()); return engine.GetTextureInfo(tex_handle); } + const auto& gpu_profile = engine.AccessGuestDriverProfile(); + const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); + const u32 offset = entry.GetOffset() + entry_offset; if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { - return engine.GetStageTexture(stage_type, entry.GetOffset()); + return engine.GetStageTexture(stage_type, offset); } else { - return engine.GetTexture(entry.GetOffset()); + return engine.GetTexture(offset); } } @@ -836,8 +839,10 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std:: MICROPROFILE_SCOPE(Vulkan_Textures); const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.samplers) { - const auto texture = GetTextureInfo(gpu, entry, stage); - SetupTexture(texture, entry); + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(gpu, entry, stage, i); + SetupTexture(texture, entry); + } } } @@ -886,8 +891,10 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.samplers) { - const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex); - SetupTexture(texture, entry); + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i); + SetupTexture(texture, entry); + } } } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 2da622d15..cfcca5af0 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -69,8 +69,9 @@ struct TexelBuffer { struct SampledImage { Id image_type{}; - Id sampled_image_type{}; - Id sampler{}; + Id sampler_type{}; + Id sampler_pointer_type{}; + Id variable{}; }; struct StorageImage { @@ -833,16 +834,20 @@ private: constexpr int sampled = 1; constexpr auto format = spv::ImageFormat::Unknown; const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id sampled_image_type = TypeSampledImage(image_type); - const Id pointer_type = - TypePointer(spv::StorageClass::UniformConstant, sampled_image_type); + const Id sampler_type = TypeSampledImage(image_type); + const Id sampler_pointer_type = + TypePointer(spv::StorageClass::UniformConstant, sampler_type); + const Id type = sampler.IsIndexed() + ? 
TypeArray(sampler_type, Constant(t_uint, sampler.Size())) + : sampler_type; + const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type); const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex()))); Decorate(id, spv::Decoration::Binding, binding++); Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - sampled_images.emplace(sampler.GetIndex(), - SampledImage{image_type, sampled_image_type, id}); + sampled_images.emplace(sampler.GetIndex(), SampledImage{image_type, sampler_type, + sampler_pointer_type, id}); } return binding; } @@ -1525,7 +1530,12 @@ private: ASSERT(!meta.sampler.IsBuffer()); const auto& entry = sampled_images.at(meta.sampler.GetIndex()); - return OpLoad(entry.sampled_image_type, entry.sampler); + Id sampler = entry.variable; + if (meta.sampler.IsIndexed()) { + const Id index = AsInt(Visit(meta.index)); + sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index); + } + return OpLoad(entry.sampler_type, sampler); } Id GetTextureImage(Operation operation) { @@ -2211,16 +2221,14 @@ private: switch (specialization.attribute_types.at(location)) { case Maxwell::VertexAttribute::Type::SignedNorm: case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: case Maxwell::VertexAttribute::Type::Float: return {Type::Float, t_in_float, t_in_float4}; case Maxwell::VertexAttribute::Type::SignedInt: return {Type::Int, t_in_int, t_in_int4}; case Maxwell::VertexAttribute::Type::UnsignedInt: return {Type::Uint, t_in_uint, t_in_uint4}; - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - UNIMPLEMENTED(); - return {Type::Float, t_in_float, t_in_float4}; default: UNREACHABLE(); return {Type::Float, t_in_float, t_in_float4}; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index f47b691a8..9e73fa9cd 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -141,11 +141,6 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; - extent = ChooseSwapExtent(capabilities, width, height); - - current_width = extent.width; - current_height = extent.height; - current_srgb = srgb; u32 requested_image_count{capabilities.minImageCount + 1}; if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { @@ -153,10 +148,9 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities } vk::SwapchainCreateInfoKHR swapchain_ci( - {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, - extent, 1, vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, - capabilities.currentTransform, vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, - {}); + {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, {}, 1, + vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, capabilities.currentTransform, + vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, {}); const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; @@ -169,9 +163,18 @@ void 
VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive; } + // Request the size again to reduce the possibility of a TOCTOU race condition. + const auto updated_capabilities = physical_device.getSurfaceCapabilitiesKHR(surface, dld); + swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); + // Don't add code within this and the swapchain creation. const auto dev{device.GetLogical()}; swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld); + extent = swapchain_ci.imageExtent; + current_width = extent.width; + current_height = extent.height; + current_srgb = srgb; + images = dev.getSwapchainImagesKHR(*swapchain, dld); image_count = static_cast<u32>(images.size()); image_format = surface_format.format; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index a0a7b9111..a1828546e 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -299,7 +299,7 @@ private: u32 index{}; ///< Emulated index given for the this sampler. u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). - u32 size{}; ///< Size of the sampler if indexed. + u32 size{1}; ///< Size of the sampler. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index face8c943..15e22b9fa 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -157,13 +157,21 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { return {}; } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; + s64 current_cursor = cursor; + while (current_cursor > 0) { + // Reduce the cursor in one to avoid infinite loops when the instruction sets the same + // register that it uses as operand + const auto [source, new_cursor] = TrackRegister(gpr, code, current_cursor - 1); + current_cursor = new_cursor; + if (!source) { + continue; + } + const auto [base_address, index, offset] = TrackCbuf(source, code, current_cursor); + if (base_address != nullptr) { + return {base_address, index, offset}; + } } - return TrackCbuf(source, code, new_cursor); + return {}; } if (const auto operation = std::get_if<OperationNode>(&*tracked)) { for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index 3f1a94627..c1ea25fb8 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -116,7 +116,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons constexpr std::size_t BaseRegister = 29; auto& memory = Core::System::GetInstance().Memory(); - u64 base_pointer = thread.GetContext().cpu_registers[BaseRegister]; + u64 base_pointer = thread.GetContext64().cpu_registers[BaseRegister]; while (base_pointer != 0) { const u64 lr = memory.Read64(base_pointer + sizeof(u64)); @@ -240,7 +240,7 @@ QString WaitTreeThread::GetText() const { break; } - const auto& context = 
thread.GetContext(); + const auto& context = thread.GetContext64(); const QString pc_info = tr(" PC = 0x%1 LR = 0x%2") .arg(context.pc, 8, 16, QLatin1Char{'0'}) .arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'}); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 67c5de1da..d63d7a58e 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -84,7 +84,7 @@ touch_device= # from any cemuhook compatible motion program. # IPv4 address of the udp input server (Default "127.0.0.1") -udp_input_address= +udp_input_address=127.0.0.1 # Port of the udp input server. (Default 26760) udp_input_port= |