diff options
-rw-r--r-- | src/core/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/core/core.cpp | 10 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | 33 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | 4 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 135 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_gpu.h | 20 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/nvdrv.cpp | 17 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/nvdrv.h | 14 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/syncpoint_manager.cpp | 39 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/syncpoint_manager.h | 85 | ||||
-rw-r--r-- | src/core/hle/service/nvflinger/nvflinger.cpp | 4 | ||||
-rw-r--r-- | src/video_core/dma_pusher.cpp | 80 | ||||
-rw-r--r-- | src/video_core/dma_pusher.h | 49 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 48 | ||||
-rw-r--r-- | src/video_core/gpu.h | 25 |
15 files changed, 448 insertions, 117 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e0f207f3e..9a983e81d 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -454,6 +454,8 @@ add_library(core STATIC hle/service/nvdrv/nvdrv.h hle/service/nvdrv/nvmemp.cpp hle/service/nvdrv/nvmemp.h + hle/service/nvdrv/syncpoint_manager.cpp + hle/service/nvdrv/syncpoint_manager.h hle/service/nvflinger/buffer_queue.cpp hle/service/nvflinger/buffer_queue.h hle/service/nvflinger/nvflinger.cpp diff --git a/src/core/core.cpp b/src/core/core.cpp index fde2ccc09..242796008 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -179,16 +179,18 @@ struct System::Impl { arp_manager.ResetAll(); telemetry_session = std::make_unique<Core::TelemetrySession>(); + + gpu_core = VideoCore::CreateGPU(emu_window, system); + if (!gpu_core) { + return ResultStatus::ErrorVideoCore; + } + service_manager = std::make_shared<Service::SM::ServiceManager>(kernel); Service::Init(service_manager, system); GDBStub::DeferStart(); interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); - gpu_core = VideoCore::CreateGPU(emu_window, system); - if (!gpu_core) { - return ResultStatus::ErrorVideoCore; - } // Initialize time manager, which must happen after kernel is created time_manager.Initialize(); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index 75d9191ff..8356a8139 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -15,8 +15,9 @@ namespace Service::Nvidia::Devices { -nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface) - : nvdevice(system), events_interface{events_interface} {} +nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface, + SyncpointManager& syncpoint_manager) + : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {} 
nvhost_ctrl::~nvhost_ctrl() = default; u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -70,19 +71,33 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& return NvResult::BadParameter; } + if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) { + params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id); + std::memcpy(output.data(), &params, sizeof(params)); + return NvResult::Success; + } + + if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id); + syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) { + params.value = new_value; + std::memcpy(output.data(), &params, sizeof(params)); + return NvResult::Success; + } + auto event = events_interface.events[event_id]; auto& gpu = system.GPU(); + // This is mostly to take into account unimplemented features. As synced // gpu is always synced. if (!gpu.IsAsync()) { - event.writable->Signal(); + event.event.writable->Signal(); return NvResult::Success; } auto lock = gpu.LockSync(); - const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); + const u32 current_syncpoint_value = event.fence.value; const s32 diff = current_syncpoint_value - params.threshold; if (diff >= 0) { - event.writable->Signal(); + event.event.writable->Signal(); params.value = current_syncpoint_value; std::memcpy(output.data(), &params, sizeof(params)); return NvResult::Success; @@ -109,7 +124,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; } params.value |= event_id; - event.writable->Clear(); + event.event.writable->Clear(); gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); if (!is_async && ctrl.fresh_call) { ctrl.must_delay = true; @@ -157,15 +172,19 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto u32 
nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) { IocCtrlEventSignalParams params{}; std::memcpy(&params, input.data(), sizeof(params)); + u32 event_id = params.event_id & 0x00FF; LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id); + if (event_id >= MaxNvEvents) { return NvResult::BadParameter; } if (events_interface.status[event_id] == EventState::Waiting) { events_interface.LiberateEvent(event_id); - events_interface.events[event_id].writable->Signal(); } + + syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id); + return NvResult::Success; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index f7b04d9f1..24ad96cb9 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h @@ -14,7 +14,8 @@ namespace Service::Nvidia::Devices { class nvhost_ctrl final : public nvdevice { public: - explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface); + explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface, + SyncpointManager& syncpoint_manager); ~nvhost_ctrl() override; u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -145,6 +146,7 @@ private: u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output); EventInterface& events_interface; + SyncpointManager& syncpoint_manager; }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index f1966ac0e..152019548 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -7,14 +7,20 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" +#include "core/hle/service/nvdrv/syncpoint_manager.h" #include 
"core/memory.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" namespace Service::Nvidia::Devices { -nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) - : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} +nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, + SyncpointManager& syncpoint_manager) + : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} { + channel_fence.id = syncpoint_manager.AllocateSyncpoint(); + channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id); +} + nvhost_gpu::~nvhost_gpu() = default; u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -126,10 +132,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, params.unk3); - auto& gpu = system.GPU(); - params.fence_out.id = assigned_syncpoints; - params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints); - assigned_syncpoints++; + channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id); + + params.fence_out = channel_fence; + std::memcpy(output.data(), &params, output.size()); return 0; } @@ -145,39 +151,100 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector< return 0; } -u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { - if (input.size() < sizeof(IoctlSubmitGpfifo)) { - UNIMPLEMENTED(); +static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) { + return { + Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, + Tegra::SubmissionMode::Increasing), + {fence.value}, + Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, + Tegra::SubmissionMode::Increasing), + Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id), + }; +} + +static std::vector<Tegra::CommandHeader> 
BuildIncrementCommandList(Fence fence, u32 add_increment) { + std::vector<Tegra::CommandHeader> result{ + Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, + Tegra::SubmissionMode::Increasing), + {}}; + + for (u32 count = 0; count < add_increment; ++count) { + result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, + Tegra::SubmissionMode::Increasing)); + result.emplace_back( + Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id)); } - IoctlSubmitGpfifo params{}; - std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); + + return result; +} + +static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence, + u32 add_increment) { + std::vector<Tegra::CommandHeader> result{ + Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1, + Tegra::SubmissionMode::Increasing), + {}}; + const std::vector<Tegra::CommandHeader> increment{ + BuildIncrementCommandList(fence, add_increment)}; + + result.insert(result.end(), increment.begin(), increment.end()); + + return result; +} + +u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, + Tegra::CommandList&& entries) { LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, params.num_entries, params.flags.raw); - ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + - params.num_entries * sizeof(Tegra::CommandListHeader), - "Incorrect input size"); + auto& gpu = system.GPU(); - Tegra::CommandList entries(params.num_entries); - std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], - params.num_entries * sizeof(Tegra::CommandListHeader)); + params.fence_out.id = channel_fence.id; - UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); - UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); + if (params.flags.add_wait.Value() && + !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) { + 
gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)}); + } - auto& gpu = system.GPU(); - u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); - if (params.flags.increment.Value()) { - params.fence_out.value += current_syncpoint_value; + if (params.flags.add_increment.Value() || params.flags.increment.Value()) { + const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0; + params.fence_out.value = syncpoint_manager.IncreaseSyncpoint( + params.fence_out.id, params.AddIncrementValue() + increment_value); } else { - params.fence_out.value = current_syncpoint_value; + params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id); } + + entries.RefreshIntegrityChecks(gpu); gpu.PushGPUEntries(std::move(entries)); + if (params.flags.add_increment.Value()) { + if (params.flags.suppress_wfi) { + gpu.PushGPUEntries(Tegra::CommandList{ + BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())}); + } else { + gpu.PushGPUEntries(Tegra::CommandList{ + BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())}); + } + } + std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); return 0; } +u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { + if (input.size() < sizeof(IoctlSubmitGpfifo)) { + UNIMPLEMENTED(); + } + IoctlSubmitGpfifo params{}; + std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); + + Tegra::CommandList entries(params.num_entries); + std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)], + params.num_entries * sizeof(Tegra::CommandListHeader)); + + return SubmitGPFIFOImpl(params, output, std::move(entries)); +} + u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, const std::vector<u8>& input2, IoctlVersion version) { if (input.size() < sizeof(IoctlSubmitGpfifo)) { @@ -185,31 +252,17 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, 
std::vector<u8>& output, } IoctlSubmitGpfifo params{}; std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); - LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, - params.num_entries, params.flags.raw); Tegra::CommandList entries(params.num_entries); if (version == IoctlVersion::Version2) { - std::memcpy(entries.data(), input2.data(), + std::memcpy(entries.command_lists.data(), input2.data(), params.num_entries * sizeof(Tegra::CommandListHeader)); } else { - system.Memory().ReadBlock(params.address, entries.data(), + system.Memory().ReadBlock(params.address, entries.command_lists.data(), params.num_entries * sizeof(Tegra::CommandListHeader)); } - UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); - UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); - - auto& gpu = system.GPU(); - u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); - if (params.flags.increment.Value()) { - params.fence_out.value += current_syncpoint_value; - } else { - params.fence_out.value = current_syncpoint_value; - } - gpu.PushGPUEntries(std::move(entries)); - std::memcpy(output.data(), &params, output.size()); - return 0; + return SubmitGPFIFOImpl(params, output, std::move(entries)); } u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 2ac74743f..a252fc06d 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -11,6 +11,11 @@ #include "common/swap.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" #include "core/hle/service/nvdrv/nvdata.h" +#include "video_core/dma_pusher.h" + +namespace Service::Nvidia { +class SyncpointManager; +} namespace Service::Nvidia::Devices { @@ -21,7 +26,8 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b); class nvhost_gpu final : public nvdevice { public: - explicit 
nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); + explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, + SyncpointManager& syncpoint_manager); ~nvhost_gpu() override; u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -162,10 +168,15 @@ private: u32_le raw; BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list BitField<1, 1, u32_le> add_increment; // append an increment to the list - BitField<2, 1, u32_le> new_hw_format; // Mostly ignored + BitField<2, 1, u32_le> new_hw_format; // mostly ignored + BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt BitField<8, 1, u32_le> increment; // increment the returned fence } flags; Fence fence_out; // returned new fence object for others to wait on + + u32 AddIncrementValue() const { + return flags.add_increment.Value() << 1; + } }; static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence), "IoctlSubmitGpfifo is incorrect size"); @@ -190,6 +201,8 @@ private: u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output); u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output); u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output); + u32 SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, + Tegra::CommandList&& entries); u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output); u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, const std::vector<u8>& input2, IoctlVersion version); @@ -198,7 +211,8 @@ private: u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output); std::shared_ptr<nvmap> nvmap_dev; - u32 assigned_syncpoints{}; + SyncpointManager& syncpoint_manager; + Fence channel_fence; }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 803c1a984..a46755cdc 100644 --- 
a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -21,6 +21,7 @@ #include "core/hle/service/nvdrv/interface.h" #include "core/hle/service/nvdrv/nvdrv.h" #include "core/hle/service/nvdrv/nvmemp.h" +#include "core/hle/service/nvdrv/syncpoint_manager.h" #include "core/hle/service/nvflinger/nvflinger.h" namespace Service::Nvidia { @@ -36,21 +37,23 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger nvflinger.SetNVDrvInstance(module_); } -Module::Module(Core::System& system) { +Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} { auto& kernel = system.Kernel(); for (u32 i = 0; i < MaxNvEvents; i++) { std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); - events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(kernel, event_label); + events_interface.events[i] = {Kernel::WritableEvent::CreateEventPair(kernel, event_label)}; events_interface.status[i] = EventState::Free; events_interface.registered[i] = false; } auto nvmap_dev = std::make_shared<Devices::nvmap>(system); devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev); - devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev); + devices["/dev/nvhost-gpu"] = + std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager); devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system); devices["/dev/nvmap"] = nvmap_dev; devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); - devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface); + devices["/dev/nvhost-ctrl"] = + std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager); devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); 
devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); @@ -95,17 +98,17 @@ void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) { if (events_interface.assigned_syncpt[i] == syncpoint_id && events_interface.assigned_value[i] == value) { events_interface.LiberateEvent(i); - events_interface.events[i].writable->Signal(); + events_interface.events[i].event.writable->Signal(); } } } std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const { - return events_interface.events[event_id].readable; + return events_interface.events[event_id].event.readable; } std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const { - return events_interface.events[event_id].writable; + return events_interface.events[event_id].event.writable; } } // namespace Service::Nvidia diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index 7706a5590..f3d863dac 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h @@ -10,6 +10,7 @@ #include "common/common_types.h" #include "core/hle/kernel/writable_event.h" #include "core/hle/service/nvdrv/nvdata.h" +#include "core/hle/service/nvdrv/syncpoint_manager.h" #include "core/hle/service/service.h" namespace Core { @@ -22,15 +23,23 @@ class NVFlinger; namespace Service::Nvidia { +class SyncpointManager; + namespace Devices { class nvdevice; } +/// Represents an Nvidia event +struct NvEvent { + Kernel::EventPair event; + Fence fence{}; +}; + struct EventInterface { // Mask representing currently busy events u64 events_mask{}; // Each kernel event associated to an NV event - std::array<Kernel::EventPair, MaxNvEvents> events; + std::array<NvEvent, MaxNvEvents> events; // The status of the current NVEvent std::array<EventState, MaxNvEvents> status{}; // Tells if an NVEvent is registered or not @@ -119,6 +128,9 @@ public: std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) 
const; private: + /// Manages syncpoints on the host + SyncpointManager syncpoint_manager; + /// Id to use for the next open file descriptor. u32 next_fd = 1; diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/syncpoint_manager.cpp new file mode 100644 index 000000000..0151a03b7 --- /dev/null +++ b/src/core/hle/service/nvdrv/syncpoint_manager.cpp @@ -0,0 +1,39 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "core/hle/service/nvdrv/syncpoint_manager.h" +#include "video_core/gpu.h" + +namespace Service::Nvidia { + +SyncpointManager::SyncpointManager(Tegra::GPU& gpu) : gpu{gpu} {} + +SyncpointManager::~SyncpointManager() = default; + +u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) { + syncpoints[syncpoint_id].min = gpu.GetSyncpointValue(syncpoint_id); + return GetSyncpointMin(syncpoint_id); +} + +u32 SyncpointManager::AllocateSyncpoint() { + for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) { + if (!syncpoints[syncpoint_id].is_allocated) { + syncpoints[syncpoint_id].is_allocated = true; + return syncpoint_id; + } + } + UNREACHABLE_MSG("No more available syncpoints!"); + return {}; +} + +u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) { + for (u32 index = 0; index < value; ++index) { + syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed); + } + + return GetSyncpointMax(syncpoint_id); +} + +} // namespace Service::Nvidia diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.h b/src/core/hle/service/nvdrv/syncpoint_manager.h new file mode 100644 index 000000000..4168b6c7e --- /dev/null +++ b/src/core/hle/service/nvdrv/syncpoint_manager.h @@ -0,0 +1,85 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <atomic> + +#include "common/common_types.h" +#include "core/hle/service/nvdrv/nvdata.h" + +namespace Tegra { +class GPU; +} + +namespace Service::Nvidia { + +class SyncpointManager final { +public: + explicit SyncpointManager(Tegra::GPU& gpu); + ~SyncpointManager(); + + /** + * Returns true if the specified syncpoint is expired for the given value. + * @param syncpoint_id Syncpoint ID to check. + * @param value Value to check against the specified syncpoint. + * @returns True if the specified syncpoint is expired for the given value, otherwise False. + */ + bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const { + return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value); + } + + /** + * Gets the lower bound for the specified syncpoint. + * @param syncpoint_id Syncpoint ID to get the lower bound for. + * @returns The lower bound for the specified syncpoint. + */ + u32 GetSyncpointMin(u32 syncpoint_id) const { + return syncpoints[syncpoint_id].min.load(std::memory_order_relaxed); + } + + /** + * Gets the uper bound for the specified syncpoint. + * @param syncpoint_id Syncpoint ID to get the upper bound for. + * @returns The upper bound for the specified syncpoint. + */ + u32 GetSyncpointMax(u32 syncpoint_id) const { + return syncpoints[syncpoint_id].max.load(std::memory_order_relaxed); + } + + /** + * Refreshes the minimum value for the specified syncpoint. + * @param syncpoint_id Syncpoint ID to be refreshed. + * @returns The new syncpoint minimum value. + */ + u32 RefreshSyncpoint(u32 syncpoint_id); + + /** + * Allocates a new syncoint. + * @returns The syncpoint ID for the newly allocated syncpoint. + */ + u32 AllocateSyncpoint(); + + /** + * Increases the maximum value for the specified syncpoint. + * @param syncpoint_id Syncpoint ID to be increased. + * @param value Value to increase the specified syncpoint by. + * @returns The new syncpoint maximum value. 
+ */ + u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value); + +private: + struct Syncpoint { + std::atomic<u32> min; + std::atomic<u32> max; + std::atomic<bool> is_allocated; + }; + + std::array<Syncpoint, MaxSyncPoints> syncpoints{}; + + Tegra::GPU& gpu; +}; + +} // namespace Service::Nvidia diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index c64673dba..44aa2bdae 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -242,6 +242,10 @@ void NVFlinger::Compose() { const auto& igbp_buffer = buffer->get().igbp_buffer; + if (!system.IsPoweredOn()) { + return; // We are likely shutting down + } + auto& gpu = system.GPU(); const auto& multi_fence = buffer->get().multi_fence; guard->unlock(); diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index f2f96ac33..105b85a92 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "common/cityhash.h" #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" @@ -12,6 +13,20 @@ namespace Tegra { +void CommandList::RefreshIntegrityChecks(GPU& gpu) { + command_list_hashes.resize(command_lists.size()); + + for (std::size_t index = 0; index < command_lists.size(); ++index) { + const CommandListHeader command_list_header = command_lists[index]; + std::vector<CommandHeader> command_headers(command_list_header.size); + gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(), + command_list_header.size * sizeof(u32)); + command_list_hashes[index] = + Common::CityHash64(reinterpret_cast<char*>(command_headers.data()), + command_list_header.size * sizeof(u32)); + } +} + DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} DmaPusher::~DmaPusher() = default; @@ -45,32 +60,51 @@ bool DmaPusher::Step() { return false; } - const CommandList& command_list{dma_pushbuffer.front()}; - ASSERT_OR_EXECUTE(!command_list.empty(), { - // Somehow the command_list is empty, in order to avoid a crash - // We ignore it and assume its size is 0. - dma_pushbuffer.pop(); - dma_pushbuffer_subindex = 0; - return true; - }); - const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; - const GPUVAddr dma_get = command_list_header.addr; - - if (dma_pushbuffer_subindex >= command_list.size()) { - // We've gone through the current list, remove it from the queue - dma_pushbuffer.pop(); - dma_pushbuffer_subindex = 0; - } + CommandList& command_list{dma_pushbuffer.front()}; - if (command_list_header.size == 0) { - return true; - } + ASSERT_OR_EXECUTE( + command_list.command_lists.size() || command_list.prefetch_command_list.size(), { + // Somehow the command_list is empty, in order to avoid a crash + // We ignore it and assume its size is 0. 
+ dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + return true; + }); - // Push buffer non-empty, read a word - command_headers.resize(command_list_header.size); - gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), - command_list_header.size * sizeof(u32)); + if (command_list.prefetch_command_list.size()) { + // Prefetched command list from nvdrv, used for things like synchronization + command_headers = std::move(command_list.prefetch_command_list); + dma_pushbuffer.pop(); + } else { + const CommandListHeader command_list_header{ + command_list.command_lists[dma_pushbuffer_subindex]}; + const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++]; + const GPUVAddr dma_get = command_list_header.addr; + + if (dma_pushbuffer_subindex >= command_list.command_lists.size()) { + // We've gone through the current list, remove it from the queue + dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + } + if (command_list_header.size == 0) { + return true; + } + + // Push buffer non-empty, read a word + command_headers.resize(command_list_header.size); + gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), + command_list_header.size * sizeof(u32)); + + // Integrity check + const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()), + command_list_header.size * sizeof(u32)); + if (new_hash != next_hash) { + LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get); + dma_pushbuffer.pop(); + return true; + } + } for (std::size_t index = 0; index < command_headers.size();) { const CommandHeader& command_header = command_headers[index]; diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index efa90d170..8496ba2da 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -27,6 +27,31 @@ enum class SubmissionMode : u32 { IncreaseOnce = 5 }; +// Note that, traditionally, methods are treated as 4-byte addressable locations, and 
hence +// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. +// So the values you see in docs might be multiplied by 4. +enum class BufferMethods : u32 { + BindObject = 0x0, + Nop = 0x2, + SemaphoreAddressHigh = 0x4, + SemaphoreAddressLow = 0x5, + SemaphoreSequence = 0x6, + SemaphoreTrigger = 0x7, + NotifyIntr = 0x8, + WrcacheFlush = 0x9, + Unk28 = 0xA, + UnkCacheFlush = 0xB, + RefCnt = 0x14, + SemaphoreAcquire = 0x1A, + SemaphoreRelease = 0x1B, + FenceValue = 0x1C, + FenceAction = 0x1D, + WaitForInterrupt = 0x1E, + Unk7c = 0x1F, + Yield = 0x20, + NonPullerMethods = 0x40, +}; + struct CommandListHeader { union { u64 raw; @@ -49,9 +74,29 @@ union CommandHeader { static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); +static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, + SubmissionMode mode) { + CommandHeader result{}; + result.method.Assign(static_cast<u32>(method)); + result.arg_count.Assign(arg_count); + result.mode.Assign(mode); + return result; +} + class GPU; -using CommandList = std::vector<Tegra::CommandListHeader>; +struct CommandList final { + CommandList() = default; + explicit CommandList(std::size_t size) : command_lists(size) {} + explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list) + : prefetch_command_list{std::move(prefetch_command_list)} {} + + void RefreshIntegrityChecks(GPU& gpu); + + std::vector<Tegra::CommandListHeader> command_lists; + std::vector<u64> command_list_hashes; + std::vector<Tegra::CommandHeader> prefetch_command_list; +}; /** * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the @@ -60,7 +105,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>; * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for * details on this 
implementation. */ -class DmaPusher { +class DmaPusher final { public: explicit DmaPusher(Core::System& system, GPU& gpu); ~DmaPusher(); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 171f78183..ebd149c3a 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -194,30 +194,6 @@ void GPU::SyncGuestHost() { void GPU::OnCommandListEnd() { renderer->Rasterizer().ReleaseFences(); } -// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence -// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. -// So the values you see in docs might be multiplied by 4. -enum class BufferMethods { - BindObject = 0x0, - Nop = 0x2, - SemaphoreAddressHigh = 0x4, - SemaphoreAddressLow = 0x5, - SemaphoreSequence = 0x6, - SemaphoreTrigger = 0x7, - NotifyIntr = 0x8, - WrcacheFlush = 0x9, - Unk28 = 0xA, - UnkCacheFlush = 0xB, - RefCnt = 0x14, - SemaphoreAcquire = 0x1A, - SemaphoreRelease = 0x1B, - FenceValue = 0x1C, - FenceAction = 0x1D, - Unk78 = 0x1E, - Unk7c = 0x1F, - Yield = 0x20, - NonPullerMethods = 0x40, -}; enum class GpuSemaphoreOperation { AcquireEqual = 0x1, @@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { case BufferMethods::UnkCacheFlush: case BufferMethods::WrcacheFlush: case BufferMethods::FenceValue: + break; case BufferMethods::FenceAction: + ProcessFenceActionMethod(); + break; + case BufferMethods::WaitForInterrupt: + ProcessWaitForInterruptMethod(); break; case BufferMethods::SemaphoreTrigger: { ProcessSemaphoreTriggerMethod(); @@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) { } } +void GPU::ProcessFenceActionMethod() { + switch (regs.fence_action.op) { + case FenceOperation::Acquire: + WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); + break; + case FenceOperation::Increment: + IncrementSyncPoint(regs.fence_action.syncpoint_id); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented operation {}", + 
static_cast<u32>(regs.fence_action.op.Value())); + } +} + +void GPU::ProcessWaitForInterruptMethod() { + // TODO(bunnei) ImplementMe + LOG_WARNING(HW_GPU, "(STUBBED) called"); +} + void GPU::ProcessSemaphoreTriggerMethod() { const auto semaphoreOperationMask = 0xF; const auto op = diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b8c613b11..5444b49f3 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -263,6 +263,24 @@ public: return use_nvdec; } + enum class FenceOperation : u32 { + Acquire = 0, + Increment = 1, + }; + + union FenceAction { + u32 raw; + BitField<0, 1, FenceOperation> op; + BitField<8, 24, u32> syncpoint_id; + + static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) { + FenceAction result{}; + result.op.Assign(op); + result.syncpoint_id.Assign(syncpoint_id); + return {result.raw}; + } + }; + struct Regs { static constexpr size_t NUM_REGS = 0x40; @@ -291,10 +309,7 @@ public: u32 semaphore_acquire; u32 semaphore_release; u32 fence_value; - union { - BitField<4, 4, u32> operation; - BitField<8, 8, u32> id; - } fence_action; + FenceAction fence_action; INSERT_UNION_PADDING_WORDS(0xE2); // Puller state @@ -342,6 +357,8 @@ protected: private: void ProcessBindMethod(const MethodCall& method_call); + void ProcessFenceActionMethod(); + void ProcessWaitForInterruptMethod(); void ProcessSemaphoreTriggerMethod(); void ProcessSemaphoreRelease(); void ProcessSemaphoreAcquire(); |