summary | refs | log | tree | commit | diff | stats
path: root/src/core
diff options
context:
space:
mode:
author Fernando Sahmkow <fsahmkow27@gmail.com> 2023-12-29 07:53:52 +0100
committer Liam <byteslice@airmail.cc> 2024-01-19 03:12:30 +0100
commit 34a8d0cc8e04b4b9d8e5a75e552f0adb31b5d718 (patch)
tree afa899bb63e97df9c80e5de49395495143799dbd /src/core
parent SMMU: Initial adaptation to video_core. (diff)
downloadyuzu-34a8d0cc8e04b4b9d8e5a75e552f0adb31b5d718.tar
yuzu-34a8d0cc8e04b4b9d8e5a75e552f0adb31b5d718.tar.gz
yuzu-34a8d0cc8e04b4b9d8e5a75e552f0adb31b5d718.tar.bz2
yuzu-34a8d0cc8e04b4b9d8e5a75e552f0adb31b5d718.tar.lz
yuzu-34a8d0cc8e04b4b9d8e5a75e552f0adb31b5d718.tar.xz
yuzu-34a8d0cc8e04b4b9d8e5a75e552f0adb31b5d718.tar.zst
yuzu-34a8d0cc8e04b4b9d8e5a75e552f0adb31b5d718.zip
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/device_memory_manager.h 27
-rw-r--r-- src/core/device_memory_manager.inc 154
-rw-r--r-- src/core/hle/service/nvdrv/core/container.cpp 6
-rw-r--r-- src/core/hle/service/nvdrv/core/nvmap.cpp 7
-rw-r--r-- src/core/hle/service/nvdrv/core/nvmap.h 6
-rw-r--r-- src/core/memory.cpp 53
6 files changed, 221 insertions, 32 deletions
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
index 1a63cbd09..7c7726348 100644
--- a/src/core/device_memory_manager.h
+++ b/src/core/device_memory_manager.h
@@ -10,8 +10,10 @@
#include <mutex>
#include "common/common_types.h"
+#include "common/scratch_buffer.h"
#include "common/virtual_buffer.h"
+
namespace Core {
class DeviceMemory;
@@ -49,9 +51,25 @@ public:
template <typename T>
const T* GetPointer(DAddr address) const;
- DAddr GetAddressFromPAddr(PAddr address) const {
+    /// Invokes `operation(DAddr)` for each device address recorded for physical address `address`.
+    ///
+    /// @param address   Physical address; its page selects the `compressed_device_addr` entry and
+    ///                  its in-page offset is added to every device address passed on.
+    /// @param buffer    Scratch storage filled by InnerGatherDeviceAddresses when the entry has the
+    ///                  multi flag set. Unused on the single-mapping path.
+    /// @param operation Callable taking one DAddr.
+    template <typename Func>
+    void ApplyOpOnPAddr(PAddr address, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
DAddr subbits = static_cast<DAddr>(address & page_mask);
- return (static_cast<DAddr>(compressed_device_addr[(address >> page_bits)]) << page_bits) + subbits;
+        const u32 base = compressed_device_addr[(address >> page_bits)];
+        if ((base >> MULTI_FLAG_BITS) == 0) [[likely]] {
+            // Widen to DAddr *before* shifting: shifting the u32 page index first would be done
+            // in 32-bit arithmetic and drop the high bits of the resulting device address.
+            const DAddr d_address = (static_cast<DAddr>(base) << page_bits) + subbits;
+            operation(d_address);
+            return;
+        }
+        InnerGatherDeviceAddresses(buffer, address);
+        for (u32 value : buffer) {
+            // Same widening as above for every gathered device page index.
+            operation((static_cast<DAddr>(value) << page_bits) + subbits);
+        }
+    }
+
+    /// Converts host pointer `p` to its raw physical address via GetRawPhysicalAddr<u8> and
+    /// hands it, together with `buffer` and `operation`, to ApplyOpOnPAddr.
+    template <typename Func>
+    void ApplyOpOnPointer(const u8* p, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
+        ApplyOpOnPAddr(GetRawPhysicalAddr<u8>(p), buffer, operation);
}
PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const {
@@ -98,6 +116,9 @@ private:
static constexpr size_t page_size = 1ULL << page_bits;
static constexpr size_t page_mask = page_size - 1ULL;
static constexpr u32 physical_address_base = 1U << page_bits;
+ static constexpr u32 MULTI_FLAG_BITS = 31;
+ static constexpr u32 MULTI_FLAG = 1U << MULTI_FLAG_BITS;
+ static constexpr u32 MULTI_MASK = ~MULTI_FLAG;
template <typename T>
T* GetPointerFromRaw(PAddr addr) {
@@ -117,6 +138,8 @@ private:
void WalkBlock(const DAddr addr, const std::size_t size, auto on_unmapped, auto on_memory,
auto increment);
+ void InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer, PAddr address);
+
std::unique_ptr<DeviceMemoryManagerAllocator<Traits>> impl;
const uintptr_t physical_base;
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index 8c5f82d31..4fb3ad3ab 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -18,10 +18,117 @@
namespace Core {
+namespace {
+
+/// Pool of singly linked chains of u32 values, addressed by 1-based entry ids.
+///
+/// Entry id 0 is reserved as the "end of chain" marker, so id `n` refers to `storage[n - 1]`.
+/// Released ids are queued in `free_entries` and handed out again before `storage` grows.
+class PhysicalAddressContainer {
+public:
+    PhysicalAddressContainer() = default;
+    ~PhysicalAddressContainer() = default;
+
+    /// Copies every value of the chain beginning at `start_entry` into `buffer`, in chain order.
+    /// `start_entry` must be a valid (non-zero) entry id. On return the buffer has been resized
+    /// to the number of values gathered.
+    void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) {
+        // Presumably primes the scratch buffer's capacity for short chains before resetting its
+        // logical size - TODO confirm against Common::ScratchBuffer.
+        buffer.resize(8);
+        buffer.resize(0);
+        size_t index = 0;
+        const auto add_value = [&](u32 value) {
+            // Grow first so the slot written below lies inside the buffer's requested size.
+            // Writing before resizing stored each value one past the end of the buffer.
+            buffer.resize(index + 1);
+            buffer[index] = value;
+            index++;
+        };
+
+        const Entry* current = &storage[start_entry - 1];
+        add_value(current->value);
+        while (current->next_entry != 0) {
+            current = &storage[current->next_entry - 1];
+            add_value(current->value);
+        }
+    }
+
+    /// Starts a new single-element chain holding `value` and returns its entry id.
+    u32 Register(u32 value) {
+        return RegisterImplementation(value);
+    }
+
+    /// Appends `value` to the tail of the chain that begins at `start_entry`.
+    void Register(u32 value, u32 start_entry) {
+        const u32 entry_id = RegisterImplementation(value);
+        Entry* current = &storage[start_entry - 1];
+        while (current->next_entry != 0) {
+            current = &storage[current->next_entry - 1];
+        }
+        current->next_entry = entry_id;
+    }
+
+    /// Unlinks the first entry holding `value` from the chain beginning at `start_entry` and
+    /// returns its id to the free list.
+    ///
+    /// The loop has no end-of-chain check, so `value` must be present in the chain.
+    ///
+    /// @return `{more_than_one_remaining, new_start}`: whether at least two entries are still
+    ///         linked after the removal, and the id of the chain's first entry afterwards
+    ///         (0 if the removed entry was the only one).
+    std::pair<bool, u32> Unregister(u32 value, u32 start_entry) {
+        u32 iter_entry = start_entry;
+        Entry* previous{};
+        Entry* current = &storage[iter_entry - 1];
+        bool more_than_one_remaining = false;
+        u32 result_start{start_entry};
+        size_t count = 0; // number of entries that precede the one being removed
+        while (current->value != value) {
+            count++;
+            previous = current;
+            iter_entry = current->next_entry;
+            current = &storage[iter_entry - 1];
+        }
+        // Find next
+        const u32 next_entry = current->next_entry;
+        if (next_entry != 0) {
+            const Entry* next = &storage[next_entry - 1];
+            // A successor exists, so two or more entries remain if that successor has a
+            // successor of its own OR if anything precedes the removed entry. The latter case
+            // (exactly one before, exactly one after) was previously reported as "one left".
+            more_than_one_remaining = next->next_entry != 0 || previous != nullptr;
+        }
+        if (previous) {
+            previous->next_entry = next_entry;
+        } else {
+            result_start = next_entry;
+        }
+        free_entries.emplace_back(iter_entry);
+        return std::make_pair(more_than_one_remaining || count > 1, result_start);
+    }
+
+    /// Returns entry `start_entry` to the free list and yields the value it held.
+    u32 ReleaseEntry(u32 start_entry) {
+        Entry* current = &storage[start_entry - 1];
+        free_entries.emplace_back(start_entry);
+        return current->value;
+    }
+
+private:
+    /// Obtains an entry id, resets the entry to an unlinked node holding `value`, returns the id.
+    u32 RegisterImplementation(u32 value) {
+        const u32 entry_id = GetNewEntry();
+        auto& entry = storage[entry_id - 1];
+        entry.next_entry = 0;
+        entry.value = value;
+        return entry_id;
+    }
+
+    /// Hands out the oldest released id if any, otherwise appends a fresh entry to `storage`.
+    u32 GetNewEntry() {
+        if (!free_entries.empty()) {
+            const u32 result = free_entries.front();
+            free_entries.pop_front();
+            return result;
+        }
+        storage.emplace_back();
+        // Ids are 1-based, so the id of the entry just appended equals the new size.
+        return static_cast<u32>(storage.size());
+    }
+
+    struct Entry {
+        u32 next_entry{}; // id of the following entry, 0 terminates the chain
+        u32 value{};
+    };
+
+    std::deque<Entry> storage;
+    std::deque<u32> free_entries;
+};
+
struct EmptyAllocator { // Stand-in type chosen for `pin_allocator` when `supports_pinning` is false.
EmptyAllocator([[maybe_unused]] DAddr address) {} // Accepts and ignores the address argument.
};
+} // namespace
+
template <typename DTraits>
struct DeviceMemoryManagerAllocator {
static constexpr bool supports_pinning = DTraits::supports_pinning;
@@ -38,6 +145,7 @@ struct DeviceMemoryManagerAllocator {
std::conditional_t<supports_pinning, Common::FlatAllocator<DAddr, 0, pin_bits>, EmptyAllocator>
pin_allocator;
Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;
+ PhysicalAddressContainer multi_dev_address;
/// Returns true when vaddr -> vaddr+size is fully contained in the buffer
template <bool pin_area>
@@ -109,6 +217,9 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo
cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
cached_pages = std::make_unique<CachedPages>();
+ for (size_t i = 0; i < 1ULL << (33 - 12); i++) {
+ compressed_device_addr[i] = 0;
+ }
}
template <typename Traits>
@@ -155,8 +266,19 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size
}
auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U;
compressed_physical_ptr[start_page_d + i] = phys_addr;
- compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i);
InsertCPUBacking(start_page_d + i, new_vaddress, process_id);
+ const u32 base_dev = compressed_device_addr[phys_addr - 1U];
+ const u32 new_dev = static_cast<u32>(start_page_d + i);
+ if (base_dev == 0) [[likely]] {
+ compressed_device_addr[phys_addr - 1U] = new_dev;
+ continue;
+ }
+ u32 start_id = base_dev & MULTI_MASK;
+ if ((base_dev >> MULTI_FLAG_BITS) == 0) {
+ start_id = impl->multi_dev_address.Register(base_dev);
+ compressed_device_addr[phys_addr - 1U] = MULTI_FLAG | start_id;
+ }
+ impl->multi_dev_address.Register(new_dev, start_id);
}
}
@@ -170,13 +292,39 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
auto phys_addr = compressed_physical_ptr[start_page_d + i];
compressed_physical_ptr[start_page_d + i] = 0;
cpu_backing_address[start_page_d + i] = 0;
- if (phys_addr != 0) {
- compressed_device_addr[phys_addr - 1] = 0;
+ if (phys_addr != 0) [[likely]] {
+ const u32 base_dev = compressed_device_addr[phys_addr - 1U];
+ if ((base_dev >> MULTI_FLAG_BITS) == 0) [[likely]] {
+ compressed_device_addr[phys_addr - 1] = 0;
+ continue;
+ }
+ const auto [more_entries, new_start] = impl->multi_dev_address.Unregister(
+ static_cast<u32>(start_page_d + i), base_dev & MULTI_MASK);
+ if (!more_entries) {
+ compressed_device_addr[phys_addr - 1] =
+ impl->multi_dev_address.ReleaseEntry(new_start);
+ continue;
+ }
+ compressed_device_addr[phys_addr - 1] = new_start | MULTI_FLAG;
}
}
}
template <typename Traits>
+// Fills `buffer` with the device page indices recorded for the physical page containing
+// `address`. The table entry is re-read while holding `mapping_guard`: a multi-flagged entry is
+// expanded through the chain container, any other entry is returned as a single element.
+void DeviceMemoryManager<Traits>::InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer,
+                                                             PAddr address) {
+    const size_t page_index = address >> page_bits;
+    std::scoped_lock lk(mapping_guard);
+    const u32 entry = compressed_device_addr[page_index];
+    const bool is_multi = (entry >> MULTI_FLAG_BITS) != 0;
+    if (!is_multi) {
+        buffer.resize(1);
+        buffer[0] = entry;
+        return;
+    }
+    impl->multi_dev_address.GatherValues(entry & MULTI_MASK, buffer);
+}
+
+template <typename Traits>
template <typename T>
T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) {
const size_t index = address >> Memory::YUZU_PAGEBITS;
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp
index 7c2231fe6..e12ce05c1 100644
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -16,8 +16,8 @@
namespace Service::Nvidia::NvCore {
struct ContainerImpl {
- explicit ContainerImpl(Tegra::Host1x::Host1x& host1x_)
- : host1x{host1x_}, file{host1x_}, manager{host1x_}, device_file_data{} {}
+ explicit ContainerImpl(Container& core, Tegra::Host1x::Host1x& host1x_)
+ : host1x{host1x_}, file{core, host1x_}, manager{host1x_}, device_file_data{} {}
Tegra::Host1x::Host1x& host1x;
NvMap file;
SyncpointManager manager;
@@ -29,7 +29,7 @@ struct ContainerImpl {
};
Container::Container(Tegra::Host1x::Host1x& host1x_) {
- impl = std::make_unique<ContainerImpl>(host1x_);
+ impl = std::make_unique<ContainerImpl>(*this, host1x_); // ContainerImpl passes this Container on to its NvMap member.
}
Container::~Container() = default;
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp
index 7879c6f04..e4168a37c 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -7,6 +7,7 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
+#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/memory.h"
#include "video_core/host1x/host1x.h"
@@ -64,7 +65,7 @@ NvResult NvMap::Handle::Duplicate(bool internal_session) {
return NvResult::Success;
}
-NvMap::NvMap(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {}
+NvMap::NvMap(Container& core_, Tegra::Host1x::Host1x& host1x_) : host1x{host1x_}, core{core_} {} // Stores a reference to the Container; PinHandle uses it to look up the session.
void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) {
std::scoped_lock lock(handles_lock);
@@ -160,6 +161,8 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
// If not then allocate some space and map it
DAddr address{};
auto& smmu = host1x.MemoryManager();
+ auto* session = core.GetSession(session_id);
+
auto allocate = std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
//: std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
while ((address = allocate(static_cast<size_t>(handle_description->aligned_size))) == 0) {
@@ -179,7 +182,7 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
handle_description->d_address = address;
smmu.Map(address, handle_description->address, handle_description->aligned_size,
- session_id);
+ session->smmu_id);
}
handle_description->pins++;
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h
index e9e9e8b5b..7dd6d26c3 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.h
+++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -25,6 +25,8 @@ class Host1x;
} // namespace Tegra
namespace Service::Nvidia::NvCore {
+
+class Container;
/**
* @brief The nvmap core class holds the global state for nvmap and provides methods to manage
* handles
@@ -109,7 +111,7 @@ public:
bool can_unlock; //!< If the address region is ready to be unlocked
};
- explicit NvMap(Tegra::Host1x::Host1x& host1x);
+ explicit NvMap(Container& core, Tegra::Host1x::Host1x& host1x);
/**
* @brief Creates an unallocated handle of the given size
@@ -173,5 +175,7 @@ private:
* @return If the handle was removed from the map
*/
bool TryRemoveHandle(const Handle& handle_description);
+
+ Container& core;
};
} // namespace Service::Nvidia::NvCore
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 609e775ae..f126840cb 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -44,7 +44,8 @@ bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessA
// from outside classes. This also allows modification to the internals of the memory
// subsystem without needing to rebuild all files that make use of the memory interface.
struct Memory::Impl {
- explicit Impl(Core::System& system_) : system{system_} {}
+ explicit Impl(Core::System& system_)
+ : system{system_} {}
void SetCurrentPageTable(Kernel::KProcess& process) {
current_page_table = &process.GetPageTable().GetImpl();
@@ -817,26 +818,31 @@ struct Memory::Impl {
void HandleRasterizerDownload(VAddr v_address, size_t size) { // Notifies the GPU of a CPU read of [v_address, v_address + size).
const auto* p = GetPointerImpl(
v_address, []() {}, []() {}); // Both callbacks passed to GetPointerImpl are no-ops.
- auto& gpu_device_memory = system.Host1x().MemoryManager();
- DAddr address =
- gpu_device_memory.GetAddressFromPAddr(system.DeviceMemory().GetRawPhysicalAddr(p));
+ if (!gpu_device_memory) [[unlikely]] { // Lazily cache the Host1x memory manager on first use.
+ gpu_device_memory = &system.Host1x().MemoryManager();
+ }
const size_t core = system.GetCurrentHostThreadID(); // Indexes the per-core read area and scratch buffer below.
auto& current_area = rasterizer_read_areas[core];
- const DAddr end_address = address + size;
- if (current_area.start_address <= address && end_address <= current_area.end_address)
- [[likely]] {
- return;
- }
- current_area = system.GPU().OnCPURead(address, size);
+ gpu_device_memory->ApplyOpOnPointer(
+ p, scratch_buffers[core], [&](DAddr address) { // Callback receives a device address resolved from `p`.
+ const DAddr end_address = address + size;
+ if (current_area.start_address <= address && end_address <= current_area.end_address)
+ [[likely]] { // Range already inside the cached read area: nothing to do for this address.
+ return;
+ }
+ current_area = system.GPU().OnCPURead(address, size); // Replace the cached area with the one the GPU returns.
+ });
}
void HandleRasterizerWrite(VAddr v_address, size_t size) {
const auto* p = GetPointerImpl(
v_address, []() {}, []() {});
- PAddr address = system.DeviceMemory().GetRawPhysicalAddr(p);
constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
const size_t core = std::min(system.GetCurrentHostThreadID(),
sys_core); // any other calls threads go to syscore.
+ if (!gpu_device_memory) [[unlikely]] {
+ gpu_device_memory = &system.Host1x().MemoryManager();
+ }
// Guard on sys_core;
if (core == sys_core) [[unlikely]] {
sys_core_guard.lock();
@@ -846,17 +852,20 @@ struct Memory::Impl {
sys_core_guard.unlock();
}
});
- auto& current_area = rasterizer_write_areas[core];
- PAddr subaddress = address >> YUZU_PAGEBITS;
- bool do_collection = current_area.last_address == subaddress;
- if (!do_collection) [[unlikely]] {
- do_collection = system.GPU().OnCPUWrite(address, size);
- if (!do_collection) {
- return;
+ gpu_device_memory->ApplyOpOnPointer(
+ p, scratch_buffers[core], [&](DAddr address) {
+ auto& current_area = rasterizer_write_areas[core];
+ PAddr subaddress = address >> YUZU_PAGEBITS;
+ bool do_collection = current_area.last_address == subaddress;
+ if (!do_collection) [[unlikely]] {
+ do_collection = system.GPU().OnCPUWrite(address, size);
+ if (!do_collection) {
+ return;
+ }
+ current_area.last_address = subaddress;
}
- current_area.last_address = subaddress;
- }
- gpu_dirty_managers[core].Collect(address, size);
+ gpu_dirty_managers[core].Collect(address, size);
+ });
}
struct GPUDirtyState {
@@ -872,10 +881,12 @@ struct Memory::Impl {
}
Core::System& system;
+ Tegra::MaxwellDeviceMemoryManager* gpu_device_memory{};
Common::PageTable* current_page_table = nullptr;
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
rasterizer_read_areas{};
std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
+ std::array<Common::ScratchBuffer<u32>, Core::Hardware::NUM_CPU_CORES> scratch_buffers{};
std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
std::mutex sys_core_guard;