diff options
-rw-r--r-- | src/core/hid/emulated_controller.cpp | 6 | ||||
-rw-r--r-- | src/core/hle/service/nfp/amiibo_crypto.cpp | 4 | ||||
-rw-r--r-- | src/core/hle/service/nfp/nfp_device.cpp | 65 | ||||
-rw-r--r-- | src/core/hle/service/nfp/nfp_result.h | 2 | ||||
-rw-r--r-- | src/core/hle/service/nfp/nfp_types.h | 47 | ||||
-rw-r--r-- | src/core/hle/service/nfp/nfp_user.cpp | 30 | ||||
-rw-r--r-- | src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp | 25 | ||||
-rw-r--r-- | src/video_core/macro/macro_jit_x64.cpp | 62 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 84 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | 3 | ||||
-rw-r--r-- | src/web_service/web_backend.cpp | 3 |
11 files changed, 209 insertions, 122 deletions
diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp index e27d84734..025f1c78e 100644 --- a/src/core/hid/emulated_controller.cpp +++ b/src/core/hid/emulated_controller.cpp @@ -1017,9 +1017,11 @@ bool EmulatedController::SetPollingMode(Common::Input::PollingMode polling_mode) auto& output_device = output_devices[static_cast<std::size_t>(DeviceIndex::Right)]; auto& nfc_output_device = output_devices[3]; - nfc_output_device->SetPollingMode(polling_mode); + const auto virtual_nfc_result = nfc_output_device->SetPollingMode(polling_mode); + const auto mapped_nfc_result = output_device->SetPollingMode(polling_mode); - return output_device->SetPollingMode(polling_mode) == Common::Input::PollingError::None; + return virtual_nfc_result == Common::Input::PollingError::None || + mapped_nfc_result == Common::Input::PollingError::None; } bool EmulatedController::SetCameraFormat( diff --git a/src/core/hle/service/nfp/amiibo_crypto.cpp b/src/core/hle/service/nfp/amiibo_crypto.cpp index ce0bc3f75..c32a6816b 100644 --- a/src/core/hle/service/nfp/amiibo_crypto.cpp +++ b/src/core/hle/service/nfp/amiibo_crypto.cpp @@ -28,7 +28,7 @@ bool IsAmiiboValid(const EncryptedNTAG215File& ntag_file) { LOG_DEBUG(Service_NFP, "model_number=0x{0:x}", static_cast<u16>(amiibo_data.model_info.model_number)); LOG_DEBUG(Service_NFP, "series={}", amiibo_data.model_info.series); - LOG_DEBUG(Service_NFP, "fixed_value=0x{0:x}", amiibo_data.model_info.constant_value); + LOG_DEBUG(Service_NFP, "tag_type=0x{0:x}", amiibo_data.model_info.tag_type); LOG_DEBUG(Service_NFP, "tag_dynamic_lock=0x{0:x}", ntag_file.dynamic_lock); LOG_DEBUG(Service_NFP, "tag_CFG0=0x{0:x}", ntag_file.CFG0); @@ -55,7 +55,7 @@ bool IsAmiiboValid(const EncryptedNTAG215File& ntag_file) { if (amiibo_data.constant_value != 0xA5) { return false; } - if (amiibo_data.model_info.constant_value != 0x02) { + if (amiibo_data.model_info.tag_type != PackedTagType::Type2) { return false; } if ((ntag_file.dynamic_lock & 0xFFFFFF) != 0x0F0001U) { diff --git a/src/core/hle/service/nfp/nfp_device.cpp b/src/core/hle/service/nfp/nfp_device.cpp index 0d4ffd3a5..ec895ac01 100644 --- a/src/core/hle/service/nfp/nfp_device.cpp +++ b/src/core/hle/service/nfp/nfp_device.cpp @@ -98,11 +98,6 @@ bool NfpDevice::LoadAmiibo(std::span<const u8> data) { memcpy(&encrypted_tag_data, data.data(), sizeof(EncryptedNTAG215File)); - if (!AmiiboCrypto::IsAmiiboValid(encrypted_tag_data)) { - LOG_INFO(Service_NFP, "Invalid amiibo"); - return false; - } - device_state = DeviceState::TagFound; deactivate_event->GetReadableEvent().Clear(); activate_event->GetWritableEvent().Signal(); @@ -148,20 +143,28 @@ void NfpDevice::Finalize() { } Result NfpDevice::StartDetection(s32 protocol_) { - if (device_state == DeviceState::Initialized || device_state == DeviceState::TagRemoved) { - npad_device->SetPollingMode(Common::Input::PollingMode::NFC); - device_state = DeviceState::SearchingForTag; - protocol = protocol_; - return ResultSuccess; + if (device_state != DeviceState::Initialized && device_state != DeviceState::TagRemoved) { + LOG_ERROR(Service_NFP, "Wrong device state {}", device_state); + return WrongDeviceState; } - LOG_ERROR(Service_NFP, "Wrong device state {}", device_state); - return WrongDeviceState; + if (!npad_device->SetPollingMode(Common::Input::PollingMode::NFC)) { + LOG_ERROR(Service_NFP, "Nfc not supported"); + return NfcDisabled; + } + + device_state = DeviceState::SearchingForTag; + protocol = protocol_; + return ResultSuccess; } Result NfpDevice::StopDetection() { npad_device->SetPollingMode(Common::Input::PollingMode::Active); + if (device_state == DeviceState::Initialized) { + return ResultSuccess; + } + if (device_state == DeviceState::TagFound || device_state == DeviceState::TagMounted) { CloseAmiibo(); return ResultSuccess; @@ -225,6 +228,11 @@ Result NfpDevice::Mount(MountTarget mount_target_) { return WrongDeviceState; } + if (!AmiiboCrypto::IsAmiiboValid(encrypted_tag_data)) { + LOG_ERROR(Service_NFP, "Not an amiibo"); + return NotAnAmiibo; + } + if (!AmiiboCrypto::DecodeAmiibo(encrypted_tag_data, tag_data)) { LOG_ERROR(Service_NFP, "Can't decode amiibo {}", device_state); return CorruptedData; @@ -238,6 +246,9 @@ Result NfpDevice::Mount(MountTarget mount_target_) { Result NfpDevice::Unmount() { if (device_state != DeviceState::TagMounted) { LOG_ERROR(Service_NFP, "Wrong device state {}", device_state); + if (device_state == DeviceState::TagRemoved) { + return TagRemoved; + } return WrongDeviceState; } @@ -256,6 +267,9 @@ Result NfpDevice::Unmount() { Result NfpDevice::GetTagInfo(TagInfo& tag_info) const { if (device_state != DeviceState::TagFound && device_state != DeviceState::TagMounted) { LOG_ERROR(Service_NFP, "Wrong device state {}", device_state); + if (device_state == DeviceState::TagRemoved) { + return TagRemoved; + } return WrongDeviceState; } @@ -287,12 +301,7 @@ Result NfpDevice::GetCommonInfo(CommonInfo& common_info) const { // TODO: Validate this data common_info = { - .last_write_date = - { - settings.write_date.GetYear(), - settings.write_date.GetMonth(), - settings.write_date.GetDay(), - }, + .last_write_date = settings.write_date.GetWriteDate(), .write_counter = tag_data.write_counter, .version = 0, .application_area_size = sizeof(ApplicationArea), @@ -303,6 +312,9 @@ Result NfpDevice::GetCommonInfo(CommonInfo& common_info) const { Result NfpDevice::GetModelInfo(ModelInfo& model_info) const { if (device_state != DeviceState::TagMounted) { LOG_ERROR(Service_NFP, "Wrong device state {}", device_state); + if (device_state == DeviceState::TagRemoved) { + return TagRemoved; + } return WrongDeviceState; } @@ -341,12 +353,7 @@ Result NfpDevice::GetRegisterInfo(RegisterInfo& register_info) const { // TODO: Validate this data register_info = { .mii_char_info = manager.ConvertV3ToCharInfo(tag_data.owner_mii), - .creation_date = - { - settings.init_date.GetYear(), - settings.init_date.GetMonth(), - settings.init_date.GetDay(), - }, + .creation_date = settings.init_date.GetWriteDate(), .amiibo_name = GetAmiiboName(settings), .font_region = {}, }; @@ -478,8 +485,7 @@ Result NfpDevice::GetApplicationArea(std::vector<u8>& data) const { } if (data.size() > sizeof(ApplicationArea)) { - LOG_ERROR(Service_NFP, "Wrong data size {}", data.size()); - return ResultUnknown; + data.resize(sizeof(ApplicationArea)); } memcpy(data.data(), tag_data.application_area.data(), data.size()); @@ -518,7 +524,7 @@ Result NfpDevice::SetApplicationArea(std::span<const u8> data) { Common::TinyMT rng{}; std::memcpy(tag_data.application_area.data(), data.data(), data.size()); - // HW seems to fill excess data with garbage + // Fill remaining data with random numbers rng.GenerateRandomBytes(tag_data.application_area.data() + data.size(), sizeof(ApplicationArea) - data.size()); @@ -561,12 +567,12 @@ Result NfpDevice::RecreateApplicationArea(u32 access_id, std::span<const u8> dat if (data.size() > sizeof(ApplicationArea)) { LOG_ERROR(Service_NFP, "Wrong data size {}", data.size()); - return ResultUnknown; + return WrongApplicationAreaSize; } Common::TinyMT rng{}; std::memcpy(tag_data.application_area.data(), data.data(), data.size()); - // HW seems to fill excess data with garbage + // Fill remaining data with random numbers rng.GenerateRandomBytes(tag_data.application_area.data() + data.size(), sizeof(ApplicationArea) - data.size()); @@ -612,7 +618,6 @@ u64 NfpDevice::GetHandle() const { } u32 NfpDevice::GetApplicationAreaSize() const { - // Investigate if this value is really constant return sizeof(ApplicationArea); } diff --git a/src/core/hle/service/nfp/nfp_result.h b/src/core/hle/service/nfp/nfp_result.h index ac259e2ff..d8e4cf094 100644 --- a/src/core/hle/service/nfp/nfp_result.h +++ b/src/core/hle/service/nfp/nfp_result.h @@ -8,6 +8,8 @@ namespace Service::NFP { constexpr Result DeviceNotFound(ErrorModule::NFP, 64); +constexpr Result InvalidArgument(ErrorModule::NFP, 65); +constexpr Result WrongApplicationAreaSize(ErrorModule::NFP, 68); constexpr Result WrongDeviceState(ErrorModule::NFP, 73); constexpr Result NfcDisabled(ErrorModule::NFP, 80); constexpr Result WriteAmiiboFailed(ErrorModule::NFP, 88); diff --git a/src/core/hle/service/nfp/nfp_types.h b/src/core/hle/service/nfp/nfp_types.h index dd4525b61..867ea2f36 100644 --- a/src/core/hle/service/nfp/nfp_types.h +++ b/src/core/hle/service/nfp/nfp_types.h @@ -84,6 +84,15 @@ enum class TagType : u32 { Type5, // ISO15693 RW/RO 540 bytes 106kbit/s }; +enum class PackedTagType : u8 { + None, + Type1, // ISO14443A RW 96-2k bytes 106kbit/s + Type2, // ISO14443A RW/RO 540 bytes 106kbit/s + Type3, // Sony Felica RW/RO 2k bytes 212kbit/s + Type4, // ISO14443A RW/RO 4k-32k bytes 424kbit/s + Type5, // ISO15693 RW/RO 540 bytes 106kbit/s +}; + enum class TagProtocol : u32 { None, TypeA, // ISO14443A @@ -104,6 +113,13 @@ struct TagUuid { }; static_assert(sizeof(TagUuid) == 10, "TagUuid is an invalid size"); +struct WriteDate { + u16 year; + u8 month; + u8 day; +}; +static_assert(sizeof(WriteDate) == 0x4, "WriteDate is an invalid size"); + struct AmiiboDate { u16 raw_date{}; @@ -121,6 +137,21 @@ struct AmiiboDate { return static_cast<u8>(GetValue() & 0x001F); } + WriteDate GetWriteDate() const { + if (!IsValidDate()) { + return { + .year = 2000, + .month = 1, + .day = 1, + }; + } + return { + .year = GetYear(), + .month = GetMonth(), + .day = GetDay(), + }; + } + void SetYear(u16 year) { const u16 year_converted = static_cast<u16>((year - 2000) << 9); raw_date = Common::swap16((GetValue() & ~0xFE00) | year_converted); @@ -133,6 +164,13 @@ struct AmiiboDate { const u16 day_converted = static_cast<u16>(day); raw_date = Common::swap16((GetValue() & ~0x001F) | day_converted); } + + bool IsValidDate() const { + const bool is_day_valid = GetDay() > 0 && GetDay() < 32; + const bool is_month_valid = GetMonth() >= 0 && GetMonth() < 13; + const bool is_year_valid = GetYear() >= 2000; + return is_year_valid && is_month_valid && is_day_valid; + } }; static_assert(sizeof(AmiiboDate) == 2, "AmiiboDate is an invalid size"); @@ -163,7 +201,7 @@ struct AmiiboModelInfo { AmiiboType amiibo_type; u16_be model_number; AmiiboSeries series; - u8 constant_value; // Must be 02 + PackedTagType tag_type; INSERT_PADDING_BYTES(0x4); // Unknown }; static_assert(sizeof(AmiiboModelInfo) == 0xC, "AmiiboModelInfo is an invalid size"); @@ -250,13 +288,6 @@ struct TagInfo { }; static_assert(sizeof(TagInfo) == 0x58, "TagInfo is an invalid size"); -struct WriteDate { - u16 year; - u8 month; - u8 day; -}; -static_assert(sizeof(WriteDate) == 0x4, "WriteDate is an invalid size"); - struct CommonInfo { WriteDate last_write_date; u16 write_counter; diff --git a/src/core/hle/service/nfp/nfp_user.cpp b/src/core/hle/service/nfp/nfp_user.cpp index c61df9401..4ed53b534 100644 --- a/src/core/hle/service/nfp/nfp_user.cpp +++ b/src/core/hle/service/nfp/nfp_user.cpp @@ -93,6 +93,18 @@ void IUser::ListDevices(Kernel::HLERequestContext& ctx) { return; } + if (!ctx.CanWriteBuffer()) { + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(InvalidArgument); + return; + } + + if (ctx.GetWriteBufferSize() == 0) { + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(InvalidArgument); + return; + } + std::vector<u64> nfp_devices; const std::size_t max_allowed_devices = ctx.GetWriteBufferSize() / sizeof(u64); @@ -255,6 +267,12 @@ void IUser::GetApplicationArea(Kernel::HLERequestContext& ctx) { return; } + if (!ctx.CanWriteBuffer()) { + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(InvalidArgument); + return; + } + auto device = GetNfpDevice(device_handle); if (!device.has_value()) { @@ -283,6 +301,12 @@ void IUser::SetApplicationArea(Kernel::HLERequestContext& ctx) { return; } + if (!ctx.CanReadBuffer()) { + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(InvalidArgument); + return; + } + auto device = GetNfpDevice(device_handle); if (!device.has_value()) { @@ -358,6 +382,12 @@ void IUser::CreateApplicationArea(Kernel::HLERequestContext& ctx) { return; } + if (!ctx.CanReadBuffer()) { + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(InvalidArgument); + return; + } + auto device = GetNfpDevice(device_handle); if (!device.has_value()) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index b5c08d611..7e8f37563 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -13,9 +13,6 @@ namespace Shader::Backend::GLASM { namespace { void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, std::string_view size) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Indirect constant buffer loading"); - } const Register ret{ctx.reg_alloc.Define(inst)}; if (offset.type == Type::U32) { // Avoid reading arrays out of bounds, matching hardware's behavior @@ -24,7 +21,27 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU return; } } - ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset); + + if (binding.IsImmediate()) { + ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset); + return; + } + + const ScalarU32 idx{ctx.reg_alloc.Consume(binding)}; + for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) { + ctx.Add("SEQ.S.CC RC.x,{},{};" + "IF NE.x;" + "LDC.{} {},c{}[{}];", + idx, i, size, ret, i, offset); + + if (i != Info::MAX_INDIRECT_CBUFS - 1) { + ctx.Add("ELSE;"); + } + } + + for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) { + ctx.Add("ENDIF;"); + } } bool IsInputArray(Stage stage) { diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index aca25d902..a302a9603 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -279,28 +279,13 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { auto dst = Compile_GetRegister(opcode.src_a, RESULT); auto src = Compile_GetRegister(opcode.src_b, eax); - if (opcode.bf_src_bit != 0 && opcode.bf_src_bit != 31) { - shr(src, opcode.bf_src_bit); - } else if (opcode.bf_src_bit == 31) { - xor_(src, src); - } - // Don't bother masking the whole register since we're using a 32 bit register - if (opcode.bf_size != 31 && opcode.bf_size != 0) { - and_(src, opcode.GetBitfieldMask()); - } else if (opcode.bf_size == 0) { - xor_(src, src); - } - if (opcode.bf_dst_bit != 31 && opcode.bf_dst_bit != 0) { - shl(src, opcode.bf_dst_bit); - } else if (opcode.bf_dst_bit == 31) { - xor_(src, src); - } - const u32 mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); - if (mask != 0xffffffff) { - and_(dst, mask); - } + and_(dst, mask); + shr(src, opcode.bf_src_bit); + and_(src, opcode.GetBitfieldMask()); + shl(src, opcode.bf_dst_bit); or_(dst, src); + Compile_ProcessResult(opcode.result_operation, opcode.dst); } @@ -309,17 +294,9 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { const auto src = Compile_GetRegister(opcode.src_b, RESULT); shr(src, dst.cvt8()); - if (opcode.bf_size != 0 && opcode.bf_size != 31) { - and_(src, opcode.GetBitfieldMask()); - } else if (opcode.bf_size == 0) { - xor_(src, src); - } + and_(src, opcode.GetBitfieldMask()); + shl(src, opcode.bf_dst_bit); - if (opcode.bf_dst_bit != 0 && opcode.bf_dst_bit != 31) { - shl(src, opcode.bf_dst_bit); - } else if (opcode.bf_dst_bit == 31) { - xor_(src, src); - } Compile_ProcessResult(opcode.result_operation, opcode.dst); } @@ -327,13 +304,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { const auto dst = Compile_GetRegister(opcode.src_a, ecx); const auto src = Compile_GetRegister(opcode.src_b, RESULT); - if (opcode.bf_src_bit != 0) { - shr(src, opcode.bf_src_bit); - } - - if (opcode.bf_size != 31) { - and_(src, opcode.GetBitfieldMask()); - } + shr(src, opcode.bf_src_bit); + and_(src, opcode.GetBitfieldMask()); shl(src, dst.cvt8()); Compile_ProcessResult(opcode.result_operation, opcode.dst); @@ -429,17 +401,11 @@ void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { Xbyak::Label handle_post_exit{}; Xbyak::Label skip{}; jmp(skip, T_NEAR); - if (opcode.is_exit) { - L(handle_post_exit); - // Execute 1 instruction - mov(BRANCH_HOLDER, end_of_code); - // Jump to next instruction to skip delay slot check - jmp(labels[jump_address], T_NEAR); - } else { - L(handle_post_exit); - xor_(BRANCH_HOLDER, BRANCH_HOLDER); - jmp(labels[jump_address], T_NEAR); - } + + L(handle_post_exit); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(labels[jump_address], T_NEAR); + L(skip); mov(BRANCH_HOLDER, handle_post_exit); jmp(delay_skip[pc], T_NEAR); diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 06f68d09a..7fb256953 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -26,20 +26,39 @@ using namespace Common::Literals; constexpr VkDeviceSize MAX_ALIGNMENT = 256; // Maximum size to put elements in the stream buffer constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; -// Stream buffer size in bytes -constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; -constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; constexpr VkMemoryPropertyFlags HOST_FLAGS = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; -bool IsStreamHeap(VkMemoryHeap heap) noexcept { - return STREAM_BUFFER_SIZE < (heap.size * 2) / 3; +static bool IsStreamHeap(VkMemoryHeap heap, size_t staging_buffer_size) noexcept { + return staging_buffer_size < (heap.size * 2) / 3; +} + +static bool HasLargeDeviceLocalHostVisibleMemory(const VkPhysicalDeviceMemoryProperties& props) { + const auto flags{VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT}; + + for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { + const auto& memory_type{props.memoryTypes[type_index]}; + + if ((memory_type.propertyFlags & flags) != flags) { + // Memory must be device local and host visible + continue; + } + + const auto& heap{props.memoryHeaps[memory_type.heapIndex]}; + if (heap.size >= 7168_MiB) { + // This is the right type of memory + return true; + } + } + + return false; } std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, - VkMemoryPropertyFlags flags) noexcept { + VkMemoryPropertyFlags flags, + size_t staging_buffer_size) noexcept { for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { if (((type_mask >> type_index) & 1) == 0) { // Memory type is incompatible @@ -50,7 +69,7 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p // Memory type doesn't have the flags we want continue; } - if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) { + if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex], staging_buffer_size)) { // Memory heap is not suitable for streaming continue; } @@ -61,17 +80,17 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p } u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, - bool try_device_local) { + bool try_device_local, size_t staging_buffer_size) { std::optional<u32> type; if (try_device_local) { // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this - type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS); + type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS, staging_buffer_size); if (type) { return *type; } } // Otherwise try without the DEVICE_LOCAL_BIT - type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS); + type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS, staging_buffer_size); if (type) { return *type; } @@ -79,20 +98,32 @@ u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_ throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); } -size_t Region(size_t iterator) noexcept { - return iterator / REGION_SIZE; +size_t Region(size_t iterator, size_t region_size) noexcept { + return iterator / region_size; } } // Anonymous namespace StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { + + const auto memory_properties{device.GetPhysical().GetMemoryProperties().memoryProperties}; + if (HasLargeDeviceLocalHostVisibleMemory(memory_properties)) { + // Possible on many integrated and newer discrete cards + staging_buffer_size = 1_GiB; + } else { + // Well-supported default size used by most Vulkan PC games + staging_buffer_size = 256_MiB; + } + + region_size = staging_buffer_size / StagingBufferPool::NUM_SYNCS; + const vk::Device& dev = device.GetLogical(); stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .size = STREAM_BUFFER_SIZE, + .size = staging_buffer_size, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, @@ -117,19 +148,18 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem .image = nullptr, .buffer = *stream_buffer, }; - const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties; VkMemoryAllocateInfo stream_memory_info{ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = make_dedicated ? &dedicated_info : nullptr, .allocationSize = requirements.size, - .memoryTypeIndex = - FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true), + .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true, + staging_buffer_size), }; stream_memory = dev.TryAllocateMemory(stream_memory_info); if (!stream_memory) { LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory"); - stream_memory_info.memoryTypeIndex = - FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false); + stream_memory_info.memoryTypeIndex = FindMemoryTypeIndex( + memory_properties, requirements.memoryTypeBits, false, staging_buffer_size); stream_memory = dev.AllocateMemory(stream_memory_info); } @@ -137,7 +167,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem stream_memory.SetObjectNameEXT("Stream Buffer Memory"); } stream_buffer.BindMemory(*stream_memory, 0); - stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); + stream_pointer = stream_memory.Map(0, staging_buffer_size); } StagingBufferPool::~StagingBufferPool() = default; @@ -158,25 +188,25 @@ void StagingBufferPool::TickFrame() { } StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { - if (AreRegionsActive(Region(free_iterator) + 1, - std::min(Region(iterator + size) + 1, NUM_SYNCS))) { + if (AreRegionsActive(Region(free_iterator, region_size) + 1, + std::min(Region(iterator + size, region_size) + 1, NUM_SYNCS))) { // Avoid waiting for the previous usages to be free return GetStagingBuffer(size, MemoryUsage::Upload); } const u64 current_tick = scheduler.CurrentTick(); - std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator), - current_tick); + std::fill(sync_ticks.begin() + Region(used_iterator, region_size), + sync_ticks.begin() + Region(iterator, region_size), current_tick); used_iterator = iterator; free_iterator = std::max(free_iterator, iterator + size); - if (iterator + size >= STREAM_BUFFER_SIZE) { - std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, - current_tick); + if (iterator + size >= staging_buffer_size) { + std::fill(sync_ticks.begin() + Region(used_iterator, region_size), + sync_ticks.begin() + NUM_SYNCS, current_tick); used_iterator = 0; iterator = 0; free_iterator = size; - if (AreRegionsActive(0, Region(size) + 1)) { + if (AreRegionsActive(0, Region(size, region_size) + 1)) { // Avoid waiting for the previous usages to be free return GetStagingBuffer(size, MemoryUsage::Upload); } diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 91dc84da8..90c67177f 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -93,6 +93,9 @@ private: size_t free_iterator = 0; std::array<u64, NUM_SYNCS> sync_ticks{}; + size_t staging_buffer_size = 0; + size_t region_size = 0; + StagingBuffersCache device_local_cache; StagingBuffersCache upload_cache; StagingBuffersCache download_cache; diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp index 378804c08..12a7e4922 100644 --- a/src/web_service/web_backend.cpp +++ b/src/web_service/web_backend.cpp @@ -111,7 +111,8 @@ struct Client::Impl { httplib::Error error; if (!cli->send(request, response, error)) { - LOG_ERROR(WebService, "{} to {} returned null", method, host + path); + LOG_ERROR(WebService, "{} to {} returned null (httplib Error: {})", method, host + path, + httplib::to_string(error)); return WebResult{WebResult::Code::LibError, "Null response", ""}; } |