diff options
-rw-r--r-- | CMakeModules/GenerateSCMRev.cmake | 4 | ||||
-rw-r--r-- | src/common/CMakeLists.txt | 5 | ||||
-rw-r--r-- | src/core/hle/kernel/process.cpp | 4 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 4 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 11 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/utils.cpp | 17 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/utils.h | 14 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 9 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_sampler_cache.cpp | 6 | ||||
-rw-r--r-- | src/video_core/shader/control_flow.cpp | 2 | ||||
-rw-r--r-- | src/video_core/shader/decode/memory.cpp | 84 | ||||
-rw-r--r-- | src/video_core/shader/decode/texture.cpp | 16 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 2 | ||||
-rw-r--r-- | src/yuzu/configuration/configure_hotkeys.cpp | 1 |
15 files changed, 104 insertions, 77 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 21e03ae98..fa7ae835f 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -5,6 +5,10 @@ function(get_timestamp _var) endfunction() list(APPEND CMAKE_MODULE_PATH "${SRC_DIR}/externals/cmake-modules") + +# Find the package here with the known path so that the GetGit commands can find it as well +find_package(Git QUIET PATHS "${GIT_EXECUTABLE}") + # generate git/build information include(GetGitRevisionDescription) get_git_head_revision(GIT_REF_SPEC GIT_REV) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 9b0c3db68..9afc6105d 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -15,6 +15,10 @@ endif () if (DEFINED ENV{DISPLAYVERSION}) set(DISPLAY_VERSION $ENV{DISPLAYVERSION}) endif () + +# Pass the path to git to the GenerateSCMRev.cmake as well +find_package(Git QUIET) + add_custom_command(OUTPUT scm_rev.cpp COMMAND ${CMAKE_COMMAND} -DSRC_DIR="${CMAKE_SOURCE_DIR}" @@ -23,6 +27,7 @@ add_custom_command(OUTPUT scm_rev.cpp -DTITLE_BAR_FORMAT_RUNNING="${TITLE_BAR_FORMAT_RUNNING}" -DBUILD_TAG="${BUILD_TAG}" -DBUILD_ID="${DISPLAY_VERSION}" + -DGIT_EXECUTABLE="${GIT_EXECUTABLE}" -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake" DEPENDS # WARNING! It was too much work to try and make a common location for this list, diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 12ea4ebe3..b9035a0be 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -317,6 +317,8 @@ void Process::FreeTLSRegion(VAddr tls_address) { } void Process::LoadModule(CodeSet module_, VAddr base_addr) { + code_memory_size += module_.memory.size(); + const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory)); const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, @@ -332,8 +334,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) { MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code); MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData); MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData); - - code_memory_size += module_.memory.size(); } Process::Process(Core::System& system) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index a35e7a195..16f95b77d 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1271,8 +1271,6 @@ public: } dirty{}; - std::array<u8, Regs::NUM_REGS> dirty_pointers{}; - /// Reads a register value located at the input method address u32 GetRegisterValue(u32 method) const; @@ -1367,6 +1365,8 @@ private: bool execute_on{true}; + std::array<u8, Regs::NUM_REGS> dirty_pointers{}; + /// Retrieves information about a specific TIC entry from the TIC buffer. Texture::TICEntry GetTICEntry(u32 tic_index) const; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index de742d11c..a4acb3796 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -34,9 +34,6 @@ using VideoCommon::Shader::ShaderIR; namespace { -// One UBO is always reserved for emulation values on staged shaders -constexpr u32 STAGE_RESERVED_UBOS = 1; - constexpr u32 STAGE_MAIN_OFFSET = 10; constexpr u32 KERNEL_MAIN_OFFSET = 0; @@ -243,7 +240,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp if (!code_b.empty()) { ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); } - const auto entries = GLShader::GetEntries(ir); std::string source = fmt::format(R"(// {} #version 430 core @@ -314,9 +310,10 @@ std::unordered_set<GLenum> GetSupportedFormats() { CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) - : RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache}, - device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier}, - shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} { + : RasterizerCacheObject{params.host_ptr}, system{params.system}, + disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, + unique_identifier{params.unique_identifier}, shader_type{shader_type}, + entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} { if (!params.precompiled_variants) { return; } diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 9770dda1c..ac99e6385 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -6,16 +6,20 @@ #include <vector> #include <fmt/format.h> - #include <glad/glad.h> -#include "common/assert.h" #include "common/common_types.h" -#include "common/scope_exit.h" #include "video_core/renderer_opengl/utils.h" namespace OpenGL { +struct VertexArrayPushBuffer::Entry { + GLuint binding_index{}; + const GLuint* buffer{}; + GLintptr offset{}; + GLsizei stride{}; +}; + VertexArrayPushBuffer::VertexArrayPushBuffer() = default; VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; @@ -47,6 +51,13 @@ void VertexArrayPushBuffer::Bind() { } } +struct BindBuffersRangePushBuffer::Entry { + GLuint binding; + const GLuint* buffer; + GLintptr offset; + GLsizeiptr size; +}; + BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index d56153fe7..3ad7c02d4 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -26,12 +26,7 @@ public: void Bind(); private: - struct Entry { - GLuint binding_index{}; - const GLuint* buffer{}; - GLintptr offset{}; - GLsizei stride{}; - }; + struct Entry; GLuint vao{}; const GLuint* index_buffer{}; @@ -50,12 +45,7 @@ public: void Bind(); private: - struct Entry { - GLuint binding; - const GLuint* buffer; - GLintptr offset; - GLsizeiptr size; - }; + struct Entry; GLenum target; std::vector<Entry> entries; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 000e3616d..331808113 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -44,7 +44,7 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt return {}; } -vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, +vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, Tegra::Texture::TextureFilter filter) { switch (wrap_mode) { case Tegra::Texture::WrapMode::Wrap: @@ -56,7 +56,12 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, case Tegra::Texture::WrapMode::Border: return vk::SamplerAddressMode::eClampToBorder; case Tegra::Texture::WrapMode::Clamp: - // TODO(Rodrigo): Emulate GL_CLAMP properly + if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { + // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this + // by sending an invalid enumeration. + return static_cast<vk::SamplerAddressMode>(0xcafe); + } + // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors switch (filter) { case Tegra::Texture::TextureFilter::Nearest: return vk::SamplerAddressMode::eClampToEdge; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 1534b738b..7e9678b7b 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -22,7 +22,7 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter); vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); -vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, +vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, Tegra::Texture::TextureFilter filter); vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 1ce583f75..0a8ec8398 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -46,9 +46,9 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), MaxwellToVK::Sampler::Filter(tsc.min_filter), MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), - MaxwellToVK::Sampler::WrapMode(tsc.wrap_u, tsc.mag_filter), - MaxwellToVK::Sampler::WrapMode(tsc.wrap_v, tsc.mag_filter), - MaxwellToVK::Sampler::WrapMode(tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(), + MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), + MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), + MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(), has_anisotropy, max_anisotropy, tsc.depth_compare_enabled, MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(), tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index b427ac873..0229733b6 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -65,7 +65,7 @@ struct BlockInfo { struct CFGRebuildState { explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) - : program_code{program_code}, start{start}, locker{locker} {} + : program_code{program_code}, locker{locker}, start{start} {} const ProgramCode& program_code; ConstBufferLocker& locker; diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index c934d0719..8cc84e935 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -6,6 +6,7 @@ #include <vector> #include <fmt/format.h> +#include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" @@ -22,34 +23,39 @@ using Tegra::Shader::Register; namespace { -u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { +bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { + return uniform_type == Tegra::Shader::UniformType::UnsignedByte || + uniform_type == Tegra::Shader::UniformType::UnsignedShort; +} + +u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { switch (uniform_type) { case Tegra::Shader::UniformType::UnsignedByte: - case Tegra::Shader::UniformType::Single: - return 1; - case Tegra::Shader::UniformType::Double: - return 2; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 4; + return 0b11; + case Tegra::Shader::UniformType::UnsignedShort: + return 0b10; default: - UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); - return 1; + UNREACHABLE(); + return 0; } } -u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { +u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { switch (uniform_type) { + case Tegra::Shader::UniformType::UnsignedByte: + return 8; + case Tegra::Shader::UniformType::UnsignedShort: + return 16; case Tegra::Shader::UniformType::Single: - return 1; + return 32; case Tegra::Shader::UniformType::Double: - return 2; + return 64; case Tegra::Shader::UniformType::Quad: case Tegra::Shader::UniformType::UnsignedQuad: - return 4; + return 128; default: UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); - return 1; + return 32; } } @@ -184,9 +190,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { }(); const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, false); + TrackGlobalMemory(bb, instr, true, false); - const u32 count = GetLdgMemorySize(type); + const u32 size = GetMemorySize(type); + const u32 count = Common::AlignUp(size, 32) / 32; if (!real_address_base || !base_address) { // Tracking failed, load zeroes. for (u32 i = 0; i < count; ++i) { @@ -200,14 +207,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - if (type == Tegra::Shader::UniformType::UnsignedByte) { - // To handle unaligned loads get the byte used to dereferenced global memory - // and extract that byte from the loaded uint32. - Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); - byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); + // To handle unaligned loads get the bytes used to dereference global memory and extract + // those bytes from the loaded u32. + if (IsUnaligned(type)) { + Node mask = Immediate(GetUnalignedMask(type)); + Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); - gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), - Immediate(8)); + gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), + std::move(offset), Immediate(size)); } SetTemporary(bb, i, gmem); @@ -295,19 +303,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } }(); + // For unaligned reads we have to read memory too. + const bool is_read = IsUnaligned(type); const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true); + TrackGlobalMemory(bb, instr, is_read, true); if (!real_address_base || !base_address) { // Tracking failed, skip the store. break; } - const u32 count = GetStgMemorySize(type); + const u32 size = GetMemorySize(type); + const u32 count = Common::AlignUp(size, 32) / 32; for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - const Node value = GetRegister(instr.gpr0.Value() + i); + Node value = GetRegister(instr.gpr0.Value() + i); + + if (IsUnaligned(type)) { + Node mask = Immediate(GetUnalignedMask(type)); + Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); + + value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset, + Immediate(size)); + } + bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; @@ -336,7 +357,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, Instruction instr, - bool is_write) { + bool is_read, bool is_write) { const auto addr_register{GetRegister(instr.gmem.gpr)}; const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; @@ -351,11 +372,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& const GlobalMemoryBase descriptor{index, offset}; const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); auto& usage = entry->second; - if (is_write) { - usage.is_written = true; - } else { - usage.is_read = true; - } + usage.is_written |= is_write; + usage.is_read |= is_read; const auto real_address = Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 4b14cdf58..cd984f763 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -794,14 +794,10 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4) { - const auto [coord_offsets, size, wrap_value, - diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> { - if (is_tld4) { - return {{0, 8, 16}, 6, 32, 64}; - } else { - return {{0, 4, 8}, 4, 8, 16}; - } - }(); + const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; + const u32 size = is_tld4 ? 6 : 4; + const s32 wrap_value = is_tld4 ? 32 : 8; + const s32 diff_value = is_tld4 ? 64 : 16; const u32 mask = (1U << size) - 1; std::vector<Node> aoffi; @@ -814,7 +810,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor LOG_WARNING(HW_GPU, "AOFFI constant folding failed, some hardware might have graphical issues"); for (std::size_t coord = 0; coord < coord_count; ++coord) { - const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size); + const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); const Node condition = Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); @@ -824,7 +820,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor } for (std::size_t coord = 0; coord < coord_count; ++coord) { - s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask; + s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; if (value >= wrap_value) { value -= diff_value; } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index aacd0a0da..ba1db4c11 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -394,7 +394,7 @@ private: std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, Tegra::Shader::Instruction instr, - bool is_write); + bool is_read, bool is_write); /// Register new amending code and obtain the reference id. std::size_t DeclareAmend(Node new_amend); diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp index 3ea0b8d67..fa9052136 100644 --- a/src/yuzu/configuration/configure_hotkeys.cpp +++ b/src/yuzu/configuration/configure_hotkeys.cpp @@ -48,6 +48,7 @@ void ConfigureHotkeys::Populate(const HotkeyRegistry& registry) { } ui->hotkey_list->expandAll(); + ui->hotkey_list->resizeColumnToContents(0); } void ConfigureHotkeys::changeEvent(QEvent* event) { |