diff options
Diffstat (limited to 'src/video_core')
19 files changed, 681 insertions, 386 deletions
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index bc80661d8..72e2a33d5 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -4,6 +4,9 @@ #pragma once +#include <array> +#include <optional> + #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -16,7 +19,7 @@ enum class OutputTopology : u32 { TriangleStrip = 7, }; -enum class AttributeUse : u8 { +enum class PixelImap : u8 { Unused = 0, Constant = 1, Perspective = 2, @@ -24,7 +27,7 @@ enum class AttributeUse : u8 { }; // Documentation in: -// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture +// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html struct Header { union { BitField<0, 5, u32> sph_type; @@ -59,8 +62,8 @@ struct Header { union { BitField<0, 12, u32> max_output_vertices; BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. - BitField<24, 4, u32> reserved; - BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders. + BitField<20, 4, u32> reserved; + BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. } common4{}; union { @@ -93,17 +96,20 @@ struct Header { struct { INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB + union { - BitField<0, 2, AttributeUse> x; - BitField<2, 2, AttributeUse> y; - BitField<4, 2, AttributeUse> w; - BitField<6, 2, AttributeUse> z; + BitField<0, 2, PixelImap> x; + BitField<2, 2, PixelImap> y; + BitField<4, 2, PixelImap> z; + BitField<6, 2, PixelImap> w; u8 raw; } imap_generic_vector[32]; + INSERT_UNION_PADDING_BYTES(2); // ImapColor INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10] INSERT_UNION_PADDING_BYTES(2); // ImapReserved + struct { u32 target; union { @@ -112,31 +118,30 @@ struct Header { BitField<2, 30, u32> reserved; }; } omap; + bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { const u32 bit = render_target * 4 + component; return omap.target & (1 << bit); } - AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const { - return static_cast<AttributeUse>( - (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03); - } - AttributeUse GetAttributeUse(u32 attribute) const { - AttributeUse result = AttributeUse::Unused; - for (u32 i = 0; i < 4; i++) { - const auto index = GetAttributeIndexUse(attribute, i); - if (index == AttributeUse::Unused) { - continue; - } - if (result == AttributeUse::Unused || result == index) { - result = index; + + PixelImap GetPixelImap(u32 attribute) const { + const auto get_index = [this, attribute](u32 index) { + return static_cast<PixelImap>( + (imap_generic_vector[attribute].raw >> (index * 2)) & 3); + }; + + std::optional<PixelImap> result; + for (u32 component = 0; component < 4; ++component) { + const PixelImap index = get_index(component); + if (index == PixelImap::Unused) { continue; } - LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode"); - if (index == AttributeUse::Perspective) { - result = index; + if (result && result != index) { + LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode"); } + result = index; } - return result; + return result.value_or(PixelImap::Unused); } } ps; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 1a2e2a9f7..c286502ba 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -131,6 +131,31 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin return bindings; } +bool IsASTCSupported() { + static constexpr std::array formats = { + GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, + GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, + GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, + GL_COMPRESSED_RGBA_ASTC_8x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x8_KHR, + GL_COMPRESSED_RGBA_ASTC_10x5_KHR, GL_COMPRESSED_RGBA_ASTC_10x6_KHR, + GL_COMPRESSED_RGBA_ASTC_10x8_KHR, GL_COMPRESSED_RGBA_ASTC_10x10_KHR, + GL_COMPRESSED_RGBA_ASTC_12x10_KHR, GL_COMPRESSED_RGBA_ASTC_12x12_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, + }; + return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) { + GLint supported; + glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1, + &supported); + return supported == GL_TRUE; + }) == formats.end(); +} + } // Anonymous namespace Device::Device() : base_bindings{BuildBaseBindings()} { @@ -152,6 +177,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} { has_shader_ballot = GLAD_GL_ARB_shader_ballot; has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); + has_astc = IsASTCSupported(); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index d73b099d0..a55050cb5 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -64,6 +64,10 @@ public: return has_image_load_formatted; } + bool HasASTC() const { + return has_astc; + } + bool HasVariableAoffi() const { return has_variable_aoffi; } @@ -97,6 +101,7 @@ private: bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; bool has_image_load_formatted{}; + bool has_astc{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; bool has_precise_bug{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 31add708f..346feeb2f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -386,11 +386,14 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using texture_cache.GuardRenderTargets(true); View color_surface; if (using_color_fb) { - color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); + const std::size_t index = regs.clear_buffers.RT; + color_surface = texture_cache.GetColorBufferSurface(index, true); + texture_cache.MarkColorBufferInUse(index); } View depth_surface; if (using_depth_fb || using_stencil_fb) { - depth_surface = texture_cache.GetDepthBufferSurface(false); + depth_surface = texture_cache.GetDepthBufferSurface(true); + texture_cache.MarkDepthBufferInUse(); } texture_cache.GuardRenderTargets(false); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c7d24cf14..160ae4340 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -31,11 +31,11 @@ namespace { using Tegra::Engines::ShaderType; using Tegra::Shader::Attribute; -using Tegra::Shader::AttributeUse; using Tegra::Shader::Header; using Tegra::Shader::IpaInterpMode; using Tegra::Shader::IpaMode; using Tegra::Shader::IpaSampleMode; +using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using VideoCommon::Shader::BuildTransformFeedback; using VideoCommon::Shader::Registry; @@ -702,20 +702,19 @@ private: code.AddNewLine(); } - std::string GetInputFlags(AttributeUse attribute) { + const char* GetInputFlags(PixelImap attribute) { switch (attribute) { - case AttributeUse::Perspective: - // Default, Smooth - return {}; - case AttributeUse::Constant: - return "flat "; - case AttributeUse::ScreenLinear: - return "noperspective "; - default: - case AttributeUse::Unused: - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute)); - return {}; + case PixelImap::Perspective: + return "smooth"; + case PixelImap::Constant: + return "flat"; + case PixelImap::ScreenLinear: + return "noperspective"; + case PixelImap::Unused: + break; } + UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); + return {}; } void DeclareInputAttributes() { @@ -749,8 +748,8 @@ private: std::string suffix; if (stage == ShaderType::Fragment) { - const auto input_mode{header.ps.GetAttributeUse(location)}; - if (skip_unused && input_mode == AttributeUse::Unused) { + const auto input_mode{header.ps.GetPixelImap(location)}; + if (input_mode == PixelImap::Unused) { return; } suffix = GetInputFlags(input_mode); @@ -927,7 +926,7 @@ private: const u32 address{generic_base + index * generic_stride + element * element_stride}; const bool declared = stage != ShaderType::Fragment || - header.ps.GetAttributeUse(index) != AttributeUse::Unused; + header.ps.GetPixelImap(index) != PixelImap::Unused; const std::string value = declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; code.AddLine("case 0x{:X}U: return {};", address, value); @@ -1142,8 +1141,7 @@ private: GetSwizzle(element)), Type::Float}; case ShaderType::Fragment: - return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)), - Type::Float}; + return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; default: UNREACHABLE(); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index f424e3000..36590a6d0 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -24,7 +24,6 @@ using Tegra::Texture::SwizzleSource; using VideoCore::MortonSwizzleMode; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceCompression; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; @@ -37,102 +36,100 @@ namespace { struct FormatTuple { GLint internal_format; - GLenum format; - GLenum type; - bool compressed; + GLenum format = GL_NONE; + GLenum type = GL_NONE; }; constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5U - {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8U - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 - {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 - {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN2UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, true}, // DXN2SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_UF16 - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_SF16 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4 - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8 - {GL_RGBA32F, GL_RGBA, GL_FLOAT, false}, // RGBA32F - {GL_RG32F, GL_RG, GL_FLOAT, false}, // RG32F - {GL_R32F, GL_RED, GL_FLOAT, false}, // R32F - {GL_R16F, GL_RED, GL_HALF_FLOAT, false}, // R16F - {GL_R16, GL_RED, GL_UNSIGNED_SHORT, false}, // R16U - {GL_R16_SNORM, GL_RED, GL_SHORT, false}, // R16S - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, false}, // R16UI - {GL_R16I, GL_RED_INTEGER, GL_SHORT, false}, // R16I - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, false}, // RG16 - {GL_RG16F, GL_RG, GL_HALF_FLOAT, false}, // RG16F - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, false}, // RG16UI - {GL_RG16I, GL_RG_INTEGER, GL_SHORT, false}, // RG16I - {GL_RG16_SNORM, GL_RG, GL_SHORT, false}, // RG16S - {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U - {GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI - {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4 - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // ABGR8U + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // ABGR8S + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // ABGR8UI + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5U + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10U + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5U + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8U + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8UI + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // RGBA16U + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // RGBA16S + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // RGBA16UI + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // RGBA32UI + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // DXT1 + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // DXT23 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // DXT45 + {GL_COMPRESSED_RED_RGTC1}, // DXN1 + {GL_COMPRESSED_RG_RGTC2}, // DXN2UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // DXN2SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7U + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UF16 + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SF16 + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4 + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F + {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F + {GL_R32F, GL_RED, GL_FLOAT}, // R32F + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16U + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16S + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16UI + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // RG16 + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // RG16UI + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // RG16I + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // RG16S + {GL_RGB32F, GL_RGB, GL_FLOAT}, // RGB32F + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // RGBA8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8U + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // RG8S + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG32UI + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // RGBX16F + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32UI + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8 + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5 + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4 + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 // Compressed sRGB formats - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, false}, // R4G4B4A4U - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, false}, // E5B9G9R9F + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // DXT1_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // DXT23_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // DXT45_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7U_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9F // Depth formats - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, false}, // Z32F - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, false}, // Z16 + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // Z32F + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16 // DepthStencil formats - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // Z24S8 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // S8Z24 - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, false}, // Z32FS8 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // Z24S8 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8Z24 + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8 }}; const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); - const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; - return format; + return tex_format_tuples[static_cast<std::size_t>(pixel_format)]; } GLenum GetTextureTarget(const SurfaceTarget& target) { @@ -242,13 +239,20 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte } // Anonymous namespace -CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) - : VideoCommon::SurfaceBase<View>(gpu_addr, params) { - const auto& tuple{GetFormatTuple(params.pixel_format)}; - internal_format = tuple.internal_format; - format = tuple.format; - type = tuple.type; - is_compressed = tuple.compressed; +CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, + bool is_astc_supported) + : VideoCommon::SurfaceBase<View>(gpu_addr, params, is_astc_supported) { + if (is_converted) { + internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8; + format = GL_RGBA; + type = GL_UNSIGNED_BYTE; + } else { + const auto& tuple{GetFormatTuple(params.pixel_format)}; + internal_format = tuple.internal_format; + format = tuple.format; + type = tuple.type; + is_compressed = params.IsCompressed(); + } target = GetTextureTarget(params.target); texture = CreateTexture(params, target, internal_format, texture_buffer); DecorateSurfaceName(); @@ -264,7 +268,7 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { if (params.IsBuffer()) { glGetNamedBufferSubData(texture_buffer.handle, 0, - static_cast<GLsizeiptr>(params.GetHostSizeInBytes()), + static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)), staging_buffer.data()); return; } @@ -272,9 +276,10 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.emulated_levels; ++level) { - glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); + glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); + const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); + u8* const mip_data = staging_buffer.data() + mip_offset; const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); if (is_compressed) { @@ -294,14 +299,10 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { } void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { - glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); + glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); - auto compression_type = params.GetCompressionType(); - - const std::size_t mip_offset = compression_type == SurfaceCompression::Converted - ? params.GetConvertedMipmapOffset(level) - : params.GetHostMipmapLevelOffset(level); + const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); const u8* buffer{staging_buffer.data() + mip_offset}; if (is_compressed) { const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))}; @@ -482,7 +483,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, const Device& device, StateTracker& state_tracker) - : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} { + : TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} { src_framebuffer.Create(); dst_framebuffer.Create(); } @@ -490,7 +491,7 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, TextureCacheOpenGL::~TextureCacheOpenGL() = default; Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { - return std::make_shared<CachedSurface>(gpu_addr, params); + return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported); } void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, @@ -596,7 +597,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); - if (source_format.compressed) { + if (src_surface->IsCompressed()) { glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size), nullptr); } else { @@ -610,7 +611,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) const GLsizei width = static_cast<GLsizei>(dst_params.width); const GLsizei height = static_cast<GLsizei>(dst_params.height); const GLsizei depth = static_cast<GLsizei>(dst_params.depth); - if (dest_format.compressed) { + if (dst_surface->IsCompressed()) { LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); UNREACHABLE(); } else { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 6658c6ffd..02d9981a1 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -37,7 +37,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> { friend CachedSurfaceView; public: - explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params); + explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool is_astc_supported); ~CachedSurface(); void UploadTexture(const std::vector<u8>& staging_buffer) override; @@ -51,6 +51,10 @@ public: return texture.handle; } + bool IsCompressed() const { + return is_compressed; + } + protected: void DecorateSurfaceName() override; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 28d2fbc4f..7aafb5e59 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -237,18 +237,21 @@ void VKDevice::ReportLoss() const { bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, const vk::DispatchLoaderDynamic& dldi) const { - // Disable for now to avoid converting ASTC twice. - return false; static constexpr std::array astc_formats = { - vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc8x8SrgbBlock, - vk::Format::eAstc8x5SrgbBlock, vk::Format::eAstc5x4SrgbBlock, + vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, + vk::Format::eAstc5x4UnormBlock, vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock, - vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock, + vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock, vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock, - vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock, - vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock, + vk::Format::eAstc8x5UnormBlock, vk::Format::eAstc8x5SrgbBlock, vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock, - vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock}; + vk::Format::eAstc8x8UnormBlock, vk::Format::eAstc8x8SrgbBlock, + vk::Format::eAstc10x5UnormBlock, vk::Format::eAstc10x5SrgbBlock, + vk::Format::eAstc10x6UnormBlock, vk::Format::eAstc10x6SrgbBlock, + vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock, + vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock, + vk::Format::eAstc12x10UnormBlock, vk::Format::eAstc12x10SrgbBlock, + vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock}; if (!features.textureCompressionASTC_LDR) { return false; } @@ -572,24 +575,34 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti vk::Format::eBc2SrgbBlock, vk::Format::eBc3SrgbBlock, vk::Format::eBc7SrgbBlock, + vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, - vk::Format::eAstc8x8SrgbBlock, - vk::Format::eAstc8x5SrgbBlock, + vk::Format::eAstc5x4UnormBlock, vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock, - vk::Format::eAstc10x8UnormBlock, - vk::Format::eAstc10x8SrgbBlock, + vk::Format::eAstc6x5UnormBlock, + vk::Format::eAstc6x5SrgbBlock, vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock, + vk::Format::eAstc8x5UnormBlock, + vk::Format::eAstc8x5SrgbBlock, + vk::Format::eAstc8x6UnormBlock, + vk::Format::eAstc8x6SrgbBlock, + vk::Format::eAstc8x8UnormBlock, + vk::Format::eAstc8x8SrgbBlock, + vk::Format::eAstc10x5UnormBlock, + vk::Format::eAstc10x5SrgbBlock, + vk::Format::eAstc10x6UnormBlock, + vk::Format::eAstc10x6SrgbBlock, + vk::Format::eAstc10x8UnormBlock, + vk::Format::eAstc10x8SrgbBlock, vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock, + vk::Format::eAstc12x10UnormBlock, + vk::Format::eAstc12x10SrgbBlock, vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock, - vk::Format::eAstc8x6UnormBlock, - vk::Format::eAstc8x6SrgbBlock, - vk::Format::eAstc6x5UnormBlock, - vk::Format::eAstc6x5SrgbBlock, vk::Format::eE5B9G9R9UfloatPack32}; std::unordered_map<vk::Format, vk::FormatProperties> format_properties; for (const auto format : formats) { diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index d67f08cf9..b9f9e2714 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -35,7 +35,7 @@ namespace { using Sirit::Id; using Tegra::Engines::ShaderType; using Tegra::Shader::Attribute; -using Tegra::Shader::AttributeUse; +using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using namespace VideoCommon::Shader; @@ -752,16 +752,16 @@ private: if (stage != ShaderType::Fragment) { continue; } - switch (header.ps.GetAttributeUse(location)) { - case AttributeUse::Constant: + switch (header.ps.GetPixelImap(location)) { + case PixelImap::Constant: Decorate(id, spv::Decoration::Flat); break; - case AttributeUse::ScreenLinear: - Decorate(id, spv::Decoration::NoPerspective); - break; - case AttributeUse::Perspective: + case PixelImap::Perspective: // Default break; + case PixelImap::ScreenLinear: + Decorate(id, spv::Decoration::NoPerspective); + break; default: UNREACHABLE_MSG("Unused attribute being fetched"); } @@ -1145,9 +1145,6 @@ private: switch (attribute) { case Attribute::Index::Position: { if (stage == ShaderType::Fragment) { - if (element == 3) { - return {Constant(t_float, 1.0f), Type::Float}; - } return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), Type::Float}; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 26175921b..5b9b39670 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -35,7 +35,6 @@ using VideoCore::MortonSwizzleMode; using Tegra::Texture::SwizzleSource; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceCompression; using VideoCore::Surface::SurfaceTarget; namespace { @@ -96,9 +95,10 @@ vk::ImageViewType GetImageViewType(SurfaceTarget target) { return {}; } -UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) { +UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, + std::size_t host_memory_size) { // TODO(Rodrigo): Move texture buffer creation to the buffer cache - const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(), + const vk::BufferCreateInfo buffer_ci({}, host_memory_size, vk::BufferUsageFlagBits::eUniformTexelBuffer | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst, @@ -110,12 +110,13 @@ UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) { vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, const SurfaceParams& params, - vk::Buffer buffer) { + vk::Buffer buffer, + std::size_t host_memory_size) { ASSERT(params.IsBuffer()); const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; - return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes()); + return vk::BufferViewCreateInfo({}, buffer, format, 0, host_memory_size); } vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { @@ -169,14 +170,15 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool, GPUVAddr gpu_addr, const SurfaceParams& params) - : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device}, - resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, - staging_pool{staging_pool} { + : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system}, + device{device}, resource_manager{resource_manager}, + memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} { if (params.IsBuffer()) { - buffer = CreateBuffer(device, params); + buffer = CreateBuffer(device, params, host_memory_size); commit = memory_manager.Commit(*buffer, false); - const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer); + const auto buffer_view_ci = + GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size); format = buffer_view_ci.format; const auto dev = device.GetLogical(); @@ -255,7 +257,7 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, - size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) { + size = host_memory_size](auto cmdbuf, auto& dld) { const vk::BufferCopy copy(0, 0, size); cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld); @@ -299,10 +301,7 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; - const auto compression_type = params.GetCompressionType(); - const std::size_t mip_offset = compression_type == SurfaceCompression::Converted - ? params.GetConvertedMipmapOffset(level) - : params.GetHostMipmapLevelOffset(level); + const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); return vk::BufferImageCopy( mip_offset, 0, 0, @@ -390,8 +389,9 @@ VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterf const VKDevice& device, VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool) - : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager}, - memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {} + : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device}, + resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, + staging_pool{staging_pool} {} VKTextureCache::~VKTextureCache() = default; diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index d2fe4ec5d..0dd7a1196 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -13,13 +13,247 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" +#include "video_core/textures/texture.h" namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::PredCondition; +using Tegra::Shader::StoreType; +using Tegra::Texture::ComponentType; +using Tegra::Texture::TextureFormat; +using Tegra::Texture::TICEntry; namespace { + +ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, + std::size_t component) { + const TextureFormat format{descriptor.format}; + switch (format) { + case TextureFormat::R16_G16_B16_A16: + case TextureFormat::R32_G32_B32_A32: + case TextureFormat::R32_G32_B32: + case TextureFormat::R32_G32: + case TextureFormat::R16_G16: + case TextureFormat::R32: + case TextureFormat::R16: + case TextureFormat::R8: + case TextureFormat::R1: + if (component == 0) { + return descriptor.r_type; + } + if (component == 1) { + return descriptor.g_type; + } + if (component == 2) { + return descriptor.b_type; + } + if (component == 3) { + return descriptor.a_type; + } + break; + case TextureFormat::A8R8G8B8: + if (component == 0) { + return descriptor.a_type; + } + if (component == 1) { + return descriptor.r_type; + } + if (component == 2) { + return descriptor.g_type; + } + if (component == 3) { + return descriptor.b_type; + } + break; + case TextureFormat::A2B10G10R10: + case TextureFormat::A4B4G4R4: + case TextureFormat::A5B5G5R1: + case TextureFormat::A1B5G5R5: + if (component == 0) { + return descriptor.a_type; + } + if (component == 1) { + return descriptor.b_type; + } + if (component == 2) { + return descriptor.g_type; + } + if (component == 3) { + return descriptor.r_type; + } + break; + case TextureFormat::R32_B24G8: + if (component == 0) { + return descriptor.r_type; + } + if (component == 1) { + return descriptor.b_type; + } + if (component == 2) { + return descriptor.g_type; + } + break; + case TextureFormat::B5G6R5: + case TextureFormat::B6G5R5: + if (component == 0) { + return descriptor.b_type; + } + if (component == 1) { + return descriptor.g_type; + } + if (component == 2) { + return descriptor.r_type; + } + break; + case TextureFormat::G8R24: + case TextureFormat::G24R8: + case TextureFormat::G8R8: + case TextureFormat::G4R4: + if (component == 0) { + return descriptor.g_type; + } + if (component == 1) { + return descriptor.r_type; + } + break; + } + UNIMPLEMENTED_MSG("texture format not implement={}", format); + return ComponentType::FLOAT; +} + +bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { + constexpr u8 R = 0b0001; + constexpr u8 G = 0b0010; + constexpr u8 B = 0b0100; + constexpr u8 A = 0b1000; + constexpr std::array<u8, 16> mask = { + 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), + (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; + return std::bitset<4>{mask.at(component_mask)}.test(component); +} + +u32 GetComponentSize(TextureFormat format, std::size_t component) { + switch (format) { + case TextureFormat::R32_G32_B32_A32: + return 32; + case TextureFormat::R16_G16_B16_A16: + return 16; + case TextureFormat::R32_G32_B32: + return component <= 2 ? 32 : 0; + case TextureFormat::R32_G32: + return component <= 1 ? 32 : 0; + case TextureFormat::R16_G16: + return component <= 1 ? 16 : 0; + case TextureFormat::R32: + return component == 0 ? 32 : 0; + case TextureFormat::R16: + return component == 0 ? 16 : 0; + case TextureFormat::R8: + return component == 0 ? 8 : 0; + case TextureFormat::R1: + return component == 0 ? 1 : 0; + case TextureFormat::A8R8G8B8: + return 8; + case TextureFormat::A2B10G10R10: + return (component == 3 || component == 2 || component == 1) ? 10 : 2; + case TextureFormat::A4B4G4R4: + return 4; + case TextureFormat::A5B5G5R1: + return (component == 0 || component == 1 || component == 2) ? 5 : 1; + case TextureFormat::A1B5G5R5: + return (component == 1 || component == 2 || component == 3) ? 5 : 1; + case TextureFormat::R32_B24G8: + if (component == 0) { + return 32; + } + if (component == 1) { + return 24; + } + if (component == 2) { + return 8; + } + return 0; + case TextureFormat::B5G6R5: + if (component == 0 || component == 2) { + return 5; + } + if (component == 1) { + return 6; + } + return 0; + case TextureFormat::B6G5R5: + if (component == 1 || component == 2) { + return 5; + } + if (component == 0) { + return 6; + } + return 0; + case TextureFormat::G8R24: + if (component == 0) { + return 8; + } + if (component == 1) { + return 24; + } + return 0; + case TextureFormat::G24R8: + if (component == 0) { + return 8; + } + if (component == 1) { + return 24; + } + return 0; + case TextureFormat::G8R8: + return (component == 0 || component == 1) ? 8 : 0; + case TextureFormat::G4R4: + return (component == 0 || component == 1) ? 4 : 0; + default: + UNIMPLEMENTED_MSG("texture format not implement={}", format); + return 0; + } +} + +std::size_t GetImageComponentMask(TextureFormat format) { + constexpr u8 R = 0b0001; + constexpr u8 G = 0b0010; + constexpr u8 B = 0b0100; + constexpr u8 A = 0b1000; + switch (format) { + case TextureFormat::R32_G32_B32_A32: + case TextureFormat::R16_G16_B16_A16: + case TextureFormat::A8R8G8B8: + case TextureFormat::A2B10G10R10: + case TextureFormat::A4B4G4R4: + case TextureFormat::A5B5G5R1: + case TextureFormat::A1B5G5R5: + return std::size_t{R | G | B | A}; + case TextureFormat::R32_G32_B32: + case TextureFormat::R32_B24G8: + case TextureFormat::B5G6R5: + case TextureFormat::B6G5R5: + return std::size_t{R | G | B}; + case TextureFormat::R32_G32: + case TextureFormat::R16_G16: + case TextureFormat::G8R24: + case TextureFormat::G24R8: + case TextureFormat::G8R8: + case TextureFormat::G4R4: + return std::size_t{R | G}; + case TextureFormat::R32: + case TextureFormat::R16: + case TextureFormat::R8: + case TextureFormat::R1: + return std::size_t{R}; + default: + UNIMPLEMENTED_MSG("texture format not implement={}", format); + return std::size_t{R | G | B | A}; + } +} + std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { switch (image_type) { case Tegra::Shader::ImageType::Texture1D: @@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { } } // Anonymous namespace +std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, + Node original_value) { + switch (component_type) { + case ComponentType::SNORM: { + // range [-1.0, 1.0] + auto cnv_value = Operation(OperationCode::FMul, original_value, + Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); + cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); + return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; + } + case ComponentType::SINT: + case ComponentType::UNORM: { + bool is_signed = component_type == ComponentType::SINT; + // range [0.0, 1.0] + auto cnv_value = Operation(OperationCode::FMul, original_value, + Immediate(static_cast<float>(1 << component_size) - 1.f)); + return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), + is_signed}; + } + case ComponentType::UINT: // range [0, (1 << component_size) - 1] + return {std::move(original_value), false}; + case ComponentType::FLOAT: + if (component_size == 16) { + return {Operation(OperationCode::HCastFloat, original_value), true}; + } else { + return {std::move(original_value), true}; + } + default: + UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); + return {std::move(original_value), true}; + } +} + u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::SULD: { - UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); @@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { : GetBindlessImage(instr.gpr39, type)}; image.MarkRead(); - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.suldst.IsComponentEnabled(element)) { - continue; + if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { + u32 indexer = 0; + for (u32 element = 0; element < 4; ++element) { + if (!instr.suldst.IsComponentEnabled(element)) { + continue; + } + MetaImage meta{image, {}, element}; + Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); + SetTemporary(bb, indexer++, std::move(value)); + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { + UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && + instr.suldst.GetStoreDataLayout() != StoreType::Bits64); + + auto descriptor = [this, instr] { + std::optional<Tegra::Engines::SamplerDescriptor> descriptor; + if (instr.suldst.is_immediate) { + descriptor = + registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); + } else { + const Node image_register = GetRegister(instr.gpr39); + const auto [base_image, buffer, offset] = TrackCbuf( + image_register, global_code, static_cast<s64>(global_code.size())); + descriptor = registry.ObtainBindlessSampler(buffer, offset); + } + if (!descriptor) { + UNREACHABLE_MSG("Failed to obtain image descriptor"); + } + return *descriptor; + }(); + + const auto comp_mask = GetImageComponentMask(descriptor.format); + + switch (instr.suldst.GetStoreDataLayout()) { + case StoreType::Bits32: + case StoreType::Bits64: { + u32 indexer = 0; + u32 shifted_counter = 0; + Node value = Immediate(0); + for (u32 element = 0; element < 4; ++element) { + if (!IsComponentEnabled(comp_mask, element)) { + continue; + } + const auto component_type = GetComponentType(descriptor, element); + const auto component_size = GetComponentSize(descriptor.format, element); + MetaImage meta{image, {}, element}; + + auto [converted_value, is_signed] = GetComponentValue( + component_type, component_size, + Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); + + // shift element to correct position + const auto shifted = shifted_counter; + if (shifted > 0) { + converted_value = + SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, + std::move(converted_value), Immediate(shifted)); + } + shifted_counter += component_size; + + // add value into result + value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); + + // if we shifted enough for 1 byte -> we save it into temp + if (shifted_counter >= 32) { + SetTemporary(bb, indexer++, std::move(value)); + // reset counter and value to prepare pack next byte + value = Immediate(0); + shifted_counter = 0; + } + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + break; + } + default: + UNREACHABLE(); + break; } - MetaImage meta{image, {}, element}; - Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); } break; } diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 8262d4d4b..d4f95b18c 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -11,12 +11,17 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::ConditionCode; using Tegra::Shader::Instruction; +using Tegra::Shader::IpaInterpMode; using Tegra::Shader::OpCode; +using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using Tegra::Shader::SystemVariable; +using Index = Tegra::Shader::Attribute::Index; + u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -219,27 +224,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { } case OpCode::Id::IPA: { const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; - const auto attribute = instr.attribute.fmt28; - const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), - instr.ipa.sample_mode.Value()}; + const Index index = attribute.index; Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) - : GetInputAttribute(attribute.index, attribute.element); - const Tegra::Shader::Attribute::Index index = attribute.index.Value(); - const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 && - index <= Tegra::Shader::Attribute::Index::Attribute_31; - if (is_generic || is_physical) { - // TODO(Blinkhawk): There are cases where a perspective attribute use PASS. - // In theory by setting them as perspective, OpenGL does the perspective correction. - // A way must figured to reverse the last step of it. - if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) { - value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20)); + : GetInputAttribute(index, attribute.element); + + // Code taken from Ryujinx. + if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { + const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0); + if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { + Node position_w = GetInputAttribute(Index::Position, 3); + value = Operation(OperationCode::FMul, move(value), move(position_w)); } } - value = GetSaturatedFloat(value, instr.ipa.saturate); - SetRegister(bb, instr.gpr0, value); + if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { + value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); + } + + value = GetSaturatedFloat(move(value), instr.ipa.saturate); + + SetRegister(bb, instr.gpr0, move(value)); break; } case OpCode::Id::OUT_R: { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 80fc9b82c..ca6c976c9 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -312,6 +312,10 @@ private: /// Conditionally saturates a half float pair Node GetSaturatedHalfFloat(Node value, bool saturate = true); + /// Get image component value by type and size + std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type, + u32 component_size, Node original_value); + /// Returns a predicate comparing two floats Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); /// Returns a predicate comparing two integers diff --git a/src/video_core/surface.h b/src/video_core/surface.h index ae8817465..e0acd44d3 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -504,103 +504,6 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { return GetFormatBpp(pixel_format) / CHAR_BIT; } -enum class SurfaceCompression { - None, // Not compressed - Compressed, // Texture is compressed - Converted, // Texture is converted before upload or after download - Rearranged, // Texture is swizzled before upload or after download -}; - -constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table = {{ - SurfaceCompression::None, // ABGR8U - SurfaceCompression::None, // ABGR8S - SurfaceCompression::None, // ABGR8UI - SurfaceCompression::None, // B5G6R5U - SurfaceCompression::None, // A2B10G10R10U - SurfaceCompression::None, // A1B5G5R5U - SurfaceCompression::None, // R8U - SurfaceCompression::None, // R8UI - SurfaceCompression::None, // RGBA16F - SurfaceCompression::None, // RGBA16U - SurfaceCompression::None, // RGBA16S - SurfaceCompression::None, // RGBA16UI - SurfaceCompression::None, // R11FG11FB10F - SurfaceCompression::None, // RGBA32UI - SurfaceCompression::Compressed, // DXT1 - SurfaceCompression::Compressed, // DXT23 - SurfaceCompression::Compressed, // DXT45 - SurfaceCompression::Compressed, // DXN1 - SurfaceCompression::Compressed, // DXN2UNORM - SurfaceCompression::Compressed, // DXN2SNORM - SurfaceCompression::Compressed, // BC7U - SurfaceCompression::Compressed, // BC6H_UF16 - SurfaceCompression::Compressed, // BC6H_SF16 - SurfaceCompression::Converted, // ASTC_2D_4X4 - SurfaceCompression::None, // BGRA8 - SurfaceCompression::None, // RGBA32F - SurfaceCompression::None, // RG32F - SurfaceCompression::None, // R32F - SurfaceCompression::None, // R16F - SurfaceCompression::None, // R16U - SurfaceCompression::None, // R16S - SurfaceCompression::None, // R16UI - SurfaceCompression::None, // R16I - SurfaceCompression::None, // RG16 - SurfaceCompression::None, // RG16F - SurfaceCompression::None, // RG16UI - SurfaceCompression::None, // RG16I - SurfaceCompression::None, // RG16S - SurfaceCompression::None, // RGB32F - SurfaceCompression::None, // RGBA8_SRGB - SurfaceCompression::None, // RG8U - SurfaceCompression::None, // RG8S - SurfaceCompression::None, // RG32UI - SurfaceCompression::None, // RGBX16F - SurfaceCompression::None, // R32UI - SurfaceCompression::None, // R32I - SurfaceCompression::Converted, // ASTC_2D_8X8 - SurfaceCompression::Converted, // ASTC_2D_8X5 - SurfaceCompression::Converted, // ASTC_2D_5X4 - SurfaceCompression::None, // BGRA8_SRGB - SurfaceCompression::Compressed, // DXT1_SRGB - SurfaceCompression::Compressed, // DXT23_SRGB - SurfaceCompression::Compressed, // DXT45_SRGB - SurfaceCompression::Compressed, // BC7U_SRGB - SurfaceCompression::None, // R4G4B4A4U - SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB - SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB - SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB - SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB - SurfaceCompression::Converted, // ASTC_2D_5X5 - SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB - SurfaceCompression::Converted, // ASTC_2D_10X8 - SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB - SurfaceCompression::Converted, // ASTC_2D_6X6 - SurfaceCompression::Converted, // ASTC_2D_6X6_SRGB - SurfaceCompression::Converted, // ASTC_2D_10X10 - SurfaceCompression::Converted, // ASTC_2D_10X10_SRGB - SurfaceCompression::Converted, // ASTC_2D_12X12 - SurfaceCompression::Converted, // ASTC_2D_12X12_SRGB - SurfaceCompression::Converted, // ASTC_2D_8X6 - SurfaceCompression::Converted, // ASTC_2D_8X6_SRGB - SurfaceCompression::Converted, // ASTC_2D_6X5 - SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB - SurfaceCompression::None, // E5B9G9R9F - SurfaceCompression::None, // Z32F - SurfaceCompression::None, // Z16 - SurfaceCompression::None, // Z24S8 - SurfaceCompression::Rearranged, // S8Z24 - SurfaceCompression::None, // Z32FS8 -}}; - -constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { - if (format == PixelFormat::Invalid) { - return SurfaceCompression::None; - } - DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_type_table.size()); - return compression_type_table[static_cast<std::size_t>(format)]; -} - SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); bool SurfaceTargetIsLayered(SurfaceTarget target); diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 002df414f..6fe815135 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -18,15 +18,20 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; -using VideoCore::Surface::SurfaceCompression; +using VideoCore::Surface::IsPixelFormatASTC; +using VideoCore::Surface::PixelFormat; StagingCache::StagingCache() = default; StagingCache::~StagingCache() = default; -SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) - : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr}, - mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) { +SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params, + bool is_astc_supported) + : params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels), + mipmap_offsets(params.num_levels) { + is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported; + host_memory_size = params.GetHostSizeInBytes(is_converted); + std::size_t offset = 0; for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; @@ -164,7 +169,7 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf std::size_t guest_offset{mipmap_offsets[level]}; if (params.is_layered) { - std::size_t host_offset{0}; + std::size_t host_offset = 0; const std::size_t guest_stride = layer_size; const std::size_t host_stride = params.GetHostLayerSize(level); for (u32 layer = 0; layer < params.depth; ++layer) { @@ -206,7 +211,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", params.block_width, static_cast<u32>(params.target)); for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; + const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, staging_buffer.data() + host_offset, level); } @@ -219,7 +224,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, const u32 height{(params.height + block_height - 1) / block_height}; const u32 copy_size{width * bpp}; if (params.pitch == copy_size) { - std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes()); + std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false)); } else { const u8* start{host_ptr}; u8* write_to{staging_buffer.data()}; @@ -231,19 +236,15 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } } - auto compression_type = params.GetCompressionType(); - if (compression_type == SurfaceCompression::None || - compression_type == SurfaceCompression::Compressed) + if (!is_converted && params.pixel_format != PixelFormat::S8Z24) { return; + } - for (u32 level_up = params.num_levels; level_up > 0; --level_up) { - const u32 level = level_up - 1; - const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)}; - const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged - ? in_host_offset - : params.GetConvertedMipmapOffset(level); - u8* in_buffer = staging_buffer.data() + in_host_offset; - u8* out_buffer = staging_buffer.data() + out_host_offset; + for (u32 level = params.num_levels; level--;) { + const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)}; + const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)}; + u8* const in_buffer = staging_buffer.data() + in_host_offset; + u8* const out_buffer = staging_buffer.data() + out_host_offset; ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, params.GetMipWidth(level), params.GetMipHeight(level), params.GetMipDepth(level), true, true); @@ -273,7 +274,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; + const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, staging_buffer.data() + host_offset, level); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 5f79bb0aa..d7882a031 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -131,6 +131,10 @@ public: return !params.is_tiled; } + bool IsConverted() const { + return is_converted; + } + bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { return params.pixel_format == pixel_format; } @@ -160,7 +164,8 @@ public: } protected: - explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params); + explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params, + bool is_astc_supported); ~SurfaceBaseImpl() = default; virtual void DecorateSurfaceName() = 0; @@ -168,12 +173,13 @@ protected: const SurfaceParams params; std::size_t layer_size; std::size_t guest_memory_size; - const std::size_t host_memory_size; + std::size_t host_memory_size; GPUVAddr gpu_addr{}; CacheAddr cache_addr{}; CacheAddr cache_addr_end{}; VAddr cpu_addr{}; bool is_continuous{}; + bool is_converted{}; std::vector<std::size_t> mipmap_sizes; std::vector<std::size_t> mipmap_offsets; @@ -288,8 +294,9 @@ public: } protected: - explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params) - : SurfaceBaseImpl(gpu_addr, params) {} + explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params, + bool is_astc_supported) + : SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {} ~SurfaceBase() = default; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 9931c5ef7..6f3ef45be 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -113,10 +113,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta params.height = tic.Height(); params.depth = tic.Depth(); params.pitch = params.is_tiled ? 0 : tic.Pitch(); - if (params.target == SurfaceTarget::Texture2D && params.depth > 1) { - params.depth = 1; - } else if (params.target == SurfaceTarget::TextureCubemap || - params.target == SurfaceTarget::TextureCubeArray) { + if (params.target == SurfaceTarget::TextureCubemap || + params.target == SurfaceTarget::TextureCubeArray) { params.depth *= 6; } params.num_levels = tic.max_mip_level + 1; @@ -309,28 +307,26 @@ std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { return offset; } -std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { - std::size_t offset = 0; - for (u32 i = 0; i < level; i++) { - offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); - } - return offset; -} - -std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const { +std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const { std::size_t offset = 0; - for (u32 i = 0; i < level; i++) { - offset += GetConvertedMipmapSize(i); + if (is_converted) { + for (u32 i = 0; i < level; ++i) { + offset += GetConvertedMipmapSize(i) * GetNumLayers(); + } + } else { + for (u32 i = 0; i < level; ++i) { + offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); + } } return offset; } std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { constexpr std::size_t rgba8_bpp = 4ULL; - const std::size_t width_t = GetMipWidth(level); - const std::size_t height_t = GetMipHeight(level); - const std::size_t depth_t = is_layered ? depth : GetMipDepth(level); - return width_t * height_t * depth_t * rgba8_bpp; + const std::size_t mip_width = GetMipWidth(level); + const std::size_t mip_height = GetMipHeight(level); + const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level); + return mip_width * mip_height * mip_depth * rgba8_bpp; } std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 995cc3818..24957df8d 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -20,8 +20,6 @@ namespace VideoCommon { class FormatLookupTable; -using VideoCore::Surface::SurfaceCompression; - class SurfaceParams { public: /// Creates SurfaceCachedParams from a texture configuration. @@ -67,16 +65,14 @@ public: return GetInnerMemorySize(false, false, false); } - std::size_t GetHostSizeInBytes() const { - std::size_t host_size_in_bytes; - if (GetCompressionType() == SurfaceCompression::Converted) { - // ASTC is uncompressed in software, in emulated as RGBA8 - host_size_in_bytes = 0; - for (u32 level = 0; level < num_levels; ++level) { - host_size_in_bytes += GetConvertedMipmapSize(level); - } - } else { - host_size_in_bytes = GetInnerMemorySize(true, false, false); + std::size_t GetHostSizeInBytes(bool is_converted) const { + if (!is_converted) { + return GetInnerMemorySize(true, false, false); + } + // ASTC is uncompressed in software, in emulated as RGBA8 + std::size_t host_size_in_bytes = 0; + for (u32 level = 0; level < num_levels; ++level) { + host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers(); } return host_size_in_bytes; } @@ -107,9 +103,8 @@ public: u32 GetMipBlockDepth(u32 level) const; /// Returns the best possible row/pitch alignment for the surface. - u32 GetRowAlignment(u32 level) const { - const u32 bpp = - GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel(); + u32 GetRowAlignment(u32 level, bool is_converted) const { + const u32 bpp = is_converted ? 4 : GetBytesPerPixel(); return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); } @@ -117,11 +112,7 @@ public: std::size_t GetGuestMipmapLevelOffset(u32 level) const; /// Returns the offset in bytes in host memory (linear) of a given mipmap level. - std::size_t GetHostMipmapLevelOffset(u32 level) const; - - /// Returns the offset in bytes in host memory (linear) of a given mipmap level - /// for a texture that is converted in host gpu. - std::size_t GetConvertedMipmapOffset(u32 level) const; + std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const; /// Returns the size in bytes in guest memory of a given mipmap level. std::size_t GetGuestMipmapSize(u32 level) const { @@ -196,11 +187,6 @@ public: pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; } - /// Returns how the compression should be handled for this texture. - SurfaceCompression GetCompressionType() const { - return VideoCore::Surface::GetFormatCompressionType(pixel_format); - } - /// Returns is the surface is a TextureBuffer type of surface. bool IsBuffer() const { return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 6cdbe63d0..c8f8d659d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -289,8 +289,9 @@ public: } protected: - TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : system{system}, rasterizer{rasterizer} { + explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + bool is_astc_supported) + : system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { SetEmptyColorBuffer(i); } @@ -381,6 +382,7 @@ protected: } Core::System& system; + const bool is_astc_supported; private: enum class RecycleStrategy : u32 { |