diff options
author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2021-03-23 01:03:20 +0100 |
---|---|---|
committer | ameerj <52414509+ameerj@users.noreply.github.com> | 2021-07-23 03:51:24 +0200 |
commit | c63cf4fa2e22538a01c191e1f97ac0f93b67e804 (patch) | |
tree | 2e9be29df86e6282a431da2f1503f128dd7aea8d | |
parent | shader: Fold interpolation multiplications (diff) | |
download | yuzu-c63cf4fa2e22538a01c191e1f97ac0f93b67e804.tar yuzu-c63cf4fa2e22538a01c191e1f97ac0f93b67e804.tar.gz yuzu-c63cf4fa2e22538a01c191e1f97ac0f93b67e804.tar.bz2 yuzu-c63cf4fa2e22538a01c191e1f97ac0f93b67e804.tar.lz yuzu-c63cf4fa2e22538a01c191e1f97ac0f93b67e804.tar.xz yuzu-c63cf4fa2e22538a01c191e1f97ac0f93b67e804.tar.zst yuzu-c63cf4fa2e22538a01c191e1f97ac0f93b67e804.zip |
-rw-r--r-- | src/shader_recompiler/environment.h | 11 | ||||
-rw-r--r-- | src/shader_recompiler/file_environment.cpp | 4 | ||||
-rw-r--r-- | src/shader_recompiler/file_environment.h | 4 | ||||
-rw-r--r-- | src/shader_recompiler/stage.h | 4 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 391 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.h | 34 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_render_pass_cache.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_render_pass_cache.h | 4 |
8 files changed, 347 insertions, 106 deletions
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 1fcaa56dd..6dec4b255 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -3,8 +3,8 @@ #include <array> #include "common/common_types.h" -#include "shader_recompiler/stage.h" #include "shader_recompiler/program_header.h" +#include "shader_recompiler/stage.h" namespace Shader { @@ -14,9 +14,9 @@ public: [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; - [[nodiscard]] virtual u32 TextureBoundBuffer() = 0; + [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; - [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0; + [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; [[nodiscard]] const ProgramHeader& SPH() const noexcept { return sph; @@ -26,9 +26,14 @@ public: return stage; } + [[nodiscard]] u32 StartAddress() const noexcept { + return start_address; + } + protected: ProgramHeader sph{}; Stage stage{}; + u32 start_address{}; }; } // namespace Shader diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp index 21700c72b..f2104f444 100644 --- a/src/shader_recompiler/file_environment.cpp +++ b/src/shader_recompiler/file_environment.cpp @@ -39,11 +39,11 @@ u64 FileEnvironment::ReadInstruction(u32 offset) { return data[offset / 8]; } -u32 FileEnvironment::TextureBoundBuffer() { +u32 FileEnvironment::TextureBoundBuffer() const { throw NotImplementedException("Texture bound buffer serialization"); } -std::array<u32, 3> FileEnvironment::WorkgroupSize() { +std::array<u32, 3> FileEnvironment::WorkgroupSize() const { return {1, 1, 1}; } diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index 62302bc8e..17640a622 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -14,9 +14,9 @@ public: u64 ReadInstruction(u32 offset) override; - u32 TextureBoundBuffer() override; + u32 TextureBoundBuffer() const override; - std::array<u32, 3> WorkgroupSize() override; + std::array<u32, 3> WorkgroupSize() const override; private: std::vector<u64> data; diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h index fc6ce6043..7d4f2c0bb 100644 --- a/src/shader_recompiler/stage.h +++ b/src/shader_recompiler/stage.h @@ -4,9 +4,11 @@ #pragma once +#include "common/common_types.h" + namespace Shader { -enum class Stage { +enum class Stage : u32 { Compute, VertexA, VertexB, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 75f7c1e61..41fc9588f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -4,12 +4,15 @@ #include <algorithm> #include <cstddef> +#include <fstream> #include <memory> #include <vector> #include "common/bit_cast.h" #include "common/cityhash.h" +#include "common/file_util.h" #include "common/microprofile.h" +#include "common/thread_worker.h" #include "core/core.h" #include "core/memory.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" @@ -37,18 +40,23 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -namespace { -using Shader::Backend::SPIRV::EmitSPIRV; +template <typename Container> +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} class GenericEnvironment : public Shader::Environment { public: explicit GenericEnvironment() = default; - explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : gpu_memory{&gpu_memory_}, program_base{program_base_} {} + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} { + start_address = start_address_; + } ~GenericEnvironment() override = default; - std::optional<u128> Analyze(u32 start_address) { + std::optional<u128> Analyze() { const std::optional<u64> size{TryFindSize(start_address)}; if (!size) { return std::nullopt; @@ -66,11 +74,15 @@ public: return read_highest - read_lowest + INST_SIZE; } + [[nodiscard]] bool CanBeSerialized() const noexcept { + return has_unbound_instructions; + } + [[nodiscard]] u128 CalculateHash() const { const size_t size{ReadSize()}; - auto data = std::make_unique<u64[]>(size); + const auto data{std::make_unique<char[]>(size)}; gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); - return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size); + return Common::CityHash128(data.get(), size); } u64 ReadInstruction(u32 address) final { @@ -80,9 +92,32 @@ public: if (address >= cached_lowest && address < cached_highest) { return code[address / INST_SIZE]; } + has_unbound_instructions = true; return gpu_memory->Read<u64>(program_base + address); } + void Serialize(std::ofstream& file) const { + const u64 code_size{static_cast<u64>(ReadSize())}; + const auto data{std::make_unique<char[]>(code_size)}; + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); + + const u32 texture_bound{TextureBoundBuffer()}; + + file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) + .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) + .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) + .write(reinterpret_cast<const char*>(&read_lowest), sizeof(read_lowest)) + .write(reinterpret_cast<const char*>(&read_highest), sizeof(read_highest)) + .write(reinterpret_cast<const char*>(&stage), sizeof(stage)) + .write(data.get(), code_size); + if (stage == Shader::Stage::Compute) { + const std::array<u32, 3> workgroup_size{WorkgroupSize()}; + file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)); + } else { + file.write(reinterpret_cast<const char*>(&sph), sizeof(sph)); + } + } + protected: static constexpr size_t INST_SIZE = sizeof(u64); @@ -122,16 +157,22 @@ protected: u32 cached_lowest = std::numeric_limits<u32>::max(); u32 cached_highest = 0; + + bool has_unbound_instructions = false; }; +namespace { +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::TranslateProgram; + class GraphicsEnvironment final : public GenericEnvironment { public: explicit GraphicsEnvironment() = default; explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, - GPUVAddr program_base_, u32 start_offset) - : GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} { - gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph)); + GPUVAddr program_base_, u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; @@ -158,11 +199,11 @@ public: ~GraphicsEnvironment() override = default; - u32 TextureBoundBuffer() override { + u32 TextureBoundBuffer() const override { return maxwell3d->regs.tex_cb_index; } - std::array<u32, 3> WorkgroupSize() override { + std::array<u32, 3> WorkgroupSize() const override { throw Shader::LogicError("Requesting workgroup size in a graphics stage"); } @@ -174,18 +215,20 @@ class ComputeEnvironment final : public GenericEnvironment { public: explicit ComputeEnvironment() = default; explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} { + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ + &kepler_compute_} { stage = Shader::Stage::Compute; } ~ComputeEnvironment() override = default; - u32 TextureBoundBuffer() override { + u32 TextureBoundBuffer() const override { return kepler_compute->regs.tex_cb_index; } - std::array<u32, 3> WorkgroupSize() override { + std::array<u32, 3> WorkgroupSize() const override { const auto& qmd{kepler_compute->launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; } @@ -193,8 +236,174 @@ public: private: Tegra::Engines::KeplerCompute* kepler_compute{}; }; + +void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, + std::ofstream& file) { + if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { + return; + } + const u32 num_envs{static_cast<u32>(envs.size())}; + file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs)); + for (const GenericEnvironment* const env : envs) { + env->Serialize(file); + } + file.write(key.data(), key.size_bytes()); +} + +template <typename Key, typename Envs> +void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) { + try { + std::ofstream file; + file.exceptions(std::ifstream::failbit); + Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::app); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); + return; + } + if (file.tellp() == 0) { + // Write header... + } + const std::span key_span(reinterpret_cast<const char*>(&key), sizeof(key)); + SerializePipeline(key_span, MakeSpan(envs), file); + + } catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::Delete(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename); + } + } +} + +class FileEnvironment final : public Shader::Environment { +public: + void Deserialize(std::ifstream& file) { + u64 code_size{}; + file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) + .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) + .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) + .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest)) + .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast<char*>(&stage), sizeof(stage)); + code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64))); + file.read(reinterpret_cast<char*>(code.get()), code_size); + if (stage == Shader::Stage::Compute) { + file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)); + } else { + file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); + } + } + + u64 ReadInstruction(u32 address) override { + if (address < read_lowest || address > read_highest) { + throw Shader::LogicError("Out of bounds address {}", address); + } + return code[(address - read_lowest) / sizeof(u64)]; + } + + u32 TextureBoundBuffer() const override { + return texture_bound; + } + + std::array<u32, 3> WorkgroupSize() const override { + return workgroup_size; + } + +private: + std::unique_ptr<u64[]> code; + std::array<u32, 3> workgroup_size{}; + u32 texture_bound{}; + u32 read_lowest{}; + u32 read_highest{}; +}; } // Anonymous namespace +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; + } + std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)}; + std::string base_dir{shader_dir + "/vulkan"}; + std::string transferable_dir{base_dir + "/transferable"}; + std::string precompiled_dir{base_dir + "/precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; + } + pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); + + Common::ThreadWorker worker(11, "PipelineBuilder"); + std::mutex cache_mutex; + struct { + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + std::ifstream file; + Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return; + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + // Read header... + + while (file.tellg() != end) { + if (stop_loading) { + return; + } + u32 num_envs{}; + file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs)); + auto envs{std::make_shared<std::vector<FileEnvironment>>(num_envs)}; + for (FileEnvironment& env : *envs) { + env.Deserialize(file); + } + if (envs->front().ShaderStage() == Shader::Stage::Compute) { + ComputePipelineCacheKey key; + file.read(reinterpret_cast<char*>(&key), sizeof(key)); + + worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + ShaderPools pools; + ComputePipeline pipeline{CreateComputePipeline(pools, key, envs->front())}; + + std::lock_guard lock{cache_mutex}; + compute_cache.emplace(key, std::move(pipeline)); + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + } + }); + } else { + GraphicsPipelineCacheKey key; + file.read(reinterpret_cast<char*>(&key), sizeof(key)); + + worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + ShaderPools pools; + boost::container::static_vector<Shader::Environment*, 5> env_ptrs; + for (auto& env : *envs) { + env_ptrs.push_back(&env); + } + GraphicsPipeline pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs))}; + + std::lock_guard lock{cache_mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + } + }); + } + ++state.total; + } + { + std::lock_guard lock{cache_mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + } + worker.WaitForRequests(); +} + size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); return static_cast<size_t>(hash); @@ -279,17 +488,22 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { if (!cpu_shader_addr) { return nullptr; } - ShaderInfo* const shader{TryGet(*cpu_shader_addr)}; + const ShaderInfo* shader{TryGet(*cpu_shader_addr)}; if (!shader) { - return CreateComputePipelineWithoutShader(*cpu_shader_addr); + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + shader = MakeShaderInfo(env, *cpu_shader_addr); } - const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)}; + const ComputePipelineCacheKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; auto& pipeline{pair->second}; if (!is_new) { return &pipeline; } - pipeline = CreateComputePipeline(shader); + pipeline = CreateComputePipeline(key, shader); return &pipeline; } @@ -310,26 +524,25 @@ bool PipelineCache::RefreshStages() { } const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; if (!shader_info) { - const u32 offset{shader_config.offset}; - shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr); + const u32 start_address{shader_config.offset}; + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + shader_info = MakeShaderInfo(env, *cpu_shader_addr); } graphics_key.unique_hashes[index] = shader_info->unique_hash; } return true; } -const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, - u32 start_address, VAddr cpu_addr) { - GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; +const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { auto info = std::make_unique<ShaderInfo>(); - if (const std::optional<u128> cached_hash{env.Analyze(start_address)}) { + if (const std::optional<u128> cached_hash{env.Analyze()}) { info->unique_hash = *cached_hash; info->size_bytes = env.CachedSize(); } else { // Slow path, not really hit on commercial games // Build a control flow graph to get the real shader size - flow_block_pool.ReleaseContents(); - Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + main_pools.flow_block.ReleaseContents(); + Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()}; info->unique_hash = env.CalculateHash(); info->size_bytes = env.ReadSize(); } @@ -339,100 +552,100 @@ const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, return result; } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { - flow_block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - block_pool.ReleaseContents(); - - std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> envs; +GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, + const GraphicsPipelineCacheKey& key, + std::span<Shader::Environment* const> envs) { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + size_t env_index{0}; std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; - - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == u128{}) { continue; } - const auto program{static_cast<Maxwell::ShaderProgram>(index)}; - GraphicsEnvironment& env{envs[index]}; - const u32 start_address{maxwell3d.regs.shader_config[index].offset}; - env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + Shader::Environment& env{*envs[env_index]}; + ++env_index; - const u32 cfg_offset = start_address + sizeof(Shader::ProgramHeader); - Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset); - programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg); + const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); } std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; u32 binding{0}; + env_index = 0; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == u128{}) { continue; } UNIMPLEMENTED_IF(index == 0); - GraphicsEnvironment& env{envs[index]}; Shader::IR::Program& program{programs[index]}; - const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - std::vector<u32> code{EmitSPIRV(profile, env, program, binding)}; - FILE* file = fopen("D:\\shader.spv", "wb"); - fwrite(code.data(), 4, code.size(), file); - fclose(file); - std::system("spirv-cross --vulkan-semantics D:\\shader.spv"); + Shader::Environment& env{*envs[env_index]}; + ++env_index; + const std::vector<u32> code{EmitSPIRV(profile, env, program, binding)}; modules[stage_index] = BuildShader(device, code); } return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, - descriptor_pool, update_descriptor_queue, render_pass_cache, - graphics_key.state, std::move(modules), infos); + descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, + std::move(modules), infos); } -ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { +GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { + main_pools.ReleaseContents(); + + std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> graphics_envs; + boost::container::static_vector<GenericEnvironment*, Maxwell::MaxShaderProgram> generic_envs; + boost::container::static_vector<Shader::Environment*, Maxwell::MaxShaderProgram> envs; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + const auto program{static_cast<Maxwell::ShaderProgram>(index)}; + GraphicsEnvironment& env{graphics_envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + generic_envs.push_back(&env); + envs.push_back(&env); + } + GraphicsPipeline pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs))}; + if (!pipeline_cache_filename.empty()) { + SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); + } + return pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; - ComputeEnvironment env{kepler_compute, gpu_memory, program_base}; - if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) { - // TODO: Load from cache + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + main_pools.ReleaseContents(); + ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)}; + if (!pipeline_cache_filename.empty()) { + SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, + pipeline_cache_filename); } - flow_block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - block_pool.ReleaseContents(); + return pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, + const ComputePipelineCacheKey& key, + Shader::Environment& env) const { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); - Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start}; - Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; std::vector<u32> code{EmitSPIRV(profile, env, program, binding)}; - /* - FILE* file = fopen("D:\\shader.spv", "wb"); - fwrite(code.data(), 4, code.size(), file); - fclose(file); - std::system("spirv-dis D:\\shader.spv"); - */ - shader_info->unique_hash = env.CalculateHash(); - shader_info->size_bytes = env.ReadSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } -ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) { - ShaderInfo shader; - ComputePipeline pipeline{CreateComputePipeline(&shader)}; - const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; - const size_t size_bytes{shader.size_bytes}; - Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes); - return &compute_cache.emplace(key, std::move(pipeline)).first->second; -} - -ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const { - const auto& qmd{kepler_compute.launch_description}; - return { - .unique_hash = unique_hash, - .shared_memory_size = qmd.shared_alloc, - .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, - }; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 60fb976df..2ecb68bdc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,6 +6,7 @@ #include <array> #include <cstddef> +#include <iosfwd> #include <memory> #include <type_traits> #include <unordered_map> @@ -96,6 +97,7 @@ namespace Vulkan { class ComputePipeline; class Device; +class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; class VKDescriptorPool; @@ -107,6 +109,18 @@ struct ShaderInfo { size_t size_bytes{}; }; +struct ShaderPools { + void ReleaseContents() { + inst.ReleaseContents(); + block.ReleaseContents(); + flow_block.ReleaseContents(); + } + + Shader::ObjectPool<Shader::IR::Inst> inst; + Shader::ObjectPool<Shader::IR::Block> block; + Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; +}; + class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> { public: explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, @@ -123,19 +137,24 @@ public: [[nodiscard]] ComputePipeline* CurrentComputePipeline(); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); + private: bool RefreshStages(); - const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, - u32 start_address, VAddr cpu_addr); + const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); GraphicsPipeline CreateGraphicsPipeline(); - ComputePipeline CreateComputePipeline(ShaderInfo* shader); + GraphicsPipeline CreateGraphicsPipeline(ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span<Shader::Environment* const> envs); - ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); + ComputePipeline CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader); - ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const; + ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, + Shader::Environment& env) const; Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; @@ -155,11 +174,10 @@ private: std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache; std::unordered_map<GraphicsPipelineCacheKey, GraphicsPipeline> graphics_cache; - Shader::ObjectPool<Shader::IR::Inst> inst_pool; - Shader::ObjectPool<Shader::IR::Block> block_pool; - Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block_pool; + ShaderPools main_pools; Shader::Profile profile; + std::string pipeline_cache_filename; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 7e5ae43ea..1c6ba7289 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -50,6 +50,7 @@ VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { + std::lock_guard lock{mutex}; const auto [pair, is_new] = cache.try_emplace(key); if (!is_new) { return *pair->second; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h index db8e83f1a..eaa0ed775 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.h +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -4,6 +4,7 @@ #pragma once +#include <mutex> #include <unordered_map> #include "video_core/surface.h" @@ -37,7 +38,7 @@ struct hash<Vulkan::RenderPassKey> { namespace Vulkan { - class Device; +class Device; class RenderPassCache { public: @@ -48,6 +49,7 @@ public: private: const Device* device{}; std::unordered_map<RenderPassKey, vk::RenderPass> cache; + std::mutex mutex; }; } // namespace Vulkan |