summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
author: Rodrigo Locatti <reinuseslisp@airmail.cc> 2020-07-21 09:51:05 +0200
committer: GitHub <noreply@github.com> 2020-07-21 09:51:05 +0200
commit: 7278c59d70dc1fdd8755f60a878da6d42825c7a0 (patch)
tree: ea37c477caeec1747ac8b8b1f775da40dd175fa6 /src/video_core/renderer_opengl
parent: Merge pull request #4360 from ReinUsesLisp/glasm-bar (diff)
parent: renderer_{opengl,vulkan}: Clamp shared memory to host's limit (diff)
download: yuzu-7278c59d70dc1fdd8755f60a878da6d42825c7a0.tar
yuzu-7278c59d70dc1fdd8755f60a878da6d42825c7a0.tar.gz
yuzu-7278c59d70dc1fdd8755f60a878da6d42825c7a0.tar.bz2
yuzu-7278c59d70dc1fdd8755f60a878da6d42825c7a0.tar.lz
yuzu-7278c59d70dc1fdd8755f60a878da6d42825c7a0.tar.xz
yuzu-7278c59d70dc1fdd8755f60a878da6d42825c7a0.tar.zst
yuzu-7278c59d70dc1fdd8755f60a878da6d42825c7a0.zip
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- src/video_core/renderer_opengl/gl_arb_decompiler.cpp 16
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp 2
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h 5
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 11
4 files changed, 28 insertions, 6 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index c06e838f7..3b61c9e21 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -913,11 +913,19 @@ void ARBDecompiler::DeclareCompute() {
const ComputeInfo& info = registry.GetComputeInfo();
AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
info.workgroup_size[2]);
- if (info.shared_memory_size_in_words > 0) {
- const u32 size_in_bytes = info.shared_memory_size_in_words * 4;
- AddLine("SHARED_MEMORY {};", size_in_bytes);
- AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
+ if (info.shared_memory_size_in_words == 0) {
+ return;
+ }
+ const u32 limit = device.GetMaxComputeSharedMemorySize();
+ u32 size_in_bytes = info.shared_memory_size_in_words * 4;
+ if (size_in_bytes > limit) {
+ LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
+ size_in_bytes, limit);
+ size_in_bytes = limit;
}
+
+ AddLine("SHARED_MEMORY {};", size_in_bytes);
+ AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
}
void ARBDecompiler::DeclareInputAttributes() {
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 630acb73b..e7d95149f 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -212,6 +212,7 @@ Device::Device()
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
+ max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@@ -250,6 +251,7 @@ Device::Device(std::nullptr_t) {
shader_storage_alignment = 4;
max_vertex_attributes = 16;
max_varyings = 15;
+ max_compute_shared_memory_size = 0x10000;
has_warp_intrinsics = true;
has_shader_ballot = true;
has_vertex_viewport_layer = true;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 94d38d7d1..8a4b6b9fc 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -52,6 +52,10 @@ public:
return max_varyings;
}
+ u32 GetMaxComputeSharedMemorySize() const {
+ return max_compute_shared_memory_size;
+ }
+
bool HasWarpIntrinsics() const {
return has_warp_intrinsics;
}
@@ -118,6 +122,7 @@ private:
std::size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
+ u32 max_compute_shared_memory_size{};
bool has_warp_intrinsics{};
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 2c49aeaac..6a9602ff8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -602,8 +602,15 @@ private:
return;
}
const auto& info = registry.GetComputeInfo();
- if (const u32 size = info.shared_memory_size_in_words; size > 0) {
- code.AddLine("shared uint smem[{}];", size);
+ if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
+ const u32 limit = device.GetMaxComputeSharedMemorySize();
+ if (size > limit) {
+ LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
+ size, limit);
+ size = limit;
+ }
+
+ code.AddLine("shared uint smem[{}];", size / 4);
code.AddNewLine();
}
code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",