summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: bunnei <bunneidev@gmail.com> 2018-04-24 07:09:02 +0200
committer: GitHub <noreply@github.com> 2018-04-24 07:09:02 +0200
commit: 07dc0bbf3e10c030a32f6853de31642162ce988d (patch)
tree: a6f3ae5a2a5a6567c6c8293a60352c9db83d9da2
parent: Merge pull request #370 from Subv/sync_primitives (diff)
parent: GPU: Support multiple enabled vertex arrays. (diff)
download: yuzu-07dc0bbf3e10c030a32f6853de31642162ce988d.tar
yuzu-07dc0bbf3e10c030a32f6853de31642162ce988d.tar.gz
yuzu-07dc0bbf3e10c030a32f6853de31642162ce988d.tar.bz2
yuzu-07dc0bbf3e10c030a32f6853de31642162ce988d.tar.lz
yuzu-07dc0bbf3e10c030a32f6853de31642162ce988d.tar.xz
yuzu-07dc0bbf3e10c030a32f6853de31642162ce988d.tar.zst
yuzu-07dc0bbf3e10c030a32f6853de31642162ce988d.zip
-rw-r--r-- src/video_core/engines/maxwell_3d.h 5
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 121
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 6
3 files changed, 89 insertions, 43 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d4fcedace..609504795 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -500,6 +500,11 @@ public:
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) |
start_low);
}
+
+ bool IsEnabled() const { // Reports whether this vertex array should be treated as active.
+ return enable != 0 && StartAddress() != 0; // needs both the enable field set and a non-zero start address
+ }
+
} vertex_array[NumVertexArrays];
Blend blend;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 2d4a0d6db..82001e7b4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -127,7 +127,8 @@ RasterizerOpenGL::~RasterizerOpenGL() {
}
}
-void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
+std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
+ GLintptr buffer_offset) {
MICROPROFILE_SCOPE(OpenGL_VAO);
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager;
@@ -136,43 +137,59 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
state.draw.vertex_buffer = stream_buffer->GetHandle();
state.Apply();
- // TODO(bunnei): Add support for 1+ vertex arrays
- const auto& vertex_array{regs.vertex_array[0]};
- const auto& vertex_array_limit{regs.vertex_array_limit[0]};
- ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?");
- ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!");
- for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) {
- ASSERT_MSG(!regs.vertex_array[index].enable, "vertex array %d is unimplemented!", index);
+ // Upload all guest vertex arrays sequentially to our buffer
+ for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ const auto& vertex_array = regs.vertex_array[index];
+ if (!vertex_array.IsEnabled())
+ continue;
+
+ const Tegra::GPUVAddr start = vertex_array.StartAddress();
+ const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+
+ ASSERT(end > start);
+ u64 size = end - start + 1;
+
+ // Copy vertex array data
+ const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(start)};
+ res_cache.FlushRegion(data_addr, size, nullptr);
+ Memory::ReadBlock(data_addr, array_ptr, size);
+
+ // Bind the vertex array to the buffer at the current offset.
+ glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride);
+
+ ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented");
+
+ array_ptr += size;
+ buffer_offset += size;
}
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
// Enables the first 16 vertex attributes always, as we don't know which ones are actually used
- // until shader time. Note, Tegra technically supports 32, but we're cappinig this to 16 for now
+ // until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now
// to avoid OpenGL errors.
+ // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
+ // assume every shader uses them all.
for (unsigned index = 0; index < 16; ++index) {
auto& attrib = regs.vertex_attrib_format[index];
NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
attrib.offset.Value(), attrib.IsNormalized());
- glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
- attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride,
- reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset));
+ auto& buffer = regs.vertex_array[attrib.buffer];
+ ASSERT(buffer.IsEnabled());
+
glEnableVertexAttribArray(index);
+ glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
+ attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
+ glVertexAttribBinding(index, attrib.buffer);
+
hw_vao_enabled_attributes[index] = true;
}
- // Copy vertex array data
- const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1};
- const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())};
- res_cache.FlushRegion(data_addr, data_size, nullptr);
- Memory::ReadBlock(data_addr, array_ptr, data_size);
-
- array_ptr += data_size;
- buffer_offset += data_size;
+ return {array_ptr, buffer_offset};
}
-void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) {
+void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
// Helper function for uploading uniform data
const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
if (has_ARB_direct_state_access) {
@@ -190,8 +207,6 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size
u32 current_constbuffer_bindpoint = 0;
for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
- ptr_pos += sizeof(GLShader::MaxwellUniformData);
-
auto& shader_config = gpu.regs.shader_config[index];
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -205,13 +220,16 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size
}
// Upload uniform data as one UBO per stage
- const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
+ const GLintptr ubo_offset = buffer_offset;
copy_buffer(uniform_buffers[stage].handle, ubo_offset,
sizeof(GLShader::MaxwellUniformData));
GLShader::MaxwellUniformData* ub_ptr =
- reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]);
+ reinterpret_cast<GLShader::MaxwellUniformData*>(buffer_ptr);
ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]);
+ buffer_ptr += sizeof(GLShader::MaxwellUniformData);
+ buffer_offset += sizeof(GLShader::MaxwellUniformData);
+
// Fetch program code from memory
GLShader::ProgramCode program_code;
const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
@@ -252,6 +270,24 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size
shader_program_manager->UseTrivialGeometryShader();
}
+size_t RasterizerOpenGL::CalculateVertexArraysSize() const { // Returns the total byte size of all enabled guest vertex arrays.
+ const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
+
+ size_t size = 0; // running total across all enabled arrays
+ for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ if (!regs.vertex_array[index].IsEnabled())
+ continue; // arrays that are not enabled contribute nothing
+
+ const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress();
+ const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+
+ ASSERT(end > start); // guards the unsigned subtraction below against wrap-around
+ size += end - start + 1; // +1: the limit address is counted as part of the array (inclusive range)
+ }
+
+ return size;
+}
+
bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
DrawArrays();
@@ -329,44 +365,49 @@ void RasterizerOpenGL::DrawArrays() {
const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count};
- // TODO(bunnei): Add support for 1+ vertex arrays
- vs_input_size = vertex_num * regs.vertex_array[0].stride;
-
state.draw.vertex_buffer = stream_buffer->GetHandle();
state.Apply();
- size_t buffer_size = static_cast<size_t>(vs_input_size);
+ size_t buffer_size = CalculateVertexArraysSize();
+
if (is_indexed) {
- buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size;
+ buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size;
}
// Uniform space for the 5 shader stages
- buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage;
+ buffer_size = Common::AlignUp<size_t>(buffer_size, 4) +
+ sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage;
- size_t ptr_pos = 0;
u8* buffer_ptr;
GLintptr buffer_offset;
std::tie(buffer_ptr, buffer_offset) =
stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4);
- SetupVertexArray(buffer_ptr, buffer_offset);
- ptr_pos += vs_input_size;
+ u8* offseted_buffer;
+ std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
+
+ offseted_buffer =
+ reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
+ buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
// If indexed mode, copy the index buffer
GLintptr index_buffer_offset = 0;
if (is_indexed) {
- ptr_pos = Common::AlignUp(ptr_pos, 4);
-
const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager;
const VAddr index_data_addr{
memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())};
- Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size);
+ Memory::ReadBlock(index_data_addr, offseted_buffer, index_buffer_size);
- index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
- ptr_pos += index_buffer_size;
+ index_buffer_offset = buffer_offset;
+ offseted_buffer += index_buffer_size;
+ buffer_offset += index_buffer_size;
}
- SetupShaders(buffer_ptr, buffer_offset, ptr_pos);
+ offseted_buffer =
+ reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
+ buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
+
+ SetupShaders(offseted_buffer, buffer_offset);
stream_buffer->Unmap();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 03e02b52a..544714b95 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -148,13 +148,13 @@ private:
static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024;
std::unique_ptr<OGLStreamBuffer> stream_buffer;
- GLsizeiptr vs_input_size;
+ size_t CalculateVertexArraysSize() const;
- void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset);
+ std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset);
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers;
- void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos);
+ void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset);
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw;