summaryrefslogtreecommitdiffstats
path: root/src/shader_recompiler/frontend/maxwell
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler/frontend/maxwell')
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp222
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp110
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp12
3 files changed, 332 insertions, 12 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
new file mode 100644
index 000000000..7a32c5eb3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
@@ -0,0 +1,222 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+ SAFEADD,
+};
+
+enum class AtomSize : u64 {
+ U32,
+ S32,
+ U64,
+ F32,
+ F16x2,
+ S64,
+};
+
+IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
+ AtomOp op, bool is_signed) {
+ switch (op) {
+ case AtomOp::ADD:
+ return ir.GlobalAtomicIAdd(offset, op_b);
+ case AtomOp::MIN:
+ return ir.GlobalAtomicIMin(offset, op_b, is_signed);
+ case AtomOp::MAX:
+ return ir.GlobalAtomicIMax(offset, op_b, is_signed);
+ case AtomOp::INC:
+ return ir.GlobalAtomicInc(offset, op_b);
+ case AtomOp::DEC:
+ return ir.GlobalAtomicDec(offset, op_b);
+ case AtomOp::AND:
+ return ir.GlobalAtomicAnd(offset, op_b);
+ case AtomOp::OR:
+ return ir.GlobalAtomicOr(offset, op_b);
+ case AtomOp::XOR:
+ return ir.GlobalAtomicXor(offset, op_b);
+ case AtomOp::EXCH:
+ return ir.GlobalAtomicExchange(offset, op_b);
+ default:
+ throw NotImplementedException("Integer Atom Operation {}", op);
+ }
+}
+
+IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
+ AtomSize size) {
+ static constexpr IR::FpControl f16_control{
+ .no_contraction{false},
+ .rounding{IR::FpRounding::RN},
+ .fmz_mode{IR::FmzMode::DontCare},
+ };
+ static constexpr IR::FpControl f32_control{
+ .no_contraction{false},
+ .rounding{IR::FpRounding::RN},
+ .fmz_mode{IR::FmzMode::FTZ},
+ };
+ switch (op) {
+ case AtomOp::ADD:
+ return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
+ : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
+ case AtomOp::MIN:
+ return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
+ case AtomOp::MAX:
+ return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
+ default:
+ throw NotImplementedException("FP Atom Operation {}", op);
+ }
+}
+
+IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<28, 20, s64> addr_offset;
+ BitField<28, 20, u64> rz_addr_offset;
+ BitField<48, 1, u64> e;
+ } const mem{insn};
+
+ const IR::U64 address{[&]() -> IR::U64 {
+ if (mem.e == 0) {
+ return v.ir.UConvert(64, v.X(mem.addr_reg));
+ }
+ return v.L(mem.addr_reg);
+ }()};
+ const u64 addr_offset{[&]() -> u64 {
+ if (mem.addr_reg == IR::Reg::RZ) {
+ // When RZ is used, the address is an absolute address
+ return static_cast<u64>(mem.rz_addr_offset.Value());
+ } else {
+ return static_cast<u64>(mem.addr_offset.Value());
+ }
+ }()};
+ return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
+}
+
+bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
+ // TODO: SAFEADD
+ switch (size) {
+ case AtomSize::S32:
+ case AtomSize::U64:
+ return (op == AtomOp::INC || op == AtomOp::DEC);
+ case AtomSize::S64:
+ return !(op == AtomOp::MIN || op == AtomOp::MAX);
+ case AtomSize::F32:
+ return op != AtomOp::ADD;
+ case AtomSize::F16x2:
+ return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
+ default:
+ return false;
+ }
+}
+
+IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ case AtomSize::F32:
+ case AtomSize::F16x2:
+ return ir.LoadGlobal32(offset);
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return ir.PackUint2x32(ir.LoadGlobal64(offset));
+ default:
+ throw NotImplementedException("Atom Size {}", size);
+ }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ case AtomSize::F16x2:
+ return v.X(dest_reg, IR::U32{result});
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return v.L(dest_reg, IR::U64{result});
+ case AtomSize::F32:
+ return v.F(dest_reg, IR::F32{result});
+ default:
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOM(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<49, 3, AtomSize> size;
+ BitField<52, 4, AtomOp> op;
+ } const atom{insn};
+
+ const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64};
+ const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64};
+ const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2};
+ const IR::U64 offset{AtomOffset(*this, insn)};
+ IR::Value result;
+
+ if (AtomOpNotApplicable(atom.size, atom.op)) {
+ result = LoadGlobal(ir, offset, atom.size);
+ } else if (!is_integer) {
+ if (atom.size == AtomSize::F32) {
+ result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size);
+ } else {
+ const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))};
+ result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size);
+ }
+ } else if (size_64) {
+ result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed);
+ } else {
+ result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed);
+ }
+ StoreResult(*this, atom.dest_reg, result, atom.size);
+}
+
+void TranslatorVisitor::RED(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> src_reg_b;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 3, AtomSize> size;
+ BitField<23, 3, AtomOp> op;
+ } const red{insn};
+
+ if (AtomOpNotApplicable(red.size, red.op)) {
+ return;
+ }
+ const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64};
+ const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64};
+ const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2};
+ const IR::U64 offset{AtomOffset(*this, insn)};
+ if (!is_integer) {
+ if (red.size == AtomSize::F32) {
+ ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size);
+ } else {
+ const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))};
+ ApplyFpAtomOp(ir, offset, src_b, red.op, red.size);
+ }
+ } else if (size_64) {
+ ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed);
+ } else {
+ ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed);
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 000000000..8b974621e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,110 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+};
+
+enum class AtomsSize : u64 {
+ U32,
+ S32,
+ U64,
+};
+
+IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
+ bool is_signed) {
+ switch (op) {
+ case AtomOp::ADD:
+ return ir.SharedAtomicIAdd(offset, op_b);
+ case AtomOp::MIN:
+ return ir.SharedAtomicIMin(offset, op_b, is_signed);
+ case AtomOp::MAX:
+ return ir.SharedAtomicIMax(offset, op_b, is_signed);
+ case AtomOp::INC:
+ return ir.SharedAtomicInc(offset, op_b);
+ case AtomOp::DEC:
+ return ir.SharedAtomicDec(offset, op_b);
+ case AtomOp::AND:
+ return ir.SharedAtomicAnd(offset, op_b);
+ case AtomOp::OR:
+ return ir.SharedAtomicOr(offset, op_b);
+ case AtomOp::XOR:
+ return ir.SharedAtomicXor(offset, op_b);
+ case AtomOp::EXCH:
+ return ir.SharedAtomicExchange(offset, op_b);
+ default:
+ throw NotImplementedException("Integer Atoms Operation {}", op);
+ }
+}
+
+IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<30, 22, u64> absolute_offset;
+ BitField<30, 22, s64> relative_offset;
+ } const encoding{insn};
+
+ if (encoding.offset_reg == IR::Reg::RZ) {
+ return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
+ } else {
+ const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
+ return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+ }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
+ switch (size) {
+ case AtomsSize::U32:
+ case AtomsSize::S32:
+ return v.X(dest_reg, IR::U32{result});
+ case AtomsSize::U64:
+ return v.L(dest_reg, IR::U64{result});
+ default:
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOMS(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<28, 2, AtomsSize> size;
+ BitField<52, 4, AtomOp> op;
+ } const atoms{insn};
+
+ const bool size_64{atoms.size == AtomsSize::U64};
+ if (size_64 && atoms.op != AtomOp::EXCH) {
+ throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
+ }
+ const bool is_signed{atoms.size == AtomsSize::S32};
+ const IR::U32 offset{AtomsOffset(*this, insn)};
+
+ IR::Value result;
+ if (size_64) {
+ result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
+ } else {
+ result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
+ }
+ StoreResult(*this, atoms.dest_reg, result, atoms.size);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 327941223..aebe3072a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) {
ThrowNotImplemented(Opcode::ATOM_cas);
}
-void TranslatorVisitor::ATOM(u64) {
- ThrowNotImplemented(Opcode::ATOM);
-}
-
void TranslatorVisitor::ATOMS_cas(u64) {
ThrowNotImplemented(Opcode::ATOMS_cas);
}
-void TranslatorVisitor::ATOMS(u64) {
- ThrowNotImplemented(Opcode::ATOMS);
-}
-
void TranslatorVisitor::B2R(u64) {
ThrowNotImplemented(Opcode::B2R);
}
@@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) {
ThrowNotImplemented(Opcode::RAM);
}
-void TranslatorVisitor::RED(u64) {
- ThrowNotImplemented(Opcode::RED);
-}
-
void TranslatorVisitor::RET(u64) {
ThrowNotImplemented(Opcode::RET);
}