diff options
Diffstat (limited to 'src/shader_recompiler/frontend/maxwell')
3 files changed, 332 insertions, 12 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..7a32c5eb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -0,0 +1,222 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, + SAFEADD, +}; + +enum class AtomSize : u64 { + U32, + S32, + U64, + F32, + F16x2, + S64, +}; + +IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, + AtomOp op, bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.GlobalAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.GlobalAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.GlobalAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.GlobalAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.GlobalAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.GlobalAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.GlobalAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.GlobalAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.GlobalAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atom Operation {}", op); + } +} + +IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, + AtomSize size) { + static constexpr IR::FpControl f16_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RN}, + .fmz_mode{IR::FmzMode::DontCare}, + }; + static constexpr IR::FpControl f32_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RN}, + .fmz_mode{IR::FmzMode::FTZ}, + }; + switch (op) { + case AtomOp::ADD: + return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) + : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); + case AtomOp::MIN: + return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); + case AtomOp::MAX: + return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); + default: + throw NotImplementedException("FP Atom Operation {}", op); + } +} + +IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> addr_reg; + BitField<28, 20, s64> addr_offset; + BitField<28, 20, u64> rz_addr_offset; + BitField<48, 1, u64> e; + } const mem{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (mem.e == 0) { + return v.ir.UConvert(64, v.X(mem.addr_reg)); + } + return v.L(mem.addr_reg); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast<u64>(mem.rz_addr_offset.Value()); + } else { + return static_cast<u64>(mem.addr_offset.Value()); + } + }()}; + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} + +bool AtomOpNotApplicable(AtomSize size, AtomOp op) { + // TODO: SAFEADD + switch (size) { + case AtomSize::S32: + case AtomSize::U64: + return (op == AtomOp::INC || op == AtomOp::DEC); + case AtomSize::S64: + return !(op == AtomOp::MIN || op == AtomOp::MAX); + case AtomSize::F32: + return op != AtomOp::ADD; + case AtomSize::F16x2: + return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); + default: + return false; + } +} + +IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F32: + case AtomSize::F16x2: + return ir.LoadGlobal32(offset); + case AtomSize::U64: + case AtomSize::S64: + return ir.PackUint2x32(ir.LoadGlobal64(offset)); + default: + throw NotImplementedException("Atom Size {}", size); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F16x2: + return v.X(dest_reg, IR::U32{result}); + case AtomSize::U64: + case AtomSize::S64: + return v.L(dest_reg, IR::U64{result}); + case AtomSize::F32: + return v.F(dest_reg, IR::F32{result}); + default: + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOM(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<49, 3, AtomSize> size; + BitField<52, 4, AtomOp> op; + } const atom{insn}; + + const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64}; + const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64}; + const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2}; + const IR::U64 offset{AtomOffset(*this, insn)}; + IR::Value result; + + if (AtomOpNotApplicable(atom.size, atom.op)) { + result = LoadGlobal(ir, offset, atom.size); + } else if (!is_integer) { + if (atom.size == AtomSize::F32) { + result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size); + } else { + const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))}; + result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size); + } + } else if (size_64) { + result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed); + } else { + result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed); + } + StoreResult(*this, atom.dest_reg, result, atom.size); +} + +void TranslatorVisitor::RED(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg_b; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 3, AtomSize> size; + BitField<23, 3, AtomOp> op; + } const red{insn}; + + if (AtomOpNotApplicable(red.size, red.op)) { + return; + } + const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64}; + const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64}; + const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2}; + const IR::U64 offset{AtomOffset(*this, insn)}; + if (!is_integer) { + if (red.size == AtomSize::F32) { + ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size); + } else { + const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))}; + ApplyFpAtomOp(ir, offset, src_b, red.op, red.size); + } + } else if (size_64) { + ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed); + } else { + ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class AtomsSize : u64 { + U32, + S32, + U64, +}; + +IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, + bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.SharedAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.SharedAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.SharedAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.SharedAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.SharedAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.SharedAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.SharedAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.SharedAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.SharedAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atoms Operation {}", op); + } +} + +IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> offset_reg; + BitField<30, 22, u64> absolute_offset; + BitField<30, 22, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2)); + } else { + const s32 relative{static_cast<s32>(encoding.relative_offset << 2)}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { + switch (size) { + case AtomsSize::U32: + case AtomsSize::S32: + return v.X(dest_reg, IR::U32{result}); + case AtomsSize::U64: + return v.L(dest_reg, IR::U64{result}); + default: + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOMS(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<28, 2, AtomsSize> size; + BitField<52, 4, AtomOp> op; + } const atoms{insn}; + + const bool size_64{atoms.size == AtomsSize::U64}; + if (size_64 && atoms.op != AtomOp::EXCH) { + throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); + } + const bool is_signed{atoms.size == AtomsSize::S32}; + const IR::U32 offset{AtomsOffset(*this, insn)}; + + IR::Value result; + if (size_64) { + result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); + } else { + result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); + } + StoreResult(*this, atoms.dest_reg, result, atoms.size); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 327941223..aebe3072a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) { ThrowNotImplemented(Opcode::ATOM_cas); } -void TranslatorVisitor::ATOM(u64) { - ThrowNotImplemented(Opcode::ATOM); -} - void TranslatorVisitor::ATOMS_cas(u64) { ThrowNotImplemented(Opcode::ATOMS_cas); } -void TranslatorVisitor::ATOMS(u64) { - ThrowNotImplemented(Opcode::ATOMS); -} - void TranslatorVisitor::B2R(u64) { ThrowNotImplemented(Opcode::B2R); } @@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) { ThrowNotImplemented(Opcode::RAM); } -void TranslatorVisitor::RED(u64) { - ThrowNotImplemented(Opcode::RED); -} - void TranslatorVisitor::RET(u64) { ThrowNotImplemented(Opcode::RET); } |