diff options
Diffstat (limited to 'src')
15 files changed, 111 insertions, 28 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 3d028db0f..7d8b938d1 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -169,6 +169,7 @@ void EmitContext::DefineCommonTypes(const Info& info) { AddCapability(spv::Capability::Float64); F64.Define(*this, TypeFloat(64), "f64"); } + array_U32x2 = Name(TypeArray(U32[2], Constant(U32[1], 4U)), "array-u32x2"); } void EmitContext::DefineCommonConstants() { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 5ed815c06..0a1e85408 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -65,6 +65,7 @@ public: VectorTypes U32; VectorTypes F16; VectorTypes F64; + Id array_U32x2; Id true_value{}; Id false_value{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 4f945b917..eaf94dad5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -95,7 +95,7 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va Id value); void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); @@ -104,7 +104,7 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); @@ -113,7 +113,7 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); @@ -122,6 +122,7 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 616e63676..0da682859 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -3,10 +3,15 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { +Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + const auto info{inst->Flags<IR::CompositeDecoration>()}; + if (info.is_constant) { + return ctx.ConstantComposite(ctx.U32[2], e1, e2); + } return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); } @@ -42,7 +47,12 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); } -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { +Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + + const auto info{inst->Flags<IR::CompositeDecoration>()}; + if (info.is_constant) { + return ctx.ConstantComposite(ctx.F16[2], e1, e2); + } return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); } @@ -78,7 +88,11 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); } -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + const auto info{inst->Flags<IR::CompositeDecoration>()}; + if (info.is_constant) { + return ctx.ConstantComposite(ctx.F32[2], e1, e2); + } return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); } @@ -150,4 +164,15 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); } +Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + const auto info{inst->Flags<IR::CompositeDecoration>()}; + if (info.is_constant) { + return ctx.ConstantComposite(ctx.array_U32x2, e1, e2, e3, e4); + } + if (ctx.profile.support_variadic_ptp) { + return OpCompositeConstruct(ctx.array_U32x2, e1, e2, e3, e4); + } + return {}; +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 589013773..776afd4ab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -30,10 +30,13 @@ public: } } - explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset) { + explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset, Id offset2) { if (Sirit::ValidId(offset)) { Add(spv::ImageOperandsMask::Offset, offset); } + if (Sirit::ValidId(offset2)) { + Add(spv::ImageOperandsMask::ConstOffsets, offset2); + } } void Add(spv::ImageOperandsMask new_mask, Id value) { @@ -177,7 +180,7 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, [[maybe_unused]] Id offset2) { const auto info{inst->Flags<IR::TextureInstInfo>()}; - const ImageOperands operands(ctx, offset); + const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, ctx.F32[4], Texture(ctx, index), coords, ctx.Constant(ctx.U32[1], info.gather_component.Value()), operands.Mask(), @@ -187,7 +190,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, [[maybe_unused]] Id offset2, Id dref) { const auto info{inst->Flags<IR::TextureInstInfo>()}; - const ImageOperands operands(ctx, offset); + const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b365a8a6e..f49c30484 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -398,15 +398,16 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { if (e1.Type() != e2.Type()) { throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); } + CompositeDecoration decor{}; switch (e1.Type()) { case Type::U32: - return Inst(Opcode::CompositeConstructU32x2, e1, e2); + return Inst(Opcode::CompositeConstructU32x2, Flags{decor}, e1, e2); case Type::F16: - return Inst(Opcode::CompositeConstructF16x2, e1, e2); + return Inst(Opcode::CompositeConstructF16x2, Flags{decor}, e1, e2); case Type::F32: - return Inst(Opcode::CompositeConstructF32x2, e1, e2); + return Inst(Opcode::CompositeConstructF32x2, Flags{decor}, e1, e2); case Type::F64: - return Inst(Opcode::CompositeConstructF64x2, e1, e2); + return Inst(Opcode::CompositeConstructF64x2, Flags{decor}, e1, e2); default: ThrowInvalidType(e1.Type()); } @@ -436,6 +437,7 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), e3.Type(), e4.Type()); } + CompositeDecoration decor{}; switch (e1.Type()) { case Type::U32: return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); @@ -445,6 +447,8 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); case Type::F64: return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); + case Type::U32x2: + return Inst(Opcode::CompositeConstructArrayU32x2, Flags{decor}, e1, e2, e3, e4); default: ThrowInvalidType(e1.Type()); } @@ -1481,7 +1485,7 @@ Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Val } Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, - const Value& offset2, const F32& dref, TextureInstInfo info) { + const Value& offset2, const F32& dref, TextureInstInfo info) { const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref : Opcode::BindlessImageGatherDref}; return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 770bbd550..77296cfa4 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -99,6 +99,12 @@ public: return ret; } + template <typename FlagsType> + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) + [[nodiscard]] void SetFlags(FlagsType& new_val) noexcept { + std::memcpy(&flags, &new_val, sizeof(new_val)); + } + /// Intrusively store the host definition of this instruction. template <typename DefinitionType> void SetDefinition(DefinitionType def) { diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 4f09a4b39..20fb14fea 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -32,6 +32,11 @@ struct FpControl { }; static_assert(sizeof(FpControl) <= sizeof(u32)); +struct CompositeDecoration { + bool is_constant{false}; +}; +static_assert(sizeof(CompositeDecoration) <= sizeof(u32)); + union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 60a0bc980..0dc0aabdf 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -126,6 +126,7 @@ OPCODE(CompositeExtractF64x4, F64, F64x OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) +OPCODE(CompositeConstructArrayU32x2, Opaque, U32x2, U32x2, U32x2, U32x2, ) // Select operations OPCODE(SelectU1, U1, U1, U1, U1, ) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index e8e4662e7..7671fc3d8 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -44,6 +44,20 @@ bool Value::IsEmpty() const noexcept { return type == Type::Void; } +bool Value::IsConstantContainer() const { + if (IsImmediate()) { + return true; + } + ValidateAccess(Type::Opaque); + auto num_args = inst->NumArgs(); + for (size_t i = 0; i < num_args; i++) { + if (!inst->Arg(i).IsConstantContainer()) { + return false; + } + } + return true; +} + bool Value::IsImmediate() const noexcept { if (IsIdentity()) { return inst->Arg(0).IsImmediate(); diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index b27601e70..5d6e74c14 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -38,6 +38,7 @@ public: [[nodiscard]] bool IsImmediate() const noexcept; [[nodiscard]] bool IsLabel() const noexcept; [[nodiscard]] IR::Type Type() const noexcept; + [[nodiscard]] bool IsConstantContainer() const; [[nodiscard]] IR::Inst* Inst() const; [[nodiscard]] IR::Block* Label() const; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 1f1689c43..b2da079f9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -101,16 +101,18 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { switch (type) { case TextureType::_1D: case TextureType::ARRAY_1D: - return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); case TextureType::_2D: case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); case TextureType::_3D: case TextureType::ARRAY_3D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); case TextureType::CUBE: case TextureType::ARRAY_CUBE: throw NotImplementedException("Illegal offset on CUBE sample"); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index 8c6384040..cdf5cb5c4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -106,17 +106,17 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { throw NotImplementedException("Invalid texture type {}", type); } -std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { +IR::Value MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { const IR::U32 value1{v.X(reg++)}; const IR::U32 value2{v.X(reg++)}; - const auto getVector = ([&v](const IR::U32& value) { + const IR::U32 bitsize = v.ir.Imm32(6); + const auto getVector = ([&v, &bitsize](const IR::U32& value, u32 base) { return v.ir.CompositeConstruct( - v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), - v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), - v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true), - v.ir.BitFieldExtract(value, v.ir.Imm32(24), v.ir.Imm32(6), true)); + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 0), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 8), bitsize, true)); }); - return {getVector(value1), getVector(value2)}; + return v.ir.CompositeConstruct(getVector(value1, 0), getVector(value1, 16), + getVector(value2, 0), getVector(value2, 16)); } void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, @@ -155,7 +155,7 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy break; } case OffsetType::PTP: { - std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); + offset2 = MakeOffsetPTP(v, meta_reg); break; } default: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 28060dccf..12159e738 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -355,6 +355,17 @@ void FoldBranchConditional(IR::Inst& inst) { } } +void FoldConstantComposite(IR::Inst& inst, size_t amount = 2) { + for (size_t i = 0; i < amount; i++) { + if (!inst.Arg(i).IsConstantContainer()) { + return; + } + } + auto info{inst.Flags<IR::CompositeDecoration>()}; + info.is_constant = true; + inst.SetFlags(info); +} + void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -380,6 +391,13 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::SelectF32: case IR::Opcode::SelectF64: return FoldSelect(inst); + case IR::Opcode::CompositeConstructU32x2: + case IR::Opcode::CompositeConstructF16x2: + case IR::Opcode::CompositeConstructF32x2: + case IR::Opcode::CompositeConstructF64x2: + return FoldConstantComposite(inst, 2); + case IR::Opcode::CompositeConstructArrayU32x2: + return FoldConstantComposite(inst, 4); case IR::Opcode::FPMul32: return FoldFPMul32(inst); case IR::Opcode::LogicalAnd: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 41550bfc6..64031f49c 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -30,6 +30,7 @@ struct Profile { bool support_fp32_signed_zero_nan_preserve{}; bool support_fp64_signed_zero_nan_preserve{}; bool support_vote{}; + bool support_variadic_ptp{}; bool warp_size_potentially_larger_than_guest{}; // FClamp is broken and OpFMax + OpFMin should be used instead |