From 148f0463f4761b937e5077db2acf2f9f59a1fc88 Mon Sep 17 00:00:00 2001 From: Dong Wang Date: Mon, 27 Nov 2023 14:31:30 +0800 Subject: [PATCH] Support vertex offset mode with new D3D12_VERTEX_BUFFER_VIEW In vertex offset mode, we will use D3D12_VERTEX_BUFFER_VIEW to replace the SRD; the address and size fields are already in the right place. We only need to get the stride and fill the fourth DWORD. Then we calculate the offset from the stride and the vertex index. For the uber fetch shader, the SRD will be updated in the driver. --- include/vkgcDefs.h | 6 +- lgc/elfLinker/FetchShader.cpp | 3 +- lgc/include/lgc/patch/VertexFetch.h | 2 +- lgc/interface/lgc/Pipeline.h | 9 +- lgc/patch/VertexFetch.cpp | 206 ++++++++++++++++++--------- llpc/context/llpcGraphicsContext.cpp | 1 + tool/dumper/vkgcPipelineDumper.cpp | 2 + tool/vfx/vfx.h | 11 +- tool/vfx/vfxPipelineDoc.cpp | 1 + tool/vfx/vfxVkSection.h | 1 + 10 files changed, 158 insertions(+), 84 deletions(-) diff --git a/include/vkgcDefs.h b/include/vkgcDefs.h index 06623523d0..945f8255b5 100644 --- a/include/vkgcDefs.h +++ b/include/vkgcDefs.h @@ -49,7 +49,7 @@ #define LLPC_INTERFACE_MAJOR_VERSION 70 /// LLPC minor interface version. -#define LLPC_INTERFACE_MINOR_VERSION 1 +#define LLPC_INTERFACE_MINOR_VERSION 2 #ifndef LLPC_CLIENT_INTERFACE_MAJOR_VERSION #error LLPC client version is not defined @@ -80,6 +80,7 @@ // %Version History // | %Version | Change Description | // | -------- | ----------------------------------------------------------------------------------------------------- | +// | 70.2 | Add useSoftwareVertexBufferDescriptors to GraphicsPipelineBuildInfo | // | 70.1 | Add cpsFlags to RayTracingPipelineBuildInfo | // | 70.0 | Add enablePrimGeneratedQuery to PipelineOptions | // | 69.1 | Add useBarycentric to ShaderModuleUsage | @@ -1367,8 +1368,9 @@ struct GraphicsPipelineBuildInfo { bool dynamicVertexStride; ///< Dynamic Vertex input Stride is enabled. 
bool enableUberFetchShader; ///< Use uber fetch shader bool enableColorExportShader; ///< Explicitly build color export shader, UnlinkedStageFragment elf will - /// return extra meta data. + /// return extra meta data. bool enableEarlyCompile; ///< Whether enable early compile + bool useSoftwareVertexBufferDescriptors; ///< Use software vertex buffer descriptors to structure SRD. #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 BinaryData shaderLibrary; ///< SPIR-V library binary data #endif diff --git a/lgc/elfLinker/FetchShader.cpp b/lgc/elfLinker/FetchShader.cpp index 0a6c8ec277..9da79b455e 100644 --- a/lgc/elfLinker/FetchShader.cpp +++ b/lgc/elfLinker/FetchShader.cpp @@ -101,7 +101,8 @@ Module *FetchShader::generate() { // // @param [in/out] fetchFunc : The function for the fetch shader. void FetchShader::generateFetchShaderBody(Function *fetchFunc) { // Process each vertex input. - std::unique_ptr vertexFetch(VertexFetch::create(m_lgcContext)); + std::unique_ptr vertexFetch( + VertexFetch::create(m_lgcContext, m_pipelineState->getOptions().useSoftwareVertexBufferDescriptors)); auto ret = cast(fetchFunc->back().getTerminator()); BuilderImpl builder(m_pipelineState); builder.SetInsertPoint(ret); diff --git a/lgc/include/lgc/patch/VertexFetch.h b/lgc/include/lgc/patch/VertexFetch.h index fee7120de5..c6ef2e03d2 100644 --- a/lgc/include/lgc/patch/VertexFetch.h +++ b/lgc/include/lgc/patch/VertexFetch.h @@ -46,7 +46,7 @@ class VertexFetch { virtual ~VertexFetch() {} // Create a VertexFetch - static VertexFetch *create(LgcContext *lgcContext); + static VertexFetch *create(LgcContext *lgcContext, bool useSoftwareVertexBufferDescriptors); // Generate code to fetch a vertex value virtual llvm::Value *fetchVertex(llvm::Type *inputTy, const VertexInputDescription *description, unsigned location, diff --git a/lgc/interface/lgc/Pipeline.h b/lgc/interface/lgc/Pipeline.h index 75f4335340..87b47bc7a8 100644 --- a/lgc/interface/lgc/Pipeline.h +++ b/lgc/interface/lgc/Pipeline.h @@ 
-178,10 +178,11 @@ union Options { bool fragCoordUsesInterpLoc; // Determining fragCoord use InterpLoc bool disableSampleMask; // Disable export of sample mask from PS unsigned reserved20; - RayTracingIndirectMode rtIndirectMode; // Ray tracing indirect mode - bool enablePrimGeneratedQuery; // Whether to enable primitive generated counter - bool enableFragColor; // If enabled, do frag color broadcast - unsigned cpsFlags; // CPS feature flags + RayTracingIndirectMode rtIndirectMode; // Ray tracing indirect mode + bool enablePrimGeneratedQuery; // Whether to enable primitive generated counter + bool enableFragColor; // If enabled, do frag color broadcast + bool useSoftwareVertexBufferDescriptors; // Use software vertex buffer descriptors to structure SRD. + unsigned cpsFlags; // CPS feature flags }; }; static_assert(sizeof(Options) == sizeof(Options::u32All)); diff --git a/lgc/patch/VertexFetch.cpp b/lgc/patch/VertexFetch.cpp index 92acdf8019..9621437cab 100644 --- a/lgc/patch/VertexFetch.cpp +++ b/lgc/patch/VertexFetch.cpp @@ -88,7 +88,7 @@ struct VertexCompFormatInfo { // Vertex fetch manager class VertexFetchImpl : public VertexFetch { public: - VertexFetchImpl(LgcContext *lgcContext); + VertexFetchImpl(LgcContext *lgcContext, bool vtxBufferOffsetMode); VertexFetchImpl(const VertexFetchImpl &) = delete; VertexFetchImpl &operator=(const VertexFetchImpl &) = delete; @@ -100,8 +100,6 @@ class VertexFetchImpl : public VertexFetch { Value *fetchVertex(InputImportGenericOp *inst, Value *descPtr, Value *locMasks, BuilderBase &builder) override; private: - void initialize(PipelineState *pipelineState); - static VertexFormatInfo getVertexFormatInfo(const VertexInputDescription *description); // Gets variable corresponding to vertex index @@ -116,8 +114,9 @@ class VertexFetchImpl : public VertexFetch { Value *loadVertexBufferDescriptor(unsigned binding, BuilderImpl &builderImpl); - void addVertexFetchInst(Value *vbDesc, unsigned numChannels, bool is16bitFetch, Value 
*vbIndex, unsigned offset, - unsigned stride, unsigned dfmt, unsigned nfmt, Instruction *insertPos, Value **ppFetch) const; + void addVertexFetchInst(Value *vbDesc, unsigned numChannels, bool is16bitFetch, Value *vbIndex, Value *srdStride, + unsigned offset, unsigned stride, unsigned dfmt, unsigned nfmt, Instruction *insertPos, + Value **ppFetch) const; bool needPostShuffle(const VertexInputDescription *inputDesc, std::vector &shuffleMask) const; @@ -125,6 +124,8 @@ class VertexFetchImpl : public VertexFetch { bool needPatch32(const VertexInputDescription *inputDesc) const; + std::pair convertSrdToOffsetMode(Value *vbDesc, BuilderBase &builder); + LgcContext *m_lgcContext = nullptr; // LGC context LLVMContext *m_context = nullptr; // LLVM context Value *m_vertexBufTablePtr = nullptr; // Vertex buffer table pointer @@ -132,6 +133,8 @@ class VertexFetchImpl : public VertexFetch { Value *m_vertexIndex = nullptr; // Vertex index Value *m_instanceIndex = nullptr; // Instance index + bool m_useSoftwareVertexBufferDescriptors = false; // Use vertex offset mode + static const VertexCompFormatInfo m_vertexCompFormatInfo[]; // Info table of vertex component format static const unsigned char m_vertexFormatMapGfx10[][9]; // Info table of vertex format mapping for GFX10 static const unsigned char m_vertexFormatMapGfx11[][9]; // Info table of vertex format mapping for GFX11 @@ -610,7 +613,9 @@ bool LowerVertexFetch::runImpl(Module &module, PipelineState *pipelineState) { if (vertexFetches.empty()) return false; - std::unique_ptr vertexFetch(VertexFetch::create(pipelineState->getLgcContext())); + std::unique_ptr vertexFetch( + VertexFetch::create(pipelineState->getLgcContext(), + pipelineState->getOptions().useSoftwareVertexBufferDescriptors)); BuilderImpl builder(pipelineState); if (pipelineState->getOptions().enableUberFetchShader) { @@ -843,8 +848,6 @@ Value *VertexFetchImpl::fetchVertex(InputImportGenericOp *inst, Value *descPtr, 
loadInst->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(loadInst->getContext(), {})); loadInst->setAlignment(Align(16)); Value *vbDesc = loadInst; - // Replace buffer format - vbDesc = builder.CreateInsertElement(vbDesc, bufferFormat, 3); auto isPerInstance = builder.CreateICmpNE(perInstance, zero); @@ -868,6 +871,24 @@ Value *VertexFetchImpl::fetchVertex(InputImportGenericOp *inst, Value *descPtr, const unsigned bitWidth = basicTy->getScalarSizeInBits(); assert(bitWidth == 8 || bitWidth == 16 || bitWidth == 32 || bitWidth == 64); + Intrinsic::ID instId = Intrinsic::amdgcn_struct_buffer_load_format; + if (m_useSoftwareVertexBufferDescriptors) { + instId = Intrinsic::amdgcn_raw_buffer_load_format; + auto srdStride = builder.CreateExtractElement(vbDesc, 3); + byteOffset = builder.CreateAdd(builder.CreateMul(vbIndex, srdStride), byteOffset); + } + // Replace buffer format + vbDesc = builder.CreateInsertElement(vbDesc, bufferFormat, 3); + + SmallVector args; + args.push_back(vbDesc); + if (!m_useSoftwareVertexBufferDescriptors) + args.push_back(vbIndex); + unsigned offsetIdx = args.size(); + args.push_back(byteOffset); + args.push_back(builder.getInt32(0)); + args.push_back(builder.getInt32(0)); + // If ispacked is false, we require per-component fetch builder.CreateCondBr(isPacked, wholeVertexBlock, comp0Block); @@ -879,22 +900,14 @@ Value *VertexFetchImpl::fetchVertex(InputImportGenericOp *inst, Value *descPtr, auto fetch16Type = FixedVectorType::get(Type::getInt16Ty(*m_context), 4); auto fetchType = is16bitFetch ? 
fetch16Type : fetch32Type; - Value *args[] = { - vbDesc, // rsrc - vbIndex, // vindex - byteOffset, // offset - builder.getInt32(0), // soffset - builder.getInt32(0) // glc, slc - }; - Value *wholeVertex = nullptr; { builder.SetInsertPoint(wholeVertexBlock); - wholeVertex = builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_load_format, fetchType, args, {}); + wholeVertex = builder.CreateIntrinsic(instId, fetchType, args, {}); if (is64bitFetch) { // If it is 64-bit, we need the second fetch - args[2] = builder.CreateAdd(args[2], builder.getInt32(SizeOfVec4)); - auto secondFetch = builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_load_format, fetchType, args, {}); + args[offsetIdx] = builder.CreateAdd(args[offsetIdx], builder.getInt32(SizeOfVec4)); + auto secondFetch = builder.CreateIntrinsic(instId, fetchType, args, {}); wholeVertex = builder.CreateShuffleVector(wholeVertex, secondFetch, ArrayRef{0, 1, 2, 3, 4, 5, 6, 7}); } builder.CreateBr(fetchUberEndBlock); @@ -939,16 +952,16 @@ Value *VertexFetchImpl::fetchVertex(InputImportGenericOp *inst, Value *descPtr, { builder.SetInsertPoint(comp0Block); - args[2] = byteOffset; + args[offsetIdx] = byteOffset; if (is64bitFetch) { - Value *comp = builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_load_format, fetch64Type, args, {}); + Value *comp = builder.CreateIntrinsic(instId, fetch64Type, args, {}); Value *elem = builder.CreateExtractElement(comp, uint64_t(0)); lastVert = builder.CreateInsertElement(lastVert, elem, uint64_t(0)); elem = builder.CreateExtractElement(comp, 1); lastVert = builder.CreateInsertElement(lastVert, elem, 1); comp0 = lastVert; } else { - comp0 = builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_load_format, compType, args, {}); + comp0 = builder.CreateIntrinsic(instId, compType, args, {}); lastVert = builder.CreateInsertElement(lastVert, comp0, uint64_t(0)); comp0 = lastVert; } @@ -961,16 +974,16 @@ Value *VertexFetchImpl::fetchVertex(InputImportGenericOp *inst, Value *descPtr, { 
builder.SetInsertPoint(comp1Block); // Add offset. offset = offset + componentSize - args[2] = builder.CreateAdd(args[2], componentSize); + args[offsetIdx] = builder.CreateAdd(args[offsetIdx], componentSize); if (is64bitFetch) { - Value *comp = builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_load_format, fetch64Type, args, {}); + Value *comp = builder.CreateIntrinsic(instId, fetch64Type, args, {}); Value *elem = builder.CreateExtractElement(comp, uint64_t(0)); lastVert = builder.CreateInsertElement(lastVert, elem, 2); elem = builder.CreateExtractElement(comp, 1); lastVert = builder.CreateInsertElement(lastVert, elem, 3); comp1 = lastVert; } else { - comp1 = builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_load_format, compType, args, {}); + comp1 = builder.CreateIntrinsic(instId, compType, args, {}); lastVert = builder.CreateInsertElement(lastVert, comp1, 1); comp1 = lastVert; } @@ -981,16 +994,16 @@ Value *VertexFetchImpl::fetchVertex(InputImportGenericOp *inst, Value *descPtr, // .comp2Block { builder.SetInsertPoint(comp2Block); - args[2] = builder.CreateAdd(args[2], componentSize); + args[offsetIdx] = builder.CreateAdd(args[offsetIdx], componentSize); if (is64bitFetch) { - Value *comp = builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_load_format, fetch64Type, args, {}); + Value *comp = builder.CreateIntrinsic(instId, fetch64Type, args, {}); Value *elem = builder.CreateExtractElement(comp, uint64_t(0)); lastVert = builder.CreateInsertElement(lastVert, elem, 4); elem = builder.CreateExtractElement(comp, 1); lastVert = builder.CreateInsertElement(lastVert, elem, 5); comp2 = lastVert; } else { - comp2 = builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_load_format, compType, args, {}); + comp2 = builder.CreateIntrinsic(instId, compType, args, {}); lastVert = builder.CreateInsertElement(lastVert, comp2, 2); comp2 = lastVert; } @@ -1001,16 +1014,16 @@ Value *VertexFetchImpl::fetchVertex(InputImportGenericOp *inst, Value *descPtr, // 
.comp3Block { builder.SetInsertPoint(comp3Block); - args[2] = builder.CreateAdd(args[2], componentSize); + args[offsetIdx] = builder.CreateAdd(args[offsetIdx], componentSize); if (is64bitFetch) { - Value *comp = builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_load_format, fetch64Type, args, {}); + Value *comp = builder.CreateIntrinsic(instId, fetch64Type, args, {}); Value *elem = builder.CreateExtractElement(comp, uint64_t(0)); lastVert = builder.CreateInsertElement(lastVert, elem, 6); elem = builder.CreateExtractElement(comp, 1); lastVert = builder.CreateInsertElement(lastVert, elem, 7); comp3 = lastVert; } else { - comp3 = builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_load_format, compType, args, {}); + comp3 = builder.CreateIntrinsic(instId, compType, args, {}); lastVert = builder.CreateInsertElement(lastVert, comp3, 3); comp3 = lastVert; } @@ -1114,16 +1127,17 @@ Value *VertexFetchImpl::fetchVertex(InputImportGenericOp *inst, Value *descPtr, // ===================================================================================================================== // Create a VertexFetch -VertexFetch *VertexFetch::create(LgcContext *lgcContext) { - return new VertexFetchImpl(lgcContext); +VertexFetch *VertexFetch::create(LgcContext *lgcContext, bool useSoftwareVertexBufferDescriptors) { + return new VertexFetchImpl(lgcContext, useSoftwareVertexBufferDescriptors); } // ===================================================================================================================== // Constructor // // @param context : LLVM context -VertexFetchImpl::VertexFetchImpl(LgcContext *lgcContext) - : m_lgcContext(lgcContext), m_context(&lgcContext->getContext()) { +VertexFetchImpl::VertexFetchImpl(LgcContext *lgcContext, bool useSoftwareVertexBufferDescriptors) + : m_lgcContext(lgcContext), m_context(&lgcContext->getContext()), + m_useSoftwareVertexBufferDescriptors(useSoftwareVertexBufferDescriptors) { // Initialize default fetch values auto zero = 
ConstantInt::get(Type::getInt32Ty(*m_context), 0); @@ -1177,6 +1191,9 @@ Value *VertexFetchImpl::fetchVertex(Type *inputTy, const VertexInputDescription BuilderBase &builder = BuilderBase::get(builderImpl); Instruction *insertPos = &*builder.GetInsertPoint(); auto vbDesc = loadVertexBufferDescriptor(description->binding, builderImpl); + Value *srdStride = nullptr; + if (m_useSoftwareVertexBufferDescriptors) + std::tie(vbDesc, srdStride) = convertSrdToOffsetMode(vbDesc, builder); Value *vbIndex = nullptr; if (description->inputRate == VertexInputRateVertex) { @@ -1214,8 +1231,8 @@ Value *VertexFetchImpl::fetchVertex(Type *inputTy, const VertexInputDescription const bool is16bitFetch = (inputTy->getScalarSizeInBits() <= 16); // Do the first vertex fetch operation - addVertexFetchInst(vbDesc, formatInfo.numChannels, is16bitFetch, vbIndex, description->offset, description->stride, - formatInfo.dfmt, formatInfo.nfmt, insertPos, &vertexFetches[0]); + addVertexFetchInst(vbDesc, formatInfo.numChannels, is16bitFetch, vbIndex, srdStride, description->offset, + description->stride, formatInfo.dfmt, formatInfo.nfmt, insertPos, &vertexFetches[0]); // Do post-processing in certain cases std::vector shuffleMask; @@ -1293,7 +1310,7 @@ Value *VertexFetchImpl::fetchVertex(Type *inputTy, const VertexInputDescription dfmt = BUF_DATA_FORMAT_32_32; } - addVertexFetchInst(vbDesc, numChannels, is16bitFetch, vbIndex, description->offset + SizeOfVec4, + addVertexFetchInst(vbDesc, numChannels, is16bitFetch, vbIndex, srdStride, description->offset + SizeOfVec4, description->stride, dfmt, formatInfo.nfmt, insertPos, &vertexFetches[1]); } @@ -1602,16 +1619,27 @@ Value *VertexFetchImpl::loadVertexBufferDescriptor(unsigned binding, BuilderImpl // @param is16bitFetch : Whether it is 16-bit vertex fetch // @param vbIndex : Index of vertex fetch in buffer // @param offset : Vertex attribute offset (in bytes) +// @param srdStride: Stride from SRD. Only for offset mode. 
// @param stride : Vertex attribute stride (in bytes) // @param dfmt : Date format of vertex buffer // @param nfmt : Numeric format of vertex buffer // @param insertPos : Where to insert instructions // @param [out] ppFetch : Destination of vertex fetch void VertexFetchImpl::addVertexFetchInst(Value *vbDesc, unsigned numChannels, bool is16bitFetch, Value *vbIndex, - unsigned offset, unsigned stride, unsigned dfmt, unsigned nfmt, - Instruction *insertPos, Value **ppFetch) const { + Value *srdStride, unsigned offset, unsigned stride, unsigned dfmt, + unsigned nfmt, Instruction *insertPos, Value **ppFetch) const { + const VertexCompFormatInfo *formatInfo = getVertexComponentFormatInfo(dfmt); + Intrinsic::ID instId = Intrinsic::amdgcn_struct_tbuffer_load; + BuilderBase builder(insertPos); + Value *instOffset = builder.getInt32(offset); + if (m_useSoftwareVertexBufferDescriptors) { + instId = Intrinsic::amdgcn_raw_tbuffer_load; + auto index2Offset = builder.CreateMul(vbIndex, srdStride); + instOffset = builder.CreateAdd(index2Offset, instOffset); + } + // NOTE: If the vertex attribute offset and stride are aligned on data format boundaries, we can do a vertex fetch // operation to read the whole vertex. Otherwise, we have to do vertex per-component fetch operations. 
if (((offset % formatInfo->vertexByteSize) == 0 && (stride % formatInfo->vertexByteSize) == 0 && @@ -1620,32 +1648,29 @@ void VertexFetchImpl::addVertexFetchInst(Value *vbDesc, unsigned numChannels, bo dfmt != BufDataFormat8_8 && dfmt != BufDataFormat8_8_8_8 && dfmt != BufDataFormat16_16 && dfmt != BufDataFormat16_16_16_16 && dfmt != BufDataFormat8_8_8 && dfmt != BufDataFormat16_16_16) || formatInfo->compDfmt == dfmt) { + + SmallVector args; + args.push_back(vbDesc); + if (!m_useSoftwareVertexBufferDescriptors) + args.push_back(vbIndex); + args.push_back(instOffset); + args.push_back(builder.getInt32(0)); + args.push_back(builder.getInt32(mapVertexFormat(dfmt, nfmt))); + args.push_back(builder.getInt32(0)); + // Do vertex fetch - Value *args[] = { - vbDesc, // rsrc - vbIndex, // vindex - ConstantInt::get(Type::getInt32Ty(*m_context), offset), // offset - ConstantInt::get(Type::getInt32Ty(*m_context), 0), // soffset - ConstantInt::get(Type::getInt32Ty(*m_context), mapVertexFormat(dfmt, nfmt)), // dfmt, nfmt - ConstantInt::get(Type::getInt32Ty(*m_context), 0) // glc, slc - }; - - StringRef suffix = ""; Type *fetchTy = nullptr; if (is16bitFetch) { switch (numChannels) { case 1: - suffix = ".f16"; fetchTy = Type::getHalfTy(*m_context); break; case 2: - suffix = ".v2f16"; fetchTy = FixedVectorType::get(Type::getHalfTy(*m_context), 2); break; case 3: case 4: - suffix = ".v4f16"; fetchTy = FixedVectorType::get(Type::getHalfTy(*m_context), 4); break; default: @@ -1655,16 +1680,13 @@ void VertexFetchImpl::addVertexFetchInst(Value *vbDesc, unsigned numChannels, bo } else { switch (numChannels) { case 1: - suffix = ".i32"; fetchTy = Type::getInt32Ty(*m_context); break; case 2: - suffix = ".v2i32"; fetchTy = FixedVectorType::get(Type::getInt32Ty(*m_context), 2); break; case 3: case 4: - suffix = ".v4i32"; fetchTy = FixedVectorType::get(Type::getInt32Ty(*m_context), 4); break; default: @@ -1673,7 +1695,7 @@ void VertexFetchImpl::addVertexFetchInst(Value *vbDesc, unsigned 
numChannels, bo } } - Value *fetch = emitCall((Twine("llvm.amdgcn.struct.tbuffer.load") + suffix).str(), fetchTy, args, {}, insertPos); + Value *fetch = builder.CreateIntrinsic(instId, fetchTy, args, {}); if (is16bitFetch) { // NOTE: The fetch values are represented by , so we will bitcast the float16 values to @@ -1707,28 +1729,33 @@ void VertexFetchImpl::addVertexFetchInst(Value *vbDesc, unsigned numChannels, bo : FixedVectorType::get(Type::getInt32Ty(*m_context), numChannels); Value *fetch = PoisonValue::get(fetchTy); + SmallVector args; + args.push_back(vbDesc); + if (!m_useSoftwareVertexBufferDescriptors) + args.push_back(vbIndex); + unsigned offsetIdx = args.size(); + args.push_back(instOffset); + args.push_back(builder.getInt32(0)); + args.push_back(builder.getInt32(mapVertexFormat(formatInfo->compDfmt, nfmt))); + args.push_back(builder.getInt32(0)); + // Do vertex per-component fetches for (unsigned i = 0; i < formatInfo->compCount; ++i) { - Value *args[] = { - vbDesc, // rsrc - compVbIndices[i], // vindex - ConstantInt::get(Type::getInt32Ty(*m_context), compOffsets[i]), // offset - ConstantInt::get(Type::getInt32Ty(*m_context), 0), // soffset - ConstantInt::get(Type::getInt32Ty(*m_context), mapVertexFormat(formatInfo->compDfmt, nfmt)), // dfmt, nfmt - ConstantInt::get(Type::getInt32Ty(*m_context), 0) // glc, slc - }; + Value *compOffset = builder.getInt32(compOffsets[i]); + if (m_useSoftwareVertexBufferDescriptors) + args[offsetIdx] = builder.CreateAdd(instOffset, compOffset); + else + args[offsetIdx] = compOffset; Value *compFetch = nullptr; if (is16bitFetch) { - compFetch = emitCall("llvm.amdgcn.struct.tbuffer.load.f16", Type::getHalfTy(*m_context), args, {}, insertPos); - - compFetch = new BitCastInst(compFetch, Type::getInt16Ty(*m_context), "", insertPos); + compFetch = builder.CreateIntrinsic(instId, builder.getHalfTy(), args, {}); + compFetch = builder.CreateBitCast(compFetch, builder.getInt16Ty()); } else { - compFetch = 
emitCall("llvm.amdgcn.struct.tbuffer.load.i32", Type::getInt32Ty(*m_context), args, {}, insertPos); + compFetch = builder.CreateIntrinsic(instId, builder.getInt32Ty(), args, {}); } - fetch = - InsertElementInst::Create(fetch, compFetch, ConstantInt::get(Type::getInt32Ty(*m_context), i), "", insertPos); + fetch = builder.CreateInsertElement(fetch, compFetch, i); } *ppFetch = fetch; @@ -1791,3 +1818,40 @@ bool VertexFetchImpl::needPatch32(const VertexInputDescription *inputDesc) const bool VertexFetchImpl::needSecondVertexFetch(const VertexInputDescription *inputDesc) const { return inputDesc->dfmt == BufDataFormat64_64_64 || inputDesc->dfmt == BufDataFormat64_64_64_64; } + +// ===================================================================================================================== +// Convert D3D12_VERTEX_BUFFER_VIEW SRD to offset mode. Stride will be used to calculate offset. +// +// @param vbDesc : Original SRD +// @param builder : Builder to use to insert vertex fetch instructions +// @returns : {New SRD,stride} +std::pair VertexFetchImpl::convertSrdToOffsetMode(Value *vbDesc, BuilderBase &builder) { + assert(m_useSoftwareVertexBufferDescriptors); + // NOTE: Vertex buffer SRD is D3D12_VERTEX_BUFFER_VIEW + // struct VertexBufferView + // { + // gpusize gpuva; + // uint32 sizeInBytes; + // uint32 strideInBytes; + // }; + + // Stride is in the fourth DWORD (vector element 3, strideInBytes). 
+ auto srdStride = builder.CreateExtractElement(vbDesc, 3); + + SqBufRsrcWord3 sqBufRsrcWord3 = {}; + sqBufRsrcWord3.bits.dstSelX = BUF_DST_SEL_X; + sqBufRsrcWord3.bits.dstSelY = BUF_DST_SEL_Y; + sqBufRsrcWord3.bits.dstSelZ = BUF_DST_SEL_Z; + sqBufRsrcWord3.bits.dstSelW = BUF_DST_SEL_W; + GfxIpVersion gfxIp = m_lgcContext->getTargetInfo().getGfxIpVersion(); + if (gfxIp.major == 10) { + sqBufRsrcWord3.gfx10.format = BUF_FORMAT_32_UINT; + sqBufRsrcWord3.gfx10.resourceLevel = 1; + sqBufRsrcWord3.gfx10.oobSelect = 3; + } else if (gfxIp.major >= 11) { + sqBufRsrcWord3.gfx11.format = BUF_FORMAT_32_UINT; + sqBufRsrcWord3.gfx11.oobSelect = 3; + } + auto newDesc = builder.CreateInsertElement(vbDesc, builder.getInt32(sqBufRsrcWord3.u32All), 3); + return {newDesc, srdStride}; +} diff --git a/llpc/context/llpcGraphicsContext.cpp b/llpc/context/llpcGraphicsContext.cpp index 866d2b0e82..47c88ae1b3 100644 --- a/llpc/context/llpcGraphicsContext.cpp +++ b/llpc/context/llpcGraphicsContext.cpp @@ -249,6 +249,7 @@ Options GraphicsContext::computePipelineOptions() const { auto pipelineInfo = static_cast(getPipelineBuildInfo()); options.enableUberFetchShader = pipelineInfo->enableUberFetchShader; options.enableColorExportShader = pipelineInfo->enableColorExportShader; + options.useSoftwareVertexBufferDescriptors = pipelineInfo->useSoftwareVertexBufferDescriptors; if (getGfxIpVersion().major >= 10) { // Only set NGG options for a GFX10+ graphics pipeline. 
const auto &nggState = pipelineInfo->nggState; diff --git a/tool/dumper/vkgcPipelineDumper.cpp b/tool/dumper/vkgcPipelineDumper.cpp index c2ee3a7074..1ac6729c08 100644 --- a/tool/dumper/vkgcPipelineDumper.cpp +++ b/tool/dumper/vkgcPipelineDumper.cpp @@ -953,6 +953,7 @@ void PipelineDumper::dumpGraphicsStateInfo(const GraphicsPipelineBuildInfo *pipe dumpFile << "enableUberFetchShader = " << pipelineInfo->enableUberFetchShader << "\n"; dumpFile << "enableEarlyCompile = " << pipelineInfo->enableEarlyCompile << "\n"; dumpFile << "enableColorExportShader = " << pipelineInfo->enableColorExportShader << "\n"; + dumpFile << "useSoftwareVertexBufferDescriptors = " << pipelineInfo->useSoftwareVertexBufferDescriptors << "\n"; dumpPipelineOptions(&pipelineInfo->options, dumpFile); #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 @@ -1573,6 +1574,7 @@ void PipelineDumper::updateHashForNonFragmentState(const GraphicsPipelineBuildIn #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 70 hasher->Update(pipeline->apiXfbOutData.forceEnablePrimStats); #endif + hasher->Update(pipeline->useSoftwareVertexBufferDescriptors); } // ===================================================================================================================== diff --git a/tool/vfx/vfx.h b/tool/vfx/vfx.h index a3cc57469b..bcdfbe3030 100644 --- a/tool/vfx/vfx.h +++ b/tool/vfx/vfx.h @@ -530,11 +530,12 @@ struct GraphicsPipelineState { #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 Vkgc::BinaryData shaderLibrary; // Shader library SPIR-V binary #endif - Vkgc::RtState rtState; // Ray tracing state - bool dynamicVertexStride; // Dynamic Vertex input Stride is enabled. - bool enableUberFetchShader; // Use uber fetch shader - bool enableEarlyCompile; // Enable early compile - bool enableColorExportShader; // Enable color export shader + Vkgc::RtState rtState; // Ray tracing state + bool dynamicVertexStride; // Dynamic Vertex input Stride is enabled. 
+ bool enableUberFetchShader; // Use uber fetch shader + bool enableEarlyCompile; // Enable early compile + bool enableColorExportShader; // Enable color export shader + bool useSoftwareVertexBufferDescriptors; // Use software vertex buffer descriptors float tessLevelInner[2]; float tessLevelOuter[4]; diff --git a/tool/vfx/vfxPipelineDoc.cpp b/tool/vfx/vfxPipelineDoc.cpp index b7b8f0ef5f..b5cabed674 100644 --- a/tool/vfx/vfxPipelineDoc.cpp +++ b/tool/vfx/vfxPipelineDoc.cpp @@ -155,6 +155,7 @@ VfxPipelineStatePtr PipelineDocument::getDocument() { gfxPipelineInfo->enableUberFetchShader = graphicState.enableUberFetchShader; gfxPipelineInfo->enableEarlyCompile = graphicState.enableEarlyCompile; gfxPipelineInfo->enableColorExportShader = graphicState.enableColorExportShader; + gfxPipelineInfo->useSoftwareVertexBufferDescriptors = graphicState.useSoftwareVertexBufferDescriptors; #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 gfxPipelineInfo->shaderLibrary = graphicState.shaderLibrary; #endif diff --git a/tool/vfx/vfxVkSection.h b/tool/vfx/vfxVkSection.h index d8ba2e5163..92fd19ae64 100644 --- a/tool/vfx/vfxVkSection.h +++ b/tool/vfx/vfxVkSection.h @@ -811,6 +811,7 @@ class SectionGraphicsState : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, enableUberFetchShader, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, enableEarlyCompile, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, enableColorExportShader, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, useSoftwareVertexBufferDescriptors, MemberTypeBool, false); INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_shaderLibrary, MemberTypeString, false); INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_rtState, MemberTypeRtState, true); INIT_MEMBER_ARRAY_NAME_TO_ADDR(SectionGraphicsState, tessLevelInner, MemberTypeFloat, 2, false);