Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid calling useResource on resources in argument buffers #2402

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions MoltenVK/MoltenVK/API/mvk_datatypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,14 @@ typedef enum {
kMVKShaderStageMax = kMVKShaderStageCount // Public API legacy value
} MVKShaderStage;

typedef enum {
kMVKBarrierStageVertex = 0,
kMVKBarrierStageFragment,
kMVKBarrierStageCompute,
kMVKBarrierStageCopy,
kMVKBarrierStageCount
} MVKBarrierStage;

/** Returns the Metal MTLColorWriteMask corresponding to the specified Vulkan VkColorComponentFlags. */
MTLColorWriteMask mvkMTLColorWriteMaskFromVkChannelFlags(VkColorComponentFlags vkWriteFlags);

Expand Down
1 change: 1 addition & 0 deletions MoltenVK/MoltenVK/API/mvk_private_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ typedef struct {
VkBool32 needsCubeGradWorkaround; /**< If true, sampling from cube textures with explicit gradients is broken and needs a workaround. */
VkBool32 nativeTextureAtomics; /**< If true, atomic operations on textures are supported natively. */
VkBool32 needsArgumentBufferEncoders; /**< If true, Metal argument buffer encoders are needed to populate argument buffer content. */
VkBool32 residencySets; /**< If true, the device supports creating residency sets. */
} MVKPhysicalDeviceMetalFeatures;


Expand Down
38 changes: 38 additions & 0 deletions MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,35 @@
return VK_SUCCESS;
}

static uint64_t mvkPipelineStageFlagsToBarrierStages(VkPipelineStageFlags2 flags) {
uint64_t result = 0;

if (mvkIsAnyFlagEnabled(flags, VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT |
VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT | VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT | VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
result |= 1 << kMVKBarrierStageVertex;

if (mvkIsAnyFlagEnabled(flags, VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR | VK_PIPELINE_STAGE_2_FRAGMENT_DENSITY_PROCESS_BIT_EXT |
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
result |= 1 << kMVKBarrierStageFragment;

if (mvkIsAnyFlagEnabled(flags, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
result |= 1 << kMVKBarrierStageCompute;

if (mvkIsAnyFlagEnabled(flags, VK_PIPELINE_STAGE_2_BLIT_BIT | VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT | VK_PIPELINE_STAGE_2_RESOLVE_BIT |
VK_PIPELINE_STAGE_2_CLEAR_BIT | VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
result |= 1 << kMVKBarrierStageCopy;

return result;
}

template <size_t N>
VkResult MVKCmdPipelineBarrier<N>::setContent(MVKCommandBuffer* cmdBuff,
VkPipelineStageFlags srcStageMask,
Expand Down Expand Up @@ -157,6 +186,15 @@
}
#endif

if (!cmdEncoder->_mtlRenderEncoder && cmdEncoder->isUsingMetalArgumentBuffers() && cmdEncoder->getDevice()->hasResidencySet()) {
cmdEncoder->endCurrentMetalEncoding();

for (auto& b : _barriers) {
uint64_t sourceStageMask = mvkPipelineStageFlagsToBarrierStages(b.srcStageMask), destStageMask = mvkPipelineStageFlagsToBarrierStages(b.dstStageMask);
cmdEncoder->setBarrier(sourceStageMask, destStageMask);
}
}

// Apple GPUs do not support renderpass barriers, and do not support rendering/writing
// to an attachment and then reading from that attachment within a single renderpass.
// So, in the case where we are inside a Metal renderpass, we need to split those activities
Expand Down
21 changes: 19 additions & 2 deletions MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,8 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma
id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPD];
cmdEncoder->_cmdBuffer->setMetalObjectLabel(mtlRendEnc, mvkMTLRenderCommandEncoderLabel(commandUse));

cmdEncoder->barrierWait(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);

float zIncr;
if (blitKey.srcMTLTextureType == MTLTextureType3D) {
// In this case, I need to interpolate along the third dimension manually.
Expand Down Expand Up @@ -678,7 +680,10 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma

NSUInteger instanceCount = isLayeredBlit ? mtlRPD.renderTargetArrayLengthMVK : 1;
[mtlRendEnc drawPrimitives: MTLPrimitiveTypeTriangleStrip vertexStart: 0 vertexCount: kMVKBlitVertexCount instanceCount: instanceCount];
[mtlRendEnc popDebugGroup];

cmdEncoder->barrierUpdate(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);

[mtlRendEnc popDebugGroup];
[mtlRendEnc endEncoding];
}
}
Expand Down Expand Up @@ -897,6 +902,10 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma

[mtlRendEnc pushDebugGroup: @"vkCmdResolveImage"];
[mtlRendEnc popDebugGroup];

cmdEncoder->barrierWait(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);
cmdEncoder->barrierUpdate(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);

[mtlRendEnc endEncoding];
}
}
Expand Down Expand Up @@ -1700,6 +1709,10 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma

id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc];
cmdEncoder->_cmdBuffer->setMetalObjectLabel(mtlRendEnc, mtlRendEncName);

cmdEncoder->barrierWait(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);
cmdEncoder->barrierUpdate(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);

[mtlRendEnc endEncoding];
} else {
for (uint32_t layer = layerStart; layer < layerEnd; layer++) {
Expand All @@ -1715,7 +1728,11 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma

id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc];
cmdEncoder->_cmdBuffer->setMetalObjectLabel(mtlRendEnc, mtlRendEncName);
[mtlRendEnc endEncoding];

cmdEncoder->barrierWait(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);
cmdEncoder->barrierUpdate(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);

[mtlRendEnc endEncoding];
}
}
}
Expand Down
25 changes: 25 additions & 0 deletions MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,27 @@ class MVKCommandEncoder : public MVKBaseDeviceObject {
/** Returns the command encoding pool. */
MVKCommandEncodingPool* getCommandEncodingPool();

#pragma mark Barriers

/** Encode waits in the current command encoder for the stage that corresponds to given use. */
void encodeBarrierWaits(MVKCommandUse use);

/** Update fences for the currently executing pipeline stage. */
void encodeBarrierUpdates();

/** Insert a new execution barrier */
void setBarrier(uint64_t sourceStageMask, uint64_t destStageMask);

/** Encode waits for a specific stage in given encoder. */
void barrierWait(MVKBarrierStage stage, id<MTLRenderCommandEncoder> mtlEncoder, MTLRenderStages beforeStages);
void barrierWait(MVKBarrierStage stage, id<MTLBlitCommandEncoder> mtlEncoder);
void barrierWait(MVKBarrierStage stage, id<MTLComputeCommandEncoder> mtlEncoder);

/** Encode update for a specific stage in given encoder. */
void barrierUpdate(MVKBarrierStage stage, id<MTLRenderCommandEncoder> mtlEncoder, MTLRenderStages afterStages);
void barrierUpdate(MVKBarrierStage stage, id<MTLBlitCommandEncoder> mtlEncoder);
void barrierUpdate(MVKBarrierStage stage, id<MTLComputeCommandEncoder> mtlEncoder);

#pragma mark Queries

/** Begins an occlusion query. */
Expand Down Expand Up @@ -484,6 +505,7 @@ class MVKCommandEncoder : public MVKBaseDeviceObject {
NSString* getMTLRenderCommandEncoderName(MVKCommandUse cmdUse);
template<typename T> void retainIfImmediatelyEncoding(T& mtlEnc);
template<typename T> void endMetalEncoding(T& mtlEnc);
id<MTLFence> getBarrierStageFence(MVKBarrierStage stage);

typedef struct GPUCounterQuery {
MVKGPUCounterQueryPool* queryPool = nullptr;
Expand Down Expand Up @@ -511,6 +533,9 @@ class MVKCommandEncoder : public MVKBaseDeviceObject {
uint32_t _flushCount;
MVKCommandUse _mtlComputeEncoderUse;
MVKCommandUse _mtlBlitEncoderUse;
uint32_t _updateFenceSlotDirtyBits = ~0;
int _updateFenceSlots[kMVKBarrierStageCount] = {};
int _waitFenceSlots[kMVKBarrierStageCount][kMVKBarrierStageCount] = {};
bool _isRenderingEntireAttachment;
};

Expand Down
Loading