Skip to content

Commit

Permalink
Implement barriers using Metal fences
Browse files Browse the repository at this point in the history
  • Loading branch information
js6i committed Dec 3, 2024
1 parent 1d2ec63 commit 6200edf
Show file tree
Hide file tree
Showing 8 changed files with 329 additions and 4 deletions.
9 changes: 9 additions & 0 deletions MoltenVK/MoltenVK/API/mvk_datatypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,15 @@ typedef enum {
kMVKShaderStageMax = kMVKShaderStageCount // Public API legacy value
} MVKShaderStage;

typedef enum {
kMVKBarrierStageVertex = 0,
kMVKBarrierStageFragment,
kMVKBarrierStageCompute,
kMVKBarrierStageCopy,
kMVKBarrierStageHost,
kMVKBarrierStageCount
} MVKBarrierStage;

/** Returns the Metal MTLColorWriteMask corresponding to the specified Vulkan VkColorComponentFlags. */
MTLColorWriteMask mvkMTLColorWriteMaskFromVkChannelFlags(VkColorComponentFlags vkWriteFlags);

Expand Down
41 changes: 41 additions & 0 deletions MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,38 @@
return VK_SUCCESS;
}

static uint64_t mvkPipelineStageFlagsToBarrierStages(VkPipelineStageFlags2 flags) {
uint64_t result = 0;

if (mvkIsAnyFlagEnabled(flags, VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT |
VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT | VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT | VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
result |= 1 << kMVKBarrierStageVertex;

if (mvkIsAnyFlagEnabled(flags, VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR | VK_PIPELINE_STAGE_2_FRAGMENT_DENSITY_PROCESS_BIT_EXT |
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
result |= 1 << kMVKBarrierStageFragment;

if (mvkIsAnyFlagEnabled(flags, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
result |= 1 << kMVKBarrierStageCompute;

if (mvkIsAnyFlagEnabled(flags, VK_PIPELINE_STAGE_2_BLIT_BIT | VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT | VK_PIPELINE_STAGE_2_RESOLVE_BIT |
VK_PIPELINE_STAGE_2_CLEAR_BIT | VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
result |= 1 << kMVKBarrierStageCopy;

if (mvkIsAnyFlagEnabled(flags, VK_PIPELINE_STAGE_2_HOST_BIT))
result |= 1 << kMVKBarrierStageHost;

return result;
}

template <size_t N>
VkResult MVKCmdPipelineBarrier<N>::setContent(MVKCommandBuffer* cmdBuff,
VkPipelineStageFlags srcStageMask,
Expand Down Expand Up @@ -157,6 +189,15 @@
}
#endif

if (!cmdEncoder->_mtlRenderEncoder) {
cmdEncoder->endCurrentMetalEncoding();

for (auto& b : _barriers) {
uint64_t sourceStageMask = mvkPipelineStageFlagsToBarrierStages(b.srcStageMask), destStageMask = mvkPipelineStageFlagsToBarrierStages(b.dstStageMask);
cmdEncoder->getDevice()->setBarrier(cmdEncoder->_mtlCmdBuffer, sourceStageMask, destStageMask);
}
}

// Apple GPUs do not support renderpass barriers, and do not support rendering/writing
// to an attachment and then reading from that attachment within a single renderpass.
// So, in the case where we are inside a Metal renderpass, we need to split those activities
Expand Down
21 changes: 19 additions & 2 deletions MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,8 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma
id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPD];
cmdEncoder->_cmdBuffer->setMetalObjectLabel(mtlRendEnc, mvkMTLRenderCommandEncoderLabel(commandUse));

cmdEncoder->getDevice()->barrierWait(kMVKBarrierStageCopy, cmdEncoder->_mtlCmdBuffer, mtlRendEnc, MTLRenderStageFragment);

float zIncr;
if (blitKey.srcMTLTextureType == MTLTextureType3D) {
// In this case, I need to interpolate along the third dimension manually.
Expand Down Expand Up @@ -678,7 +680,10 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma

NSUInteger instanceCount = isLayeredBlit ? mtlRPD.renderTargetArrayLengthMVK : 1;
[mtlRendEnc drawPrimitives: MTLPrimitiveTypeTriangleStrip vertexStart: 0 vertexCount: kMVKBlitVertexCount instanceCount: instanceCount];
[mtlRendEnc popDebugGroup];

cmdEncoder->getDevice()->barrierUpdate(kMVKBarrierStageCopy, cmdEncoder->_mtlCmdBuffer, mtlRendEnc, MTLRenderStageFragment);

[mtlRendEnc popDebugGroup];
[mtlRendEnc endEncoding];
}
}
Expand Down Expand Up @@ -897,6 +902,10 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma

[mtlRendEnc pushDebugGroup: @"vkCmdResolveImage"];
[mtlRendEnc popDebugGroup];

cmdEncoder->getDevice()->barrierWait(kMVKBarrierStageCopy, cmdEncoder->_mtlCmdBuffer, mtlRendEnc, MTLRenderStageFragment);
cmdEncoder->getDevice()->barrierUpdate(kMVKBarrierStageCopy, cmdEncoder->_mtlCmdBuffer, mtlRendEnc, MTLRenderStageFragment);

[mtlRendEnc endEncoding];
}
}
Expand Down Expand Up @@ -1700,6 +1709,10 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma

id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc];
cmdEncoder->_cmdBuffer->setMetalObjectLabel(mtlRendEnc, mtlRendEncName);

cmdEncoder->getDevice()->barrierWait(kMVKBarrierStageCopy, cmdEncoder->_mtlCmdBuffer, mtlRendEnc, MTLRenderStageFragment);
cmdEncoder->getDevice()->barrierUpdate(kMVKBarrierStageCopy, cmdEncoder->_mtlCmdBuffer, mtlRendEnc, MTLRenderStageFragment);

[mtlRendEnc endEncoding];
} else {
for (uint32_t layer = layerStart; layer < layerEnd; layer++) {
Expand All @@ -1715,7 +1728,11 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma

id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc];
cmdEncoder->_cmdBuffer->setMetalObjectLabel(mtlRendEnc, mtlRendEncName);
[mtlRendEnc endEncoding];

cmdEncoder->getDevice()->barrierWait(kMVKBarrierStageCopy, cmdEncoder->_mtlCmdBuffer, mtlRendEnc, MTLRenderStageFragment);
cmdEncoder->getDevice()->barrierUpdate(kMVKBarrierStageCopy, cmdEncoder->_mtlCmdBuffer, mtlRendEnc, MTLRenderStageFragment);

[mtlRendEnc endEncoding];
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,10 @@ class MVKCommandEncoder : public MVKBaseDeviceObject {
/** Returns the command encoding pool. */
MVKCommandEncodingPool* getCommandEncodingPool();

void encodeBarrierWaits(MVKCommandUse use);

void encodeBarrierUpdates();

#pragma mark Queries

/** Begins an occlusion query. */
Expand Down
98 changes: 98 additions & 0 deletions MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,7 @@
uint32_t subpassIndex,
MVKCommandUse cmdUse) {
encodeStoreActions();
encodeBarrierUpdates();
endMetalRenderEncoding();

_lastMultiviewPassCmd = subpassCmd;
Expand Down Expand Up @@ -521,6 +522,87 @@
mtlEnc = nil;
}

static MVKBarrierStage commandUseToBarrierStage(MVKCommandUse use) {
switch (use) {
case kMVKCommandUseNone: return kMVKBarrierStageCount; /**< No use defined. */
case kMVKCommandUseBeginCommandBuffer: return kMVKBarrierStageCount; /**< vkBeginCommandBuffer (prefilled VkCommandBuffer). */
case kMVKCommandUseQueueSubmit: return kMVKBarrierStageCount; /**< vkQueueSubmit. */
case kMVKCommandUseAcquireNextImage: return kMVKBarrierStageCount; /**< vkAcquireNextImageKHR. */
case kMVKCommandUseQueuePresent: return kMVKBarrierStageCount; /**< vkQueuePresentKHR. */
case kMVKCommandUseQueueWaitIdle: return kMVKBarrierStageCount; /**< vkQueueWaitIdle. */
case kMVKCommandUseDeviceWaitIdle: return kMVKBarrierStageCount; /**< vkDeviceWaitIdle. */
case kMVKCommandUseInvalidateMappedMemoryRanges: return kMVKBarrierStageCount; /**< vkInvalidateMappedMemoryRanges. */
case kMVKCommandUseBeginRendering: return kMVKBarrierStageCount; /**< vkCmdBeginRendering. */
case kMVKCommandUseBeginRenderPass: return kMVKBarrierStageCount; /**< vkCmdBeginRenderPass. */
case kMVKCommandUseNextSubpass: return kMVKBarrierStageCount; /**< vkCmdNextSubpass. */
case kMVKCommandUseRestartSubpass: return kMVKBarrierStageCount; /**< Create a new Metal renderpass due to Metal requirements. */
case kMVKCommandUsePipelineBarrier: return kMVKBarrierStageCount; /**< vkCmdPipelineBarrier. */
case kMVKCommandUseBlitImage: return kMVKBarrierStageCopy; /**< vkCmdBlitImage. */
case kMVKCommandUseCopyImage: return kMVKBarrierStageCopy; /**< vkCmdCopyImage. */
case kMVKCommandUseResolveImage: return kMVKBarrierStageCopy; /**< vkCmdResolveImage - resolve stage. */
case kMVKCommandUseResolveExpandImage: return kMVKBarrierStageCopy; /**< vkCmdResolveImage - expand stage. */
case kMVKCommandUseResolveCopyImage: return kMVKBarrierStageCopy; /**< vkCmdResolveImage - copy stage. */
case kMVKCommandUseCopyImageToMemory: return kMVKBarrierStageCopy; /**< vkCopyImageToMemoryEXT host sync. */
case kMVKCommandUseCopyBuffer: return kMVKBarrierStageCopy; /**< vkCmdCopyBuffer. */
case kMVKCommandUseCopyBufferToImage: return kMVKBarrierStageCopy; /**< vkCmdCopyBufferToImage. */
case kMVKCommandUseCopyImageToBuffer: return kMVKBarrierStageCopy; /**< vkCmdCopyImageToBuffer. */
case kMVKCommandUseFillBuffer: return kMVKBarrierStageCopy; /**< vkCmdFillBuffer. */
case kMVKCommandUseUpdateBuffer: return kMVKBarrierStageCopy; /**< vkCmdUpdateBuffer. */
case kMVKCommandUseClearAttachments: return kMVKBarrierStageCount; /**< vkCmdClearAttachments. */
case kMVKCommandUseClearColorImage: return kMVKBarrierStageCopy; /**< vkCmdClearColorImage. */
case kMVKCommandUseClearDepthStencilImage: return kMVKBarrierStageCopy; /**< vkCmdClearDepthStencilImage. */
case kMVKCommandUseResetQueryPool: return kMVKBarrierStageCopy; /**< vkCmdResetQueryPool. */
case kMVKCommandUseDispatch: return kMVKBarrierStageCompute; /**< vkCmdDispatch. */
case kMVKCommandUseTessellationVertexTessCtl: return kMVKBarrierStageVertex; /**< vkCmdDraw* - vertex and tessellation control stages. */
case kMVKCommandUseDrawIndirectConvertBuffers: return kMVKBarrierStageVertex; /**< vkCmdDrawIndirect* convert indirect buffers. */
case kMVKCommandUseCopyQueryPoolResults: return kMVKBarrierStageCopy; /**< vkCmdCopyQueryPoolResults. */
case kMVKCommandUseAccumOcclusionQuery: return kMVKBarrierStageCount; /**< Any command terminating a Metal render pass with active visibility buffer. */
case kMVKCommandUseRecordGPUCounterSample: return kMVKBarrierStageCount; /**< Any command triggering the recording of a GPU counter sample. */
}
}

void MVKCommandEncoder::encodeBarrierWaits(MVKCommandUse use) {
if (_mtlRenderEncoder) {
[_mtlRenderEncoder insertDebugSignpost:@"Encoding waits"];
getDevice()->barrierWait(kMVKBarrierStageVertex, _mtlCmdBuffer, _mtlRenderEncoder, MTLRenderStageVertex);
getDevice()->barrierWait(kMVKBarrierStageFragment, _mtlCmdBuffer, _mtlRenderEncoder, MTLRenderStageFragment);
}
if (_mtlComputeEncoder) {
auto stage = commandUseToBarrierStage(use);
if (stage != kMVKBarrierStageCount) {
getDevice()->barrierWait(stage, _mtlCmdBuffer, _mtlComputeEncoder);
}
}
if (_mtlBlitEncoder) {
auto stage = commandUseToBarrierStage(use);
if (stage != kMVKBarrierStageCount) {
getDevice()->barrierWait(stage, _mtlCmdBuffer, _mtlBlitEncoder);
}
}
}

void MVKCommandEncoder::encodeBarrierUpdates() {
if (_mtlRenderEncoder) {
getDevice()->barrierUpdate(kMVKBarrierStageVertex, _mtlCmdBuffer, _mtlRenderEncoder, MTLRenderStageVertex);
getDevice()->barrierUpdate(kMVKBarrierStageFragment, _mtlCmdBuffer, _mtlRenderEncoder, MTLRenderStageFragment);
}

if (_mtlComputeEncoder) {
MVKBarrierStage stage = commandUseToBarrierStage(_mtlComputeEncoderUse);
if (stage != kMVKBarrierStageCount) {
getDevice()->barrierUpdate(stage, _mtlCmdBuffer, _mtlComputeEncoder);
}
}

if (_mtlBlitEncoder) {
MVKBarrierStage stage = commandUseToBarrierStage(_mtlBlitEncoderUse);
if (stage != kMVKBarrierStageCount) {
getDevice()->barrierUpdate(stage, _mtlCmdBuffer, _mtlBlitEncoder);
}
}
}


// Creates _mtlRenderEncoder and marks cached render state as dirty so it will be set into the _mtlRenderEncoder.
void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) {

Expand Down Expand Up @@ -584,6 +666,8 @@
retainIfImmediatelyEncoding(_mtlRenderEncoder);
_cmdBuffer->setMetalObjectLabel(_mtlRenderEncoder, getMTLRenderCommandEncoderName(cmdUse));

encodeBarrierWaits(cmdUse);

// We shouldn't clear the render area if we are restarting the Metal renderpass
// separately from a Vulkan subpass, and we otherwise only need to clear render
// area if we're not rendering to the entire attachment.
Expand Down Expand Up @@ -787,6 +871,7 @@
}

encodeStoreActions();
encodeBarrierUpdates();
endMetalRenderEncoding();
if ( !mvkIsAnyFlagEnabled(_pEncodingContext->getRenderingFlags(), VK_RENDERING_SUSPENDING_BIT) ) {
_pEncodingContext->setRenderingContext(nullptr, nullptr);
Expand Down Expand Up @@ -815,6 +900,7 @@
}

void MVKCommandEncoder::endCurrentMetalEncoding() {
encodeBarrierUpdates();
endMetalRenderEncoding();

_computePipelineState.markDirty();
Expand All @@ -833,7 +919,9 @@
}

id<MTLComputeCommandEncoder> MVKCommandEncoder::getMTLComputeEncoder(MVKCommandUse cmdUse, bool markCurrentComputeStateDirty) {
bool needWaits = false;
if ( !_mtlComputeEncoder ) {
needWaits = true;
endCurrentMetalEncoding();
_mtlComputeEncoder = [_mtlCmdBuffer computeCommandEncoder];
retainIfImmediatelyEncoding(_mtlComputeEncoder);
Expand All @@ -846,22 +934,32 @@
_computeResourcesState.markDirty();
}
if (_mtlComputeEncoderUse != cmdUse) {
needWaits = true;
_mtlComputeEncoderUse = cmdUse;
_cmdBuffer->setMetalObjectLabel(_mtlComputeEncoder, mvkMTLComputeCommandEncoderLabel(cmdUse));
}
if (needWaits) {
encodeBarrierWaits(cmdUse);
}
return _mtlComputeEncoder;
}

id<MTLBlitCommandEncoder> MVKCommandEncoder::getMTLBlitEncoder(MVKCommandUse cmdUse) {
bool needWaits = false;
if ( !_mtlBlitEncoder ) {
needWaits = true;
endCurrentMetalEncoding();
_mtlBlitEncoder = [_mtlCmdBuffer blitCommandEncoder];
retainIfImmediatelyEncoding(_mtlBlitEncoder);
}
if (_mtlBlitEncoderUse != cmdUse) {
needWaits = true;
_mtlBlitEncoderUse = cmdUse;
_cmdBuffer->setMetalObjectLabel(_mtlBlitEncoder, mvkMTLBlitCommandEncoderLabel(cmdUse));
}
if (needWaits) {
encodeBarrierWaits(cmdUse);
}
return _mtlBlitEncoder;
}

Expand Down
19 changes: 19 additions & 0 deletions MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,25 @@ class MVKDevice : public MVKDispatchableVulkanAPIObject {
return (MVKDevice*)getDispatchableObject(vkDevice);
}

#pragma mark Barriers

/** Returns a Metal fence to update for the given barrier stage. */
id<MTLFence> getBarrierStageFence(id<MTLCommandBuffer> mtlCommandBuffer, MVKBarrierStage stage);

void setBarrier(id<MTLCommandBuffer> commandBuffer, uint64_t sourceStageMask, uint64_t destStageMask);

void barrierWait(MVKBarrierStage stage, id<MTLCommandBuffer> mtlCommandBuffer, id<MTLRenderCommandEncoder> mtlEncoder, MTLRenderStages beforeStages);
void barrierWait(MVKBarrierStage stage, id<MTLCommandBuffer> mtlCommandBuffer, id<MTLBlitCommandEncoder> mtlEncoder);
void barrierWait(MVKBarrierStage stage, id<MTLCommandBuffer> mtlCommandBuffer, id<MTLComputeCommandEncoder> mtlEncoder);

void barrierUpdate(MVKBarrierStage stage, id<MTLCommandBuffer> mtlCommandBuffer, id<MTLRenderCommandEncoder> mtlEncoder, MTLRenderStages afterStages);
void barrierUpdate(MVKBarrierStage stage, id<MTLCommandBuffer> mtlCommandBuffer, id<MTLBlitCommandEncoder> mtlEncoder);
void barrierUpdate(MVKBarrierStage stage, id<MTLCommandBuffer> mtlCommandBuffer, id<MTLComputeCommandEncoder> mtlEncoder);

id<MTLFence> _stageBarriers[kMVKBarrierStageCount] = {};
id<MTLFence> _activeBarriers[kMVKBarrierStageCount][kMVKBarrierStageCount] = {};
bool _stageBarriersDirty[kMVKBarrierStageCount] = {};

protected:
friend class MVKDeviceTrackingMixin;

Expand Down
Loading

0 comments on commit 6200edf

Please sign in to comment.