Skip to content

Commit

Permalink
Adding Metal compute based skinning
Browse files Browse the repository at this point in the history
This is very hacked together - definitely not ready yet
  • Loading branch information
colincornaby committed Dec 31, 2024
1 parent 76e31fe commit 092cc5a
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 4 deletions.
1 change: 1 addition & 0 deletions Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ set(pfMetalPipeline_SHADERS
ShaderSrc/Clear.metal
ShaderSrc/GammaCorrection.metal
ShaderSrc/TextFontShader.metal
ShaderSrc/Skinning.metal
)

plasma_library(pfMetalPipeline
Expand Down
76 changes: 76 additions & 0 deletions Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Skinning.metal
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
//
// Skinning.metal
// plClient
//
// Created by Colin Cornaby on 8/18/22.
//

#include <metal_stdlib>
using namespace metal;

// Per-dispatch parameters, bound at buffer index 3 of SkinningFunction.
// The host declares a struct with the same single field in
// plMetalPipeline::IBlendVertBufferMetal and uploads it with setBytes.
struct SkinningUniforms {
// Multiplied by the thread's x position and added to the (byte-typed)
// destination pointer, i.e. the distance in bytes between output vertices.
uint32_t destinationVerticesStride;
};

// Function constants supplied by the host when the kernel is specialised:
// index 0 is the number of explicit per-vertex weights, index 1 says whether
// the vertex carries a packed skin-indices word.
constant const int32_t numWeights [[ function_constant(0) ]];
constant const bool hasSkinIndices [[ function_constant(1) ]];

// Derived constants used to switch the individual weight attributes of
// VertexIn on and off.
constant const bool hasWeight1 = numWeights > 0;
constant const bool hasWeight2 = numWeights > 1;
constant const bool hasWeight3 = numWeights > 2;

// One entry of the matrix palette bound at buffer index 1.
// NOTE(review): the trailing 16 bytes are presumably there to skip the extra
// (non-matrix) data of the host-side hsMatrix44 that is uploaded verbatim --
// confirm that sizeof(hsMatrix44) on the host really equals the size of this
// struct, otherwise palette entries past index 0 are read at the wrong offset.
struct hsMatrix {
float4x4 matrix;
uint8_t alignment[16];
};

// Per-vertex input delivered through [[stage_in]]. The attribute indices must
// agree with the stage-input descriptor the host builds in
// plMetalPipeline::IBlendVertBufferMetal (0 = position, 1 = normal,
// 2..4 = weights, 5 = skin indices).
struct VertexIn {
float3 position [[ attribute(0) ]];
float3 normal [[ attribute(1)] ];
// Each weight attribute only exists when its hasWeightN constant is true.
float weight1 [[ attribute(2), function_constant(hasWeight1)] ];
float weight2 [[ attribute(3), function_constant(hasWeight2)] ];
float weight3 [[ attribute(4), function_constant(hasWeight3)] ];
// Only exists when hasSkinIndices is true. The kernel consumes it one byte
// per weight, lowest byte first.
uint32_t skinIndices [[ attribute(5), function_constant(hasSkinIndices) ]];
};

// Compute kernel that produces one output vertex (position followed by normal)
// per thread.
//
//  in                   source vertex, fetched via the stage-input descriptor
//  matrixPalette        bone matrices for the span being skinned (buffer 1)
//  destinationVertices  byte pointer to the first output vertex (buffer 2)
//  uniforms             output stride in bytes (buffer 3)
//  gid                  only gid.x is used; it is the vertex index in the span
//
// NOTE(review): the actual matrix blend is still disabled (see the TODO in the
// loop); the kernel currently copies position and normal through unchanged.
kernel void SkinningFunction(
    VertexIn                   in                  [[stage_in]],
    constant hsMatrix*         matrixPalette       [[buffer(1)]],
    device char*               destinationVertices [[buffer(2)]],
    constant SkinningUniforms& uniforms            [[buffer(3)]],
    uint2                      gid                 [[thread_position_in_grid]]
) {
    // Gather the explicit weights. Each member is read by name and only under
    // its own function constant: the members are independent stage_in
    // attributes (not an array), and a weight whose constant is false is not
    // declared at all for this specialisation, so indexing off &in.weight1 is
    // not safe.
    float4 weights = float4(0.f);
    float weightSum = 0.f;
    if (hasWeight1) {
        weights[0] = in.weight1;
        weightSum += weights[0];
    }
    if (hasWeight2) {
        weights[1] = in.weight2;
        weightSum += weights[1];
    }
    if (hasWeight3) {
        weights[2] = in.weight3;
        weightSum += weights[2];
    }
    // The final weight is implicit: whatever is left to reach 1.
    weights[numWeights] = 1.f - weightSum;

    // One palette index per byte, lowest byte first. Without explicit indices
    // the first weight uses palette entry 0 and the second uses entry 1.
    uint32_t indices;
    if (hasSkinIndices) {
        indices = in.skinIndices;
    } else {
        indices = 1 << 8;
    }

    float4 destNorm = { 0.f, 0.f, 0.f, 0.f };
    float4 destPoints = { 0.f, 0.f, 0.f, 1.f };

    for (int j = 0; j < numWeights + 1; ++j) {
        if (weights[j]) {
            // Pass-through while the GPU path is being brought up.
            destPoints.xyz = in.position.xyz;
            destNorm.xyz = in.normal.xyz;
            // TODO: enable the real blend once the palette layout is verified:
            //   const float4x4 matrix = matrixPalette[indices & 0xFF].matrix;
            //   destPoints.xyz += weights[j] * (float4(in.position, 1.0f) * matrix).xyz;
            //   destNorm.xyz += weights[j] * (float4(in.normal, 0.0f) * matrix).xyz;
            // (and drop the two pass-through assignments above).
        }
        indices >>= 8;
    }

    // Position and normal are the first two packed float3s of the output
    // vertex; the rest of the destination vertex is left untouched.
    device packed_float3* dest = (device packed_float3*)(destinationVertices + (gid.x * uniforms.destinationVerticesStride));
    dest[0] = destPoints.xyz;
    dest[1] = destNorm.xyz;
}
5 changes: 4 additions & 1 deletion Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,12 @@ class plMetalVertexBufferRef : public plMetalBufferPoolRef
fOwner(),
fData(),
fFormat(),
fRefTime()
fRefTime(),
fBackingBuffer()
{
}

MTL::Buffer* fBackingBuffer;

virtual ~plMetalVertexBufferRef();

Expand Down
147 changes: 144 additions & 3 deletions Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1968,7 +1968,7 @@ bool plMetalPipeline::ICheckDynBuffers(plDrawableSpans* drawable, plGBufferGroup
bool plMetalPipeline::IRefreshDynVertices(plGBufferGroup* group, plMetalVertexBufferRef* vRef)
{
ptrdiff_t size = (group->GetVertBufferEnd(vRef->fIndex) - group->GetVertBufferStart(vRef->fIndex)) * vRef->fVertexSize;
if (!size)
if (!size || !vRef->IsDirty())
return false; // No error, just nothing to do.

hsAssert(size > 0, "Bad start and end counts in a group");
Expand Down Expand Up @@ -4099,6 +4099,15 @@ bool plMetalPipeline::ISoftwareVertexBlend(plDrawableSpans* drawable, const std:
drawable->SetBlendingSpanVectorBit(visList[i], false);
}
}

#define PF_USE_GPU_VERTEX_BLENDING 1

#if PF_USE_GPU_VERTEX_BLENDING
MTL::CommandBuffer* commandBuffer = fDevice.fCommandQueue->commandBuffer();
commandBuffer->setLabel(MTLSTR("Skinning compute pass"));
commandBuffer->enqueue();
MTL::ComputeCommandEncoder* encoder = commandBuffer->computeCommandEncoder();
#endif

// Now go through each of the group/buffer (= a real vertex buffer) pairs we found,
// and blend into it. We'll lock the buffer once, and then for each span that
Expand All @@ -4114,17 +4123,50 @@ bool plMetalPipeline::ISoftwareVertexBlend(plDrawableSpans* drawable, const std:
hsAssert(vRef->fData, "Going into skinning with no place to put results!");

uint8_t* destPtr = vRef->fData;

#if PF_USE_GPU_VERTEX_BLENDING
uint8_t* ptr = vRef->fOwner->GetVertBufferData(vRef->fIndex);

MTL::Buffer* destBuffer = vRef->GetBuffer();
if(!destBuffer) {
destBuffer = fDevice.fMetalDevice->newBuffer(vRef->fData, vRef->fCount * vRef->fVertexSize, MTL::ResourceStorageModeManaged);
vRef->SetBuffer(destBuffer);
}
encoder->setBuffer(destBuffer, 0, 2);

MTL::Buffer* srcBuffer = vRef->fBackingBuffer;
if(!srcBuffer) {
srcBuffer = fDevice.fMetalDevice->newBuffer(ptr, vRef->fOwner->GetVertBufferSize(vRef->fIndex), MTL::ResourceStorageModeManaged);
vRef->fBackingBuffer = srcBuffer;
} else if (vRef->IsDirty()) {
memcpy(srcBuffer->contents(), ptr, vRef->fOwner->GetVertBufferSize(vRef->fIndex));
srcBuffer->didModifyRange(NS::Range(0, vRef->fOwner->GetVertBufferSize(vRef->fIndex)));
}
encoder->setBuffer(srcBuffer, 0, 0);
#endif

int k;
for (k = 0; k < visList.size(); k++) {
const plIcicle& span = *(plIcicle*)spans[visList[k]];
if (span.fGroupIdx == i && span.fVBufferIdx == j) {
plProfile_Inc(NumSkin);

hsMatrix44* matrixPalette = drawable->GetMatrixPalette(span.fBaseMatrix);
matrixPalette[0] = span.fLocalToWorld;


#if PF_USE_GPU_VERTEX_BLENDING
MTL::Buffer* matrixPaletteBuffer = fDevice.fMetalDevice->newBuffer((void *)matrixPalette, sizeof(hsMatrix44) * (span.fNumMatrices + 1), MTL::ResourceStorageModeManaged);
encoder->setBuffer(matrixPaletteBuffer, 0, 1);

IBlendVertBufferMetal( vRef->fOwner->GetVertexFormat(), vRef->fOwner->GetVertexSize(), vRef->fVertexSize, span.fVLength, span.fVStartIdx * vRef->fOwner->GetVertexSize(), span.fVStartIdx * vRef->fVertexSize, encoder);
vRef->SetDirty(false);

matrixPaletteBuffer->release();

#else

uint8_t* ptr = vRef->fOwner->GetVertBufferData(vRef->fIndex);

ptr += span.fVStartIdx * vRef->fOwner->GetVertexSize();
IBlendVertBuffer((plSpan*)&span,
matrixPalette, span.fNumMatrices,
Expand All @@ -4136,12 +4178,18 @@ bool plMetalPipeline::ISoftwareVertexBlend(plDrawableSpans* drawable, const std:
span.fVLength,
span.fLocalUVWChans);
vRef->SetDirty(true);
#endif
}
}
// Unlock and move on.
}
}
}

#if PF_USE_GPU_VERTEX_BLENDING
encoder->endEncoding();
commandBuffer->commit();
#endif

plProfile_EndTiming(Skin);

Expand All @@ -4154,6 +4202,99 @@ bool plMetalPipeline::ISoftwareVertexBlend(plDrawableSpans* drawable, const std:
return true;
}


void plMetalPipeline::IBlendVertBufferMetal(uint8_t format, uint32_t srcStride,
uint32_t destStride, uint32_t count,
size_t srcOffset, size_t destOffset, MTL::ComputeCommandEncoder* encoder)
{
struct SkinningUniforms {
uint32_t destinationVerticesStride;
};
int32_t numWeights = (format & plGBufferGroup::kSkinWeightMask) >> 4;

struct SkinningUniforms uniforms;
uniforms.destinationVerticesStride = destStride;
bool hasSkinIndices = format & plGBufferGroup::kSkinIndices;

static MTL::ComputePipelineState* pipelineState[4][2] = { 0 };
if(!pipelineState[numWeights][hasSkinIndices]) {
MTL::Library* library = fDevice.GetShaderLibrary();
MTL::FunctionConstantValues* constantValues = MTL::FunctionConstantValues::alloc()->init();
constantValues->setConstantValue((void *)&numWeights, MTL::DataTypeInt, (int)0);
constantValues->setConstantValue((void *)&hasSkinIndices, MTL::DataTypeBool, (int)1);
MTL::Function* skinningFunction = library->newFunction(NS::MakeConstantString("SkinningFunction"), constantValues, (NS::Error**)nullptr);
MTL::ComputePipelineDescriptor* pipelineDescriptor = MTL::ComputePipelineDescriptor::alloc()->init();
pipelineDescriptor->setComputeFunction(skinningFunction);
pipelineDescriptor->buffers()->object(0)->setMutability(MTL::MutabilityImmutable);
pipelineDescriptor->buffers()->object(1)->setMutability(MTL::MutabilityImmutable);
pipelineDescriptor->buffers()->object(2)->setMutability(MTL::MutabilityImmutable);
pipelineDescriptor->buffers()->object(3)->setMutability(MTL::MutabilityImmutable);

MTL::BufferLayoutDescriptor* layout = MTL::BufferLayoutDescriptor::alloc()->init();
layout->setStride(srcStride);
layout->setStepRate(1);
layout->setStepFunction(MTL::StepFunctionThreadPositionInGridX);

MTL::StageInputOutputDescriptor* inOutDescriptor = MTL::StageInputOutputDescriptor::alloc()->init();
uint offset = 0;
inOutDescriptor->layouts()->setObject(layout, 0);

MTL::AttributeDescriptor* positionAttribute = MTL::AttributeDescriptor::alloc()->init();
positionAttribute->setFormat(MTL::AttributeFormatFloat3);
positionAttribute->setOffset(offset);
positionAttribute->setBufferIndex(0);
inOutDescriptor->attributes()->setObject(positionAttribute, 0);
offset += sizeof(float) * 3;

for(int i=0; i<numWeights; i++) {
MTL::AttributeDescriptor* weightAttribute = MTL::AttributeDescriptor::alloc()->init();
weightAttribute->setFormat(MTL::AttributeFormatFloat);
weightAttribute->setOffset(offset);
weightAttribute->setBufferIndex(0);
inOutDescriptor->attributes()->setObject(weightAttribute, 2+i);
offset += sizeof(float);
}

if(hasSkinIndices) {
MTL::AttributeDescriptor* skinIndicesAttribute = MTL::AttributeDescriptor::alloc()->init();
skinIndicesAttribute->setFormat(MTL::AttributeFormatUInt);
skinIndicesAttribute->setOffset(offset);
skinIndicesAttribute->setBufferIndex(0);
inOutDescriptor->attributes()->setObject(skinIndicesAttribute, 5);
offset += sizeof(uint32_t);
}

MTL::AttributeDescriptor* normalAttribute = MTL::AttributeDescriptor::alloc()->init();
normalAttribute->setFormat(MTL::AttributeFormatFloat3);
normalAttribute->setOffset(offset);
normalAttribute->setBufferIndex(0);
inOutDescriptor->attributes()->setObject(normalAttribute, 1);
offset += sizeof(float) * 3;

pipelineDescriptor->setStageInputDescriptor(inOutDescriptor);

std::string label = "Weights: ";
label.append(std::to_string(numWeights));
label.append(", hasSkinIndices: ");
label.append(std::to_string(hasSkinIndices));
pipelineDescriptor->setLabel(NS::String::string(label.c_str(), NS::UTF8StringEncoding));
NS::Error* error;
pipelineState[numWeights][hasSkinIndices] = fDevice.fMetalDevice->newComputePipelineState(pipelineDescriptor, MTL::PipelineOptionNone, nullptr, &error);
hsAssert(error == nullptr, "Error was not null");
skinningFunction->release();
library->release();
}

encoder->setComputePipelineState(pipelineState[numWeights][hasSkinIndices]);
encoder->setBufferOffset(srcOffset, 0);
encoder->setBufferOffset(destOffset, 2);
encoder->setBytes(&uniforms, sizeof(SkinningUniforms), 3);

MTL::Size threadsPerThreadgroup = MTL::Size(pipelineState[numWeights][hasSkinIndices]->maxTotalThreadsPerThreadgroup(), 1, 1);

encoder->dispatchThreads(MTL::Size(count, 1, 1), threadsPerThreadgroup);
}

//// IBlendVertsIntoBuffer ////////////////////////////////////////////////////
// Given a pointer into a buffer of verts that have blending data in the D3D
// format, blends them into the destination buffer given without the blending
Expand Down
4 changes: 4 additions & 0 deletions Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,10 @@ class plMetalPipeline : public pl3DPipeline<plMetalDevice>
uint8_t* dest, uint32_t destStride, uint32_t count,
uint16_t localUVWChans);

void IBlendVertBufferMetal(uint8_t format, uint32_t srcStride,
uint32_t destStride, uint32_t count,
size_t srcOffset, size_t destOffset, MTL::ComputeCommandEncoder* encoder);

plMetalVertexShader* fVShaderRefList;
plMetalFragmentShader* fPShaderRefList;
bool IPrepShadowCaster(const plShadowCaster* caster);
Expand Down

0 comments on commit 092cc5a

Please sign in to comment.