diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp
index b117fcd5c5..65e3d6fc4c 100644
--- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp
@@ -124,18 +124,6 @@ static inline uint8_t* inlStuff(uint8_t* dst, const T* val)
     return reinterpret_cast(ptr);
 }
 
-matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst)
-{
-    constexpr auto matrixSize = sizeof(matrix_float4x4);
-    if (src.fFlags & hsMatrix44::kIsIdent) {
-        memcpy(dst, &matrix_identity_float4x4, matrixSize);
-    } else {
-        memcpy(dst, &src.fMap, matrixSize);
-    }
-
-    return dst;
-}
-
 bool plMetalDevice::InitDevice()
 {
     // FIXME: Should Metal adopt InitDevice like OGL?
@@ -972,7 +960,7 @@ void plMetalDevice::MakeCubicTextureRef(plMetalDevice::TextureRef* tRef, plCubic
 
 void plMetalDevice::SetProjectionMatrix(const hsMatrix44& src)
 {
-    hsMatrix2SIMD(src, &fMatrixProj);
+    fMatrixProj = *hsMatrix2SIMD(src);
 }
 
 void plMetalDevice::SetWorldToCameraMatrix(const hsMatrix44& src)
@@ -980,8 +968,8 @@ void plMetalDevice::SetWorldToCameraMatrix(const hsMatrix44& src)
     hsMatrix44 inv;
     src.GetInverse(&inv);
 
-    hsMatrix2SIMD(src, &fMatrixW2C);
-    hsMatrix2SIMD(inv, &fMatrixC2W);
+    fMatrixW2C = *hsMatrix2SIMD(src);
+    fMatrixC2W = *hsMatrix2SIMD(inv);
 }
 
 void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src)
@@ -989,8 +977,8 @@ void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src)
     hsMatrix44 inv;
     src.GetInverse(&inv);
 
-    hsMatrix2SIMD(src, &fMatrixL2W);
-    hsMatrix2SIMD(inv, &fMatrixW2L);
+    fMatrixL2W = *hsMatrix2SIMD(src);
+    fMatrixW2L = *hsMatrix2SIMD(inv);
 }
 
 void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable)
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h
index 4aec911ed0..cd568b39dd 100644
--- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h
@@ -65,8 +65,22 @@ class plCubicEnvironmap;
 class plLayerInterface;
 class plMetalPipelineState;
 
-// NOTE: Results of this will be row major
-matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst);
+// Returns a read-only SIMD view of src's matrix data without copying it.
+// The pointer refers to src's own storage (or to the shared identity matrix),
+// so it must not be used after src is destroyed.
+inline const matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src)
+{
+    // Matrices flagged kIsIdent are reported as identity, exactly as the
+    // memcpy-based version of this helper did.
+    if (src.fFlags & hsMatrix44::kIsIdent)
+        return &matrix_identity_float4x4;
+
+    // Not constexpr: a pointer reinterpretation is never a constant expression,
+    // whichever cast syntax spells it.
+    // NOTE(review): unlike memcpy, this view needs fMap to meet
+    // matrix_float4x4's alignment - confirm hsMatrix44 guarantees that.
+    return reinterpret_cast<const matrix_float4x4*>(src.fMap);
+}
 
 class plMetalDevice
 {
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp
index 725eb9eb74..d6d6163c34 100644
--- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp
@@ -204,9 +204,7 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder* encode
 
 void plMetalMaterialShaderRef::EncodeTransform(const plLayerInterface* layer, UVOutDescriptor* transform)
 {
-    matrix_float4x4 tXfm;
-    hsMatrix2SIMD(layer->GetTransform(), &tXfm);
-    transform->transform = tXfm;
+    transform->transform = *hsMatrix2SIMD(layer->GetTransform());
     transform->UVWSrc = layer->GetUVWSrc();
 }
 
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp
index 2ffc8ba3f7..d590e4f437 100644
--- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp
@@ -1147,9 +1147,8 @@ void plMetalPipeline::ISetupTransforms(plDrawableSpans* drawable, const plSpan&
     }
 
     if (span.fNumMatrices == 2) {
-        matrix_float4x4 mat;
-        hsMatrix2SIMD(drawable->GetPaletteMatrix(span.fBaseMatrix + 1), &mat);
-        fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof(matrix_float4x4), VertexShaderArgumentBlendMatrix1);
+        const matrix_float4x4 *mat = hsMatrix2SIMD(drawable->GetPaletteMatrix(span.fBaseMatrix + 1));
+        fDevice.CurrentRenderCommandEncoder()->setVertexBytes(mat, sizeof(matrix_float4x4), VertexShaderArgumentBlendMatrix1);
     }
 
     fCurrentRenderPassUniforms->projectionMatrix = fDevice.fMatrixProj;
@@ -1315,9 +1314,8 @@ void plMetalPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightI
     fCurrentRenderPassUniforms->fogColor = {0.f, 0.f, 0.f};
     fCurrentRenderPassUniforms->diffuseCol = {1.f, 1.f, 1.f, 1.f};
 
-    matrix_float4x4 tXfm;
-    hsMatrix2SIMD(proj->GetTransform(), &tXfm);
-    fCurrentRenderPassUniforms->uvTransforms[0].transform = tXfm;
+    const matrix_float4x4 *tXfm = hsMatrix2SIMD(proj->GetTransform());
+    fCurrentRenderPassUniforms->uvTransforms[0].transform = *tXfm;
     fCurrentRenderPassUniforms->uvTransforms[0].UVWSrc = proj->GetUVWSrc();
 
     fCurrNumLayers = 1;
@@ -3235,10 +3233,7 @@ bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave)
         castLUT = castLUT * c2w;
     }
 
-    simd_float4x4 tXfm;
-    hsMatrix2SIMD(castLUT, &tXfm);
-
-    fCurrentRenderPassUniforms->uvTransforms[0].transform = tXfm;
+    fCurrentRenderPassUniforms->uvTransforms[0].transform = *hsMatrix2SIMD(castLUT);
     fCurrentRenderPassUniforms->uvTransforms[0].UVWSrc = plLayerInterface::kUVWPosition;
 
     /*DWORD clearColor = 0xff000000L;
@@ -3877,9 +3872,7 @@ void plMetalPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat)
         // Normal UVW source.
         fCurrentRenderPassUniforms->uvTransforms[2].UVWSrc = uvwSrc;
         // MiscFlags to layer's misc flags
-        matrix_float4x4 tXfm;
-        hsMatrix2SIMD(layer->GetTransform(), &tXfm);
-        fCurrentRenderPassUniforms->uvTransforms[2].transform = tXfm;
+        fCurrentRenderPassUniforms->uvTransforms[2].transform = *hsMatrix2SIMD(layer->GetTransform());
     }
 
     fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&layerIndex, sizeof(int), FragmentShaderArgumentShadowCastAlphaSrc);
@@ -3947,19 +3940,16 @@ void plMetalPipeline::ISetupShadowSlaveTextures(plShadowSlave* slave)
     fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&uniforms, sizeof(plMetalShadowCastFragmentShaderArgumentBuffer), FragmentShaderArgumentShadowCastUniforms);
 
     hsMatrix44 cameraToTexture = slave->fWorldToTexture * c2w;
-    simd_float4x4 tXfm;
-    hsMatrix2SIMD(cameraToTexture, &tXfm);
 
     fCurrentRenderPassUniforms->uvTransforms[0].UVWSrc = plLayerInterface::kUVWPosition;
-    fCurrentRenderPassUniforms->uvTransforms[0].transform = tXfm;
+    fCurrentRenderPassUniforms->uvTransforms[0].transform = *hsMatrix2SIMD(cameraToTexture);
 
     // Stage 1: the lut
     // Set the texture transform to slave's fRcvLUT
     hsMatrix44 cameraToLut = slave->fRcvLUT * c2w;
-    hsMatrix2SIMD(cameraToLut, &tXfm);
 
     fCurrentRenderPassUniforms->uvTransforms[1].UVWSrc = plLayerInterface::kUVWPosition;
-    fCurrentRenderPassUniforms->uvTransforms[1].transform = tXfm;
+    fCurrentRenderPassUniforms->uvTransforms[1].transform = *hsMatrix2SIMD(cameraToLut);
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -4198,15 +4188,13 @@ void plMetalPipeline::IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette,
         simd_float4 destNorm_buf = (simd_float4){0.f, 0.f, 0.f, 0.f};
         simd_float4 destPt_buf = (simd_float4){0.f, 0.f, 0.f, 1.f};
 
-        simd_float4x4 simdMatrix;
-
         // Blend
         for (uint32_t j = 0; j < numWeights + 1; ++j) {
-            hsMatrix2SIMD(matrixPalette[indices & 0xFF], &simdMatrix);
+            const simd_float4x4 *simdMatrix = hsMatrix2SIMD(matrixPalette[indices & 0xFF]);
             if (weights[j]) {
                 // Note: This bit is different than GL/DirectX. It's using acclerate so this is also accelerated on ARM through NEON or maybe even the Neural Engine.
-                destPt_buf += simd_mul(*(simd_float4*)pt_buf, simdMatrix) * weights[j];
-                destNorm_buf += simd_mul(*(simd_float4*)vec_buf, simdMatrix) * weights[j];
+                destPt_buf += simd_mul(*(simd_float4*)pt_buf, *simdMatrix) * weights[j];
+                destNorm_buf += simd_mul(*(simd_float4*)vec_buf, *simdMatrix) * weights[j];
             }
             // ISkinVertexSSE41(matrixPalette[indices & 0xFF], weights[j], pt_buf, destPt_buf, vec_buf, destNorm_buf);
             indices >>= 8;