diff --git a/impl/gltf/AssetGpuBuffers.cpp b/impl/gltf/AssetGpuBuffers.cpp index 7fb8a55..6aa5d45 100644 --- a/impl/gltf/AssetGpuBuffers.cpp +++ b/impl/gltf/AssetGpuBuffers.cpp @@ -11,6 +11,65 @@ import :helpers.fastgltf; import :helpers.functional; import :helpers.ranges; +#define INDEX_SEQ(Is, N, ...) [&](std::index_sequence) __VA_ARGS__ (std::make_index_sequence{}) + +template + requires (std::ranges::sized_range> && ...) + && (std::is_trivially_copyable_v>> && ...) +[[nodiscard]] std::pair>> createSoaCombinedStagingBuffer( + vma::Allocator allocator, + vk::BufferUsageFlags usage, + const Rs &...segments +) { + // Get size of the latest segments + // TODO.CXX26: use pack indexing. +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-value" + const std::size_t segmentSize = (segments, ...).size(); +#pragma clang diagnostic pop + if (((segments.size() != segmentSize) || ...)) { + throw std::invalid_argument { "Segments must have the same size" }; + } + + if (segmentSize == 0) { + throw std::invalid_argument { "Empty segments not allowed (Vulkan requires non-zero buffer size)" }; + } + + // Calculate the total required size. + std::vector copyOffsets; // concatenated copy offsets for every segment, will be interleaved later. 
+ copyOffsets.reserve(sizeof...(Rs) * segmentSize); + ([&]() { + for (const auto &segment : segments) { + using value_type = std::ranges::range_value_t; + copyOffsets.emplace_back(sizeof(value_type) * segment.size()); + } + }(), ...); + assert(copyOffsets.size() == sizeof...(Rs) * segmentSize && "reserve() size estimation failed"); + vk::DeviceSize sizeTotal = copyOffsets.back(); + std::exclusive_scan(copyOffsets.begin(), copyOffsets.end(), copyOffsets.begin(), vk::DeviceSize { 0 }); + sizeTotal += copyOffsets.back(); + + vku::MappedBuffer buffer { allocator, vk::BufferCreateInfo { {}, sizeTotal, usage} }; + void *mapped = buffer.data; + ([&]() { + for (const auto &segment : segments) { + using value_type = std::ranges::range_value_t; + mapped = std::ranges::copy(segment, static_cast(mapped)).out; + } + }(), ...); + + std::vector> interleavedCopyOffsets(segmentSize); + INDEX_SEQ(Is, sizeof...(Rs), { + ([&]() { + for (std::size_t i = 0; i < segmentSize; ++i) { + interleavedCopyOffsets[i][Is] = copyOffsets[Is * segmentSize + i]; + } + }(), ...); + }); + + return std::pair { std::move(buffer).unmap(), std::move(interleavedCopyOffsets) }; +} + [[nodiscard]] std::pair getTextureTransformMatrixPair(const fastgltf::TextureTransform &transform) noexcept { const float c = std::cos(transform.rotation), s = std::sin(transform.rotation); return { @@ -154,7 +213,7 @@ vku::AllocatedBuffer vk_gltf_viewer::gltf::AssetGpuBuffers::createMaterialBuffer } std::variant vk_gltf_viewer::gltf::AssetGpuBuffers::createPrimitiveBuffer() { - vku::MappedBuffer buffer { + vku::AllocatedBuffer buffer = vku::MappedBuffer { gpu.allocator, std::from_range, orderedPrimitives | std::views::transform([this](const fastgltf::Primitive *pPrimitive) { const AssetPrimitiveInfo &primitiveInfo = primitiveInfos[pPrimitive]; @@ -171,7 +230,8 @@ std::variant vk_gltf_viewer::gltf::Asse .pPositionBuffer = primitiveInfo.positionInfo.address, .pNormalBuffer = normalInfo.address, .pTangentBuffer = 
tangentInfo.address, - .pTexcoordAttributeMappingInfoBuffer = primitiveInfo.texcoordsInfo.pMappingBuffer, + .pTexcoordAttributeStartAddressMappingBuffer = primitiveInfo.texcoordsInfo.pStartAddressMappingBuffer, + .pTexcoordAttributeByteStrideMappingBuffer = primitiveInfo.texcoordsInfo.pByteStrideMappingBuffer, .pColorBuffer = colorInfo.address, .positionByteStride = primitiveInfo.positionInfo.byteStride, .normalByteStride = normalInfo.byteStride, @@ -181,35 +241,42 @@ std::variant vk_gltf_viewer::gltf::Asse }; }), gpu.isUmaDevice ? vk::BufferUsageFlagBits::eStorageBuffer : vk::BufferUsageFlagBits::eTransferSrc, - }; + }.unmap(); stageIfNeeded(buffer, vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eTransferDst); return buffer; } void vk_gltf_viewer::gltf::AssetGpuBuffers::createPrimitiveIndexedAttributeMappingBuffers() { // Collect primitives that have any TEXCOORD attributes. - const std::vector primitiveWithTexcoordAttributeInfos + const std::vector primitiveWithTexcoord = primitiveInfos | std::views::values | std::views::filter([](const AssetPrimitiveInfo &primitiveInfo) { return !primitiveInfo.texcoordsInfo.attributeInfos.empty(); }) - | std::views::transform([](AssetPrimitiveInfo &primitiveInfo) { return std::tie(primitiveInfo, primitiveInfo.texcoordsInfo.attributeInfos); }) + | ranges::views::addressof | std::ranges::to(); - - if (primitiveWithTexcoordAttributeInfos.empty()) { + if (primitiveWithTexcoord.empty()) { return; } - auto [buffer, copyOffsets] = createCombinedStagingBuffer( + auto [buffer, interleavedCopyOffsets] = createSoaCombinedStagingBuffer( gpu.allocator, - primitiveWithTexcoordAttributeInfos | std::views::values, gpu.isUmaDevice ? 
vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eShaderDeviceAddress - : vk::BufferUsageFlagBits::eTransferSrc); + : vk::BufferUsageFlagBits::eTransferSrc, + primitiveWithTexcoord | std::views::transform([](const AssetPrimitiveInfo *pPrimitiveInfo) { + // Project each attribute to its address through a lazy view; the vector is referenced, not copied. + return pPrimitiveInfo->texcoordsInfo.attributeInfos | std::views::transform([](const auto &x) { return x.address; }); + }), + primitiveWithTexcoord | std::views::transform([](const AssetPrimitiveInfo *pPrimitiveInfo) { + // Project each attribute to its byte stride through a lazy view; the vector is referenced, not copied. + return pPrimitiveInfo->texcoordsInfo.attributeInfos | std::views::transform([](const auto &x) { return x.byteStride; }); + })); stageIfNeeded(buffer, vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eShaderDeviceAddress); const vk::DeviceAddress pIndexAttributeMappingBuffer = gpu.device.getBufferAddress({ buffer }); - for (auto &&[primitiveInfo, copyOffset] : std::views::zip(primitiveWithTexcoordAttributeInfos | std::views::keys, copyOffsets)) { - primitiveInfo.texcoordsInfo.pMappingBuffer = pIndexAttributeMappingBuffer + copyOffset; + for (const auto &[pPrimitiveInfo, interleavedCopyOffset] : std::views::zip(primitiveWithTexcoord, interleavedCopyOffsets)) { + pPrimitiveInfo->texcoordsInfo.pStartAddressMappingBuffer = pIndexAttributeMappingBuffer + interleavedCopyOffset[0]; + pPrimitiveInfo->texcoordsInfo.pByteStrideMappingBuffer = pIndexAttributeMappingBuffer + interleavedCopyOffset[1]; } internalBuffers.emplace_back(std::move(buffer)); diff --git a/interface/gltf/AssetGpuBuffers.cppm b/interface/gltf/AssetGpuBuffers.cppm index fcb31e6..0ee4da9 100644 --- a/interface/gltf/AssetGpuBuffers.cppm +++ b/interface/gltf/AssetGpuBuffers.cppm @@ -152,13 +152,15 @@ namespace vk_gltf_viewer::gltf { vk::DeviceAddress pPositionBuffer; vk::DeviceAddress pNormalBuffer; vk::DeviceAddress pTangentBuffer; - vk::DeviceAddress 
pTexcoordAttributeMappingInfoBuffer; + vk::DeviceAddress pTexcoordAttributeStartAddressMappingBuffer; + vk::DeviceAddress pTexcoordAttributeByteStrideMappingBuffer; vk::DeviceAddress pColorBuffer; std::uint8_t positionByteStride; std::uint8_t normalByteStride; std::uint8_t tangentByteStride; std::uint8_t colorByteStride; std::uint32_t materialIndex; + char _padding_[8]; }; std::unordered_map primitiveInfos = createPrimitiveInfos(); diff --git a/interface/gltf/AssetPrimitiveInfo.cppm b/interface/gltf/AssetPrimitiveInfo.cppm index b7c3500..8711c96 100644 --- a/interface/gltf/AssetPrimitiveInfo.cppm +++ b/interface/gltf/AssetPrimitiveInfo.cppm @@ -10,7 +10,12 @@ namespace vk_gltf_viewer::gltf { struct IndexBufferInfo { vk::DeviceSize offset; vk::IndexType type; }; struct AttributeBufferInfo { vk::DeviceAddress address; std::uint8_t byteStride; fastgltf::ComponentType componentType; }; struct ColorAttributeBufferInfo final : AttributeBufferInfo { std::uint8_t numComponent; }; - struct IndexedAttributeBufferInfos { vk::DeviceAddress pMappingBuffer; std::vector attributeInfos; }; + + struct IndexedAttributeBufferInfos { + std::vector attributeInfos; + vk::DeviceAddress pStartAddressMappingBuffer; + vk::DeviceAddress pByteStrideMappingBuffer; + }; std::uint16_t index; std::optional materialIndex; diff --git a/interface/helpers/ranges/mod.cppm b/interface/helpers/ranges/mod.cppm index 207a46c..a643d08 100644 --- a/interface/helpers/ranges/mod.cppm +++ b/interface/helpers/ranges/mod.cppm @@ -161,7 +161,7 @@ namespace views { return *FWD(x); }); - export CLANG_INLINE constexpr auto addressof = std::views::transform([](const auto &x) { + export CLANG_INLINE constexpr auto addressof = std::views::transform([](auto &x) { return &x; }); diff --git a/shaders/mask_depth.vert b/shaders/mask_depth.vert index ff3e604..7e5feb1 100644 --- a/shaders/mask_depth.vert +++ b/shaders/mask_depth.vert @@ -64,8 +64,9 @@ vec3 getPosition() { #if HAS_BASE_COLOR_TEXTURE vec2 getTexcoord(uint 
texcoordIndex){ - IndexedAttributeMappingInfo mappingInfo = PRIMITIVE.texcoordAttributeMappingInfos.data[texcoordIndex]; - uint64_t fetchAddress = mappingInfo.bytesPtr + int(mappingInfo.stride) * gl_VertexIndex; + uint64_t texcoordStartAddress = PRIMITIVE.texcoordStartAddresses.data[texcoordIndex]; + int texcoordByteStride = PRIMITIVE.texcoordByteStrides.data[texcoordIndex]; + uint64_t fetchAddress = texcoordStartAddress + texcoordByteStride * gl_VertexIndex; switch (TEXCOORD_COMPONENT_TYPE) { case 5121U: // UNSIGNED BYTE diff --git a/shaders/mask_jump_flood_seed.vert b/shaders/mask_jump_flood_seed.vert index 2331eda..8c63ae2 100644 --- a/shaders/mask_jump_flood_seed.vert +++ b/shaders/mask_jump_flood_seed.vert @@ -63,8 +63,9 @@ vec3 getPosition() { #if HAS_BASE_COLOR_TEXTURE vec2 getTexcoord(uint texcoordIndex){ - IndexedAttributeMappingInfo mappingInfo = PRIMITIVE.texcoordAttributeMappingInfos.data[texcoordIndex]; - uint64_t fetchAddress = mappingInfo.bytesPtr + int(mappingInfo.stride) * gl_VertexIndex; + uint64_t texcoordStartAddress = PRIMITIVE.texcoordStartAddresses.data[texcoordIndex]; + int texcoordByteStride = PRIMITIVE.texcoordByteStrides.data[texcoordIndex]; + uint64_t fetchAddress = texcoordStartAddress + texcoordByteStride * gl_VertexIndex; switch (TEXCOORD_COMPONENT_TYPE) { case 5121U: // UNSIGNED BYTE diff --git a/shaders/primitive.vert b/shaders/primitive.vert index a85099c..18f8aa7 100644 --- a/shaders/primitive.vert +++ b/shaders/primitive.vert @@ -91,8 +91,9 @@ vec4 getTangent() { #if TEXCOORD_COUNT >= 1 vec2 getTexcoord(uint texcoordIndex){ - IndexedAttributeMappingInfo mappingInfo = PRIMITIVE.texcoordAttributeMappingInfos.data[texcoordIndex]; - uint64_t fetchAddress = mappingInfo.bytesPtr + int(mappingInfo.stride) * gl_VertexIndex; + uint64_t texcoordStartAddress = PRIMITIVE.texcoordStartAddresses.data[texcoordIndex]; + int texcoordByteStride = PRIMITIVE.texcoordByteStrides.data[texcoordIndex]; + uint64_t fetchAddress = texcoordStartAddress + 
texcoordByteStride * gl_VertexIndex; switch ((PACKED_TEXCOORD_COMPONENT_TYPES >> (8U * texcoordIndex)) & 0xFFU) { case 1U: // UNSIGNED BYTE diff --git a/shaders/types.glsl b/shaders/types.glsl index 1fa8844..c98261c 100644 --- a/shaders/types.glsl +++ b/shaders/types.glsl @@ -41,19 +41,22 @@ struct IndexedAttributeMappingInfo { uint8_t stride; }; -layout (std430, buffer_reference, buffer_reference_align = 16) readonly buffer IndexedAttributeMappingInfos { IndexedAttributeMappingInfo data[]; }; +layout (std430, buffer_reference, buffer_reference_align = 1) readonly buffer U8ArrayRef { uint8_t data[]; }; +layout (std430, buffer_reference, buffer_reference_align = 8) readonly buffer U64ArrayRef { uint64_t data[]; }; /* align 8, not 16: per-primitive address arrays are packed back to back, so their start is only guaranteed 8-byte aligned */ struct Primitive { uint64_t pPositionBuffer; uint64_t pNormalBuffer; uint64_t pTangentBuffer; - IndexedAttributeMappingInfos texcoordAttributeMappingInfos; + U64ArrayRef texcoordStartAddresses; + U8ArrayRef texcoordByteStrides; uint64_t pColorBuffer; uint8_t positionByteStride; uint8_t normalByteStride; uint8_t tangentByteStride; uint8_t colorByteStride; uint materialIndex; + vec2 _padding_; }; #endif \ No newline at end of file diff --git a/shaders/unlit_primitive.vert b/shaders/unlit_primitive.vert index ddeac8f..821cc8c 100644 --- a/shaders/unlit_primitive.vert +++ b/shaders/unlit_primitive.vert @@ -68,8 +68,9 @@ vec3 getPosition() { #if HAS_BASE_COLOR_TEXTURE vec2 getTexcoord(uint texcoordIndex){ - IndexedAttributeMappingInfo mappingInfo = PRIMITIVE.texcoordAttributeMappingInfos.data[texcoordIndex]; - uint64_t fetchAddress = mappingInfo.bytesPtr + int(mappingInfo.stride) * gl_VertexIndex; + uint64_t texcoordStartAddress = PRIMITIVE.texcoordStartAddresses.data[texcoordIndex]; + int texcoordByteStride = PRIMITIVE.texcoordByteStrides.data[texcoordIndex]; + uint64_t fetchAddress = texcoordStartAddress + texcoordByteStride * gl_VertexIndex; switch (TEXCOORD_COMPONENT_TYPE) { case 5121U: // UNSIGNED BYTE