From e4636f06383f3ccb8aefc49aa9b83a0f63676036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 17 Jan 2025 11:58:02 +0100 Subject: [PATCH] [SPIR-V] Avoid emitting Int64 when loading Float64 (#7073) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When loading a Float64 from a raw buffer, we used an Int64, which required an additional capability, even if the code wasn't using any Int64. In practice, it seems most devices supporting Float64 do also support Int64, but this doesn't have to be the case. By changing the codegen a bit, we can avoid the Int64 value. Tested the word-order using a Vulkan compute shader, and checking the returned value on the API side. ```hlsl double tmp = buffer.Load(0); if (tmp == 12.0) buffer.Store(0, 13.0); ``` Fixes #7038 --------- Signed-off-by: Nathan Gauër --- tools/clang/lib/SPIRV/RawBufferMethods.cpp | 82 ++++------- ...address-buffer.load.double.capability.hlsl | 39 +++++ ...-address-buffer.templated-load.matrix.hlsl | 79 +++++------ ...-address-buffer.templated-load.scalar.hlsl | 133 ++++++++---------- ...-address-buffer.templated-load.vector.hlsl | 43 +++--- ...address-buffer.templated-store.struct.hlsl | 88 ++++++------ 6 files changed, 229 insertions(+), 235 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.load.double.capability.hlsl diff --git a/tools/clang/lib/SPIRV/RawBufferMethods.cpp b/tools/clang/lib/SPIRV/RawBufferMethods.cpp index 537c012ace..87409e7ccc 100644 --- a/tools/clang/lib/SPIRV/RawBufferMethods.cpp +++ b/tools/clang/lib/SPIRV/RawBufferMethods.cpp @@ -117,48 +117,32 @@ SpirvInstruction *RawBufferHandler::load64Bits(SpirvInstruction *buffer, SpirvInstruction *ptr = nullptr; auto *constUint0 = spvBuilder.getConstantInt(astContext.UnsignedIntTy, llvm::APInt(32, 0)); - auto *constUint32 = - spvBuilder.getConstantInt(astContext.UnsignedIntTy, llvm::APInt(32, 32)); + // Load the first word and increment index. 
auto *index = address.getWordIndex(loc, range); - - // Need to perform two 32-bit uint loads and construct a 64-bit value. - - // Load the first 32-bit uint (word0). ptr = spvBuilder.createAccessChain(astContext.UnsignedIntTy, buffer, {constUint0, index}, loc, range); SpirvInstruction *word0 = spvBuilder.createLoad(astContext.UnsignedIntTy, ptr, loc, range); - // Increment the base index address.incrementWordIndex(loc, range); + + // Load the second word and increment index. index = address.getWordIndex(loc, range); - // Load the second 32-bit uint (word1). ptr = spvBuilder.createAccessChain(astContext.UnsignedIntTy, buffer, {constUint0, index}, loc, range); SpirvInstruction *word1 = spvBuilder.createLoad(astContext.UnsignedIntTy, ptr, loc, range); - - // Convert both word0 and word1 to 64-bit uints. - word0 = spvBuilder.createUnaryOp( - spv::Op::OpUConvert, astContext.UnsignedLongLongTy, word0, loc, range); - word1 = spvBuilder.createUnaryOp( - spv::Op::OpUConvert, astContext.UnsignedLongLongTy, word1, loc, range); - - // Shift word1 to the left by 32 bits. - word1 = spvBuilder.createBinaryOp(spv::Op::OpShiftLeftLogical, - astContext.UnsignedLongLongTy, word1, - constUint32, loc, range); - - // BitwiseOr word0 and word1. - result = spvBuilder.createBinaryOp(spv::Op::OpBitwiseOr, - astContext.UnsignedLongLongTy, word0, - word1, loc, range); - result = bitCastToNumericalOrBool(result, astContext.UnsignedLongLongTy, - target64BitType, loc, range); - result->setRValue(); - address.incrementWordIndex(loc, range); + // Combine the 2 words into a composite, and bitcast into the destination + // type. 
+ const auto uintVec2Type = + astContext.getExtVectorType(astContext.UnsignedIntTy, 2); + auto *operand = spvBuilder.createCompositeConstruct( + uintVec2Type, {word0, word1}, loc, range); + result = spvBuilder.createUnaryOp(spv::Op::OpBitcast, target64BitType, + operand, loc, range); + result->setRValue(); return result; } @@ -441,39 +425,31 @@ void RawBufferHandler::store64Bits(SpirvInstruction *value, const auto loc = buffer->getSourceLocation(); auto *constUint0 = spvBuilder.getConstantInt(astContext.UnsignedIntTy, llvm::APInt(32, 0)); - auto *constUint32 = - spvBuilder.getConstantInt(astContext.UnsignedIntTy, llvm::APInt(32, 32)); - auto *index = address.getWordIndex(loc, range); + // Bitcast the source into a 32-bit words composite. + const auto uintVec2Type = + astContext.getExtVectorType(astContext.UnsignedIntTy, 2); + auto *tmp = spvBuilder.createUnaryOp(spv::Op::OpBitcast, uintVec2Type, value, + loc, range); - // The underlying element type of the ByteAddressBuffer is uint. So we - // need to store two 32-bit values. + // Extract the low and high word (careful! word order). + auto *A = spvBuilder.createCompositeExtract(astContext.UnsignedIntTy, tmp, + {0}, loc, range); + auto *B = spvBuilder.createCompositeExtract(astContext.UnsignedIntTy, tmp, + {1}, loc, range); + + // Store the first word, and increment counter. + auto *index = address.getWordIndex(loc, range); auto *ptr = spvBuilder.createAccessChain(astContext.UnsignedIntTy, buffer, {constUint0, index}, loc, range); - // First convert the 64-bit value to uint64_t. Then extract two 32-bit words - // from it. - value = bitCastToNumericalOrBool(value, valueType, - astContext.UnsignedLongLongTy, loc, range); - - // Use OpUConvert to perform truncation (produces the least significant bits). - SpirvInstruction *lsb = spvBuilder.createUnaryOp( - spv::Op::OpUConvert, astContext.UnsignedIntTy, value, loc, range); - - // Shift uint64_t to the right by 32 bits and truncate to get the most - // significant bits. 
- SpirvInstruction *msb = spvBuilder.createUnaryOp( - spv::Op::OpUConvert, astContext.UnsignedIntTy, - spvBuilder.createBinaryOp(spv::Op::OpShiftRightLogical, - astContext.UnsignedLongLongTy, value, - constUint32, loc, range), - loc, range); - - spvBuilder.createStore(ptr, lsb, loc, range); + spvBuilder.createStore(ptr, A, loc, range); address.incrementWordIndex(loc, range); + + // Store the second word, and increment counter. index = address.getWordIndex(loc, range); ptr = spvBuilder.createAccessChain(astContext.UnsignedIntTy, buffer, {constUint0, index}, loc, range); - spvBuilder.createStore(ptr, msb, loc, range); + spvBuilder.createStore(ptr, B, loc, range); address.incrementWordIndex(loc, range); } diff --git a/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.load.double.capability.hlsl b/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.load.double.capability.hlsl new file mode 100644 index 0000000000..535bbecfe6 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.load.double.capability.hlsl @@ -0,0 +1,39 @@ +// RUN: %dxc -T cs_6_0 -E main -O0 %s -spirv | FileCheck %s + +// CHECK-NOT: OpCapability Int64 +// CHECK-DAG: OpCapability Float64 +// CHECK-NOT: OpCapability Int64 + +RWByteAddressBuffer buffer; + +[numthreads(1, 1, 1)] +void main() { + double tmp; + +// CHECK: [[addr1:%[0-9]+]] = OpShiftRightLogical %uint %uint_0 %uint_2 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buffer %uint_0 [[addr1]] +// CHECK: [[word0:%[0-9]+]] = OpLoad %uint [[ptr]] +// CHECK: [[addr2:%[0-9]+]] = OpIAdd %uint [[addr1]] %uint_1 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buffer %uint_0 [[addr2]] +// CHECK: [[word1:%[0-9]+]] = OpLoad %uint [[ptr]] +// CHECK: [[addr3:%[0-9]+]] = OpIAdd %uint [[addr2]] %uint_1 +// CHECK: [[merge:%[0-9]+]] = OpCompositeConstruct %v2uint [[word0]] [[word1]] +// CHECK: [[value:%[0-9]+]] = OpBitcast %double [[merge]] +// CHECK: OpStore %tmp [[value]] + tmp = buffer.Load(0); + 
+// CHECK: [[value:%[0-9]+]] = OpLoad %double %tmp +// CHECK: [[merge:%[0-9]+]] = OpBitcast %v2uint [[value]] +// CHECK: [[word0:%[0-9]+]] = OpCompositeExtract %uint [[merge]] 0 +// CHECK: [[word1:%[0-9]+]] = OpCompositeExtract %uint [[merge]] 1 + +// CHECK: [[addr1:%[0-9]+]] = OpShiftRightLogical %uint %uint_0 %uint_2 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buffer %uint_0 [[addr1]] +// CHECK: OpStore [[ptr]] [[word0]] +// CHECK: [[addr2:%[0-9]+]] = OpIAdd %uint [[addr1]] %uint_1 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buffer %uint_0 [[addr2]] +// CHECK: OpStore [[ptr]] [[word1]] +// CHECK: [[addr3:%[0-9]+]] = OpIAdd %uint [[addr2]] %uint_1 + buffer.Store(0, tmp); +} + diff --git a/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-load.matrix.hlsl b/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-load.matrix.hlsl index c4ac7bca5a..7a4c968f42 100644 --- a/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-load.matrix.hlsl +++ b/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-load.matrix.hlsl @@ -98,53 +98,46 @@ void main(uint3 tid : SV_DispatchThreadId) // ********* 64-bit matrix ******************** // CHECK: [[index_1:%[0-9]+]] = OpShiftRightLogical %uint [[addr0_1:%[0-9]+]] %uint_2 -// CHECK: [[ptr_11:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_1]] -// CHECK: [[word0_2:%[0-9]+]] = OpLoad %uint [[ptr_11]] -// CHECK: [[index_1_2:%[0-9]+]] = OpIAdd %uint [[index_1]] %uint_1 -// CHECK: [[ptr_12:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_1_2]] -// CHECK: [[word1_3:%[0-9]+]] = OpLoad %uint [[ptr_12]] -// CHECK: [[word0_ulong:%[0-9]+]] = OpUConvert %ulong [[word0_2]] -// CHECK: [[word1_ulong:%[0-9]+]] = OpUConvert %ulong [[word1_3]] -// CHECK: [[word1_ulong_shifted:%[0-9]+]] = OpShiftLeftLogical %ulong [[word1_ulong]] %uint_32 -// CHECK: [[val0_ulong:%[0-9]+]] = OpBitwiseOr %ulong [[word0_ulong]] 
[[word1_ulong_shifted]] -// CHECK: [[val0_1:%[0-9]+]] = OpBitcast %double [[val0_ulong]] -// CHECK: [[index_2_2:%[0-9]+]] = OpIAdd %uint [[index_1_2]] %uint_1 -// CHECK: [[ptr_13:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_2_2]] -// CHECK: [[word2_2:%[0-9]+]] = OpLoad %uint [[ptr_13]] -// CHECK: [[index_3_0:%[0-9]+]] = OpIAdd %uint [[index_2_2]] %uint_1 -// CHECK: [[ptr_14:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_3_0]] -// CHECK: [[word3_0:%[0-9]+]] = OpLoad %uint [[ptr_14]] -// CHECK: [[word2_ulong:%[0-9]+]] = OpUConvert %ulong [[word2_2]] -// CHECK: [[word3_ulong:%[0-9]+]] = OpUConvert %ulong [[word3_0]] -// CHECK: [[word3_ulong_shifted:%[0-9]+]] = OpShiftLeftLogical %ulong [[word3_ulong]] %uint_32 -// CHECK: [[val1_ulong:%[0-9]+]] = OpBitwiseOr %ulong [[word2_ulong]] [[word3_ulong_shifted]] -// CHECK: [[val1_1:%[0-9]+]] = OpBitcast %double [[val1_ulong]] -// CHECK: [[index_4_0:%[0-9]+]] = OpIAdd %uint [[index_3_0]] %uint_1 -// CHECK: [[ptr_15:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_4_0]] -// CHECK: [[word4_0:%[0-9]+]] = OpLoad %uint [[ptr_15]] -// CHECK: [[index_5_0:%[0-9]+]] = OpIAdd %uint [[index_4_0]] %uint_1 -// CHECK: [[ptr_16:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_5_0]] -// CHECK: [[word5_0:%[0-9]+]] = OpLoad %uint [[ptr_16]] -// CHECK: [[word4_ulong:%[0-9]+]] = OpUConvert %ulong [[word4_0]] -// CHECK: [[word5_ulong:%[0-9]+]] = OpUConvert %ulong [[word5_0]] -// CHECK: [[word5_ulong_shifted:%[0-9]+]] = OpShiftLeftLogical %ulong [[word5_ulong]] %uint_32 -// CHECK: [[val2_ulong:%[0-9]+]] = OpBitwiseOr %ulong [[word4_ulong]] [[word5_ulong_shifted]] -// CHECK: [[val2_1:%[0-9]+]] = OpBitcast %double [[val2_ulong]] +// CHECK: [[ptr_11:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_1]] +// CHECK: [[word0_2:%[0-9]+]] = OpLoad %uint [[ptr_11]] +// CHECK: [[index_1_2:%[0-9]+]] = OpIAdd %uint [[index_1]] %uint_1 +// CHECK: [[ptr_12:%[0-9]+]] = 
OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_1_2]] +// CHECK: [[word1_3:%[0-9]+]] = OpLoad %uint [[ptr_12]] +// CHECK: [[index_2_2:%[0-9]+]] = OpIAdd %uint [[index_1_2]] %uint_1 +// CHECK: [[merge:%[0-9]+]] = OpCompositeConstruct %v2uint [[word0_2]] [[word1_3]] +// CHECK: [[val0_1:%[0-9]+]] = OpBitcast %double [[merge]] + +// CHECK: [[ptr_13:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_2_2]] +// CHECK: [[word2_2:%[0-9]+]] = OpLoad %uint [[ptr_13]] +// CHECK: [[index_3_0:%[0-9]+]] = OpIAdd %uint [[index_2_2]] %uint_1 +// CHECK: [[ptr_14:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_3_0]] +// CHECK: [[word3_0:%[0-9]+]] = OpLoad %uint [[ptr_14]] +// CHECK: [[index_4_0:%[0-9]+]] = OpIAdd %uint [[index_3_0]] %uint_1 +// CHECK: [[merge:%[0-9]+]] = OpCompositeConstruct %v2uint [[word2_2]] [[word3_0]] +// CHECK: [[val1_1:%[0-9]+]] = OpBitcast %double [[merge]] + +// CHECK: [[ptr_15:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_4_0]] +// CHECK: [[word4_0:%[0-9]+]] = OpLoad %uint [[ptr_15]] +// CHECK: [[index_5_0:%[0-9]+]] = OpIAdd %uint [[index_4_0]] %uint_1 +// CHECK: [[ptr_16:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_5_0]] +// CHECK: [[word5_0:%[0-9]+]] = OpLoad %uint [[ptr_16]] // CHECK: [[index_6:%[0-9]+]] = OpIAdd %uint [[index_5_0]] %uint_1 -// CHECK: [[ptr_17:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_6]] +// CHECK: [[merge:%[0-9]+]] = OpCompositeConstruct %v2uint [[word4_0]] [[word5_0]] +// CHECK: [[val2_1:%[0-9]+]] = OpBitcast %double [[merge]] + +// CHECK: [[ptr_17:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_6]] // CHECK: [[word6:%[0-9]+]] = OpLoad %uint [[ptr_17]] // CHECK: [[index_7:%[0-9]+]] = OpIAdd %uint [[index_6]] %uint_1 -// CHECK: [[ptr_18:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_7]] +// CHECK: [[ptr_18:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_7]] // CHECK: 
[[word7:%[0-9]+]] = OpLoad %uint [[ptr_18]] -// CHECK: [[word6_ulong:%[0-9]+]] = OpUConvert %ulong [[word6]] -// CHECK: [[word7_ulong:%[0-9]+]] = OpUConvert %ulong [[word7]] -// CHECK: [[word7_ulong_shifted:%[0-9]+]] = OpShiftLeftLogical %ulong [[word7_ulong]] %uint_32 -// CHECK: [[val3_ulong:%[0-9]+]] = OpBitwiseOr %ulong [[word6_ulong]] [[word7_ulong_shifted]] -// CHECK: [[val3_1:%[0-9]+]] = OpBitcast %double [[val3_ulong]] -// CHECK: [[row0_1:%[0-9]+]] = OpCompositeConstruct %v2double [[val0_1]] [[val2_1]] -// CHECK: [[row1_1:%[0-9]+]] = OpCompositeConstruct %v2double [[val1_1]] [[val3_1]] -// CHECK: [[matrix_1:%[0-9]+]] = OpCompositeConstruct %mat2v2double [[row0_1]] [[row1_1]] -// CHECK: OpStore %f64 [[matrix_1]] +// CHECK: [[index_8:%[0-9]+]] = OpIAdd %uint [[index_7]] %uint_1 +// CHECK: [[merge:%[0-9]+]] = OpCompositeConstruct %v2uint [[word6]] [[word7]] +// CHECK: [[val3_1:%[0-9]+]] = OpBitcast %double [[merge]] + +// CHECK: [[row0_1:%[0-9]+]] = OpCompositeConstruct %v2double [[val0_1]] [[val2_1]] +// CHECK: [[row1_1:%[0-9]+]] = OpCompositeConstruct %v2double [[val1_1]] [[val3_1]] +// CHECK: [[matrix_1:%[0-9]+]] = OpCompositeConstruct %mat2v2double [[row0_1]] [[row1_1]] +// CHECK: OpStore %f64 [[matrix_1]] float64_t2x2 f64 = buf.Load(tid.x); // ********* array of matrices ******************** diff --git a/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-load.scalar.hlsl b/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-load.scalar.hlsl index a1a3e7694a..96b20034b3 100644 --- a/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-load.scalar.hlsl +++ b/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-load.scalar.hlsl @@ -80,42 +80,34 @@ ByteAddressBuffer buf; // ********* 64-bit scalar ******************** -// CHECK: [[ptr_9:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[addr:%[0-9]+]] -// CHECK: [[word0:%[0-9]+]] = OpLoad %uint [[ptr_9]] -// CHECK: [[newAddr:%[0-9]+]] = OpIAdd 
%uint [[addr]] %uint_1 -// CHECK: [[ptr_10:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[newAddr]] -// CHECK: [[word1:%[0-9]+]] = OpLoad %uint [[ptr_10]] -// CHECK: [[word0ULong:%[0-9]+]] = OpUConvert %ulong [[word0]] -// CHECK: [[word1ULong:%[0-9]+]] = OpUConvert %ulong [[word1]] -// CHECK:[[shiftedWord1ULong:%[0-9]+]] = OpShiftLeftLogical %ulong [[word1ULong]] %uint_32 -// CHECK: [[val:%[0-9]+]] = OpBitwiseOr %ulong [[word0ULong]] [[shiftedWord1ULong]] -// CHECK: OpStore %u64 [[val]] +// CHECK: [[ptr_9:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[addr:%[0-9]+]] +// CHECK: [[word0:%[0-9]+]] = OpLoad %uint [[ptr_9]] +// CHECK: [[newAddr:%[0-9]+]] = OpIAdd %uint [[addr]] %uint_1 +// CHECK: [[ptr_10:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[newAddr]] +// CHECK: [[word1:%[0-9]+]] = OpLoad %uint [[ptr_10]] +// CHECK: [[merge:%[0-9]+]] = OpCompositeConstruct %v2uint [[word0]] [[word1]] +// CHECK: [[val:%[0-9]+]] = OpBitcast %ulong [[merge]] +// CHECK: OpStore %u64 [[val]] uint64_t u64 = buf.Load(tid.x); -// CHECK: [[ptr_11:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[addr_0:%[0-9]+]] -// CHECK: [[word0_0:%[0-9]+]] = OpLoad %uint [[ptr_11]] -// CHECK: [[newAddr_0:%[0-9]+]] = OpIAdd %uint [[addr_0]] %uint_1 -// CHECK: [[ptr_12:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[newAddr_0]] -// CHECK: [[word1_0:%[0-9]+]] = OpLoad %uint [[ptr_12]] -// CHECK: [[word0Long:%[0-9]+]] = OpUConvert %ulong [[word0_0]] -// CHECK: [[word1Long:%[0-9]+]] = OpUConvert %ulong [[word1_0]] -// CHECK: [[shiftedWord1Long:%[0-9]+]] = OpShiftLeftLogical %ulong [[word1Long]] %uint_32 -// CHECK: [[val_ulong:%[0-9]+]] = OpBitwiseOr %ulong [[word0Long]] [[shiftedWord1Long]] -// CHECK: [[val_long:%[0-9]+]] = OpBitcast %long [[val_ulong]] -// CHECK: OpStore %i64 [[val_long]] +// CHECK: [[ptr_11:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[addr_0:%[0-9]+]] +// CHECK: [[word0_0:%[0-9]+]] = OpLoad %uint [[ptr_11]] +// 
CHECK: [[newAddr_0:%[0-9]+]] = OpIAdd %uint [[addr_0]] %uint_1 +// CHECK: [[ptr_12:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[newAddr_0]] +// CHECK: [[word1_0:%[0-9]+]] = OpLoad %uint [[ptr_12]] +// CHECK: [[merge:%[0-9]+]] = OpCompositeConstruct %v2uint [[word0_0]] [[word1_0]] +// CHECK: [[val_long:%[0-9]+]] = OpBitcast %long [[merge]] +// CHECK: OpStore %i64 [[val_long]] int64_t i64 = buf.Load(tid.x); -// CHECK: [[ptr_13:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[addr_1:%[0-9]+]] -// CHECK: [[word0_1:%[0-9]+]] = OpLoad %uint [[ptr_13]] -// CHECK: [[newAddr_1:%[0-9]+]] = OpIAdd %uint [[addr_1]] %uint_1 -// CHECK: [[ptr_14:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[newAddr_1]] -// CHECK: [[word1_1:%[0-9]+]] = OpLoad %uint [[ptr_14]] -// CHECK: [[word0Long_0:%[0-9]+]] = OpUConvert %ulong [[word0_1]] -// CHECK: [[word1Long_0:%[0-9]+]] = OpUConvert %ulong [[word1_1]] -// CHECK: [[shiftedWord1Long_0:%[0-9]+]] = OpShiftLeftLogical %ulong [[word1Long_0]] %uint_32 -// CHECK: [[val_ulong_0:%[0-9]+]] = OpBitwiseOr %ulong [[word0Long_0]] [[shiftedWord1Long_0]] -// CHECK: [[val_double:%[0-9]+]] = OpBitcast %double [[val_ulong_0]] -// CHECK: OpStore %f64 [[val_double]] +// CHECK: [[ptr_13:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[addr_1:%[0-9]+]] +// CHECK: [[word0_1:%[0-9]+]] = OpLoad %uint [[ptr_13]] +// CHECK: [[newAddr_1:%[0-9]+]] = OpIAdd %uint [[addr_1]] %uint_1 +// CHECK: [[ptr_14:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[newAddr_1]] +// CHECK: [[word1_1:%[0-9]+]] = OpLoad %uint [[ptr_14]] +// CHECK: [[merge:%[0-9]+]] = OpCompositeConstruct %v2uint [[word0_1]] [[word1_1]] +// CHECK: [[val_double:%[0-9]+]] = OpBitcast %double [[merge]] +// CHECK: OpStore %f64 [[val_double]] double f64 = buf.Load(tid.x); // ********* array of scalars ***************** @@ -124,68 +116,63 @@ ByteAddressBuffer buf; // CHECK: [[index0:%[0-9]+]] = OpShiftRightLogical %uint [[addr0:%[0-9]+]] %uint_2 // CHECK: 
[[byteOff0:%[0-9]+]] = OpUMod %uint [[addr0]] %uint_4 // CHECK: [[bitOff0:%[0-9]+]] = OpShiftLeftLogical %uint [[byteOff0]] %uint_3 -// CHECK: [[ptr_15:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index0]] -// CHECK: [[word0_2:%[0-9]+]] = OpLoad %uint [[ptr_15]] -// CHECK: [[shift_4:%[0-9]+]] = OpShiftRightLogical %uint [[word0_2]] [[bitOff0]] +// CHECK: [[ptr_15:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index0]] +// CHECK: [[word0_2:%[0-9]+]] = OpLoad %uint [[ptr_15]] +// CHECK: [[shift_4:%[0-9]+]] = OpShiftRightLogical %uint [[word0_2]] [[bitOff0]] // CHECK: [[val0:%[0-9]+]] = OpUConvert %ushort [[shift_4]] // CHECK: [[addr1:%[0-9]+]] = OpIAdd %uint [[addr0]] %uint_2 // CHECK: [[index1:%[0-9]+]] = OpShiftRightLogical %uint [[addr1]] %uint_2 // CHECK: [[byteOff1:%[0-9]+]] = OpUMod %uint [[addr1]] %uint_4 // CHECK: [[bitOff1:%[0-9]+]] = OpShiftLeftLogical %uint [[byteOff1]] %uint_3 -// CHECK: [[ptr_16:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index1]] -// CHECK: [[word0_3:%[0-9]+]] = OpLoad %uint [[ptr_16]] +// CHECK: [[ptr_16:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index1]] +// CHECK: [[word0_3:%[0-9]+]] = OpLoad %uint [[ptr_16]] // CHECK: [[val1uint:%[0-9]+]] = OpShiftRightLogical %uint [[word0_3]] [[bitOff1]] // CHECK: [[val1:%[0-9]+]] = OpUConvert %ushort [[val1uint]] // CHECK: [[addr2:%[0-9]+]] = OpIAdd %uint [[addr1]] %uint_2 // CHECK: [[index2:%[0-9]+]] = OpShiftRightLogical %uint [[addr2]] %uint_2 // CHECK: [[byteOff2:%[0-9]+]] = OpUMod %uint [[addr2]] %uint_4 // CHECK: [[bitOff2:%[0-9]+]] = OpShiftLeftLogical %uint [[byteOff2]] %uint_3 -// CHECK: [[ptr_17:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index2]] -// CHECK: [[word1_2:%[0-9]+]] = OpLoad %uint [[ptr_17]] -// CHECK: [[shift_5:%[0-9]+]] = OpShiftRightLogical %uint [[word1_2]] [[bitOff2]] +// CHECK: [[ptr_17:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index2]] +// CHECK: [[word1_2:%[0-9]+]] = OpLoad 
%uint [[ptr_17]] +// CHECK: [[shift_5:%[0-9]+]] = OpShiftRightLogical %uint [[word1_2]] [[bitOff2]] // CHECK: [[val2:%[0-9]+]] = OpUConvert %ushort [[shift_5]] // CHECK: [[uArr:%[0-9]+]] = OpCompositeConstruct %_arr_ushort_uint_3 [[val0]] [[val1]] [[val2]] -// CHECK: OpStore %uArr [[uArr]] +// CHECK: OpStore %uArr [[uArr]] uint16_t uArr[3] = buf.Load(tid.x); // CHECK: [[index_1:%[0-9]+]] = OpShiftRightLogical %uint [[addr_2:%[0-9]+]] %uint_2 -// CHECK: [[ptr_18:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_1]] -// CHECK: [[val0_uint:%[0-9]+]] = OpLoad %uint [[ptr_18]] +// CHECK: [[ptr_18:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_1]] +// CHECK: [[val0_uint:%[0-9]+]] = OpLoad %uint [[ptr_18]] // CHECK: [[val0_0:%[0-9]+]] = OpBitcast %int [[val0_uint]] -// CHECK: [[newIndex:%[0-9]+]] = OpIAdd %uint [[index_1]] %uint_1 -// CHECK: [[ptr_19:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[newIndex]] -// CHECK: [[val1_uint:%[0-9]+]] = OpLoad %uint [[ptr_19]] +// CHECK: [[newIndex:%[0-9]+]] = OpIAdd %uint [[index_1]] %uint_1 +// CHECK: [[ptr_19:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[newIndex]] +// CHECK: [[val1_uint:%[0-9]+]] = OpLoad %uint [[ptr_19]] // CHECK: [[val1_0:%[0-9]+]] = OpBitcast %int [[val1_uint]] -// CHECK: [[iArr:%[0-9]+]] = OpCompositeConstruct %_arr_int_uint_2 [[val0_0]] [[val1_0]] -// CHECK: OpStore %iArr [[iArr]] +// CHECK: [[iArr:%[0-9]+]] = OpCompositeConstruct %_arr_int_uint_2 [[val0_0]] [[val1_0]] +// CHECK: OpStore %iArr [[iArr]] int iArr[2] = buf.Load(tid.x); -// CHECK: [[index_0:%[0-9]+]] = OpShiftRightLogical %uint [[addr_0:%[0-9]+]] %uint_2 -// CHECK: [[ptr_20:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_0]] -// CHECK: [[val0_word0_uint:%[0-9]+]] = OpLoad %uint [[ptr_20]] -// CHECK: [[index_1:%[0-9]+]] = OpIAdd %uint [[index_0]] %uint_1 -// CHECK: [[ptr_21:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_1]] -// CHECK: 
[[val0_word1_uint:%[0-9]+]] = OpLoad %uint [[ptr_21]] -// CHECK: [[val0_word0_ulong:%[0-9]+]] = OpUConvert %ulong [[val0_word0_uint]] -// CHECK: [[val0_word1_ulong:%[0-9]+]] = OpUConvert %ulong [[val0_word1_uint]] -// CHECK: [[shifted_val0_word1_ulong:%[0-9]+]] = OpShiftLeftLogical %ulong [[val0_word1_ulong]] %uint_32 -// CHECK: [[val0_ulong:%[0-9]+]] = OpBitwiseOr %ulong [[val0_word0_ulong]] [[shifted_val0_word1_ulong]] -// CHECK: [[val0_double:%[0-9]+]] = OpBitcast %double [[val0_ulong]] +// CHECK: [[index_0:%[0-9]+]] = OpShiftRightLogical %uint [[addr_0:%[0-9]+]] %uint_2 +// CHECK: [[ptr_20:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_0]] +// CHECK: [[val0_word0_uint:%[0-9]+]] = OpLoad %uint [[ptr_20]] +// CHECK: [[index_1:%[0-9]+]] = OpIAdd %uint [[index_0]] %uint_1 +// CHECK: [[ptr_21:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_1]] +// CHECK: [[val0_word1_uint:%[0-9]+]] = OpLoad %uint [[ptr_21]] +// CHECK: [[index_2:%[0-9]+]] = OpIAdd %uint [[index_1]] %uint_1 +// CHECK: [[merge:%[0-9]+]] = OpCompositeConstruct %v2uint [[val0_word0_uint]] [[val0_word1_uint]] +// CHECK: [[val0_double:%[0-9]+]] = OpBitcast %double [[merge]] + +// CHECK: [[ptr_22:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_2]] +// CHECK: [[val1_word0_uint:%[0-9]+]] = OpLoad %uint [[ptr_22]] +// CHECK: [[index_3:%[0-9]+]] = OpIAdd %uint [[index_2]] %uint_1 +// CHECK: [[ptr_23:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_3]] +// CHECK: [[val1_word1_uint:%[0-9]+]] = OpLoad %uint [[ptr_23]] +// CHECK: [[index_4:%[0-9]+]] = OpIAdd %uint [[index_3]] %uint_1 +// CHECK: [[merge:%[0-9]+]] = OpCompositeConstruct %v2uint [[val1_word0_uint]] [[val1_word1_uint]] +// CHECK: [[val1_double:%[0-9]+]] = OpBitcast %double [[merge]] // -// CHECK: [[index_2:%[0-9]+]] = OpIAdd %uint [[index_1]] %uint_1 -// CHECK: [[ptr_22:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_2]] -// CHECK: [[val1_word0_uint:%[0-9]+]] = 
OpLoad %uint [[ptr_22]] -// CHECK: [[index_3:%[0-9]+]] = OpIAdd %uint [[index_2]] %uint_1 -// CHECK: [[ptr_23:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_3]] -// CHECK: [[val1_word1_uint:%[0-9]+]] = OpLoad %uint [[ptr_23]] -// CHECK: [[val1_word0_ulong:%[0-9]+]] = OpUConvert %ulong [[val1_word0_uint]] -// CHECK: [[val1_word1_ulong:%[0-9]+]] = OpUConvert %ulong [[val1_word1_uint]] -// CHECK: [[shifted_val1_word1_ulong:%[0-9]+]] = OpShiftLeftLogical %ulong [[val1_word1_ulong]] %uint_32 -// CHECK: [[val1_ulong:%[0-9]+]] = OpBitwiseOr %ulong [[val1_word0_ulong]] [[shifted_val1_word1_ulong]] -// CHECK: [[val1_double:%[0-9]+]] = OpBitcast %double [[val1_ulong]] -// -// CHECK: [[fArr:%[0-9]+]] = OpCompositeConstruct %_arr_double_uint_2 [[val0_double]] [[val1_double]] -// CHECK: OpStore %fArr [[fArr]] +// CHECK: [[fArr:%[0-9]+]] = OpCompositeConstruct %_arr_double_uint_2 [[val0_double]] [[val1_double]] +// CHECK: OpStore %fArr [[fArr]] double fArr[2] = buf.Load(tid.x); } diff --git a/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-load.vector.hlsl b/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-load.vector.hlsl index 07f76aad6a..16702c0e37 100644 --- a/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-load.vector.hlsl +++ b/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-load.vector.hlsl @@ -65,30 +65,25 @@ void main(uint3 tid : SV_DispatchThreadId) // ********* 64-bit vector ******************** -// CHECK: [[index_3:%[0-9]+]] = OpShiftRightLogical %uint [[addr0_2:%[0-9]+]] %uint_2 -// CHECK: [[ptr_6:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_3]] -// CHECK: [[word0_3:%[0-9]+]] = OpLoad %uint [[ptr_6]] -// CHECK: [[index_1_2:%[0-9]+]] = OpIAdd %uint [[index_3]] %uint_1 -// CHECK: [[ptr_7:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_1_2]] -// CHECK: [[word1_2:%[0-9]+]] = OpLoad %uint [[ptr_7]] -// CHECK: [[word0_ulong:%[0-9]+]] = 
OpUConvert %ulong [[word0_3]] -// CHECK: [[word1_ulong:%[0-9]+]] = OpUConvert %ulong [[word1_2]] -// CHECK: [[shifted_word1_ulong:%[0-9]+]] = OpShiftLeftLogical %ulong [[word1_ulong]] %uint_32 -// CHECK: [[val0_ulong:%[0-9]+]] = OpBitwiseOr %ulong [[word0_ulong]] [[shifted_word1_ulong]] -// CHECK: [[val0_2:%[0-9]+]] = OpBitcast %double [[val0_ulong]] -// CHECK: [[index_2_0:%[0-9]+]] = OpIAdd %uint [[index_1_2]] %uint_1 -// CHECK: [[ptr_8:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_2_0]] -// CHECK: [[word0_4:%[0-9]+]] = OpLoad %uint [[ptr_8]] -// CHECK: [[index_3:%[0-9]+]] = OpIAdd %uint [[index_2_0]] %uint_1 -// CHECK: [[ptr_9:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_3]] -// CHECK: [[word1_3:%[0-9]+]] = OpLoad %uint [[ptr_9]] -// CHECK: [[word0_ulong_0:%[0-9]+]] = OpUConvert %ulong [[word0_4]] -// CHECK: [[word1_ulong_0:%[0-9]+]] = OpUConvert %ulong [[word1_3]] -// CHECK: [[shifted_word1_ulong_0:%[0-9]+]] = OpShiftLeftLogical %ulong [[word1_ulong_0]] %uint_32 -// CHECK: [[val1_ulong:%[0-9]+]] = OpBitwiseOr %ulong [[word0_ulong_0]] [[shifted_word1_ulong_0]] -// CHECK: [[val1_2:%[0-9]+]] = OpBitcast %double [[val1_ulong]] -// CHECK: [[fVec:%[0-9]+]] = OpCompositeConstruct %v2double [[val0_2]] [[val1_2]] -// CHECK: OpStore %f64 [[fVec]] +// CHECK: [[index_3:%[0-9]+]] = OpShiftRightLogical %uint [[addr0_2:%[0-9]+]] %uint_2 +// CHECK: [[ptr_6:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_3]] +// CHECK: [[word0_3:%[0-9]+]] = OpLoad %uint [[ptr_6]] +// CHECK: [[index_1_2:%[0-9]+]] = OpIAdd %uint [[index_3]] %uint_1 +// CHECK: [[ptr_7:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_1_2]] +// CHECK: [[word1_2:%[0-9]+]] = OpLoad %uint [[ptr_7]] +// CHECK: [[index_2_0:%[0-9]+]] = OpIAdd %uint [[index_1_2]] %uint_1 +// CHECK: [[merge:%[0-9]+]] = OpCompositeConstruct %v2uint [[word0_3]] [[word1_2]] +// CHECK: [[val0_2:%[0-9]+]] = OpBitcast %double [[merge]] +// CHECK: [[ptr_8:%[0-9]+]] = 
OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_2_0]] +// CHECK: [[word0_4:%[0-9]+]] = OpLoad %uint [[ptr_8]] +// CHECK: [[index_3:%[0-9]+]] = OpIAdd %uint [[index_2_0]] %uint_1 +// CHECK: [[ptr_9:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf %uint_0 [[index_3]] +// CHECK: [[word1_3:%[0-9]+]] = OpLoad %uint [[ptr_9]] +// CHECK: [[index_4:%[0-9]+]] = OpIAdd %uint [[index_3]] %uint_1 +// CHECK: [[merge:%[0-9]+]] = OpCompositeConstruct %v2uint [[word0_4]] [[word1_3]] +// CHECK: [[val1_2:%[0-9]+]] = OpBitcast %double [[merge]] +// CHECK: [[fVec:%[0-9]+]] = OpCompositeConstruct %v2double [[val0_2]] [[val1_2]] +// CHECK: OpStore %f64 [[fVec]] float64_t2 f64 = buf.Load(tid.x); // ********* array of vectors ******************** diff --git a/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-store.struct.hlsl b/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-store.struct.hlsl index 3bf947afe2..10c978e44d 100644 --- a/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-store.struct.hlsl +++ b/tools/clang/test/CodeGenSPIRV/method.byte-address-buffer.templated-store.struct.hlsl @@ -226,18 +226,20 @@ void main(uint3 tid : SV_DispatchThreadId) { // The second member of S starts at byte offset 24 (6 words) // // CHECK: [[c_addr:%[0-9]+]] = OpIAdd %uint [[base_addr]] %uint_24 -// + // CHECK: [[c:%[0-9]+]] = OpCompositeExtract %double [[s0]] 1 +// CHECK: [[merge:%[0-9]+]] = OpBitcast %v2uint [[c]] +// CHECK: [[c_word0:%[0-9]+]] = OpCompositeExtract %uint [[merge]] 0 +// CHECK: [[c_word1:%[0-9]+]] = OpCompositeExtract %uint [[merge]] 1 + // CHECK: [[c_index:%[0-9]+]] = OpShiftRightLogical %uint [[c_addr]] %uint_2 -// CHECK: [[ptr_4:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[c_index]] -// CHECK: [[c_u64:%[0-9]+]] = OpBitcast %ulong [[c]] -// CHECK: [[c_word0:%[0-9]+]] = OpUConvert %uint [[c_u64]] -// CHECK: [[c_u64_shifted:%[0-9]+]] = OpShiftRightLogical %ulong [[c_u64]] %uint_32 -// CHECK: 
[[c_word1:%[0-9]+]] = OpUConvert %uint [[c_u64_shifted]] -// CHECK: OpStore [[ptr_4]] [[c_word0]] +// CHECK: [[ptr_4:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[c_index]] +// CHECK: OpStore [[ptr_4]] [[c_word0]] // CHECK: [[c_msb_index:%[0-9]+]] = OpIAdd %uint [[c_index]] %uint_1 -// CHECK: [[ptr_5:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[c_msb_index]] -// CHECK: OpStore [[ptr_5]] [[c_word1]] + +// CHECK: [[ptr_5:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[c_msb_index]] +// CHECK: OpStore [[ptr_5]] [[c_word1]] +// CHECK: [[next_index:%[0-9]+]] = OpIAdd %uint [[c_msb_index]] %uint_1 // // The third member of S starts at byte offset 32 (8 words) @@ -305,16 +307,17 @@ void main(uint3 tid : SV_DispatchThreadId) { // CHECK: [[b_addr:%[0-9]+]] = OpIAdd %uint [[base_addr]] %uint_48 // // CHECK: [[b:%[0-9]+]] = OpCompositeExtract %double [[s0]] 3 +// CHECK: [[merge:%[0-9]+]] = OpBitcast %v2uint [[b]] +// CHECK: [[b_word0:%[0-9]+]] = OpCompositeExtract %uint [[merge]] 0 +// CHECK: [[b_word1:%[0-9]+]] = OpCompositeExtract %uint [[merge]] 1 + // CHECK: [[b_index:%[0-9]+]] = OpShiftRightLogical %uint [[b_addr]] %uint_2 -// CHECK: [[ptr_9:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[b_index]] -// CHECK: [[b_u64:%[0-9]+]] = OpBitcast %ulong [[b]] -// CHECK: [[b_word0:%[0-9]+]] = OpUConvert %uint [[b_u64]] -// CHECK: [[b_u64_shifted:%[0-9]+]] = OpShiftRightLogical %ulong [[b_u64]] %uint_32 -// CHECK: [[b_word1:%[0-9]+]] = OpUConvert %uint [[b_u64_shifted]] -// CHECK: OpStore [[ptr_9]] [[b_word0]] +// CHECK: [[ptr_9:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[b_index]] +// CHECK: OpStore [[ptr_9]] [[b_word0]] // CHECK: [[b_msb_index:%[0-9]+]] = OpIAdd %uint [[b_index]] %uint_1 -// CHECK: [[ptr_10:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[b_msb_index]] -// CHECK: OpStore [[ptr_10]] [[b_word1]] +// CHECK: [[ptr_10:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 
[[b_msb_index]] +// CHECK: OpStore [[ptr_10]] [[b_word1]] +// CHECK: [[next_index:%[0-9]+]] = OpIAdd %uint [[b_msb_index]] %uint_1 // // The fifth member of S starts at byte offset 56 (14 words) @@ -651,19 +654,20 @@ void main(uint3 tid : SV_DispatchThreadId) { // // The second member of S starts at byte offset 24 (6 words) // -// CHECK: [[c_addr_0:%[0-9]+]] = OpIAdd %uint [[s1_addr]] %uint_24 +// CHECK: [[c_addr_0:%[0-9]+]] = OpIAdd %uint [[s1_addr]] %uint_24 // -// CHECK: [[c_0:%[0-9]+]] = OpCompositeExtract %double [[s1]] 1 -// CHECK: [[c_index_0:%[0-9]+]] = OpShiftRightLogical %uint [[c_addr_0]] %uint_2 -// CHECK: [[ptr_28:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[c_index_0]] -// CHECK: [[c_u64_0:%[0-9]+]] = OpBitcast %ulong [[c_0]] -// CHECK: [[c_word0_0:%[0-9]+]] = OpUConvert %uint [[c_u64_0]] -// CHECK: [[c_u64_shifted_0:%[0-9]+]] = OpShiftRightLogical %ulong [[c_u64_0]] %uint_32 -// CHECK: [[c_word1_0:%[0-9]+]] = OpUConvert %uint [[c_u64_shifted_0]] -// CHECK: OpStore [[ptr_28]] [[c_word0_0]] -// CHECK: [[c_msb_index_0:%[0-9]+]] = OpIAdd %uint [[c_index_0]] %uint_1 -// CHECK: [[ptr_29:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[c_msb_index_0]] -// CHECK: OpStore [[ptr_29]] [[c_word1_0]] +// CHECK: [[c_0:%[0-9]+]] = OpCompositeExtract %double [[s1]] 1 +// CHECK: [[merge:%[0-9]+]] = OpBitcast %v2uint [[c_0]] +// CHECK: [[c_word0_0:%[0-9]+]] = OpCompositeExtract %uint [[merge]] 0 +// CHECK: [[c_word1_0:%[0-9]+]] = OpCompositeExtract %uint [[merge]] 1 + +// CHECK: [[c_index_0:%[0-9]+]] = OpShiftRightLogical %uint [[c_addr_0]] %uint_2 +// CHECK: [[ptr_28:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[c_index_0]] +// CHECK: OpStore [[ptr_28]] [[c_word0_0]] +// CHECK: [[c_msb_index_0:%[0-9]+]] = OpIAdd %uint [[c_index_0]] %uint_1 +// CHECK: [[ptr_29:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[c_msb_index_0]] +// CHECK: OpStore [[ptr_29]] [[c_word1_0]] +// CHECK: [[index_next:%[0-9]+]] = OpIAdd 
%uint [[c_msb_index_0]] %uint_1 // // The third member of S starts at byte offset 32 (8 words) @@ -728,19 +732,19 @@ void main(uint3 tid : SV_DispatchThreadId) { // // The fourth member of S starts at byte offset 48 (12 words) // -// CHECK: [[b_addr_0:%[0-9]+]] = OpIAdd %uint [[s1_addr]] %uint_48 -// -// CHECK: [[b_0:%[0-9]+]] = OpCompositeExtract %double [[s1]] 3 -// CHECK: [[b_index_0:%[0-9]+]] = OpShiftRightLogical %uint [[b_addr_0]] %uint_2 -// CHECK: [[ptr_33:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[b_index_0]] -// CHECK: [[b_u64_0:%[0-9]+]] = OpBitcast %ulong [[b_0]] -// CHECK: [[b_word0_0:%[0-9]+]] = OpUConvert %uint [[b_u64_0]] -// CHECK: [[b_u64_shifted_0:%[0-9]+]] = OpShiftRightLogical %ulong [[b_u64_0]] %uint_32 -// CHECK: [[b_word1_0:%[0-9]+]] = OpUConvert %uint [[b_u64_shifted_0]] -// CHECK: OpStore [[ptr_33]] [[b_word0_0]] -// CHECK: [[b_msb_index_0:%[0-9]+]] = OpIAdd %uint [[b_index_0]] %uint_1 -// CHECK: [[ptr_34:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[b_msb_index_0]] -// CHECK: OpStore [[ptr_34]] [[b_word1_0]] +// CHECK: [[b_addr_0:%[0-9]+]] = OpIAdd %uint [[s1_addr]] %uint_48 +// +// CHECK: [[b_0:%[0-9]+]] = OpCompositeExtract %double [[s1]] 3 +// CHECK: [[merge:%[0-9]+]] = OpBitcast %v2uint [[b_0]] +// CHECK: [[b_word0_0:%[0-9]+]] = OpCompositeExtract %uint [[merge]] 0 +// CHECK: [[b_word1_0:%[0-9]+]] = OpCompositeExtract %uint [[merge]] 1 +// CHECK: [[b_index_0:%[0-9]+]] = OpShiftRightLogical %uint [[b_addr_0]] %uint_2 +// CHECK: [[ptr_33:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[b_index_0]] +// CHECK: OpStore [[ptr_33]] [[b_word0_0]] +// CHECK: [[b_msb_index_0:%[0-9]+]] = OpIAdd %uint [[b_index_0]] %uint_1 +// CHECK: [[ptr_34:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buf2 %uint_0 [[b_msb_index_0]] +// CHECK: OpStore [[ptr_34]] [[b_word1_0]] +// CHECK: [[next_index:%[0-9]+]] = OpIAdd %uint [[b_msb_index_0]] %uint_1 // // The fifth member of S starts at byte offset 56 (14 words)