Skip to content

Commit

Permalink
Fix ExecutionTest::DerivativesTest issues (#6311)
Browse files Browse the repository at this point in the history
This PR fixes several issues in the `ExecutionTest::DerivativesTest`:
- Use 1D quad order only when writing 1D derivative results. 2D results
are expected in standard 2D form.
- Use a separate derivatives verification function for SM 6.6 compute,
mesh and amplification shaders. In these cases the quad layout is well
defined, and so are the expected results. There is only one possible
result for `ddx_fine`/`ddy_fine` and two for `ddx_coarse`/`ddy_coarse`.
This is different from pixel shaders where the quad layout can vary
quite a bit, and so do the expected results.
- Change the expected values to match results for texture pixel `(2,2)`
- Adjust mesh shader dispatch dimensions to make sure `X * Y * Z <= 128`
- Use same logic (shared function) to calculate center pixel for
compute, mesh and amplification shaders
- To enable easier debugging in the future, I have added a function that
writes out the derivatives results (under `DERIVATIVES_TEST_DEBUG`
define)

Verified on:
- Latest Microsoft Basic Render Driver that supports shader model 6.8
(CS, MS, AS). Tested on x64 and arm64.
- NVIDIA GeForce RTX 2080 Ti (CS only)
- AMD Radeon RX 6900 XT (CS only)

Fixes #4787
  • Loading branch information
hekota authored Feb 22, 2024
1 parent d9266c7 commit fdbecd3
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 39 deletions.
131 changes: 105 additions & 26 deletions tools/clang/unittests/HLSLExec/ExecutionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3791,7 +3791,7 @@ TEST_F(ExecutionTest, BasicTriangleOpTestHalf) {
D3D_SHADER_MODEL_6_2);
}

void VerifyDerivResults(const float *pPixels, UINT offsetCenter) {
void VerifyDerivResults_PS_60(const float *pPixels, UINT offsetCenter) {

// pixel at the center
float CenterDDXFine = pPixels[offsetCenter];
Expand All @@ -3810,6 +3810,7 @@ void VerifyDerivResults(const float *pPixels, UINT offsetCenter) {
// 1 .125 .25

// In D3D12 there is no guarantee of how the adapter is grouping 2x2 pixels
// for pixel shaders and shader model 6.0.
// So for fine derivatives there can be up to two possible results for the
// center pixel, while for coarse derivatives there can be up to six possible
// results.
Expand Down Expand Up @@ -3844,6 +3845,45 @@ void VerifyDerivResults(const float *pPixels, UINT offsetCenter) {
}
}

// Verifies the derivative values read back for the center pixel of a
// shader model 6.6 compute, mesh or amplification shader run.
//
// pPixels      - read-back results viewed as a flat array of floats.
// offsetCenter - index (in floats) of the first component of the center
//                pixel; the four consecutive floats starting there are read
//                as ddx_fine, ddy_fine, ddx_coarse and ddy_coarse.
//
// The values are logged and then each one is compared against its expected
// result(s) with a tolerance of 1 ULP.
void VerifyDerivResults_CS_AS_MS_66(const float *pPixels, UINT offsetCenter) {

// pixel at the center
float CenterDDXFine = pPixels[offsetCenter];
float CenterDDYFine = pPixels[offsetCenter + 1];
float CenterDDXCoarse = pPixels[offsetCenter + 2];
float CenterDDYCoarse = pPixels[offsetCenter + 3];

// Log the raw values so a failing run shows what was actually read back.
LogCommentFmt(
L"center ddx_fine: %8f, ddy_fine: %8f, ddx_coarse: %8f, ddy_coarse: %8f",
CenterDDXFine, CenterDDYFine, CenterDDXCoarse, CenterDDYCoarse);

// The 4x4 texture used to calculate the derivatives looks like this:
// .125 .25 .5 1
// 2 4 16 32
// 32 64 *128* 256
// 256 512 1024 2048
//
// We are checking the derivative values calculated at the texture
// center pixel (2,2).

// In D3D12 for shader model 6.6 compute, mesh and amplification shaders
// the quad grouping is well defined. There is one possible result for
// fine derivatives and 2 possible results for coarse derivatives.
int ulpTolerance = 1;

// Fine ddx: 256 - 128
VERIFY_IS_TRUE(CompareFloatULP(CenterDDXFine, 128.0f, ulpTolerance));
// Fine ddy: 1024 - 128
VERIFY_IS_TRUE(CompareFloatULP(CenterDDYFine, 896.0f, ulpTolerance));

// Coarse ddx: 256 - 128 or 2048 - 1024
VERIFY_IS_TRUE(CompareFloatULP(CenterDDXCoarse, 128.0f, ulpTolerance) ||
CompareFloatULP(CenterDDXCoarse, 1024.0f, ulpTolerance));
// Coarse ddy: 1024 - 128 or 2048 - 256
VERIFY_IS_TRUE(CompareFloatULP(CenterDDYCoarse, 896.0f, ulpTolerance) ||
CompareFloatULP(CenterDDYCoarse, 1792.0f, ulpTolerance));
}

// Rendering two right triangles forming a square and assigning a texture value
// for each pixel to calculate derivatives.
TEST_F(ExecutionTest, PartialDerivTest) {
Expand All @@ -3870,7 +3910,7 @@ TEST_F(ExecutionTest, PartialDerivTest) {
UINT centerIndex = (UINT64)width * height / 2 - width / 2;
UINT offsetCenter = centerIndex * pixelSize;

VerifyDerivResults(pPixels, offsetCenter);
VerifyDerivResults_PS_60(pPixels, offsetCenter);
}

struct Dispatch {
Expand Down Expand Up @@ -3905,6 +3945,53 @@ std::shared_ptr<st::ShaderOpTest> RunDispatch(ID3D12Device *pDevice,
return test;
}

// Computes the element index of the "center" pixel whose derivative results
// are verified for the given dispatch dimensions.
//
// Two layouts are handled:
//  - height != 1 (2D): results are addressed as row * width + column.
//  - height == 1 (1D): results are stored in Z-order, in a pattern that
//    repeats every 16 elements.
//
// In both cases the returned index lands on the (2,2) pixel of a repeating
// 4x4 pattern located roughly in the middle of the dispatch.
UINT DerivativesTest_GetCenterIndex(Dispatch &D) {
  if (D.height != 1) {
    // 2D: halve each dimension, round down to a multiple of 4 to reach the
    // start of the repeating pattern, then step 2 rows down and 2 columns
    // across to reach pixel (2,2) of that pattern.
    UINT patternRow = ((D.height / 2UL) & ~0x3) + 2;
    UINT patternCol = ((D.width / 2UL) & ~0x3) + 2;
    return patternRow * D.width + patternCol;
  }

  // 1D quads: halve the width, round down to a multiple of 16 to reach the
  // start of the repeating pattern, then add 12, which is where the middle
  // (2,2) pixel of the pattern sits in Z-order.
  return (((UINT64)D.width / 2) & ~0xF) + 12;
}

// Debugging aid for DerivativesTest. When DERIVATIVES_TEST_DEBUG is defined,
// logs one line per (column, row) position of the dispatch, pairing the four
// unsigned values read back from resource "U3" with the four derivative
// floats from pPixels, followed by the center index used for verification.
// Without the define the function does nothing.
//
// D           - dispatch dimensions; width and height bound the dump, depth
//               is only printed in the header line.
// Test        - shader op test whose "U3" read-back data is dumped.
// pPixels     - derivative results, four floats per entry.
// centerIndex - index of the entry chosen as the center; logged only.
void DerivativesTest_DebugOutput(Dispatch &D,
                                 std::shared_ptr<st::ShaderOpTest> &Test,
                                 const float *pPixels, UINT centerIndex) {
#ifndef DERIVATIVES_TEST_DEBUG
  // Debug output disabled: silence unused-parameter warnings.
  UNREFERENCED_PARAMETER(D);
  UNREFERENCED_PARAMETER(Test);
  UNREFERENCED_PARAMETER(pPixels);
  UNREFERENCED_PARAMETER(centerIndex);
#else
  LogCommentFmt(L"------------------------------------");
  MappedData dbgReadback;
  Test->GetReadBackData("U3", &dbgReadback);
  UINT *pDbgValues = (UINT *)dbgReadback.data();

  LogCommentFmt(L"DISPATCH %d x %d x %d", D.width, D.height, D.depth);
  for (int row = 0; row < D.height; row++) {
    for (int col = 0; col < D.width; col++) {
      // Each entry occupies four consecutive values in both buffers.
      UINT base = (row * 4) * D.width + col * 4;
      const UINT *pIds = pDbgValues + base;
      const float *pDerivs = pPixels + base;
      LogCommentFmt(L"%3d (%2d, %2d, %2d)\t ddx_fine: %8f, ddy_fine: %8f, "
                    L"ddx_coarse: %8f, ddy_coarse: %8f",
                    pIds[0], pIds[1], pIds[2], pIds[3], pDerivs[0],
                    pDerivs[1], pDerivs[2], pDerivs[3]);
    }
  }
  LogCommentFmt(L"CENTER %d", centerIndex);
  LogCommentFmt(L"------------------------------------");
#endif
}

TEST_F(ExecutionTest, DerivativesTest) {
const UINT pixelSize = 4; // always float4

Expand All @@ -3925,12 +4012,12 @@ TEST_F(ExecutionTest, DerivativesTest) {

std::vector<Dispatch> dispatches = {{40, 1, 1}, {1000, 1, 1}, {32, 32, 1},
{16, 64, 1}, {4, 12, 4}, {4, 64, 1},
{16, 16, 3}, {32, 8, 2}};
{16, 16, 3}, {32, 8, 2}, {8, 8, 1}};

std::vector<Dispatch> meshDispatches = {
{60, 1, 1}, {128, 1, 1}, {8, 8, 1}, {32, 8, 1},
{8, 16, 4}, {8, 64, 1}, {8, 8, 3},
};
std::vector<Dispatch> meshDispatches = {// (X * Y * Z) must be <= 128
{60, 1, 1}, {128, 1, 1}, {8, 8, 1},
{16, 8, 1}, {8, 4, 2}, {10, 10, 1},
{4, 16, 2}, {4, 16, 2}};

std::vector<Dispatch> badDispatches = {{16, 3, 1}, {2, 16, 1}, {33, 1, 1}};

Expand All @@ -3945,25 +4032,15 @@ TEST_F(ExecutionTest, DerivativesTest) {
RunDispatch(pDevice, m_support, pShaderOp, D);

test->GetReadBackData("U0", &data);

float *pPixels = (float *)data.data();
;

UINT centerIndex = 0;
if (D.height == 1) {
centerIndex = (((UINT64)(D.width * D.height * D.depth) / 2) & ~0xF) + 10;
} else {
// To find roughly the center for compute, divide the height and width in
// half, truncate to the previous multiple of 4 to get to the start of the
// repeating pattern and then add 2 rows to get to the second row of quads
// and 2 to get to the first texel of the second row of that quad row
UINT centerRow = ((D.height / 2UL) & ~0x3) + 2;
UINT centerCol = ((D.width / 2UL) & ~0x3) + 2;
centerIndex = centerRow * D.width + centerCol;
}
UINT centerIndex = DerivativesTest_GetCenterIndex(D);

DerivativesTest_DebugOutput(D, test, pPixels, centerIndex);

UINT offsetCenter = centerIndex * pixelSize;
LogCommentFmt(L"Verifying derivatives in compute shader results");
VerifyDerivResults(pPixels, offsetCenter);
VerifyDerivResults_CS_AS_MS_66(pPixels, offsetCenter);
}

if (DoesDeviceSupportMeshAmpDerivatives(pDevice)) {
Expand All @@ -3976,16 +4053,18 @@ TEST_F(ExecutionTest, DerivativesTest) {

test->GetReadBackData("U1", &data);
const float *pPixels = (float *)data.data();
UINT centerIndex =
(((UINT64)(D.width * D.height * D.depth) / 2) & ~0xF) + 10;
UINT centerIndex = DerivativesTest_GetCenterIndex(D);

DerivativesTest_DebugOutput(D, test, pPixels, centerIndex);

UINT offsetCenter = centerIndex * pixelSize;
LogCommentFmt(L"Verifying derivatives in mesh shader results");
VerifyDerivResults(pPixels, offsetCenter);
VerifyDerivResults_CS_AS_MS_66(pPixels, offsetCenter);

test->GetReadBackData("U2", &data);
pPixels = (float *)data.data();
LogCommentFmt(L"Verifying derivatives in amplification shader results");
VerifyDerivResults(pPixels, offsetCenter);
VerifyDerivResults_CS_AS_MS_66(pPixels, offsetCenter);
}
}

Expand Down
33 changes: 20 additions & 13 deletions tools/clang/unittests/HLSLExec/ShaderOpArith.xml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
<ShaderOp Name="Derivatives" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE">
<RootSignature>
RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)),
DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2), UAV(u3)),
StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT)
</RootSignature>
<Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
Expand Down Expand Up @@ -119,6 +119,9 @@
<Resource Name="U2" Dimension="BUFFER" Width="16384"
Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
Init="Zero" ReadBack="true" TransitionTo="UNORDERED_ACCESS" />
<Resource Name="U3" Dimension="BUFFER" Width="16384"
Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
Init="Zero" ReadBack="true" TransitionTo="UNORDERED_ACCESS" />

<RootValues>
<RootValue HeapName="ResHeap" />
Expand All @@ -131,6 +134,8 @@
NumElements="1024" StructureByteStride="16" />
<Descriptor Name='U2' Kind='UAV' ResName='U2'
NumElements="1024" StructureByteStride="16" />
<Descriptor Name='U3' Kind='UAV' ResName='U3'
NumElements="1024" StructureByteStride="16" />
</DescriptorHeap>
<DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
<Descriptor Name="RTarget" Kind="RTV"/>
Expand All @@ -157,6 +162,7 @@
RWStructuredBuffer<float4> g_bufMain : register(u0);
RWStructuredBuffer<float4> g_bufMesh : register(u1);
RWStructuredBuffer<float4> g_bufAmp : register(u2);
RWStructuredBuffer<uint4> g_bufDbg : register(u3);
float4 DerivTest(int2 uv) {
int3 offset = int3(uv%4, 0);
Expand Down Expand Up @@ -204,14 +210,7 @@
{ 1.0f, 1.0f }};
uint convert2Dto1D(uint x, uint y, uint width) {
// Convert 2D coords to 1D for testing
// All completed rows of quads
uint prevRows = (y/2)*2*width;
// All previous full quads on this quad row
uint prevQuads = (x/2)*4;
// index into current quad
uint quadIx = (y&1)*2 + (x&1);
return prevRows + prevQuads + quadIx;
return (y * width) + x;
}
float4 PSMain(PSInput input) : SV_TARGET {
Expand All @@ -232,10 +231,14 @@
[NumThreads(DISPATCHX, DISPATCHY, DISPATCHZ)]
void CSMain(uint3 id : SV_GroupThreadID, uint ix : SV_GroupIndex) {
if (DISPATCHY == 1 && DISPATCHZ == 1)
if (DISPATCHY == 1 && DISPATCHZ == 1) {
g_bufMain[ix] = DerivTest(ix);
else
g_bufDbg[ix] = uint4(ix, ConvertGroupIdx(id), 0);
}
else {
g_bufMain[convert2Dto1D(id.x, id.y, DISPATCHX)] = DerivTest(id.xy);
g_bufDbg[convert2Dto1D(id.x, id.y, DISPATCHX)] = uint4(ix, id);
}
}
#if DISPATCHX * DISPATCHY * DISPATCHZ > 128
Expand Down Expand Up @@ -273,10 +276,14 @@
verts[ix%6].uv = g_UV[ix%6];
tris[ix&1] = uint3((ix&1)*3, (ix&1)*3 + 1, (ix&1)*3 + 2);
g_bufMesh[ix] = DerivTest(ix);
if (DISPATCHY == 1 && DISPATCHZ == 1)
if (DISPATCHY == 1 && DISPATCHZ == 1) {
g_bufMesh[ix] = DerivTest(ix);
else
g_bufDbg[ix] = uint4(ix, id);
}
else {
g_bufMesh[convert2Dto1D(id.x, id.y, DISPATCHX)] = DerivTest(id.xy);
g_bufDbg[convert2Dto1D(id.x, id.y, DISPATCHX)] = uint4(ix, id);
}
}
]]>
Expand Down

0 comments on commit fdbecd3

Please sign in to comment.