From 51f036fe7ec0774fd2af008d234a9d1598a96a07 Mon Sep 17 00:00:00 2001 From: Tanner Van De Walle Date: Thu, 2 Feb 2023 16:08:02 -0800 Subject: [PATCH] Syncing spec updates --- d3d/ConservativeRasterization.md | 3 +- d3d/D3D12EnhancedBarriers.md | 530 ++++++++++++++++-------- d3d/D3D12VideoEncoding.md | 2 +- d3d/HLSL_SM_6_7_Advanced_Texture_Ops.md | 15 +- d3d/MeshShader.md | 86 ++++ d3d/Raytracing.md | 178 ++++---- d3d/ResourceBinding.md | 156 ++++++- 7 files changed, 710 insertions(+), 260 deletions(-) diff --git a/d3d/ConservativeRasterization.md b/d3d/ConservativeRasterization.md index a7565ac..55dff6e 100644 --- a/d3d/ConservativeRasterization.md +++ b/d3d/ConservativeRasterization.md @@ -259,7 +259,8 @@ RenderTarget with sample count \> 1 (see section 3.5.5), although in the case of Conservative Rasterization, it is the depth value going into the fixed function depth test that can be extrapolated. -Early Depth culling behavior with Depth Extrapolation is undefined. This +Use of early depth culling behavior in combination with conservative rasterization is undefined. +This includes early depth culling where depth extrapolation occurs. This is because some Early Depth culling hardware cannot properly support extrapolated depth values. 
However, Early Depth culling behavior in the presence of Depth Extrapolation is problematic even with hardware that diff --git a/d3d/D3D12EnhancedBarriers.md b/d3d/D3D12EnhancedBarriers.md index ed1222c..410d399 100644 --- a/d3d/D3D12EnhancedBarriers.md +++ b/d3d/D3D12EnhancedBarriers.md @@ -10,7 +10,7 @@ This document proposes an enhanced D3D12 Barrier API/DDI design that is capable - [Asymmetric Aliasing is Even More Expensive](#asymmetric-aliasing-is-even-more-expensive) - [Resource State Promotion and Decay](#resource-state-promotion-and-decay) - [Compute Queues and `D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE`](#compute-queues-and-d3d12_resource_state_pixel_shader_resource) - - [Full-Resource Clear, Copy or Discard](#full-resource-clear-copy-or-discard) + - [Full-Subresource Clear, Copy or Discard](#full-subresource-clear-copy-or-discard) - [Simultaneous Access - But Only Across Queues](#simultaneous-access---but-only-across-queues) - [Inefficient Batching of Subresource Range Transitions](#inefficient-batching-of-subresource-range-transitions) - [Synchronous Copy, Discard and Resolve](#synchronous-copy-discard-and-resolve) @@ -61,17 +61,17 @@ This document proposes an enhanced D3D12 Barrier API/DDI design that is capable - [`D3D12_TEXTURE_BARRIER`](#d3d12_texture_barrier) - [`D3D12_BUFFER_BARRIER`](#d3d12_buffer_barrier) - [`D3D12_BARRIER_GROUP`](#d3d12_barrier_group) - - [ID3D12GraphicsCommandList7::Barrier](#id3d12graphicscommandlist7barrier) - - [ID3D12VideoDecodeCommandList3::Barrier](#id3d12videodecodecommandlist3barrier) - - [ID3D12VideoProcessCommandList3::Barrier](#id3d12videoprocesscommandlist3barrier) - - [ID3D12VideoEncodeCommandList3::Barrier](#id3d12videoencodecommandlist3barrier) - - [ID3D12Device10::CreateCommittedResource3](#id3d12device10createcommittedresource3) - - [ID3D12Device10::CreatePlacedResource2](#id3d12device10createplacedresource2) - - [ID3D12Device10::CreateReservedResource2](#id3d12device10createreservedresource2) - - 
[ID3D12DebugCommandQueue1::AssertResourceAccess](#id3d12debugcommandqueue1assertresourceaccess) - - [ID3D12DebugCommandQueue1::AssertTextureLayout](#id3d12debugcommandqueue1asserttexturelayout) - - [ID3D12DebugCommandList3::AssertResourceAccess](#id3d12debugcommandlist3assertresourceaccess) - - [ID3D12DebugCommandList3::AssertTextureLayout](#id3d12debugcommandlist3asserttexturelayout) + - [ID3D12GraphicsCommandList7 Barrier](#id3d12graphicscommandlist7-barrier) + - [ID3D12VideoDecodeCommandList3 Barrier](#id3d12videodecodecommandlist3-barrier) + - [ID3D12VideoProcessCommandList3 Barrier](#id3d12videoprocesscommandlist3-barrier) + - [ID3D12VideoEncodeCommandList3 Barrier](#id3d12videoencodecommandlist3-barrier) + - [ID3D12Device10 CreateCommittedResource3](#id3d12device10-createcommittedresource3) + - [ID3D12Device10 CreatePlacedResource2](#id3d12device10-createplacedresource2) + - [ID3D12Device10 CreateReservedResource2](#id3d12device10-createreservedresource2) + - [ID3D12DebugCommandQueue1 AssertResourceAccess](#id3d12debugcommandqueue1-assertresourceaccess) + - [ID3D12DebugCommandQueue1 AssertTextureLayout](#id3d12debugcommandqueue1-asserttexturelayout) + - [ID3D12DebugCommandList3 AssertResourceAccess](#id3d12debugcommandlist3-assertresourceaccess) + - [ID3D12DebugCommandList3 AssertTextureLayout](#id3d12debugcommandlist3-asserttexturelayout) - [Barrier Examples](#barrier-examples) - [DDI](#ddi) - [`D3D12DDI_BARRIER_LAYOUT`](#d3d12ddi_barrier_layout) @@ -87,7 +87,7 @@ This document proposes an enhanced D3D12 Barrier API/DDI design that is capable - [`D3D12DDI_RANGE_BARRIER_FLAGS_0094`](#d3d12ddi_range_barrier_flags_0094) - [`D3D12DDI_RANGED_BARRIER_0094`](#d3d12ddi_ranged_barrier_0094) - [`D3D12DDI_BARRIER_TYPE`](#d3d12ddi_barrier_type) - - [D3D12DDIARG_BARRIER_0094](#d3d12ddiarg_barrier_0094) + - [`D3D12DDIARG_BARRIER_0094`](#d3d12ddiarg_barrier_0094) - [`PFND3D12DDI_BARRIER`](#pfnd3d12ddi_barrier) - 
[`D3D12DDIARG_CREATERESOURCE_0088`](#d3d12ddiarg_createresource_0088) - [`PFND3D12DDI_CREATEHEAPANDRESOURCE_0088`](#pfnd3d12ddi_createheapandresource_0088) @@ -188,11 +188,11 @@ Promotion and decay reflect the natural consequences of `ExecuteCommandLists` bo The `D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE` state is only usable in Direct command lists. Therefore, a Compute queue cannot use or transition a resource in state `D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE`|`D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE`. However, Compute queues DO support `D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE`, which has an identical layout in both Direct and Compute queues. This design oversight is a common source of d3d12 app developer frustration. The primary reason for the separate states is to provide precise execution sync and memory flush for a Direct queue. However, when passing resources between Direct and Compute queues, sync and flush are handled using fences. -### Full-Resource Clear, Copy or Discard +### Full-Subresource Clear, Copy or Discard -According to earlier D3D12 specifications, Clear, Copy or Discard commands require destination resources be in a specific resource state. Typically, this involves a state transition from a prior state. This makes sense when only a portion of the resource is being written to or discarded. However, when performing a full-resource Clear, Copy or Discard, the old resource data is being completely replaced. Therefore, a layout transition seems unnecessary. +According to earlier D3D12 specifications, Clear, Copy or Discard commands require destination resources be in a specific resource state. Typically, this involves a state transition from a prior state. This makes sense when only a portion of the resource is being written to or discarded. However, when performing a full-subresource Clear, Copy or Discard, the old resource data is being completely replaced. Therefore, a layout transition seems unnecessary. 
-This is particularly interesting when it comes to resource aliasing or updated tile mapping, as these operations require full-resource Clear, Copy or Discard when the target is a DSV or RSV. In such cases, there may not even be a previous "state" to transition from. In fact, it is conceivable that a memory-decompressing state transition could trigger device removal. +This is particularly interesting when it comes to resource aliasing or updated tile mapping, as these operations require full-subresource Clear, Copy or Discard when the target is a DSV or RTV. In such cases, there may not even be a previous "state" to transition from. In fact, it is conceivable that a memory-decompressing state transition could trigger device removal. ### Simultaneous Access - But Only Across Queues @@ -269,7 +269,7 @@ Graphics processors are designed to execute as much work in parallel as possible With legacy Resource Barriers, drivers must infer which work to synchronize. Often this is a best-guess since the driver may not be able to determine when a subresource was last accessed. Typically, the driver must assume the worst-case: any previous work that *could* have accessed a resource in StateBefore must be synchronized with any work that *could* access the resource in StateAfter. -The enhanced Barrier API's use explicit `SyncBefore` and `SyncAfter` values as logical bitfield masks. A Barrier must wait for all preceding command `SyncBefore` scopes to complete before executing the barrier. Similarly, a Barrier must block all subsequent `SyncAfter` scopes until the barrier completes. +The enhanced Barrier API's use explicit `SyncBefore` and `SyncAfter` values as bitfield masks that can describe one or more combined synchronization scopes. A Barrier must wait for all preceding command `SyncBefore` scopes to complete before executing the barrier. Similarly, a Barrier must block all subsequent `SyncAfter` scopes until the barrier completes.
D3D12_BARRIER_SYNC_NONE indicates synchronization is not needed either before or after barrier. A `D3D12_BARRIER_SYNC_NONE` `SyncBefore` value implies that the corresponding subresources are not accessed before the barrier in the same `ExecuteCommandLists` scope. Likewise, a SYNC_NONE `SyncAfter` value implies that the corresponding subresources are not accessed after the barrier in the same `ExecuteCommandLists` scope. Therefore, `Sync[Before|After]=D3D12_BARRIER_SYNC_NONE` must be paired with `Access[Before|After]=D3D12_BARRIER_ACCESS_NO_ACCESS`. @@ -285,7 +285,7 @@ DATA_STATIC_WHILE_SET_AT_EXECUTE descriptors require resource data is finalized #### Umbrella Synchronization Scopes -Umbrella synchronization scopes supersede one or more other synchronization scopes, and can effectively be treated as though all of the superseded scope bits are set. For example, the `D3D12_BARRIER_SYNC_DRAW` scope supersedes `D3D12_BARRIER_SYNC_INPUT_ASSEMBLER`, `D3D12_BARRIER_SYNC_VERTEX_SHADING`, `D3D12_BARRIER_SYNC_PIXEL_SHADING`, `D3D12_BARRIER_SYNC_DEPTH_STENCIL`, and `D3D12_BARRIER_SYNC_RENDER_TARGET` (see Figure 2). +Umbrella synchronization scopes supersede one or more other synchronization scopes, and can effectively be treated as though all of the superseded scope bits are set. For example, the `D3D12_BARRIER_SYNC_DRAW` scope supersedes `D3D12_BARRIER_SYNC_INDEX_INPUT`, `D3D12_BARRIER_SYNC_VERTEX_SHADING`, `D3D12_BARRIER_SYNC_PIXEL_SHADING`, `D3D12_BARRIER_SYNC_DEPTH_STENCIL`, and `D3D12_BARRIER_SYNC_RENDER_TARGET` (see Figure 2). ![Figure 2](images/D3D12PipelineBarriers/OverlappingScopes.png) *Figure 2* @@ -293,9 +293,9 @@ Umbrella synchronization scopes supersede one or more other synchronization scop The following tables list superseded synchronization scope bits for each umbrella synchronization scope bit. 
| `D3D12_BARRIER_SYNC_ALL` | -|------------------------------------------------| +|--------------------------------------------------| | `D3D12_BARRIER_SYNC_DRAW` | -| `D3D12_BARRIER_SYNC_INPUT_ASSEMBLER` | +| `D3D12_BARRIER_SYNC_INDEX_INPUT` | | `D3D12_BARRIER_SYNC_VERTEX_SHADING` | | `D3D12_BARRIER_SYNC_PIXEL_SHADING` | | `D3D12_BARRIER_SYNC_DEPTH_STENCIL` | @@ -311,13 +311,13 @@ The following tables list superseded synchronization scope bits for each umbrell | `D3D12_BARRIER_SYNC_VIDEO_ENCODE` | | `D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW` | -| `D3D12_BARRIER_SYNC_DRAW` | -|--------------------------------------| -| `D3D12_BARRIER_SYNC_INPUT_ASSEMBLER` | -| `D3D12_BARRIER_SYNC_VERTEX_SHADING` | -| `D3D12_BARRIER_SYNC_PIXEL_SHADING` | -| `D3D12_BARRIER_SYNC_DEPTH_STENCIL` | -| `D3D12_BARRIER_SYNC_RENDER_TARGET` | +| `D3D12_BARRIER_SYNC_DRAW` | +|-------------------------------------| +| `D3D12_BARRIER_SYNC_INDEX_INPUT` | +| `D3D12_BARRIER_SYNC_VERTEX_SHADING` | +| `D3D12_BARRIER_SYNC_PIXEL_SHADING` | +| `D3D12_BARRIER_SYNC_DEPTH_STENCIL` | +| `D3D12_BARRIER_SYNC_RENDER_TARGET` | | `D3D12_BARRIER_SYNC_ALL_SHADING` | |--------------------------------------| @@ -332,49 +332,49 @@ The following tables list superseded synchronization scope bits for each umbrell #### Sequential Barriers -Any barrier subsequent to another barrier on the same subresource in the same `ExecuteCommandLists` scope must use a `SyncBefore` value that fully-contains the preceding barrier `SyncAfter` scopes. +Any barrier subsequent to another barrier on the same subresource in the same `ExecuteCommandLists` scope must use a `SyncBefore` value that fully-contains the preceding barrier `SyncAfter` scope bits. -To provide well-defined barrier ordering, sequential, adjacent barriers on the same subresource with no intervening commands behave as though all `SyncBefore` and `SyncAfter` bits are logically combined. 
+To provide well-defined barrier ordering, sequential, adjacent barriers on the same subresource with no intervening commands behave as though all `SyncBefore` and `SyncAfter` bits are bitwise combined. #### Barrier Sync Examples -| `SyncBefore` | `SyncAfter` | +| `SyncBefore` | `SyncAfter` | |--------------------------|--------------------------| | `D3D12_BARRIER_SYNC_ALL` | `D3D12_BARRIER_SYNC_ALL` | Execute barrier **after** all preceding GPU work has completed and block **all subsequent work** until barrier has completed. -| `SyncBefore` | `SyncAfter` | +| `SyncBefore` | `SyncAfter` | |--------------------------|----------------------| | `D3D12_BARRIER_SYNC_ALL` | *specific sync bits* | Execute barrier **after** all preceding GPU work has completed and block *specific sync bits* GPU work until barrier has completed. -| `SyncBefore` | `SyncAfter` | +| `SyncBefore` | `SyncAfter` | |----------------------|--------------------------| | *specific sync bits* | `D3D12_BARRIER_SYNC_ALL` | Execute barrier **after** *specific sync bits* GPU work has completed and block **all subsequent work** until barrier has completed. -| `SyncBefore` | `SyncAfter` | +| `SyncBefore` | `SyncAfter` | |---------------------------|----------------------| | `D3D12_BARRIER_SYNC_NONE` | *specific sync bits* | Execute barrier **before** *specific sync bits* GPU work, but do not wait for any preceding work. -| `SyncBefore` | `SyncAfter` | +| `SyncBefore` | `SyncAfter` | |----------------------|---------------------------| | *specific sync bits* | `D3D12_BARRIER_SYNC_NONE` | Execute barrier **after** *specific sync bits* GPU work but do not block any subsequent work. 
-| `SyncBefore` | `SyncAfter` | +| `SyncBefore` | `SyncAfter` | |-------------------------------------|--------------------------------------| | `D3D12_BARRIER_SYNC_VERTEX_SHADING` | `D3D12_BARRIER_SYNC_COMPUTE_SHADING` | Execute barrier **after** all vertex stages have completed and block subsequent compute shading work until barrier has completed. -| `SyncBefore` | `SyncAfter` | +| `SyncBefore` | `SyncAfter` | |---------------------------|---------------------------| | `D3D12_BARRIER_SYNC_NONE` | `D3D12_BARRIER_SYNC_NONE` | @@ -394,6 +394,8 @@ To provide well-defined barrier ordering, the layout of a subresource after comp Since many GPU-write operations are cached, any Barrier from a write access to another write access or a read-only access may require a cache flush. The enhanced Barrier API's use access transitions to indicate that a subresource's memory needs to be made visible for a specific new access type. Like the layout transitions, some access transitions may not be needed if it is known that the memory of the associated subresource is already accessible for the desired use. +Barrier `AccessBefore` and/or `AccessAfter` values may be a bitwise-or combination of `D3D12_BARRIER_ACCESS` bits when more than one access type applies before or after the barrier. + Access transitions for textures are expressed as part of the `D3D12_TEXTURE_BARRIER` structure data. Access transitions for buffers are expressed using the `D3D12_BUFFER_BARRIER` structure. Access transitions do not perform synchronization. It is expected that synchronization between dependent accesses is handled using appropriate `SyncBefore` and `SyncAfter` values in the barrier. @@ -401,11 +403,11 @@ Access transitions do not perform synchronization. It is expected that synchron An `AccessBefore` made visible to a specified `AccessAfter` DOES NOT guarantee that the resource memory is also visible for a *different* access type. 
For example: ```C++ -MyTexBarrier.AccessBefore=UNORDERED_ACCESS; -MyTexBarrier.AccessAfter=SHADER_RESOURCE; +MyTexBarrier.AccessBefore=D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; +MyTexBarrier.AccessAfter=D3D12_BARRIER_ACCESS_SHADER_RESOURCE|D3D12_BARRIER_ACCESS_COPY_SOURCE; ``` -This access transition indicates that a subsequent shader-read access depends on a preceding unordered-access-write. However, this may not actually flush the UAV cache if the hardware is capable of reading shader resources directly from the UAV cache. +This access transition indicates that subsequent shader-resource and copy-source accesses depend on a preceding unordered-access-write. However, this may not actually flush the UAV cache if the hardware is capable of reading shader-resource and copy-source data directly from the UAV cache. `D3D12_BARRIER_ACCESS_COMMON` is a special access type that indicates any layout-compatible access. Transitioning to `D3D12_BARRIER_ACCESS_COMMON` means that subresource data must be available for any layout-compatible access after a barrier. Since buffers have no layout, `D3D12_BARRIER_ACCESS_COMMON` simply means any buffer-compatible access.
@@ -610,7 +612,7 @@ Non-simultaneous-access textures using a common layout can be accessed as `D3D12 |----------------------------------------------------------|----------------------------------------| | `D3D12_RESOURCE_STATE_COMMON` | `D3D12_BARRIER_SYNC_ALL` | | `D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER` | `D3D12_BARRIER_SYNC_ALL_SHADING` | -| `D3D12_RESOURCE_STATE_INDEX_BUFFER` | `D3D12_BARRIER_SYNC_INPUT_ASSEMBLER` | +| `D3D12_RESOURCE_STATE_INDEX_BUFFER` | `D3D12_BARRIER_SYNC_INDEX_INPUT` | | `D3D12_RESOURCE_STATE_RENDER_TARGET` | `D3D12_BARRIER_SYNC_RENDER_TARGET` | | `D3D12_RESOURCE_STATE_UNORDERED_ACCESS` | `D3D12_BARRIER_SYNC_ALL_SHADING` | | `D3D12_RESOURCE_STATE_DEPTH_WRITE` | `D3D12_BARRIER_SYNC_DEPTH_STENCIL` | @@ -674,7 +676,7 @@ For each "after resource": - If needed: Wait for all "before resource" accesses to complete. - If needed: Specify desired layout. - Use `LayoutBefore` of `D3D12_BARRIER_LAYOUT_UNDEFINED` to avoid modifying memory as part of the barrier. -- If needed: Perform a full-resource discard using `D3D2_TEXTURE_BARRIER_FLAG_DISCARD`. +- If needed: Perform a full-subresource discard using `D3D12_TEXTURE_BARRIER_FLAG_DISCARD`. - Must not use this flag if any of the "before resource" barriers transition layout or flush memory writes. - If needed: Set `AccessBefore` to `D3D12_BARRIER_ACCESS_NO_ACCESS` 'activate' a subresource previously 'deactivated' in the same `ExecuteCommandLists` scope.
@@ -894,7 +896,7 @@ As with `D3D12_RESOURCE_STATES`, Resource Accesses MUST be compatible with the t - `D3D12_BARRIER_SYNC_ALL` - `D3D12_BARRIER_SYNC_DRAW` -- `D3D12_BARRIER_SYNC_INPUT_ASSEMBLER` +- `D3D12_BARRIER_SYNC_INDEX_INPUT` - `D3D12_BARRIER_SYNC_VERTEX_SHADING` - `D3D12_BARRIER_SYNC_PIXEL_SHADING` - `D3D12_BARRIER_SYNC_DEPTH_STENCIL` @@ -962,9 +964,10 @@ Copy queues do not support layout transition Barriers, thus any subresources acc The following tables describe the Access types compatible with a given layout: -| `D3D12_BARRIER_LAYOUT_UNDEFINED` | -|----------------------------------| -| None | +| `D3D12_BARRIER_LAYOUT_UNDEFINED` | +|-----------------------------------------------------------------------------------| +| Any access bits (only when BOTH `LayoutBefore` AND `LayoutAfter` are `UNDEFINED`) | +| `D3D12_BARRIER_ACCESS_NO_ACCESS` | | `D3D12_BARRIER_LAYOUT_COMMON` | |----------------------------------------| @@ -1112,27 +1115,7 @@ Some Access types require matching Sync. 
For the following access bits, at leas | `D3D12_BARRIER_ACCESS_COMMON` | |--------------------------------------------------| -| `D3D12_BARRIER_SYNC_NONE` | -| `D3D12_BARRIER_SYNC_ALL` | -| `D3D12_BARRIER_SYNC_DRAW` | -| `D3D12_BARRIER_SYNC_INPUT_ASSEMBLER` | -| `D3D12_BARRIER_SYNC_VERTEX_SHADING` | -| `D3D12_BARRIER_SYNC_PIXEL_SHADING` | -| `D3D12_BARRIER_SYNC_DEPTH_STENCIL` | -| `D3D12_BARRIER_SYNC_RENDER_TARGET` | -| `D3D12_BARRIER_SYNC_COMPUTE_SHADING` | -| `D3D12_BARRIER_SYNC_RAYTRACING` | -| `D3D12_BARRIER_SYNC_COPY` | -| `D3D12_BARRIER_SYNC_RESOLVE` | -| `D3D12_BARRIER_SYNC_EXECUTE_INDIRECT` | -| `D3D12_BARRIER_SYNC_PREDICATION` | -| `D3D12_BARRIER_SYNC_ALL_SHADING` | -| `D3D12_BARRIER_SYNC_NON_PIXEL_SHADING` | -| `D3D12_BARRIER_SYNC_VIDEO_DECODE` | -| `D3D12_BARRIER_SYNC_VIDEO_PROCESS` | -| `D3D12_BARRIER_SYNC_VIDEO_ENCODE` | -| `D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW` | -| `D3D12_BARRIER_SYNC_SPLIT` | +| Any valid sync bits | | `D3D12_BARRIER_ACCESS_VERTEX_BUFFER` | |--------------------------------------| @@ -1150,11 +1133,11 @@ Some Access types require matching Sync. For the following access bits, at leas | `D3D12_BARRIER_SYNC_DRAW` | | `D3D12_BARRIER_SYNC_ALL_SHADING` | -| `D3D12_BARRIER_ACCESS_INDEX_BUFFER` | -|--------------------------------------| -| `D3D12_BARRIER_SYNC_ALL` | -| `D3D12_BARRIER_SYNC_INPUT_ASSEMBLER` | -| `D3D12_BARRIER_SYNC_DRAW` | +| `D3D12_BARRIER_ACCESS_INDEX_BUFFER` | +|-------------------------------------| +| `D3D12_BARRIER_SYNC_ALL` | +| `D3D12_BARRIER_SYNC_INDEX_INPUT` | +| `D3D12_BARRIER_SYNC_DRAW` | | `D3D12_BARRIER_ACCESS_RENDER_TARGET` | |--------------------------------------| @@ -1288,8 +1271,7 @@ Some Access types require matching Sync. 
For the following access bits, at leas | `D3D12_BARRIER_ACCESS_NO_ACCESS` | |----------------------------------| -| `D3D12_BARRIER_SYNC_NONE` | -| `D3D12_BARRIER_SYNC_SPLIT` | +| Any valid sync bits | ------------------------------------------------ @@ -1340,9 +1322,11 @@ typedef enum D3D12_BARRIER_LAYOUT #### `D3D12_BARRIER_LAYOUT_UNDEFINED` -Provides support for subresource layout changes where the previous layout is irrelevant or undefined. Typically, this is used for full-subresource or full-resource Clear, Discard, and Copy commands. +Provides support for subresource layout changes where the previous layout is irrelevant or undefined. Typically, this is used for full-subresource Clear, Discard, and Copy commands. -A layout transition with BOTH `LayoutBefore` and `LayoutAfter` set to `D3D12_BARRIER_LAYOUT_UNDEFINED` indicates a memory-access-only barrier. Many read/write operations support `D3D12_BARRIER_LAYOUT_COMMON`. In particular, Copy commands may write to textures using either the `D3D12_BARRIER_LAYOUT_COMMON` or `D3D12_BARRIER_LAYOUT_COPY`. A memory-access-only barrier can be used to flush copy writes to a texture without changing the texture layout. +A layout transition with BOTH `LayoutBefore` AND `LayoutAfter` set to `D3D12_BARRIER_LAYOUT_UNDEFINED` indicates a memory-access-only barrier. Many write operations support more than one layout (e.g. Copy operations support `D3D12_BARRIER_LAYOUT_COMMON` or `D3D12_BARRIER_LAYOUT_COPY_DEST`). A memory-access-only barrier can be used to flush writes to a texture without inadvertently changing the texture layout. + +A barrier with only `LayoutBefore` OR `LayoutAfter` set to `D3D12_BARRIER_LAYOUT_UNDEFINED` must set the corresponding `AccessBefore` or `AccessAfter` value to `D3D12_BARRIER_ACCESS_NO_ACCESS`. A texture with an undefined layout clearly does not have meaningful data and thus should not require preservation of data or cache flushes.
Barriers used for aliasing can take advantage of this to let the GPU discard outstanding cache writes. #### `D3D12_BARRIER_LAYOUT_COMMON` @@ -1468,13 +1452,15 @@ Supports common (barrier free) usage on video queues only. May be more optimal t ### `D3D12_BARRIER_SYNC` +Bit values representing synchronization scopes. Can be combined using bitwise-or in barrier `SyncBefore` and `SyncAfter` values. + ```c++ enum D3D12_BARRIER_SYNC { D3D12_BARRIER_SYNC_NONE = 0x0, D3D12_BARRIER_SYNC_ALL = 0x1, D3D12_BARRIER_SYNC_DRAW = 0x2, - D3D12_BARRIER_SYNC_INPUT_ASSEMBLER = 0x4, + D3D12_BARRIER_SYNC_INDEX_INPUT = 0x4, D3D12_BARRIER_SYNC_VERTEX_SHADING = 0x8, D3D12_BARRIER_SYNC_PIXEL_SHADING = 0x10, D3D12_BARRIER_SYNC_DEPTH_STENCIL = 0x20, @@ -1523,29 +1509,64 @@ The `SetGraphicsRoot*` synchronization is required to support `D3D12_DESCRIPTOR_ This is an umbrella scope for all Draw pipeline stages. A `SyncBefore` value of `D3D12_BARRIER_SYNC_DRAW` indicates ALL PRECEDING Draw work must complete before executing the barrier. A `SyncAfter` value of `D3D12_BARRIER_SYNC_DRAW` indicates ALL SUBSEQUENT Draw work must wait for the barrier to complete. -#### `D3D12_BARRIER_SYNC_INPUT_ASSEMBLER` +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_VERTEX_BUFFER` +- `D3D12_BARRIER_ACCESS_CONSTANT_BUFFER` +- `D3D12_BARRIER_ACCESS_INDEX_BUFFER` +- `D3D12_BARRIER_ACCESS_RENDER_TARGET` +- `D3D12_BARRIER_ACCESS_UNORDERED_ACCESS` +- `D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE` +- `D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ` +- `D3D12_BARRIER_ACCESS_SHADER_RESOURCE` +- `D3D12_BARRIER_ACCESS_STREAM_OUTPUT` + +#### `D3D12_BARRIER_SYNC_INDEX_INPUT` + +Synchronize scope for processing index buffer input. -Synchronize against Input Assembler stage execution. 
+Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_INDEX_BUFFER` #### `D3D12_BARRIER_SYNC_VERTEX_SHADING` -Synchronize against all vertex shading stages, including vertex, domain, hull, tessellation, geometry, amplification and mesh shading. +Synchronize scope for all vertex shading stages, including vertex, domain, hull, tessellation, geometry, amplification and mesh shading. + +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_VERTEX_BUFFER` +- `D3D12_BARRIER_ACCESS_CONSTANT_BUFFER` +- `D3D12_BARRIER_ACCESS_UNORDERED_ACCESS` +- `D3D12_BARRIER_ACCESS_SHADER_RESOURCE` +- `D3D12_BARRIER_ACCESS_STREAM_OUTPUT` #### `D3D12_BARRIER_SYNC_PIXEL_SHADING` -Synchronize against pixel shader execution. +Synchronize scope for pixel shader execution. + +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_CONSTANT_BUFFER` +- `D3D12_BARRIER_ACCESS_UNORDERED_ACCESS` +- `D3D12_BARRIER_ACCESS_SHADER_RESOURCE` #### `D3D12_BARRIER_SYNC_DEPTH_STENCIL` -Synchronize against depth/stencil read/write operations. This includes DSV accesses during `Draw*` and `ClearRenderTargetView`. +Synchronize scope for depth/stencil read/write operations. This includes DSV accesses during `Draw*` and `ClearDepthStencilView`. + +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE` +- `D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ` #### `D3D12_BARRIER_SYNC_RENDER_TARGET` -Synchronize against render target read/write operations. This include RTV writes during `Draw*` and `ClearRenderTargetView`. +Synchronize scope for render target read/write operations. This includes RTV writes during `Draw*` and `ClearRenderTargetView`.
#### `D3D12_BARRIER_SYNC_COMPUTE_SHADING` -Synchronize against the following GPU workloads: +Synchronize scope for the following GPU workloads: - `Dispatch` - `SetComputeRootDescriptorTable` @@ -1555,57 +1576,127 @@ Synchronize against the following GPU workloads: The `SetComputeRoot*` synchronization is required to support `D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTION` descriptors. +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_CONSTANT_BUFFER` +- `D3D12_BARRIER_ACCESS_UNORDERED_ACCESS` +- `D3D12_BARRIER_ACCESS_SHADER_RESOURCE` + #### `D3D12_BARRIER_SYNC_RAYTRACING` -Synchronize against raytracing execution. +Synchronize scope for raytracing execution. + +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ` #### `D3D12_BARRIER_SYNC_COPY` -Synchronize against Copy commands. +Synchronize scope for Copy commands. + +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_COPY_SOURCE` +- `D3D12_BARRIER_ACCESS_COPY_DEST` #### `D3D12_BARRIER_SYNC_RESOLVE` -Synchronize against Resolve commands. +Synchronize scope for Resolve commands. + +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_RESOLVE_SOURCE` +- `D3D12_BARRIER_ACCESS_RESOLVE_DEST` #### `D3D12_BARRIER_SYNC_EXECUTE_INDIRECT` -Synchronize against ExecuteIndirect execution. +Synchronize scope for ExecuteIndirect execution. + +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT` #### `D3D12_BARRIER_SYNC_ALL_SHADING` -Synchronize against ALL shader execution. +Synchronize scope for ALL shader execution.
+ +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_VERTEX_BUFFER` +- `D3D12_BARRIER_ACCESS_CONSTANT_BUFFER` +- `D3D12_BARRIER_ACCESS_UNORDERED_ACCESS` +- `D3D12_BARRIER_ACCESS_SHADER_RESOURCE` +- `D3D12_BARRIER_ACCESS_STREAM_OUTPUT` +- `D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ` +- `D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE` #### `D3D12_BARRIER_SYNC_NON_PIXEL_SHADING` -Synchronize against shader execution EXCEPT pixel shading. Exists for compatibility with legacy `ResourceBarrier` API. +Synchronize scope for shader execution EXCEPT pixel shading. Exists for compatibility with legacy `ResourceBarrier` API. + +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_VERTEX_BUFFER` +- `D3D12_BARRIER_ACCESS_CONSTANT_BUFFER` +- `D3D12_BARRIER_ACCESS_UNORDERED_ACCESS` +- `D3D12_BARRIER_ACCESS_SHADER_RESOURCE` +- `D3D12_BARRIER_ACCESS_STREAM_OUTPUT` +- `D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ` #### `D3D12_BARRIER_SYNC_VIDEO_DECODE` -Synchronize against Video Decode execution. +Synchronize scope for Video Decode execution. + +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_VIDEO_DECODE_READ` +- `D3D12_BARRIER_ACCESS_VIDEO_DECODE_WRITE` #### `D3D12_BARRIER_SYNC_VIDEO_PROCESS` -Synchronize against Video Process execution. +Synchronize scope for Video Process execution. + +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_VIDEO_PROCESS_READ` +- `D3D12_BARRIER_ACCESS_VIDEO_PROCESS_WRITE` #### `D3D12_BARRIER_SYNC_VIDEO_ENCODE` -Synchronize against Video Encode execution. +Synchronize scope for Video Encode execution. + +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_VIDEO_ENCODE_READ` +- `D3D12_BARRIER_ACCESS_VIDEO_ENCODE_WRITE` #### `D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE` -Synchronize against `ID3D12GraphicsCommandList4::BuildAccelerationStructure` work. 
+Synchronize scope for `ID3D12GraphicsCommandList4::BuildRaytracingAccelerationStructure` work. Corresponding barrier `Access[Before|After]` must have the `D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE` bit set. +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ` + #### `D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE` -Synchronize against `ID3D12GraphicsCommandList4::CopyRaytracingAccelerationStructure` work. +Synchronize scope for `ID3D12GraphicsCommandList4::CopyRaytracingAccelerationStructure` work. Corresponding barrier `Access[Before|After]` must have the `D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE` bit set. +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ` + #### `D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW` -Synchronize against `ID3D12GraphicsCommandList::ClearUnorderedAccessViewUint` and `ID3D12GraphicsCommandList::ClearUnorderedAccessViewFloat`. +Synchronize scope for `ID3D12GraphicsCommandList::ClearUnorderedAccessViewUint` and `ID3D12GraphicsCommandList::ClearUnorderedAccessViewFloat`. + +Access types in this scope are limited to: + +- `D3D12_BARRIER_ACCESS_COMMON` #### `D3D12_BARRIER_SYNC_SPLIT` @@ -1613,6 +1702,8 @@ Special sync bit indicating a [split barrier](#split-barriers). Used as a `Sync ### `D3D12_BARRIER_ACCESS` +Bit values representing access types. Can be combined using bitwise-or in barrier `AccessBefore` and `AccessAfter` values. + ```c++ enum D3D12_BARRIER_ACCESS { @@ -1655,112 +1746,217 @@ App developers should avoid using `D3D12_BARRIER_ACCESS_COMMON` as a barrier `Ac #### `D3D12_BARRIER_ACCESS_VERTEX_BUFFER` -Indicates a buffer resource is accessible as a vertex buffer in the current execution queue. +Indicates a buffer resource is accessible as a vertex buffer in the current execution queue. Vertex buffer accesses occur only in `D3D12_BARRIER_SYNC_VERTEX_SHADING` scope.
Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_VERTEX_BUFFER` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_VERTEX_SHADING` +- `D3D12_BARRIER_SYNC_DRAW` +- `D3D12_BARRIER_SYNC_ALL_SHADING` #### `D3D12_BARRIER_ACCESS_CONSTANT_BUFFER` -Indicates a buffer resource is accessible as a constant buffer in the current execution queue. +Indicates a buffer resource is accessible as a constant buffer in the current execution queue. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_CONSTANT_BUFFER` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_VERTEX_SHADING` +- `D3D12_BARRIER_SYNC_PIXEL_SHADING` +- `D3D12_BARRIER_SYNC_COMPUTE_SHADING` +- `D3D12_BARRIER_SYNC_DRAW` +- `D3D12_BARRIER_SYNC_ALL_SHADING` + #### `D3D12_BARRIER_ACCESS_INDEX_BUFFER` -Indicates a buffer resource is accessible as an index buffer in the current execution queue. +Indicates a buffer resource is accessible as an index buffer in the current execution queue. Index buffer accesses occur only in `D3D12_BARRIER_SYNC_INDEX_INPUT` scope. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_INDEX_BUFFER` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_INDEX_INPUT` +- `D3D12_BARRIER_SYNC_DRAW` +- `D3D12_BARRIER_SYNC_ALL` #### `D3D12_BARRIER_ACCESS_RENDER_TARGET` -Indicates a resource is accessible as a render target. +Indicates a resource is accessible as a render target. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_RENDER_TARGET` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_DRAW` +- `D3D12_BARRIER_SYNC_RENDER_TARGET` #### `D3D12_BARRIER_ACCESS_UNORDERED_ACCESS` -Indicates a resource is accessible as an unordered access resource. +Indicates a resource is accessible as an unordered access resource.
Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_UNORDERED_ACCESS` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_VERTEX_SHADING` +- `D3D12_BARRIER_SYNC_PIXEL_SHADING` +- `D3D12_BARRIER_SYNC_COMPUTE_SHADING` +- `D3D12_BARRIER_SYNC_VERTEX_SHADING` +- `D3D12_BARRIER_SYNC_DRAW` +- `D3D12_BARRIER_SYNC_ALL_SHADING` +- `D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW` #### `D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE` -Indicates a resource is accessible as a writable depth/stencil resource. +Indicates a resource is accessible as a writable depth/stencil resource. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_DRAW` +- `D3D12_BARRIER_SYNC_DEPTH_STENCIL` + #### `D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ` -Indicates a resource is accessible as a read-only depth/stencil resource. +Indicates a resource is accessible as a read-only depth/stencil resource. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_DRAW` +- `D3D12_BARRIER_SYNC_DEPTH_STENCIL` #### `D3D12_BARRIER_ACCESS_SHADER_RESOURCE` -Indicates a resource is accessible as a shader resource. +Indicates a resource is accessible as a shader resource. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_SHADER_RESOURCE` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_VERTEX_SHADING` +- `D3D12_BARRIER_SYNC_PIXEL_SHADING` +- `D3D12_BARRIER_SYNC_COMPUTE_SHADING` +- `D3D12_BARRIER_SYNC_DRAW` +- `D3D12_BARRIER_SYNC_ALL_SHADING` + #### `D3D12_BARRIER_ACCESS_STREAM_OUTPUT` -Indicates a buffer is accessible as a stream output target. +Indicates a buffer is accessible as a stream output target. 
Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_STREAM_OUTPUT` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_VERTEX_SHADING` +- `D3D12_BARRIER_SYNC_DRAW` +- `D3D12_BARRIER_SYNC_ALL_SHADING` #### `D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT` -Indicates a buffer is accessible as an indirect argument buffer. +Indicates a buffer is accessible as an indirect argument buffer. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_EXECUTE_INDIRECT` #### `D3D12_BARRIER_ACCESS_PREDICATION` -Indicates a buffer is accessible as a predication buffer. +Indicates a buffer is accessible as a predication buffer. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_PREDICATION` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_PREDICATION` #### `D3D12_BARRIER_ACCESS_COPY_DEST` -Indicates a resource is accessible as a copy destination. +Indicates a resource is accessible as a copy destination. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_COPY_DEST` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_COPY` #### `D3D12_BARRIER_ACCESS_COPY_SOURCE` -Indicates a resource is accessible as a copy source. +Indicates a resource is accessible as a copy source. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_COPY_SOURCE` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_COPY` #### `D3D12_BARRIER_ACCESS_RESOLVE_DEST` -Indicates a resource is accessible as a resolve destination. +Indicates a resource is accessible as a resolve destination. 
Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_RESOLVE_DEST` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_RESOLVE` #### `D3D12_BARRIER_ACCESS_RESOLVE_SOURCE` -Indicates a resource is accessible as a resolve source. +Indicates a resource is accessible as a resolve source. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_RESOLVE_SOURCE` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_RESOLVE` #### `D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ` -Indicates a resource is accessible for read as a raytracing acceleration structure. The resource MUST have been created using an initial state of `D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE`. +Indicates a resource is accessible for read as a raytracing acceleration structure. The resource MUST have been created using an initial state of `D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE`. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_COMPUTE_SHADING` +- `D3D12_BARRIER_SYNC_RAYTRACING` +- `D3D12_BARRIER_SYNC_ALL_SHADING` +- `D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE` +- `D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE` +- `D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO` #### `D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE` -Indicates a resource is accessible for write as a raytracing acceleration structure. The resource MUST have been created using an initial state of `D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE`. +Indicates a resource is accessible for write as a raytracing acceleration structure. The resource MUST have been created using an initial state of `D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE`. 
Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_COMPUTE_SHADING` +- `D3D12_BARRIER_SYNC_RAYTRACING` +- `D3D12_BARRIER_SYNC_ALL_SHADING` +- `D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE` +- `D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE` #### `D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE` -Indicates a resource is accessible as a shading rate source. +Indicates a resource is accessible as a shading rate source. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_PIXEL_SHADING` +- `D3D12_BARRIER_SYNC_ALL_SHADING` #### `D3D12_BARRIER_ACCESS_VIDEO_DECODE_READ` -Indicates a resource is accessible for read-only access in a video decode queue. +Indicates a resource is accessible for read-only access in a video decode queue. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_VIDEO_DECODE_READ` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_VIDEO_DECODE` #### `D3D12_BARRIER_ACCESS_VIDEO_DECODE_WRITE` -Indicates a resource is accessible for write access in a video decode queue. +Indicates a resource is accessible for write access in a video decode queue. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_VIDEO_DECODE_WRITE` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_VIDEO_DECODE` #### `D3D12_BARRIER_ACCESS_VIDEO_PROCESS_READ` -Indicates a resource is accessible for read-only access in a video process queue. +Indicates a resource is accessible for read-only access in a video process queue.
Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_VIDEO_PROCESS_READ` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_VIDEO_PROCESS` #### `D3D12_BARRIER_ACCESS_VIDEO_PROCESS_WRITE` -Indicates a resource is accessible for read-only access in a video process queue. +Indicates a resource is accessible for write access in a video process queue. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_VIDEO_PROCESS_WRITE` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_VIDEO_PROCESS` #### `D3D12_BARRIER_ACCESS_VIDEO_ENCODE_READ` -Indicates a resource is accessible for read-only access in a video encode queue. +Indicates a resource is accessible for read-only access in a video encode queue. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_VIDEO_ENCODE_READ` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_VIDEO_ENCODE` #### `D3D12_BARRIER_ACCESS_VIDEO_ENCODE_WRITE` -Indicates a resource is accessible for read-only access in a video encode queue. +Indicates a resource is accessible for write access in a video encode queue. Runtime barrier validation ensures that `D3D12_BARRIER_ACCESS_VIDEO_ENCODE_WRITE` is used with one or more of the following sync bits: + +- `D3D12_BARRIER_SYNC_ALL` +- `D3D12_BARRIER_SYNC_VIDEO_ENCODE` #### `D3D12_BARRIER_ACCESS_NO_ACCESS` -Resource is inaccessible for read or write. Once a subresource access has been transitioned to `D3D12_BARRIER_ACCESS_NO_ACCESS`, it must be be reactivated by a barrier with `AccessBefore` set to `D3D12_BARRIER_ACCESS_NO_ACCESS` before using in the same `ExecuteCommandLists` scope. +Resource is either not accessed before/after the barrier in the same ECL context, or the data is no longer needed. `D3D12_BARRIER_ACCESS_NO_ACCESS` may not be combined with other access bits.
-`D3D12_BARRIER_ACCESS_NO_ACCESS` may only be used in conjunction with `D3D12_BARRIER_SYNC_NONE` or `D3D12_BARRIER_SYNC_SPLIT`. +Using `AccessBefore=D3D12_BARRIER_ACCESS_NO_ACCESS` with `SyncBefore=D3D12_BARRIER_SYNC_NONE` implies that a subresource was not accessed before the barrier in the current `ExecuteCommandLists` scope. Likewise, using `AccessAfter=D3D12_BARRIER_ACCESS_NO_ACCESS` with `SyncAfter=D3D12_BARRIER_SYNC_NONE` implies that a subresource is not accessed after the barrier in the same `ExecuteCommandLists` scope. This is useful for initiating a layout transition as the final act on a resource before the end of an `ExecuteCommandLists` scope. -`D3D12_BARRIER_ACCESS_NO_ACCESS` may not be set with other access bits. `D3D12_BARRIER_ACCESS_NO_ACCESS` indicates a resource is not expected to be accessed until some subsequent barrier or the next ECL scope. +Barriers used for aliased resource transitions can set `AccessBefore` or `AccessAfter` to `D3D12_BARRIER_ACCESS_NO_ACCESS` to indicate that aliased subresources do not share data across synchronization boundaries. This can help avoid unnecessary cache flushes and layout transitions. Useful in aliasing barriers when subresource is not needed for a sufficiently long time that it makes sense to purge the subresource from any read cache. -Also useful for initiating a layout transition as the final act on a resource before the end of an `ExecuteCommandLists` scope. If `SyncAfter` is `D3D12_BARRIER_SYNC_NONE`, then `AccessAfter` MUST be `D3D12_BARRIER_ACCESS_NO_ACCESS`. - ### `D3D12_BARRIER_SUBRESOURCE_RANGE` ```C++ @@ -1817,12 +2013,12 @@ struct D3D12_GLOBAL_BARRIER } ``` -| Member | | -|----------------|-------------------------------------------------------------------------------------------------------------------------------------| -| `SyncBefore` | Synchronization scope of all preceding GPU work that must be completed before executing the barrier. 
| -| `SyncAfter` | Synchronization scope of all subsequent GPU work that must wait until the barrier execution is finished. | -| `AccessBefore` | Access bits corresponding with any relevant resource usage since the preceding barrier or the start of `ExecuteCommandLists` scope. | -| `AccessAfter` | Access bits corresponding with any relevant resource usage after the barrier completes. | +| Member | | +|----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `SyncBefore` | Synchronization scope bits of all preceding GPU work that must be completed before executing the barrier. Can be a bitwise-or combination of multiple D3D12_BARRIER_SYNC bits. | +| `SyncAfter` | Synchronization scope bits of all subsequent GPU work that must wait until the barrier execution is finished. Can be a bitwise-or combination of multiple D3D12_BARRIER_SYNC bits. | +| `AccessBefore` | Access bits corresponding with any relevant resource usage since the preceding barrier or the start of `ExecuteCommandLists` scope. Can be a bitwise-or combination of multiple D3D12_BARRIER_ACCESS bits. | +| `AccessAfter` | Access bits corresponding with any relevant resource usage after the barrier completes. Can be a bitwise-or combination of multiple D3D12_BARRIER_ACCESS bits. | ### `D3D12_TEXTURE_BARRIER_FLAGS` @@ -1836,7 +2032,7 @@ enum D3D12_TEXTURE_BARRIER_FLAGS #### `D3D12_TEXTURE_BARRIER_FLAG_DISCARD` -Can only be used when `LayoutBefore` is `D3D12_BARRIER_LAYOUT_UNDEFINED`. Typically, this is used to initialize compression metadata as part of a barrier that activates an aliased resource. The Subresource member must indicate all subresources. Without this flag, a full resource Clear, Copy or Discard is required before use. +Can only be used when `LayoutBefore` is `D3D12_BARRIER_LAYOUT_UNDEFINED`. 
Typically, this is used to initialize compression metadata as part of a barrier that activates an aliased resource. ### `D3D12_TEXTURE_BARRIER` @@ -1855,17 +2051,17 @@ struct D3D12_TEXTURE_BARRIER }; ``` -| Member | | -|----------------|------------------------------------------------------------------------------------------------------------------------| -| `SyncBefore` | Synchronization scope of all preceding GPU work that must be completed before executing the barrier. | -| `SyncAfter` | Synchronization scope of all subsequent GPU work that must wait until the barrier execution is finished. | -| `AccessBefore` | Access bits corresponding with resource usage since the preceding barrier or the start of `ExecuteCommandLists` scope. | -| `AccessAfter` | Access bits corresponding with resource usage after the barrier completes. | -| `LayoutBefore` | Layout of texture preceding the barrier execution. | -| `LayoutAfter` | Layout of texture upon completion of barrier execution. | -| `pResource` | Pointer to the buffer resource being using the barrier. | -| `Subresources` | Range of texture subresources being barriered. | -| `Flags` | Optional flags values. | +| Member | | +|----------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `SyncBefore` | Synchronization scope bits of all preceding GPU work that must be completed before executing the barrier. Can be a bitwise-or combination of multiple D3D12_BARRIER_SYNC bits. | +| `SyncAfter` | Synchronization scope bits of all subsequent GPU work that must wait until the barrier execution is finished. Can be a bitwise-or combination of multiple D3D12_BARRIER_SYNC bits. | +| `AccessBefore` | Access bits corresponding with resource usage since the preceding barrier or the start of `ExecuteCommandLists` scope. 
Can be a bitwise-or combination of multiple D3D12_BARRIER_ACCESS bits. | +| `AccessAfter` | Access bits corresponding with resource usage after the barrier completes. Can be a bitwise-or combination of multiple D3D12_BARRIER_ACCESS bits. | +| `LayoutBefore` | Layout of texture preceding the barrier execution. | +| `LayoutAfter` | Layout of texture upon completion of barrier execution. | +| `pResource` | Pointer to the buffer resource being using the barrier. | +| `Subresources` | Range of texture subresources being barriered. | +| `Flags` | Optional flags values. | ### `D3D12_BUFFER_BARRIER` @@ -1882,15 +2078,15 @@ struct D3D12_BUFFER_BARRIER }; ``` -| Member | | -|----------------|------------------------------------------------------------------------------------------------------------------------| -| `SyncBefore` | Synchronization scope of all preceding GPU work that must be completed before executing the barrier. | -| `SyncAfter` | Synchronization scope of all subsequent GPU work that must wait until the barrier execution is finished. | -| `AccessBefore` | Access bits corresponding with resource usage since the preceding barrier or the start of `ExecuteCommandLists` scope. | -| `AccessAfter` | Access bits corresponding with resource usage after the barrier completes. | -| `pResource` | Pointer to the buffer resource being using the barrier. | -| `Offset` | Offset value must be 0. | -| `Size` | Size must either be UINT64_MAX or the size of the buffer in bytes. | +| Member | | +|----------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `SyncBefore` | Synchronization scope bits of all preceding GPU work that must be completed before executing the barrier. Can be a bitwise-or combination of multiple D3D12_BARRIER_SYNC bits. 
| +| `SyncAfter` | Synchronization scope bits of all subsequent GPU work that must wait until the barrier execution is finished. Can be a bitwise-or combination of multiple D3D12_BARRIER_SYNC bits. | +| `AccessBefore` | Access bits corresponding with resource usage since the preceding barrier or the start of `ExecuteCommandLists` scope. Can be a bitwise-or combination of multiple D3D12_BARRIER_ACCESS bits. | +| `AccessAfter` | Access bits corresponding with resource usage after the barrier completes. Can be a bitwise-or combination of multiple D3D12_BARRIER_ACCESS bits. | +| `pResource` | Pointer to the buffer resource being using the barrier. | +| `Offset` | Offset value must be 0. | +| `Size` | Size must either be UINT64_MAX or the size of the buffer in bytes. | ### `D3D12_BARRIER_GROUP` @@ -1918,7 +2114,7 @@ struct D3D12_BARRIER_GROUP | `pTextureBarriers` | Pointer to an array of `D3D12_TEXTURE_BARRIERS` if Type is `D3D12_BARRIER_TYPE_TEXTURE` | | `pBufferBarriers` | Pointer to an array of `D3D12_BUFFER_BARRIERS` if Type is `D3D12_BARRIER_TYPE_BUFFER` | -### ID3D12GraphicsCommandList7::Barrier +### ID3D12GraphicsCommandList7 Barrier Adds a collection of barriers into a graphics command list recording. @@ -1934,7 +2130,7 @@ void ID3D12GraphicsCommandList7::Barrier( | `NumBarrierGroups` | Number of barrier groups pointed to by pBarrierGroups | | `pBarrierGroups` | Pointer to an array of `D3D12_BARRIER_GROUP` objects | -### ID3D12VideoDecodeCommandList3::Barrier +### ID3D12VideoDecodeCommandList3 Barrier Adds a collection of barriers into a video decode command list recording. @@ -1950,7 +2146,7 @@ void ID3D12VideoDecodeCommandList3::Barrier( | `NumBarrierGroups` | Number of barrier groups pointed to by pBarrierGroups | | `pBarrierGroups` | Pointer to an array of `D3D12_BARRIER_GROUP` objects | -### ID3D12VideoProcessCommandList3::Barrier +### ID3D12VideoProcessCommandList3 Barrier Adds a collection of barriers into a video process command list recording. 
@@ -1966,7 +2162,7 @@ void ID3D12VideoProcessCommandList3::Barrier( | `NumBarrierGroup`s | Number of barrier groups pointed to by pBarrierGroups | | `pBarrierGroups` | Pointer to an array of `D3D12_BARRIER_GROUP` objects | -### ID3D12VideoEncodeCommandList3::Barrier +### ID3D12VideoEncodeCommandList3 Barrier Adds a collection of barriers into a video encode command list recording. @@ -1982,7 +2178,7 @@ void ID3D12VideoEncodeCommandList3::Barrier( | `NumBarrierGroups` | Number of barrier groups pointed to by pBarrierGroups | | `pBarrierGroups` | Pointer to an array of `D3D12_BARRIER_GROUP` objects | -### ID3D12Device10::CreateCommittedResource3 +### ID3D12Device10 CreateCommittedResource3 Creates a committed resource with an initial layout rather than an initial state. @@ -2008,7 +2204,7 @@ HRESULT ID3D12Device10::CreateCommittedResource3( See the [Format List Casting](VulkanOn12.md#format-list-casting) spec for details on format casting using `NumCastableFormats` and `pCastableFormats`. -### ID3D12Device10::CreatePlacedResource2 +### ID3D12Device10 CreatePlacedResource2 ```c++ HRESULT ID3D12Device10::CreatePlacedResource2( @@ -2031,7 +2227,7 @@ HRESULT ID3D12Device10::CreatePlacedResource2( See the [Format List Casting](VulkanOn12.md#format-list-casting) spec for details on format casting using `NumCastableFormats` and `pCastableFormats`. -### ID3D12Device10::CreateReservedResource2 +### ID3D12Device10 CreateReservedResource2 ```c++ HRESULT ID3D12Device10::CreateReservedResource2( @@ -2054,7 +2250,7 @@ HRESULT ID3D12Device10::CreateReservedResource2( See the [Format List Casting](VulkanOn12.md#format-list-casting) spec for details on format casting using `NumCastableFormats` and `pCastableFormats`. 
-### ID3D12DebugCommandQueue1::AssertResourceAccess +### ID3D12DebugCommandQueue1 AssertResourceAccess ```c++ void ID3D12DebugCommandQueue1::AssertResourceAccess( @@ -2069,7 +2265,7 @@ Also tracks the resource as in-use by the command queue until the `AssertResourc Access validation currently considers only texture barrier layout (if `pResource` is a texture) and resource creation flags. Validation related to missing synchronization and/or cache flush is not implemented. -### ID3D12DebugCommandQueue1::AssertTextureLayout +### ID3D12DebugCommandQueue1 AssertTextureLayout ```c++ void ID3D12DebugCommandQueue1::AssertTextureLayout( @@ -2084,7 +2280,7 @@ Also tracks the resource as in-use by the command queue until the `AssertTexture Buffers have no layout, therefore `AssertTextureLayout` does nothing when `pResource` is a buffer resource. -### ID3D12DebugCommandList3::AssertResourceAccess +### ID3D12DebugCommandList3 AssertResourceAccess ```c++ void ID3D12DebugCommandList3::AssertResourceAccess( @@ -2099,7 +2295,7 @@ Also tracks the resource as in-use by the command queue until the command list r Access validation currently considers only texture barrier layout (if `pResource` is a texture) and resource creation flags. Validation related to missing synchronization and/or cache flush is not implemented. 
-### ID3D12DebugCommandList3::AssertTextureLayout +### ID3D12DebugCommandList3 AssertTextureLayout ```c++ void ID3D12DebugCommandList3::AssertTextureLayout( @@ -2219,10 +2415,10 @@ void BarrierSamples( D3D12_BUFFER_BARRIER BufBarrierAlias[] = { CD3DX12_BUFFER_BARRIER( - D3D12_BARRIER_SYNC_INPUT_ASSEMBLER, // SyncBefore + D3D12_BARRIER_SYNC_INDEX_INPUT, // SyncBefore D3D12_BARRIER_SYNC_PIXEL_SHADING, // SyncAfter D3D12_BARRIER_ACCESS_INDEX_BUFFER, // AccessBefore - D3D12_BARRIER_ACCESS_COMMON, // AccessAfter + D3D12_BARRIER_ACCESS_NO_ACCESS, // AccessAfter: Data get discarded/overwritten pBuffer, } }; @@ -2230,9 +2426,9 @@ void BarrierSamples( D3D12_TEXTURE_BARRIER TexBarrierAlias[] = { CD3DX12_TEXTURE_BARRIER( - D3D12_BARRIER_SYNC_INPUT_ASSEMBLER, // SyncBefore + D3D12_BARRIER_SYNC_INDEX_INPUT, // SyncBefore D3D12_BARRIER_SYNC_PIXEL_SHADING, // SyncAfter - D3D12_BARRIER_ACCESS_INDEX_BUFFER, // AccessBefore + D3D12_BARRIER_ACCESS_NO_ACCESS, // AccessBefore: Old data is not needed D3D12_BARRIER_ACCESS_SHADER_RESOURCE, // AccessAfter D3D12_BARRIER_LAYOUT_UNDEFINED, // LayoutBefore is UNDEFINED D3D12_BARRIER_LAYOUT_SHADER_RESOURCE, // LayoutAfter @@ -2485,7 +2681,7 @@ typedef enum D3D12DDI_BARRIER_TYPE ``` -### D3D12DDIARG_BARRIER_0094 +### `D3D12DDIARG_BARRIER_0094` ```C++ typedef struct D3D12DDIARG_BARRIER_0094 diff --git a/d3d/D3D12VideoEncoding.md b/d3d/D3D12VideoEncoding.md index cac117c..81010e8 100644 --- a/d3d/D3D12VideoEncoding.md +++ b/d3d/D3D12VideoEncoding.md @@ -4176,4 +4176,4 @@ A host might follow the steps outlined below to use the low level D3D12 API in a 6. Finish up the final compressed bitstream output with the remaining codec headers (eg. PPS). -7. Flush and release all the D3D12 and system resources used for the encoding session. +7. Flush and release all the D3D12 and system resources used for the encoding session. 
\ No newline at end of file diff --git a/d3d/HLSL_SM_6_7_Advanced_Texture_Ops.md b/d3d/HLSL_SM_6_7_Advanced_Texture_Ops.md index 6819855..82b4fee 100644 --- a/d3d/HLSL_SM_6_7_Advanced_Texture_Ops.md +++ b/d3d/HLSL_SM_6_7_Advanced_Texture_Ops.md @@ -87,6 +87,7 @@ Where `Flags` is the enum: enum D3D12_SAMPLER_FLAGS { D3D12_SAMPLER_FLAG_NONE = 0 D3D12_SAMPLER_FLAG_UINT_BORDER_COLOR = 0x1 +} ``` Setting the `D3D12_SAMPLER_FLAG_UINT_BORDER_COLOR` bit @@ -115,6 +116,13 @@ Static samplers used with unsigned integer formats must use either `D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK_UINT` for black borders or `D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE_UINT` for white borders. +Support for non-normalized coordinate samplers added +`D3D12_STATIC_SAMPLER_DESC1` with a `D3D12_SAMPLER_FLAGS` member. In +the context of static samplers, `D3D12_SAMPLER_FLAG_UINT_BORDER_COLOR` +is shadowed by the static border color, and not applicable. However, root +signature creation will fail if this flag is used with a floating-point +border color; with a uint border color it is redundant, but not an error. 
+ ## Raw Gather To enable access to four appropriately-sized elements @@ -140,9 +148,9 @@ Additionally, same-size and same-channel aliasing can be performed In order to be able to create an single-channel integer resource view, a resource must be created using the -[CreateCommittedResource3](D3D12EnhancedBarriers.md#id3d12device10createcommittedresource3), -[CreatePlacedResource2](D3D12EnhancedBarriers.md#id3d12device10createplacedresource2), -or [CreateReservedResource2](D3D12EnhancedBarriers.md#id3d12device10createreservedresource2) +[CreateCommittedResource3](D3D12EnhancedBarriers.md#id3d12device10-createcommittedresource3), +[CreatePlacedResource2](D3D12EnhancedBarriers.md#id3d12device10-createplacedresource2), +or [CreateReservedResource2](D3D12EnhancedBarriers.md#id3d12device10-createreservedresource2) using the new API fields, `NumCastableFormats` and `pCastableFormats` to specify the list of acceptable casts. @@ -339,6 +347,7 @@ but it is being considered a prerequisite for enabling `AdvancedTextureOpsSuppor ## Change Log Version|Date|Description -|-|- +1.01|30 Sep 2022|Add note about D3D12_STATIC_SAMPLER_DESC1 having sampler flags and interaction with border colors 1.00|01 Aug 2022|Minor edits for publication 0.10|08 Mar 2022|Rename integer sampler identifiers 0.9|07 Mar 2022|Clarify pre-requisite for advanced texture ops, Update integer aliasing in keeping with other specs. Correct type, function, and struct details. diff --git a/d3d/MeshShader.md b/d3d/MeshShader.md index 99e2714..09872f8 100644 --- a/d3d/MeshShader.md +++ b/d3d/MeshShader.md @@ -430,6 +430,92 @@ The following enums are also revised to support the new pipeline statistics stru `D3D12DDI_QUERY_TYPE_PIPELINE_STATISTICS1` is added to the enum `D3D12DDI_QUERY_TYPE`. 
+## MSPrimitives and view instancing + +The interaction between the MSPrimitives pipeline statistic and [view instancing](ViewInstancing.md#pipeline-statistics) depends on the view instancing tier (D3D12_VIEW_INSTANCING_TIER) of the device. + +On D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED devices, view instancing doesn't affect MSPrimitives. + +On D3D12_VIEW_INSTANCING_TIER_1 devices, view instances increment MSPrimitives. + +On D3D12_VIEW_INSTANCING_TIER_2 devices, + * If SV_ViewID is referenced from MS, view instances increment MSPrimitives. + * If SV_ViewID is referenced from a later stage like PS, view instances might or might not increment MSPrimitives. + +On D3D12_VIEW_INSTANCING_TIER_3 devices, + * If SV_ViewID is referenced from MS, view instances increment MSPrimitives. + * If SV_ViewID is referenced from a later stage like PS, then view instances don't increment MSPrimitives. + +## Culled primitives' effect on MSPrimitives + +Culled primitives might or might not be included in MSPrimitives, depending on the GPU implementation. + +Applications can find out whether the GPU implementation includes culled primitives in the MSPrimitives count, using a queriable capability called MSPrimitivesPipelineStatisticIncludesCulledPrimitives. + +The capability is queried using + +``` + D3D12_FEATURE_DATA_D3D12_OPTIONS12 featureData12 = {}; + VERIFY_SUCCEEDED(m_pDevice->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &featureData12, sizeof(featureData12))); + + bool includeCulledPrimitives = featureData12.MSPrimitivesPipelineStatisticIncludesCulledPrimitives == D3D12_TRI_STATE_TRUE; +``` + +In the API, the capability appears as +``` +typedef +enum D3D12_FEATURE +{ + ... 
+ D3D12_FEATURE_D3D12_OPTIONS12 = 41 +} D3D12_FEATURE; + + +typedef +enum D3D12_TRI_STATE +{ + D3D12_TRI_STATE_UNKNOWN = -1, + D3D12_TRI_STATE_FALSE = 0, + D3D12_TRI_STATE_TRUE = 1 +} D3D12_TRI_STATE; + +typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS12 +{ + _Out_ D3D12_TRI_STATE MSPrimitivesPipelineStatisticIncludesCulledPrimitives; +} D3D12_FEATURE_DATA_D3D12_OPTIONS12; +``` + +If the capability value is FALSE, then culled primitives are not included in the MSPrimitives count. + +If the capability value is TRUE, then culled primitives are included in the MSPrimitives count. + +If the capability value is UNKNOWN, then the culled primitives might or might not be included in the count. + +> ### Remark +> +> At the DDI level, drivers do not return UNKNOWN. At the application level, a capability value of UNKNOWN +> may be returned if the capability cannot be queried from the driver. This could be relevant when, for example, +> using a driver which supports the mesh shader feature but has not been updated to support the 0086 DDI +> version. + +At the DDI level, the capability is communicated through UMD DDI revision 0086: +``` +#define D3D12DDI_INTERFACE_VERSION_R8 ((D3D12DDI_MAJOR_VERSION << 16) | D3D12DDI_MINOR_VERSION_R8) + +#define D3D12DDI_BUILD_VERSION_0086 6 +#define D3D12DDI_SUPPORTED_0086 ((((UINT64)D3D12DDI_INTERFACE_VERSION_R8) << 32) | (((UINT64)D3D12DDI_BUILD_VERSION_0086) << 16)) +``` + +With revision 0086, there exists a revised OPTIONS structure: +``` +// D3D12DDICAPS_TYPE_D3D12_OPTIONS +typedef struct D3D12DDI_D3D12_OPTIONS_DATA_0086 +{ + ...
+ BOOL MSPrimitivesPipelineStatisticIncludesCulledPrimitives; +} +``` + SV_RenderTargetArrayIndex limitations based on queryable capability =================================================================== diff --git a/d3d/Raytracing.md b/d3d/Raytracing.md index d742875..babf68a 100644 --- a/d3d/Raytracing.md +++ b/d3d/Raytracing.md @@ -1,6 +1,6 @@ # DirectX Raytracing (DXR) Functional Spec -v1.19 8/10/2022 +v1.20 1/11/2023 --- @@ -114,81 +114,81 @@ v1.19 8/10/2022 - [Device methods](#device-methods) - [CheckFeatureSupport](#checkfeaturesupport) - [CheckFeatureSupport Structures](#checkfeaturesupport-structures) - - [D3D12_FEATURE_D3D12_OPTIONS5](#d3d12_feature_d3d12_options5) - - [D3D12_RAYTRACING_TIER](#d3d12_raytracing_tier) + - [D3D12\_FEATURE\_D3D12\_OPTIONS5](#d3d12_feature_d3d12_options5) + - [D3D12\_RAYTRACING\_TIER](#d3d12_raytracing_tier) - [CreateStateObject](#createstateobject) - [CreateStateObject Structures](#createstateobject-structures) - - [D3D12_STATE_OBJECT_DESC](#d3d12_state_object_desc) - - [D3D12_STATE_OBJECT_TYPE](#d3d12_state_object_type) - - [D3D12_STATE_SUBOBJECT](#d3d12_state_subobject) - - [D3D12_STATE_SUBOBJECT_TYPE](#d3d12_state_subobject_type) - - [D3D12_STATE_OBJECT_CONFIG](#d3d12_state_object_config) - - [D3D12_STATE_OBJECT_FLAGS](#d3d12_state_object_flags) - - [D3D12_GLOBAL_ROOT_SIGNATURE](#d3d12_global_root_signature) - - [D3D12_LOCAL_ROOT_SIGNATURE](#d3d12_local_root_signature) - - [D3D12_DXIL_LIBRARY_DESC](#d3d12_dxil_library_desc) - - [D3D12_EXPORT_DESC](#d3d12_export_desc) - - [D3D12_EXPORT_FLAGS](#d3d12_export_flags) - - [D3D12_EXISTING_COLLECTION_DESC](#d3d12_existing_collection_desc) - - [D3D12_HIT_GROUP_DESC](#d3d12_hit_group_desc) - - [D3D12_HIT_GROUP_TYPE](#d3d12_hit_group_type) - - [D3D12_RAYTRACING_SHADER_CONFIG](#d3d12_raytracing_shader_config) - - [D3D12_RAYTRACING_PIPELINE_CONFIG](#d3d12_raytracing_pipeline_config) - - [D3D12_RAYTRACING_PIPELINE_CONFIG1](#d3d12_raytracing_pipeline_config1) - - 
[D3D12_RAYTRACING_PIPELINE_FLAGS](#d3d12_raytracing_pipeline_flags) - - [D3D12_NODE_MASK](#d3d12_node_mask) - - [D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION](#d3d12_subobject_to_exports_association) - - [D3D12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION](#d3d12_dxil_subobject_to_exports_association) + - [D3D12\_STATE\_OBJECT\_DESC](#d3d12_state_object_desc) + - [D3D12\_STATE\_OBJECT\_TYPE](#d3d12_state_object_type) + - [D3D12\_STATE\_SUBOBJECT](#d3d12_state_subobject) + - [D3D12\_STATE\_SUBOBJECT\_TYPE](#d3d12_state_subobject_type) + - [D3D12\_STATE\_OBJECT\_CONFIG](#d3d12_state_object_config) + - [D3D12\_STATE\_OBJECT\_FLAGS](#d3d12_state_object_flags) + - [D3D12\_GLOBAL\_ROOT\_SIGNATURE](#d3d12_global_root_signature) + - [D3D12\_LOCAL\_ROOT\_SIGNATURE](#d3d12_local_root_signature) + - [D3D12\_DXIL\_LIBRARY\_DESC](#d3d12_dxil_library_desc) + - [D3D12\_EXPORT\_DESC](#d3d12_export_desc) + - [D3D12\_EXPORT\_FLAGS](#d3d12_export_flags) + - [D3D12\_EXISTING\_COLLECTION\_DESC](#d3d12_existing_collection_desc) + - [D3D12\_HIT\_GROUP\_DESC](#d3d12_hit_group_desc) + - [D3D12\_HIT\_GROUP\_TYPE](#d3d12_hit_group_type) + - [D3D12\_RAYTRACING\_SHADER\_CONFIG](#d3d12_raytracing_shader_config) + - [D3D12\_RAYTRACING\_PIPELINE\_CONFIG](#d3d12_raytracing_pipeline_config) + - [D3D12\_RAYTRACING\_PIPELINE\_CONFIG1](#d3d12_raytracing_pipeline_config1) + - [D3D12\_RAYTRACING\_PIPELINE\_FLAGS](#d3d12_raytracing_pipeline_flags) + - [D3D12\_NODE\_MASK](#d3d12_node_mask) + - [D3D12\_SUBOBJECT\_TO\_EXPORTS\_ASSOCIATION](#d3d12_subobject_to_exports_association) + - [D3D12\_DXIL\_SUBOBJECT\_TO\_EXPORTS\_ASSOCIATION](#d3d12_dxil_subobject_to_exports_association) - [AddToStateObject](#addtostateobject) - [GetRaytracingAccelerationStructurePrebuildInfo](#getraytracingaccelerationstructureprebuildinfo) - [GetRaytracingAccelerationStructurePrebuildInfo Structures](#getraytracingaccelerationstructureprebuildinfo-structures) - - 
[D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO](#d3d12_raytracing_acceleration_structure_prebuild_info) + - [D3D12\_RAYTRACING\_ACCELERATION\_STRUCTURE\_PREBUILD\_INFO](#d3d12_raytracing_acceleration_structure_prebuild_info) - [CheckDriverMatchingIdentifier](#checkdrivermatchingidentifier) - [CheckDriverMatchingIdentifier Structures](#checkdrivermatchingidentifier-structures) - - [D3D12_SERIALIZED_DATA_TYPE](#d3d12_serialized_data_type) - - [D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER](#d3d12_serialized_data_driver_matching_identifier) - - [D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS](#d3d12_driver_matching_identifier_status) + - [D3D12\_SERIALIZED\_DATA\_TYPE](#d3d12_serialized_data_type) + - [D3D12\_SERIALIZED\_DATA\_DRIVER\_MATCHING\_IDENTIFIER](#d3d12_serialized_data_driver_matching_identifier) + - [D3D12\_DRIVER\_MATCHING\_IDENTIFIER\_STATUS](#d3d12_driver_matching_identifier_status) - [CreateCommandSignature](#createcommandsignature) - [CreateCommandSignature Structures](#createcommandsignature-structures) - - [D3D12_COMMAND_SIGNATURE_DESC](#d3d12_command_signature_desc) - - [D3D12_INDIRECT_ARGUMENT_DESC](#d3d12_indirect_argument_desc) - - [D3D12_INDIRECT_ARGUMENT_TYPE](#d3d12_indirect_argument_type) + - [D3D12\_COMMAND\_SIGNATURE\_DESC](#d3d12_command_signature_desc) + - [D3D12\_INDIRECT\_ARGUMENT\_DESC](#d3d12_indirect_argument_desc) + - [D3D12\_INDIRECT\_ARGUMENT\_TYPE](#d3d12_indirect_argument_type) - [Command list methods](#command-list-methods) - [BuildRaytracingAccelerationStructure](#buildraytracingaccelerationstructure) - [BuildRaytracingAccelerationStructure Structures](#buildraytracingaccelerationstructure-structures) - - [D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC](#d3d12_build_raytracing_acceleration_structure_desc) - - [D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS](#d3d12_build_raytracing_acceleration_structure_inputs) - - [D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE](#d3d12_raytracing_acceleration_structure_type) - - 
[D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS](#d3d12_raytracing_acceleration_structure_build_flags) - - [D3D12_ELEMENTS_LAYOUT](#d3d12_elements_layout) - - [D3D12_RAYTRACING_GEOMETRY_DESC](#d3d12_raytracing_geometry_desc) - - [D3D12_RAYTRACING_GEOMETRY_TYPE](#d3d12_raytracing_geometry_type) - - [D3D12_RAYTRACING_GEOMETRY_FLAGS](#d3d12_raytracing_geometry_flags) - - [D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC](#d3d12_raytracing_geometry_triangles_desc) - - [D3D12_RAYTRACING_GEOMETRY_AABBS_DESC](#d3d12_raytracing_geometry_aabbs_desc) - - [D3D12_RAYTRACING_AABB](#d3d12_raytracing_aabb) - - [D3D12_RAYTRACING_INSTANCE_DESC](#d3d12_raytracing_instance_desc) - - [D3D12_RAYTRACING_INSTANCE_FLAGS](#d3d12_raytracing_instance_flags) - - [D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE](#d3d12_gpu_virtual_address_and_stride) + - [D3D12\_BUILD\_RAYTRACING\_ACCELERATION\_STRUCTURE\_DESC](#d3d12_build_raytracing_acceleration_structure_desc) + - [D3D12\_BUILD\_RAYTRACING\_ACCELERATION\_STRUCTURE\_INPUTS](#d3d12_build_raytracing_acceleration_structure_inputs) + - [D3D12\_RAYTRACING\_ACCELERATION\_STRUCTURE\_TYPE](#d3d12_raytracing_acceleration_structure_type) + - [D3D12\_RAYTRACING\_ACCELERATION\_STRUCTURE\_BUILD\_FLAGS](#d3d12_raytracing_acceleration_structure_build_flags) + - [D3D12\_ELEMENTS\_LAYOUT](#d3d12_elements_layout) + - [D3D12\_RAYTRACING\_GEOMETRY\_DESC](#d3d12_raytracing_geometry_desc) + - [D3D12\_RAYTRACING\_GEOMETRY\_TYPE](#d3d12_raytracing_geometry_type) + - [D3D12\_RAYTRACING\_GEOMETRY\_FLAGS](#d3d12_raytracing_geometry_flags) + - [D3D12\_RAYTRACING\_GEOMETRY\_TRIANGLES\_DESC](#d3d12_raytracing_geometry_triangles_desc) + - [D3D12\_RAYTRACING\_GEOMETRY\_AABBS\_DESC](#d3d12_raytracing_geometry_aabbs_desc) + - [D3D12\_RAYTRACING\_AABB](#d3d12_raytracing_aabb) + - [D3D12\_RAYTRACING\_INSTANCE\_DESC](#d3d12_raytracing_instance_desc) + - [D3D12\_RAYTRACING\_INSTANCE\_FLAGS](#d3d12_raytracing_instance_flags) + - 
[D3D12\_GPU\_VIRTUAL\_ADDRESS\_AND\_STRIDE](#d3d12_gpu_virtual_address_and_stride) - [EmitRaytracingAccelerationStructurePostbuildInfo](#emitraytracingaccelerationstructurepostbuildinfo) - [EmitRaytracingAccelerationStructurePostbuildInfo Structures](#emitraytracingaccelerationstructurepostbuildinfo-structures) - - [D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC](#d3d12_raytracing_acceleration_structure_postbuild_info_desc) - - [D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_TYPE](#d3d12_raytracing_acceleration_structure_postbuild_info_type) - - [D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE_DESC](#d3d12_raytracing_acceleration_structure_postbuild_info_compacted_size_desc) - - [D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_TOOLS_VISUALIZATION_DESC](#d3d12_raytracing_acceleration_structure_postbuild_info_tools_visualization_desc) - - [D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION_DESC](#d3d12_raytracing_acceleration_structure_postbuild_info_serialization_desc) - - [D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_CURRENT_SIZE_DESC](#d3d12_raytracing_acceleration_structure_postbuild_info_current_size_desc) + - [D3D12\_RAYTRACING\_ACCELERATION\_STRUCTURE\_POSTBUILD\_INFO\_DESC](#d3d12_raytracing_acceleration_structure_postbuild_info_desc) + - [D3D12\_RAYTRACING\_ACCELERATION\_STRUCTURE\_POSTBUILD\_INFO\_TYPE](#d3d12_raytracing_acceleration_structure_postbuild_info_type) + - [D3D12\_RAYTRACING\_ACCELERATION\_STRUCTURE\_POSTBUILD\_INFO\_COMPACTED\_SIZE\_DESC](#d3d12_raytracing_acceleration_structure_postbuild_info_compacted_size_desc) + - [D3D12\_RAYTRACING\_ACCELERATION\_STRUCTURE\_POSTBUILD\_INFO\_TOOLS\_VISUALIZATION\_DESC](#d3d12_raytracing_acceleration_structure_postbuild_info_tools_visualization_desc) + - [D3D12\_RAYTRACING\_ACCELERATION\_STRUCTURE\_POSTBUILD\_INFO\_SERIALIZATION\_DESC](#d3d12_raytracing_acceleration_structure_postbuild_info_serialization_desc) + - 
[D3D12\_RAYTRACING\_ACCELERATION\_STRUCTURE\_POSTBUILD\_INFO\_CURRENT\_SIZE\_DESC](#d3d12_raytracing_acceleration_structure_postbuild_info_current_size_desc) - [CopyRaytracingAccelerationStructure](#copyraytracingaccelerationstructure) - [CopyRaytracingAccelerationStructure Structures](#copyraytracingaccelerationstructure-structures) - - [D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE](#d3d12_raytracing_acceleration_structure_copy_mode) - - [D3D12_SERIALIZED_RAYTRACING_ACCELERATION_STRUCTURE_HEADER](#d3d12_serialized_raytracing_acceleration_structure_header) - - [D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_TOOLS_VISUALIZATION_HEADER](#d3d12_build_raytracing_acceleration_structure_tools_visualization_header) + - [D3D12\_RAYTRACING\_ACCELERATION\_STRUCTURE\_COPY\_MODE](#d3d12_raytracing_acceleration_structure_copy_mode) + - [D3D12\_SERIALIZED\_RAYTRACING\_ACCELERATION\_STRUCTURE\_HEADER](#d3d12_serialized_raytracing_acceleration_structure_header) + - [D3D12\_BUILD\_RAYTRACING\_ACCELERATION\_STRUCTURE\_TOOLS\_VISUALIZATION\_HEADER](#d3d12_build_raytracing_acceleration_structure_tools_visualization_header) - [SetPipelineState1](#setpipelinestate1) - [DispatchRays](#dispatchrays) - [DispatchRays Structures](#dispatchrays-structures) - - [D3D12_DISPATCH_RAYS_DESC](#d3d12_dispatch_rays_desc) - - [D3D12_GPU_VIRTUAL_ADDRESS_RANGE](#d3d12_gpu_virtual_address_range) - - [D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE](#d3d12_gpu_virtual_address_range_and_stride) + - [D3D12\_DISPATCH\_RAYS\_DESC](#d3d12_dispatch_rays_desc) + - [D3D12\_GPU\_VIRTUAL\_ADDRESS\_RANGE](#d3d12_gpu_virtual_address_range) + - [D3D12\_GPU\_VIRTUAL\_ADDRESS\_RANGE\_AND\_STRIDE](#d3d12_gpu_virtual_address_range_and_stride) - [ExecuteIndirect](#executeindirect) - [ID3D12StateObjectProperties methods](#id3d12stateobjectproperties-methods) - [GetShaderIdentifier](#getshaderidentifier) @@ -197,7 +197,7 @@ v1.19 8/10/2022 - [SetPipelineStackSize](#setpipelinestacksize) - [Additional resource 
states](#additional-resource-states) - [Additional root signature flags](#additional-root-signature-flags) - - [D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE](#d3d12_root_signature_flag_local_root_signature) + - [D3D12\_ROOT\_SIGNATURE\_FLAG\_LOCAL\_ROOT\_SIGNATURE](#d3d12_root_signature_flag_local_root_signature) - [Note on shader visibility](#note-on-shader-visibility) - [Additional SRV type](#additional-srv-type) - [Constants](#constants) @@ -257,8 +257,8 @@ v1.19 8/10/2022 - [RayQuery](#rayquery) - [RayQuery intrinsics](#rayquery-intrinsics) - [RayQuery enums](#rayquery-enums) - - [COMMITTED_STATUS](#committed_status) - - [CANDIDATE_TYPE](#candidate_type) + - [COMMITTED\_STATUS](#committed_status) + - [CANDIDATE\_TYPE](#candidate_type) - [RayQuery TraceRayInline](#rayquery-tracerayinline) - [TraceRayInline examples](#tracerayinline-examples) - [TraceRayInline example 1](#tracerayinline-example-1) @@ -326,17 +326,17 @@ v1.19 8/10/2022 - [Descriptor handle encodings](#descriptor-handle-encodings) - [State object DDIs](#state-object-ddis) - [State subobjects](#state-subobjects) - - [D3D12DDI_STATE_SUBOBJECT_TYPE](#d3d12ddi_state_subobject_type) - - [D3D12DDI_STATE_SUBOBJECT_0054](#d3d12ddi_state_subobject_0054) - - [D3D12DDI_STATE_SUBOBJECT_TYPE_SHADER_EXPORT_SUMMARY](#d3d12ddi_state_subobject_type_shader_export_summary) - - [D3D12DDI_FUNCTION_SUMMARY_0054](#d3d12ddi_function_summary_0054) - - [D3D12DDI_FUNCTION_SUMMARY_NODE_0054](#d3d12ddi_function_summary_node_0054) - - [D3D12_EXPORT_SUMMARY_FLAGS](#d3d12_export_summary_flags) + - [D3D12DDI\_STATE\_SUBOBJECT\_TYPE](#d3d12ddi_state_subobject_type) + - [D3D12DDI\_STATE\_SUBOBJECT\_0054](#d3d12ddi_state_subobject_0054) + - [D3D12DDI\_STATE\_SUBOBJECT\_TYPE\_SHADER\_EXPORT\_SUMMARY](#d3d12ddi_state_subobject_type_shader_export_summary) + - [D3D12DDI\_FUNCTION\_SUMMARY\_0054](#d3d12ddi_function_summary_0054) + - [D3D12DDI\_FUNCTION\_SUMMARY\_NODE\_0054](#d3d12ddi_function_summary_node_0054) + - 
[D3D12\_EXPORT\_SUMMARY\_FLAGS](#d3d12_export_summary_flags) - [State object lifetimes as seen by driver](#state-object-lifetimes-as-seen-by-driver) - [Collection lifetimes](#collection-lifetimes) - [AddToStateObject parent lifetimes](#addtostateobject-parent-lifetimes) - [Reporting raytracing support from the driver](#reporting-raytracing-support-from-the-driver) - - [D3D12DDI_RAYTRACING_TIER](#d3d12ddi_raytracing_tier) + - [D3D12DDI\_RAYTRACING\_TIER](#d3d12ddi_raytracing_tier) - [Potential future features](#potential-future-features) - [Traversal shaders](#traversal-shaders) - [More efficient acceleration structure builds](#more-efficient-acceleration-structure-builds) @@ -4830,8 +4830,12 @@ shader entry function. Use an empty string to omit a shader type. Example: ```C++ -HitGroup my_group_name("intersection_main", "anyhit_main", -"closesthit_main"); +HitGroup my_group_name = +{ + "intersection_main", + "anyhit_main", + "closesthit_main" +}; ``` --- @@ -4843,7 +4847,10 @@ or associated with shaders by name. The root signature is global for all shaders in a [DispatchRays](#dispatchrays) call. ```C++ -RootSignature my_rs_name("root signature definition"); +GlobalRootSignature my_rs_name = +{ + "root signature definition" +}; ``` --- @@ -4855,7 +4862,10 @@ local root signature defines the structure of additional root arguments read from the shader record in the shader table. ```C++ -LocalRootSignature my_local_rs_name("local root signature definition"); +LocalRootSignature my_local_rs_name = +{ + "local root signature definition" +}; ``` --- @@ -4868,8 +4878,11 @@ string, and the list of entry points is supplied as a semicolon-separated list of function names in a string. 
```C++ -SubobjectToEntrypointAssociation -my_association_name("subobject_name","function1;function2;function3"); +SubobjectToEntrypointAssociation my_association_name = +{ + "subobject_name", + "function1;function2;function3" +}; ``` --- @@ -4880,7 +4893,11 @@ Defines the maximum sizes in bytes for the [ray payload](#ray-payload-structure) See the API equivalent: [D3D12_RAYTRACING_SHADER_CONFIG](#d3d12_raytracing_shader_config). ```C++ -RaytracingShaderConfig shader_config_name(maxPayloadSizeInBytes,maxAttributeSizeInBytes); +RaytracingShaderConfig shader_config_name = +{ + maxPayloadSizeInBytes, + maxAttributeSizeInBytes +}; ``` --- @@ -4891,7 +4908,10 @@ Defines the maximum [TraceRay()](#traceray) recursion depth. See the API equivalent: [D3D12_RAYTRACING_PIPELINE_CONFIG](#d3d12_raytracing_pipeline_config). ```C++ -RaytracingPipelineConfig config_name(maxTraceRecursionDepth); +RaytracingPipelineConfig config_name = +{ + maxTraceRecursionDepth +}; ``` --- @@ -4902,8 +4922,11 @@ Defines the maximum [TraceRay()](#traceray) recursion depth as well as raytracin equivalent: [D3D12_RAYTRACING_PIPELINE_CONFIG1](#d3d12_raytracing_pipeline_config1). ```C++ -RaytracingPipelineConfig1 config_name(maxTraceRecursionDepth, - RAYTRACING_PIPELINE_CONFIG_FLAG_*); +RaytracingPipelineConfig1 config_name = +{ + maxTraceRecursionDepth, + RAYTRACING_PIPELINE_CONFIG_FLAG_* +}; ``` The available flags (`RAYTRACING_PIPELINE_CONFIG_FLAG_*`) are defined at: [Raytracing pipeline flags](#raytracing-pipeline-flags). @@ -7652,3 +7675,4 @@ v1.16|7/29/2021|
  • For [any hit shaders](#any-hit-shaders), clarified that for v1.17|10/25/2021|
  • In [Degenerate primitives and instances](#degenerate-primitives-and-instances), added the clarification: An exception to the rule that degenerates cannot be discarded with `ALLOW_UPDATE` specified is primitives that have repeated index value can always be discarded (even with `ALLOW_UPDATE` specified). There is no value in keeping them since index values cannot be changed.
  • v1.18|3/31/2022|
  • In [Inactive primitives and instances](#inactive-primitives-and-instances), changed handling of NaN in triangles to: Triangles are considered "inactive" (but legal input to acceleration structure build) if the x component of any vertex is NaN. The "any" used to be "each". This reduces the amount of undefined behavior apps are exposed to.
  • v1.19|8/10/2022|
  • For [GetShaderIdentifier](#getshaderidentifier) clarified that the returned pointer is valid as long as the state object is valid (used to only say the data pointed to is valid).
  • Removed inaccurate statements that various acceleration structure copy modes require the OS to be in developer mode. Such enforcement was initially considered but not implemented.
  • +v1.20|1/11/2023|
  • In HLSL [Subobject definitions](#subobject-definitions) sections, corrected all the examples to show the syntax actually supported by the compiler. Previously showed an alternative syntax that didn't end up being used.
  • \ No newline at end of file diff --git a/d3d/ResourceBinding.md b/d3d/ResourceBinding.md index 0adf9ef..d229469 100644 --- a/d3d/ResourceBinding.md +++ b/d3d/ResourceBinding.md @@ -1,6 +1,6 @@

    D3D12 Resource Binding Functional Spec

    -v1.19 4/10/2019 +v1.21 3/11/2022 --- @@ -1065,15 +1065,14 @@ minor restriction is imposed on what types of samplers can be created. This limits the amount of data required to fully represent a sampler. BorderColor must be one of: {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 1.0}, -{1.0, 1.0, 1.0, 1.0}. +{1.0, 1.0, 1.0, 1.0}; {0u, 0u, 0u, 1u}, {1u, 1u, 1u, 1u}. +The unsigned integer colors are available with SM6.7 or higher, +and require adding `D3D12_SAMPLER_FLAG_UINT_BORDER_COLOR` +to the Flags field in the sampler description. -The small set of border colors at least allows for basic cases like -common shadow map out of bounds values to be represented in only 2 -bits.  The reason there are 3 and not 4 values is in case an -implementation needs to use the 4^th^ encoding to represent that a full -FLOAT4 border color is needed (which full samplers support).  In the -static sampler definition, BorderColor is chosen via an enumeration -listing just the 3 possibilities rather than allowing arbitrary floats. +In the static sampler definition, BorderColor is chosen via an +enumeration listing just the 5 possibilities rather than allowing +arbitrary floats. In the highly unlikely case this restriction doesn't work for an application it can always use the full samplers in a sampler descriptor @@ -1784,14 +1783,40 @@ typedef struct D3D12_SAMPLER_DESC FLOAT MaxLOD; } D3D12_SAMPLER_DESC; -interface ID3D12Device +typedef struct D3D12_SAMPLER_DESC2 +{ + D3D12_FILTER Filter; + D3D12_TEXTURE_ADDRESS_MODE AddressU; + D3D12_TEXTURE_ADDRESS_MODE AddressV; + D3D12_TEXTURE_ADDRESS_MODE AddressW; + FLOAT MipLODBias; + UINT MaxAnisotropy; + D3D12_COMPARISON_FUNC ComparisonFunc; + union + { + FLOAT FloatBorderColor[4]; // RGBA + UINT UintBorderColor[4]; + }; + FLOAT MinLOD; + FLOAT MaxLOD; + D3D12_SAMPLER_FLAGS Flags; +} D3D12_SAMPLER_DESC2; +interface ID3D12Device { ... 
HRESULT CreateSampler( _In_ const D3D12_SAMPLER_DESC* pDesc, _In_ D3D12_CPU_DESCRIPTOR_HANDLE DestDescriptor); }; + +interface ID3D12Device11 +{ + ... + HRESULT CreateSampler2( + _In_ const D3D12_SAMPLER_DESC2* pDesc, + _In_ D3D12_CPU_DESCRIPTOR_HANDLE DestDescriptor); +}; ``` Note that static samplers in the root signature use a different @@ -1848,6 +1873,18 @@ typedef struct D3D12_TEX2D_ARRAY_UAV UINT PlaneSlice; } D3D12_TEX2D_ARRAY_UAV; +typedef struct D3D12_TEX2DMS_UAV +{ + // don't need to define anything specific for this view dimension + UINT UnusedField_NothingToDefine; +} D3D12_TEX2DMS_UAV; + +typedef struct D3D12_TEX2DMS_ARRAY_UAV +{ + UINT FirstArraySlice; + UINT ArraySize; +} D3D12_TEX2DMS_ARRAY_UAV; + typedef struct D3D12_TEX3D_UAV { UINT MipSlice; @@ -1855,6 +1892,22 @@ typedef struct D3D12_TEX3D_UAV UINT WSize; } D3D12_TEX3D_UAV; +typedef enum D3D12_UAV_DIMENSION +{ + D3D12_UAV_DIMENSION_UNKNOWN = 0, + D3D12_UAV_DIMENSION_BUFFER = 1, + D3D12_UAV_DIMENSION_TEXTURE1D = 2, + D3D12_UAV_DIMENSION_TEXTURE1DARRAY = 3, + D3D12_UAV_DIMENSION_TEXTURE2D = 4, + D3D12_UAV_DIMENSION_TEXTURE2DARRAY = 5, + D3D12_UAV_DIMENSION_TEXTURE2DMS = 6, + D3D12_UAV_DIMENSION_TEXTURE2DMSARRAY = 7, + D3D12_UAV_DIMENSION_TEXTURE3D = 8, +} D3D12_UAV_DIMENSION; +// The MS options above are only available if the +// WriteableMSAATexturesSupported cap is TRUE, +// and using shader model 6.7+ + typedef struct D3D12_UNORDERED_ACCESS_VIEW_DESC { DXGI_FORMAT Format; @@ -1866,6 +1919,8 @@ typedef struct D3D12_UNORDERED_ACCESS_VIEW_DESC D3D12_TEX1D_ARRAY_UAV Texture1DArray; D3D12_TEX2D_UAV Texture2D; D3D12_TEX2D_ARRAY_UAV Texture2DArray; + D3D12_TEX2DMS_UAV Texture2DMS; + D3D12_TEX2DMS_ARRAY_UAV Texture2DMSArray; D3D12_TEX3D_UAV Texture3D; }; } D3D12_UNORDERED_ACCESS_VIEW_DESC; @@ -2403,6 +2458,8 @@ typedef enum D3D12_STATIC_BORDER_COLOR     D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK, // 0.0f,0.0f,0.0f,0.0f     D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK, // 0.0f,0.0f,0.0f,1.0f     
D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE, // 1.0f,1.0f,1.0f,1.0f + D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK_UINT, // 0u,0u,0u,1u + D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE_UINT // 1u,1u,1u,1u }; typedef struct D3D12_STATIC_SAMPLER @@ -4032,6 +4089,8 @@ C++ API equivalent is the 'borderColor' field. - `STATIC_BORDER_COLOR_TRANSPARENT_BLACK` - `STATIC_BORDER_COLOR_OPAQUE_BLACK` - `STATIC_BORDER_COLOR_OPAQUE_WHITE` +- `STATIC_BORDER_COLOR_OPAQUE_BLACK_UINT` +- `STATIC_BORDER_COLOR_OPAQUE_WHITE_UINT` # API Example @@ -4514,7 +4573,21 @@ typedef VOID ( APIENTRY* PFND3D12DDI_CREATE_CONSTANT_BUFFER_VIEW )( ### DDI Sampler ```C++ -// D3D12_DDI_SAMPLER_DESC rename ported from D3D11 (not shown here for simplicity) + +typedef struct D3D12DDI_SAMPLER_DESC +{ + D3D12DDI_FILTER Filter; + D3D12DDI_TEXTURE_ADDRESS_MODE AddressU; + D3D12DDI_TEXTURE_ADDRESS_MODE AddressV; + D3D12DDI_TEXTURE_ADDRESS_MODE AddressW; + FLOAT MipLODBias; + UINT MaxAnisotropy; + D3D12DDI_COMPARISON_FUNC ComparisonFunc; + FLOAT BorderColor[4]; // RGBA + FLOAT MinLOD; + FLOAT MaxLOD; +} D3D12DDI_SAMPLER_DESC; + typedef struct D3D12DDIARG_CREATE_SAMPLER { CONST D3D12_DDI_SAMPLER_DESC* pSamplerDesc; @@ -4524,6 +4597,47 @@ typedef VOID ( APIENTRY* PFND3D12DDI_CREATE_SAMPLER )( D3D12DDI_HDEVICE, _In_ CONST D3D12DDIARG_CREATE_SAMPLER*, _In_ D3D12DDI_CPU_DESCRIPTOR_HANDLE DestDescriptor); + + + + +typedef enum D3D12DDI_SAMPLER_FLAGS_0096 +{ + D3D12DDI_SAMPLER_FLAG_NONE = 0x0, + D3D12DDI_SAMPLER_FLAG_UINT_BORDER_COLOR = 0x01 +} D3D12DDI_SAMPLER_FLAGS_0096; +DEFINE_ENUM_FLAG_OPERATORS(D3D12DDI_SAMPLER_FLAGS_0096); + +typedef struct D3D12DDI_SAMPLER_DESC_0096 +{ + D3D12DDI_FILTER Filter; + D3D12DDI_TEXTURE_ADDRESS_MODE AddressU; + D3D12DDI_TEXTURE_ADDRESS_MODE AddressV; + D3D12DDI_TEXTURE_ADDRESS_MODE AddressW; + FLOAT MipLODBias; + UINT MaxAnisotropy; + D3D12DDI_COMPARISON_FUNC ComparisonFunc; + union + { + FLOAT FloatBorderColor[4]; // RGBA + UINT UintBorderColor[4]; + }; + FLOAT MinLOD; + FLOAT MaxLOD; + 
D3D12DDI_SAMPLER_FLAGS_0096 Flags; +} D3D12DDI_SAMPLER_DESC_0096; + + +typedef struct D3D12DDIARG_CREATE_SAMPLER_0096 +{ + CONST D3D12DDI_SAMPLER_DESC_0096* pSamplerDesc; +} D3D12DDIARG_CREATE_SAMPLER_0096; + +typedef VOID(APIENTRY* PFND3D12DDI_CREATE_SAMPLER_0096)( + D3D12DDI_HDEVICE, + _In_ CONST D3D12DDIARG_CREATE_SAMPLER_0096*, + _In_ D3D12DDI_CPU_DESCRIPTOR_HANDLE DestDescriptor); + ``` ### DDI Unordered Access View @@ -4903,6 +5017,8 @@ typedef enum D3D12DDI_STATIC_BORDER_COLOR D3D12DDI_STATIC_BORDER_COLOR_TRANSPARENT_BLACK, //0.0f,0.0f,0.0f,0.0f D3D12DDI_STATIC_BORDER_COLOR_OPAQUE_BLACK, // 0.0f,0.0f,0.0f,1.0f D3D12DDI_STATIC_BORDER_COLOR_OPAQUE_WHITE, // 1.0f,1.0f,1.0f,1.0f + D3D12DDI_STATIC_BORDER_COLOR_OPAQUE_BLACK_UINT, // 0u,0u,0u,1u + D3D12DDI_STATIC_BORDER_COLOR_OPAQUE_WHITE_UINT // 1u,1u,1u,1u } D3D12_DDI_STATIC_BORDER_COLOR; typedef struct D3D12DDI_STATIC_SAMPLER @@ -5173,6 +5289,24 @@ signature. # Change History +V1.21 Mar 11, 2022 +- Updated [Limitations on Static Samplers](#limitations-on-static-samplers); removing the mention +of border color being restricted to 2 bits, and adding new integer border colors to the list. +- Added declarations and DDIs for + - D3D12_SAMPLER_DESC2 + - D3D12_SAMPLER_FLAGS + - CreateSampler2 + - D3D12DDI_SAMPLER_DESC_0096 + - D3D12DDI_SAMPLER_FLAGS_0096 + - D3D12DDI_SAMPLER_FLAGS_0096 + - D3D12DDIARG_CREATE_SAMPLER_0096 + - PFND3D12DDI_CREATE_SAMPLER_0096 + +V1.20 Feb 3, 2022 + +- Added MSAA UAVs, supported if WriteableMSAATexturesSupported cap is TRUE. + e.g. D3D12_UAV_DIMENSION_TEXTURE2DMS and D3D12_UAV_DIMENSION_TEXTURE2DMSARRAY + V1.19 April 10, 2019 - Markdown bugfix