Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add debug ring support to normal ExecuteIndirect #1544

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion include/vkd3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ extern "C" {
#define VKD3D_CONFIG_FLAG_FORCE_MINIMUM_SUBGROUP_SIZE (1ull << 9)
#define VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV (1ull << 10)
#define VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET (1ull << 11)
/* Bit 12 is vacant */
#define VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT (1ull << 12)
#define VKD3D_CONFIG_FLAG_FORCE_HOST_CACHED (1ull << 13)
/* Bit 14 is vacant */
#define VKD3D_CONFIG_FLAG_FORCE_NO_INVARIANT_POSITION (1ull << 15)
Expand Down
115 changes: 108 additions & 7 deletions libs/vkd3d/command.c
Original file line number Diff line number Diff line change
Expand Up @@ -6766,6 +6766,68 @@ static bool d3d12_command_list_emit_multi_dispatch_indirect_count(struct d3d12_c
return true;
}

/* Records a compute dispatch of the signature's debug ring pipeline for a
 * plain (non-template) ExecuteIndirect call.
 *
 * list:          command list being recorded.
 * signature:     command signature; supplies debug_ring_pipeline, ByteStride
 *                and the argument descriptors.
 * indirect_args: device address of the application's indirect argument data.
 * count_arg:     device address of the indirect count, forwarded as-is
 *                (the visible caller passes 0 when there is no count buffer).
 * max_commands:  used directly as the X workgroup count of the dispatch.
 *
 * NOTE(review): NumArgumentDescs is assumed to be non-zero here, and
 * max_commands is assumed to fit the device's workgroup count limit;
 * neither is checked in this function - confirm against the callers. */
static void d3d12_command_list_emit_execute_indirect_debug_ring(struct d3d12_command_list *list,
        struct d3d12_command_signature *signature,
        VkDeviceAddress indirect_args, VkDeviceAddress count_arg, uint32_t max_commands)
{
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    struct vkd3d_execute_indirect_debug_ring_args ring_args;
    VkDependencyInfo dependency;
    VkMemoryBarrier2 barrier;
    VkCommandBuffer cmd;

    /* Zero everything up front so that debug_tag and implicit_instance stay 0
     * when indirect tracing is not enabled. */
    memset(&ring_args, 0, sizeof(ring_args));
    ring_args.api_buffer_word_stride = signature->desc.ByteStride / sizeof(uint32_t);
    ring_args.indirect_count_va = count_arg;
    ring_args.api_buffer_va = indirect_args;

    if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT)
    {
        /* The tag is the type of the last argument in the signature. */
        ring_args.debug_tag = signature->desc.pArgumentDescs[signature->desc.NumArgumentDescs - 1].Type;
        /* Device-wide counter shared with the other indirect paths;
         * presumably the - 1 turns the incremented value into a zero-based index. */
        ring_args.implicit_instance = vkd3d_atomic_uint32_increment(
                &list->device->debug_ring.implicit_instance_count, vkd3d_memory_order_relaxed) - 1;
    }

    /* Emit the instance number into the breadcrumb log so that both logs can be matched up. */
    VKD3D_BREADCRUMB_TAG("Implicit instance (plain)");
    VKD3D_BREADCRUMB_AUX32(ring_args.implicit_instance);

    d3d12_command_allocator_allocate_init_post_indirect_command_buffer(list->allocator, list);
    cmd = list->cmd.vk_init_commands_post_indirect_barrier;

    /* The debug work may land on the main command buffer; in that case the
     * current render pass is ended before the compute work is recorded. */
    if (cmd == list->cmd.vk_command_buffer)
        d3d12_command_list_end_current_render_pass(list, true);

    VK_CALL(vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
            signature->debug_ring_pipeline.vk_pipeline));
    VK_CALL(vkCmdPushConstants(cmd,
            signature->debug_ring_pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
            0, sizeof(ring_args), &ring_args));

    VK_CALL(vkCmdDispatch(cmd, max_commands, 1, 1));

    if (cmd != list->cmd.vk_command_buffer)
    {
        /* Separate init command buffer: only flag that a compute -> indirect
         * barrier is needed instead of recording one here. */
        list->cmd.indirect_meta->need_compute_to_indirect_barrier = true;
        return;
    }

    /* Recorded inline on the main command buffer: emit a barrier from the
     * compute shader stage to the draw indirect stage. Both access masks are
     * zero, i.e. this is an execution-only dependency. */
    memset(&barrier, 0, sizeof(barrier));
    barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2;
    barrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
    barrier.srcAccessMask = 0;
    barrier.dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT;
    barrier.dstAccessMask = 0;

    memset(&dependency, 0, sizeof(dependency));
    dependency.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO;
    dependency.memoryBarrierCount = 1;
    dependency.pMemoryBarriers = &barrier;
    VK_CALL(vkCmdPipelineBarrier2(cmd, &dependency));

    /* Compute pipeline and push constants were bound on the main command
     * buffer above, so the tracked pipeline and root parameter state is
     * invalidated (presumably so it gets re-applied before the next command). */
    d3d12_command_list_invalidate_current_pipeline(list, true);
    d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings);
}

static bool d3d12_command_list_emit_multi_dispatch_indirect_count_state(struct d3d12_command_list *list,
struct d3d12_command_signature *signature,
VkDeviceAddress indirect_args,
Expand Down Expand Up @@ -6817,6 +6879,17 @@ static bool d3d12_command_list_emit_multi_dispatch_indirect_count_state(struct d
args.stride_words = stride / sizeof(uint32_t);
args.dispatch_offset_words = signature->state_template.compute.dispatch_offset_words;

if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT)
{
args.debug_tag = UINT32_MAX;
args.implicit_instance = vkd3d_atomic_uint32_increment(
&list->device->debug_ring.implicit_instance_count, vkd3d_memory_order_relaxed) - 1;
}

/* Allow correlation against breadcrumb log. */
VKD3D_BREADCRUMB_TAG("Implicit instance (compute template)");
VKD3D_BREADCRUMB_AUX32(args.implicit_instance);

d3d12_command_allocator_allocate_init_post_indirect_command_buffer(list->allocator, list);
vk_patch_cmd_buffer = list->cmd.vk_init_commands_post_indirect_barrier;

Expand Down Expand Up @@ -13065,7 +13138,7 @@ static void d3d12_command_list_execute_indirect_state_template_dgc(
current_pipeline = list->current_pipeline;

memset(&patch_args, 0, sizeof(patch_args));
patch_args.debug_tag = 0; /* Modify to non-zero value as desired when debugging. */
patch_args.debug_tag = (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT) ? UINT32_MAX : 0;

/* If everything regarding alignment works out, we can just reuse the app indirect buffer instead. */
require_ibo_update = false;
Expand Down Expand Up @@ -13133,9 +13206,12 @@ static void d3d12_command_list_execute_indirect_state_template_dgc(
if (patch_args.debug_tag != 0)
{
/* Makes log easier to understand since a sorted log will appear in-order. */
static uint32_t vkd3d_implicit_instance_count;
patch_args.implicit_instance = vkd3d_atomic_uint32_increment(
&vkd3d_implicit_instance_count, vkd3d_memory_order_relaxed) - 1;
&list->device->debug_ring.implicit_instance_count, vkd3d_memory_order_relaxed) - 1;

/* Allow correlation against breadcrumb log. */
VKD3D_BREADCRUMB_TAG("Implicit instance (template)");
VKD3D_BREADCRUMB_AUX32(patch_args.implicit_instance);
}

d3d12_command_allocator_allocate_init_post_indirect_command_buffer(list->allocator, list);
Expand Down Expand Up @@ -13441,6 +13517,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(d3d12_command_l
return;
}

d3d12_command_list_end_transfer_batch(list);

if (sig_impl->debug_ring_pipeline.vk_pipeline)
{
d3d12_command_list_emit_execute_indirect_debug_ring(list, sig_impl,
arg_impl->res.va + arg_buffer_offset,
count_impl ? count_impl->res.va + count_buffer_offset : 0,
max_command_count);
}

/* Temporary workaround, since we cannot parse non-draw arguments yet. Point directly
* to the first argument. Should avoid hard crashes for now. */
arg_buffer_offset += sig_impl->argument_buffer_offset_for_command;
Expand Down Expand Up @@ -13514,7 +13600,6 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(d3d12_command_l
scratch.va = arg_impl->res.va + arg_buffer_offset;
}

d3d12_command_list_end_transfer_batch(list);
switch (arg_desc->Type)
{
case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW:
Expand Down Expand Up @@ -18909,6 +18994,13 @@ static HRESULT d3d12_command_signature_init_state_template_dgc(struct d3d12_comm
VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI,
};

static const enum vkd3d_patch_command_token dispatch_types[] =
{
VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_X,
VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Y,
VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Z,
};

static const VkIndexType vk_index_types[] = { VK_INDEX_TYPE_UINT32, VK_INDEX_TYPE_UINT16 };
static const uint32_t d3d_index_types[] = { DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R16_UINT };

Expand Down Expand Up @@ -19082,9 +19174,8 @@ static HRESULT d3d12_command_signature_init_state_template_dgc(struct d3d12_comm
token.offset = stream_stride;
stream_stride += sizeof(VkDispatchIndirectCommand);
dst_word_offset = token.offset / sizeof(uint32_t);
/* TODO: Rebase on top of debug-ring-indirect. */
generic_u32_copy_count = 0;
generic_u32_copy_types = NULL;
generic_u32_copy_count = ARRAY_SIZE(dispatch_types);
generic_u32_copy_types = dispatch_types;
break;

default:
Expand Down Expand Up @@ -19331,8 +19422,18 @@ HRESULT d3d12_command_signature_create(struct d3d12_device *device, struct d3d12
* for optimal reordering. */
vkd3d_atomic_uint32_store_explicit(&device->device_has_dgc_templates, 1, vkd3d_memory_order_relaxed);
}
else
{
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT)
{
vkd3d_meta_get_execute_indirect_debug_ring_pipeline(&device->meta_ops,
signature_size / sizeof(uint32_t),
&object->debug_ring_pipeline);
}
}

object->argument_buffer_offset_for_command = argument_buffer_offset;

d3d12_device_add_ref(object->device = device);

TRACE("Created command signature %p.\n", object);
Expand Down
100 changes: 98 additions & 2 deletions libs/vkd3d/debug_ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ static const char *vkd3d_patch_command_token_str(enum vkd3d_patch_command_token
case VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_X: return "Mesh Tasks (X)";
case VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Y: return "Mesh Tasks (Y)";
case VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Z: return "Mesh Tasks (Z)";
case VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_X: return "X";
case VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Y: return "Y";
case VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Z: return "Z";
default: return "???";
}
}
Expand All @@ -107,6 +110,29 @@ static bool vkd3d_patch_command_token_is_hex(enum vkd3d_patch_command_token toke
}
}

/* Translates a debug tag into a short label for log output.
 *
 * value: either a D3D12_INDIRECT_ARGUMENT_TYPE_* constant or UINT32_MAX.
 * Returns a string literal; any value not listed below yields "???". */
static const char *vkd3d_debug_tag_to_str(uint32_t value)
{
    const char *label = "???";

    switch (value)
    {
        case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW:
            label = "Draw";
            break;

        case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED:
            label = "DrawIndexed";
            break;

        case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH:
            label = "Dispatch";
            break;

        case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH:
            label = "Mesh";
            break;

        case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_RAYS:
            label = "RayGen";
            break;

        /* UINT32_MAX labels the template paths. */
        case UINT32_MAX:
            label = "Template";
            break;

        default:
            break;
    }

    return label;
}

static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring *ring,
uint32_t word_offset, uint32_t message_word_count)
{
Expand Down Expand Up @@ -136,8 +162,8 @@ static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring
* Make sure the log is sortable for easier debug.
* TODO: Might consider a callback system that listeners from different subsystems can listen to and print their own messages,
* but that is overengineering at this time ... */
snprintf(message_buffer, sizeof(message_buffer), "ExecuteIndirect: GlobalCommandIndex %010u, Debug tag %010u, DrawID %04u (ThreadID %04u): ",
debug_instance, debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
snprintf(message_buffer, sizeof(message_buffer), "ExecuteIndirect: GlobalCommandIndex %010u, %s, DrawID %04u (ThreadID %04u): ",
debug_instance, vkd3d_debug_tag_to_str(debug_thread_id[0]), debug_thread_id[1], debug_thread_id[2]);

if (message_word_count == 2)
{
Expand All @@ -147,6 +173,76 @@ static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring
READ_RING_WORD(word_offset + 0),
READ_RING_WORD(word_offset + 1));
}
else if (message_word_count == 3)
{
static const enum vkd3d_patch_command_token draw_types[] =
{
VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_COUNT,
VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT,
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_VERTEX,
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE,
};

static const enum vkd3d_patch_command_token draw_indexed_types[] =
{
VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_COUNT,
VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT,
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INDEX,
VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_OFFSET,
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE,
};

static const enum vkd3d_patch_command_token dispatch_types[] =
{
VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_X,
VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Y,
VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Z,
};

static const enum vkd3d_patch_command_token mesh_types[] =
{
VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_X,
VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Y,
VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Z,
};

const char *tag_str = "?";
uint32_t value, index;

len = strlen(message_buffer);
avail = sizeof(message_buffer) - len;
/* word 0 is a dummy value. */
index = READ_RING_WORD(word_offset + 1);
value = READ_RING_WORD(word_offset + 2);

switch (debug_thread_id[0])
{
case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW:
if (index < ARRAY_SIZE(draw_types))
tag_str = vkd3d_patch_command_token_str(draw_types[index]);
break;

case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED:
if (index < ARRAY_SIZE(draw_indexed_types))
tag_str = vkd3d_patch_command_token_str(draw_indexed_types[index]);
break;

case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH:
if (index < ARRAY_SIZE(dispatch_types))
tag_str = vkd3d_patch_command_token_str(dispatch_types[index]);
break;

case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH:
if (index < ARRAY_SIZE(mesh_types))
tag_str = vkd3d_patch_command_token_str(mesh_types[index]);
break;

default:
break;
}

snprintf(message_buffer + len, avail, "%s <- %u", tag_str, value);
}
else if (message_word_count == 4)
{
union { uint32_t u32; float f32; int32_t s32; } value;
Expand Down
15 changes: 8 additions & 7 deletions libs/vkd3d/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -897,6 +897,7 @@ static const struct vkd3d_debug_option vkd3d_config_options[] =
{"preallocate_srv_mip_clamps", VKD3D_CONFIG_FLAG_PREALLOCATE_SRV_MIP_CLAMPS},
{"force_initial_transition", VKD3D_CONFIG_FLAG_FORCE_INITIAL_TRANSITION},
{"breadcrumbs_trace", VKD3D_CONFIG_FLAG_BREADCRUMBS | VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE},
{"breadcrumbs_trace_indirect", VKD3D_CONFIG_FLAG_BREADCRUMBS | VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT},
{"requires_compute_indirect_templates", VKD3D_CONFIG_FLAG_REQUIRES_COMPUTE_INDIRECT_TEMPLATES},
{"skip_driver_workarounds", VKD3D_CONFIG_FLAG_SKIP_DRIVER_WORKAROUNDS},
{"curb_memory_pso_cache", VKD3D_CONFIG_FLAG_CURB_MEMORY_PSO_CACHE},
Expand Down Expand Up @@ -8416,19 +8417,19 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
if (FAILED(hr = vkd3d_sampler_state_init(&device->sampler_state, device)))
goto out_cleanup_view_map;

if (FAILED(hr = vkd3d_meta_ops_init(&device->meta_ops, device)))
if (FAILED(hr = vkd3d_shader_debug_ring_init(&device->debug_ring, device)))
goto out_cleanup_sampler_state;

if (FAILED(hr = vkd3d_shader_debug_ring_init(&device->debug_ring, device)))
goto out_cleanup_meta_ops;
if (FAILED(hr = vkd3d_meta_ops_init(&device->meta_ops, device)))
goto out_cleanup_debug_ring;

vkd3d_scratch_pool_init(device);

#ifdef VKD3D_ENABLE_BREADCRUMBS
vkd3d_breadcrumb_tracer_init_barrier_hashes(&device->breadcrumb_tracer);
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
if (FAILED(hr = vkd3d_breadcrumb_tracer_init(&device->breadcrumb_tracer, device)))
goto out_cleanup_debug_ring;
goto out_cleanup_meta_ops;
#endif

if (vkd3d_descriptor_debug_active_qa_checks())
Expand Down Expand Up @@ -8475,12 +8476,12 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
#ifdef VKD3D_ENABLE_BREADCRUMBS
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
vkd3d_breadcrumb_tracer_cleanup(&device->breadcrumb_tracer, device);
out_cleanup_debug_ring:
out_cleanup_meta_ops:
vkd3d_breadcrumb_tracer_cleanup_barrier_hashes(&device->breadcrumb_tracer);
#endif
vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device);
out_cleanup_meta_ops:
vkd3d_meta_ops_cleanup(&device->meta_ops, device);
out_cleanup_debug_ring:
vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device);
out_cleanup_sampler_state:
vkd3d_sampler_state_cleanup(&device->sampler_state, device);
out_cleanup_view_map:
Expand Down
5 changes: 4 additions & 1 deletion libs/vkd3d/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ vkd3d_shaders =[
'shaders/vs_swapchain_fullscreen.vert',
'shaders/fs_swapchain_fullscreen.frag',
'shaders/cs_execute_indirect_patch.comp',
'shaders/cs_execute_indirect_patch_debug_ring.comp',
'shaders/cs_execute_indirect_multi_dispatch.comp',
'shaders/cs_execute_indirect_multi_dispatch_state.comp',

Expand All @@ -53,6 +52,10 @@ vkd3d_shaders =[
'shaders/cs_resolve_color_float.comp',
'shaders/cs_resolve_color_uint.comp',
'shaders/cs_resolve_color_sint.comp',

'shaders/cs_execute_indirect_debug_ring.comp',
'shaders/cs_execute_indirect_patch_debug_ring.comp',
'shaders/cs_execute_indirect_multi_dispatch_state_debug_ring.comp',
]

vkd3d_src = [
Expand Down
Loading
Loading