Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move the device timer conversion in cvk_device instead of commands #630

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions src/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1249,11 +1249,17 @@ cl_int cvk_device::get_device_host_timer(cl_ulong* device_timestamp,
return CL_SUCCESS;
}

cl_ulong cvk_device::device_timer_to_host(cl_ulong dev, cl_ulong sync_dev,
cl_ulong sync_host) const {
if (sync_host > sync_dev) {
return (sync_host - sync_dev) + dev;
cl_int cvk_device::device_timer_to_host(cl_ulong dev, cl_ulong& host) {
if (dev > m_sync_dev) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (dev > m_sync_dev) {
if (dev <= m_sync_dev) {

Isn't your condition backwards? You want a new pair of aligned timestamps only when you've wrapped around and the timestamp to convert to the host's time base is smaller than the device timestamp used as a reference.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it is backwards.
The idea is to get new sync points when the time we want is bigger than what we have converted so far. Because they might have been a slight divergence since we have the last sync point.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So you'll end up requesting a new pair of timestamps for pretty much each command since the device timestamp will almost always be greater than the last one obtained when getting a host/device pair. The only time where you're not requesting a host/device pair is when the device timestamp wraps around. At which point I don't see much motivation for the change.

cl_int err = get_device_host_timer(&m_sync_dev, &m_sync_host);
if (err != CL_SUCCESS) {
return err;
}
}
if (m_sync_host > m_sync_dev) {
host = (m_sync_host - m_sync_dev) + dev;
} else {
return dev - (sync_dev - sync_host);
host = dev - (m_sync_dev - m_sync_host);
}
return CL_SUCCESS;
}
6 changes: 4 additions & 2 deletions src/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,8 +542,7 @@ struct cvk_device : public _cl_device_id,

CHECK_RETURN cl_int get_device_host_timer(cl_ulong* dev_ts,
cl_ulong* host_ts) const;
cl_ulong device_timer_to_host(cl_ulong dev, cl_ulong sync_dev,
cl_ulong sync_host) const;
cl_int device_timer_to_host(cl_ulong dev, cl_ulong& host);

uint64_t timestamp_to_ns(uint64_t ts) const {
double ns_per_tick = vulkan_limits().timestampPeriod;
Expand Down Expand Up @@ -738,6 +737,9 @@ struct cvk_device : public _cl_device_id,

cl_uint m_preferred_subgroup_size{};

cl_ulong m_sync_host{};
cl_ulong m_sync_dev{};

spv_target_env m_vulkan_spirv_env;

std::unique_ptr<cvk_device_properties> m_clvk_properties;
Expand Down
42 changes: 22 additions & 20 deletions src/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -691,18 +691,27 @@ struct cvk_command_batchable : public cvk_command {
CHECK_RETURN cl_int do_action() override;
CHECK_RETURN virtual cl_int do_post_action() { return CL_SUCCESS; }

CHECK_RETURN cl_int set_profiling_info_end(cl_ulong sync_dev,
cl_ulong sync_host) {
cl_ulong start, end;
auto perr = get_timestamp_query_results(&start, &end);
CHECK_RETURN cl_int set_profiling_info_end() {
// If it has already been set, don't override it
if (m_event->get_profiling_info(CL_PROFILING_COMMAND_END) != 0) {
return CL_SUCCESS;
}
cl_ulong start_dev, end_dev;
cl_int perr = get_timestamp_query_results(&start_dev, &end_dev);
if (perr != CL_COMPLETE) {
return perr;
}
start =
m_queue->device()->device_timer_to_host(start, sync_dev, sync_host);
end = m_queue->device()->device_timer_to_host(end, sync_dev, sync_host);
m_event->set_profiling_info(CL_PROFILING_COMMAND_START, start);
m_event->set_profiling_info(CL_PROFILING_COMMAND_END, end);
cl_ulong start_host, end_host;
perr = m_queue->device()->device_timer_to_host(start_dev, start_host);
if (perr != CL_SUCCESS) {
return perr;
}
perr = m_queue->device()->device_timer_to_host(end_dev, end_host);
if (perr != CL_SUCCESS) {
return perr;
}
m_event->set_profiling_info(CL_PROFILING_COMMAND_START, start_host);
m_event->set_profiling_info(CL_PROFILING_COMMAND_END, end_host);
return CL_SUCCESS;
}

Expand All @@ -716,12 +725,10 @@ struct cvk_command_batchable : public cvk_command {
pinfo == CL_PROFILING_COMMAND_SUBMIT) {
return cvk_command::set_profiling_info(pinfo);
} else if (pinfo == CL_PROFILING_COMMAND_START) {
return m_queue->device()->get_device_host_timer(&m_sync_dev,
&m_sync_host);
return CL_SUCCESS;
} else {
CVK_ASSERT(pinfo == CL_PROFILING_COMMAND_END);
CVK_ASSERT(m_sync_dev != 0 && m_sync_host != 0);
return set_profiling_info_end(m_sync_dev, m_sync_host);
return set_profiling_info_end();
}
}

Expand All @@ -732,8 +739,6 @@ struct cvk_command_batchable : public cvk_command {
static const int NUM_POOL_QUERIES_PER_COMMAND = 2;
static const int POOL_QUERY_CMD_START = 0;
static const int POOL_QUERY_CMD_END = 1;

cl_ulong m_sync_dev{}, m_sync_host{};
};

struct cvk_ndrange {
Expand Down Expand Up @@ -861,14 +866,12 @@ struct cvk_command_batch : public cvk_command {
cl_int status = cvk_command::set_profiling_info(pinfo);
if (m_queue->profiling_on_device()) {
if (pinfo == CL_PROFILING_COMMAND_START) {
return m_queue->device()->get_device_host_timer(&m_sync_dev,
&m_sync_host);
return status;
} else {
for (auto& cmd : m_commands) {
cl_int err;
if (pinfo == CL_PROFILING_COMMAND_END) {
err = cmd->set_profiling_info_end(m_sync_dev,
m_sync_host);
err = cmd->set_profiling_info_end();
} else {
err = cmd->set_profiling_info(pinfo);
}
Expand Down Expand Up @@ -896,7 +899,6 @@ struct cvk_command_batch : public cvk_command {
private:
std::vector<std::unique_ptr<cvk_command_batchable>> m_commands;
std::unique_ptr<cvk_command_buffer> m_command_buffer;
cl_ulong m_sync_dev, m_sync_host;
};

struct cvk_command_map_buffer final : public cvk_command_buffer_base_region {
Expand Down
Loading