Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#5605: Only force-stall ethernet programs on earlier ethernet programs #16202

Merged
merged 1 commit into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions tt_metal/impl/dispatch/command_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1568,18 +1568,15 @@ void EnqueueProgramCommand::process() {
uint32_t sync_count = 0;
bool stall_first = reservation.first.need_sync;
bool stall_before_program = false;
if (!program.kernel_binary_always_stored_in_ringbuffer()) {
// Wait for all existing commands to run before writing out the kernel binary.
sync_count = this->expected_num_workers_completed;
stall_before_program = !stall_first;
} else if (reservation.first.need_sync) {
if (reservation.first.need_sync) {
// TODO: attempt to send RTA only without stalling.
sync_count = reservation.first.sync_count;
// Check if the launch message is the only thing preventing us from
// sending the program. If so, we can at least send the RTAs. Ideally we
// would also send the kernel binaries in this case, but the rest of the
// code isn't set up for that.
auto config_sizes = program.get_program_config_sizes();
config_sizes[config_sizes.size() - 2] = 0;
config_sizes[config_sizes.size() - 1] = 0;
const std::pair<ConfigBufferSync, std::vector<ConfigBufferEntry>&> memory_reservation =
this->config_buffer_mgr.reserve(config_sizes);
Expand Down Expand Up @@ -1622,9 +1619,9 @@ void EnqueueProgramCommand::process() {
this->config_buffer_mgr.alloc(this->expected_num_workers_completed + num_workers);
std::vector<ConfigBufferEntry>& kernel_config_addrs_raw = reservation.second;

// Remove launch buffer from config addrs, since it's not a real core.
// Remove launch buffers from config addrs, since they're not real cores.
const tt::stl::Span<ConfigBufferEntry> kernel_config_addrs{
kernel_config_addrs_raw.data(), kernel_config_addrs_raw.size() - 1};
kernel_config_addrs_raw.data(), kernel_config_addrs_raw.size() - 2};

RecordProgramRun(program);

Expand Down Expand Up @@ -3077,6 +3074,9 @@ void HWCommandQueue::reset_config_buffer_mgr(const uint32_t num_entries) {
// Subtract 1 from the number of entries, so the watcher can read information (e.g. fired asserts) from the
// previous launch message.
this->config_buffer_mgr[i].init_add_buffer(0, launch_msg_buffer_num_entries - 1);

// There's no ring buffer for active ethernet binaries, so keep track of them separately.
this->config_buffer_mgr[i].init_add_buffer(0, 1);
}
}

Expand Down
7 changes: 4 additions & 3 deletions tt_metal/impl/program/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,9 +337,7 @@ detail::Program_::Program_() :
}

program_configs_.resize(programmable_core_count);
program_config_sizes_.resize(programmable_core_count + 1);
// Always need one launch buffer msg for a program.
program_config_sizes_[programmable_core_count] = 1;
program_config_sizes_.resize(programmable_core_count + 2);
}

// Default constructor: initializes pimpl_ with a detail::Program_ instance created via std::make_unique.
Program::Program() : pimpl_(std::make_unique<detail::Program_>()) {}
Expand Down Expand Up @@ -1504,6 +1502,9 @@ void detail::Program_::finalize(Device *device) {
offset, max_size, magic_enum::enum_name(programmable_core_type));
}

this->get_program_config_size(hal.get_programmable_core_type_count()) = runs_on_noc_multicast_only_cores();
this->get_program_config_size(hal.get_programmable_core_type_count() + 1) = runs_on_noc_unicast_only_cores();

// The sem offsets cross programmable_core_types so must be set after the loop above
this->set_launch_msg_sem_offsets();

Expand Down
Loading