-
Notifications
You must be signed in to change notification settings - Fork 114
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Enable multi-buffer per channel in EDM (#11387)
#6300: Add multi-buffering per EDM channel. Adds the option to use multiple buffers (e.g. double buffering) per EDM channel. This is useful for improving the performance of CCL operations. To simplify the worker <-> EDM interface and allow a kernel to automatically support multi-buffered channels, new adapter components are added: - WorkerToEdmReader: for a worker pulling data from the EDM - WorkerToEdmSender: for a worker pushing data to the EDM. These hide details such as buffer offsets in the channel and any other details that may only be relevant to the EDM. Additionally, their use encapsulates the worker <-> EDM data movement protocol, allowing future low-level changes to buffer layouts and allocations on the EDM without requiring worker kernel changes. As a coinciding step required to enable this functionality, the EDM channel count limit has been lifted to unlimited (limited only by how many buffers can fit into L1). This provides additional flexibility for op writers and lets `erisc_info::channels` be shrunk back to a single entry. Note that this commit only adds this feature, but does not yet enable it for CCL ops.
- Loading branch information
1 parent
6ca0fbb
commit 6c566aa
Showing
34 changed files
with
1,900 additions
and
500 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
89 changes: 0 additions & 89 deletions
89
...t_metal/test_kernels/dataflow/unit_tests/erisc/erisc_datamover_receiver_worker_reader.cpp
This file was deleted.
Oops, something went wrong.
43 changes: 0 additions & 43 deletions
43
...t_metal/test_kernels/dataflow/unit_tests/erisc/erisc_datamover_receiver_worker_sender.cpp
This file was deleted.
Oops, something went wrong.
93 changes: 0 additions & 93 deletions
93
.../tt_metal/test_kernels/dataflow/unit_tests/erisc/erisc_datamover_sender_worker_sender.cpp
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
44 changes: 44 additions & 0 deletions
44
tests/ttnn/unit_tests/gtests/ccl/kernels/erisc_datamover_receiver_worker_reader.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include <cstdint> | ||
#include <array> | ||
|
||
#include "dataflow_api.h" | ||
#include "ttnn/cpp/ttnn/operations/ccl/kernel_common/worker_edm_utils.hpp" | ||
#include "ttnn/cpp/ttnn/operations/ccl/kernel_common/worker_edm_adapters.hpp" | ||
|
||
void kernel_main() { | ||
constexpr uint32_t eth_receiver_l1_base_addr = get_compile_time_arg_val(0); | ||
constexpr uint32_t eth_receiver_l1_sem_addr = get_compile_time_arg_val(1); | ||
constexpr uint32_t num_buffers_per_channel = get_compile_time_arg_val(2); | ||
constexpr ttnn::ccl::EriscDataMoverTerminationMode termination_mode = static_cast<ttnn::ccl::EriscDataMoverTerminationMode>(get_compile_time_arg_val(3)); | ||
const uint32_t num_pages_per_read_chunk = get_arg_val<uint32_t>(0); | ||
const uint32_t total_pages_to_read = get_arg_val<uint32_t>(1); | ||
const uint32_t page_size = get_arg_val<uint32_t>(2); | ||
const uint32_t receiver_erisc_datamover_noc_x = get_arg_val<uint32_t>(3); | ||
const uint32_t receiver_erisc_datamover_noc_y = get_arg_val<uint32_t>(4); | ||
// Worker local L1 semaphore that erisc datamover signals to | ||
volatile uint32_t* const receiver_read_sem_addr = reinterpret_cast<volatile uint32_t* const >(get_semaphore(get_arg_val<uint32_t>(5))); | ||
const uint32_t num_buffers_per_edm_channel = get_arg_val<uint32_t>(6); | ||
|
||
ccl::edm::WorkerToEdmReader<termination_mode> reader( | ||
ttnn::ccl::WorkerXY(receiver_erisc_datamover_noc_x, receiver_erisc_datamover_noc_y), | ||
eth_receiver_l1_base_addr, | ||
num_buffers_per_channel, | ||
eth_receiver_l1_sem_addr, | ||
num_pages_per_read_chunk * page_size, | ||
receiver_read_sem_addr); | ||
|
||
constexpr uint32_t cb_id_in0 = tt::CB::c_in0; | ||
|
||
for (uint32_t i = 0; i < total_pages_to_read; i += num_pages_per_read_chunk) { | ||
bool last_message = (i + num_pages_per_read_chunk) >= total_pages_to_read; | ||
uint32_t num_pages_to_read = std::min(total_pages_to_read - i, num_pages_per_read_chunk); | ||
reader.wait_for_payload_available(); | ||
reader.fetch_payload_blocking(cb_id_in0, num_pages_to_read, page_size, last_message); | ||
} | ||
|
||
reader.close(); | ||
} |
Oops, something went wrong.