diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp index c66a0c33d47e..7aa1811ecd5e 100644 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp +++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp @@ -26,3 +26,7 @@ inline std::pair, std::vector> E EnqueueWriteBuffer(cq, *buffer, src, false); return std::make_pair(std::move(buffer), src); } + +inline bool does_device_have_active_eth_cores(const Device *device) { + return !(device->get_active_ethernet_cores(true).empty()); +} diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp index b4cb62889ab5..90a7b9221a39 100644 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp +++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp @@ -44,10 +44,6 @@ struct DummyProgramMultiCBConfig { namespace local_test_functions { -bool does_device_have_active_eth_cores(const Device *device) { - return !(device->get_active_ethernet_cores(true).empty()); -} - void initialize_dummy_kernels(Program& program, const CoreRangeSet& cr_set) { auto dummy_reader_kernel = CreateKernel( program, "tt_metal/kernels/dataflow/blank.cpp", cr_set, @@ -1404,8 +1400,8 @@ TEST_F(RandomProgramFixture, TestSimpleProgramsOnTensix) { } TEST_F(RandomProgramFixture, TestSimpleProgramsOnEth) { - if (!local_test_functions::does_device_have_active_eth_cores(device_)) { - GTEST_SKIP() << "Skipping test because device " << device_->id() << "does not have any active ethernet cores"; + if (!does_device_have_active_eth_cores(device_)) { + GTEST_SKIP() << "Skipping test because device " << device_->id() << " does not have any active ethernet cores"; } for (uint32_t i = 0; i < NUM_PROGRAMS; i++) { @@ -1421,8 +1417,8 @@ TEST_F(RandomProgramFixture, TestSimpleProgramsOnEth) { } TEST_F(RandomProgramFixture, TestSimpleProgramsOnTensixAndEth) { - if (!local_test_functions::does_device_have_active_eth_cores(device_)) { - GTEST_SKIP() << "Skipping test because device " << device_->id() << "does not have any active ethernet cores"; + if (!does_device_have_active_eth_cores(device_)) { + GTEST_SKIP() << "Skipping test because device " << device_->id() << " does not have any active ethernet cores"; } for (uint32_t i = 0; i < NUM_PROGRAMS; i++) { @@ -1460,8 +1456,8 @@ TEST_F(RandomProgramFixture, TestProgramsOnTensix) { } TEST_F(RandomProgramFixture, TestProgramsOnEth) { - if (!local_test_functions::does_device_have_active_eth_cores(device_)) { - GTEST_SKIP() << "Skipping test because device " << device_->id() << "does not have any active ethernet cores"; + if (!does_device_have_active_eth_cores(device_)) { + GTEST_SKIP() << "Skipping test because device " << device_->id() << " does not have any active ethernet cores"; } for (uint32_t i = 0; i < NUM_PROGRAMS; i++) { @@ -1482,8 +1478,8 @@ TEST_F(RandomProgramFixture, TestProgramsOnEth) { } TEST_F(RandomProgramFixture, TestProgramsOnTensixAndEth) { - if (!local_test_functions::does_device_have_active_eth_cores(device_)) { - GTEST_SKIP() << "Skipping test because device " << device_->id() << "does not have any active ethernet cores"; + if (!does_device_have_active_eth_cores(device_)) { + GTEST_SKIP() << "Skipping test because device " << device_->id() << " does not have any active ethernet cores"; } for (uint32_t i = 0; i < NUM_PROGRAMS; i++) { diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueTrace.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueTrace.cpp index b6dbd81212ce..dbac6e5dac41 100644 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueTrace.cpp +++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueTrace.cpp @@ -4,8 +4,10 @@ #include #include +#include #include "command_queue_fixture.hpp" +#include "command_queue_test_utils.hpp" #include "detail/tt_metal.hpp" #include "tt_metal/common/env_lib.hpp" #include "gtest/gtest.h" @@ -367,3 +369,224 @@ TEST_F(SingleDeviceTraceFixture, EnqueueMultiProgramTraceBenchmark) { } } // end namespace basic_tests + +TEST_F(RandomProgramTraceFixture, TensixTestSimpleProgramsTrace) { + for (uint32_t i = 0; i < NUM_PROGRAMS; i++) { + if (i % 10 == 0) { + log_info(tt::LogTest, "Creating Program {}", i); + } + this->programs[i] = CreateProgram(); + Program& program = this->programs[i]; + this->create_kernel(program, CoreType::WORKER, true); + EnqueueProgram(this->device_->command_queue(), program, false); + } + + const uint32_t trace_id = this->trace_programs(); + + Finish(this->device_->command_queue()); + ReleaseTrace(this->device_, trace_id); +} + +TEST_F(RandomProgramTraceFixture, ActiveEthTestSimpleProgramsTrace) { + if (!does_device_have_active_eth_cores(this->device_)) { + GTEST_SKIP() << "Skipping test because device " << this->device_->id() << " does not have any active ethernet cores"; + } + + for (uint32_t i = 0; i < NUM_PROGRAMS; i++) { + if (i % 10 == 0) { + log_info(tt::LogTest, "Creating Program {}", i); + } + this->programs[i] = CreateProgram(); + Program& program = this->programs[i]; + this->create_kernel(program, CoreType::ETH, true); + EnqueueProgram(this->device_->command_queue(), program, false); + } + + const uint32_t trace_id = this->trace_programs(); + + Finish(this->device_->command_queue()); + ReleaseTrace(this->device_, trace_id); +} + +TEST_F(RandomProgramTraceFixture, TensixActiveEthTestSimpleProgramsTrace) { + if (!does_device_have_active_eth_cores(this->device_)) { + GTEST_SKIP() << "Skipping test because device " << this->device_->id() << " does not have any active ethernet cores"; + } + + for (uint32_t i = 0; i < NUM_PROGRAMS; i++) { + if (i % 10 == 0) { + log_info(tt::LogTest, "Creating Program {}", i); + } + this->programs[i] = CreateProgram(); + Program& program = this->programs[i]; + + bool eth_kernel_added_to_program = false; + if (rand() % 2 == 0) { + this->create_kernel(program, CoreType::ETH, true); + eth_kernel_added_to_program = true; + } + if (rand() % 2 == 0 || !eth_kernel_added_to_program) { + this->create_kernel(program, CoreType::WORKER, true); + } + + EnqueueProgram(this->device_->command_queue(), program, false); + } + + const uint32_t trace_id = this->trace_programs(); + + Finish(this->device_->command_queue()); + ReleaseTrace(this->device_, trace_id); +} + +TEST_F(RandomProgramTraceFixture, TensixTestProgramsTrace) { + for (uint32_t i = 0; i < NUM_PROGRAMS; i++) { + if (i % 10 == 0) { + log_info(tt::LogTest, "Creating Program {}", i); + } + this->programs[i] = CreateProgram(); + Program& program = this->programs[i]; + this->create_kernel(program, CoreType::WORKER); + EnqueueProgram(this->device_->command_queue(), program, false); + } + + Finish(device_->command_queue()); +} + +TEST_F(RandomProgramTraceFixture, ActiveEthTestProgramsTrace) { + if (!does_device_have_active_eth_cores(this->device_)) { + GTEST_SKIP() << "Skipping test because device " << this->device_->id() << " does not have any active ethernet cores"; + } + + for (uint32_t i = 0; i < NUM_PROGRAMS; i++) { + if (i % 10 == 0) { + log_info(tt::LogTest, "Creating Program {}", i); + } + this->programs[i] = CreateProgram(); + Program& program = this->programs[i]; + // Large eth kernels currently don't fit in the ring buffer, so we're reducing the max number of RTAs + // and the max kernel size to ensure that the kernel can fit in the ring buffer + KernelProperties kernel_properties; + kernel_properties.max_kernel_size_bytes = MAX_KERNEL_SIZE_BYTES / 2; + kernel_properties.max_num_rt_args = MAX_NUM_RUNTIME_ARGS / 4; + this->create_kernel(program, CoreType::ETH, false, kernel_properties); + EnqueueProgram(this->device_->command_queue(), program, false); + } + + const uint32_t trace_id = this->trace_programs(); + + Finish(this->device_->command_queue()); + ReleaseTrace(this->device_, trace_id); +} + +TEST_F(RandomProgramTraceFixture, TensixActiveEthTestProgramsTrace) { + if (!does_device_have_active_eth_cores(this->device_)) { + GTEST_SKIP() << "Skipping test because device " << this->device_->id() << " does not have any active ethernet cores"; + } + + for (uint32_t i = 0; i < NUM_PROGRAMS; i++) { + if (i % 10 == 0) { + log_info(tt::LogTest, "Creating Program {}", i); + } + this->programs[i] = CreateProgram(); + Program& program = this->programs[i]; + + bool eth_kernel_added_to_program = false; + if (rand() % 2 == 0) { + // Large eth kernels currently don't fit in the ring buffer, so we're reducing the max number of RTAs + // and the max kernel size to ensure that the kernel can fit in the ring buffer + KernelProperties kernel_properties; + kernel_properties.max_kernel_size_bytes = MAX_KERNEL_SIZE_BYTES / 2; + kernel_properties.max_num_rt_args = MAX_NUM_RUNTIME_ARGS / 4; + kernel_properties.max_num_sems = MAX_NUM_SEMS / 2; + this->create_kernel(program, CoreType::ETH, false, kernel_properties); + eth_kernel_added_to_program = true; + } + if (rand() % 2 == 0 || !eth_kernel_added_to_program) { + KernelProperties kernel_properties; + kernel_properties.max_num_sems = MAX_NUM_SEMS / 2; + this->create_kernel(program, CoreType::WORKER, false, kernel_properties); + } + + EnqueueProgram(this->device_->command_queue(), program, false); + } + + const uint32_t trace_id = this->trace_programs(); + + Finish(this->device_->command_queue()); + ReleaseTrace(this->device_, trace_id); +} + +TEST_F(RandomProgramTraceFixture, TensixTestAlternatingLargeAndSmallProgramsTrace) { + for (uint32_t i = 0; i < NUM_PROGRAMS; i++) { + if (i % 10 == 0) { + log_info(tt::LogTest, "Creating Program {}", i); + } + this->programs[i] = CreateProgram(); + Program& program = this->programs[i]; + + KernelProperties kernel_properties; + if (i % 2 == 0) { + kernel_properties = this->get_large_kernel_properties(); + } else { + kernel_properties = this->get_small_kernel_properties(); + } + + this->create_kernel(program, CoreType::WORKER, false, kernel_properties); + EnqueueProgram(this->device_->command_queue(), program, false); + } + + const uint32_t trace_id = this->trace_programs(); + + Finish(this->device_->command_queue()); + ReleaseTrace(this->device_, trace_id); +} + +TEST_F(RandomProgramTraceFixture, TensixTestLargeProgramFollowedBySmallProgramsTrace) { + for (uint32_t i = 0; i < NUM_PROGRAMS; i++) { + if (i % 10 == 0) { + log_info(tt::LogTest, "Creating Program {}", i); + } + this->programs[i] = CreateProgram(); + Program& program = this->programs[i]; + + KernelProperties kernel_properties; + if (i == 0) { + kernel_properties = this->get_large_kernel_properties(); + } else { + kernel_properties = this->get_small_kernel_properties(); + } + + this->create_kernel(program, CoreType::WORKER, false, kernel_properties); + EnqueueProgram(this->device_->command_queue(), program, false); + } + + const uint32_t trace_id = this->trace_programs(); + + Finish(this->device_->command_queue()); + ReleaseTrace(this->device_, trace_id); +} + +TEST_F(RandomProgramTraceFixture, TensixTestLargeProgramInBetweenFiveSmallProgramsTrace) { + for (uint32_t i = 0; i < NUM_PROGRAMS; i++) { + if (i % 10 == 0) { + log_info(tt::LogTest, "Creating Program {}", i); + } + this->programs[i] = CreateProgram(); + Program& program = this->programs[i]; + + KernelProperties kernel_properties; + if (i % 6 == 0) { + kernel_properties = this->get_large_kernel_properties(); + } else { + kernel_properties = this->get_small_kernel_properties(); + } + + this->create_kernel(program, CoreType::WORKER, false, kernel_properties); + EnqueueProgram(this->device_->command_queue(), program, false); + } + + const uint32_t trace_id = this->trace_programs(); + + Finish(this->device_->command_queue()); + ReleaseTrace(this->device_, trace_id); +} diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/common/command_queue_fixture.hpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/common/command_queue_fixture.hpp index 2bad2d7ba2b9..a7c9fb13f59f 100644 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/common/command_queue_fixture.hpp +++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/common/command_queue_fixture.hpp @@ -17,6 +17,7 @@ #include "impl/device/device.hpp" #include "impl/kernels/data_types.hpp" #include "impl/kernels/kernel_types.hpp" +#include "impl/dispatch/command_queue.hpp" #include "llrt/hal.hpp" #include "tt_cluster_descriptor_types.h" #include "tt_metal/host_api.hpp" @@ -91,36 +92,53 @@ class CommandQueueMultiDeviceFixture : public ::testing::Test { class CommandQueueSingleCardFixture : public ::testing::Test { protected: void SetUp() override { + this->validate_dispatch_mode(); + this->arch_ = tt::get_arch_from_string(tt::test_utils::get_umd_arch_name()); + this->create_devices(); + } + + void TearDown() override { tt::tt_metal::detail::CloseDevices(reserved_devices_); } + + void validate_dispatch_mode() { auto slow_dispatch = getenv("TT_METAL_SLOW_DISPATCH_MODE"); if (slow_dispatch) { TT_THROW("This suite can only be run with fast dispatch or TT_METAL_SLOW_DISPATCH_MODE unset"); GTEST_SKIP(); } - auto enable_remote_chip = getenv("TT_METAL_ENABLE_REMOTE_CHIP"); - arch_ = tt::get_arch_from_string(tt::test_utils::get_umd_arch_name()); + } + void create_devices(const std::size_t trace_region_size = DEFAULT_TRACE_REGION_SIZE) { const auto &dispatch_core_type = tt::llrt::OptionsG.get_dispatch_core_type(); const chip_id_t mmio_device_id = 0; - reserved_devices_ = tt::tt_metal::detail::CreateDevices({mmio_device_id}, 1, DEFAULT_L1_SMALL_SIZE, DEFAULT_TRACE_REGION_SIZE, dispatch_core_type); + this->reserved_devices_ = tt::tt_metal::detail::CreateDevices( + {mmio_device_id}, 1, DEFAULT_L1_SMALL_SIZE, trace_region_size, dispatch_core_type); + auto enable_remote_chip = getenv("TT_METAL_ENABLE_REMOTE_CHIP"); if (enable_remote_chip) { - for (const auto &[id, device] : reserved_devices_) { - devices_.push_back(device); + for (const auto &[id, device] : this->reserved_devices_) { + this->devices_.push_back(device); } } else { - devices_.push_back(reserved_devices_.at(mmio_device_id)); + this->devices_.push_back(this->reserved_devices_.at(mmio_device_id)); } - num_devices_ = reserved_devices_.size(); + this->num_devices_ = this->reserved_devices_.size(); } - void TearDown() override { tt::tt_metal::detail::CloseDevices(reserved_devices_); } - - std::vector devices_; - std::map reserved_devices_; + std::vector devices_; + std::map reserved_devices_; tt::ARCH arch_; size_t num_devices_; }; +class CommandQueueSingleCardTraceFixture : virtual public CommandQueueSingleCardFixture { + protected: + void SetUp() override { + this->validate_dispatch_mode(); + this->arch_ = tt::get_arch_from_string(tt::test_utils::get_umd_arch_name()); + this->create_devices(90000000); + } +}; + class SingleDeviceTraceFixture: public ::testing::Test { protected: tt::tt_metal::Device* device_; @@ -155,7 +173,7 @@ class SingleDeviceTraceFixture: public ::testing::Test { }; -class RandomProgramFixture : public CommandQueueSingleCardFixture { +class RandomProgramFixture : virtual public CommandQueueSingleCardFixture { protected: static const uint32_t MIN_KERNEL_SIZE_BYTES = 20; static const uint32_t MAX_KERNEL_SIZE_BYTES = 4096; @@ -209,9 +227,11 @@ class RandomProgramFixture : public CommandQueueSingleCardFixture { void SetUp() override { CommandQueueSingleCardFixture::SetUp(); - this->device_ = this->devices_[0]; + this->initialize_seed(); + } + void initialize_seed() { const uint32_t seed = tt::parse_env("TT_METAL_SEED", static_cast(time(nullptr))); log_info(tt::LogTest, "Using seed: {}", seed); srand(seed); @@ -490,3 +510,37 @@ class RandomProgramFixture : public CommandQueueSingleCardFixture { return resulting_cores; } }; + +class RandomProgramTraceFixture : public RandomProgramFixture, public CommandQueueSingleCardTraceFixture { + protected: + static const uint32_t NUM_TRACE_ITERATIONS = 50; + Program programs[NUM_PROGRAMS]; + + void SetUp() override { + CommandQueueSingleCardTraceFixture::SetUp(); + this->device_ = this->devices_[0]; + this->initialize_seed(); + } + + uint32_t trace_programs() { + const uint32_t trace_id = this->capture_trace(); + this->run_trace(trace_id); + return trace_id; + } + + private: + uint32_t capture_trace() { + const uint32_t trace_id = BeginTraceCapture(this->device_, this->device_->command_queue().id()); + for (Program &program : this->programs) { + EnqueueProgram(this->device_->command_queue(), program, false); + } + EndTraceCapture(this->device_, this->device_->command_queue().id(), trace_id); + return trace_id; + } + + void run_trace(const uint32_t trace_id) { + for (uint32_t i = 0; i < NUM_TRACE_ITERATIONS; i++) { + EnqueueTrace(this->device_->command_queue(), trace_id, false); + } + } +};