diff --git a/docs/source/tt-metalium/tools/kernel_print.rst b/docs/source/tt-metalium/tools/kernel_print.rst index 0a6b5033aaf..45b7a3321b2 100644 --- a/docs/source/tt-metalium/tools/kernel_print.rst +++ b/docs/source/tt-metalium/tools/kernel_print.rst @@ -66,7 +66,8 @@ The APIs for printing data from Circular Buffers can be found in ``debug/dprint_ how to print data from a CB (in this case, ``CB::c_intermed1``) is shown below. Note that sampling happens relative to the current CB read or write pointer. This means that for printing a tile read from the front of the CB, the ``DPRINT`` call has to occur between the ``cb_wait_front`` and ``cb_pop_front`` calls. For printing a tile from the -back of the CB, the ``DPRINT`` call has to occur between the ``cb_reserve_back`` and ``cb_push_back`` calls. +back of the CB, the ``DPRINT`` call has to occur between the ``cb_reserve_back`` and ``cb_push_back`` calls. Please +note that currently only CBs with type `DataFormat::Float16_b` are supported for printing. .. code-block:: sh @@ -86,10 +87,11 @@ back of the CB, the ``DPRINT`` call has to occur between the ``cb_reserve_back`` // Print a full tile for (int32_t r = 0; r < 32; ++r) { SliceRange sr = SliceRange{.h0 = r, .h1 = r+1, .hs = 1, .w0 = 0, .w1 = 32, .ws = 1}; - // On data movement RISCs, tiles can be printed from either the CB read or write pointers - DPRINT_DATA0({ DPRINT << (uint)r << " --READ--cin1-- " << TileSlice(0, 0, sr, TSLICE_RD_PTR, true, false) << ENDL(); }); - DPRINT_DATA1({ DPRINT << (uint)r << " --READ--cin1-- " << TileSlice(0, 0, sr, TSLICE_WR_PTR, true, false) << ENDL(); }); - // Unpacker RISC only has rd_ptr, so no extra arg + // On data movement RISCs, tiles can be printed from either the CB read or write pointers. Also need to specify whether + // the CB is input or output. + DPRINT_DATA0({ DPRINT << (uint)r << " --READ--cin1-- " << TileSlice(0, 0, sr, TSLICE_INPUT_CB, TSLICE_RD_PTR, true, false) << ENDL(); }); + DPRINT_DATA1({ DPRINT << (uint)r << " --READ--cin1-- " << TileSlice(0, 0, sr, TSLICE_OUTPUT_CB, TSLICE_WR_PTR, true, false) << ENDL(); }); + // Unpacker RISC only has rd_ptr and only input CBs, so no extra args DPRINT_UNPACK({ DPRINT << (uint)r << " --READ--cin1-- " << TileSlice(0, 0, sr, true, false) << ENDL(); }); // Packer RISC only has wr_ptr DPRINT_PACK({ DPRINT << (uint)r << " --READ--cin1-- " << TileSlice(0, 0, sr, true, false) << ENDL(); }); diff --git a/tests/tt_metal/tt_metal/unit_tests_common/dprint/test_print_all_harts.cpp b/tests/tt_metal/tt_metal/unit_tests_common/dprint/test_print_all_harts.cpp index 24d66a67355..491e73e461b 100644 --- a/tests/tt_metal/tt_metal/unit_tests_common/dprint/test_print_all_harts.cpp +++ b/tests/tt_metal/tt_metal/unit_tests_common/dprint/test_print_all_harts.cpp @@ -40,6 +40,7 @@ HEX/OCT/DEC: 0.245117188 0.249023438 0.255859375 0.263671875 0.98046875 0.99609375 1.0234375 1.0546875 0.365234375 0.373046875 0.380859375 0.388671875 1.4609375 1.4921875 1.5234375 1.5546875 +Tried printing CB::c_in1: Unsupported data format (Bfp8_b) Test Debug Print: Unpack Basic Types: 101-1.61800337@0.122558594 @@ -64,6 +65,7 @@ HEX/OCT/DEC: 0.245117188 0.249023438 0.255859375 0.263671875 0.98046875 0.99609375 1.0234375 1.0546875 0.365234375 0.373046875 0.380859375 0.388671875 1.4609375 1.4921875 1.5234375 1.5546875 +Tried printing CB::c_in1: Unsupported data format (Bfp8_b) Test Debug Print: Math Basic Types: 101-1.61800337@0.122558594 @@ -81,6 +83,7 @@ HEX/OCT/DEC: SLICE: Warning: MATH core does not support TileSlice printing, omitting print... Warning: MATH core does not support TileSlice printing, omitting print... +Warning: MATH core does not support TileSlice printing, omitting print... Test Debug Print: Pack Basic Types: 101-1.61800337@0.122558594 @@ -105,6 +108,7 @@ HEX/OCT/DEC: 0.245117188 0.249023438 0.255859375 0.263671875 0.98046875 0.99609375 1.0234375 1.0546875 0.365234375 0.373046875 0.380859375 0.388671875 1.4609375 1.4921875 1.5234375 1.5546875 +Tried printing CB::c_in1: Unsupported data format (Bfp8_b) Test Debug Print: Data1 Basic Types: 101-1.61800337@0.122558594 @@ -128,7 +132,8 @@ HEX/OCT/DEC: 0.182617188 0.186523438 0.190429688 0.194335938 0.73046875 0.74609375 0.76171875 0.77734375 0.245117188 0.249023438 0.255859375 0.263671875 0.98046875 0.99609375 1.0234375 1.0546875 0.365234375 0.373046875 0.380859375 0.388671875 1.4609375 1.4921875 1.5234375 1.5546875 -)"; + +Tried printing CB::c_in1: Unsupported data format (Bfp8_b))"; static void RunTest(DPrintFixture* fixture, Device* device) { // Set up program and command queue @@ -136,14 +141,20 @@ static void RunTest(DPrintFixture* fixture, Device* device) { Program program = Program(); // Create a CB for testing TSLICE, dimensions are 32x32 bfloat16s - constexpr uint32_t src0_cb_index = CB::c_in0; constexpr uint32_t buffer_size = 32*32*sizeof(bfloat16); CircularBufferConfig cb_src0_config = CircularBufferConfig( buffer_size, - {{src0_cb_index, tt::DataFormat::Float16_b}} - ).set_page_size(src0_cb_index, buffer_size); + {{CB::c_in0, tt::DataFormat::Float16_b}} + ).set_page_size(CB::c_in0, buffer_size); CBHandle cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config); + // A CB with an unsupported data format + CircularBufferConfig cb_src1_config = CircularBufferConfig( + buffer_size, + {{CB::c_in1, tt::DataFormat::Bfp8_b}} + ).set_page_size(CB::c_in1, buffer_size); + CBHandle cb_src1 = tt_metal::CreateCircularBuffer(program, core, cb_src1_config); + // Three different kernels to mirror typical usage and some previously // failing test cases, although all three kernels simply print. KernelHandle brisc_print_kernel_id = CreateKernel( diff --git a/tt_metal/hostdevcommon/dprint_common.h b/tt_metal/hostdevcommon/dprint_common.h index a517c163cd4..5249f6771d6 100644 --- a/tt_metal/hostdevcommon/dprint_common.h +++ b/tt_metal/hostdevcommon/dprint_common.h @@ -105,8 +105,10 @@ struct TileSliceHostDev { uint16_t w0_ ATTR_ALIGN2; uint16_t w1_ ATTR_ALIGN2; uint16_t ws_ ATTR_ALIGN2; - uint16_t count_ ATTR_ALIGN2; - uint16_t endl_rows_ ATTR_ALIGN2; + uint8_t cb_id_ ATTR_ALIGN1; + uint8_t count_ ATTR_ALIGN1; + uint8_t endl_rows_ ATTR_ALIGN1; + uint8_t data_format_ ATTR_ALIGN1; uint16_t samples_[MAXCOUNT] ATTR_ALIGN2; } ATTR_PACK; diff --git a/tt_metal/hw/inc/debug/dprint.h b/tt_metal/hw/inc/debug/dprint.h index 3d84439b317..64d5ab5d25e 100644 --- a/tt_metal/hw/inc/debug/dprint.h +++ b/tt_metal/hw/inc/debug/dprint.h @@ -299,4 +299,7 @@ template DebugPrinter operator<< (DebugPrinter, BF16 val); template DebugPrinter operator<< (DebugPrinter, F32 val); template DebugPrinter operator<< (DebugPrinter, U32 val); +// Tile printing only supported in kernels +#if defined(KERNEL_BUILD) #include "dprint_tile.h" +#endif diff --git a/tt_metal/hw/inc/debug/dprint_test_common.h b/tt_metal/hw/inc/debug/dprint_test_common.h index 54e66fc1e0c..a1ff26e9df1 100644 --- a/tt_metal/hw/inc/debug/dprint_test_common.h +++ b/tt_metal/hw/inc/debug/dprint_test_common.h @@ -33,11 +33,14 @@ inline void print_test_data() { cb_wait_front(tt::CB::c_in0, 1); #if defined(COMPILE_FOR_BRISC) || defined(COMPILE_FOR_NCRISC) // Since brisc is writing to the CB before printing, should look at read pointer - DPRINT << TSLICE(tt::CB::c_in0, 0, SliceRange::hw0_32_8(), TSLICE_RD_PTR); - DPRINT << TSLICE(tt::CB::c_in0, 0, SliceRange::hw0_32_4(), TSLICE_RD_PTR); + DPRINT << TSLICE(tt::CB::c_in0, 0, SliceRange::hw0_32_8(), TSLICE_INPUT_CB, TSLICE_RD_PTR); + DPRINT << TSLICE(tt::CB::c_in0, 0, SliceRange::hw0_32_4(), TSLICE_INPUT_CB, TSLICE_RD_PTR); + // This one has an unsupported data type, should show a warning instead of data + DPRINT << TSLICE(tt::CB::c_in1, 0, SliceRange::hw0_32_4(), TSLICE_INPUT_CB, TSLICE_RD_PTR); #else DPRINT << TSLICE(tt::CB::c_in0, 0, SliceRange::hw0_32_8()); DPRINT << TSLICE(tt::CB::c_in0, 0, SliceRange::hw0_32_4()); + DPRINT << TSLICE(tt::CB::c_in1, 0, SliceRange::hw0_32_4()); #endif #endif } diff --git a/tt_metal/hw/inc/debug/dprint_tile.h b/tt_metal/hw/inc/debug/dprint_tile.h index 7aa841d12d5..0d793498772 100644 --- a/tt_metal/hw/inc/debug/dprint_tile.h +++ b/tt_metal/hw/inc/debug/dprint_tile.h @@ -41,6 +41,9 @@ struct SliceRange { typedef bool dprint_tslice_ptr_t; #define TSLICE_RD_PTR true #define TSLICE_WR_PTR false +typedef bool dprint_tslice_cb_t; +#define TSLICE_INPUT_CB true +#define TSLICE_OUTPUT_SB false template struct TileSlice : TileSliceHostDev { @@ -61,10 +64,14 @@ struct TileSlice : TileSliceHostDev { int cb, int itile, const SliceRange& s, - // For NCRISC and BRISC, have access to both rd and wr ptr, let user choose w/ arg. + // For NCRISC and BRISC, CBs could be inputs or outputs, need user to specify so that we know what the DataFormat + // is. This isn't a problem for PACK/UNPACK because they always treat CBs as input/output. Additionally, NCRISC and + // BRISC have access to both rd and wr ptr, let user choose w/ arg. #if defined(COMPILE_FOR_NCRISC) + dprint_tslice_cb_t cb_type, dprint_tslice_ptr_t ptr_type = TSLICE_WR_PTR, #elif defined(COMPILE_FOR_BRISC) + dprint_tslice_cb_t cb_type, dprint_tslice_ptr_t ptr_type = TSLICE_RD_PTR, #endif bool endl_rows = true, @@ -72,18 +79,23 @@ struct TileSlice : TileSliceHostDev { // The math risc uses a different mechanism for syncing data, and as such doesn't have // access to CBs, so TileSlice printing is skipped on this risc. this->count_ = 0; + this->cb_id_ = cb; volatile Tile* t; - // Pointer value depends on whether we're looking at read or write ptr + // Both pointer value and data format depend on RISC #if defined(UCK_CHLKC_PACK) this->ptr_ = cb_interface[cb].fifo_wr_ptr << 4; // PACK only has write pointer + this->data_format_ = pack_dst_format[cb]; #elif defined(UCK_CHLKC_UNPACK) this->ptr_ = cb_interface[cb].fifo_rd_ptr << 4; // UNPACK only has read pointer + this->data_format_ = unpack_src_format[cb]; #elif defined(COMPILE_FOR_NCRISC) || defined(COMPILE_FOR_BRISC) - // For BRISC/NCRISC, user chooses which pointer. + // For BRISC/NCRISC, user chooses which pointer, and specifies whether the CB is input/output this->ptr_ = (ptr_type == TSLICE_WR_PTR) ? cb_interface[cb].fifo_wr_ptr << 4 : cb_interface[cb].fifo_rd_ptr << 4; + this->data_format_ = (cb_type == TSLICE_INPUT_CB) ? unpack_src_format[cb] : pack_dst_format[cb]; #else this->ptr_ = 0; + this->data_format_ = static_cast(DataFormat::Invalid); #endif #if defined(DEBUG_PRINT_ENABLED) && (defined(UCK_CHLKC_PACK) || defined(UCK_CHLKC_UNPACK) || defined(COMPILE_FOR_NCRISC) || defined(COMPILE_FOR_BRISC)) this->ptr_ += itile * sizeof(Tile); @@ -91,6 +103,10 @@ struct TileSlice : TileSliceHostDev { this->w0_ = 0xFFFF; return; // bad tile pointer, return } + if (this->data_format_ != static_cast(DataFormat::Float16_b)) { + this->w1_ = 0xFFFF; + return; // Unsupported type, return + } this->endl_rows_ = endl_rows; this->w0_ = s.w0; this->w1_ = s.w1; this->ws_ = s.ws; this->h0_ = s.h0; this->h1_ = s.h1; this->hs_ = s.hs; diff --git a/tt_metal/impl/debug/dprint_server.cpp b/tt_metal/impl/debug/dprint_server.cpp index 42bb82e793c..9cbe726c661 100644 --- a/tt_metal/impl/debug/dprint_server.cpp +++ b/tt_metal/impl/debug/dprint_server.cpp @@ -194,8 +194,14 @@ static void PrintTileSlice(ostream& stream, uint8_t* ptr, int hart_id) { TT_ASSERT(offsetof(TileSliceHostDev<0>, samples_) % sizeof(uint16_t) == 0, "TileSliceHostDev<0> samples_ field is not properly aligned"); uint16_t *samples_ = reinterpret_cast(ptr) + offsetof(TileSliceHostDev<0>, samples_) / sizeof(uint16_t); + enum CB cb = static_cast(ts->cb_id_); if (ts->w0_ == 0xFFFF) { - stream << fmt::format("BAD TILE POINTER (ptr={}, count={})\n", ts->ptr_, ts->count_) << std::flush; + uint32_t ptr = ts->ptr_; + uint8_t count = ts->count_; + stream << fmt::format("Tried printing {}: BAD TILE POINTER (ptr={}, count={})\n", cb, ptr, count) << std::flush; + } else if (ts->w1_ == 0xFFFF) { + tt::DataFormat data_format = static_cast(ts->data_format_); + stream << fmt::format("Tried printing {}: Unsupported data format ({})\n", cb, data_format); } else { uint32_t i = 0; bool count_exceeded = false;