From 908462ef22fd8f7614fa7ae0cb7418cb2feccf6b Mon Sep 17 00:00:00 2001 From: Chongfeng Hu Date: Wed, 8 Jan 2025 17:31:38 -0800 Subject: [PATCH] Add 2 new TabletReader APIs: stripesMetadata and stripeGroupsMetadata (#126) Summary: These 2 new APIs allow clients to inspect the metadata of the stripes section and of each stripe group section, namely its offset, size, and compression type. This information is useful in tools like `nimble_dump`, where we want to know the sizes of these sections in the Nimble file. Differential Revision: D67957498 --- dwio/nimble/tablet/TabletReader.cpp | 16 ++++++++++++++++ dwio/nimble/tablet/TabletReader.h | 13 +++++++++++++ 2 files changed, 29 insertions(+) diff --git a/dwio/nimble/tablet/TabletReader.cpp b/dwio/nimble/tablet/TabletReader.cpp index 6173f8b..c119f5d 100644 --- a/dwio/nimble/tablet/TabletReader.cpp +++ b/dwio/nimble/tablet/TabletReader.cpp @@ -26,7 +26,9 @@ #include "folly/io/Cursor.h" #include +#include #include +#include #include #include #include @@ -325,6 +327,10 @@ TabletReader::TabletReader( NIMBLE_CHECK( stripes->offset() + readSize >= fileSize, "Incomplete stripes metadata."); + stripesMetadata_ = std::make_unique( + stripes->offset(), + stripes->size(), + static_cast(stripes->compression_type())); stripes_ = std::make_unique( memoryPool_, footerIOBuf, @@ -340,6 +346,16 @@ TabletReader::TabletReader( (stripeGroups->size() == *stripesRoot->group_indices()->rbegin() + 1), "Unexpected stripe group count"); + std::transform( + stripeGroups->cbegin(), + stripeGroups->cend(), + std::back_inserter(stripeGroupsMetadata_), + [](const auto& stripeGroup) { + return MetadataSection{ + stripeGroup->offset(), + stripeGroup->size(), + static_cast(stripeGroup->compression_type())}; + }); // Always eagerly load if it's the only stripe group and is already // fetched diff --git a/dwio/nimble/tablet/TabletReader.h b/dwio/nimble/tablet/TabletReader.h index 20aca32..1d49b59 100644 --- a/dwio/nimble/tablet/TabletReader.h +++ b/dwio/nimble/tablet/TabletReader.h @@ 
-14,7 +14,10 @@ * limitations under the License. */ #pragma once + +#include #include +#include #include "dwio/nimble/common/Checksum.h" #include "dwio/nimble/common/Types.h" @@ -252,6 +255,14 @@ class TabletReader { const StripeIdentifier& stripe, std::span streamIdentifiers) const; + const MetadataSection* stripesMetadata() const { + return stripesMetadata_.get(); + } + + std::span stripeGroupsMetadata() const { + return stripeGroupsMetadata_; + } + std::unordered_map optionalSections() const { return optionalSections_; } @@ -354,6 +365,8 @@ class TabletReader { uint32_t stripeCount_{0}; const uint32_t* stripeRowCounts_{nullptr}; const uint64_t* stripeOffsets_{nullptr}; + std::unique_ptr stripesMetadata_; + std::vector stripeGroupsMetadata_; std::unordered_map optionalSections_; mutable folly::Synchronized< std::unordered_map>>