From e717f80dbed0d98a4af5502cd91f1bdaf1bddd8d Mon Sep 17 00:00:00 2001 From: Chongfeng Hu Date: Fri, 10 Jan 2025 10:29:48 -0800 Subject: [PATCH] Add 2 new TabletReader APIs: stripesMetadata and stripeGroupsMetadata (#126) Summary: These 2 new APIs allow clients to inspect the metadata of the stripes section and of each stripe group, e.g., its offset, size, and compression type. This information is useful in use cases like `nimble_dump`, where we want to know the sizes of these sections in the Nimble file. Reviewed By: helfman Differential Revision: D67957498 --- dwio/nimble/tablet/TabletReader.cpp | 39 ++++++++++++++++++++++++++++- dwio/nimble/tablet/TabletReader.h | 8 ++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/dwio/nimble/tablet/TabletReader.cpp b/dwio/nimble/tablet/TabletReader.cpp index 1b52e46..01439fc 100644 --- a/dwio/nimble/tablet/TabletReader.cpp +++ b/dwio/nimble/tablet/TabletReader.cpp @@ -26,7 +26,9 @@ #include "folly/io/Cursor.h" #include +#include #include +#include #include #include #include @@ -48,7 +50,7 @@ namespace facebook::nimble { // 4 bytes footer size + 1 byte footer compression type + // 1 byte checksum type + 8 bytes checksum + // 2 bytes major version + 2 bytes minor version + -// 4 bytes magic number. +// 2 bytes magic number. 
namespace { template @@ -648,6 +650,41 @@ uint64_t TabletReader::getTotalStreamSize( return streamSizeSum; } +std::optional TabletReader::stripesMetadata() const { + auto footerRoot = + asFlatBuffersRoot(footer_->content()); + auto stripes = footerRoot->stripes(); + if (!stripes) { + return std::nullopt; + } + return MetadataSection{ + stripes->offset(), + stripes->size(), + static_cast(stripes->compression_type())}; +} + +std::vector TabletReader::stripeGroupsMetadata() const { + std::vector groupsMetadata; + auto footerRoot = + asFlatBuffersRoot(footer_->content()); + auto stripeGroups = footerRoot->stripe_groups(); + if (!stripeGroups) { + return groupsMetadata; + } + groupsMetadata.reserve(stripeGroups->size()); + std::transform( + stripeGroups->cbegin(), + stripeGroups->cend(), + std::back_inserter(groupsMetadata), + [](const auto& stripeGroup) { + return MetadataSection{ + stripeGroup->offset(), + stripeGroup->size(), + static_cast(stripeGroup->compression_type())}; + }); + return groupsMetadata; +} + std::optional
TabletReader::loadOptionalSection( const std::string& name, bool keepCache) const { diff --git a/dwio/nimble/tablet/TabletReader.h b/dwio/nimble/tablet/TabletReader.h index 807e8f8..dcbf43c 100644 --- a/dwio/nimble/tablet/TabletReader.h +++ b/dwio/nimble/tablet/TabletReader.h @@ -14,7 +14,11 @@ * limitations under the License. */ #pragma once + +#include +#include #include +#include #include "dwio/nimble/common/Checksum.h" #include "dwio/nimble/common/Types.h" @@ -278,6 +282,10 @@ class TabletReader { const StripeIdentifier& stripe, std::span streamIdentifiers) const; + std::optional stripesMetadata() const; + + std::vector stripeGroupsMetadata() const; + const std::unordered_map& optionalSections() const { return optionalSections_;