From a0c28da86eb6f028f42fe4011baf92a8442c6d94 Mon Sep 17 00:00:00 2001 From: Chongfeng Hu Date: Thu, 9 Jan 2025 16:15:48 -0800 Subject: [PATCH] Add new Tablet API to expose optionalSections metadata (#125) Summary: This new API will unlock use cases such as enumerating all optional sections in the Nimble file. It will also enable `nimble_dump` to provide relevant information about the optional sections. Differential Revision: D67949242 --- dwio/nimble/tablet/TabletReader.cpp | 19 ++++++------ dwio/nimble/tablet/TabletReader.h | 37 +++++++++++++++++++++--- dwio/nimble/tablet/tests/TabletTests.cpp | 19 ++++++++++++ 3 files changed, 62 insertions(+), 13 deletions(-) diff --git a/dwio/nimble/tablet/TabletReader.cpp b/dwio/nimble/tablet/TabletReader.cpp index aa65878..1b52e46 100644 --- a/dwio/nimble/tablet/TabletReader.cpp +++ b/dwio/nimble/tablet/TabletReader.cpp @@ -18,6 +18,7 @@ #include "dwio/nimble/common/Buffer.h" #include "dwio/nimble/common/EncodingPrimitives.h" #include "dwio/nimble/common/Exceptions.h" +#include "dwio/nimble/common/Types.h" #include "dwio/nimble/tablet/Compression.h" #include "dwio/nimble/tablet/Constants.h" #include "dwio/nimble/tablet/FooterGenerated.h" @@ -383,11 +384,11 @@ TabletReader::TabletReader( for (auto i = 0; i < optionalSections->names()->size(); ++i) { optionalSections_.insert(std::make_pair( optionalSections->names()->GetAsString(i)->str(), - std::make_tuple( + MetadataSection{ optionalSections->offsets()->Get(i), optionalSections->sizes()->Get(i), static_cast( - optionalSections->compression_types()->Get(i))))); + optionalSections->compression_types()->Get(i))})); } } @@ -399,9 +400,9 @@ TabletReader::TabletReader( continue; } - const auto sectionOffset = std::get<0>(it->second); - const auto sectionSize = std::get<1>(it->second); - const auto sectionCompressionType = std::get<2>(it->second); + const auto sectionOffset = it->second.offset(); + const auto sectionSize = it->second.size(); + const auto
sectionCompressionType = it->second.compressionType(); if (sectionOffset < offset) { // Section was not read yet. Need to read from file. @@ -427,7 +428,7 @@ TabletReader::TabletReader( auto iobuf = std::move(result[i]); const std::string preload{mustRead[i].label}; auto metadata = std::make_unique( - memoryPool_, iobuf, std::get<2>(optionalSections_[preload])); + memoryPool_, iobuf, optionalSections_.at(preload).compressionType()); optionalSectionsCache_.wlock()->insert({preload, std::move(metadata)}); } } @@ -670,9 +671,9 @@ std::optional
TabletReader::loadOptionalSection( return std::nullopt; } - const auto offset = std::get<0>(it->second); - const auto size = std::get<1>(it->second); - const auto compressionType = std::get<2>(it->second); + const auto offset = it->second.offset(); + const auto size = it->second.size(); + const auto compressionType = it->second.compressionType(); velox::common::Region region{offset, size, name}; folly::IOBuf iobuf; diff --git a/dwio/nimble/tablet/TabletReader.h b/dwio/nimble/tablet/TabletReader.h index 93ab654..807e8f8 100644 --- a/dwio/nimble/tablet/TabletReader.h +++ b/dwio/nimble/tablet/TabletReader.h @@ -17,6 +17,7 @@ #include #include "dwio/nimble/common/Checksum.h" +#include "dwio/nimble/common/Types.h" #include "dwio/nimble/common/Vector.h" #include "folly/Range.h" #include "folly/Synchronized.h" @@ -85,6 +86,32 @@ class Section { MetadataBuffer buffer_; }; +class MetadataSection { + public: + MetadataSection( + uint64_t offset, + uint32_t size, + CompressionType compressionType) + : offset_{offset}, size_{size}, compressionType_{compressionType} {} + + uint64_t offset() const { + return offset_; + } + + uint32_t size() const { + return size_; + } + + CompressionType compressionType() const { + return compressionType_; + } + + private: + uint64_t offset_; + uint32_t size_; + CompressionType compressionType_; +}; + class Postscript { public: uint32_t footerSize() const { @@ -251,6 +278,11 @@ class TabletReader { const StripeIdentifier& stripe, std::span streamIdentifiers) const; + const std::unordered_map& optionalSections() + const { + return optionalSections_; + } + std::optional
loadOptionalSection( const std::string& name, bool keepCache = false) const; @@ -349,10 +381,7 @@ class TabletReader { uint32_t stripeCount_{0}; const uint32_t* stripeRowCounts_{nullptr}; const uint64_t* stripeOffsets_{nullptr}; - std::unordered_map< - std::string, - std::tuple> - optionalSections_; + std::unordered_map optionalSections_; mutable folly::Synchronized< std::unordered_map>> optionalSectionsCache_; diff --git a/dwio/nimble/tablet/tests/TabletTests.cpp b/dwio/nimble/tablet/tests/TabletTests.cpp index be01698..0e16fdb 100644 --- a/dwio/nimble/tablet/tests/TabletTests.cpp +++ b/dwio/nimble/tablet/tests/TabletTests.cpp @@ -561,6 +561,23 @@ TEST(TabletTests, OptionalSections) { file, useChaniedBuffers); nimble::TabletReader tablet{*pool, &readFile}; + ASSERT_EQ(tablet.optionalSections().size(), 3); + ASSERT_TRUE(tablet.optionalSections().contains("section1")); + ASSERT_EQ( + tablet.optionalSections().at("section1").compressionType(), + nimble::CompressionType::Uncompressed); + ASSERT_EQ(tablet.optionalSections().at("section1").size(), random.size()); + ASSERT_TRUE(tablet.optionalSections().contains("section2")); + ASSERT_EQ( + tablet.optionalSections().at("section2").compressionType(), + nimble::CompressionType::Uncompressed); + ASSERT_EQ(tablet.optionalSections().at("section2").size(), zeroes.size()); + ASSERT_TRUE(tablet.optionalSections().contains("section3")); + ASSERT_EQ( + tablet.optionalSections().at("section3").compressionType(), + nimble::CompressionType::Uncompressed); + ASSERT_EQ(tablet.optionalSections().at("section3").size(), 0); + auto check1 = [&]() { auto section = tablet.loadOptionalSection("section1"); ASSERT_TRUE(section.has_value()); @@ -607,6 +624,8 @@ TEST(TabletTests, OptionalSectionsEmpty) { file, useChaniedBuffers); nimble::TabletReader tablet{*pool, &readFile}; + ASSERT_TRUE(tablet.optionalSections().empty()); + auto section = tablet.loadOptionalSection("section1"); ASSERT_FALSE(section.has_value()); }