diff --git a/dwio/nimble/tablet/TabletReader.cpp b/dwio/nimble/tablet/TabletReader.cpp index aa65878..01439fc 100644 --- a/dwio/nimble/tablet/TabletReader.cpp +++ b/dwio/nimble/tablet/TabletReader.cpp @@ -18,6 +18,7 @@ #include "dwio/nimble/common/Buffer.h" #include "dwio/nimble/common/EncodingPrimitives.h" #include "dwio/nimble/common/Exceptions.h" +#include "dwio/nimble/common/Types.h" #include "dwio/nimble/tablet/Compression.h" #include "dwio/nimble/tablet/Constants.h" #include "dwio/nimble/tablet/FooterGenerated.h" @@ -25,7 +26,9 @@ #include "folly/io/Cursor.h" #include +#include #include +#include #include #include #include @@ -47,7 +50,7 @@ namespace facebook::nimble { // 4 bytes footer size + 1 byte footer compression type + // 1 byte checksum type + 8 bytes checksum + // 2 bytes major version + 2 bytes minor version + -// 4 bytes magic number. +// 2 bytes magic number. namespace { template @@ -383,11 +386,11 @@ TabletReader::TabletReader( for (auto i = 0; i < optionalSections->names()->size(); ++i) { optionalSections_.insert(std::make_pair( optionalSections->names()->GetAsString(i)->str(), - std::make_tuple( + MetadataSection{ optionalSections->offsets()->Get(i), optionalSections->sizes()->Get(i), static_cast( - optionalSections->compression_types()->Get(i))))); + optionalSections->compression_types()->Get(i))})); } } @@ -399,9 +402,9 @@ TabletReader::TabletReader( continue; } - const auto sectionOffset = std::get<0>(it->second); - const auto sectionSize = std::get<1>(it->second); - const auto sectionCompressionType = std::get<2>(it->second); + const auto sectionOffset = it->second.offset(); + const auto sectionSize = it->second.size(); + const auto sectionCompressionType = it->second.compressionType(); if (sectionOffset < offset) { // Section was not read yet. Need to read from file. @@ -427,7 +430,7 @@ TabletReader::TabletReader( auto iobuf = std::move(result[i]); const std::string preload{mustRead[i].label}; auto metadata = std::make_unique( - memoryPool_, iobuf, std::get<2>(optionalSections_[preload])); + memoryPool_, iobuf, optionalSections_.at(preload).compressionType()); optionalSectionsCache_.wlock()->insert({preload, std::move(metadata)}); } } @@ -647,6 +650,41 @@ uint64_t TabletReader::getTotalStreamSize( return streamSizeSum; } +std::optional TabletReader::stripesMetadata() const { + auto footerRoot = + asFlatBuffersRoot(footer_->content()); + auto stripes = footerRoot->stripes(); + if (!stripes) { + return std::nullopt; + } + return MetadataSection{ + stripes->offset(), + stripes->size(), + static_cast(stripes->compression_type())}; +} + +std::vector TabletReader::stripeGroupsMetadata() const { + std::vector groupsMetadata; + auto footerRoot = + asFlatBuffersRoot(footer_->content()); + auto stripeGroups = footerRoot->stripe_groups(); + if (!stripeGroups) { + return groupsMetadata; + } + groupsMetadata.reserve(stripeGroups->size()); + std::transform( + stripeGroups->cbegin(), + stripeGroups->cend(), + std::back_inserter(groupsMetadata), + [](const auto& stripeGroup) { + return MetadataSection{ + stripeGroup->offset(), + stripeGroup->size(), + static_cast(stripeGroup->compression_type())}; + }); + return groupsMetadata; +} + std::optional
TabletReader::loadOptionalSection( const std::string& name, bool keepCache) const { @@ -670,9 +708,9 @@ std::optional
TabletReader::loadOptionalSection( return std::nullopt; } - const auto offset = std::get<0>(it->second); - const auto size = std::get<1>(it->second); - const auto compressionType = std::get<2>(it->second); + const auto offset = it->second.offset(); + const auto size = it->second.size(); + const auto compressionType = it->second.compressionType(); velox::common::Region region{offset, size, name}; folly::IOBuf iobuf; diff --git a/dwio/nimble/tablet/TabletReader.h b/dwio/nimble/tablet/TabletReader.h index 93ab654..dcbf43c 100644 --- a/dwio/nimble/tablet/TabletReader.h +++ b/dwio/nimble/tablet/TabletReader.h @@ -14,9 +14,14 @@ * limitations under the License. */ #pragma once + +#include +#include #include +#include #include "dwio/nimble/common/Checksum.h" +#include "dwio/nimble/common/Types.h" #include "dwio/nimble/common/Vector.h" #include "folly/Range.h" #include "folly/Synchronized.h" @@ -85,6 +90,32 @@ class Section { MetadataBuffer buffer_; }; +class MetadataSection { + public: + MetadataSection( + uint64_t offset, + uint32_t size, + CompressionType compressionType) + : offset_{offset}, size_{size}, compressionType_{compressionType} {} + + uint64_t offset() const { + return offset_; + } + + uint32_t size() const { + return size_; + } + + CompressionType compressionType() const { + return compressionType_; + } + + private: + uint64_t offset_; + uint32_t size_; + CompressionType compressionType_; +}; + class Postscript { public: uint32_t footerSize() const { @@ -251,6 +282,15 @@ class TabletReader { const StripeIdentifier& stripe, std::span streamIdentifiers) const; + std::optional stripesMetadata() const; + + std::vector stripeGroupsMetadata() const; + + const std::unordered_map& optionalSections() + const { + return optionalSections_; + } + std::optional
loadOptionalSection( const std::string& name, bool keepCache = false) const; @@ -349,10 +389,7 @@ class TabletReader { uint32_t stripeCount_{0}; const uint32_t* stripeRowCounts_{nullptr}; const uint64_t* stripeOffsets_{nullptr}; - std::unordered_map< - std::string, - std::tuple> - optionalSections_; + std::unordered_map optionalSections_; mutable folly::Synchronized< std::unordered_map>> optionalSectionsCache_; diff --git a/dwio/nimble/tablet/tests/TabletTests.cpp b/dwio/nimble/tablet/tests/TabletTests.cpp index be01698..0e16fdb 100644 --- a/dwio/nimble/tablet/tests/TabletTests.cpp +++ b/dwio/nimble/tablet/tests/TabletTests.cpp @@ -561,6 +561,23 @@ TEST(TabletTests, OptionalSections) { file, useChaniedBuffers); nimble::TabletReader tablet{*pool, &readFile}; + ASSERT_EQ(tablet.optionalSections().size(), 3); + ASSERT_TRUE(tablet.optionalSections().contains("section1")); + ASSERT_EQ( + tablet.optionalSections().at("section1").compressionType(), + nimble::CompressionType::Uncompressed); + ASSERT_EQ(tablet.optionalSections().at("section1").size(), random.size()); + ASSERT_TRUE(tablet.optionalSections().contains("section2")); + ASSERT_EQ( + tablet.optionalSections().at("section2").compressionType(), + nimble::CompressionType::Uncompressed); + ASSERT_EQ(tablet.optionalSections().at("section2").size(), zeroes.size()); + ASSERT_TRUE(tablet.optionalSections().contains("section3")); + ASSERT_EQ( + tablet.optionalSections().at("section3").compressionType(), + nimble::CompressionType::Uncompressed); + ASSERT_EQ(tablet.optionalSections().at("section3").size(), 0); + auto check1 = [&]() { auto section = tablet.loadOptionalSection("section1"); ASSERT_TRUE(section.has_value()); @@ -607,6 +624,8 @@ TEST(TabletTests, OptionalSectionsEmpty) { file, useChaniedBuffers); nimble::TabletReader tablet{*pool, &readFile}; + ASSERT_TRUE(tablet.optionalSections().empty()); + auto section = tablet.loadOptionalSection("section1"); ASSERT_FALSE(section.has_value()); }