Skip to content

Commit

Permalink
Add new Tablet API to expose optionalSections metadata (#125)
Browse files Browse the repository at this point in the history
Summary:

This new API will unlock use cases such as enumerating all optional sections in the Nimble file. It will also enable `nimble_dump` to report relevant information about the optional sections.

Reviewed By: helfman

Differential Revision: D67949242
  • Loading branch information
Chongfeng Hu authored and facebook-github-bot committed Jan 10, 2025
1 parent e098251 commit 3da7fa8
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 13 deletions.
19 changes: 10 additions & 9 deletions dwio/nimble/tablet/TabletReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "dwio/nimble/common/Buffer.h"
#include "dwio/nimble/common/EncodingPrimitives.h"
#include "dwio/nimble/common/Exceptions.h"
#include "dwio/nimble/common/Types.h"
#include "dwio/nimble/tablet/Compression.h"
#include "dwio/nimble/tablet/Constants.h"
#include "dwio/nimble/tablet/FooterGenerated.h"
Expand Down Expand Up @@ -383,11 +384,11 @@ TabletReader::TabletReader(
for (auto i = 0; i < optionalSections->names()->size(); ++i) {
optionalSections_.insert(std::make_pair(
optionalSections->names()->GetAsString(i)->str(),
std::make_tuple(
MetadataSection{
optionalSections->offsets()->Get(i),
optionalSections->sizes()->Get(i),
static_cast<CompressionType>(
optionalSections->compression_types()->Get(i)))));
optionalSections->compression_types()->Get(i))}));
}
}

Expand All @@ -399,9 +400,9 @@ TabletReader::TabletReader(
continue;
}

const auto sectionOffset = std::get<0>(it->second);
const auto sectionSize = std::get<1>(it->second);
const auto sectionCompressionType = std::get<2>(it->second);
const auto sectionOffset = it->second.offset();
const auto sectionSize = it->second.size();
const auto sectionCompressionType = it->second.compressionType();

if (sectionOffset < offset) {
// Section was not read yet. Need to read from file.
Expand All @@ -427,7 +428,7 @@ TabletReader::TabletReader(
auto iobuf = std::move(result[i]);
const std::string preload{mustRead[i].label};
auto metadata = std::make_unique<MetadataBuffer>(
memoryPool_, iobuf, std::get<2>(optionalSections_[preload]));
memoryPool_, iobuf, optionalSections_.at(preload).compressionType());
optionalSectionsCache_.wlock()->insert({preload, std::move(metadata)});
}
}
Expand Down Expand Up @@ -670,9 +671,9 @@ std::optional<Section> TabletReader::loadOptionalSection(
return std::nullopt;
}

const auto offset = std::get<0>(it->second);
const auto size = std::get<1>(it->second);
const auto compressionType = std::get<2>(it->second);
const auto offset = it->second.offset();
const auto size = it->second.size();
const auto compressionType = it->second.compressionType();

velox::common::Region region{offset, size, name};
folly::IOBuf iobuf;
Expand Down
37 changes: 33 additions & 4 deletions dwio/nimble/tablet/TabletReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <span>

#include "dwio/nimble/common/Checksum.h"
#include "dwio/nimble/common/Types.h"
#include "dwio/nimble/common/Vector.h"
#include "folly/Range.h"
#include "folly/Synchronized.h"
Expand Down Expand Up @@ -85,6 +86,32 @@ class Section {
MetadataBuffer buffer_;
};

// Describes one metadata section by three values captured at construction:
// its offset, its size, and the compression type recorded for it.
// The class exposes accessors only (no setters).
class MetadataSection {
 public:
  // Stores the three descriptors verbatim; no validation is performed here.
  MetadataSection(
      uint64_t offset,
      uint32_t size,
      CompressionType compressionType)
      : offset_(offset), size_(size), compressionType_(compressionType) {}

  // Offset of the section, as supplied to the constructor.
  uint64_t offset() const { return offset_; }

  // Size of the section, as supplied to the constructor.
  uint32_t size() const { return size_; }

  // Compression type associated with the section's payload.
  CompressionType compressionType() const { return compressionType_; }

 private:
  uint64_t offset_;
  uint32_t size_;
  CompressionType compressionType_;
};

class Postscript {
public:
uint32_t footerSize() const {
Expand Down Expand Up @@ -251,6 +278,11 @@ class TabletReader {
const StripeIdentifier& stripe,
std::span<const uint32_t> streamIdentifiers) const;

// Read-only access to the metadata of every optional section known to this
// reader, keyed by section name. The returned reference aliases the reader's
// own map, so it must not be used after the reader is destroyed.
const std::unordered_map<std::string, MetadataSection>&
optionalSections() const {
  return optionalSections_;
}

std::optional<Section> loadOptionalSection(
const std::string& name,
bool keepCache = false) const;
Expand Down Expand Up @@ -349,10 +381,7 @@ class TabletReader {
uint32_t stripeCount_{0};
const uint32_t* stripeRowCounts_{nullptr};
const uint64_t* stripeOffsets_{nullptr};
std::unordered_map<
std::string,
std::tuple<uint64_t, uint32_t, CompressionType>>
optionalSections_;
std::unordered_map<std::string, MetadataSection> optionalSections_;
mutable folly::Synchronized<
std::unordered_map<std::string, std::unique_ptr<MetadataBuffer>>>
optionalSectionsCache_;
Expand Down
19 changes: 19 additions & 0 deletions dwio/nimble/tablet/tests/TabletTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,23 @@ TEST(TabletTests, OptionalSections) {
file, useChaniedBuffers);
nimble::TabletReader tablet{*pool, &readFile};

ASSERT_EQ(tablet.optionalSections().size(), 3);
ASSERT_TRUE(tablet.optionalSections().contains("section1"));
ASSERT_EQ(
tablet.optionalSections().at("section1").compressionType(),
nimble::CompressionType::Uncompressed);
ASSERT_EQ(tablet.optionalSections().at("section1").size(), random.size());
ASSERT_TRUE(tablet.optionalSections().contains("section2"));
ASSERT_EQ(
tablet.optionalSections().at("section2").compressionType(),
nimble::CompressionType::Uncompressed);
ASSERT_EQ(tablet.optionalSections().at("section2").size(), zeroes.size());
ASSERT_TRUE(tablet.optionalSections().contains("section3"));
ASSERT_EQ(
tablet.optionalSections().at("section3").compressionType(),
nimble::CompressionType::Uncompressed);
ASSERT_EQ(tablet.optionalSections().at("section3").size(), 0);

auto check1 = [&]() {
auto section = tablet.loadOptionalSection("section1");
ASSERT_TRUE(section.has_value());
Expand Down Expand Up @@ -607,6 +624,8 @@ TEST(TabletTests, OptionalSectionsEmpty) {
file, useChaniedBuffers);
nimble::TabletReader tablet{*pool, &readFile};

ASSERT_TRUE(tablet.optionalSections().empty());

auto section = tablet.loadOptionalSection("section1");
ASSERT_FALSE(section.has_value());
}
Expand Down

0 comments on commit 3da7fa8

Please sign in to comment.