Skip to content

Commit

Permalink
Add new Tablet API to expose optionalSections metadata (#125)
Browse files Browse the repository at this point in the history
Summary:

This new API unlocks use cases such as enumerating all optional sections in a Nimble file. It also enables `nimble_dump` to report relevant information about the optional sections.

Differential Revision: D67949242
  • Loading branch information
Chongfeng Hu authored and facebook-github-bot committed Jan 9, 2025
1 parent e098251 commit a05802e
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 12 deletions.
26 changes: 26 additions & 0 deletions dwio/nimble/common/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,32 @@ enum class CompressionType : uint8_t {
MetaInternal = 2,
};

/// Small value type that bundles the three attributes kept for a metadata
/// section: an offset, a size and a compression type.
///
/// The class has no setters; the fields are supplied once through the
/// constructor and read back through the const accessors below.
class MetadataSection {
 public:
  /// Stores the three arguments as given. No validation is performed.
  MetadataSection(
      uint64_t offset,
      uint32_t size,
      CompressionType compressionType)
      : offset_{offset}, size_{size}, compressionType_{compressionType} {}

  /// Offset passed to the constructor.
  uint64_t offset() const { return offset_; }

  /// Size passed to the constructor.
  uint32_t size() const { return size_; }

  /// Compression type passed to the constructor.
  CompressionType compressionType() const { return compressionType_; }

 private:
  uint64_t offset_;
  uint32_t size_;
  CompressionType compressionType_;
};

// Declaration only; the definition is not visible in this header excerpt.
// Takes a CompressionType and returns a std::string -- presumably a
// human-readable name for the value (TODO confirm against the definition).
std::string toString(CompressionType compressionType);
// Stream-insertion overload for CompressionType (declaration only). Returns a
// std::ostream& -- presumably `out`, so insertions can be chained; confirm
// against the definition.
std::ostream& operator<<(std::ostream& out, CompressionType compressionType);

Expand Down
17 changes: 9 additions & 8 deletions dwio/nimble/tablet/TabletReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "dwio/nimble/common/Buffer.h"
#include "dwio/nimble/common/EncodingPrimitives.h"
#include "dwio/nimble/common/Exceptions.h"
#include "dwio/nimble/common/Types.h"
#include "dwio/nimble/tablet/Compression.h"
#include "dwio/nimble/tablet/Constants.h"
#include "dwio/nimble/tablet/FooterGenerated.h"
Expand Down Expand Up @@ -383,7 +384,7 @@ TabletReader::TabletReader(
for (auto i = 0; i < optionalSections->names()->size(); ++i) {
optionalSections_.insert(std::make_pair(
optionalSections->names()->GetAsString(i)->str(),
std::make_tuple(
MetadataSection(
optionalSections->offsets()->Get(i),
optionalSections->sizes()->Get(i),
static_cast<CompressionType>(
Expand All @@ -399,9 +400,9 @@ TabletReader::TabletReader(
continue;
}

const auto sectionOffset = std::get<0>(it->second);
const auto sectionSize = std::get<1>(it->second);
const auto sectionCompressionType = std::get<2>(it->second);
const auto sectionOffset = it->second.offset();
const auto sectionSize = it->second.size();
const auto sectionCompressionType = it->second.compressionType();

if (sectionOffset < offset) {
// Section was not read yet. Need to read from file.
Expand All @@ -427,7 +428,7 @@ TabletReader::TabletReader(
auto iobuf = std::move(result[i]);
const std::string preload{mustRead[i].label};
auto metadata = std::make_unique<MetadataBuffer>(
memoryPool_, iobuf, std::get<2>(optionalSections_[preload]));
memoryPool_, iobuf, optionalSections_.at(preload).compressionType());
optionalSectionsCache_.wlock()->insert({preload, std::move(metadata)});
}
}
Expand Down Expand Up @@ -670,9 +671,9 @@ std::optional<Section> TabletReader::loadOptionalSection(
return std::nullopt;
}

const auto offset = std::get<0>(it->second);
const auto size = std::get<1>(it->second);
const auto compressionType = std::get<2>(it->second);
const auto offset = it->second.offset();
const auto size = it->second.size();
const auto compressionType = it->second.compressionType();

velox::common::Region region{offset, size, name};
folly::IOBuf iobuf;
Expand Down
10 changes: 6 additions & 4 deletions dwio/nimble/tablet/TabletReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <span>

#include "dwio/nimble/common/Checksum.h"
#include "dwio/nimble/common/Types.h"
#include "dwio/nimble/common/Vector.h"
#include "folly/Range.h"
#include "folly/Synchronized.h"
Expand Down Expand Up @@ -251,6 +252,10 @@ class TabletReader {
const StripeIdentifier& stripe,
std::span<const uint32_t> streamIdentifiers) const;

/// Returns the optional-section metadata, keyed by section name.
///
/// The map is returned by value, so every call copies all entries. Callers
/// that need several lookups should keep the result in a local variable
/// instead of calling this accessor repeatedly.
std::unordered_map<std::string, MetadataSection> optionalSections() const {
  auto snapshot = optionalSections_;
  return snapshot;
}

std::optional<Section> loadOptionalSection(
const std::string& name,
bool keepCache = false) const;
Expand Down Expand Up @@ -349,10 +354,7 @@ class TabletReader {
uint32_t stripeCount_{0};
const uint32_t* stripeRowCounts_{nullptr};
const uint64_t* stripeOffsets_{nullptr};
std::unordered_map<
std::string,
std::tuple<uint64_t, uint32_t, CompressionType>>
optionalSections_;
std::unordered_map<std::string, MetadataSection> optionalSections_;
mutable folly::Synchronized<
std::unordered_map<std::string, std::unique_ptr<MetadataBuffer>>>
optionalSectionsCache_;
Expand Down
7 changes: 7 additions & 0 deletions dwio/nimble/tablet/tests/TabletTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,11 @@ TEST(TabletTests, OptionalSections) {
file, useChaniedBuffers);
nimble::TabletReader tablet{*pool, &readFile};

ASSERT_EQ(tablet.optionalSections().size(), 3);
ASSERT_TRUE(tablet.optionalSections().contains("section1"));
ASSERT_TRUE(tablet.optionalSections().contains("section2"));
ASSERT_TRUE(tablet.optionalSections().contains("section3"));

auto check1 = [&]() {
auto section = tablet.loadOptionalSection("section1");
ASSERT_TRUE(section.has_value());
Expand Down Expand Up @@ -607,6 +612,8 @@ TEST(TabletTests, OptionalSectionsEmpty) {
file, useChaniedBuffers);
nimble::TabletReader tablet{*pool, &readFile};

ASSERT_TRUE(tablet.optionalSections().empty());

auto section = tablet.loadOptionalSection("section1");
ASSERT_FALSE(section.has_value());
}
Expand Down

0 comments on commit a05802e

Please sign in to comment.