Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ntuple] add different modes of descriptor loading to RNTupleSerializer #17541

Merged
merged 2 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion tree/ntuple/v7/inc/ROOT/RNTupleSerialize.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ Deserialization errors throw exceptions. Only when indicated or when passed as a
*/
// clang-format on
class RNTupleSerializer {
static RResult<std::vector<RClusterDescriptorBuilder>>
DeserializePageListRaw(const void *buffer, std::uint64_t bufSize, DescriptorId_t clusterGroupId,
const RNTupleDescriptor &desc);

public:
static constexpr std::uint16_t kEnvelopeTypeHeader = 0x01;
static constexpr std::uint16_t kEnvelopeTypeFooter = 0x02;
Expand Down Expand Up @@ -275,9 +279,21 @@ public:
DeserializeHeader(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder);
static RResult<void>
DeserializeFooter(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder);

/// Selects which fixups DeserializePageList() applies to the cluster descriptors it reads from a page list.
/// The modes are cumulative: kRaw applies none, kForWriting commits the suppressed column ranges, and kForReading
/// additionally adds the extended column ranges. RPageSource::Attach() uses kForReading unless told otherwise.
enum class EDescriptorDeserializeMode {
/// Deserializes the descriptor as-is without performing any additional fixup. The produced descriptor is
/// unsuitable for reading or writing, but it's a faithful representation of the on-disk information.
kRaw,
/// Deserializes the descriptor and performs fixup on the suppressed column ranges. This produces a descriptor
/// that is suitable for writing, but not reading.
kForWriting,
/// Deserializes the descriptor and performs fixup on the suppressed column ranges and on clusters, taking
/// into account the header extension. This produces a descriptor that is suitable for reading.
kForReading,
};
// The clusters vector must be initialized with the cluster summaries corresponding to the page list
static RResult<void> DeserializePageList(const void *buffer, std::uint64_t bufSize, DescriptorId_t clusterGroupId,
RNTupleDescriptor &desc);
RNTupleDescriptor &desc, EDescriptorDeserializeMode mode);

// Helper functions to (de-)serialize the streamer info type extra information
static std::string SerializeStreamerInfos(const StreamerInfoMap_t &infos);
Expand Down
5 changes: 3 additions & 2 deletions tree/ntuple/v7/inc/ROOT/RPageStorage.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,7 @@ protected:

virtual void LoadStructureImpl() = 0;
/// `LoadStructureImpl()` has been called before `AttachImpl()` is called
virtual RNTupleDescriptor AttachImpl() = 0;
virtual RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) = 0;
/// Returns a new, unattached page source for the same data set
virtual std::unique_ptr<RPageSource> CloneImpl() const = 0;
// Only called if a task scheduler is set. No-op by default.
Expand Down Expand Up @@ -766,7 +766,8 @@ public:
/// Therefore, `LoadStructure()` may do nothing and defer loading the meta-data to `Attach()`.
void LoadStructure();
/// Open the physical storage container and deserialize header and footer
void Attach();
void Attach(
RNTupleSerializer::EDescriptorDeserializeMode mode = RNTupleSerializer::EDescriptorDeserializeMode::kForReading);
NTupleSize_t GetNEntries();
NTupleSize_t GetNElements(ColumnHandle_t columnHandle);

Expand Down
2 changes: 1 addition & 1 deletion tree/ntuple/v7/inc/ROOT/RPageStorageDaos.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ private:

protected:
void LoadStructureImpl() final {}
RNTupleDescriptor AttachImpl() final;
RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final;
/// The cloned page source creates a new connection to the pool/container.
std::unique_ptr<RPageSource> CloneImpl() const final;

Expand Down
2 changes: 1 addition & 1 deletion tree/ntuple/v7/inc/ROOT/RPageStorageFile.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ private:

protected:
void LoadStructureImpl() final;
RNTupleDescriptor AttachImpl() final;
RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final;
/// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
std::unique_ptr<RPageSource> CloneImpl() const final;

Expand Down
57 changes: 46 additions & 11 deletions tree/ntuple/v7/src/RNTupleSerialize.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1765,10 +1765,10 @@ ROOT::Experimental::Internal::RNTupleSerializer::DeserializeFooter(const void *b
return RResult<void>::Success();
}

ROOT::RResult<void> ROOT::Experimental::Internal::RNTupleSerializer::DeserializePageList(const void *buffer,
std::uint64_t bufSize,
DescriptorId_t clusterGroupId,
RNTupleDescriptor &desc)
ROOT::RResult<std::vector<ROOT::Experimental::Internal::RClusterDescriptorBuilder>>
ROOT::Experimental::Internal::RNTupleSerializer::DeserializePageListRaw(const void *buffer, std::uint64_t bufSize,
DescriptorId_t clusterGroupId,
const RNTupleDescriptor &desc)
{
auto base = reinterpret_cast<const unsigned char *>(buffer);
auto bytes = base;
Expand Down Expand Up @@ -1828,7 +1828,6 @@ ROOT::RResult<void> ROOT::Experimental::Internal::RNTupleSerializer::Deserialize
if (nClusters != nClusterSummaries)
return R__FAIL("mismatch between number of clusters and number of cluster summaries");

std::vector<RClusterDescriptor> clusters;
for (std::uint32_t i = 0; i < nClusters; ++i) {
std::uint64_t outerFrameSize;
auto outerFrame = bytes;
Expand Down Expand Up @@ -1891,13 +1890,49 @@ ROOT::RResult<void> ROOT::Experimental::Internal::RNTupleSerializer::Deserialize
} // loop over columns

bytes = outerFrame + outerFrameSize;

auto voidRes = clusterBuilders[i].CommitSuppressedColumnRanges(desc);
if (!voidRes)
return R__FORWARD_ERROR(voidRes);
clusterBuilders[i].AddExtendedColumnRanges(desc);
clusters.emplace_back(clusterBuilders[i].MoveDescriptor().Unwrap());
} // loop over clusters

return clusterBuilders;
}

/// Deserializes the page list of cluster group `clusterGroupId` from `buffer` and registers the resulting cluster
/// descriptors with `desc`.
///
/// The buffer is first parsed by DeserializePageListRaw() into a vector of cluster builders. Depending on `mode`,
/// each builder is then optionally fixed up before it is converted into an RClusterDescriptor.
///
/// \param buffer          Start of the serialized page list.
/// \param bufSize         Size of `buffer` (presumably in bytes -- TODO confirm against DeserializePageListRaw()).
/// \param clusterGroupId  Cluster group whose details are added to `desc`.
/// \param desc            Descriptor passed to the fixup steps and extended through AddClusterGroupDetails().
/// \param mode            Selects the fixups to apply; see EDescriptorDeserializeMode.
/// \return Success, or the forwarded error of DeserializePageListRaw() or CommitSuppressedColumnRanges(). On any
///         such error the function returns before AddClusterGroupDetails() is reached.
ROOT::RResult<void>
ROOT::Experimental::Internal::RNTupleSerializer::DeserializePageList(const void *buffer, std::uint64_t bufSize,
DescriptorId_t clusterGroupId,
RNTupleDescriptor &desc,
EDescriptorDeserializeMode mode)
{
// Parse the on-disk representation only; no fixup has been applied to the builders at this point.
auto clusterBuildersRes = RNTupleSerializer::DeserializePageListRaw(buffer, bufSize, clusterGroupId, desc);
if (!clusterBuildersRes)
return R__FORWARD_ERROR(clusterBuildersRes);

auto clusterBuilders = clusterBuildersRes.Unwrap();

// Exactly one cluster descriptor is produced per builder, so the final size is known up front.
std::vector<RClusterDescriptor> clusters;
clusters.reserve(clusterBuilders.size());

// Conditionally fix up the clusters depending on the attach purpose
switch (mode) {
case EDescriptorDeserializeMode::kForReading:
// Full fixup: commit the suppressed column ranges, then add the extended column ranges.
for (auto &builder : clusterBuilders) {
if (auto res = builder.CommitSuppressedColumnRanges(desc); !res)
return R__FORWARD_RESULT(res);
builder.AddExtendedColumnRanges(desc);
clusters.emplace_back(builder.MoveDescriptor().Unwrap());
}
break;
case EDescriptorDeserializeMode::kForWriting:
// Only the suppressed column ranges are committed; extended column ranges are not added.
for (auto &builder : clusterBuilders) {
if (auto res = builder.CommitSuppressedColumnRanges(desc); !res)
return R__FORWARD_RESULT(res);
clusters.emplace_back(builder.MoveDescriptor().Unwrap());
}
break;
case EDescriptorDeserializeMode::kRaw:
// No fixup: the builders are converted exactly as they were deserialized.
for (auto &builder : clusterBuilders)
clusters.emplace_back(builder.MoveDescriptor().Unwrap());
break;
}

// Hand the finished cluster descriptors of this cluster group over to the descriptor.
desc.AddClusterGroupDetails(clusterGroupId, clusters);

return RResult<void>::Success();
Expand Down
6 changes: 3 additions & 3 deletions tree/ntuple/v7/src/RPageStorage.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -200,11 +200,11 @@ void ROOT::Experimental::Internal::RPageSource::LoadStructure()
fHasStructure = true;
}

void ROOT::Experimental::Internal::RPageSource::Attach()
void ROOT::Experimental::Internal::RPageSource::Attach(RNTupleSerializer::EDescriptorDeserializeMode mode)
{
LoadStructure();
if (!fIsAttached)
GetExclDescriptorGuard().MoveIn(AttachImpl());
GetExclDescriptorGuard().MoveIn(AttachImpl(mode));
fIsAttached = true;
}

Expand Down Expand Up @@ -297,7 +297,7 @@ void ROOT::Experimental::Internal::RPageSource::UnzipClusterImpl(RCluster *clust

fCounters->fNPageUnsealed.Add(pageNo);
} // for all in-memory types of the column
} // for all columns in cluster
} // for all columns in cluster

fTaskScheduler->Wait();

Expand Down
5 changes: 3 additions & 2 deletions tree/ntuple/v7/src/RPageStorageDaos.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,8 @@ ROOT::Experimental::Internal::RPageSourceDaos::RPageSourceDaos(std::string_view

ROOT::Experimental::Internal::RPageSourceDaos::~RPageSourceDaos() = default;

ROOT::Experimental::RNTupleDescriptor ROOT::Experimental::Internal::RPageSourceDaos::AttachImpl()
ROOT::Experimental::RNTupleDescriptor
ROOT::Experimental::Internal::RPageSourceDaos::AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode)
{
ROOT::Experimental::RNTupleDescriptor ntplDesc;
std::unique_ptr<unsigned char[]> buffer, zipBuffer;
Expand Down Expand Up @@ -532,7 +533,7 @@ ROOT::Experimental::RNTupleDescriptor ROOT::Experimental::Internal::RPageSourceD
RNTupleDecompressor::Unzip(zipBuffer.get(), cgDesc.GetPageListLocator().GetNBytesOnStorage(),
cgDesc.GetPageListLength(), buffer.get());

RNTupleSerializer::DeserializePageList(buffer.get(), cgDesc.GetPageListLength(), cgDesc.GetId(), desc);
RNTupleSerializer::DeserializePageList(buffer.get(), cgDesc.GetPageListLength(), cgDesc.GetId(), desc, mode);
}

return desc;
Expand Down
5 changes: 3 additions & 2 deletions tree/ntuple/v7/src/RPageStorageFile.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,8 @@ void ROOT::Experimental::Internal::RPageSourceFile::LoadStructureImpl()
}
}

ROOT::Experimental::RNTupleDescriptor ROOT::Experimental::Internal::RPageSourceFile::AttachImpl()
ROOT::Experimental::RNTupleDescriptor
ROOT::Experimental::Internal::RPageSourceFile::AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode)
{
auto unzipBuf = reinterpret_cast<unsigned char *>(fStructureBuffer.fPtrFooter) + fAnchor->GetNBytesFooter();

Expand All @@ -373,7 +374,7 @@ ROOT::Experimental::RNTupleDescriptor ROOT::Experimental::Internal::RPageSourceF
RNTupleDecompressor::Unzip(zipBuffer, cgDesc.GetPageListLocator().GetNBytesOnStorage(),
cgDesc.GetPageListLength(), buffer.data());

RNTupleSerializer::DeserializePageList(buffer.data(), cgDesc.GetPageListLength(), cgDesc.GetId(), desc);
RNTupleSerializer::DeserializePageList(buffer.data(), cgDesc.GetPageListLength(), cgDesc.GetId(), desc, mode);
}

// For the page reads, we rely on the I/O scheduler to define the read requests
Expand Down
2 changes: 1 addition & 1 deletion tree/ntuple/v7/test/ntuple_cluster.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace {
class RPageSourceMock : public RPageSource {
protected:
void LoadStructureImpl() final {}
RNTupleDescriptor AttachImpl() final { return RNTupleDescriptor(); }
RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode) final { return RNTupleDescriptor(); }
std::unique_ptr<RPageSource> CloneImpl() const final { return nullptr; }
RPageRef LoadPageImpl(ColumnHandle_t, const RClusterInfo &, NTupleSize_t) final { return RPageRef(); }

Expand Down
5 changes: 4 additions & 1 deletion tree/ntuple/v7/test/ntuple_endian.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,10 @@ class RPageSourceMock : public RPageSource {
const std::vector<RPageStorage::RSealedPage> &fPages;

void LoadStructureImpl() final {}
RNTupleDescriptor AttachImpl() final { return RNTupleDescriptor(); }
RNTupleDescriptor AttachImpl(ROOT::Experimental::Internal::RNTupleSerializer::EDescriptorDeserializeMode) final
{
return RNTupleDescriptor();
}
std::unique_ptr<RPageSource> CloneImpl() const final { return nullptr; }
RPageRef LoadPageImpl(ColumnHandle_t, const RClusterInfo &, ROOT::Experimental::NTupleSize_t) final
{
Expand Down
Loading
Loading