From 07fcc459a65380e68d2d3b9e3665813745205467 Mon Sep 17 00:00:00 2001 From: Chongfeng Hu Date: Fri, 10 Jan 2025 11:43:37 -0800 Subject: [PATCH] Augment nimble_dump streams subcommand to include InMap stream indicator (#130) Summary: Add a new `--inmap_stream` option to the `streams` subcommand. When set, the output includes a new column, which indicates if each stream is an InMap stream for a FlatMap, or not. Reviewed By: helfman Differential Revision: D67996331 --- dwio/nimble/tools/NimbleDump.cpp | 5 +++++ dwio/nimble/tools/NimbleDumpLib.cpp | 32 +++++++++++++++++++++++++---- dwio/nimble/tools/NimbleDumpLib.h | 1 + 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/dwio/nimble/tools/NimbleDump.cpp b/dwio/nimble/tools/NimbleDump.cpp index 798b2b4..44914c0 100644 --- a/dwio/nimble/tools/NimbleDump.cpp +++ b/dwio/nimble/tools/NimbleDump.cpp @@ -136,6 +136,7 @@ int main(int argc, char* argv[]) { options["no_header"].as(), options["labels"].as(), options["raw_size"].as(), + options["inmap_stream"].as(), getOptional(options["stripe"])); }, positionalArgs) @@ -163,6 +164,10 @@ int main(int argc, char* argv[]) { po::bool_switch()->default_value(false), "Include stream labels. Lables provide a readable path from the " "root node to the stream, as they appear in the schema tree." + )( + "inmap_stream,i", + po::bool_switch()->default_value(false), + "Include InMap stream indicator." ); // clang-format on diff --git a/dwio/nimble/tools/NimbleDumpLib.cpp b/dwio/nimble/tools/NimbleDumpLib.cpp index fa24381..d2037bc 100644 --- a/dwio/nimble/tools/NimbleDumpLib.cpp +++ b/dwio/nimble/tools/NimbleDumpLib.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "common/strings/Zstd.h" @@ -444,6 +445,7 @@ void NimbleDumpLib::emitStreams( bool noHeader, bool showStreamLabels, bool showStreamRawSize, + bool showInMapStream, std::optional stripeId) { auto tabletReader = std::make_shared(*pool_, file_.get()); @@ -459,14 +461,33 @@ void NimbleDumpLib::emitStreams( if (showStreamLabels) { fields.push_back({"Stream Label", 16, Alignment::Left}); } + if (showInMapStream) { + fields.push_back({"InMap Stream", 14, Alignment::Left}); + } fields.push_back({"Type", 30, Alignment::Left}); - TableFormatter formatter(ostream_, fields, noHeader); + TableFormatter formatter(ostream_, std::move(fields), noHeader); std::optional labels{}; - if (showStreamLabels) { + std::unordered_set inMapStreams; + if (showStreamLabels || showInMapStream) { VeloxReader reader{*pool_, tabletReader}; - labels.emplace(reader.schema()); + if (showStreamLabels) { + labels.emplace(reader.schema()); + } + if (showInMapStream) { + VeloxReader reader{*pool_, tabletReader}; + SchemaReader::traverseSchema( + reader.schema(), + [&](auto /*level*/, const Type& type, auto /*info*/) { + if (type.kind() == Kind::FlatMap) { + auto& map = type.asFlatMap(); + for (size_t i = 0; i < map.childrenCount(); ++i) { + inMapStreams.insert(map.inMapDescriptorAt(i).offset()); + } + } + }); + } } traverseTablet( @@ -500,7 +521,10 @@ void NimbleDumpLib::emitStreams( } values.push_back(folly::to(itemCount)); if (showStreamLabels) { - auto it = values.emplace_back(labels->streamLabel(streamId)); + values.emplace_back(labels->streamLabel(streamId)); + } + if (showInMapStream) { + values.emplace_back(inMapStreams.contains(streamId) ? "T" : "F"); } values.push_back(getStreamInputLabel(stream)); formatter.writeRow(values); diff --git a/dwio/nimble/tools/NimbleDumpLib.h b/dwio/nimble/tools/NimbleDumpLib.h index 3553ade..18ff610 100644 --- a/dwio/nimble/tools/NimbleDumpLib.h +++ b/dwio/nimble/tools/NimbleDumpLib.h @@ -33,6 +33,7 @@ class NimbleDumpLib { bool noHeader, bool flatmapKeys, bool rawSize, + bool showInMapStream, std::optional stripeId); void emitHistogram(bool topLevel, bool noHeader, std::optional stripeId);