Skip to content

Commit

Permalink
Augment nimble_dump streams subcommand to include InMap stream indica…
Browse files Browse the repository at this point in the history
…tor (#130)

Summary:

Add a new `--inmap_stream` option to the `streams` subcommand. When set, the output includes an additional column indicating whether or not each stream is an InMap stream of a FlatMap.

Differential Revision: D67996331
  • Loading branch information
Chongfeng Hu authored and facebook-github-bot committed Jan 10, 2025
1 parent 7c4ce57 commit fdade2f
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 2 deletions.
5 changes: 5 additions & 0 deletions dwio/nimble/tools/NimbleDump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ int main(int argc, char* argv[]) {
options["no_header"].as<bool>(),
options["labels"].as<bool>(),
options["raw_size"].as<bool>(),
options["inmap_stream"].as<bool>(),
getOptional<uint32_t>(options["stripe"]));
},
positionalArgs)
Expand Down Expand Up @@ -163,6 +164,10 @@ int main(int argc, char* argv[]) {
po::bool_switch()->default_value(false),
"Include stream labels. Lables provide a readable path from the "
"root node to the stream, as they appear in the schema tree."
)(
"inmap_stream,i",
po::bool_switch()->default_value(false),
"Include InMap stream indicator."
);
// clang-format on

Expand Down
25 changes: 23 additions & 2 deletions dwio/nimble/tools/NimbleDumpLib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <numeric>
#include <ostream>
#include <tuple>
#include <unordered_set>
#include <utility>

#include "common/strings/Zstd.h"
Expand Down Expand Up @@ -444,6 +445,7 @@ void NimbleDumpLib::emitStreams(
bool noHeader,
bool showStreamLabels,
bool showStreamRawSize,
bool showInMapStream,
std::optional<uint32_t> stripeId) {
auto tabletReader = std::make_shared<TabletReader>(*pool_, file_.get());

Expand All @@ -459,15 +461,31 @@ void NimbleDumpLib::emitStreams(
if (showStreamLabels) {
fields.push_back({"Stream Label", 16, Alignment::Left});
}
if (showInMapStream) {
fields.push_back({"InMap Stream", 16, Alignment::Left});
}
fields.push_back({"Type", 30, Alignment::Left});

TableFormatter formatter(ostream_, fields, noHeader);
TableFormatter formatter(ostream_, std::move(fields), noHeader);

std::optional<StreamLabels> labels{};
if (showStreamLabels) {
VeloxReader reader{*pool_, tabletReader};
labels.emplace(reader.schema());
}
std::unordered_set<uint32_t> inMapStreams;
if (showInMapStream) {
VeloxReader reader{*pool_, tabletReader};
SchemaReader::traverseSchema(
reader.schema(), [&](auto /*level*/, const Type& type, auto /*info*/) {
if (type.kind() == Kind::FlatMap) {
auto& map = type.asFlatMap();
for (size_t i = 0; i < map.childrenCount(); ++i) {
inMapStreams.insert(map.inMapDescriptorAt(i).offset());
}
}
});
}

traverseTablet(
*pool_,
Expand Down Expand Up @@ -500,7 +518,10 @@ void NimbleDumpLib::emitStreams(
}
values.push_back(folly::to<std::string>(itemCount));
if (showStreamLabels) {
auto it = values.emplace_back(labels->streamLabel(streamId));
values.emplace_back(labels->streamLabel(streamId));
}
if (showInMapStream) {
values.emplace_back(inMapStreams.contains(streamId) ? "T" : "F");
}
values.push_back(getStreamInputLabel(stream));
formatter.writeRow(values);
Expand Down
1 change: 1 addition & 0 deletions dwio/nimble/tools/NimbleDumpLib.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class NimbleDumpLib {
bool noHeader,
bool flatmapKeys,
bool rawSize,
bool showInMapStream,
std::optional<uint32_t> stripeId);
void
emitHistogram(bool topLevel, bool noHeader, std::optional<uint32_t> stripeId);
Expand Down

0 comments on commit fdade2f

Please sign in to comment.