Skip to content

Commit

Permalink
Adds enable_frame_num to the experimental video reader (#5628)
Browse files Browse the repository at this point in the history
- adds an ability to output frame numbers in the experimental video
  reader
- this allows making VideoReaderDecoderCpuTest.RandomShuffle_* test
  independent from the hardcoding of the expected frame order (in case
  random generator implementation changes)

Signed-off-by: Janusz Lisiecki <[email protected]>
  • Loading branch information
JanuszL authored Sep 11, 2024
1 parent f1a9a4d commit 4b1833a
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ template <typename Backend>
class VideoSample {
public:
Tensor<Backend> data_;
int label_;
int label_ = -1;
int first_frame_ = -1;
};

class VideoLoaderDecoderBase {
Expand All @@ -46,6 +47,7 @@ class VideoLoaderDecoderBase {
stride_(spec.GetArgument<int>("stride")),
step_(spec.GetArgument<int>("step")) {
has_labels_ = spec.TryGetRepeatedArgument(labels_, "labels");
has_frame_idx_ = spec.GetArgument<bool>("enable_frame_num");
DALI_ENFORCE(
!has_labels_ || labels_.size() == filenames_.size(),
make_string(
Expand All @@ -61,6 +63,7 @@ class VideoLoaderDecoderBase {
std::vector<std::string> filenames_;
std::vector<int> labels_;
bool has_labels_ = false;
bool has_frame_idx_ = false;

Index current_index_ = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ void VideoLoaderDecoderCpu::ReadSample(VideoSample<CPUBackend> &sample) {
if (has_labels_) {
sample.label_ = labels_[sample_span.video_idx_];
}
if (has_frame_idx_) {
sample.first_frame_ = sample_span.start_;
}
}

Index VideoLoaderDecoderCpu::SizeImpl() {
Expand Down
21 changes: 18 additions & 3 deletions dali/operators/reader/video_reader_decoder_cpu_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ namespace dali {

VideoReaderDecoderCpu::VideoReaderDecoderCpu(const OpSpec &spec)
: DataReader<CPUBackend, VideoSampleCpu, VideoSampleCpu, true>(spec),
has_labels_(spec.HasArgument("labels")) {
has_labels_(spec.HasArgument("labels")),
has_frame_idx_(spec.GetArgument<bool>("enable_frame_num")) {
loader_ = InitLoader<VideoLoaderDecoderCpu>(spec);
this->SetInitialSnapshot();
}
Expand All @@ -32,16 +33,26 @@ void VideoReaderDecoderCpu::RunImpl(SampleWorkspace &ws) {
video_output.Copy(sample.data_);
video_output.SetSourceInfo(sample.data_.GetSourceInfo());

int out_index = 1;
if (has_labels_) {
auto &label_output = ws.Output<CPUBackend>(1);
auto &label_output = ws.Output<CPUBackend>(out_index);
label_output.Resize({}, DALIDataType::DALI_INT32);
label_output.mutable_data<int>()[0] = sample.label_;
out_index++;
}
if (has_frame_idx_) {
auto &frame_idx_output = ws.Output<CPUBackend>(out_index);
frame_idx_output.Resize({}, DALIDataType::DALI_INT32);
frame_idx_output.mutable_data<int>()[0] = sample.first_frame_;
out_index++;
}
}

namespace detail {
inline int VideoReaderDecoderOutputFn(const OpSpec &spec) {
return spec.HasArgument("labels") ? 2 : 1;
bool has_labels = spec.HasArgument("labels");
bool has_frame_num_output = spec.GetArgument<bool>("enable_frame_num");
return 1 + has_labels + has_frame_num_output;
}
} // namespace detail

Expand All @@ -68,6 +79,10 @@ even in the variable frame rate scenario.)code")
.AddArg("sequence_length",
R"code(Frames to load per sequence.)code",
DALI_INT32)
.AddOptionalArg("enable_frame_num",
R"code(If set, returns the index of the first frame in the decoded sequence
as an additional output.)code",
false)
.AddOptionalArg("step",
R"code(Frame interval between each sequence.
Expand Down
1 change: 1 addition & 0 deletions dali/operators/reader/video_reader_decoder_cpu_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class VideoReaderDecoderCpu

private:
bool has_labels_ = false;
bool has_frame_idx_ = false;
};

} // namespace dali
Expand Down
66 changes: 45 additions & 21 deletions dali/operators/reader/video_reader_decoder_gpu_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ namespace dali {

VideoReaderDecoderGpu::VideoReaderDecoderGpu(const OpSpec &spec)
: DataReader<GPUBackend, VideoSampleGpu, VideoSampleGpu, true>(spec),
has_labels_(spec.HasArgument("labels")) {
has_labels_(spec.HasArgument("labels")),
has_frame_idx_(spec.GetArgument<bool>("enable_frame_num")) {
loader_ = InitLoader<VideoLoaderDecoderGpu>(spec);
this->SetInitialSnapshot();
}
Expand Down Expand Up @@ -50,14 +51,21 @@ bool VideoReaderDecoderGpu::SetupImpl(

output_desc[0] = { video_shape, DALI_UINT8 };

if (!has_labels_) {
return true;
int out_index = 1;
if (has_labels_) {
output_desc[out_index] = {
uniform_list_shape<1>(batch_size, {1}),
DALI_INT32
};
out_index++;
}
if (has_frame_idx_) {
output_desc[out_index] = {
uniform_list_shape<1>(batch_size, {1}),
DALI_INT32
};
out_index++;
}

output_desc[1] = {
uniform_list_shape<1>(batch_size, {1}),
DALI_INT32
};

return true;
}
Expand All @@ -80,23 +88,39 @@ void VideoReaderDecoderGpu::RunImpl(Workspace &ws) {
video_output.SetSourceInfo(sample_id, sample.data_.GetSourceInfo());
}

if (!has_labels_) {
return;
}
int out_index = 1;
if (has_labels_) {
auto &labels_output = ws.Output<GPUBackend>(out_index);
SmallVector<int, 32> labels_cpu;

auto &labels_output = ws.Output<GPUBackend>(1);
SmallVector<int, 32> labels_cpu;
for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
auto &sample = GetSample(sample_id);
labels_cpu[sample_id] = sample.label_;
}

for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
auto &sample = GetSample(sample_id);
labels_cpu[sample_id] = sample.label_;
MemCopy(
labels_output.AsTensor().raw_mutable_data(),
labels_cpu.data(),
batch_size * sizeof(DALI_INT32),
ws.stream());
out_index++;
}
if (has_frame_idx_) {
auto &frame_idx_output = ws.Output<GPUBackend>(out_index);
SmallVector<int, 32> frame_idx_output_cpu;

for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
auto &sample = GetSample(sample_id);
frame_idx_output_cpu[sample_id] = sample.span_ ? sample.span_->start_ : -1;
}

MemCopy(
labels_output.AsTensor().raw_mutable_data(),
labels_cpu.data(),
batch_size * sizeof(DALI_INT32),
ws.stream());
MemCopy(
frame_idx_output.AsTensor().raw_mutable_data(),
frame_idx_output_cpu.data(),
batch_size * sizeof(DALI_INT32),
ws.stream());
out_index++;
}
}

DALI_REGISTER_OPERATOR(experimental__readers__Video, VideoReaderDecoderGpu, GPU);
Expand Down
1 change: 1 addition & 0 deletions dali/operators/reader/video_reader_decoder_gpu_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class VideoReaderDecoderGpu : public DataReader<GPUBackend, VideoSampleGpu, Vide

private:
bool has_labels_ = false;
bool has_frame_idx_ = false;
};

} // namespace dali
Expand Down
34 changes: 28 additions & 6 deletions dali/operators/reader/video_reader_decoder_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {
virtual void AssertFrame(
int frame_id, const uint8_t *frame, TestVideo &ground_truth) = 0;

template<typename Backend>
int GetFrameIdx(dali::TensorList<Backend> &device_frame_idx);

private:
template<typename Backend>
void RunTestImpl(
Expand Down Expand Up @@ -129,15 +132,15 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {
.AddArg("device", backend)
.AddArg("sequence_length", sequence_length)
.AddArg("random_shuffle", true)
.AddArg("enable_frame_num", true)
.AddArg("initial_fill", cfr_videos_[0].NumFrames())
.AddArg(
"filenames",
std::vector<std::string>{cfr_videos_paths_[0]})
.AddOutput("frames", backend));

pipe.Build({{"frames", backend}});
.AddOutput("frames", backend)
.AddOutput("frame_idx", backend));

std::vector<int> expected_order = {29, 46, 33, 6, 37};
pipe.Build({{"frames", backend}, {"frame_idx", backend}});

int num_sequences = 5;

Expand All @@ -148,9 +151,10 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {

auto &frame_video_output = ws.Output<Backend>(0);
const auto sample = frame_video_output.template tensor<uint8_t>(0);
int frame_idx = GetFrameIdx(ws.Output<Backend>(1));

// We want to access correct order, so we comapre only the first frame of the sequence
AssertFrame(expected_order[sequence_id], sample, ground_truth_video);
// We want to access correct order, so we compare only the first frame of the sequence
AssertFrame(frame_idx, sample, ground_truth_video);
}
}
};
Expand All @@ -168,6 +172,15 @@ void VideoReaderDecoderBaseTest::RunShuffleTest<dali::CPUBackend>() {
RunShuffleTestImpl<dali::CPUBackend>("cpu", dali::CPU_ONLY_DEVICE_ID);
}

template<>
int VideoReaderDecoderBaseTest::GetFrameIdx(
dali::TensorList<dali::CPUBackend> &device_frame_idx) {
const auto frame_idx = device_frame_idx.template tensor<int>(0);
int frame_idx_buffer = -1;
std::copy_n(frame_idx, 1, &frame_idx_buffer);
return frame_idx_buffer;
}

template<>
void VideoReaderDecoderBaseTest::RunTest<dali::GPUBackend>(
std::vector<std::string> &videos_paths,
Expand All @@ -181,6 +194,15 @@ void VideoReaderDecoderBaseTest::RunShuffleTest<dali::GPUBackend>() {
RunShuffleTestImpl<dali::GPUBackend>("gpu", 0);
}

template<>
int VideoReaderDecoderBaseTest::GetFrameIdx(
dali::TensorList<dali::GPUBackend> &device_frame_idx) {
const auto frame_idx = device_frame_idx.template tensor<int>(0);
int frame_idx_buffer = -1;
MemCopy(&frame_idx_buffer, frame_idx, sizeof(int));
return frame_idx_buffer;
}

class VideoReaderDecoderCpuTest : public VideoReaderDecoderBaseTest {
public:
void AssertLabel(const int *label, int ground_truth_label) override {
Expand Down

0 comments on commit 4b1833a

Please sign in to comment.