Skip to content

Commit

Permalink
Fix ArrayOfRagged (#957)
Browse files Browse the repository at this point in the history
* Fix ArrayOfRagged

* Minor fixes
  • Loading branch information
pkufool authored Apr 18, 2022
1 parent 6299e47 commit 2a07e4a
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 84 deletions.
99 changes: 50 additions & 49 deletions k2/csrc/array_of_ragged.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@

namespace k2 {

Array1OfRaggedShape::Array1OfRaggedShape(RaggedShape *srcs, int32_t num_srcs) :
num_srcs_(num_srcs) {
Array1OfRaggedShape::Array1OfRaggedShape(RaggedShape *srcs, int32_t num_srcs,
bool populate_meta)
: num_srcs_(num_srcs), populate_meta_(populate_meta) {
K2_CHECK_GT(num_srcs, 0);
K2_CHECK(srcs);

Expand All @@ -44,8 +45,8 @@ Array1OfRaggedShape::Array1OfRaggedShape(RaggedShape *srcs, int32_t num_srcs) :
// row_ids_ are populated on CPU, although the operator() of Array2 is a
// __host__ and __device__ function. Bear in mind, we cannot access the
// GPU data on CPU.
row_splits_ = Array2<const int32_t *>(GetCpuContext(),
num_axes_ - 1, num_srcs_);
row_splits_ =
Array2<const int32_t *>(GetCpuContext(), num_axes_ - 1, num_srcs_);
row_ids_ = Array2<const int32_t *>(GetCpuContext(), num_axes_ - 1, num_srcs_);

// Notice: no matter the return value of TotSize() is from 'cached_tot_size'
Expand All @@ -69,60 +70,60 @@ Array1OfRaggedShape::Array1OfRaggedShape(RaggedShape *srcs, int32_t num_srcs) :

row_splits_ = row_splits_.To(c_);
row_ids_ = row_ids_.To(c_);
tot_sizes_ = tot_sizes_.To(c_);

if (populate_meta_) {
// Initialize meta_row_splits_
// We populate this on CPU and transfer to GPU.
meta_row_splits_ =
Array2<int32_t>(GetCpuContext(), num_axes_, num_srcs_ + 1);
offsets_ = Array2<int32_t>(GetCpuContext(), num_axes_ + 1, num_srcs_ + 1);

// Initialize meta_row_splits_
// We populate this on CPU and transfer to GPU.
meta_row_splits_ = Array2<int32_t>(GetCpuContext(), num_axes_, num_srcs_ + 1);
offsets_ = Array2<int32_t>(GetCpuContext(), num_axes_ + 1, num_srcs_ + 1);
auto meta_row_splits_acc = meta_row_splits_.Accessor(),
offsets_acc = offsets_.Accessor();

auto meta_row_splits_acc = meta_row_splits_.Accessor(),
offsets_acc = offsets_.Accessor();

// Initialize the 1st row of offsets_, which contains 0,1,2,...
for (int32_t col = 0; col <= num_srcs_; ++col) {
offsets_acc(0, col) = col;
}
// Initialize the 1st col of meta_row_splits_ and offsets_
for (int32_t row = 0; row < num_axes_; ++row) {
meta_row_splits_acc(row, 0) = 0;
offsets_acc(row + 1, 0) = 0;
}
// Initialize the 1st row of offsets_, which contains 0,1,2,...
for (int32_t col = 0; col <= num_srcs_; ++col) {
offsets_acc(0, col) = col;
}
// Initialize the 1st col of meta_row_splits_ and offsets_
for (int32_t row = 0; row < num_axes_; ++row) {
meta_row_splits_acc(row, 0) = 0;
offsets_acc(row + 1, 0) = 0;
}

// The meta_row_splits_ is the cumulative sum of the tot-sizes of the
// individual arrays.
for (int32_t i = 0; i < num_axes_; ++i) {
for (int32_t j = 1; j <= num_srcs_; ++j) {
meta_row_splits_acc(i, j) = meta_row_splits_acc(i, j - 1) +
srcs[j - 1].TotSize(i);
offsets_acc(i + 1, j) = meta_row_splits_acc(i, j);
// The meta_row_splits_ is the cumulative sum of the tot-sizes of the
// individual arrays.
for (int32_t i = 0; i < num_axes_; ++i) {
for (int32_t j = 1; j <= num_srcs_; ++j) {
meta_row_splits_acc(i, j) =
meta_row_splits_acc(i, j - 1) + srcs[j - 1].TotSize(i);
offsets_acc(i + 1, j) = meta_row_splits_acc(i, j);
}
}
}

// Initialize meta_row_ids_
// Elements are in [0, NumSrcs() - 1]
meta_row_ids_.resize(num_axes_);

for (int32_t axis = 0; axis < num_axes_; ++axis) {
// The length equals TotSize(axis)
meta_row_ids_.at(axis) = Array1<int32_t>(
GetCpuContext(), meta_row_splits_acc(axis, num_srcs_));
int32_t *meta_row_ids_data = meta_row_ids_[axis].Data();

int32_t cur_row_start = meta_row_splits_acc(axis, 0);
for (int32_t src = 0; src < num_srcs_; ++src) {
int32_t next_row_start = meta_row_splits_acc(axis, src + 1);
for (; cur_row_start < next_row_start; ++cur_row_start) {
meta_row_ids_data[cur_row_start] = src;
// Initialize meta_row_ids_
// Elements are in [0, NumSrcs() - 1]
meta_row_ids_.resize(num_axes_);

for (int32_t axis = 0; axis < num_axes_; ++axis) {
// The length equals TotSize(axis)
meta_row_ids_.at(axis) = Array1<int32_t>(
GetCpuContext(), meta_row_splits_acc(axis, num_srcs_));
int32_t *meta_row_ids_data = meta_row_ids_[axis].Data();

int32_t cur_row_start = meta_row_splits_acc(axis, 0);
for (int32_t src = 0; src < num_srcs_; ++src) {
int32_t next_row_start = meta_row_splits_acc(axis, src + 1);
for (; cur_row_start < next_row_start; ++cur_row_start) {
meta_row_ids_data[cur_row_start] = src;
}
}
meta_row_ids_[axis] = meta_row_ids_[axis].To(c_);
}
meta_row_ids_[axis] = meta_row_ids_[axis].To(c_);
}

meta_row_splits_ = meta_row_splits_.To(c_);
offsets_ = offsets_.To(c_);
meta_row_splits_ = meta_row_splits_.To(c_);
offsets_ = offsets_.To(c_);
}
}


} // namespace k2
79 changes: 45 additions & 34 deletions k2/csrc/array_of_ragged.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ namespace k2 {
wrapper that saves you the trouble of creating arrays of pointers.
*/


/*
Array1OfRaggedShape is a convenience class that gives you easy access
to pointers-of-pointers for an array of ragged shapes.
Expand All @@ -57,16 +56,21 @@ class Array1OfRaggedShape {
srcs: pointers to the source shapes, a CPU pointer
num_srcs: the number of source shapes. All shapes must have the
same NumAxes() and must be on the same device.
populate_meta: Whether to populate meta_row_splits_, meta_row_ids_ and
offsets_. They are useful in some situations, but when
num_srcs is large the total number of elements of the source
shapes may not fit in an `int32_t`, so callers choose whether
they need them. They are not populated by default.
TODO: we'll likely, later, add optional args which dictate which of
the MetaRowSplits() and MetaRowIds() are to be pre-populated; this should
enable us to save kernels by combining certain operations across the
axes.
*/
Array1OfRaggedShape(RaggedShape *srcs,
int32_t num_srcs);

Array1OfRaggedShape(RaggedShape *srcs, int32_t num_srcs,
bool populate_meta = false);

// Returns the number of source shapes this object was constructed with.
int32_t NumSrcs() const {
  return num_srcs_;
}
// Returns the stored number of axes (the num_axes_ member).
int32_t NumAxes() const {
  return num_axes_;
}
Expand All @@ -81,34 +85,29 @@ class Array1OfRaggedShape {
// Returns device-accessible vector of row-splits for a particular
// axis, indexed by 0 <= src < num_srcs.
const int32_t **RowSplits(int32_t axis) {
K2_CHECK_LT(static_cast<uint32_t>(axis),
static_cast<uint32_t>(num_axes_));
return row_splits_.Row(axis - 1).Data();
K2_CHECK_LT(static_cast<uint32_t>(axis), static_cast<uint32_t>(num_axes_));
return row_splits_.Row(axis - 1).Data();
}

// Returns device-accessible array of row-ids for the individual shapes
// indexed [axis-1][src], with 0 <= src < num_srcs. The shape of this
// Array2 is [NumAxes() - 1][NumSrcs()].
const Array2<const int32_t*> *RowIds() const { return &row_ids_; }

const Array2<const int32_t *> *RowIds() const { return &row_ids_; }

// Returns device-accessible vector of row-ids for a particular
// axis, indexed by 0 <= src < num_srcs.
const int32_t **RowIds(int32_t axis) {
K2_CHECK_LT(static_cast<uint32_t>(axis),
static_cast<uint32_t>(num_axes_));
return row_ids_.Row(axis - 1).Data();
K2_CHECK_LT(static_cast<uint32_t>(axis), static_cast<uint32_t>(num_axes_));
return row_ids_.Row(axis - 1).Data();
}


/* Return the total size on this axis, which is the sum of the TotSize() of
the individual shapes. Requires 0 <= axis < NumAxes() and
for axis=0 the returned value is the same as Dim0().
*/
int32_t TotSize(int32_t axis) const {
K2_CHECK_LT(static_cast<uint32_t>(axis),
static_cast<uint32_t>(num_axes_));
return tot_sizes_[axis];
K2_CHECK_LT(static_cast<uint32_t>(axis), static_cast<uint32_t>(num_axes_));
return tot_sizes_[axis];
}

// equivalent to TotSize(0).
Expand All @@ -129,13 +128,21 @@ class Array1OfRaggedShape {
to GPU, this will be faster than invoking an extra kernel in normal cases
when the NumSrcs() is small. [Also: see GetRowInfoMulti()].
*/
const Array2<int32_t> &MetaRowSplits() const { return meta_row_splits_; }
const Array2<int32_t> &MetaRowSplits() const {
K2_CHECK(populate_meta_) << "To use this function, you need to initialize "
"the object with populate_meta equaling true";
return meta_row_splits_;
}

// could POSSIBLY add this so this code could be used in functions like
// Stack(). would be like MetaRowSplits but with an extra 1st row containing
// 0,1,2,... We could perhaps create it with 1 extra initial row so this is
// always convenient to output.
const Array2<int32_t> &Offsets() const { return offsets_; }
const Array2<int32_t> &Offsets() const {
K2_CHECK(populate_meta_) << "To use this function, you need to initialize "
"the object with populate_meta equaling true";
return offsets_;
}

/*
Returns the meta-row-splits for a particular axis, with
Expand All @@ -146,8 +153,9 @@ class Array1OfRaggedShape {
Note: in ragged_ops.cu we refer to this as composed_row_splits
*/
Array1<int32_t> MetaRowSplits(int32_t axis) {
K2_CHECK_LT(static_cast<uint32_t>(axis),
static_cast<uint32_t>(num_axes_));
K2_CHECK(populate_meta_) << "To use this function, you need to initialize "
"the object with populate_meta equaling true";
K2_CHECK_LT(static_cast<uint32_t>(axis), static_cast<uint32_t>(num_axes_));
return meta_row_splits_.Row(axis);
}

Expand All @@ -161,9 +169,11 @@ class Array1OfRaggedShape {
Note: in ragged_ops.cu we refer to this as composed_row_ids.
*/
Array1<const int32_t*> MetaRowIds() {
Array1<const int32_t*> ans(GetCpuContext(), num_axes_);
const int32_t* *ans_data = ans.Data();
Array1<const int32_t *> MetaRowIds() {
K2_CHECK(populate_meta_) << "To use this function, you need to initialize "
"the object with populate_meta equaling true";
Array1<const int32_t *> ans(GetCpuContext(), num_axes_);
const int32_t **ans_data = ans.Data();
for (int32_t i = 0; i < num_axes_; ++i) {
ans_data[i] = meta_row_ids_[i].Data();
}
Expand All @@ -180,8 +190,9 @@ class Array1OfRaggedShape {
concatenated array would come from.
*/
const Array1<int32_t> &MetaRowIds(int32_t axis) const {
K2_CHECK_LT(static_cast<uint32_t>(axis),
static_cast<uint32_t>(num_axes_));
K2_CHECK(populate_meta_) << "To use this function, you need to initialize "
"the object with populate_meta equaling true";
K2_CHECK_LT(static_cast<uint32_t>(axis), static_cast<uint32_t>(num_axes_));
return meta_row_ids_[axis];
}

Expand All @@ -190,17 +201,17 @@ class Array1OfRaggedShape {
int32_t num_srcs_;
int32_t num_axes_;

bool populate_meta_;

Array2<const int32_t *> row_splits_; // shape [num_axes_ - 1][num_srcs_]
Array2<const int32_t *> row_ids_; // shape [num_axes_ - 1][num_srcs_]
Array1<int32_t> tot_sizes_; // dim num_axes_
Array1<int32_t> tot_sizes_; // dim num_axes_, a CPU Array.

Array2<int32_t> meta_row_splits_; // shape [num_axes_][num_srcs_ + 1]
Array2<int32_t> offsets_; // shape [num_axes_][num_srcs_ + 1]
std::vector<Array1<int32_t> > meta_row_ids_; // dim num_axes_
Array2<int32_t> meta_row_splits_; // shape [num_axes_][num_srcs_ + 1]
Array2<int32_t> offsets_; // shape [num_axes_][num_srcs_ + 1]
std::vector<Array1<int32_t>> meta_row_ids_; // dim num_axes_
};



/*
Array1OfRagged<T> is a 1-dimensional array of Ragged<T>.
It is intended for situations where you want to do some operations on
Expand All @@ -226,7 +237,8 @@ struct Array1OfRagged {
Array1OfRagged() = default;

// The 'srcs' should have the same number of axes.
Array1OfRagged(Ragged<T> *srcs, int32_t num_srcs) {
Array1OfRagged(Ragged<T> *srcs, int32_t num_srcs,
bool populate_meta = false) {
K2_CHECK_GT(num_srcs, 0);
K2_CHECK(srcs);
values = Array1<T *>(GetCpuContext(), num_srcs);
Expand All @@ -236,12 +248,11 @@ struct Array1OfRagged {
shapes[i] = srcs[i].shape;
values_data[i] = srcs[i].values.Data();
}
shape = Array1OfRaggedShape(shapes.data(), num_srcs);
shape = Array1OfRaggedShape(shapes.data(), num_srcs, populate_meta);
values = values.To(shape.Context());
}
};


} // namespace k2

#endif // K2_CSRC_ARRAY_OF_RAGGED_H_
3 changes: 2 additions & 1 deletion k2/csrc/array_of_ragged_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ void TestArray1OfRaggedConstruct() {
0 /*min_num_elements*/, 100 /*max_num_elements*/)
.To(c, true /*copy_all*/));
}
auto array_of_ragged = Array1OfRagged<T>(raggeds.data(), num_srcs);
auto array_of_ragged =
Array1OfRagged<T>(raggeds.data(), num_srcs, true /*populate_meta*/);
for (int32_t j = 1; j < num_axes; ++j) {
const int32_t **row_splits = array_of_ragged.shape.RowSplits(j);
const int32_t **row_ids = array_of_ragged.shape.RowIds(j);
Expand Down

0 comments on commit 2a07e4a

Please sign in to comment.