forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIndexKernel.cpp
75 lines (60 loc) · 2.72 KB
/
IndexKernel.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include <ATen/native/cuda/IndexKernel.h>
#include <ATen/native/TensorAdvancedIndexing.h> // For at::native::index_out
#include <ATen/ExpandUtils.h>
#include <ATen/Functions.h>
#include <ATen/MemoryOverlap.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/NativeFunctions.h>
namespace at {
namespace native {
static Tensor & masked_select_out_cuda_impl(Tensor & result, const Tensor & self, const Tensor & mask) {
NoNamesGuard guard;
TORCH_CHECK(mask.scalar_type() == ScalarType::Byte || mask.scalar_type() == ScalarType::Bool,
"masked_select: expected BoolTensor or ByteTensor for mask");
TORCH_CHECK(self.scalar_type() == result.scalar_type(),
"masked_select(): self and result must have the same scalar type");
auto mask_temp = (mask.dim() == 0)
? c10::MaybeOwned<Tensor>::owned(mask.unsqueeze(0))
: c10::MaybeOwned<Tensor>::borrowed(mask);
auto self_temp = (self.dim() == 0)
? c10::MaybeOwned<Tensor>::owned(self.unsqueeze(0))
: c10::MaybeOwned<Tensor>::borrowed(self);
// Cannot reassign to mask_temp and self_temp here! if they are
// owning and expand_outplace returns a borrow, the returned borrow
// would dangle.
auto mask_self_expanded = expand_outplace(*mask_temp, *self_temp);
at::native::index_out(
result, *std::get<1>(mask_self_expanded),
c10::List<c10::optional<at::Tensor>>({*std::move(std::get<0>(mask_self_expanded))}));
return result;
}
Tensor masked_select_cuda(const Tensor & self, const Tensor & mask) {
namedinference::compute_broadcast_outnames(self, mask);
Tensor result = at::empty({0}, self.options());
return masked_select_out_cuda_impl(result, self, mask);
}
Tensor & masked_select_out_cuda(const Tensor & self, const Tensor & mask, Tensor & result) {
namedinference::compute_broadcast_outnames(self, mask);
return masked_select_out_cuda_impl(result, self, mask);
}
Tensor & masked_scatter__cuda(Tensor& self, const Tensor& mask, const Tensor& source) {
at::assert_no_internal_overlap(self);
TORCH_CHECK(
self.scalar_type() == source.scalar_type(),
"masked_scatter: expected self and source to have same dtypes but got",
self.scalar_type(),
" and ",
source.scalar_type());
c10::MaybeOwned<Tensor> b_mask = expand_inplace(self, mask, "masked_scatter_");
if (b_mask->dtype() == ScalarType::Byte) {
TORCH_WARN("masked_scatter_ received a mask with dtype torch.uint8, this behavior is now deprecated," \
"please use a mask with dtype torch.bool instead.");
}
if (self.numel() == 0) {
return self;
}
auto maskPrefixSum = at::empty(self.sizes(), mask.options().dtype(kLong));
launch_masked_scatter_kernel(self, *b_mask, maskPrefixSum, source);
return self;
}
}} // namespace at::native