Skip to content

Commit

Permalink
#10830: Remove Binary and ternary composite op from tt_eager
Browse files Browse the repository at this point in the history
  • Loading branch information
ruthreshx committed Jul 29, 2024
1 parent 66bfeaf commit 92d7a6f
Show file tree
Hide file tree
Showing 9 changed files with 14 additions and 363 deletions.
6 changes: 0 additions & 6 deletions docs/source/ttnn/ttnn/dependencies/tt_lib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -282,10 +282,6 @@ Tensor elementwise operations

.. autofunction:: tt_lib.tensor.cbrt

.. autofunction:: tt_lib.tensor.polyval

.. autofunction:: tt_lib.tensor.mac

.. autofunction:: tt_lib.tensor.swish

.. autofunction:: tt_lib.tensor.softsign
Expand Down Expand Up @@ -562,8 +558,6 @@ Other Operations

.. autofunction:: tt_lib.tensor.acosh

.. autofunction:: tt_lib.tensor.lerp

.. autofunction:: tt_lib.tensor.fill_rm

.. autofunction:: tt_lib.tensor.fill_ones_rm
Expand Down
8 changes: 2 additions & 6 deletions models/experimental/mistral/tt/mistral_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,10 @@ def forward(
self.cache_v[:bsz].scatter_(dim=1, index=scatter_pos, src=xv[:, -self.sliding_window :])
else:
self.cache_k = tt_to_torch_tensor(
tt_lib.tensor.scatter(
torch_to_tt_tensor_rm(xk, self.device), torch_to_tt_tensor_rm(self.cache_k, self.device)
)
ttnn.scatter(torch_to_tt_tensor_rm(xk, self.device), torch_to_tt_tensor_rm(self.cache_k, self.device))
)
self.cache_v = tt_to_torch_tensor(
tt_lib.tensor.scatter(
torch_to_tt_tensor_rm(xv, self.device), torch_to_tt_tensor_rm(self.cache_v, self.device)
)
ttnn.scatter(torch_to_tt_tensor_rm(xv, self.device), torch_to_tt_tensor_rm(self.cache_v, self.device))
)

if positions.shape[0] > 1:
Expand Down
12 changes: 6 additions & 6 deletions tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ def eltwise_polyval(
**kwargs,
):
t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
t1 = ttl.tensor.polyval(t0, coeffs, output_mem_config=output_mem_config)
t1 = ttnn.polyval(t0, coeffs, output_mem_config=output_mem_config)

return tt2torch_tensor(t1)

Expand All @@ -485,7 +485,7 @@ def eltwise_mac(x, y, z, *args, device, dtype, layout, input_mem_config, output_
t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
t1 = setup_tt_tensor(y, device, layout[1], input_mem_config[1], dtype[1])
t2 = setup_tt_tensor(z, device, layout[2], input_mem_config[2], dtype[2])
t3 = ttl.tensor.mac(t0, t1, t2, output_mem_config=output_mem_config)
t3 = ttnn.mac(t0, t1, t2, output_mem_config=output_mem_config)

return tt2torch_tensor(t3)

Expand Down Expand Up @@ -654,7 +654,7 @@ def eltwise_lerp_binary(
):
t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
t1 = setup_tt_tensor(y, device, layout[1], input_mem_config[1], dtype[1])
t2 = ttl.tensor.lerp(t0, t1, weight, output_mem_config=output_mem_config)
t2 = ttnn.lerp(t0, t1, weight, output_mem_config=output_mem_config)

return tt2torch_tensor(t2)

Expand Down Expand Up @@ -763,7 +763,7 @@ def eltwise_lerp_ternary(x, y, z, *args, device, dtype, layout, input_mem_config
t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
t1 = setup_tt_tensor(y, device, layout[1], input_mem_config[1], dtype[1])
t2 = setup_tt_tensor(z, device, layout[2], input_mem_config[2], dtype[2])
t3 = ttl.tensor.lerp(t0, t1, t2, output_mem_config=output_mem_config)
t3 = ttnn.lerp(t0, t1, t2, output_mem_config=output_mem_config)

return tt2torch_tensor(t3)

Expand Down Expand Up @@ -2618,9 +2618,9 @@ def binary_op(
eltwise_max = make_binary_op(ttl.tensor.max)

matmul = make_binary_op_ttnn(ttnn.matmul)
outer = make_binary_op(ttl.tensor.outer)
outer = make_binary_op(ttnn.outer)

eltwise_scatter = make_binary_op(ttl.tensor.scatter)
eltwise_scatter = make_binary_op(ttnn.scatter)
eltwise_nextafter = make_binary_op_ttnn(ttnn.nextafter)
eltwise_remainder = make_binary_op(ttl.tensor.remainder)
eltwise_fmod = make_binary_op(ttl.tensor.fmod)
Expand Down
4 changes: 2 additions & 2 deletions tests/ttnn/profiling/ops_for_profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -2551,8 +2551,8 @@ def fused_linear_shape_func(input_shape):

all_ternary_ops = [
{
"op": tt_lib.tensor.mac,
"name": "tt_lib.tensor.mac",
"op": ttnn.mac,
"name": "ttnn.mac",
},
{
"op": ttnn.where,
Expand Down
180 changes: 0 additions & 180 deletions ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,90 +207,6 @@ Tensor clip(const Tensor& a, float low, float high, const MemoryConfig& output_m
return operation::decorate_as_composite(__func__, _clip)(a, low, high, output_mem_config);
}

// Evaluates a polynomial at every element of input_tensor using Horner's rule.
//
// coeffs is ordered from the highest-degree coefficient down to the constant
// term, i.e. for coeffs = {c0, c1, ..., cn} the result is
//   (((x * c0 + c1) * x + c2) * x ... ) + cn.
// A single coefficient yields a tensor shaped like the input and filled with
// that constant. coeffs must not be empty (checked with TT_ASSERT).
Tensor _polyval(const Tensor& input_tensor, std::vector<float> coeffs, const MemoryConfig& output_mem_config) {
    TT_ASSERT(coeffs.size() != 0 && "coeffs should be 1 or more coefficients");
    if (coeffs.size() == 1) {
        return mk_filled_tensor_like(input_tensor, coeffs[0], output_mem_config);
    }

    // Builds a scalar tensor for one coefficient, matching the input's dtype and device.
    const auto make_coeff = [&input_tensor](float value) {
        return ttnn::operations::creation::create_scalar(
            value, input_tensor.get_dtype(), Layout::TILE, input_tensor.device());
    };

    // Seed the accumulator with x * c0.
    Tensor leading = make_coeff(coeffs.front());
    Tensor result = ttnn::multiply(input_tensor, leading, std::nullopt, output_mem_config);
    leading.deallocate();

    // Fold in every middle coefficient: result = (result + c_i) * x.
    // Iterators are used instead of a signed index so there is no
    // signed/unsigned comparison against coeffs.size(); at this point
    // coeffs has at least two entries, so begin() + 1 never passes `last`.
    const auto last = coeffs.end() - 1;
    for (auto it = coeffs.begin() + 1; it != last; ++it) {
        Tensor coeff = make_coeff(*it);
        result = ttnn::add(result, coeff, std::nullopt, output_mem_config);
        coeff.deallocate();
        result = ttnn::multiply(input_tensor, result, std::nullopt, output_mem_config);
    }

    // The constant term is only added, never multiplied by x.
    Tensor constant_term = make_coeff(coeffs.back());
    Tensor final_tensor = ttnn::add(result, constant_term, std::nullopt, output_mem_config);
    constant_term.deallocate();
    return final_tensor;
}
// Public entry point: runs _polyval through operation::decorate_as_composite
// under this function's name.
Tensor polyval(const Tensor& input_tensor, std::vector<float> coeffs, const MemoryConfig& output_mem_config) {
    return operation::decorate_as_composite(__func__, _polyval)(input_tensor, coeffs, output_mem_config);
}

// Function: MAC
// Computes multiply-accumulate y = a * b + c, where each of a, b, c may be a
// full tensor or a "scalar" tensor (intended_volume() == 1).
//
// The eight scalar/tensor combinations reduce to three operand orders:
//   - a and b scalar, c tensor : c + (a * b)
//   - a scalar, b tensor       : (b * a) + c
//   - everything else          : (a * b) + c
// NOTE(review): the operand swaps presumably keep the non-scalar operand first
// so the scalar is the one being broadcast — confirm against ttnn::add/multiply.
//
// output_mem_config is applied to every intermediate and to the result,
// including the all-scalar case (which previously fell back to the defaults).
Tensor _mac(const Tensor& a, const Tensor& b, const Tensor& c, const MemoryConfig& output_mem_config) {
    const bool a_is_scalar = a.intended_volume() == 1;
    const bool b_is_scalar = b.intended_volume() == 1;
    const bool c_is_scalar = c.intended_volume() == 1;

    if (a_is_scalar && b_is_scalar && !c_is_scalar) {
        // a - scalar, b - scalar, c - tensor
        return ttnn::add(
            c, ttnn::multiply(a, b, std::nullopt, output_mem_config), std::nullopt, output_mem_config);
    }

    if (a_is_scalar && !b_is_scalar) {
        // a - scalar, b - tensor, c - tensor or scalar
        return ttnn::add(
            ttnn::multiply(b, a, std::nullopt, output_mem_config), c, std::nullopt, output_mem_config);
    }

    // a - tensor (any b, c), or all three operands are scalars
    return ttnn::add(ttnn::multiply(a, b, std::nullopt, output_mem_config), c, std::nullopt, output_mem_config);
}
// Public entry point: runs _mac through operation::decorate_as_composite
// under this function's name.
Tensor mac(const Tensor& a, const Tensor& b, const Tensor& c, const MemoryConfig& output_mem_config) {
    return operation::decorate_as_composite(__func__, _mac)(a, b, c, output_mem_config);
}

// Multiply-accumulate with float operands: y = a * b + c.
// b and c are turned into scalar tensors (same dtype and device as a, TILE
// layout), handed to the tensor-only mac overload, and released afterwards.
Tensor _mac_overload(const Tensor& a, float b, float c, const MemoryConfig& output_mem_config) {
    // Wraps a float in a scalar tensor compatible with `a`.
    const auto as_scalar = [&a](float value) {
        return ttnn::operations::creation::create_scalar(value, a.get_dtype(), Layout::TILE, a.device());
    };

    Tensor multiplier = as_scalar(b);
    Tensor addend = as_scalar(c);

    Tensor output = mac(a, multiplier, addend, output_mem_config);

    // The temporaries are no longer needed once the result exists.
    multiplier.deallocate();
    addend.deallocate();
    return output;
}
// Public entry point for the float overload: runs _mac_overload through
// operation::decorate_as_composite under this function's name.
Tensor mac(const Tensor& input_a, float b, float c, const MemoryConfig& output_mem_config) {
    return operation::decorate_as_composite(__func__, _mac_overload)(input_a, b, c, output_mem_config);
}

// min(a,b) = a - (a - b > 0 )*(a-b)
Tensor _min(const Tensor& input_a, const Tensor& input_b, const MemoryConfig& output_mem_config) {
Tensor t_diff = ttnn::subtract(input_a, input_b, std::nullopt, output_mem_config);
Expand Down Expand Up @@ -439,19 +355,6 @@ Tensor atanh(const Tensor& input_a, const MemoryConfig& output_mem_config) {
return operation::decorate_as_composite(__func__, _atanh)(input_a, output_mem_config);
}

// lerp(start, end, weight) = start + weight * (end - start), with a float weight.
// Here input_a is `start`, input_b is `end` and value is `weight`.
Tensor _lerp(const Tensor& input_a, const Tensor& input_b, float value, const MemoryConfig& output_mem_config) {
    // The weight becomes a scalar tensor matching input_a's dtype and device.
    Tensor weight = ttnn::operations::creation::create_scalar(
        value, input_a.get_dtype(), Layout::TILE, input_a.device());

    Tensor delta = ttnn::subtract(input_b, input_a, std::nullopt, output_mem_config);
    Tensor scaled_delta = ttnn::multiply(delta, weight, std::nullopt, output_mem_config);
    return ttnn::add(input_a, scaled_delta, std::nullopt, output_mem_config);
}
// Public entry point: runs _lerp through operation::decorate_as_composite
// under this function's name.
Tensor lerp(const Tensor& input_a, const Tensor& input_b, float value, const MemoryConfig& output_mem_config) {
    return operation::decorate_as_composite(__func__, _lerp)(input_a, input_b, value, output_mem_config);
}

Tensor _atan2(const Tensor& input_a, const Tensor& input_b, const MemoryConfig& output_mem_config) {
Tensor result(input_a);
{
Expand Down Expand Up @@ -489,19 +392,6 @@ Tensor atan2(const Tensor& input_a, const Tensor& input_b, const MemoryConfig& o
return operation::decorate_as_composite(__func__, _atan2)(input_a, input_b, output_mem_config);
}

// lerp(start, end, weight) = start + weight * (end - start), with a tensor weight.
// Here input_a is `start`, input_b is `end` and input_c is `weight`.
Tensor _lerp_overload(
    const Tensor& input_a, const Tensor& input_b, const Tensor& input_c, const MemoryConfig& output_mem_config) {
    // weight * (end - start); the difference is a temporary of this expression.
    Tensor weighted_delta = ttnn::multiply(
        ttnn::subtract(input_b, input_a, std::nullopt, output_mem_config),
        input_c,
        std::nullopt,
        output_mem_config);
    return ttnn::add(input_a, weighted_delta, std::nullopt, output_mem_config);
}
// Public entry point for the tensor-weight overload: runs _lerp_overload
// through operation::decorate_as_composite under this function's name.
Tensor lerp(
    const Tensor& input_a, const Tensor& input_b, const Tensor& input_c, const MemoryConfig& output_mem_config) {
    return operation::decorate_as_composite(__func__, _lerp_overload)(input_a, input_b, input_c, output_mem_config);
}

// ldexp(input,other)=input * (2^other)
Tensor _ldexp(const Tensor& input_a, const Tensor& input_b, const MemoryConfig& output_mem_config) {
Tensor result = ttnn::multiply(input_a, ttnn::exp2(input_b, output_mem_config), std::nullopt, output_mem_config);
Expand Down Expand Up @@ -1056,18 +946,6 @@ Tensor normalize_global(const Tensor& y, const MemoryConfig& output_mem_config)
return operation::decorate_as_composite(__func__, _normalize_global)(y, output_mem_config);
}

// Overlays input_a onto input_b.
// input_a is viewed as 4D and padded out to input_b's 4D shape starting at
// index {0, 0, 0, 0}; a tensor of ones is padded the same way and used as the
// selector for where(), which picks the padded input_a or input_b.
// NOTE(review): the leading 0 passed to ttnn::pad is presumably a queue id and
// the later 0 the pad value, so the selector would be 0 outside input_a's
// footprint — confirm against the ttnn::pad signature.
Tensor _scatter(const Tensor& input_a, const Tensor& input_b, const MemoryConfig& output_mem_config) {
    tt::tt_metal::Array4D target_shape = input_b.get_legacy_shape().to_array_4D();
    tt::tt_metal::Array4D origin = {0, 0, 0, 0};
    ttnn::Tensor source_4d = ttnn::unsqueeze_to_4D(input_a);

    Tensor selector = ttnn::pad(
        0, ones_like(source_4d, output_mem_config), target_shape, origin, 0, false, std::nullopt);
    Tensor padded_source = ttnn::pad(0, source_4d, target_shape, origin, 0, false, std::nullopt);

    return where(selector, padded_source, input_b, output_mem_config);
}
// Public entry point: runs _scatter through operation::decorate_as_composite
// under this function's name.
Tensor scatter(const Tensor& input_a, const Tensor& input_b, const MemoryConfig& output_mem_config) {
    return operation::decorate_as_composite(__func__, _scatter)(input_a, input_b, output_mem_config);
}

// threshold(a,t,v) = (a <= t)*v + (a > t)*a
Tensor _threshold(const Tensor& input_tensor, float threshold, float value, const MemoryConfig& output_mem_config) {
Tensor t_threshold = ttnn::operations::creation::create_scalar(
Expand Down Expand Up @@ -1390,64 +1268,6 @@ Tensor arange(int32_t start, int32_t end, int32_t step, Device* device, const Me
return tt::numpy::arange<bfloat16>(start, end, step, Layout::ROW_MAJOR, device, output_mem_config);
}

/**
 * Outer product expressed as a matrix multiply: with a = [1,1,N,1] and
 * b = [1,1,1,M] the result has size [1,1,N,M].
 * - Any operand with at least three of its four dimensions equal to 1 is
 *   accepted; it is reshaped into the column / row form above when it is not
 *   already in that form.
 * - Both operands are converted to tile layout and, when a default device is
 *   available, moved onto it before the matmul.
 */
Tensor _outer(Tensor& a, Tensor& b, const MemoryConfig& output_mem_config) {
    const Shape shape_a = a.get_legacy_shape();
    const Shape shape_b = b.get_legacy_shape();

    // Counts how many of the four dimensions are exactly 1.
    auto count_unit_dims = [](const Shape& shape) -> uint32_t {
        uint32_t count = 0;
        for (uint32_t dim = 0; dim < 4; ++dim) {
            if (shape[dim] == 1) {
                ++count;
            }
        }
        return count;
    };

    // Each operand must be effectively one-dimensional.
    TT_ASSERT(count_unit_dims(shape_a) >= 3, "3 dimensions are required to be 1 for use with outer product");
    TT_ASSERT(count_unit_dims(shape_b) >= 3, "3 dimensions are required to be 1 for use with outer product");

    // a is already a column [1,1,N,1] / b is already a row [1,1,1,M].
    const bool a_is_column = (shape_a[0] == 1 && shape_a[1] == 1 && shape_a[2] >= 1 && shape_a[3] == 1);
    const bool b_is_row = (shape_b[0] == 1 && shape_b[1] == 1 && shape_b[2] == 1 && shape_b[3] >= 1);

    Tensor column = a_is_column ? a : reshape(a, 1, 1, a.volume(), 1, output_mem_config);
    Tensor row = b_is_row ? b : reshape(b, 1, 1, 1, b.volume(), output_mem_config);

    column = ttnn::to_layout(column, ttnn::TILE_LAYOUT, std::nullopt, std::nullopt, static_cast<Device*>(nullptr));
    row = ttnn::to_layout(row, ttnn::TILE_LAYOUT, std::nullopt, std::nullopt, static_cast<Device*>(nullptr));

    // Operands that are not in device storage are moved to the default device, if there is one.
    if (Device* device = AutoFormat::GetDefaultDevice(); device != nullptr) {
        if (column.storage_type() != tt::tt_metal::StorageType::DEVICE) {
            column = AutoFormat::move_tensor_to_device(column, device);
        }
        if (row.storage_type() != tt::tt_metal::StorageType::DEVICE) {
            row = AutoFormat::move_tensor_to_device(row, device);
        }
    }

    return ttnn::operations::matmul::matmul(
        column,
        row,
        /*bias=*/std::nullopt,
        tt::operations::primary::Matmul{
            /*program_config=*/std::nullopt,
            /*bcast_batch=*/std::nullopt,
            output_mem_config});
}
// Public entry point: runs _outer through operation::decorate_as_composite
// under this function's name.
Tensor outer(Tensor& a, Tensor& b, const MemoryConfig& output_mem_config) {
    return operation::decorate_as_composite(__func__, _outer)(a, b, output_mem_config);
}

std::vector<Tensor> split_tensor_for_glu(const Tensor& input_a, int32_t dim, const MemoryConfig& output_mem_config) {
std::vector<Tensor> t_split;
Shape inshape = input_a.get_legacy_shape();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,6 @@ Tensor bias_gelu_unary(
// Ref: https://pytorch.org/docs/stable/generated/torch.nn.Softsign.html
Tensor softsign(const Tensor& a, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

// Function: MAC
// Computes multiply-accumulate y = a * b + c, covering all 8 combinations of
// a, b and c each being either a scalar tensor or a full tensor.
// output_mem_config defaults to operation::DEFAULT_OUTPUT_MEMORY_CONFIG.
Tensor mac(
const Tensor& a,
const Tensor& b,
const Tensor& c,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);
// Overload of MAC where the multiplier b and the addend c are given as floats.
Tensor mac(
const Tensor& a, float b, float c, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

// Function Selu - scaled exponential linear
// use transformation y = scale * alpha * (exp(X)-1) by broadcast
Expand All @@ -72,11 +62,6 @@ Tensor celu(
// use transformation y = x * sigmoid( x ) by broadcast
Tensor swish(const Tensor& a, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

// Evaluates a polynomial at each element of input_tensor using Horner's rule.
// coeffs is ordered from the highest-degree coefficient down to the constant
// term and must contain at least one value.
// output_mem_config defaults to operation::DEFAULT_OUTPUT_MEMORY_CONFIG.
Tensor polyval(
const Tensor& input_tensor,
std::vector<float> coeffs,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

// min(a,b)
Tensor min(
Expand Down Expand Up @@ -243,25 +228,6 @@ Tensor addalpha(
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
std::optional<Tensor> output_tensor = std::nullopt);

// lerp(start, end, weight) = start + weight * (end - start), weight is a float.
// input_a is `start`, input_b is `end`, value is `weight`.
Tensor lerp(
const Tensor& input_a,
const Tensor& input_b,
float value,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

// lerp(start, end, weight) = start + weight * (end - start), weight is a tensor.
// input_a is `start`, input_b is `end`, input_c is `weight`.
Tensor lerp(
const Tensor& input_a,
const Tensor& input_b,
const Tensor& input_c,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

// scatter(input_a, input_b): overlays input_a onto input_b.
// input_a is padded to input_b's shape starting at index {0, 0, 0, 0} and the
// result is selected with where() between the padded input_a and input_b.
// NOTE(review): exact behavior outside input_a's footprint depends on
// ttnn::pad / where — confirm before relying on it.
Tensor scatter(
const Tensor& input_a,
const Tensor& input_b,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

// threshold(a,t,v) = (a <= t)*v + (a > t)*a
Tensor threshold(
const Tensor& input_a,
Expand Down Expand Up @@ -458,14 +424,6 @@ Tensor acosh(const Tensor& input_a, const MemoryConfig& output_mem_config = oper
// atanh[x] = 0.5 * ln((1 + x) / (1 - x))
Tensor atanh(const Tensor& input_a, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

/**
 * Outer product, computed as a matrix multiply: with a = [1,1,N,1] and
 * b = [1,1,1,M] the result is of size [1,1,N,M].
 * - The implementation supports any 1D "squeezable" tensor as an input
 *   operand (at least three of its four dimensions equal to 1) by reshaping
 *   it into the form above.
 * - output_mem_config defaults to operation::DEFAULT_OUTPUT_MEMORY_CONFIG.
 */
Tensor outer(Tensor& a, Tensor& b, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

// Function variance of whole tensor.
// Tensor variance(const Tensor& y,const Tensor& mean_y);
Tensor var_hw(const Tensor& y, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);
Expand Down
Loading

0 comments on commit 92d7a6f

Please sign in to comment.