Skip to content

Commit

Permalink
[tmva][sofie] Add new complete implementation of Split operator
Browse files Browse the repository at this point in the history
Add a new implementation of Split supporting the various cases: splitting along any axis and with different split sizes.
Previously, Split worked only for 1D tensors.
  • Loading branch information
lmoneta committed Feb 5, 2025
1 parent fea4b7e commit f405ea6
Show file tree
Hide file tree
Showing 6 changed files with 243 additions and 31 deletions.
112 changes: 88 additions & 24 deletions tmva/sofie/inc/TMVA/ROperator_Split.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,26 @@ namespace TMVA{
namespace Experimental{
namespace SOFIE{

template <typename T>

class ROperator_Split final : public ROperator
{

private:

int fAxis = 0;
std::string fNX;
std::string fNS;
std::string fNSplit;
std::vector<std::string> fNYs;
std::vector<size_t> fInputShape;
std::vector<int64_t> fSplit;
std::vector<std::vector<size_t>> fOutputShapes;



public:
ROperator_Split(){}
ROperator_Split(const std::string & nameX, const std::string & nameS, const std::vector<std::string> & namesY):
fNX(UTILITY::Clean_name(nameX)), fNS(UTILITY::Clean_name(nameS)){
ROperator_Split(const std::string & nameX, const std::string & nameS, int axis, const std::vector<std::string> & namesY):
fAxis(axis), fNX(UTILITY::Clean_name(nameX)), fNSplit(UTILITY::Clean_name(nameS)) {
fNYs.reserve(namesY.size());
for (auto & name : namesY)
fNYs.push_back(UTILITY::Clean_name(name));
Expand All @@ -45,26 +49,52 @@ public:
if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor
throw std::runtime_error("TMVA SOFIE Split Op Input Tensor is not found in model");
}
auto inputShape = model.GetTensorShape(fNX);

// support now splitting only of 1D tensors and assuming tensor can be split in equal parts
//int splitAxis = 0; // assume split with zero axis
int nsplit = fNYs.size();
// support now only 1D tensor
if (inputShape.size() > 1)
throw std::runtime_error("TMVA SOFIE Split Op supports now only 1D tensors");
// support only equal splits
if (inputShape[0] % nsplit != 0)
throw std::runtime_error("TMVA SOFIE Split Op does not support splitting of " + ConvertShapeToString(inputShape)
+ " into " + std::to_string(nsplit));

fInputShape = model.GetTensorShape(fNX);

// correct for negative axis
if (fAxis < 0) fAxis += fInputShape.size();
if (fAxis < 0 || fAxis >= static_cast<int>(fInputShape.size()) )
throw std::runtime_error("TMVA SOFIE Split - invalid axis " + std::to_string(fAxis));

// compute output shapes
size_t nsplit = fNYs.size();
// case split tensor is empty
if (fNSplit.empty()) {
int64_t splitValue = 0;
if (fInputShape[fAxis] % nsplit == 0) {
splitValue = fInputShape[fAxis]/nsplit;
fSplit = std::vector<int64_t>(nsplit, splitValue);
} else {
// case of not equal splitting
splitValue = std::ceil(double(fInputShape[fAxis])/nsplit);
fSplit = std::vector<int64_t>(nsplit-1, splitValue);
fSplit.push_back(fInputShape[fAxis] % splitValue);
}
} else {
// get split tensor values
if (!model.IsInitializedTensor(fNSplit))
throw std::runtime_error("TMVA SOFIE Split - non-initialized split tensors are not supported");
auto splitShape = model.GetTensorShape(fNSplit);
if (splitShape.size() != 1 || splitShape[0] != nsplit)
throw std::runtime_error("TMVA SOFIE Split - split input tensor has invalid shape");
auto split_data = static_cast<int64_t *>(model.GetInitializedTensorData(fNSplit).get());
fSplit = std::vector<int64_t>(split_data, split_data + nsplit);
}
// compute now the output shapes
size_t tot_split = 0;
for (size_t i = 0; i < fNYs.size(); i++) {
std::vector<size_t> outputShape = { inputShape[0]/nsplit };
std::vector<size_t> outputShape = fInputShape;
outputShape[fAxis] = fSplit[i];
tot_split += fSplit[i];
model.AddIntermediateTensor(fNYs[i], model.GetTensorType(fNX), outputShape);
fOutputShapes.push_back(outputShape); // need for generating code
fOutputShapes.push_back(outputShape);
}
if (tot_split != fInputShape[fAxis])
throw std::runtime_error("TMVA SOFIE Split - Sum of split sizes must match the input dimension along the axis");


if (model.Verbose()) {
std::cout << "Split - input shape " << ConvertShapeToString(inputShape) << " --> ";
std::cout << "Split - input shape " << ConvertShapeToString(fInputShape) << " --> ";
for (auto & s : fOutputShapes)
std::cout << ConvertShapeToString(s) << " ";
std::cout << std::endl;
Expand All @@ -77,15 +107,49 @@ public:
if (fOutputShapes.empty()){
throw std::runtime_error("TMVA SOFIE Operator Split called to Generate without being initialized first");
}

// compute input and output strides
auto input_strides = UTILITY::ComputeStrideFromShape(fInputShape);
std::vector<std::vector<size_t>> output_strides;
for (size_t i = 0; i < fOutputShapes.size(); i++) {
output_strides.emplace_back( UTILITY::ComputeStrideFromShape(fOutputShapes[i]));
}

// generate now the code for split
std::stringstream out;
out << "\n//------ Split\n";
out << "size_t offset = 0;\n";
out << "\n" << SP << "//------ Split\n";
out << SP << "size_t " << OpName << "_axis_offset = 0;\n";
// unroll the loop on split outputs
for (size_t i = 0; i < fNYs.size(); i++) {
int length = ConvertShapeToLength(fOutputShapes[i]);
auto output_strides = UTILITY::ComputeStrideFromShape(fOutputShapes[i]);

out << SP << "for (int id = 0; id < " << length << " ; id++){\n";
out << SP << SP << "tensor_" << fNYs[i] << "[id] = tensor_" << fNX <<"[offset+id];\n";
// convert output index to input index
out << SP << SP << "int input_index = 0;\n";
out << SP << SP << "int remaining = id;\n";
// loop on dimensions to compute the input indices(unroll this loop)
for (size_t k = 0; k < fOutputShapes[i].size(); ++k) {
out << SP << SP << "// dim " << k << "\n";
if (k < fOutputShapes[i].size()-1) {
out << SP << SP << "input_index += (int(remaining / " << output_strides[k] << ")";
// for the split axis we need to consider the offset in the splits when converting to input coordinates
if (k == static_cast<size_t>(fAxis) && i > 0)
out << " + " << OpName << "_axis_offset";
out << ") * " << input_strides[k] << ";\n";
out << SP << SP << "remaining %= " << output_strides[k] << ";\n";
} else {
// for last dims all strides are one
out << SP << SP << "input_index += remaining";
if (k == static_cast<size_t>(fAxis) && i > 0)
out << " + " << OpName << "_axis_offset";
out << ";\n\n";
}
}

out << SP << SP << "tensor_" << fNYs[i] << "[id] = tensor_" << fNX <<"[input_index];\n";
out << SP << "}\n";
if (i < fNYs.size()-1) out << SP << "offset += " << length << ";\n";
if (i < fNYs.size()-1) out << SP << OpName << "_axis_offset += " << fSplit[i] << ";\n";
}
return out.str();
}
Expand Down
64 changes: 64 additions & 0 deletions tmva/sofie/test/TestCustomModelsFromONNX.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,10 @@
#include "RandomUniform_FromONNX.hxx"
#include "RandomNormal_FromONNX.hxx"

#include "Split_0_FromONNX.hxx"
#include "Split_1_FromONNX.hxx"
#include "Split_2_FromONNX.hxx"

#include "gtest/gtest.h"

constexpr float DEFAULT_TOLERANCE = 1e-3f;
Expand Down Expand Up @@ -3103,4 +3107,64 @@ TEST(ONNX, RandomNormal)
for (size_t i = 0; i < output.size(); ++i) {
EXPECT_LE(std::abs(output[i] - correct_output[i]), DEFAULT_TOLERANCE);
}
}

// Checks the Split operator along axis 0: the 12 input values (described by the
// model as a {2,2,3} tensor) are split into 2 output tensors of 6 values each.
TEST(ONNX, Split_0)
{
   // split along axis 0 into 2 tensors, input tensor is {2,2,3}
   std::vector<float> input {1.,2.,3,4,5,6,7,8,9,10,11,12};
   std::vector<std::vector<float>> correct_output ={ {1,2,3,4,5,6}, {7,8,9,10,11,12} };

   TMVA_SOFIE_Split_0::Session s("Split_0_FromONNX.dat");

   auto output = s.infer(input.data());

   // Checking the number of outputs; fatal, because correct_output[i] is indexed below
   ASSERT_EQ(output.size(), correct_output.size());
   for (size_t i = 0; i < output.size(); ++i) {
      // Checking the size of each output; fatal, so that a too long output cannot
      // read past the end of correct_output[i] and a too short one cannot pass silently
      ASSERT_EQ(output[i].size(), correct_output[i].size());
      // Checking output values
      for (size_t j = 0; j < output[i].size(); ++j) {
         EXPECT_LE(std::abs(output[i][j] - correct_output[i][j]), DEFAULT_TOLERANCE);
      }
   }
}

// Checks the Split operator along axis 1: the 12 input values (described by the
// model as a {2,2,3} tensor) are split into 2 output tensors of 6 values each.
TEST(ONNX, Split_1)
{
   // split along axis 1 into 2 tensors, input tensor is {2,2,3}
   std::vector<float> input {1.,2.,3,4,5,6,7,8,9,10,11,12};
   std::vector<std::vector<float>> correct_output ={ {1,2,3,7,8,9}, {4,5,6,10,11,12} };

   TMVA_SOFIE_Split_1::Session s("Split_1_FromONNX.dat");

   auto output = s.infer(input.data());

   // Checking the number of outputs; fatal, because correct_output[i] is indexed below
   ASSERT_EQ(output.size(), correct_output.size());
   for (size_t i = 0; i < output.size(); ++i) {
      // Checking the size of each output; fatal, so that a too long output cannot
      // read past the end of correct_output[i] and a too short one cannot pass silently
      ASSERT_EQ(output[i].size(), correct_output[i].size());
      // Checking output values
      for (size_t j = 0; j < output[i].size(); ++j) {
         EXPECT_LE(std::abs(output[i][j] - correct_output[i][j]), DEFAULT_TOLERANCE);
      }
   }
}

// Checks the Split operator along axis 2 with unequal parts: the 12 input values
// (described by the model as a {2,2,3} tensor) are split into outputs of 8 and 4 values.
TEST(ONNX, Split_2)
{
   // split along axis 2 into 2 tensors, input tensor {2,2,3} -> {2,2,2} and {2,2,1}
   std::vector<float> input {1.,2.,3,4,5,6,7,8,9,10,11,12};
   std::vector<std::vector<float>> correct_output ={ {1,2,4,5,7,8,10,11}, {3,6,9,12} };

   TMVA_SOFIE_Split_2::Session s("Split_2_FromONNX.dat");

   auto output = s.infer(input.data());

   // Checking the number of outputs; fatal, because correct_output[i] is indexed below
   ASSERT_EQ(output.size(), correct_output.size());
   for (size_t i = 0; i < output.size(); ++i) {
      // Checking the size of each output; the two outputs have different lengths here,
      // so a wrong split size must fail instead of reading past correct_output[i]
      ASSERT_EQ(output[i].size(), correct_output[i].size());
      // Checking output values
      for (size_t j = 0; j < output[i].size(); ++j) {
         EXPECT_LE(std::abs(output[i][j] - correct_output[i][j]), DEFAULT_TOLERANCE);
      }
   }
}
23 changes: 23 additions & 0 deletions tmva/sofie/test/input_models/Split_0.onnx
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@

 onnx-example:�
5 split_values"Constant*
value*:Bsplit�
.
input
split_valuesoutput1output2"Split
SplitGraphZ
input



b
output1



b
output2



B
24 changes: 24 additions & 0 deletions tmva/sofie/test/input_models/Split_1.onnx
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

 onnx-example:�
5 split_values"Constant*
value*:Bsplit�
;
input
split_valuesoutput1output2"Split*
axis�
SplitGraphZ
input



b
output1



b
output2



B
22 changes: 22 additions & 0 deletions tmva/sofie/test/input_models/Split_2.onnx
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

 onnx-example:�
A
inputoutput1output2"Split*
axis�*
num_outputs�
SplitGraphZ
input



b
output1



b
output2



B
29 changes: 22 additions & 7 deletions tmva/sofie_parsers/src/ParseSplit.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,34 @@ ParserFuncSignature ParseSplit = [](RModelParser_ONNX &parser, const onnx::NodeP
}
}

// ignore for time being attributes
if (nodeproto.attribute_size() > 0 )
std::cout << "WARNING: TMVA::SOFIE ONNX Parser Split operator: attributes are not yet supported- they are ignored" << std::endl;
int axis = 0;
int num_outputs = 0;
for (int i = 0; i < nodeproto.attribute_size(); i++) {
std::string attribute_name = nodeproto.attribute(i).name();
if (attribute_name == "axis") {
axis = nodeproto.attribute(i).i();
}
else if (attribute_name == "num_outputs") {
num_outputs = nodeproto.attribute(i).i();
}
else
throw std::runtime_error("TMVA::SOFIE ONNX Parser Split operator: attribute" + attribute_name + "is not yet supported");
}

// number of splits are given by the number of output tensors
size_t output_size = nodeproto.output_size();
int output_size = nodeproto.output_size();
std::vector<std::string> output_names(output_size);
for (size_t i = 0; i < output_size; i++)
for (int i = 0; i < output_size; i++)
output_names[i] = nodeproto.output(i);

std::unique_ptr<ROperator> op(new ROperator_Split<float>(input_name, split_name, output_names));
if (num_outputs > 0 && num_outputs != output_size)
throw std::runtime_error("TMVA::SOFIE ONNX Parser Split - invalid output size: " + std::to_string(output_size) + " instead of " +
std::to_string(num_outputs));

std::unique_ptr<ROperator> op(new ROperator_Split(input_name, split_name, axis, output_names));


for (size_t i = 0; i < output_size; i++) {
for (int i = 0; i < output_size; i++) {
if (!parser.IsRegisteredTensorType(output_names[i])) {
parser.RegisterTensorType(output_names[i], input_type);
}
Expand Down

0 comments on commit f405ea6

Please sign in to comment.