Skip to content

Commit

Permalink
fix: CheckAndFlushIntermediateMemory method with the new memory containers
Browse files Browse the repository at this point in the history
  • Loading branch information
sanjibansg committed Jan 28, 2025
1 parent 87bdcae commit 9586209
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 32 deletions.
2 changes: 1 addition & 1 deletion tmva/sofie/inc/TMVA/RModel.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public:

void EvaluateIntermediateMemory(const std::vector<std::string>& op_input_tensors, const size_t& current_op_idx, size_t& total_memory, std::vector<size_t>& available_memory);
std::string CheckAndAllocateIntermediateMemory(const std::vector<std::string>& op_output_tensors);
void CheckAndFlushIntermediateMemory(const std::vector<std::string>& op_output_tensors);
void CheckAndFlushIntermediateMemory(const std::vector<std::string>& op_output_tensors, const size_t& op_idx);

protected:
// internal functions
Expand Down
11 changes: 3 additions & 8 deletions tmva/sofie/inc/TMVA/SOFIE_common.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -69,22 +69,17 @@ struct DynamicTensorInfo{

struct TensorMemoryInfo {
std::string tensor_name;
unsigned int chunk_idx;
size_t chunk_idx;
size_t tensor_size;
};

struct AvailableChunkInfo {
unsigned int chunk_idx;
size_t chunk_idx;
size_t chunk_size;
};
struct MemoryPoolInfo {
std::vector<TensorMemoryInfo> total_memory;
std::map<unsigned int, size_t> available_memory;
};

struct TensorCounter {
bool check_flag;
unsigned int frequency;
std::vector<std::pair<size_t, size_t>> available_memory;
};

std::vector<Dim> ConvertShapeToDim(std::vector<size_t> shape);
Expand Down
45 changes: 22 additions & 23 deletions tmva/sofie/src/RModel.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -314,16 +314,16 @@ void RModel::EvaluateIntermediateMemory(const std::vector<std::string>& op_input
*chunk -= tensor_size;
allocated = true;

// If the chunk is fully used, erase it
// erase the chunk if fully used
if (*chunk == 0) {
chunk = available_memory.erase(chunk); // Erase and update iterator
chunk = available_memory.erase(chunk); // erase and update iterator
} else {
++chunk; // Move to the next chunk
++chunk;
}

break; // Allocation successful, exit loop
break;
} else {
++chunk; // Move to the next chunk
++chunk;
}
}

Expand All @@ -343,9 +343,9 @@ void RModel::EvaluateIntermediateMemory(const std::vector<std::string>& op_input

std::string RModel::CheckAndAllocateIntermediateMemory(const std::vector<std::string>& op_output_tensors){
std::string memory_allocation_string;
bool allocated;

for (auto& it:op_output_tensors){
fIntermediateTensorFrequencyLookup[it].frequency--;
if(!fIntermediateTensorFrequencyLookup[it].check_flag){
auto tensor_size = ConvertShapeToLength(GetTensorShape(it));

if (!fIntermediateMemoryInfo.available_memory.empty()){
Expand All @@ -355,7 +355,7 @@ std::string RModel::CheckAndAllocateIntermediateMemory(const std::vector<std::st
if (chunk->second >= tensor_size) {
chunk->second -= tensor_size;

fIntermediateTensorFrequencyLookup[it].check_flag = true;
allocated = true;

if (chunk->second == 0) {
chunk = fIntermediateMemoryInfo.available_memory.erase(chunk);
Expand All @@ -368,7 +368,7 @@ std::string RModel::CheckAndAllocateIntermediateMemory(const std::vector<std::st
}
}

if (!fIntermediateTensorFrequencyLookup[it].check_flag) {
if (!allocated) {

Check warning on line 371 in tmva/sofie/src/RModel.cxx

View workflow job for this annotation

GitHub Actions / mac13 ARM64 LLVM_ENABLE_ASSERTIONS=On, builtin_zlib=ON

variable 'allocated' may be uninitialized when used here [-Wconditional-uninitialized]

Check warning on line 371 in tmva/sofie/src/RModel.cxx

View workflow job for this annotation

GitHub Actions / alma9-clang clang LLVM_ENABLE_ASSERTIONS=On, CMAKE_C_COMPILER=clang, CMAKE_CXX_COMPILER=clang++

variable 'allocated' may be uninitialized when used here [-Wconditional-uninitialized]

Check warning on line 371 in tmva/sofie/src/RModel.cxx

View workflow job for this annotation

GitHub Actions / mac15 ARM64 LLVM_ENABLE_ASSERTIONS=On, CMAKE_CXX_STANDARD=20

variable 'allocated' may be uninitialized when used here [-Wconditional-uninitialized]

Check warning on line 371 in tmva/sofie/src/RModel.cxx

View workflow job for this annotation

GitHub Actions / mac-beta ARM64 LLVM_ENABLE_ASSERTIONS=On, CMAKE_CXX_STANDARD=20

variable 'allocated' may be uninitialized when used here [-Wconditional-uninitialized]

Check warning on line 371 in tmva/sofie/src/RModel.cxx

View workflow job for this annotation

GitHub Actions / mac14 X64 LLVM_ENABLE_ASSERTIONS=On, CMAKE_CXX_STANDARD=20

variable 'allocated' may be uninitialized when used here [-Wconditional-uninitialized]
if (fIntermediateMemoryInfo.total_memory.size()){
auto previous_tensor_idx = fIntermediateMemoryInfo.total_memory.back().chunk_idx;
auto previous_tensor_size = fIntermediateMemoryInfo.total_memory.back().tensor_size;
Expand All @@ -388,19 +388,20 @@ std::string RModel::CheckAndAllocateIntermediateMemory(const std::vector<std::st
}
}

}

}
return memory_allocation_string;
}

void RModel::CheckAndFlushIntermediateMemory(const std::vector<std::string>& op_output_tensors){
void RModel::CheckAndFlushIntermediateMemory(const std::vector<std::string>& op_output_tensors, const size_t& op_idx){
for (auto &it : op_output_tensors){
fIntermediateTensorFrequencyLookup[it].frequency--;
if (fIntermediateTensorFrequencyLookup[it].frequency == 0) {

// last occurrence of the tensor is reached => flush it from memory
if (fIntermediateTensorFrequencyLookup[it].second == op_idx) {
for (auto chunk = fIntermediateMemoryInfo.total_memory.begin();
chunk != fIntermediateMemoryInfo.total_memory.end(); ) {
if (chunk->tensor_name == it) {
fIntermediateMemoryInfo.available_memory.insert({chunk->chunk_idx, chunk->tensor_size});
fIntermediateMemoryInfo.available_memory.push_back({chunk->chunk_idx, chunk->tensor_size});

chunk = fIntermediateMemoryInfo.total_memory.erase(chunk);
} else {
Expand All @@ -427,8 +428,6 @@ void RModel::Initialize(const std::map<std::string, size_t> & inputParams, bool

fVerbose = int(verbose);

auto fIntermediateTensorCounter = fIntermediateTensorFrequencyLookup;

if (fIsInitialized) {
if (verbose)
std::cout << "Model is already initialized - skip initialization " << std::endl;
Expand Down Expand Up @@ -505,15 +504,15 @@ void RModel::Initialize(const std::map<std::string, size_t> & inputParams, bool


size_t total_memory;
std::vector<size_t> available_memory;
std::vector<size_t> available_memory; // vector stores individual chunks of available memory that may be reused

for(size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx){
if (verbose) {
auto& r = *fOperators[op_idx].get();
std::cout << "Initializing operator " << i << " " << typeid(r).name() << std::endl;
}
fOperators[op_idx]->Initialize(*this);
EvaluateIntermediateMemory(op->GetOpInputTensors(), op_idx, &total_memory, &available_memory);
EvaluateIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx, total_memory, available_memory);
i++;
}

Expand Down Expand Up @@ -718,11 +717,11 @@ void RModel::GenerateOutput() {

fGC += "){\n";

for (size_t id = 0; id < fOperators.size(); id++) {
if (fVerbose) std::cout << "Generating code for operator .... " << id << std::endl;
fGC += CheckAndAllocateIntermediateMemory(fOperators[id]->GetOpOutputTensors());
fGC += (fOperators[id]->Generate(std::to_string(id)));
CheckAndFlushIntermediateMemory(fOperators[id]->GetOpOutputTensors());
for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) {
if (fVerbose) std::cout << "Generating code for operator .... " << op_idx << std::endl;
fGC += CheckAndAllocateIntermediateMemory(fOperators[op_idx]->GetOpOutputTensors());
fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx)));
CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpOutputTensors(), op_idx);
}

if (outputSize == 1) {
Expand Down

0 comments on commit 9586209

Please sign in to comment.