diff --git a/benchmarks/lbm/lbm.py b/benchmarks/lbm/lbm.py index 8e56dd9f..8606e81b 100644 --- a/benchmarks/lbm/lbm.py +++ b/benchmarks/lbm/lbm.py @@ -1,6 +1,6 @@ -deviceType_LIST = 'cpu gpu'.split() +deviceType_LIST = 'gpu'.split() deviceIds_LIST = "0 1 2 3 4 5 6 7".split() -grid_LIST = "dGrid bGrid_4_4_4".split() +grid_LIST = "bGrid_4_4_4 bGridMgpu_4_4_4".split() domainSize_LIST = "64 80 96 112 128 144 160 176 192 208 224 240 256 272 288 304 320 336 352 368 384 400 416 432 448 464 480 496 512".split() computeFP_LIST = "double float".split() storageFP_LIST = "double float".split() @@ -35,7 +35,7 @@ def getDeviceConfigurations(DEVICE_TYPE, deviceIds_LIST): return [deviceIds_LIST[0]] if goal_is_efficiency_max_num_devices: - return [deviceIds_LIST[0], deviceIds_LIST] + return [deviceIds_LIST[0], ' '.join(deviceIds_LIST)] def printProgressBar(value, label): diff --git a/benchmarks/lbm/src/RunCavityTwoPop.cu b/benchmarks/lbm/src/RunCavityTwoPop.cu index ea6ae23f..c3cfc561 100644 --- a/benchmarks/lbm/src/RunCavityTwoPop.cu +++ b/benchmarks/lbm/src/RunCavityTwoPop.cu @@ -3,11 +3,9 @@ #include "D3Q19.h" #include "D3Q27.h" -#include "Neon/domain/bGrid.h" -#include "Neon/domain/dGrid.h" +#include "Neon/domain/Grids.h" #include "Neon/domain/details/dGridDisg/dGrid.h" #include "Neon/domain/details/dGridSoA/dGridSoA.h" -#include "Neon/domain/eGrid.h" #include "./Lbm.h" #include "CellType.h" @@ -135,18 +133,18 @@ auto runFilterCollision(Config& config, testCode << "_bgk"; return runFilterMethod(config, report, testCode); } - // if (config.collisionCli.getOption() == Collision::kbc) { - // if (config.lattice != "d3q27" && config.lattice != "D3Q27") { - // Neon::NeonException e("runFilterCollision"); - // e << "LBM kbc collision model only supports d3q27 lattice"; - // NEON_THROW(e); - // } - // testCode << "_kbc"; - // using L = D3Q27>; - // if constexpr (std::is_same_v) { - // return runFilterMethod(config, report, testCode); - // } - // } + if (config.collisionCli.getOption() 
== Collision::kbc) { + if (config.lattice != "d3q27" && config.lattice != "D3Q27") { + Neon::NeonException e("runFilterCollision"); + e << "LBM kbc collision model only supports d3q27 lattice"; + NEON_THROW(e); + } + testCode << "_kbc"; + using L = D3Q27>; + if constexpr (std::is_same_v) { + return runFilterMethod(config, report, testCode); + } + } NEON_DEV_UNDER_CONSTRUCTION(""); } @@ -218,13 +216,25 @@ auto run(Config& config, testCode << "___" << config.N << "_"; testCode << "_numDevs_" << config.devices.size(); - if (config.gridType == "dGrid") { - testCode << "_dGrid"; - return details::runFilterStoreType(config, report, testCode); + // if (config.gridType == "dGrid") { + // testCode << "_dGrid"; + // return details::runFilterStoreType(config, report, testCode); + // } + // if (config.gridType == "dGridDisg") { + // testCode << "_dGridDisg"; + // return details::runFilterStoreType(config, report, testCode); + // } + if (config.gridType == "bGrid_4_4_4") { + testCode << "_bGrid_4_4_4"; + using Block = Neon::domain::details::bGrid::StaticBlock<4, 4, 4>; + using Grid = Neon::domain::details::bGrid::bGrid; + return details::runFilterStoreType(config, report, testCode); } - if (config.gridType == "dGridDisg") { - testCode << "_dGridDisg"; - return details::runFilterStoreType(config, report, testCode); + if (config.gridType == "bGridMgpu_4_4_4") { + testCode << "_bGridMgpu_4_4_4"; + using Block = Neon::domain::details::bGridMgpu::StaticBlock<4, 4, 4>; + using Grid = Neon::domain::details::bGridMgpu::bGrid; + return details::runFilterStoreType(config, report, testCode); } // if (config.gridType == "eGrid") { // if constexpr (!skipTest) { diff --git a/libNeonDomain/include/Neon/domain/Grids.h b/libNeonDomain/include/Neon/domain/Grids.h index a7c9a878..6e48f584 100644 --- a/libNeonDomain/include/Neon/domain/Grids.h +++ b/libNeonDomain/include/Neon/domain/Grids.h @@ -5,3 +5,4 @@ #include "Neon/domain/bGrid.h" #include "Neon/domain/dGridSoA.h" #include 
"Neon/domain/bGridDisg.h" +#include "Neon/domain/bGridMgpuDisg.h" diff --git a/libNeonDomain/include/Neon/domain/bGridMgpuDisg.h b/libNeonDomain/include/Neon/domain/bGridMgpuDisg.h new file mode 100644 index 00000000..25d68e88 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/bGridMgpuDisg.h @@ -0,0 +1,6 @@ +#pragma once +#include "Neon/domain/details/bGridDisgMgpu//bGrid.h" + +namespace Neon { +using bGridMgpu = Neon::domain::details::bGridMgpu::bGridMgpuDefault; +} \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGrid/bPartition.h b/libNeonDomain/include/Neon/domain/details/bGrid/bPartition.h index 8ce0fea8..a5a79ae2 100644 --- a/libNeonDomain/include/Neon/domain/details/bGrid/bPartition.h +++ b/libNeonDomain/include/Neon/domain/details/bGrid/bPartition.h @@ -139,6 +139,7 @@ class bPartition getDomainSize() const -> Neon::index_3d; + NEON_CUDA_HOST_DEVICE auto mem() const -> T const *; /** diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisg/bField.h b/libNeonDomain/include/Neon/domain/details/bGridDisg/bField.h index 7b9532de..86a45575 100644 --- a/libNeonDomain/include/Neon/domain/details/bGridDisg/bField.h +++ b/libNeonDomain/include/Neon/domain/details/bGridDisg/bField.h @@ -109,9 +109,7 @@ class bField : public Neon::domain::interface::FieldBaseTemplate memoryField; int cardinality; - // Neon::domain::tool::HaloTable1DPartitioning latticeHaloUpdateTable; - Neon::domain::tool::HaloTable1DPartitioning soaHaloUpdateTable; - // Neon::domain::tool::HaloTable1DPartitioning aosHaloUpdateTable; + Neon::domain::tool::HaloTable1DPartitioning mStandardHaloUpdateTable; Neon::domain::tool::PartitionTable partitionTable; }; std::shared_ptr mData; diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisg/bField_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisg/bField_imp.h index ab122f8b..e7acbe8c 100644 --- a/libNeonDomain/include/Neon/domain/details/bGridDisg/bField_imp.h +++ 
b/libNeonDomain/include/Neon/domain/details/bGridDisg/bField_imp.h @@ -191,7 +191,7 @@ auto bField::newHaloUpdate(Neon::set::StencilSemantic stencilSeman for (auto byDirection : {tool::partitioning::ByDirection::up, tool::partitioning::ByDirection::down}) { - auto const& tableEntryByDir = mData->soaHaloUpdateTable.get(transferMode, + auto const& tableEntryByDir = mData->mStandardHaloUpdateTable.get(transferMode, execution, byDirection); @@ -262,7 +262,7 @@ auto bField::initHaloUpdateTable() -> void return res; }; - mData->soaHaloUpdateTable.forEachPutConfiguration( + mData->mStandardHaloUpdateTable.forEachPutConfiguration( bk, [&](Neon::SetIdx setIdxSrc, Execution execution, Neon::domain::tool::partitioning::ByDirection byDirection, diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisg/bGrid.h b/libNeonDomain/include/Neon/domain/details/bGridDisg/bGrid.h index 5976b2c8..72e39ee9 100644 --- a/libNeonDomain/include/Neon/domain/details/bGridDisg/bGrid.h +++ b/libNeonDomain/include/Neon/domain/details/bGridDisg/bGrid.h @@ -223,6 +223,10 @@ class bGrid : public Neon::domain::interface::GridBaseTemplate, */ auto helpGetSetIdxAndGridIdx(Neon::index_3d idx) const -> std::tuple; + template + auto init_mask_field(ActiveCellLambda activeCellLambda) -> void; + auto helpGetClassField() -> Field&; + struct Data { auto init(const Neon::Backend& bk) @@ -258,6 +262,9 @@ class bGrid : public Neon::domain::interface::GridBaseTemplate, AlphaGrid alphaGrid; BetaGrid betaGrid; + + Field maskClassField; + }; std::shared_ptr mData; }; diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisg/bGrid_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisg/bGrid_imp.h index 4af56db8..e5724510 100644 --- a/libNeonDomain/include/Neon/domain/details/bGridDisg/bGrid_imp.h +++ b/libNeonDomain/include/Neon/domain/details/bGridDisg/bGrid_imp.h @@ -279,6 +279,7 @@ bGrid::bGrid(const Neon::Backend& backend, mData->alphaGrid = details::cGrid::cGrid(*this); mData->betaGrid = 
details::cGrid::cGrid(*this); + init_mask_field(activeCellLambda); } template @@ -332,11 +333,11 @@ auto bGrid::newContainer(const std::string& name, { const Neon::index_3d& defaultBlockSize = this->getDefaultBlock(); Neon::set::Container kContainer = Neon::set::Container::factory(name, - Neon::set::internal::ContainerAPI::DataViewSupport::on, - *this, - lambda, - defaultBlockSize, - [](const Neon::index_3d&) { return 0; }); + Neon::set::internal::ContainerAPI::DataViewSupport::on, + *this, + lambda, + defaultBlockSize, + [](const Neon::index_3d&) { return 0; }); return kContainer; } @@ -534,4 +535,40 @@ auto bGrid::helpGetPartitioner1D() -> Neon::domain::tool::Partitioner1D& return mData->partitioner1D; } +template +template +auto bGrid::init_mask_field([[maybe_unused]] ActiveCellLambda activeCellLambda) -> void +{ + using returTypeOfLambda = typename std::invoke_result::type; + if constexpr (std::is_same_v) { + + + auto maskField = this->newField("maskField", 1, 0, Neon::DataUse::HOST_DEVICE); + maskField.getGrid().template newContainer( + "maskFieldInit", + [&](Neon::set::Loader& loader) { + auto maskFieldPartition = loader.load(maskField); + return [activeCellLambda, maskFieldPartition] (const auto& gIdx) mutable { + auto globalPosition = maskFieldPartition.getGlobalIndex(gIdx); + details::cGrid::ClassSelector voxelClass = activeCellLambda(globalPosition); + maskFieldPartition(gIdx, 0) = static_cast(voxelClass); +// maskFieldPartition(gIdx, 0) = 33; + }; + }) + .run(Neon::Backend::mainStreamIdx); + this->getBackend().sync(Neon::Backend::mainStreamIdx); + maskField.updateDeviceData(Neon::Backend::mainStreamIdx); + this->getBackend().sync(Neon::Backend::mainStreamIdx); + maskField.template ioToVtk("maskField", "maskField"); + this->mData->maskClassField = maskField; + return; + } +} + +template +auto bGrid::helpGetClassField() -> Field& +{ + return mData->maskClassField; +} + } // namespace Neon::domain::details::disaggregated::bGrid \ No newline at end of file 
diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisg/bPartition.h b/libNeonDomain/include/Neon/domain/details/bGridDisg/bPartition.h index 7af86346..23e8b4bb 100644 --- a/libNeonDomain/include/Neon/domain/details/bGridDisg/bPartition.h +++ b/libNeonDomain/include/Neon/domain/details/bGridDisg/bPartition.h @@ -136,6 +136,7 @@ namespace Neon::domain::details::disaggregated::bGrid { getDomainSize() const -> Neon::index_3d; + NEON_CUDA_HOST_DEVICE auto mem() const -> T const *; /** diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/BlockView.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/BlockView.h new file mode 100644 index 00000000..6b5cfffc --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/BlockView.h @@ -0,0 +1,29 @@ +#include "Neon/domain/details/bGridDisgMask/BlockViewGrid//BlockViewGrid.h" +#include "Neon/domain/tools/GridTransformer.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + +struct BlockView +{ + public: + using Grid = Neon::domain::tool::GridTransformer::Grid; + template + using Field = Grid::template Field; + using index_3d = Neon::index_3d; + + template + static auto helpGetReference(T* mem, const int idx, const int card) -> std::enable_if_t + { + return mem[idx * card]; + } + + template + static auto helpGetReference(T* mem, const int idx, const int card) -> std::enable_if_t + { + return mem[idx * C]; + } + + static constexpr Neon::MemoryLayout layout = Neon::MemoryLayout::arrayOfStructs; +}; + +} // namespace Neon::domain::details::disaggregated::bGrid \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/BlockViewGrid/BlockViewGrid.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/BlockViewGrid/BlockViewGrid.h new file mode 100644 index 00000000..51eb9717 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/BlockViewGrid/BlockViewGrid.h @@ -0,0 +1,97 @@ +#pragma once +#include + 
+#include "Neon/core/core.h" +#include "Neon/core/types/DataUse.h" +#include "Neon/core/types/Macros.h" + +#include "Neon/set/BlockConfig.h" +#include "Neon/set/Containter.h" +#include "Neon/set/DevSet.h" +#include "Neon/set/MemoryOptions.h" + +#include "Neon/sys/memory/MemDevice.h" + +#include "Neon/domain/aGrid.h" + +#include "Neon/domain/interface/GridBaseTemplate.h" +#include "Neon/domain/interface/GridConcept.h" +#include "Neon/domain/interface/KernelConfig.h" +#include "Neon/domain/interface/LaunchConfig.h" +#include "Neon/domain/interface/Stencil.h" +#include "Neon/domain/interface/common.h" + +#include "Neon/domain/tools/GridTransformer.h" +#include "Neon/domain/tools/SpanTable.h" + +#include "Neon/domain/details/eGrid/eGrid.h" +#include "Neon/domain/patterns/PatternScalar.h" + +#include "BlockViewPartition.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + +namespace details { +struct GridTransformation +{ + template + using Partition = BlockViewPartition; + using Span = Neon::domain::details::eGrid::eSpan; + static constexpr Neon::set::internal::ContainerAPI::DataViewSupport dataViewSupport = Neon::set::internal::ContainerAPI::DataViewSupport::on; + + using FoundationGrid = Neon::domain::details::eGrid::eGrid; + static constexpr Neon::set::details::ExecutionThreadSpan executionThreadSpan = FoundationGrid::executionThreadSpan; + using ExecutionThreadSpanIndexType = int32_t; + using Idx = FoundationGrid::Idx; + + static auto getDefaultBlock(FoundationGrid& foundationGrid) -> Neon::index_3d const& + { + return foundationGrid.getDefaultBlock(); + } + + static auto initSpan(FoundationGrid& foundationGrid, Neon::domain::tool::SpanTable& spanTable) -> void + { + spanTable.forEachConfiguration([&](Neon::Execution execution, + Neon::SetIdx setIdx, + Neon::DataView dw, + Span& span) { + span = foundationGrid.getSpan(execution, setIdx, dw); + }); + } + + static auto initLaunchParameters(FoundationGrid& foundationGrid, + Neon::DataView dataView, + 
const Neon::index_3d& blockSize, + const size_t& shareMem) -> Neon::set::LaunchParameters + { + return foundationGrid.getLaunchParameters(dataView, blockSize, shareMem); + } + + static auto helpGetGridIdx(FoundationGrid&, + Neon::SetIdx const&, + FoundationGrid::Idx const& fgIdx) + -> GridTransformation::Idx + { + GridTransformation::Idx tgIdx = fgIdx; + return tgIdx; + } + + template + static auto initFieldPartition(FoundationGrid::Field& foundationField, + Neon::domain::tool::PartitionTable>& partitionTable) -> void + { + partitionTable.forEachConfiguration( + [&](Neon::Execution execution, + Neon::SetIdx setIdx, + Neon::DataView dw, + Partition& partition) { + auto& foundationPartition = foundationField.getPartition(execution, setIdx, dw); + partition = Partition(foundationPartition); + }); + } +}; +using BlockViewGrid = Neon::domain::tool::GridTransformer::Grid; + +} // namespace details + +} // namespace Neon::domain::details::bGrid diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/BlockViewGrid/BlockViewPartition.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/BlockViewGrid/BlockViewPartition.h new file mode 100644 index 00000000..3a860baa --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/BlockViewGrid/BlockViewPartition.h @@ -0,0 +1,43 @@ +#pragma once +#include +#include "Neon/core/core.h" +#include "Neon/core/types/Macros.h" +#include "Neon/domain/details/eGrid/eGrid.h" +#include "Neon/domain/details/eGrid/eIndex.h" +#include "Neon/domain/interface/NghData.h" +#include "Neon/set/DevSet.h" +#include "Neon/sys/memory/CudaIntrinsics.h" +#include "cuda_fp16.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + +template +class BlockViewPartition : public Neon::domain::details::eGrid::ePartition +{ + public: + BlockViewPartition() + { + } + BlockViewPartition(Neon::domain::details::eGrid::ePartition ePartition) + : Neon::domain::details::eGrid::ePartition(ePartition) + { + } + + template + 
static auto getInBlockIdx(typename Neon::domain::details::eGrid::ePartition::Idx const& idx, + uint8_3d const& inBlockLocation) -> BlockIdexType + { + BlockIdexType blockIdx(idx.helpGet(), inBlockLocation); + return inBlockLocation; + } + + auto getCountAllocated() const -> int32_t; +}; + +template +auto BlockViewPartition::getCountAllocated() const -> int32_t +{ + return this->mCountAllocated; +} +} // namespace Neon::domain::details::bGrid diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/ClassificationGrid/ClassSelector.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/ClassificationGrid/ClassSelector.h new file mode 100644 index 00000000..9b2f441b --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/ClassificationGrid/ClassSelector.h @@ -0,0 +1,10 @@ +#pragma once + +namespace Neon::domain::details::disaggregated::bGridMask::details::cGrid { +enum ClassSelector +{ + alpha = 1, + beta = 2, + outside = 0, +}; +} \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/ClassificationGrid/cGrid.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/ClassificationGrid/cGrid.h new file mode 100644 index 00000000..a6c20f48 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/ClassificationGrid/cGrid.h @@ -0,0 +1,145 @@ +#pragma once +#include + +#include "Neon/core/core.h" +#include "Neon/core/types/DataUse.h" +#include "Neon/core/types/Macros.h" + +#include "Neon/set/BlockConfig.h" +#include "Neon/set/Containter.h" +#include "Neon/set/DevSet.h" +#include "Neon/set/MemoryOptions.h" + +#include "Neon/sys/memory/MemDevice.h" + +#include "Neon/domain/interface/GridBaseTemplate.h" +#include "Neon/domain/interface/GridConcept.h" +#include "Neon/domain/interface/KernelConfig.h" +#include "Neon/domain/interface/LaunchConfig.h" +#include "Neon/domain/interface/Stencil.h" +#include "Neon/domain/interface/common.h" + +#include 
"Neon/domain/patterns/PatternScalar.h" +#include "Neon/domain/tools/GridTransformer.h" +#include "Neon/domain/tools/SpanTable.h" + +#include "../bGrid.h" +#include "./ClassSelector.h" +#include "./cSpan.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + +namespace details::cGrid { + +template +struct GridTransformation_cGrid +{ + using FoundationGrid = Neon::domain::details::disaggregated::bGridMask::bGrid; + + template + using Partition = Neon::domain::details::disaggregated::bGridMask::bPartition; + using Span = cSpan; + static constexpr Neon::set::internal::ContainerAPI::DataViewSupport dataViewSupport = Neon::set::internal::ContainerAPI::DataViewSupport::on; + + static constexpr Neon::set::details::ExecutionThreadSpan executionThreadSpan = FoundationGrid::executionThreadSpan; + using ExecutionThreadSpanIndexType = typename FoundationGrid::ExecutionThreadSpanIndexType; + using Idx = typename FoundationGrid::Idx; + + static auto getDefaultBlock(FoundationGrid& foundationGrid) -> Neon::index_3d const& + { + return foundationGrid.getDefaultBlock(); + } + + static auto initSpan(FoundationGrid& foundationGrid, + Neon::domain::tool::SpanTable& spanTable) -> void + { + spanTable.forEachConfiguration([&](Neon::Execution execution, + Neon::SetIdx setIdx, + Neon::DataView dataViewOfTheTableEntry, + Span& NEON_OUT span) { + typename FoundationGrid::Span const& foundationSpan = foundationGrid.getSpan(execution, setIdx, dataViewOfTheTableEntry); + span = cSpan(foundationSpan.mFirstDataBlockOffset, + foundationSpan.mActiveMask, + foundationSpan.mDataView, + foundationGrid.helpGetClassField().getPartition(execution, setIdx, dataViewOfTheTableEntry)); + }); + } + + static auto initLaunchParameters(FoundationGrid& foundationGrid, + Neon::DataView dataView, + const Neon::index_3d& blockSize, + const size_t& shareMem) -> Neon::set::LaunchParameters + { + Neon::set::LaunchParameters launchParameters = foundationGrid.getLaunchParameters(dataView, blockSize, 
shareMem); + + launchParameters.forEachSeq([&](Neon::SetIdx setIdx, Neon::sys::GpuLaunchInfo& launchParameter) { + Neon::domain::tool::Partitioner1D const& foundationPartitioner1D = foundationGrid.helpGetPartitioner1D(); + auto const& spanLayout = foundationPartitioner1D.getSpanLayout(); + int nBlocks; + + Neon::domain::tool::partitioning::ByDomain byDomain = classSelector == cGrid::ClassSelector::alpha + ? Neon::domain::tool::partitioning::ByDomain::bulk + : Neon::domain::tool::partitioning::ByDomain::bc; + + int countInternal = spanLayout.getBoundsInternal(setIdx, byDomain).count; + int countBcUp = spanLayout.getBoundsBoundary(setIdx, + Neon::domain::tool::partitioning::ByDirection::up, + byDomain) + .count; + int countBcDw = spanLayout.getBoundsBoundary(setIdx, + Neon::domain::tool::partitioning::ByDirection::down, + byDomain) + .count; + + switch (dataView) { + case Neon::DataView::INTERNAL: + nBlocks = countInternal; + break; + case Neon::DataView::BOUNDARY: + nBlocks = countBcUp + countBcDw; + break; + case Neon::DataView::STANDARD: + nBlocks = countInternal + + countBcUp + + countBcDw; + break; + default: + throw Neon::NeonException("Unknown data view"); + } + + launchParameter.set(Neon::sys::GpuLaunchInfo::mode_e::cudaGridMode, + nBlocks, + SBlock::memBlockSize3D.template newType(), shareMem); + }); + return launchParameters; + } + + static auto helpGetGridIdx(FoundationGrid&, + Neon::SetIdx const&, + typename FoundationGrid::Idx const& fgIdx) + -> GridTransformation::Idx + { + GridTransformation::Idx tgIdx = fgIdx; + return tgIdx; + } + + template + static auto initFieldPartition(typename FoundationGrid::template Field& foundationField, + Neon::domain::tool::PartitionTable>& partitionTable) -> void + { + partitionTable.forEachConfiguration( + [&](Neon::Execution execution, + Neon::SetIdx setIdx, + Neon::DataView dw, + Partition& partition) { + auto& foundationPartition = foundationField.getPartition(execution, setIdx, dw); + partition = 
foundationPartition; + }); + } +}; + +template +using cGrid = typename Neon::domain::tool::GridTransformer>::Grid; + +} // namespace details::cGrid +} // namespace Neon::domain::details::disaggregated::bGridMask diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/ClassificationGrid/cSpan.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/ClassificationGrid/cSpan.h new file mode 100644 index 00000000..e38180ee --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/ClassificationGrid/cSpan.h @@ -0,0 +1,58 @@ +#pragma once + +#include "Neon/domain/details/bGridDisgMask/bIndex.h" + +namespace Neon::domain::details::disaggregated::bGridMask { +namespace details::cGrid { + +template +class cSpan +{ + public: + // bit mask information + using BitMaskWordType = uint64_t; + + static constexpr uint32_t bitMaskStorageBitWidth = 64; + static constexpr Neon::MemoryLayout activeMaskMemoryLayout = Neon::MemoryLayout::arrayOfStructs; + static constexpr uint32_t log2OfbitMaskWordSize = 6; + + using Idx = bIndex; + friend class bGrid; + + static constexpr int SpaceDim = 3; + + cSpan() = default; + virtual ~cSpan() = default; + + NEON_CUDA_HOST_DEVICE inline static auto getInvalidBlockId() + -> typename Idx::DataBlockIdx + { + return std::numeric_limits::max(); + } + + inline cSpan( + typename Idx::DataBlockCount mFirstDataBlockOffset, + typename SBlock::BitMask const* NEON_RESTRICT mActiveMask, + Neon::DataView mDataView, + uint8_t const* NEON_RESTRICT mClassMask); + + NEON_CUDA_HOST_DEVICE inline auto setAndValidateCPUDevice( + Idx& bidx, + uint32_t const& threadIdx, + uint32_t const& x, + uint32_t const& y, + uint32_t const& z) const -> bool; + + NEON_CUDA_HOST_DEVICE inline auto setAndValidateGPUDevice( + Idx& bidx) const -> bool; + + + // We don't need to have a count on active blocks + typename Idx::DataBlockCount mFirstDataBlockOffset; + typename SBlock::BitMask const* NEON_RESTRICT mActiveMask; + Neon::DataView mDataView; + 
uint8_t const* NEON_RESTRICT mClassMask; +}; +} // namespace Neon::domain::details::disaggregated::bGridMask +} +#include "Neon/domain/details/bGridDisgMask/ClassificationGrid/cSpan_imp.h" \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/ClassificationGrid/cSpan_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/ClassificationGrid/cSpan_imp.h new file mode 100644 index 00000000..f8783a2e --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/ClassificationGrid/cSpan_imp.h @@ -0,0 +1,60 @@ +#include +#include "Neon/domain/details/bGridDisgMask/ClassificationGrid/cSpan.h" +namespace Neon::domain::details::disaggregated::bGridMask { +namespace details::cGrid { + +template +NEON_CUDA_HOST_DEVICE inline auto +cSpan::setAndValidateGPUDevice([[maybe_unused]] Idx& bidx) const -> bool +{ +#ifdef NEON_PLACE_CUDA_DEVICE + bidx.mDataBlockIdx = blockIdx.x + mFirstDataBlockOffset; + bidx.mInDataBlockIdx.x = threadIdx.x; + bidx.mInDataBlockIdx.y = threadIdx.y; + bidx.mInDataBlockIdx.z = threadIdx.z; + + const bool isActive = mActiveMask[bidx.mDataBlockIdx].isActive(bidx.mInDataBlockIdx.x, bidx.mInDataBlockIdx.y, bidx.mInDataBlockIdx.z); + const bool isClass = mClassMask[bidx.mDataBlockIdx * SB] == classSelector; + return isActive; +#else + NEON_THROW_UNSUPPORTED_OPERATION("Operation supported only on GPU"); +#endif +} + +template +NEON_CUDA_HOST_DEVICE inline auto +cSpan::setAndValidateCPUDevice(Idx& bidx, + uint32_t const& dataBlockIdx, + uint32_t const& x, + uint32_t const& y, + uint32_t const& z) const -> bool +{ + + bidx.mDataBlockIdx = dataBlockIdx + mFirstDataBlockOffset; + ; + bidx.mInDataBlockIdx.x = static_cast(x); + bidx.mInDataBlockIdx.y = static_cast(y); + bidx.mInDataBlockIdx.z = static_cast(z); + const bool isActive = mActiveMask[dataBlockIdx].isActive(bidx.mInDataBlockIdx.x, bidx.mInDataBlockIdx.y, bidx.mInDataBlockIdx.z); + const bool isClass = mClassMask[bidx.mDataBlockIdx * 
SBlock::memBlockCountElements + + bidx.mInDataBlockIdx.x + + bidx.mInDataBlockIdx.y * SBlock::memBlockSizeX + + bidx.mInDataBlockIdx.z * SBlock::memBlockSizeX * SBlock::memBlockSizeY] == classSelector; + + return isActive && isClass; +} + +template +cSpan::cSpan(typename Idx::DataBlockCount firstDataBlockOffset, + typename SBlock::BitMask const* NEON_RESTRICT activeMask, + Neon::DataView dataView, + uint8_t const* NEON_RESTRICT ClassMask) + : mFirstDataBlockOffset(firstDataBlockOffset), + mActiveMask(activeMask), + mDataView(dataView), + mClassMask(ClassMask) +{ +} + +} +} // namespace Neon::domain::details::disaggregated::bGridMask \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/StaticBlock.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/StaticBlock.h new file mode 100644 index 00000000..8ef687ce --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/StaticBlock.h @@ -0,0 +1,106 @@ +#pragma once + +#include "Neon/domain/details/bGridDisgMask/bSpan.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + +template +struct StaticBlock +{ + public: + constexpr static uint32_t memBlockSizeX = memBlockSizeX_; + constexpr static uint32_t memBlockSizeY = memBlockSizeY_; + constexpr static uint32_t memBlockSizeZ = memBlockSizeZ_; + constexpr static Neon::uint32_3d memBlockSize3D = Neon::uint32_3d(memBlockSizeX, memBlockSizeY, memBlockSizeZ); + + constexpr static uint32_t userBlockSizeX = userBlockSizeX_; + constexpr static uint32_t userBlockSizeY = userBlockSizeY_; + constexpr static uint32_t userBlockSizeZ = userBlockSizeZ_; + constexpr static Neon::uint32_3d userBlockSize3D = Neon::uint32_3d(userBlockSizeX, userBlockSizeY, userBlockSizeZ); + + constexpr static uint32_t blockRatioX = memBlockSizeX / userBlockSizeX; + constexpr static uint32_t blockRatioY = memBlockSizeY / userBlockSizeY; + constexpr static uint32_t blockRatioZ = memBlockSizeZ / userBlockSizeZ; + + constexpr 
static uint32_t memBlockPitchX = 1; + constexpr static uint32_t memBlockPitchY = memBlockSizeX; + constexpr static uint32_t memBlockPitchZ = memBlockSizeX * memBlockSizeY; + + constexpr static bool isMultiResMode = isMultiResMode_; + + constexpr static uint32_t memBlockCountElements = memBlockSizeX * memBlockSizeY * memBlockSizeZ; + + static_assert(memBlockSizeX >= userBlockSizeX); + static_assert(memBlockSizeY >= userBlockSizeY); + static_assert(memBlockSizeZ >= userBlockSizeZ); + + static_assert(memBlockSizeX % userBlockSizeX == 0); + static_assert(memBlockSizeY % userBlockSizeY == 0); + static_assert(memBlockSizeZ % userBlockSizeZ == 0); + + struct BitMask + { + using BitMaskWordType = uint32_t; + auto reset() -> void + { + for (BitMaskWordType i = 0; i < nWords; ++i) { + bits[i] = 0; + } + } + + auto setActive(int threadX, + int threadY, + int threadZ) -> void + { + BitMaskWordType mask; + uint32_t wordIdx; + getMaskAndWordI(threadX, threadY, threadZ, mask, wordIdx); + auto& word = bits[wordIdx]; + word = word | mask; + } + + inline auto NEON_CUDA_HOST_DEVICE isActive(int threadX, + int threadY, + int threadZ) const -> bool + { + BitMaskWordType mask; + uint32_t wordIdx; + getMaskAndWordI(threadX, threadY, threadZ, mask, wordIdx); + auto& word = bits[wordIdx]; + return (word & mask) != 0; + } + + static inline auto NEON_CUDA_HOST_DEVICE getMaskAndWordI(int threadX, + int threadY, + int threadZ, + NEON_OUT BitMaskWordType& mask, + NEON_OUT uint32_t& wordIdx) -> void + { + const uint32_t threadPitch = threadX * memBlockPitchX + + threadY * memBlockPitchY + + threadZ * memBlockPitchZ; + + // threadPitch >> log2_of_bitPerWord + // the same as: threadPitch / 2^{log2_of_bitPerWord} + wordIdx = threadPitch >> log2_of_bitPerWord; + // threadPitch & ((bitMaskWordType(bitMaskStorageBitWidth)) - 1); + // same as threadPitch % 2^{log2OfbitMaskWordSize} + const uint32_t offsetInWord = threadPitch & ((BitMaskWordType(bitPerWord)) - 1); + mask = BitMaskWordType(1) << 
offsetInWord; + } + + constexpr static BitMaskWordType nWords = (memBlockCountElements + 31) / 32; + static constexpr uint32_t log2_of_bitPerWord = 5; + static constexpr uint32_t bitPerWord = 32; + + BitMaskWordType bits[nWords]; + }; +}; + +} // namespace Neon::domain::details::disaggregated::bGrid \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bField.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bField.h new file mode 100644 index 00000000..60235a8c --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bField.h @@ -0,0 +1,119 @@ +#pragma once +#include "Neon/domain/details/bGridDisgMask/bPartition.h" +#include "Neon/domain/interface/FieldBaseTemplate.h" +#include "Neon/set/patterns/BlasSet.h" + +#include "Neon/core/core.h" +#include "Neon/core/types/Macros.h" + +#include "Neon/set/DevSet.h" +#include "Neon/set/HuOptions.h" +#include "Neon/set/MemoryTransfer.h" + +#include "Neon/domain/interface/FieldBaseTemplate.h" +#include "Neon/domain/tools/HaloUpdateTable1DPartitioning.h" +#include "Neon/domain/tools/PartitionTable.h" +#include "bPartition.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + + +template +class bField : public Neon::domain::interface::FieldBaseTemplate, + bPartition, + int> +{ + friend bGrid; + + public: + using Type = T; + using Grid = bGrid; + using Field = bField; + using Partition = bPartition; + using Idx = bIndex; + using BlockViewGrid = Neon::domain::tool::GridTransformer::Grid; + template + using BlockViewField = BlockViewGrid::template Field; + + using NghIdx = typename Partition::NghIdx; + using NghData = typename Partition::NghData; + + bField(const std::string& fieldUserName, + Neon::DataUse dataUse, + Neon::MemoryOptions memoryOptions, + const Grid& grid, + int cardinality, + T inactiveValue); + + bField(); + + virtual ~bField() = default; + + auto getPartition(Neon::Execution, + Neon::SetIdx, + const Neon::DataView& dataView) 
const -> const Partition& final; + + auto getPartition(Neon::Execution, + Neon::SetIdx, + const Neon::DataView& dataView) -> Partition& final; + + auto isInsideDomain(const Neon::index_3d& idx) const -> bool; + + + auto operator()(const Neon::index_3d& idx, + const int& cardinality) const -> T final; + + auto getReference(const Neon::index_3d& idx, + const int& cardinality) -> T& final; + + auto updateHostData(int streamId = 0) -> void final; + + auto updateDeviceData(int streamId = 0) -> void final; + + auto newHaloUpdate(Neon::set::StencilSemantic semantic, + Neon::set::TransferMode transferMode, + Neon::Execution execution) + const -> Neon::set::Container; + + auto getMemoryField() -> BlockViewGrid::Field&; + + + private: + auto getRef(const Neon::index_3d& idx, const int& cardinality) const -> T&; + + auto initHaloUpdateTable() -> void; + + + struct Data + { + Data() = default; + Data(Neon::Backend const& bk) + { + partitionTable.init(bk); + } + + enum EndPoints + { + src = 1, + dst = 0 + }; + + struct EndPointsUtils + { + static constexpr int nConfigs = 2; + }; + + std::shared_ptr grid; + BlockViewField memoryField; + int cardinality; + + Neon::domain::tool::HaloTable1DPartitioning mStandardHaloUpdateTable; + Neon::domain::tool::PartitionTable partitionTable; + }; + std::shared_ptr mData; +}; + + +} // namespace Neon::domain::details::disaggregated::bGrid diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bField_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bField_imp.h new file mode 100644 index 00000000..006a9d3f --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bField_imp.h @@ -0,0 +1,337 @@ +#pragma once + +#include "Neon/domain/details/bGridDisgMask/bField.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + +template +bField::bField() +{ + mData = std::make_shared(); +} + +template +bField::bField(const std::string& fieldUserName, + Neon::DataUse dataUse, + Neon::MemoryOptions 
memoryOptions, + const Grid& grid, + int cardinality, + T inactiveValue) + : Neon::domain::interface::FieldBaseTemplate(&grid, + fieldUserName, + "bField", + cardinality, + inactiveValue, + dataUse, + memoryOptions, + Neon::domain::haloStatus_et::e::ON) +{ + mData = std::make_shared(grid.getBackend()); + mData->grid = std::make_shared(grid); + + if (memoryOptions.getOrder() == Neon::MemoryLayout::arrayOfStructs) { + NEON_WARNING("bField does not support MemoryLayout::arrayOfStructs, enforcing MemoryLayout::structOfArrays"); + memoryOptions.setOrder(Neon::MemoryLayout::structOfArrays); + } + // the allocation size is the number of blocks x block size x cardinality + mData->memoryField = mData->grid->getBlockViewGrid().template newField( + "BitMask", + [&] { + int elPerBlock = SBlock::memBlockCountElements * cardinality; + return elPerBlock; + }(), + inactiveValue, + dataUse, + mData->grid->getBackend().getMemoryOptions(bSpan::activeMaskMemoryLayout)); + + + { // Setting up partitionTable + // const int setCardinality = mData->grid->getBackend().getDeviceCount(); + mData->partitionTable.forEachConfiguration( + [&](Neon::Execution execution, + Neon::SetIdx setIdx, + Neon::DataView, + Partition& partition) { + auto& memoryFieldPartition = mData->memoryField.getPartition(execution, setIdx, Neon::DataView::STANDARD); + auto& blockConnectivity = mData->grid->helpGetBlockConnectivity().getPartition(execution, setIdx, Neon::DataView::STANDARD); + auto& bitmask = mData->grid->getActiveBitMask().getPartition(execution, setIdx, Neon::DataView::STANDARD); + auto& dataBlockOrigins = mData->grid->helpGetDataBlockOriginField().getPartition(execution, setIdx, Neon::DataView::STANDARD); + + partition = bPartition(setIdx, + cardinality, + memoryFieldPartition.mem(), + blockConnectivity.mem(), + bitmask.mem(), + dataBlockOrigins.mem(), + mData->grid->helpGetStencilIdTo3dOffset().rawMem(execution, setIdx), + mData->grid->getDimension()); + }); + } + + initHaloUpdateTable(); +} + 
+template +auto bField::getMemoryField() -> BlockViewGrid::Field& +{ + return mData->memoryField; +} + +template +auto bField::isInsideDomain(const Neon::index_3d& idx) const -> bool +{ + return mData->grid->isInsideDomain(idx); +} + +template +auto bField::getReference(const Neon::index_3d& cartesianIdx, + const int& cardinality) -> T& +{ + if constexpr (SBlock::isMultiResMode) { + auto& grid = this->getGrid(); + auto uniformCartesianIdx = cartesianIdx / grid.helGetMultiResDiscreteIdxSpacing(); + + if (cartesianIdx.x % grid.helGetMultiResDiscreteIdxSpacing() != 0 || + cartesianIdx.y % grid.helGetMultiResDiscreteIdxSpacing() != 0 || + cartesianIdx.z % grid.helGetMultiResDiscreteIdxSpacing() != 0) { + NeonException exp("bField::getReference"); + exp << "Input index is not multiple of the grid resolution"; + exp << "Index = " << cartesianIdx; + NEON_THROW(exp); + } + auto [setIdx, bIdx] = grid.helpGetSetIdxAndGridIdx(uniformCartesianIdx); + auto& partition = getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD); + auto& result = partition(bIdx, cardinality); + return result; + } else { + auto& grid = this->getGrid(); + auto [setIdx, bIdx] = grid.helpGetSetIdxAndGridIdx(cartesianIdx); + auto& partition = getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD); + auto& result = partition(bIdx, cardinality); + return result; + } +} + +template +auto bField::operator()(const Neon::index_3d& cartesianIdx, + const int& cardinality) const -> T +{ + auto& grid = this->getGrid(); + auto [setIdx, bIdx] = grid.helpGetSetIdxAndGridIdx(cartesianIdx); + if (setIdx.idx() == -1) { + return this->getOutsideValue(); + } + auto& partition = getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD); + auto& result = partition(bIdx, cardinality); + return result; +} + +template +auto bField::updateHostData(int streamId) -> void +{ + mData->memoryField.updateHostData(streamId); +} + +template +auto bField::updateDeviceData(int streamId) -> void +{ 
+ mData->memoryField.updateDeviceData(streamId); +} + +template +auto bField::getPartition(Neon::Execution execution, + Neon::SetIdx setIdx, + const Neon::DataView& dataView) const -> const Partition& +{ + const Neon::DataUse dataUse = this->getDataUse(); + bool isOk = Neon::ExecutionUtils::checkCompatibility(dataUse, execution); + if (isOk) { + Partition const& result = mData->partitionTable.getPartition(execution, setIdx, dataView); + return result; + } + std::stringstream message; + message << "The requested execution mode ( " << execution << " ) is not compatible with the field DataUse (" << dataUse << ")"; + NEON_THROW_UNSUPPORTED_OPERATION(message.str()); +} + +template +auto bField::getPartition(Neon::Execution execution, + Neon::SetIdx setIdx, + const Neon::DataView& dataView) -> Partition& +{ + const Neon::DataUse dataUse = this->getDataUse(); + bool isOk = Neon::ExecutionUtils::checkCompatibility(dataUse, execution); + if (isOk) { + Partition& result = mData->partitionTable.getPartition(execution, setIdx, dataView); + return result; + } + std::stringstream message; + message << "The requested execution mode ( " << execution << " ) is not compatible with the field DataUse (" << dataUse << ")"; + NEON_THROW_UNSUPPORTED_OPERATION(message.str()); +} + +template +auto bField::newHaloUpdate(Neon::set::StencilSemantic stencilSemantic, + Neon::set::TransferMode transferMode, + Neon::Execution execution) const -> Neon::set::Container +{ + + + // We need to define a graph of Containers + // One for the actual memory transfer + // One for the synchronization + // The order depends on the transfer mode: put or get + Neon::set::Container dataTransferContainer; + auto const& bk = this->getGrid().getBackend(); + + if (stencilSemantic == Neon::set::StencilSemantic::standard) { + auto transfers = bk.template newDataSet>(); + + if (this->getMemoryOptions().getOrder() == Neon::MemoryLayout::structOfArrays) { + for (auto byDirection : {tool::partitioning::ByDirection::up, + 
tool::partitioning::ByDirection::down}) { + + auto const& tableEntryByDir = mData->mStandardHaloUpdateTable.get(transferMode, + execution, + byDirection); + + tableEntryByDir.forEachSeq([&](SetIdx setIdx, auto const& tableEntryByDirBySetIdx) { + transfers[setIdx].insert(std::end(transfers[setIdx]), + std::begin(tableEntryByDirBySetIdx), + std::end(tableEntryByDirBySetIdx)); + }); + } + dataTransferContainer = + Neon::set::Container::factoryDataTransfer( + *this, + transferMode, + stencilSemantic, + transfers, + execution); + + + } else { + NEON_THROW_UNSUPPORTED_OPERATION(""); + } + } else { + NEON_DEV_UNDER_CONSTRUCTION(""); + } + Neon::set::Container SyncContainer = + Neon::set::Container::factorySynchronization( + *this, + Neon::set::SynchronizationContainerType::hostOmpBarrier); + + Neon::set::container::Graph graph(this->getBackend()); + const auto& dataTransferNode = graph.addNode(dataTransferContainer); + const auto& syncNode = graph.addNode(SyncContainer); + + switch (transferMode) { + case Neon::set::TransferMode::put: + graph.addDependency(dataTransferNode, syncNode, Neon::GraphDependencyType::data); + break; + case Neon::set::TransferMode::get: + graph.addDependency(syncNode, dataTransferNode, Neon::GraphDependencyType::data); + break; + default: + NEON_THROW_UNSUPPORTED_OPTION(); + break; + } + + graph.removeRedundantDependencies(); + + Neon::set::Container output = + Neon::set::Container::factoryGraph("dGrid-Halo-Update", + graph, + [](Neon::SetIdx, Neon::set::Loader&) {}); + return output; +} + +template +auto bField::initHaloUpdateTable() -> void +{ + // NEON_THROW_UNSUPPORTED_OPERATION(""); + auto& grid = this->getGrid(); + auto bk = grid.getBackend(); + auto getNghSetIdx = [&](SetIdx setIdx, Neon::domain::tool::partitioning::ByDirection direction) { + int res; + if (direction == Neon::domain::tool::partitioning::ByDirection::up) { + res = (setIdx + 1) % bk.getDeviceCount(); + } else { + res = (setIdx + bk.getDeviceCount() - 1) % 
bk.getDeviceCount(); + } + return res; + }; + + mData->mStandardHaloUpdateTable.forEachPutConfiguration( + bk, [&](Neon::SetIdx setIdxSrc, + Execution execution, + Neon::domain::tool::partitioning::ByDirection byDirection, + std::vector& transfersVec) { + { + using namespace Neon::domain::tool::partitioning; + + Neon::SetIdx setIdxDst = getNghSetIdx(setIdxSrc, byDirection); + Neon::SetIdx setIdxVec[2]; + setIdxVec[Data::EndPoints::dst] = setIdxDst; + setIdxVec[Data::EndPoints::src] = setIdxSrc; + + std::array partitions; + std::array*, Data::EndPointsUtils::nConfigs> blockViewPartitions; + std::array, Data::EndPointsUtils::nConfigs> ghostZBeginIdx; + std::array, Data::EndPointsUtils::nConfigs> boundaryZBeginIdx; + std::array memPhyDim; + + partitions[Data::EndPoints::dst] = &this->getPartition(execution, setIdxDst, Neon::DataView::STANDARD); + partitions[Data::EndPoints::src] = &this->getPartition(execution, setIdxSrc, Neon::DataView::STANDARD); + blockViewPartitions[Data::EndPoints::dst] = &(mData->memoryField.getPartition(execution, setIdxDst, Neon::DataView::STANDARD)); + blockViewPartitions[Data::EndPoints::src] = &(mData->memoryField.getPartition(execution, setIdxSrc, Neon::DataView::STANDARD)); + + for (auto endPoint : {Data::EndPoints::dst, Data::EndPoints::src}) { + for (auto direction : {ByDirection::down, ByDirection::up}) { + auto ghostFirst = mData->grid->mData->partitioner1D.getSpanLayout().getGhostBoundary(setIdxVec[endPoint], direction).first; + auto boundaryFirst = mData->grid->mData->partitioner1D.getSpanLayout().getBoundsBoundary(setIdxVec[endPoint], direction).first; + ghostZBeginIdx[endPoint][static_cast(direction)] = ghostFirst; + boundaryZBeginIdx[endPoint][static_cast(direction)] = boundaryFirst; + } + + memPhyDim[endPoint] = Neon::size_4d( + SBlock::memBlockCountElements, + 1, + 1, + size_t(blockViewPartitions[endPoint]->getCountAllocated()) * SBlock::memBlockCountElements); + } + + if (ByDirection::up == byDirection && 
bk.isLastDevice(setIdxSrc)) { + return; + } + + if (ByDirection::down == byDirection && bk.isFirstDevice(setIdxSrc)) { + return; + } + + T* srcMem = blockViewPartitions[Data::EndPoints::src]->mem(); + T* dstMem = blockViewPartitions[Data::EndPoints::dst]->mem(); + + Neon::size_4d dstGhostBuff(ghostZBeginIdx[Data::EndPoints::dst][static_cast(ByDirectionUtils::invert(byDirection))], 0, 0, 0); + Neon::size_4d srcBoundaryBuff(boundaryZBeginIdx[Data::EndPoints::src][static_cast(byDirection)], 0, 0, 0); + + size_t transferDataBlockCount = mData->grid->mData->partitioner1D.getSpanLayout().getBoundsBoundary(setIdxVec[Data::EndPoints::src], byDirection).count; + + // std::cout << "To " << dstGhostBuff << " prt " << blockViewPartitions[Data::EndPoints::dst]->prtID() << " From " << srcBoundaryBuff << " prt " << blockViewPartitions[Data::EndPoints::src]->prtID() << std::endl; + // std::cout << "dst mem " << blockViewPartitions[Data::EndPoints::dst]->mem() << " " << std::endl; + // std::cout << "dst transferDataBlockCount " << transferDataBlockCount << " " << std::endl; + // std::cout << "dst pitch " << (dstGhostBuff * memPhyDim[Data::EndPoints::dst]).rSum() << " " << std::endl; + // std::cout << "dst dstGhostBuff " << dstGhostBuff << " " << std::endl; + // std::cout << "dst pitch all" << memPhyDim[Data::EndPoints::dst] << " " << std::endl; + + Neon::set::MemoryTransfer transfer({setIdxDst, dstMem + (dstGhostBuff * memPhyDim[Data::EndPoints::dst]).rSum(), dstGhostBuff}, + {setIdxSrc, srcMem + (srcBoundaryBuff * memPhyDim[Data::EndPoints::src]).rSum(), srcBoundaryBuff}, + sizeof(T) * SBlock::memBlockCountElements * transferDataBlockCount); + + transfersVec.push_back(transfer); + } + }); +} + + +} // namespace Neon::domain::details::disaggregated::bGrid diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bGrid.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bGrid.h new file mode 100644 index 00000000..de0fec98 --- /dev/null +++ 
b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bGrid.h @@ -0,0 +1,274 @@ +#pragma once +#include "Neon/core/core.h" + +#include "Neon/domain/aGrid.h" +#include "Neon/domain/details/bGridDisgMask/BlockView.h" +#include "Neon/domain/details/bGridDisgMask/StaticBlock.h" +#include "Neon/domain/details/bGridDisgMask/bField.h" +#include "Neon/domain/details/bGridDisgMask/bIndex.h" +#include "Neon/domain/details/bGridDisgMask/bPartition.h" +#include "Neon/domain/details/bGridDisgMask/bSpan.h" +#include "Neon/domain/interface/GridBaseTemplate.h" +#include "Neon/domain/patterns/PatternScalar.h" +#include "Neon/domain/tools/Partitioner1D.h" +#include "Neon/domain/tools/PointHashTable.h" +#include "Neon/domain/tools/SpanTable.h" +#include "Neon/set/Containter.h" +#include "Neon/set/LaunchParametersTable.h" +#include "Neon/set/memory/memSet.h" + +#include "Neon/domain/details/bGridDisgMask/ClassificationGrid/cGrid.h" + +#include "ClassificationGrid/cGrid.h" +#include "bField.h" +#include "bPartition.h" +#include "bSpan.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + + +template +class bField; + +template +class bGrid : public Neon::domain::interface::GridBaseTemplate, + bIndex> +{ + public: + using Grid = bGrid; + template + using Partition = bPartition; + template + using Field = Neon::domain::details::disaggregated::bGridMask::bField; + + using Span = bSpan; + using NghIdx = typename Partition::NghIdx; + using GridBaseTemplate = Neon::domain::interface::GridBaseTemplate>; + + using Idx = bIndex; + static constexpr Neon::set::details::ExecutionThreadSpan executionThreadSpan = Neon::set::details::ExecutionThreadSpan::d1b3; + using ExecutionThreadSpanIndexType = uint32_t; + + using BlockIdx = uint32_t; + + using AlphaGrid = typename Neon::domain::details::disaggregated::bGridMask::details::cGrid::cGrid; + using BetaGrid = typename Neon::domain::details::disaggregated::bGridMask::details::cGrid::cGrid; + + bGrid() = default; + virtual ~bGrid(); + + 
/** + * Constructor for the vanilla block data structure with depth of 1 + */ + template + bGrid(const Neon::Backend& backend, + const Neon::int32_3d& domainSize, + const ActiveCellLambda activeCellLambda, + const Neon::domain::Stencil& stencil, + const double_3d& spacingData = double_3d(1, 1, 1), + const double_3d& origin = double_3d(0, 0, 0), + Neon::domain::tool::spaceCurves::EncoderType encoderType = Neon::domain::tool::spaceCurves::EncoderType::sweep); + + + /** + * Constructor for bGrid. This constructor should be directly used only by mGrid + */ + template + bGrid(const Neon::Backend& backend /**< Neon backend for the computation */, + const Neon::int32_3d& domainSize /**< Size of the bounded Cartesian */, + const ActiveCellLambda activeCellLambda /**< Function that identify the user domain inside the boxed Cartesian discretization */, + const Neon::domain::Stencil& stencil /**< union of tall the stencil that will be used in the computation */, + const int multiResDiscreteIdxSpacing /**< Parameter for the multi-resolution. Index i and index (i+1) may be remapped as i*voxelSpacing and (i+1)* voxelSpacing. + * For a uniform bGrid, i.e outside the context of multi-resolution this parameter is always 1 */ + , + const double_3d& spacingData /** Physical spacing between two consecutive data points in the Cartesian domain */, + const double_3d& origin /** Physical location in space of the origin of the Cartesian discretization */, + Neon::domain::tool::spaceCurves::EncoderType encoderType = Neon::domain::tool::spaceCurves::EncoderType::sweep); + + /** + * Returns some properties for a given cartesian in the Cartesian domain. 
+ * The provide index my be inside or outside the user defined bounded Cartesian domain + */ + auto getProperties(const Neon::index_3d& idx) + const -> typename GridBaseTemplate::CellProperties final; + + /** + * Returns true if the query 3D point is inside the user domain + * @param idx + * @return + */ + auto isInsideDomain(const Neon::index_3d& idx) + const -> bool final; + + /** + * Retrieves the device index that contains the query point + * @param idx + * @return + */ + auto getSetIdx(const Neon::index_3d& idx) + const -> int32_t final; + + /** + * Allocates a new field on the grid + */ + template + auto newField(const std::string name, + int cardinality, + T inactiveValue, + Neon::DataUse dataUse = Neon::DataUse::HOST_DEVICE, + Neon::MemoryOptions memoryOptions = Neon::MemoryOptions()) const + -> Field; + + /** + * Allocates a new field on the block view grid + */ + template + auto newBlockViewField(const std::string name, + int cardinality, + T inactiveValue, + Neon::DataUse dataUse = Neon::DataUse::HOST_DEVICE, + Neon::MemoryOptions memoryOptions = Neon::MemoryOptions()) const + -> BlockView::Field; + + /** + * Allocates a new container to execute some computation in the grid + */ + template + auto newContainer(const std::string& name, + index_3d blockSize, + size_t sharedMem, + LoadingLambda lambda) const -> Neon::set::Container; + + /** + * Allocates a new container to execute some computation in the grid + */ + template + auto newContainer(const std::string& name, + LoadingLambda lambda) const -> Neon::set::Container; + + template + auto newAlphaContainer(const std::string& name, + LoadingLambda lambda) const -> Neon::set::Container; + + template + auto newBetaContainer(const std::string& name, + LoadingLambda lambda) const -> Neon::set::Container; + + template + auto newAlphaBetaContainer(const std::string& name, + LoadingLambdaAlpha lambdaAlpha, + LoadingLambdaBeta lambdaBeta) const -> Neon::set::Container; + + /** + * Defines a new set of parameter 
to launch a Container + */ + auto getLaunchParameters(Neon::DataView dataView, + const Neon::index_3d& blockSize, + const size_t& sharedMem) const -> Neon::set::LaunchParameters; + + /** + * Retrieve the span associated to the grid w.r.t. some user defined parameters. + */ + auto getSpan(Neon::Execution execution, + SetIdx setIdx, + Neon::DataView dataView) -> const Span&; + + /** + * Retrieve the block vew grid internally used. + * This grid can be leverage to allocate data at the block level. + */ + auto getBlockViewGrid() const -> BlockView::Grid&; + + + /** + * Retrieve the block vew grid internally used. + * This grid can be leverage to allocate data at the block level. + */ + auto getActiveBitMask() const -> BlockView::Field&; + + /** + * Helper function to retrieve the discrete index spacing used for the multi-resolution + */ + template + auto helGetMultiResDiscreteIdxSpacing() const -> std::enable_if_t; + + + /** + * Help function to retrieve the block connectivity as a BlockViewGrid field + */ + auto helpGetBlockConnectivity() const -> BlockView::Field&; + + /** + * Help function to retrieve the block origin as a BlockViewGrid field + */ + auto helpGetDataBlockOriginField() const -> Neon::aGrid::Field&; + + /** + * Help function to retrieve the map that converts a stencil point id to 3d offset + */ + auto helpGetStencilIdTo3dOffset() const -> Neon::set::MemSet&; + + auto helpGetPartitioner1D() -> Neon::domain::tool::Partitioner1D&; + + /** + * Help function retriev the device and the block index associated to a point in the BlockViewGrid grid + */ + auto helpGetSetIdxAndGridIdx(Neon::index_3d idx) const -> std::tuple; + + template + auto init_mask_field(ActiveCellLambda activeCellLambda) -> void; + auto helpGetClassField() -> Field&; + + + struct Data + { + auto init(const Neon::Backend& bk) + { + spanTable.init(bk); + launchParametersTable.init(bk); + } + + Neon::domain::tool::SpanTable spanTable /** Span for each data view configurations */; + 
Neon::set::LaunchParametersTable launchParametersTable; + + Neon::domain::tool::Partitioner1D partitioner1D; + Stencil stencil; + Neon::sys::patterns::Engine reduceEngine; + + Neon::aGrid memoryGrid /** memory allocator for fields */; + Neon::aGrid::Field mDataBlockOriginField; + Neon::set::MemSet mStencil3dTo1dOffset; + + BlockView::Grid blockViewGrid; + BlockView::Field activeBitField; + BlockView::Field blockConnectivity; + Neon::set::MemSet stencilIdTo3dOffset; + + int mMultiResDiscreteIdxSpacing; + + // number of active voxels in each block + Neon::set::DataSet mNumActiveVoxel; + + + // Stencil neighbor indices + Neon::set::MemSet mStencilNghIndex; + + AlphaGrid alphaGrid; + BetaGrid betaGrid; + Field maskClassField; + }; + std::shared_ptr mData; +}; +extern template class bGrid>; +} // namespace Neon::domain::details::disaggregated::bGridMask + +#include "bField_imp.h" +#include "bGrid_imp.h" diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bGrid_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bGrid_imp.h new file mode 100644 index 00000000..95fde021 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bGrid_imp.h @@ -0,0 +1,572 @@ +#include "Neon/domain/details/bGridDisgMask/bGrid.h" +#include "Neon/domain/tools/SpaceCurves.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + +template +template +bGrid::bGrid(const Neon::Backend& backend, + const Neon::int32_3d& domainSize, + const ActiveCellLambda activeCellLambda, + const Neon::domain::Stencil& stencil, + const double_3d& spacingData, + const double_3d& origin, + Neon::domain::tool::spaceCurves::EncoderType encoderType) + : bGrid(backend, domainSize, activeCellLambda, stencil, 1, spacingData, origin, encoderType) +{ +} + +template +template +bGrid::bGrid(const Neon::Backend& backend, + const Neon::int32_3d& domainSize, + const ActiveCellLambda activeCellLambda, + const Neon::domain::Stencil& stencil, + const int 
multiResDiscreteIdxSpacing, + const double_3d& spacingData, + const double_3d& origin, + Neon::domain::tool::spaceCurves::EncoderType encoderType) +{ + + + mData = std::make_shared(); + mData->init(backend); + + mData->mMultiResDiscreteIdxSpacing = multiResDiscreteIdxSpacing; + mData->stencil = stencil; + const index_3d defaultKernelBlockSize(SBlock::memBlockSizeX, + SBlock::memBlockSizeY, + SBlock::memBlockSizeZ); + + std::stringstream gridName; + gridName << "bGrid_" << SBlock::memBlockSizeX << "_" + << SBlock::memBlockSizeY << "_" + << SBlock::memBlockSizeZ; + { + auto nElementsPerPartition = backend.devSet().template newDataSet(0); + // We do an initialization with nElementsPerPartition to zero, + // then we reset to the computed number. + + bGrid::GridBase::init(gridName.str(), + backend, + domainSize, + stencil, + nElementsPerPartition, + defaultKernelBlockSize, + multiResDiscreteIdxSpacing, + origin, + encoderType, + defaultKernelBlockSize); + } + + { // Initialization of the partitioner + using returTypeOfLambda = typename std::invoke_result::type; + if constexpr (std::is_same_v) { + mData->partitioner1D = Neon::domain::tool::Partitioner1D( + backend, + activeCellLambda, + nullptr, + SBlock::memBlockSize3D.template newType(), + domainSize, + Neon::domain::Stencil::s27_t(false), + encoderType, + multiResDiscreteIdxSpacing); + } else if constexpr (std::is_same_v) { + mData->partitioner1D = Neon::domain::tool::Partitioner1D( + backend, + [&](Neon::index_3d idx) { + return activeCellLambda(idx) != details::cGrid::ClassSelector::outside; + }, + // [&](Neon::index_3d idx) { + // return (activeCellLambda(idx) == details::cGrid::ClassSelector::beta) + // ? 
Neon::domain::tool::partitioning::ByDomain::bc + // : Neon::domain::tool::partitioning::ByDomain::bulk; + // }, + nullptr, + SBlock::memBlockSize3D.template newType(), + domainSize, + Neon::domain::Stencil::s27_t(false), + encoderType, + multiResDiscreteIdxSpacing); + } else { + NEON_THROW_UNSUPPORTED_OPERATION("The user defined lambda must return a bool or a ClassSelector"); + } + + mData->mDataBlockOriginField = mData->partitioner1D.getGlobalMapping(); + mData->mStencil3dTo1dOffset = mData->partitioner1D.getStencil3dTo1dOffset(); + mData->memoryGrid = mData->partitioner1D.getMemoryGrid(); + } + + { // BlockViewGrid + Neon::domain::details::eGrid::eGrid egrid( + backend, + mData->partitioner1D.getBlockSpan(), + mData->partitioner1D, + Neon::domain::Stencil::s27_t(false), + spacingData * SBlock::memBlockSize3D, + origin); + + mData->blockViewGrid = BlockView::Grid(egrid); + } + + { // Active bitmask + mData->activeBitField = mData->blockViewGrid.template newField( + "BlockViewBitMask", + 1, + [] { + typename SBlock::BitMask outsideBitMask; + outsideBitMask.reset(); + return outsideBitMask; + }(), + Neon::DataUse::HOST_DEVICE, backend.getMemoryOptions(BlockView::layout)); + + mData->mNumActiveVoxel = backend.devSet().template newDataSet(); + + mData->activeBitField + .getGrid() + .template newContainer( + "activeBitMaskInit", + [&, this](Neon::set::Loader& loader) { + auto bitMaskPartition = loader.load(mData->activeBitField); + return [&, bitMaskPartition](const auto& bitMaskIdx) mutable { + auto prtIdx = bitMaskPartition.prtID(); + int countActive = 0; + auto const blockOrigin = bitMaskPartition.getGlobalIndex(bitMaskIdx); + typename SBlock::BitMask& bitMask = bitMaskPartition(bitMaskIdx, 0); + bitMask.reset(); + + for (int k = 0; k < SBlock::memBlockSize3D.template newType().z; k++) { + for (int j = 0; j < SBlock::memBlockSize3D.template newType().y; j++) { + for (int i = 0; i < SBlock::memBlockSize3D.template newType().x; i++) { + auto globalPosition = 
blockOrigin + Neon::int32_3d(i * this->mData->mMultiResDiscreteIdxSpacing, + j * this->mData->mMultiResDiscreteIdxSpacing, + k * this->mData->mMultiResDiscreteIdxSpacing); + bool const isInDomain = globalPosition < domainSize * this->mData->mMultiResDiscreteIdxSpacing; + bool const isActive = activeCellLambda(globalPosition); + if (isActive && isInDomain) { + countActive++; + bitMask.setActive(i, j, k); + } + } + } + } +#pragma omp critical + { + this->mData->mNumActiveVoxel[prtIdx] += countActive; + } + }; + }) + .run(Neon::Backend::mainStreamIdx); + + + mData->activeBitField.updateDeviceData(Neon::Backend::mainStreamIdx); + this->getBackend().sync(Neon::Backend::mainStreamIdx); + mData->activeBitField.newHaloUpdate(Neon::set::StencilSemantic::standard, + Neon::set::TransferMode::put, + Neon::Execution::device) + .run(Neon::Backend::mainStreamIdx); + } + + + { // Neighbor blocks + mData->blockConnectivity = mData->blockViewGrid.template newField("blockConnectivity", + 27, + Span::getInvalidBlockId(), + Neon::DataUse::HOST_DEVICE, + Neon::MemoryLayout::arrayOfStructs); + + mData->blockConnectivity.getGrid().template newContainer( + "blockConnectivityInit", + [&](Neon::set::Loader& loader) { + auto blockConnectivity = loader.load(mData->blockConnectivity); + return [&, blockConnectivity](auto const& idx) mutable { + for (int8_t k = 0; k < 3; k++) { + for (int8_t j = 0; j < 3; j++) { + for (int8_t i = 0; i < 3; i++) { + auto targetDirection = i + 3 * j + 3 * 3 * k; + BlockIdx blockNghIdx = Span::getInvalidBlockId(); + typename decltype(blockConnectivity)::Idx nghIdx; + Neon::int8_3d stencilPoint(i - int8_t(1), + j - int8_t(1), + k - int8_t(1)); + bool isValid = blockConnectivity.getNghIndex(idx, stencilPoint, nghIdx); + if (isValid) { + blockNghIdx = static_cast(nghIdx.helpGet()); + } + blockConnectivity(idx, targetDirection) = blockNghIdx; + } + } + } + }; + }) + .run(Neon::Backend::mainStreamIdx); + 
mData->blockConnectivity.updateDeviceData(Neon::Backend::mainStreamIdx); + } + + // Initialization of the SPAN table + mData->spanTable.forEachConfiguration([&](Neon::Execution execution, + Neon::SetIdx setIdx, + Neon::DataView dw, + Span& span) { + span.mDataView = dw; + switch (dw) { + case Neon::DataView::STANDARD: { + span.mFirstDataBlockOffset = 0; + span.mDataView = dw; + span.mActiveMask = mData->activeBitField.getPartition(execution, setIdx, dw).mem(); + break; + } + case Neon::DataView::BOUNDARY: { + span.mFirstDataBlockOffset = mData->partitioner1D.getSpanClassifier().countInternal(setIdx); + span.mDataView = dw; + span.mActiveMask = mData->activeBitField.getPartition(execution, setIdx, dw).mem(); + + break; + } + case Neon::DataView::INTERNAL: { + span.mFirstDataBlockOffset = 0; + span.mDataView = dw; + span.mActiveMask = mData->activeBitField.getPartition(execution, setIdx, dw).mem(); + break; + } + default: { + NeonException exc("dFieldDev"); + NEON_THROW(exc); + } + } + }); + + { // Stencil Idx to 3d offset + auto nPoints = backend.devSet().newDataSet(stencil.nNeighbours()); + mData->stencilIdTo3dOffset = backend.devSet().template newMemSet(Neon::DataUse::HOST_DEVICE, + 1, + backend.getMemoryOptions(), + nPoints); + for (int i = 0; i < stencil.nNeighbours(); ++i) { + for (int devIdx = 0; devIdx < backend.devSet().setCardinality(); devIdx++) { + index_3d pLong = stencil.neighbours()[i]; + Neon::int8_3d pShort = pLong.newType(); + mData->stencilIdTo3dOffset.eRef(devIdx, i) = pShort; + } + } + mData->stencilIdTo3dOffset.updateDeviceData(backend, Neon::Backend::mainStreamIdx); + } + // Init the base grid + bGrid::GridBase::init(gridName.str(), + backend, + domainSize, + Neon::domain::Stencil(), + mData->mNumActiveVoxel, + SBlock::memBlockSize3D.template newType(), + spacingData, + origin, + encoderType, + defaultKernelBlockSize); + { // setting launchParameters + mData->launchParametersTable.forEachSeq([&](Neon::DataView dw, + Neon::set::LaunchParameters& 
bLaunchParameters) { + auto defEGridBlock = mData->blockViewGrid.getDefaultBlock(); + auto eGridParams = mData->blockViewGrid.getLaunchParameters(dw, defEGridBlock, 0); + eGridParams.forEachSeq([&](Neon::SetIdx setIdx, Neon::sys::GpuLaunchInfo const& launchSingleDev) { + auto eDomainGridSize = launchSingleDev.domainGrid(); + assert(eDomainGridSize.y == 1); + assert(eDomainGridSize.z == 1); + int nBlocks = static_cast(eDomainGridSize.x); + bLaunchParameters.get(setIdx).set(Neon::sys::GpuLaunchInfo::mode_e::cudaGridMode, + nBlocks, SBlock::memBlockSize3D.template newType(), 0); + }); + }); + } + + this->init_mask_field(); + mData->alphaGrid = details::cGrid::cGrid(*this); + mData->betaGrid = details::cGrid::cGrid(*this); +} + +template +template +auto bGrid::newField(const std::string name, + int cardinality, + T inactiveValue, + Neon::DataUse dataUse, + Neon::MemoryOptions memoryOptions) const -> Field +{ + memoryOptions = this->getDevSet().sanitizeMemoryOption(memoryOptions); + Field field(name, dataUse, memoryOptions, *this, cardinality, inactiveValue); + return field; +} + +template +template +auto bGrid::newBlockViewField(const std::string name, + int cardinality, + T inactiveValue, + Neon::DataUse dataUse, + Neon::MemoryOptions memoryOptions) const -> BlockView::Field +{ + memoryOptions = this->getDevSet().sanitizeMemoryOption(memoryOptions); + BlockView::Field blockViewField = mData->blockViewGrid.template newField(name, cardinality, inactiveValue, dataUse, memoryOptions); + return blockViewField; +} + +template +template +auto bGrid::newContainer(const std::string& name, + index_3d blockSize, + size_t sharedMem, + LoadingLambda lambda) const -> Neon::set::Container +{ + Neon::set::Container kContainer = Neon::set::Container::factory(name, + Neon::set::internal::ContainerAPI::DataViewSupport::on, + *this, + lambda, + blockSize, + [sharedMem](const Neon::index_3d&) { return sharedMem; }); + return kContainer; +} + +template +template +auto 
bGrid::newContainer(const std::string& name, + LoadingLambda lambda) const -> Neon::set::Container +{ + const Neon::index_3d& defaultBlockSize = this->getDefaultBlock(); + Neon::set::Container kContainer = Neon::set::Container::factory(name, + Neon::set::internal::ContainerAPI::DataViewSupport::on, + *this, + lambda, + defaultBlockSize, + [](const Neon::index_3d&) { return 0; }); + return kContainer; +} + +template +template +auto bGrid::newAlphaContainer(const std::string& name, + LoadingLambda lambda) const -> Neon::set::Container +{ + + auto kContainer = mData->alphaGrid.newContainer(name, + lambda); + return kContainer; +} + +template +template +auto bGrid::newBetaContainer(const std::string& name, + LoadingLambda lambda) const -> Neon::set::Container +{ + + auto kContainer = mData->betaGrid.newContainer(name, + lambda); + return kContainer; +} +template + +template +auto bGrid::newAlphaBetaContainer(const std::string& name, + LoadingLambdaAlpha lambdaAlpha, + LoadingLambdaBeta lambdaBeta) const -> Neon::set::Container +{ + std::vector sequence; + auto containerAlpha = mData->alphaGrid.newContainer(name + "Alpha", + lambdaAlpha); + auto containerBeta = mData->betaGrid.newContainer(name + "Beta", + lambdaBeta); + + sequence.push_back(containerAlpha); + sequence.push_back(containerBeta); + + Neon::set::Container exec = Neon::set::Container::factorySequence(name + "Sequence", sequence); + return exec; +} + +template +auto bGrid:: + getBlockViewGrid() + const -> BlockView::Grid& +{ + return mData->blockViewGrid; +} + +template +auto bGrid:: + getActiveBitMask() + const -> BlockView::Field& +{ + return mData->activeBitField; +} + +/** + * Helper function to retrieve the discrete index spacing used for the multi-resolution + */ +template +template +auto bGrid::helGetMultiResDiscreteIdxSpacing() const + -> std::enable_if_t +{ + return mData->mMultiResDiscreteIdxSpacing; +} + +template +auto bGrid:: + helpGetBlockConnectivity() + const -> BlockView::Field& +{ + return 
mData->blockConnectivity; +} +template +auto bGrid:: + helpGetDataBlockOriginField() + const -> Neon::aGrid::Field& +{ + return mData->mDataBlockOriginField; +} +template +auto bGrid::getSpan(Neon::Execution execution, + SetIdx setIdx, + Neon::DataView dataView) -> const bGrid::Span& +{ + return mData->spanTable.getSpan(execution, setIdx, dataView); +} + +template +bGrid::~bGrid() +{ +} +template +auto bGrid::getSetIdx(const index_3d& idx) const -> int32_t +{ + typename GridBaseTemplate::CellProperties cellProperties; + + cellProperties.setIsInside(this->isInsideDomain(idx)); + if (!cellProperties.isInside()) { + return -1; + } + Neon::SetIdx setIdx = cellProperties.getSetIdx(); + return setIdx; +} +template +auto bGrid::getLaunchParameters(Neon::DataView dataView, + const index_3d&, + const size_t& sharedMem) const -> Neon::set::LaunchParameters +{ + auto res = mData->launchParametersTable.get(dataView); + res.forEachSeq([&](SetIdx const& /*setIdx*/, + Neon::set::LaunchParameters::launchInfo_e& launchParams) -> void { + launchParams.setShm(sharedMem); + }); + return res; +} + +template +auto bGrid:: + helpGetStencilIdTo3dOffset() + const -> Neon::set::MemSet& +{ + return mData->stencilIdTo3dOffset; +} + +template +auto bGrid::isInsideDomain(const index_3d& idx) const -> bool +{ + // 1. check if the block is active + const BlockView::index_3d blockIdx3d = idx / (SBlock::memBlockSize3D.template newType() * mData->mMultiResDiscreteIdxSpacing); + auto blockProperties = mData->blockViewGrid.getProperties(blockIdx3d); + + if (!blockProperties.isInside()) { + return false; + } + // 2. 
The block is active, check the element in the block + typename SBlock::BitMask const& bitMask = mData->activeBitField.getReference(blockIdx3d, 0); + + bool isActive = bitMask.isActive((idx.x / mData->mMultiResDiscreteIdxSpacing) % SBlock::memBlockSize3D.x, + (idx.y / mData->mMultiResDiscreteIdxSpacing) % SBlock::memBlockSize3D.y, + (idx.z / mData->mMultiResDiscreteIdxSpacing) % SBlock::memBlockSize3D.z); + return isActive; +} + +template +auto bGrid::getProperties(const index_3d& idx) + const -> typename GridBaseTemplate::CellProperties +{ + typename GridBaseTemplate::CellProperties cellProperties; + + cellProperties.setIsInside(this->isInsideDomain(idx)); + if (!cellProperties.isInside()) { + return cellProperties; + } + + if (this->getDevSet().setCardinality() == 1) { + cellProperties.init(0, DataView::INTERNAL); + } else { + const index_3d blockIdx3d = idx / SBlock::memBlockSize3D.template newType(); + auto blockViewProperty = mData->blockViewGrid.getProperties(blockIdx3d); + + cellProperties.init(blockViewProperty.getSetIdx(), + blockViewProperty.getDataView()); + } + return cellProperties; +} + +template +auto bGrid::helpGetSetIdxAndGridIdx(Neon::index_3d idx) + const -> std::tuple +{ + const index_3d blockIdx3d = idx / (SBlock::memBlockSize3D.template newType() * mData->mMultiResDiscreteIdxSpacing); + auto [setIdx, bvGridIdx] = mData->blockViewGrid.helpGetSetIdxAndGridIdx(blockIdx3d); + Idx bIdx; + bIdx.mDataBlockIdx = bvGridIdx.helpGet(); + bIdx.mInDataBlockIdx.x = static_cast((idx.x / mData->mMultiResDiscreteIdxSpacing) % SBlock::memBlockSize3D.x); + bIdx.mInDataBlockIdx.y = static_cast((idx.y / mData->mMultiResDiscreteIdxSpacing) % SBlock::memBlockSize3D.y); + bIdx.mInDataBlockIdx.z = static_cast((idx.z / mData->mMultiResDiscreteIdxSpacing) % SBlock::memBlockSize3D.z); + + return {setIdx, bIdx}; +} + +template +auto bGrid::helpGetPartitioner1D() -> Neon::domain::tool::Partitioner1D& +{ + return mData->partitioner1D; +} + +template +template +auto 
bGrid::init_mask_field(ActiveCellLambda activeCellLambda) -> void +{ + using returTypeOfLambda = typename std::invoke_result::type; + if constexpr (std::is_same_v) { + + + auto maskField = this->newField("maskField", 1, 0, Neon::DataUse::HOST_DEVICE); + maskField.getGrid().template newContainer( + "maskFieldInit", + [&](Neon::set::Loader& loader) { + auto maskFieldPartition = loader.load(maskField); + return [&, maskFieldPartition](const auto& gIdx) mutable { + details::cGrid::ClassSelector voxelClass = activeCellLambda(gIdx); + maskFieldPartition(gIdx, 0) = static_cast(voxelClass); + }; + }) + .run(Neon::Backend::mainStreamIdx); + this->getBackend().sync(Neon::Backend::mainStreamIdx); + maskField.updateDeviceData(Neon::Backend::mainStreamIdx); + this->getBackend().sync(Neon::Backend::mainStreamIdx); + this->mData->maskClassField = maskField; + return; + } +} + +template +auto bGrid::helpGetClassField() -> Field& +{ + return mData->maskClassField; +} + +} // namespace Neon::domain::details::disaggregated::bGridMask \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bIndex.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bIndex.h new file mode 100644 index 00000000..7bc7bf74 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bIndex.h @@ -0,0 +1,142 @@ +#pragma once + +#include "Neon/core/core.h" + + +namespace Neon::domain::details::disaggregated::bGridMask { + +// Common forward declarations +template +class bGrid; +template +class bSpan; +template +class bPartition; + +class MicroIndex +{ + public: + using TrayIdx = int32_t; + using InTrayIdx = int8_3d; + + NEON_CUDA_HOST_DEVICE inline explicit MicroIndex() + : MicroIndex(0, 0, 0, 0) + { + } + + NEON_CUDA_HOST_DEVICE inline explicit MicroIndex(const TrayIdx& blockIdx, + const InTrayIdx::Integer& x, + const InTrayIdx::Integer& y, + const InTrayIdx::Integer& z) + { + mTrayBlockIdx = blockIdx; + mInTrayBlockIdx.x = x; + 
mInTrayBlockIdx.y = y; + mInTrayBlockIdx.z = z; + } + + NEON_CUDA_HOST_DEVICE inline auto getInTrayBlockIdx() const -> InTrayIdx const& + { + return mInTrayBlockIdx; + } + + NEON_CUDA_HOST_DEVICE inline auto getTrayBlockIdx() const -> TrayIdx const& + { + return mTrayBlockIdx; + } + + NEON_CUDA_HOST_DEVICE inline auto setInTrayBlockIdx(InTrayIdx const& inTrayIdx) -> void + { + mInTrayBlockIdx = inTrayIdx; + } + + NEON_CUDA_HOST_DEVICE inline auto setTrayBlockIdx(TrayIdx const& trayIdx) -> void + { + mTrayBlockIdx = trayIdx; + } + + InTrayIdx mInTrayBlockIdx; + TrayIdx mTrayBlockIdx{}; +}; + +template +class bIndex +{ + public: + template + friend class bSpan; + using OuterIdx = bIndex; + + using NghIdx = int8_3d; + template + friend class bPartition; + + template + friend class bField; + + template + friend class bSpan; + template + friend class bGrid; + + + using TrayIdx = MicroIndex::TrayIdx; + using InTrayIdx = MicroIndex::InTrayIdx; + + using DataBlockCount = std::make_unsigned_t; + using DataBlockIdx = std::make_unsigned_t; + using InDataBlockIdx = InTrayIdx; + + bIndex() = default; + ~bIndex() = default; + + NEON_CUDA_HOST_DEVICE inline explicit bIndex(const DataBlockIdx& blockIdx, + const InDataBlockIdx::Integer& x, + const InDataBlockIdx::Integer& y, + const InDataBlockIdx::Integer& z); + + NEON_CUDA_HOST_DEVICE inline auto getMicroIndex() -> MicroIndex; + NEON_CUDA_HOST_DEVICE inline auto init(MicroIndex const&) -> void; + + NEON_CUDA_HOST_DEVICE inline auto getInDataBlockIdx() const -> InDataBlockIdx const&; + NEON_CUDA_HOST_DEVICE inline auto getDataBlockIdx() const -> DataBlockIdx const&; + NEON_CUDA_HOST_DEVICE inline auto setInDataBlockIdx(InDataBlockIdx const&) -> void; + NEON_CUDA_HOST_DEVICE inline auto setDataBlockIdx(DataBlockIdx const&) -> void; + NEON_CUDA_HOST_DEVICE inline auto isActive() const -> bool; + // the local index within the block + InDataBlockIdx mInDataBlockIdx; + DataBlockIdx mDataBlockIdx{}; +}; + +template 
+NEON_CUDA_HOST_DEVICE auto bIndex::setDataBlockIdx(const bIndex::DataBlockIdx& dataBlockIdx) -> void +{ + mDataBlockIdx = dataBlockIdx; +} + +template +NEON_CUDA_HOST_DEVICE auto bIndex::setInDataBlockIdx(const bIndex::InDataBlockIdx& inDataBlockIdx) -> void +{ + mInDataBlockIdx = inDataBlockIdx; +} + +template +NEON_CUDA_HOST_DEVICE auto bIndex::getDataBlockIdx() const -> const bIndex::DataBlockIdx& +{ + return mDataBlockIdx; +} +template +NEON_CUDA_HOST_DEVICE auto bIndex::getInDataBlockIdx() const -> const bIndex::InDataBlockIdx& +{ + return mInDataBlockIdx; +} + +template +NEON_CUDA_HOST_DEVICE auto bIndex::isActive() const -> bool +{ + return mDataBlockIdx != std::numeric_limits::max(); +} + +} // namespace Neon::domain::details::disaggregated::bGrid + +#include "Neon/domain/details/bGridDisgMask/bIndex_imp.h" diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bIndex_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bIndex_imp.h new file mode 100644 index 00000000..10ad51b5 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bIndex_imp.h @@ -0,0 +1,67 @@ +#pragma once +#include "Neon/domain/details/bGridDisgMask/bIndex.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + +template +NEON_CUDA_HOST_DEVICE inline bIndex:: + bIndex(const DataBlockIdx& blockIdx, + const InDataBlockIdx::Integer& x, + const InDataBlockIdx::Integer& y, + const InDataBlockIdx::Integer& z) +{ + mDataBlockIdx = blockIdx; + mInDataBlockIdx.x = x; + mInDataBlockIdx.y = y; + mInDataBlockIdx.z = z; +} + + +template +NEON_CUDA_HOST_DEVICE inline auto bIndex::getMicroIndex() -> MicroIndex +{ + + + TrayIdx const exBlockOffset = mDataBlockIdx * (SBlock::blockRatioX * SBlock::blockRatioY * SBlock::blockRatioZ); + TrayIdx const exTrayOffset = [&] { + TrayIdx const trayBlockIdxX = mInDataBlockIdx.x / SBlock::userBlockSizeX; + TrayIdx const trayBlockIdxY = mInDataBlockIdx.y / SBlock::userBlockSizeY; + TrayIdx const trayBlockIdxZ 
= mInDataBlockIdx.z / SBlock::userBlockSizeZ; + + TrayIdx const res = trayBlockIdxX + trayBlockIdxY * SBlock::blockRatioX + + trayBlockIdxZ * (SBlock::blockRatioX * SBlock::blockRatioY); + return res; + }(); + MicroIndex res; + res.setTrayBlockIdx(exBlockOffset + exTrayOffset); + res.setInTrayBlockIdx({static_cast(mInDataBlockIdx.x % SBlock::userBlockSizeX), + static_cast(mInDataBlockIdx.y % SBlock::userBlockSizeY), + static_cast(mInDataBlockIdx.z % SBlock::userBlockSizeZ)}); + return res; +} + + +template +NEON_CUDA_HOST_DEVICE inline auto bIndex::init(MicroIndex const& microIndex) -> void +{ + constexpr uint32_t memBlockSize = SBlock::memBlockSizeX * SBlock::memBlockSizeY * SBlock::memBlockSizeZ; + constexpr uint32_t userBlockSize = SBlock::userBlockSizeX * SBlock::userBlockSizeY * SBlock::userBlockSizeZ; + constexpr uint32_t blockRatioSize = memBlockSize / userBlockSize; + + constexpr uint32_t blockRatioX = SBlock::memBlockSizeX / SBlock::userBlockSizeX; + constexpr uint32_t blockRatioY = SBlock::memBlockSizeY / SBlock::userBlockSizeY; + + mDataBlockIdx = microIndex.getTrayBlockIdx() / (blockRatioSize); + + uint32_t reminder = microIndex.getTrayBlockIdx() % (blockRatioSize); + + const uint32_t reminderInZ = reminder / (blockRatioX * blockRatioY); + mInDataBlockIdx.z = static_cast(microIndex.getInTrayBlockIdx().z + reminderInZ * SBlock::userBlockSizeZ); + reminder = reminder % (blockRatioX * blockRatioY); + const uint32_t reminderInY = reminder / (blockRatioX); + mInDataBlockIdx.y = static_cast(microIndex.getInTrayBlockIdx().y + reminderInY * SBlock::userBlockSizeY); + const uint32_t reminderInX = reminder % blockRatioX; + mInDataBlockIdx.x = static_cast(microIndex.getInTrayBlockIdx().x + reminderInX * SBlock::userBlockSizeX); +} + +} // namespace Neon::domain::details::disaggregated::bGrid \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bPartition.h 
b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bPartition.h new file mode 100644 index 00000000..031e50b5 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bPartition.h @@ -0,0 +1,194 @@ +#pragma once + +#include "Neon/domain/details/bGridDisgMask/bIndex.h" +#include "Neon/domain/details/bGridDisgMask/bSpan.h" + +#include "Neon/domain/interface/NghData.h" + +#include "Neon/sys/memory/CUDASharedMemoryUtil.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + + template + class bSpan; + + template + class bPartition { + public: + using Span = bSpan; + using Idx = bIndex; + using NghIdx = typename Idx::NghIdx; + using Type = T; + using NghData = Neon::domain::NghData; + + using BlockViewGrid = Neon::domain::tool::GridTransformer::Grid; + using BlockViewGridIdx = BlockViewGrid::Idx; + + public: + bPartition(); + + ~bPartition() = default; + + explicit bPartition(int setIdx, + int mCardinality, + T *mMem, + typename Idx::DataBlockIdx *mBlockConnectivity, + typename SBlock::BitMask const *NEON_RESTRICT mMask, + Neon::int32_3d *mOrigin, + NghIdx *mStencilNghIndex, + Neon::int32_3d mDomainSize); + + /** + * Retrieve the cardinality of the field. + */ + inline NEON_CUDA_HOST_DEVICE auto + cardinality() + const -> int; + + /** + * Gets the field metadata at a cartesian point. + */ + inline NEON_CUDA_HOST_DEVICE auto + operator()(const Idx &cell, + int card) + -> T &; + + /** + * Gets the field metadata at a cartesian point. + */ + inline NEON_CUDA_HOST_DEVICE auto + operator()(const Idx &cell, + int card) + const -> const T &; + + /** + * Gets the field metadata at a neighbour cartesian point. + */ + NEON_CUDA_HOST_DEVICE inline auto + getNghData(const Idx &cell, + const NghIdx &offset, + const int card) + const -> NghData; + + /** + * Gets the field metadata at a neighbour cartesian point. 
+ */ + NEON_CUDA_HOST_DEVICE inline auto + getNghData(const Idx &eId, + uint8_t nghID, + int card) + const -> NghData; + + /** + * Gets the field metadata at a neighbour cartesian point. + */ + template + NEON_CUDA_HOST_DEVICE inline auto + getNghData(const Idx &eId, + int card) + const -> NghData; + + /** + * Gets the field metadata at a neighbour cartesian point. + */ + template + NEON_CUDA_HOST_DEVICE inline auto + getNghData(const Idx &eId, + int card, + T defaultValue) + const -> NghData; + + template + NEON_CUDA_HOST_DEVICE inline auto + getNghData(const Idx &gidx, + int card, + LambdaVALID funIfValid, + LambdaNOTValid funIfNOTValid = nullptr) + const -> std::enable_if_t && (std::is_invocable_v || + std::is_same_v), void>; + + template + NEON_CUDA_HOST_DEVICE inline auto + writeNghData(const Idx &gidx, + int card, + T value) + -> bool; + + /** + * Gets the global coordinates of the cartesian point. + */ + NEON_CUDA_HOST_DEVICE inline auto + getGlobalIndex(const Idx &cell) + const -> Neon::index_3d; + + NEON_CUDA_HOST_DEVICE inline auto + isActive(const Idx &cell, + const typename SBlock::BitMask *mask = nullptr) const -> bool; + + + NEON_CUDA_HOST_DEVICE inline auto + getDomainSize() + const -> Neon::index_3d; + + NEON_CUDA_HOST_DEVICE + auto mem() const -> T const *; + + /** + * Gets the Idx for in the block view space. 
+ */ + NEON_CUDA_HOST_DEVICE inline auto + getBlockViewIdx(const Idx &cell) + const -> BlockViewGridIdx; + + + NEON_CUDA_HOST_DEVICE inline auto + helpGetPitch(const Idx &cell, int card) + const -> uint32_t; + + NEON_CUDA_HOST_DEVICE inline auto + helpGetValidIdxPitchExplicit(const Idx &idx, int card) + const -> uint32_t; + + NEON_CUDA_HOST_DEVICE inline auto + helpNghPitch(const Idx &nghIdx, int card) + const -> std::tuple; + + NEON_CUDA_HOST_DEVICE inline auto + helpGetNghIdx(const Idx &idx, const NghIdx &offset) + const -> Idx; + + template + NEON_CUDA_HOST_DEVICE inline auto + helpGetNghIdx(const Idx &idx) + const -> Idx; + + NEON_CUDA_HOST_DEVICE inline auto + helpGetNghIdx(const Idx &idx, const NghIdx &offset, const typename Idx::DataBlockIdx *blockConnectivity) + const -> Idx; + + template + NEON_CUDA_HOST_DEVICE inline auto + helpGetNghIdx(const Idx &idx, const typename Idx::DataBlockIdx *blockConnectivity) + const -> Idx; + + + int mCardinality; + T *mMem; + NghIdx const *NEON_RESTRICT mStencilNghIndex; + typename Idx::DataBlockIdx const *NEON_RESTRICT mBlockConnectivity; + typename SBlock::BitMask const *NEON_RESTRICT mMask; + Neon::int32_3d const *NEON_RESTRICT mOrigin; + int mSetIdx; + int mMultiResDiscreteIdxSpacing = 1; + Neon::int32_3d mDomainSize; + }; + +} // namespace Neon::domain::details::disaggregated::bGrid + +#include "Neon/domain/details/bGridDisgMask/bPartition_imp.h" \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bPartition_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bPartition_imp.h new file mode 100644 index 00000000..80b635ba --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bPartition_imp.h @@ -0,0 +1,454 @@ +#pragma once + +#include "Neon/domain/details/bGridDisgMask/bGrid.h" +#include "Neon/domain/details/bGridDisgMask/bSpan.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + +template +bPartition::bPartition() + : 
mCardinality(0), + mMem(nullptr), + mStencilNghIndex(), + mBlockConnectivity(nullptr), + mMask(nullptr), + mOrigin(0), + mSetIdx(0) +{ +} + +template +bPartition:: + bPartition(int setIdx, + int cardinality, + T* mem, + typename Idx::DataBlockIdx* blockConnectivity, + typename SBlock::BitMask const* NEON_RESTRICT mask, + Neon::int32_3d* origin, + NghIdx* stencilNghIndex, + Neon::int32_3d mDomainSize) + : mCardinality(cardinality), + mMem(mem), + mStencilNghIndex(stencilNghIndex), + mBlockConnectivity(blockConnectivity), + mMask(mask), + mOrigin(origin), + mSetIdx(setIdx), + mDomainSize(mDomainSize) +{ +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getGlobalIndex(const Idx& gidx) + const -> Neon::index_3d +{ + auto location = mOrigin[gidx.mDataBlockIdx]; + location.x += gidx.mInDataBlockIdx.x; + location.y += gidx.mInDataBlockIdx.y; + location.z += gidx.mInDataBlockIdx.z; + if constexpr (SBlock::isMultiResMode) { + return location * mMultiResDiscreteIdxSpacing; + } + return location; +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getDomainSize() + const -> Neon::index_3d +{ + return mDomainSize; +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: +mem() const -> T const *{ + return mMem; +} + + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getBlockViewIdx(const Idx& gidx) + const -> BlockViewGridIdx +{ + BlockViewGridIdx res; + res.manualSet(gidx.getDataBlockIdx()); + return res; +} + +template +inline NEON_CUDA_HOST_DEVICE auto bPartition:: + cardinality() + const -> int +{ + return mCardinality; +} + +template +inline NEON_CUDA_HOST_DEVICE auto bPartition:: +operator()(const Idx& cell, + int card) -> T& +{ + return mMem[helpGetPitch(cell, card)]; +} + +template +inline NEON_CUDA_HOST_DEVICE auto bPartition:: +operator()(const Idx& cell, + int card) const -> const T& +{ + return mMem[helpGetPitch(cell, card)]; +} + +template +inline NEON_CUDA_HOST_DEVICE auto bPartition:: + helpGetPitch(const Idx& idx, 
int card) + const -> uint32_t +{ + uint32_t const pitch = helpGetValidIdxPitchExplicit(idx, card); + return pitch; +} + + +template +inline NEON_CUDA_HOST_DEVICE auto bPartition:: + helpGetValidIdxPitchExplicit(const Idx& idx, int card) + const -> uint32_t +{ + uint32_t constexpr blockPitchByCard = SBlock::memBlockSizeX * SBlock::memBlockSizeY * SBlock::memBlockSizeZ; + uint32_t const inBlockInCardPitch = idx.mInDataBlockIdx.x + + SBlock::memBlockSizeX * idx.mInDataBlockIdx.y + + (SBlock::memBlockSizeX * SBlock::memBlockSizeY) * idx.mInDataBlockIdx.z; + uint32_t const blockAdnCardPitch = (idx.mDataBlockIdx * mCardinality + card) * blockPitchByCard; + uint32_t const pitch = blockAdnCardPitch + inBlockInCardPitch; + return pitch; +} + +template +inline NEON_CUDA_HOST_DEVICE auto bPartition:: + helpNghPitch(const Idx& nghIdx, int card) + const -> std::tuple +{ + if (nghIdx.mDataBlockIdx == Span::getInvalidBlockId()) { + return {false, 0}; + } + + const bool isActive = mMask[nghIdx.mDataBlockIdx].isActive(nghIdx.mInDataBlockIdx.x, nghIdx.mInDataBlockIdx.y, nghIdx.mInDataBlockIdx.z); + if (!isActive) { + return {false, 0}; + } + auto const offset = helpGetValidIdxPitchExplicit(nghIdx, card); + return {true, offset}; +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + helpGetNghIdx(const Idx& idx, + const NghIdx& offset) + const -> Idx +{ + return this->helpGetNghIdx(idx, offset, mBlockConnectivity); +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + helpGetNghIdx(const Idx& idx, + const NghIdx& offset, + const typename Idx::DataBlockIdx* blockConnectivity) + const -> Idx +{ + + typename Idx::InDataBlockIdx ngh(idx.mInDataBlockIdx.x + offset.x, + idx.mInDataBlockIdx.y + offset.y, + idx.mInDataBlockIdx.z + offset.z); + + /** + * 0 if no offset on the direction + * 1 positive offset + * -1 negative offset + */ + const int xFlag = ngh.x < 0 ? -1 : (ngh.x >= SBlock::memBlockSizeX ? +1 : 0); + const int yFlag = ngh.y < 0 ? 
-1 : (ngh.y >= SBlock::memBlockSizeX ? +1 : 0); + const int zFlag = ngh.z < 0 ? -1 : (ngh.z >= SBlock::memBlockSizeX ? +1 : 0); + + const bool isLocal = (xFlag | yFlag | zFlag) == 0; + if (!(isLocal)) { + typename Idx::InDataBlockIdx remoteInBlockOffset; + /** + * Example + * - 8 block (1D case) + * Case 1: + * |0,1,2,3|0,1,2,3|0,1,2,3| + * ^ ^ + * -3 starting point + * + * - idx.inBlock = 2 + * - offset = -1 + * - remote.x = (2-3) - ((-1) * 4) = -1 + 4 = 3 + * Case 2: + * |0,1,2,3|0,1,2,3|0,1,2,3| + * ^ ^ + * starting point +3 from 3 + * + * - idx.inBlock = 3 + * - offset = (+3,0) + * - remote.x = (7+3) - ((+1) * 8) = 10 - 8 = 2 + * + * |0,1,2,3|0,1,2,3|0,1,2,3| + * ^ ^ + * -3 from 0 +3 from 3 + * + * NOTE: if in one direction the neighbour offet is zero, xFalg is 0; + * */ + + Idx remoteNghIdx; + remoteNghIdx.mInDataBlockIdx.x = ngh.x - xFlag * SBlock::memBlockSizeX; + remoteNghIdx.mInDataBlockIdx.y = ngh.y - yFlag * SBlock::memBlockSizeX; + remoteNghIdx.mInDataBlockIdx.z = ngh.z - zFlag * SBlock::memBlockSizeX; + + int connectivityJump = idx.mDataBlockIdx * 27 + + (xFlag + 1) + + (yFlag + 1) * 3 + + (zFlag + 1) * 9; + remoteNghIdx.mDataBlockIdx = blockConnectivity[connectivityJump]; + + return remoteNghIdx; + } else { + Idx localNghIdx; + localNghIdx.mDataBlockIdx = idx.mDataBlockIdx; + localNghIdx.mInDataBlockIdx = ngh; + return localNghIdx; + } +} + +template +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + helpGetNghIdx(const Idx& idx) + const -> Idx +{ + return this->helpGetNghIdx(idx, mBlockConnectivity); +} + +template +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + helpGetNghIdx(const Idx& idx, const typename Idx::DataBlockIdx* blockConnectivity) + const -> Idx +{ + + typename Idx::InDataBlockIdx ngh(idx.mInDataBlockIdx.x + xOff, + idx.mInDataBlockIdx.y + yOff, + idx.mInDataBlockIdx.z + zOff); + + /** + * 0 if no offset on the direction + * 1 positive offset + * -1 negative offset + */ + const int xFlag = [&] { + if constexpr (xOff 
== 0) { + return 0; + } else { + return ngh.x < 0 ? -1 : (ngh.x >= SBlock::memBlockSizeX ? +1 : 0); + } + }(); + + + const int yFlag = [&] { + if constexpr (yOff == 0) { + return 0; + } else { + return ngh.y < 0 ? -1 : (ngh.y >= SBlock::memBlockSizeX ? +1 : 0); + } + }(); + const int zFlag = [&] { + if constexpr (zOff == 0) { + return 0; + } else { + return ngh.z < 0 ? -1 : (ngh.z >= SBlock::memBlockSizeX ? +1 : 0); + } + }(); + + const bool isLocal = (xFlag | yFlag | zFlag) == 0; + if (!(isLocal)) { + typename Idx::InDataBlockIdx remoteInBlockOffset; + /** + * Example + * - 8 block (1D case) + * Case 1: + * |0,1,2,3|0,1,2,3|0,1,2,3| + * ^ ^ + * -3 starting point + * + * - idx.inBlock = 2 + * - offset = -1 + * - remote.x = (2-3) - ((-1) * 4) = -1 + 4 = 3 + * Case 2: + * |0,1,2,3|0,1,2,3|0,1,2,3| + * ^ ^ + * starting point +3 from 3 + * + * - idx.inBlock = 3 + * - offset = (+3,0) + * - remote.x = (7+3) - ((+1) * 8) = 10 - 8 = 2 + * + * |0,1,2,3|0,1,2,3|0,1,2,3| + * ^ ^ + * -3 from 0 +3 from 3 + * + * NOTE: if in one direction the neighbour offet is zero, xFalg is 0; + * */ + + Idx remoteNghIdx; + remoteNghIdx.mInDataBlockIdx.x = ngh.x - xFlag * SBlock::memBlockSizeX; + remoteNghIdx.mInDataBlockIdx.y = ngh.y - yFlag * SBlock::memBlockSizeX; + remoteNghIdx.mInDataBlockIdx.z = ngh.z - zFlag * SBlock::memBlockSizeX; + + int connectivityJump = idx.mDataBlockIdx * 27 + + (xFlag + 1) + + (yFlag + 1) * 3 + + (zFlag + 1) * 9; + remoteNghIdx.mDataBlockIdx = blockConnectivity[connectivityJump]; + + return remoteNghIdx; + } else { + Idx localNghIdx; + localNghIdx.mDataBlockIdx = idx.mDataBlockIdx; + localNghIdx.mInDataBlockIdx = ngh; + return localNghIdx; + } +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getNghData(const Idx& eId, + uint8_t nghID, + int card) + const -> NghData +{ + NghIdx nghOffset = mStencilNghIndex[nghID]; + return getNghData(eId, nghOffset, card); +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getNghData(const Idx& idx, 
+ const NghIdx& offset, + const int card) + const -> NghData +{ + NghData result; + bIndex nghIdx = helpGetNghIdx(idx, offset); + auto [isValid, pitch] = helpNghPitch(nghIdx, card); + if (!isValid) { + result.invalidate(); + return result; + } + auto const value = mMem[pitch]; + result.set(value, true); + return result; +} + +template +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getNghData(const Idx& idx, + int card) + const -> NghData +{ + NghData result; + bIndex nghIdx = helpGetNghIdx(idx); + auto [isValid, pitch] = helpNghPitch(nghIdx, card); + if (!isValid) { + result.invalidate(); + return result; + } + auto const value = mMem[pitch]; + result.set(value, true); + return result; +} + +template +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getNghData(const Idx& idx, + int card, + T defaultValue) + const -> NghData +{ + NghData result; + bIndex nghIdx = helpGetNghIdx(idx); + auto [isValid, pitch] = helpNghPitch(nghIdx, card); + if (!isValid) { + result.set(defaultValue, false); + return result; + } + auto const value = mMem[pitch]; + result.set(value, true); + return result; +} + +template + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getNghData(const Idx& gidx, + int card, + LambdaVALID funIfValid, + LambdaNOTValid funIfNOTValid) + const -> std::enable_if_t && (std::is_invocable_v || std::is_same_v), void> +{ + NghData result; + bIndex nghIdx = helpGetNghIdx(gidx); + auto [isValid, pitch] = helpNghPitch(nghIdx, card); + + if (isValid) { + auto const& value = mMem[pitch]; + funIfValid(value); + return; + } + + if constexpr (!std::is_same_v) { + funIfNOTValid(); + } + return; +} + +template +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + writeNghData(const Idx& gidx, + int card, + T value) + -> bool +{ + NghData result; + bIndex nghIdx = helpGetNghIdx(gidx); + auto [isValid, pitch] = helpNghPitch(nghIdx, card); + if (!isValid) { + return false; + } + mMem[pitch] = value; + return true; +} + +template 
+NEON_CUDA_HOST_DEVICE inline auto +bPartition::isActive(const Idx& cell, + const typename SBlock::BitMask* mask) const -> bool +{ + if (!mask) { + return mMask[cell.mDataBlockIdx].isActive(cell.mInDataBlockIdx.x, cell.mInDataBlockIdx.y, cell.mInDataBlockIdx.z); + } else { + return mask[cell.mDataBlockIdx].isActive(cell.mInDataBlockIdx.x, cell.mInDataBlockIdx.y, cell.mInDataBlockIdx.z); + } +} + +} // namespace Neon::domain::details::disaggregated::bGrid \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bSpan.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bSpan.h new file mode 100644 index 00000000..f2cb0a93 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bSpan.h @@ -0,0 +1,55 @@ +#pragma once + +#include "Neon/domain/details/bGridDisgMask/bIndex.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + +template +class bSpan +{ + public: + // bit mask information + using BitMaskWordType = uint64_t; + + static constexpr uint32_t bitMaskStorageBitWidth = 64; + static constexpr Neon::MemoryLayout activeMaskMemoryLayout = Neon::MemoryLayout::arrayOfStructs; + static constexpr uint32_t log2OfbitMaskWordSize = 6; + + using Idx = bIndex; + friend class bGrid; + + static constexpr int SpaceDim = 3; + + bSpan() = default; + virtual ~bSpan() = default; + + NEON_CUDA_HOST_DEVICE inline static auto getInvalidBlockId() + -> typename Idx::DataBlockIdx + { + return std::numeric_limits::max(); + } + + inline bSpan( + typename Idx::DataBlockCount mFirstDataBlockOffset, + typename SBlock::BitMask const* NEON_RESTRICT mActiveMask, + Neon::DataView mDataView); + + NEON_CUDA_HOST_DEVICE inline auto setAndValidateCPUDevice( + Idx& bidx, + uint32_t const& threadIdx, + uint32_t const& x, + uint32_t const& y, + uint32_t const& z) const -> bool; + + NEON_CUDA_HOST_DEVICE inline auto setAndValidateGPUDevice( + Idx& bidx) const -> bool; + + + // We don't need to have a count on active blocks + 
typename Idx::DataBlockCount mFirstDataBlockOffset; + typename SBlock::BitMask const* NEON_RESTRICT mActiveMask; + Neon::DataView mDataView; +}; +} // namespace Neon::domain::details::disaggregated::bGrid + +#include "Neon/domain/details/bGridDisgMask/bSpan_imp.h" \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bSpan_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bSpan_imp.h new file mode 100644 index 00000000..8c23df74 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMask/bSpan_imp.h @@ -0,0 +1,52 @@ +#include "Neon/domain/details/bGridDisgMask/bSpan.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + +template +NEON_CUDA_HOST_DEVICE inline auto +bSpan::setAndValidateGPUDevice([[maybe_unused]] Idx& bidx) const -> bool +{ +#ifdef NEON_PLACE_CUDA_DEVICE + bidx.mDataBlockIdx = blockIdx.x + mFirstDataBlockOffset; + bidx.mInDataBlockIdx.x = threadIdx.x; + bidx.mInDataBlockIdx.y = threadIdx.y; + bidx.mInDataBlockIdx.z = threadIdx.z; + + const bool isActive = mActiveMask[bidx.mDataBlockIdx].isActive(bidx.mInDataBlockIdx.x, bidx.mInDataBlockIdx.y, bidx.mInDataBlockIdx.z); + + return isActive; +#else + NEON_THROW_UNSUPPORTED_OPERATION("Operation supported only on GPU"); +#endif +} + +template +NEON_CUDA_HOST_DEVICE inline auto +bSpan::setAndValidateCPUDevice(Idx& bidx, + uint32_t const& dataBlockIdx, + uint32_t const& x, + uint32_t const& y, + uint32_t const& z) const -> bool +{ + + bidx.mDataBlockIdx = dataBlockIdx + mFirstDataBlockOffset; + ; + bidx.mInDataBlockIdx.x = static_cast(x); + bidx.mInDataBlockIdx.y = static_cast(y); + bidx.mInDataBlockIdx.z = static_cast(z); + const bool isActive = mActiveMask[dataBlockIdx].isActive(bidx.mInDataBlockIdx.x, bidx.mInDataBlockIdx.y, bidx.mInDataBlockIdx.z); + return isActive; +} + +template +bSpan::bSpan(typename Idx::DataBlockCount firstDataBlockOffset, + typename SBlock::BitMask const* NEON_RESTRICT activeMask, + 
Neon::DataView dataView) + : mFirstDataBlockOffset(firstDataBlockOffset), + mActiveMask(activeMask), + mDataView(dataView) +{ +} + + +} // namespace Neon::domain::details::disaggregated::bGridMask \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView.h new file mode 100644 index 00000000..5ef7df3b --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView.h @@ -0,0 +1,29 @@ +#include "Neon/domain/details/bGridDisgMgpu//BlockView/BlockViewGrid.h" +#include "Neon/domain/tools/GridTransformer.h" + +namespace Neon::domain::details::bGridMgpu { + +struct BlockView +{ + public: + using Grid = Neon::domain::tool::GridTransformer::Grid; + template + using Field = Grid::template Field; + using index_3d = Neon::index_3d; + + template + static auto helpGetReference(T* mem, const int idx, const int card) -> std::enable_if_t + { + return mem[idx * card]; + } + + template + static auto helpGetReference(T* mem, const int idx, const int card) -> std::enable_if_t + { + return mem[idx * C]; + } + + static constexpr Neon::MemoryLayout layout = Neon::MemoryLayout::arrayOfStructs; +}; + +} // namespace Neon::domain::details::bGridMgpu \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView/BlockViewGrid.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView/BlockViewGrid.h new file mode 100644 index 00000000..6047bb88 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView/BlockViewGrid.h @@ -0,0 +1,97 @@ +#pragma once +#include + +#include "Neon/core/core.h" +#include "Neon/core/types/DataUse.h" +#include "Neon/core/types/Macros.h" + +#include "Neon/set/BlockConfig.h" +#include "Neon/set/Containter.h" +#include "Neon/set/DevSet.h" +#include "Neon/set/MemoryOptions.h" + +#include "Neon/sys/memory/MemDevice.h" + +#include "Neon/domain/aGrid.h" + 
+#include "Neon/domain/interface/GridBaseTemplate.h" +#include "Neon/domain/interface/GridConcept.h" +#include "Neon/domain/interface/KernelConfig.h" +#include "Neon/domain/interface/LaunchConfig.h" +#include "Neon/domain/interface/Stencil.h" +#include "Neon/domain/interface/common.h" + +#include "Neon/domain/tools/GridTransformer.h" +#include "Neon/domain/tools/SpanTable.h" + +#include "Neon/domain/details/eGrid/eGrid.h" +#include "Neon/domain/patterns/PatternScalar.h" + +#include "BlockViewPartition.h" + +namespace Neon::domain::details::bGridMgpu { + +namespace details { +struct GridTransformation +{ + template + using Partition = BlockViewPartition; + using Span = Neon::domain::details::eGrid::eSpan; + static constexpr Neon::set::internal::ContainerAPI::DataViewSupport dataViewSupport = Neon::set::internal::ContainerAPI::DataViewSupport::on; + + using FoundationGrid = Neon::domain::details::eGrid::eGrid; + static constexpr Neon::set::details::ExecutionThreadSpan executionThreadSpan = FoundationGrid::executionThreadSpan; + using ExecutionThreadSpanIndexType = int32_t; + using Idx = FoundationGrid::Idx; + + static auto getDefaultBlock(FoundationGrid& foundationGrid) -> Neon::index_3d const& + { + return foundationGrid.getDefaultBlock(); + } + + static auto initSpan(FoundationGrid& foundationGrid, Neon::domain::tool::SpanTable& spanTable) -> void + { + spanTable.forEachConfiguration([&](Neon::Execution execution, + Neon::SetIdx setIdx, + Neon::DataView dw, + Span& span) { + span = foundationGrid.getSpan(execution, setIdx, dw); + }); + } + + static auto initLaunchParameters(FoundationGrid& foundationGrid, + Neon::DataView dataView, + const Neon::index_3d& blockSize, + const size_t& shareMem) -> Neon::set::LaunchParameters + { + return foundationGrid.getLaunchParameters(dataView, blockSize, shareMem); + } + + static auto helpGetGridIdx(FoundationGrid&, + Neon::SetIdx const&, + FoundationGrid::Idx const& fgIdx) + -> GridTransformation::Idx + { + 
GridTransformation::Idx tgIdx = fgIdx; + return tgIdx; + } + + template + static auto initFieldPartition(FoundationGrid::Field& foundationField, + Neon::domain::tool::PartitionTable>& partitionTable) -> void + { + partitionTable.forEachConfiguration( + [&](Neon::Execution execution, + Neon::SetIdx setIdx, + Neon::DataView dw, + Partition& partition) { + auto& foundationPartition = foundationField.getPartition(execution, setIdx, dw); + partition = Partition(foundationPartition); + }); + } +}; +using BlockViewGrid = Neon::domain::tool::GridTransformer::Grid; + +} // namespace details + +} // namespace Neon::domain::details::bGridMgpu diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView/BlockViewPartition.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView/BlockViewPartition.h new file mode 100644 index 00000000..e1d7be2b --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView/BlockViewPartition.h @@ -0,0 +1,42 @@ +#pragma once +#include +#include "Neon/core/core.h" +#include "Neon/core/types/Macros.h" +#include "Neon/domain/details/eGrid/eGrid.h" +#include "Neon/domain/details/eGrid/eIndex.h" +#include "Neon/domain/interface/NghData.h" +#include "Neon/set/DevSet.h" +#include "Neon/sys/memory/CudaIntrinsics.h" +#include "cuda_fp16.h" + +namespace Neon::domain::details::bGridMgpu { + +template +class BlockViewPartition : public Neon::domain::details::eGrid::ePartition +{ + public: + BlockViewPartition() + { + } + BlockViewPartition(Neon::domain::details::eGrid::ePartition ePartition) + : Neon::domain::details::eGrid::ePartition(ePartition) + { + } + + template + static auto getInBlockIdx(typename Neon::domain::details::eGrid::ePartition::Idx const& idx, + uint8_3d const& inBlockLocation) -> BlockIdexType + { + BlockIdexType blockIdx(idx.helpGet(), inBlockLocation); + return blockIdx; // fix: return the constructed block index, not the raw in-block offset + } + + auto getCountAllocated() const -> int32_t; +}; +template +auto BlockViewPartition::getCountAllocated()
const -> int32_t +{ + return this->mCountAllocated; +} +} // namespace Neon::domain::details::bGrid diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView/BlockViewPartition_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView/BlockViewPartition_imp.h new file mode 100644 index 00000000..4464f686 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView/BlockViewPartition_imp.h @@ -0,0 +1,184 @@ +#pragma once + +#include "Neon/domain/details//eGrid/ePartition.h" + +namespace Neon::domain::details::bGridMgpu { + + +template +NEON_CUDA_HOST_DEVICE auto +ePartition::prtID() const + -> int +{ + return mPrtID; +} + +template +template +inline NEON_CUDA_HOST_DEVICE auto +ePartition::cardinality() const + -> std::enable_if_t +{ + return mCardinality; +} + +template +template +constexpr inline NEON_CUDA_HOST_DEVICE auto +ePartition::cardinality() const + -> std::enable_if_t +{ + return C; +} + +template +NEON_CUDA_HOST_DEVICE inline auto +ePartition::operator()(eIndex eId, int cardinalityIdx) const + -> T +{ + Offset jump = getOffset(eId, cardinalityIdx); + return mMem[jump]; +} + +template +NEON_CUDA_HOST_DEVICE inline auto +ePartition::operator()(eIndex eId, int cardinalityIdx) -> T& +{ + Offset jump = getOffset(eId, cardinalityIdx); + return mMem[jump]; +} + +template +NEON_CUDA_HOST_DEVICE inline auto +ePartition::getNghData(eIndex eId, + NghIdx nghIdx, + int card, + const Type& alternativeVal) + const -> NghData +{ + eIndex eIdxNgh; + const bool isValidNeighbour = isValidNgh(eId, nghIdx, eIdxNgh); + T val = (isValidNeighbour) ? 
this->operator()(eIdxNgh, card) : alternativeVal; + // printf("(prtId %d)getNghData id %d card %d eIdxNgh %d val %d\n", + // mPrtID, eId.mIdx, card, eIdxNgh.mIdx, int(val)); + return NghData(val, isValidNeighbour); +} + +template +NEON_CUDA_HOST_DEVICE inline auto +ePartition::getNghData(eIndex eId, + const Neon::int8_3d& ngh3dIdx, + int card, + const Type& alternativeVal) + const -> NghData +{ + int tablePithc = (ngh3dIdx.x + mStencilRadius) + + (ngh3dIdx.y + mStencilRadius) * mStencilTableYPitch + + (ngh3dIdx.z + mStencilRadius) * mStencilTableYPitch * mStencilTableYPitch; + NghIdx nghIdx = mStencil3dTo1dOffset[tablePithc]; + NghData res = getNghData(eId, nghIdx, card, alternativeVal); + + return res; +} + +template +NEON_CUDA_HOST_DEVICE inline auto +ePartition::isValidNgh(eIndex eId, + NghIdx nghIdx, + eIndex& neighbourIdx) const + -> bool +{ + const eIndex::Offset connectivityJumo = mCountAllocated * nghIdx + eId.get(); + neighbourIdx.set() = NEON_CUDA_CONST_LOAD((mConnectivity + connectivityJumo)); + const bool isValidNeighbour = (neighbourIdx.mIdx > -1); +// printf("(prtId %d) getNghData id %d eIdxNgh %d connectivityJumo %d\n", +// mPrtID, +// eId.mIdx, neighbourIdx.mIdx, connectivityJumo); + return isValidNeighbour; +} + +template +NEON_CUDA_HOST_DEVICE inline auto +ePartition::getGlobalIndex(eIndex eIndex) const + -> Neon::index_3d +{ + Neon::index_3d loc; + const auto baseAddr = mOrigins + eIndex.get(); + loc = mOrigins[eIndex.get()]; + return loc; +} + +template +ePartition::ePartition(int prtId, + T* mem, + ePitch pitch, + int32_t cardinality, + int32_t countAllocated, + Offset* connRaw, + Neon::index_3d* toGlobal, + int8_t* stencil3dTo1dOffset, + int32_t stencilRadius) +{ + mPrtID = prtId; + mMem = mem; + mPitch = pitch; + mCardinality = cardinality; + mCountAllocated = countAllocated; + + mConnectivity = connRaw; + mOrigins = toGlobal; + + mStencil3dTo1dOffset = stencil3dTo1dOffset; + mStencilTableYPitch = 2 * stencilRadius + 1; + + mStencilRadius = 
stencilRadius; +} + +template +NEON_CUDA_HOST_DEVICE auto +ePartition::pointer(eIndex eId, int cardinalityIdx) const + -> const Type* +{ + Offset jump = getOffset(eId, cardinalityIdx); + return mMem + jump; +} + +template +NEON_CUDA_HOST_DEVICE inline auto +ePartition::getOffset(eIndex eId, int cardinalityIdx) const + -> Offset +{ + return Offset(eId.get() * mPitch.x + cardinalityIdx * mPitch.y); +} + +template +NEON_CUDA_HOST_DEVICE inline auto +ePartition::mem() + -> T* +{ + return mMem; +} + +template +NEON_CUDA_HOST_DEVICE inline auto +ePartition::mem() const + -> const T* +{ + return mMem; +} + +} // namespace Neon::domain::details::bGridMgpu diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/StaticBlock.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/StaticBlock.h new file mode 100644 index 00000000..7fdd4577 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/StaticBlock.h @@ -0,0 +1,106 @@ +#pragma once + +#include "Neon/domain/details/bGridDisgMgpu/bSpan.h" + +namespace Neon::domain::details::bGridMgpu { + +template +struct StaticBlock +{ + public: + constexpr static uint32_t memBlockSizeX = memBlockSizeX_; + constexpr static uint32_t memBlockSizeY = memBlockSizeY_; + constexpr static uint32_t memBlockSizeZ = memBlockSizeZ_; + constexpr static Neon::uint32_3d memBlockSize3D = Neon::uint32_3d(memBlockSizeX, memBlockSizeY, memBlockSizeZ); + + constexpr static uint32_t userBlockSizeX = userBlockSizeX_; + constexpr static uint32_t userBlockSizeY = userBlockSizeY_; + constexpr static uint32_t userBlockSizeZ = userBlockSizeZ_; + constexpr static Neon::uint32_3d userBlockSize3D = Neon::uint32_3d(userBlockSizeX, userBlockSizeY, userBlockSizeZ); + + constexpr static uint32_t blockRatioX = memBlockSizeX / userBlockSizeX; + constexpr static uint32_t blockRatioY = memBlockSizeY / userBlockSizeY; + constexpr static uint32_t blockRatioZ = memBlockSizeZ / userBlockSizeZ; + + constexpr static uint32_t memBlockPitchX = 1; + 
constexpr static uint32_t memBlockPitchY = memBlockSizeX; + constexpr static uint32_t memBlockPitchZ = memBlockSizeX * memBlockSizeY; + + constexpr static bool isMultiResMode = isMultiResMode_; + + constexpr static uint32_t memBlockCountElements = memBlockSizeX * memBlockSizeY * memBlockSizeZ; + + static_assert(memBlockSizeX >= userBlockSizeX); + static_assert(memBlockSizeY >= userBlockSizeY); + static_assert(memBlockSizeZ >= userBlockSizeZ); + + static_assert(memBlockSizeX % userBlockSizeX == 0); + static_assert(memBlockSizeY % userBlockSizeY == 0); + static_assert(memBlockSizeZ % userBlockSizeZ == 0); + + struct BitMask + { + using BitMaskWordType = uint32_t; + auto reset() -> void + { + for (BitMaskWordType i = 0; i < nWords; ++i) { + bits[i] = 0; + } + } + + auto setActive(int threadX, + int threadY, + int threadZ) -> void + { + BitMaskWordType mask; + uint32_t wordIdx; + getMaskAndWordI(threadX, threadY, threadZ, mask, wordIdx); + auto& word = bits[wordIdx]; + word = word | mask; + } + + inline auto NEON_CUDA_HOST_DEVICE isActive(int threadX, + int threadY, + int threadZ) const -> bool + { + BitMaskWordType mask; + uint32_t wordIdx; + getMaskAndWordI(threadX, threadY, threadZ, mask, wordIdx); + auto& word = bits[wordIdx]; + return (word & mask) != 0; + } + + static inline auto NEON_CUDA_HOST_DEVICE getMaskAndWordI(int threadX, + int threadY, + int threadZ, + NEON_OUT BitMaskWordType& mask, + NEON_OUT uint32_t& wordIdx) -> void + { + const uint32_t threadPitch = threadX * memBlockPitchX + + threadY * memBlockPitchY + + threadZ * memBlockPitchZ; + + // threadPitch >> log2_of_bitPerWord + // the same as: threadPitch / 2^{log2_of_bitPerWord} + wordIdx = threadPitch >> log2_of_bitPerWord; + // threadPitch & ((bitMaskWordType(bitMaskStorageBitWidth)) - 1); + // same as threadPitch % 2^{log2OfbitMaskWordSize} + const uint32_t offsetInWord = threadPitch & ((BitMaskWordType(bitPerWord)) - 1); + mask = BitMaskWordType(1) << offsetInWord; + } + + constexpr static 
BitMaskWordType nWords = (memBlockCountElements + 31) / 32; + static constexpr uint32_t log2_of_bitPerWord = 5; + static constexpr uint32_t bitPerWord = 32; + + BitMaskWordType bits[nWords]; + }; +}; + +} // namespace Neon::domain::details::bGrid \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bField.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bField.h new file mode 100644 index 00000000..ef911c45 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bField.h @@ -0,0 +1,120 @@ +#pragma once +#include "Neon/domain/details/bGridDisgMgpu/bPartition.h" +#include "Neon/domain/interface/FieldBaseTemplate.h" +#include "Neon/set/patterns/BlasSet.h" + +#include "Neon/core/core.h" +#include "Neon/core/types/Macros.h" + +#include "Neon/set/DevSet.h" +#include "Neon/set/HuOptions.h" +#include "Neon/set/MemoryTransfer.h" + +#include "Neon/domain/interface/FieldBaseTemplate.h" +#include "Neon/domain/tools/HaloUpdateTable1DPartitioning.h" +#include "Neon/domain/tools/PartitionTable.h" +#include "bPartition.h" + +namespace Neon::domain::details::bGridMgpu { + + +template +class bField : public Neon::domain::interface::FieldBaseTemplate, + bPartition, + int> +{ + friend bGrid; + + public: + using Type = T; + using Grid = bGrid; + using Field = bField; + using Partition = bPartition; + using Idx = bIndex; + using BlockViewGrid = Neon::domain::tool::GridTransformer::Grid; + template + using BlockViewField = BlockViewGrid::template Field; + + using NghIdx = typename Partition::NghIdx; + using NghData = typename Partition::NghData; + + bField(const std::string& fieldUserName, + Neon::DataUse dataUse, + Neon::MemoryOptions memoryOptions, + const Grid& grid, + int cardinality, + T inactiveValue); + + bField(); + + virtual ~bField() = default; + + auto getPartition(Neon::Execution, + Neon::SetIdx, + const Neon::DataView& dataView) const -> const Partition& final; + + auto getPartition(Neon::Execution, + 
Neon::SetIdx, + const Neon::DataView& dataView) -> Partition& final; + + auto isInsideDomain(const Neon::index_3d& idx) const -> bool; + + + auto operator()(const Neon::index_3d& idx, + const int& cardinality) const -> T final; + + auto getReference(const Neon::index_3d& idx, + const int& cardinality) -> T& final; + + auto updateHostData(int streamId = 0) -> void final; + + auto updateDeviceData(int streamId = 0) -> void final; + + auto newHaloUpdate(Neon::set::StencilSemantic semantic, + Neon::set::TransferMode transferMode, + Neon::Execution execution) + const -> Neon::set::Container; + + auto getMemoryField() -> BlockViewGrid::Field&; + + + private: + auto getRef(const Neon::index_3d& idx, const int& cardinality) const -> T&; + + auto initHaloUpdateTable() -> void; + + + struct Data + { + Data() = default; + Data(Neon::Backend const& bk) + { + mPartitionTable.init(bk); + } + + enum EndPoints + { + src = 1, + dst = 0 + }; + + struct EndPointsUtils + { + static constexpr int nConfigs = 2; + }; + + std::shared_ptr grid; + BlockViewField memoryField; + int cardinality; + + Neon::domain::tool::HaloTable1DPartitioning mLatticeHaloUpdateTable; + Neon::domain::tool::HaloTable1DPartitioning mStandardHaloUpdateTable; + Neon::domain::tool::PartitionTable mPartitionTable; + }; + std::shared_ptr mData; +}; + + +} // namespace Neon::domain::details::bGridMgpu diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bField_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bField_imp.h new file mode 100644 index 00000000..80f807f6 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bField_imp.h @@ -0,0 +1,523 @@ +#pragma once + +#include "Neon/domain/details/bGridDisgMgpu/bField.h" + +namespace Neon::domain::details::bGridMgpu { + +template +bField::bField() +{ + mData = std::make_shared(); +} + +template +bField::bField(const std::string& fieldUserName, + Neon::DataUse dataUse, + Neon::MemoryOptions memoryOptions, + const Grid& 
grid, + int cardinality, + T inactiveValue) + : Neon::domain::interface::FieldBaseTemplate(&grid, + fieldUserName, + "bField", + cardinality, + inactiveValue, + dataUse, + memoryOptions, + Neon::domain::haloStatus_et::e::ON) +{ + mData = std::make_shared(grid.getBackend()); + mData->grid = std::make_shared(grid); + + if (memoryOptions.getOrder() == Neon::MemoryLayout::arrayOfStructs) { + NEON_WARNING("bField does not support MemoryLayout::arrayOfStructs, enforcing MemoryLayout::structOfArrays"); + memoryOptions.setOrder(Neon::MemoryLayout::structOfArrays); + } + // the allocation size is the number of blocks x block size x cardinality + mData->memoryField = mData->grid->getBlockViewGrid().template newField( + "BitMask", + [&] { + int elPerBlock = SBlock::memBlockCountElements * cardinality; + return elPerBlock; + }(), + inactiveValue, + dataUse, + mData->grid->getBackend().getMemoryOptions(bSpan::activeMaskMemoryLayout)); + + + { // Setting up mPartitionTable + // const int setCardinality = mData->grid->getBackend().getDeviceCount(); + mData->mPartitionTable.forEachConfiguration( + [&](Neon::Execution execution, + Neon::SetIdx setIdx, + Neon::DataView, + Partition& partition) { + auto& partitioner = mData->grid->helpGetPartitioner1D(); + auto firstBup = partitioner.getSpanLayout().getBoundsBoundary(setIdx, Neon::domain::tool::partitioning::ByDirection::up).first; + auto firstBdw = partitioner.getSpanLayout().getBoundsBoundary(setIdx, Neon::domain::tool::partitioning::ByDirection::down).first; + auto firstGup = partitioner.getSpanLayout().getGhostBoundary(setIdx, Neon::domain::tool::partitioning::ByDirection::up).first; + auto firstGdw = partitioner.getSpanLayout().getGhostBoundary(setIdx, Neon::domain::tool::partitioning::ByDirection::down).first; + auto lastGdw = firstGdw + partitioner.getSpanLayout().getGhostBoundary(setIdx, Neon::domain::tool::partitioning::ByDirection::down).count; + + auto& memoryFieldPartition = mData->memoryField.getPartition(execution, 
setIdx, Neon::DataView::STANDARD); + auto& blockConnectivity = mData->grid->helpGetBlockConnectivity().getPartition(execution, setIdx, Neon::DataView::STANDARD); + auto& bitmask = mData->grid->getActiveBitMask().getPartition(execution, setIdx, Neon::DataView::STANDARD); + auto& dataBlockOrigins = mData->grid->helpGetDataBlockOriginField().getPartition(execution, setIdx, Neon::DataView::STANDARD); + + partition = bPartition(setIdx, + cardinality, + memoryFieldPartition.mem(), + blockConnectivity.mem(), + bitmask.mem(), + dataBlockOrigins.mem(), + mData->grid->helpGetStencilIdTo3dOffset().rawMem(execution, setIdx), + mData->grid->getDimension(), + firstBup, + firstBdw, + firstGup, + firstGdw, + lastGdw); + }); + } + + initHaloUpdateTable(); +} + +template +auto bField::getMemoryField() -> BlockViewGrid::Field& +{ + return mData->memoryField; +} + +template +auto bField::isInsideDomain(const Neon::index_3d& idx) const -> bool +{ + return mData->grid->isInsideDomain(idx); +} + +template +auto bField::getReference(const Neon::index_3d& cartesianIdx, + const int& cardinality) -> T& +{ + if constexpr (SBlock::isMultiResMode) { + auto& grid = this->getGrid(); + auto uniformCartesianIdx = cartesianIdx / grid.helGetMultiResDiscreteIdxSpacing(); + + if (cartesianIdx.x % grid.helGetMultiResDiscreteIdxSpacing() != 0 || + cartesianIdx.y % grid.helGetMultiResDiscreteIdxSpacing() != 0 || + cartesianIdx.z % grid.helGetMultiResDiscreteIdxSpacing() != 0) { + NeonException exp("bField::getReference"); + exp << "Input index is not multiple of the grid resolution"; + exp << "Index = " << cartesianIdx; + NEON_THROW(exp); + } + auto [setIdx, bIdx] = grid.helpGetSetIdxAndGridIdx(uniformCartesianIdx); + auto& partition = getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD); + auto& result = partition(bIdx, cardinality); + return result; + } else { + auto& grid = this->getGrid(); + auto [setIdx, bIdx] = grid.helpGetSetIdxAndGridIdx(cartesianIdx); + auto& partition = 
getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD); + auto& result = partition(bIdx, cardinality); + return result; + } +} + +template +auto bField::operator()(const Neon::index_3d& cartesianIdx, + const int& cardinality) const -> T +{ + auto& grid = this->getGrid(); + auto [setIdx, bIdx] = grid.helpGetSetIdxAndGridIdx(cartesianIdx); + if (setIdx.idx() == -1) { + return this->getOutsideValue(); + } + auto& partition = getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD); + auto& result = partition(bIdx, cardinality); + return result; +} + +template +auto bField::updateHostData(int streamId) -> void +{ + mData->memoryField.updateHostData(streamId); +} + +template +auto bField::updateDeviceData(int streamId) -> void +{ + mData->memoryField.updateDeviceData(streamId); +} + +template +auto bField::getPartition(Neon::Execution execution, + Neon::SetIdx setIdx, + const Neon::DataView& dataView) const -> const Partition& +{ + const Neon::DataUse dataUse = this->getDataUse(); + bool isOk = Neon::ExecutionUtils::checkCompatibility(dataUse, execution); + if (isOk) { + Partition const& result = mData->mPartitionTable.getPartition(execution, setIdx, dataView); + return result; + } + std::stringstream message; + message << "The requested execution mode ( " << execution << " ) is not compatible with the field DataUse (" << dataUse << ")"; + NEON_THROW_UNSUPPORTED_OPERATION(message.str()); +} + +template +auto bField::getPartition(Neon::Execution execution, + Neon::SetIdx setIdx, + const Neon::DataView& dataView) -> Partition& +{ + const Neon::DataUse dataUse = this->getDataUse(); + bool isOk = Neon::ExecutionUtils::checkCompatibility(dataUse, execution); + if (isOk) { + Partition& result = mData->mPartitionTable.getPartition(execution, setIdx, dataView); + return result; + } + std::stringstream message; + message << "The requested execution mode ( " << execution << " ) is not compatible with the field DataUse (" << dataUse << ")"; + 
NEON_THROW_UNSUPPORTED_OPERATION(message.str()); +} + +template +auto bField::newHaloUpdate(Neon::set::StencilSemantic stencilSemantic, + Neon::set::TransferMode transferMode, + Neon::Execution execution) const -> Neon::set::Container +{ + + + // We need to define a graph of Containers + // One for the actual memory transfer + // One for the synchronization + // The order depends on the transfer mode: put or get + Neon::set::Container dataTransferContainer; + auto const& bk = this->getGrid().getBackend(); + + if (stencilSemantic == Neon::set::StencilSemantic::standard) { + auto transfers = bk.template newDataSet>(); + + if (this->getMemoryOptions().getOrder() == Neon::MemoryLayout::structOfArrays) { + for (auto byDirection : {tool::partitioning::ByDirection::up, + tool::partitioning::ByDirection::down}) { + + auto const& tableEntryByDir = mData->mStandardHaloUpdateTable.get(transferMode, + execution, + byDirection); + + tableEntryByDir.forEachSeq([&](SetIdx setIdx, auto const& tableEntryByDirBySetIdx) { + transfers[setIdx].insert(std::end(transfers[setIdx]), + std::begin(tableEntryByDirBySetIdx), + std::end(tableEntryByDirBySetIdx)); + }); + } + dataTransferContainer = + Neon::set::Container::factoryDataTransfer( + *this, + transferMode, + stencilSemantic, + transfers, + execution); + + + } else { + NEON_THROW_UNSUPPORTED_OPERATION(""); + } + } else { + auto transfers = bk.template newDataSet>(); + + if (this->getMemoryOptions().getOrder() == Neon::MemoryLayout::structOfArrays) { + for (auto byDirection : {tool::partitioning::ByDirection::up, + tool::partitioning::ByDirection::down}) { + + auto const& tableEntryByDir = mData->mLatticeHaloUpdateTable.get(transferMode, + execution, + byDirection); + + tableEntryByDir.forEachSeq([&](SetIdx setIdx, auto const& tableEntryByDirBySetIdx) { + transfers[setIdx].insert(std::end(transfers[setIdx]), + std::begin(tableEntryByDirBySetIdx), + std::end(tableEntryByDirBySetIdx)); + }); + } + dataTransferContainer = + 
Neon::set::Container::factoryDataTransfer( + *this, + transferMode, + stencilSemantic, + transfers, + execution); + + + } else { + NEON_THROW_UNSUPPORTED_OPERATION(""); + } + } + Neon::set::Container SyncContainer = + Neon::set::Container::factorySynchronization( + *this, + Neon::set::SynchronizationContainerType::hostOmpBarrier); + + Neon::set::container::Graph graph(this->getBackend()); + const auto& dataTransferNode = graph.addNode(dataTransferContainer); + const auto& syncNode = graph.addNode(SyncContainer); + + switch (transferMode) { + case Neon::set::TransferMode::put: + graph.addDependency(dataTransferNode, syncNode, Neon::GraphDependencyType::data); + break; + case Neon::set::TransferMode::get: + graph.addDependency(syncNode, dataTransferNode, Neon::GraphDependencyType::data); + break; + default: + NEON_THROW_UNSUPPORTED_OPTION(); + break; + } + + graph.removeRedundantDependencies(); + + Neon::set::Container output = + Neon::set::Container::factoryGraph("dGrid-Halo-Update", + graph, + [](Neon::SetIdx, Neon::set::Loader&) {}); + return output; +} + +template +auto bField::initHaloUpdateTable() -> void +{ + // NEON_THROW_UNSUPPORTED_OPERATION(""); + auto& grid = this->getGrid(); + auto bk = grid.getBackend(); + auto getNghSetIdx = [&](SetIdx setIdx, Neon::domain::tool::partitioning::ByDirection direction) { + int res; + if (direction == Neon::domain::tool::partitioning::ByDirection::up) { + res = (setIdx + 1) % bk.getDeviceCount(); + } else { + res = (setIdx + bk.getDeviceCount() - 1) % bk.getDeviceCount(); + } + return res; + }; + + mData->mStandardHaloUpdateTable.forEachPutConfiguration( + bk, [&]( + Neon::SetIdx setIdxSend, + Execution execution, + Neon::domain::tool::partitioning::ByDirection byDirection, + std::vector& transfersVec) { + { + using namespace Neon::domain::tool::partitioning; + + if (ByDirection::up == byDirection && bk.isLastDevice(setIdxSend)) { + return; + } + + if (ByDirection::down == byDirection && bk.isFirstDevice(setIdxSend)) { + 
return; + } + + + Neon::SetIdx setIdxRecv = getNghSetIdx(setIdxSend, byDirection); + + Partition* partitionsRecv = &this->getPartition(execution, setIdxRecv, Neon::DataView::STANDARD); + Partition* partitionsSend = &this->getPartition(execution, setIdxSend, Neon::DataView::STANDARD); + + auto const recvDirection = byDirection == ByDirection::up + ? ByDirection::down + : ByDirection::up; + auto const sendDirection = byDirection; + + int const ghostSectorFirstBlockIdx = + recvDirection == ByDirection::up + ? partitionsRecv->helpGetSectorFirstBlock(Partition::Sectors::gUp) + : partitionsRecv->helpGetSectorFirstBlock(Partition::Sectors::gDw); + + int const boundarySectorFirstBlockIdx = + sendDirection == ByDirection::up + ? partitionsSend->helpGetSectorFirstBlock(Partition::Sectors::bUp) + : partitionsSend->helpGetSectorFirstBlock(Partition::Sectors::bDw); + + auto const msgLengthInBlocks = partitionsSend->helpGetSectorLength(sendDirection == ByDirection::up + ? Partition::Sectors::bUp + : Partition::Sectors::bDw); + + + for (int c = 0; c < this->getCardinality(); c++) { + + auto const recvPitch = [&] { + Idx idx; + typename Idx::InDataBlockIdx inDataBlockIdx; + if (recvDirection == ByDirection::up) { + inDataBlockIdx = typename Idx::InDataBlockIdx(0, 0, 0); + } else { + inDataBlockIdx = typename Idx::InDataBlockIdx(0, 0, SBlock::memBlockSizeZ - 1); + } + idx.setInDataBlockIdx(inDataBlockIdx); + idx.setDataBlockIdx(ghostSectorFirstBlockIdx); + + auto pitch = partitionsRecv->helpGetPitch(idx, c); + return pitch; + }(); + + auto const sendPitch = [&] { + typename Idx::InDataBlockIdx inDataBlockIdx; + if (sendDirection == ByDirection::up) { + inDataBlockIdx = typename Idx::InDataBlockIdx(0, 0, SBlock::memBlockSizeZ - 1); + } else { + inDataBlockIdx = typename Idx::InDataBlockIdx(0, 0, 0); + } + Idx idx; + idx.setInDataBlockIdx(inDataBlockIdx); + idx.setDataBlockIdx(boundarySectorFirstBlockIdx); + auto pitch = partitionsSend->helpGetPitch(idx, c); + return pitch; + }(); + 
+ auto const msgSizePerCardinality = [&] { + // All blocks are mapped into a 3D grid, where blocks are places one after the other in a 1D mapping + // for each block we send only the top or bottom slice + // Therefore the size of the message is equal to the number of blocks in the sector + // by the size of element in a slice of a block... + auto size = msgLengthInBlocks * SBlock::memBlockSizeX * SBlock::memBlockSizeY; + return size; + }(); + + T const* sendMem = partitionsSend->mem(); + T const* recvMem = partitionsRecv->mem(); + + + Neon::set::MemoryTransfer transfer({setIdxRecv, (void*)(recvMem + recvPitch)}, + {setIdxSend, (void*)(sendMem + sendPitch)}, + sizeof(T) * msgSizePerCardinality); + + transfersVec.push_back(transfer); + } + } + }); + + mData->mLatticeHaloUpdateTable.forEachPutConfiguration( + bk, [&]( + Neon::SetIdx setIdxSend, + Execution execution, + Neon::domain::tool::partitioning::ByDirection byDirection, + std::vector& transfersVec) { + { + using namespace Neon::domain::tool::partitioning; + + if (ByDirection::up == byDirection && bk.isLastDevice(setIdxSend)) { + return; + } + + if (ByDirection::down == byDirection && bk.isFirstDevice(setIdxSend)) { + return; + } + + + Neon::SetIdx setIdxRecv = getNghSetIdx(setIdxSend, byDirection); + Partition* partitionsRecv = &this->getPartition(execution, setIdxRecv, Neon::DataView::STANDARD); + Partition* partitionsSend = &this->getPartition(execution, setIdxSend, Neon::DataView::STANDARD); + + auto const recvDirection = byDirection == ByDirection::up + ? ByDirection::down + : ByDirection::up; + auto const sendDirection = byDirection; + + int const ghostSectorFirstBlockIdx = + recvDirection == ByDirection::up + ? partitionsRecv->helpGetSectorFirstBlock(Partition::Sectors::gUp) + : partitionsRecv->helpGetSectorFirstBlock(Partition::Sectors::gDw); + + int const boundarySectorFirstBlockIdx = + sendDirection == ByDirection::up + ? 
partitionsSend->helpGetSectorFirstBlock(Partition::Sectors::bUp) + : partitionsSend->helpGetSectorFirstBlock(Partition::Sectors::bDw); + + auto const msgLengthInBlocks = partitionsSend->helpGetSectorLength(sendDirection == ByDirection::up + ? Partition::Sectors::bUp + : Partition::Sectors::bDw); + + bool canBeFusedWithPrevious = false; + + for (int c = 0; c < this->getCardinality(); c++) { + auto const& stencil = this->getGrid().getStencil(); + + auto const recvPitch = [&] { + Idx idx; + typename Idx::InDataBlockIdx inDataBlockIdx; + if (recvDirection == ByDirection::up) { + inDataBlockIdx = typename Idx::InDataBlockIdx(0, 0, 0); + } else { + inDataBlockIdx = typename Idx::InDataBlockIdx(0, 0, SBlock::memBlockSizeZ - 1); + } + idx.setInDataBlockIdx(inDataBlockIdx); + idx.setDataBlockIdx(ghostSectorFirstBlockIdx); + + auto pitch = partitionsRecv->helpGetPitch(idx, c); + return pitch; + }(); + + auto const sendPitch = [&] { + typename Idx::InDataBlockIdx inDataBlockIdx; + if (sendDirection == ByDirection::up) { + inDataBlockIdx = typename Idx::InDataBlockIdx(0, 0, SBlock::memBlockSizeZ - 1); + } else { + inDataBlockIdx = typename Idx::InDataBlockIdx(0, 0, 0); + } + Idx idx; + idx.setInDataBlockIdx(inDataBlockIdx); + idx.setDataBlockIdx(boundarySectorFirstBlockIdx); + auto pitch = partitionsSend->helpGetPitch(idx, c); + return pitch; + }(); + + auto const msgSizePerCardinality = [&] { + // All blocks are mapped into a 3D grid, where blocks are places one after the other in a 1D mapping + // for each block we send only the top or bottom slice + // Therefore the size of the message is equal to the number of blocks in the sector + // by the size of element in a slice of a block... 
+ auto size = msgLengthInBlocks * SBlock::memBlockSizeX * SBlock::memBlockSizeY; + return size; + }(); + + T const* sendMem = partitionsSend->mem(); + T const* recvMem = partitionsRecv->mem(); + + + Neon::set::MemoryTransfer transfer({setIdxRecv, (void*)(recvMem + recvPitch)}, + {setIdxSend, (void*)(sendMem + sendPitch)}, + sizeof(T) * msgSizePerCardinality); + + if (ByDirection::up == sendDirection && !(stencil.points()[c].z > 0)) { + std::cout << "c " << c << " " << stencil.points()[c] << "skipped" << std::endl; + canBeFusedWithPrevious = false; + continue; + } + if (ByDirection::down == sendDirection && !(stencil.points()[c].z < 0)) { + std::cout << "c " << c << " " << stencil.points()[c] << "skipped" << std::endl; + canBeFusedWithPrevious = false; + continue; + } + if (canBeFusedWithPrevious) { + + const T* begin = (recvMem + recvPitch); + const T* previous = (((T*)(transfersVec[transfersVec.size() - 1].src.mem)) + msgSizePerCardinality); + if (begin != previous) { + NEON_THROW_UNSUPPORTED_OPTION("begin != transfersVec[transfersVec.size() - 1].dst"); + } + transfersVec[transfersVec.size() - 1].size += sizeof(T) * msgSizePerCardinality; + std::cout << "c " << c << " " << stencil.points()[c] << "fused" << std::endl + << "new size = " << transfersVec[transfersVec.size() - 1].size << std::endl; + } else { + transfersVec.push_back(transfer); + std::cout << "c " << c << " " << stencil.points()[c] << "added " << transfer.toString() << std::endl; + canBeFusedWithPrevious = false; + } + } + } + }); +} + + +} // namespace Neon::domain::details::bGridMgpu diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bGrid.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bGrid.h new file mode 100644 index 00000000..b7d42d94 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bGrid.h @@ -0,0 +1,243 @@ +#pragma once +#include "Neon/core/core.h" + +#include "Neon/domain/aGrid.h" +#include 
"Neon/domain/details/bGridDisgMgpu/BlockView.h" +#include "Neon/domain/details/bGridDisgMgpu/StaticBlock.h" +#include "Neon/domain/details/bGridDisgMgpu/bField.h" +#include "Neon/domain/details/bGridDisgMgpu/bIndex.h" +#include "Neon/domain/details/bGridDisgMgpu/bPartition.h" +#include "Neon/domain/details/bGridDisgMgpu/bSpan.h" +#include "Neon/domain/interface/GridBaseTemplate.h" +#include "Neon/domain/patterns/PatternScalar.h" +#include "Neon/domain/tools/Partitioner1D.h" +#include "Neon/domain/tools/PointHashTable.h" +#include "Neon/domain/tools/SpanTable.h" +#include "Neon/set/Containter.h" +#include "Neon/set/LaunchParametersTable.h" +#include "Neon/set/memory/memSet.h" + +#include "bField.h" +#include "bPartition.h" +#include "bSpan.h" + +namespace Neon::domain::details::bGridMgpu { + + +template +class bField; + +template +class bGrid : public Neon::domain::interface::GridBaseTemplate, + bIndex> +{ + public: + using Grid = bGrid; + template + using Partition = bPartition; + template + using Field = Neon::domain::details::bGridMgpu::bField; + + using Span = bSpan; + using NghIdx = typename Partition::NghIdx; + using GridBaseTemplate = Neon::domain::interface::GridBaseTemplate>; + + using Idx = bIndex; + static constexpr Neon::set::details::ExecutionThreadSpan executionThreadSpan = Neon::set::details::ExecutionThreadSpan::d1b3; + using ExecutionThreadSpanIndexType = uint32_t; + + using BlockIdx = uint32_t; + + bGrid() = default; + virtual ~bGrid(); + + /** + * Constructor for the vanilla block data structure with depth of 1 + */ + template + bGrid(const Neon::Backend& backend, + const Neon::int32_3d& domainSize, + const ActiveCellLambda activeCellLambda, + const Neon::domain::Stencil& stencil, + const double_3d& spacingData = double_3d(1, 1, 1), + const double_3d& origin = double_3d(0, 0, 0), + Neon::domain::tool::spaceCurves::EncoderType encoderType = Neon::domain::tool::spaceCurves::EncoderType::sweep); + + + /** + * Constructor for bGrid. 
This constructor should be directly used only by mGrid + */ + template + bGrid(const Neon::Backend& backend /**< Neon backend for the computation */, + const Neon::int32_3d& domainSize /**< Size of the bounded Cartesian */, + const ActiveCellLambda activeCellLambda /**< Function that identify the user domain inside the boxed Cartesian discretization */, + const Neon::domain::Stencil& stencil /**< union of tall the stencil that will be used in the computation */, + const int multiResDiscreteIdxSpacing /**< Parameter for the multi-resolution. Index i and index (i+1) may be remapped as i*voxelSpacing and (i+1)* voxelSpacing. + * For a uniform bGrid, i.e outside the context of multi-resolution this parameter is always 1 */ + , + const double_3d& spacingData /** Physical spacing between two consecutive data points in the Cartesian domain */, + const double_3d& origin /** Physical location in space of the origin of the Cartesian discretization */, + Neon::domain::tool::spaceCurves::EncoderType encoderType = Neon::domain::tool::spaceCurves::EncoderType::sweep); + + /** + * Returns some properties for a given cartesian in the Cartesian domain. 
The provided index may be inside or outside the user-defined bounded Cartesian domain
+ */ + auto getSpan(Neon::Execution execution, + SetIdx setIdx, + Neon::DataView dataView) -> const Span&; + + /** + * Retrieve the block vew grid internally used. + * This grid can be leverage to allocate data at the block level. + */ + auto getBlockViewGrid() const -> BlockView::Grid&; + + + /** + * Retrieve the block vew grid internally used. + * This grid can be leverage to allocate data at the block level. + */ + auto getActiveBitMask() const -> BlockView::Field&; + + /** + * Helper function to retrieve the discrete index spacing used for the multi-resolution + */ + template + auto helGetMultiResDiscreteIdxSpacing() const -> std::enable_if_t; + + + /** + * Help function to retrieve the block connectivity as a BlockViewGrid field + */ + auto helpGetBlockConnectivity() const -> BlockView::Field&; + + /** + * Help function to retrieve the block origin as a BlockViewGrid field + */ + auto helpGetDataBlockOriginField() const -> Neon::aGrid::Field&; + + /** + * Help function to retrieve the map that converts a stencil point id to 3d offset + */ + auto helpGetStencilIdTo3dOffset() const -> Neon::set::MemSet&; + + auto helpGetPartitioner1D() -> Neon::domain::tool::Partitioner1D&; + + /** + * Help function retriev the device and the block index associated to a point in the BlockViewGrid grid + */ + auto helpGetSetIdxAndGridIdx(Neon::index_3d idx) const -> std::tuple; + + struct Data + { + auto init(const Neon::Backend& bk) + { + spanTable.init(bk); + launchParametersTable.init(bk); + } + + Neon::domain::tool::SpanTable spanTable /** Span for each data view configurations */; + Neon::set::LaunchParametersTable launchParametersTable; + + Neon::domain::tool::Partitioner1D partitioner1D; + Stencil stencil; + Neon::sys::patterns::Engine reduceEngine; + + Neon::aGrid memoryGrid /** memory allocator for fields */; + Neon::aGrid::Field mDataBlockOriginField; + Neon::set::MemSet mStencil3dTo1dOffset; + + BlockView::Grid blockViewGrid; + BlockView::Field activeBitField; + 
BlockView::Field blockConnectivity; + Neon::set::MemSet stencilIdTo3dOffset; + + int mMultiResDiscreteIdxSpacing; + + // number of active voxels in each block + Neon::set::DataSet mNumActiveVoxel; + + + // Stencil neighbor indices + Neon::set::MemSet mStencilNghIndex; + }; + std::shared_ptr mData; +}; + +constexpr int defaultBlockSize = 4; +using bGridMgpuDefault = bGrid>; +extern template class bGrid>; +} // namespace Neon::domain::details::bGridMgpu + +#include "bField_imp.h" +#include "bGrid_imp.h" diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bGrid_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bGrid_imp.h new file mode 100644 index 00000000..1ebc0468 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bGrid_imp.h @@ -0,0 +1,470 @@ +#include "Neon/domain/details/bGridDisgMgpu/bGrid.h" +#include "Neon/domain/tools/SpaceCurves.h" + +namespace Neon::domain::details::bGridMgpu { + +template +template +bGrid::bGrid(const Neon::Backend& backend, + const Neon::int32_3d& domainSize, + const ActiveCellLambda activeCellLambda, + const Neon::domain::Stencil& stencil, + const double_3d& spacingData, + const double_3d& origin, + Neon::domain::tool::spaceCurves::EncoderType encoderType) + : bGrid(backend, domainSize, activeCellLambda, stencil, 1, spacingData, origin, encoderType) +{ +} + +template +template +bGrid::bGrid(const Neon::Backend& backend, + const Neon::int32_3d& domainSize, + const ActiveCellLambda activeCellLambda, + const Neon::domain::Stencil& stencil, + const int multiResDiscreteIdxSpacing, + const double_3d& spacingData, + const double_3d& origin, + Neon::domain::tool::spaceCurves::EncoderType encoderType) +{ + + + mData = std::make_shared(); + mData->init(backend); + + mData->mMultiResDiscreteIdxSpacing = multiResDiscreteIdxSpacing; + mData->stencil = stencil; + const index_3d defaultKernelBlockSize(SBlock::memBlockSizeX, + SBlock::memBlockSizeY, + SBlock::memBlockSizeZ); + + std::stringstream 
gridName; + gridName << "bGridMgpu_" << SBlock::memBlockSizeX << "_" + << SBlock::memBlockSizeY << "_" + << SBlock::memBlockSizeZ; + { + auto nElementsPerPartition = backend.devSet().template newDataSet(0); + // We do an initialization with nElementsPerPartition to zero, + // then we reset to the computed number. + + bGrid::GridBase::init(gridName.str(), + backend, + domainSize, + stencil, + nElementsPerPartition, + defaultKernelBlockSize, + multiResDiscreteIdxSpacing, + origin, + encoderType, + defaultKernelBlockSize); + } + + { // Initialization of the partitioner + + mData->partitioner1D = Neon::domain::tool::Partitioner1D( + backend, + activeCellLambda, + nullptr, + SBlock::memBlockSize3D.template newType(), + domainSize, + Neon::domain::Stencil::s27_t(false), + encoderType, + multiResDiscreteIdxSpacing); + + mData->mDataBlockOriginField = mData->partitioner1D.getGlobalMapping(); + mData->mStencil3dTo1dOffset = mData->partitioner1D.getStencil3dTo1dOffset(); + mData->memoryGrid = mData->partitioner1D.getMemoryGrid(); + } + + { // BlockViewGrid + Neon::domain::details::eGrid::eGrid egrid( + backend, + mData->partitioner1D.getBlockSpan(), + mData->partitioner1D, + Neon::domain::Stencil::s27_t(false), + spacingData * SBlock::memBlockSize3D, + origin); + + mData->blockViewGrid = BlockView::Grid(egrid); + } + + { // Active bitmask + mData->activeBitField = mData->blockViewGrid.template newField( + "BlockViewBitMask", + 1, + [] { + typename SBlock::BitMask outsideBitMask; + outsideBitMask.reset(); + return outsideBitMask; + }(), + Neon::DataUse::HOST_DEVICE, backend.getMemoryOptions(BlockView::layout)); + + mData->mNumActiveVoxel = backend.devSet().template newDataSet(); + + mData->activeBitField + .getGrid() + .template newContainer( + "activeBitMaskInit", + [&, this](Neon::set::Loader& loader) { + auto bitMaskPartition = loader.load(mData->activeBitField); + return [&, bitMaskPartition](const auto& bitMaskIdx) mutable { + auto prtIdx = bitMaskPartition.prtID(); + 
int countActive = 0; + auto const blockOrigin = bitMaskPartition.getGlobalIndex(bitMaskIdx); + typename SBlock::BitMask& bitMask = bitMaskPartition(bitMaskIdx, 0); + bitMask.reset(); + + for (int k = 0; k < SBlock::memBlockSize3D.template newType().z; k++) { + for (int j = 0; j < SBlock::memBlockSize3D.template newType().y; j++) { + for (int i = 0; i < SBlock::memBlockSize3D.template newType().x; i++) { + auto globalPosition = blockOrigin + Neon::int32_3d(i * this->mData->mMultiResDiscreteIdxSpacing, + j * this->mData->mMultiResDiscreteIdxSpacing, + k * this->mData->mMultiResDiscreteIdxSpacing); + bool const isInDomain = globalPosition < domainSize * this->mData->mMultiResDiscreteIdxSpacing; + bool const isActive = activeCellLambda(globalPosition); + if (isActive && isInDomain) { + countActive++; + bitMask.setActive(i, j, k); + } + } + } + } +#pragma omp critical + { + this->mData->mNumActiveVoxel[prtIdx] += countActive; + } + }; + }) + .run(Neon::Backend::mainStreamIdx); + + + mData->activeBitField.updateDeviceData(Neon::Backend::mainStreamIdx); + this->getBackend().sync(Neon::Backend::mainStreamIdx); + mData->activeBitField.newHaloUpdate(Neon::set::StencilSemantic::standard, + Neon::set::TransferMode::put, + Neon::Execution::device) + .run(Neon::Backend::mainStreamIdx); + } + + + { // Neighbor blocks + mData->blockConnectivity = mData->blockViewGrid.template newField("blockConnectivity", + 27, + Span::getInvalidBlockId(), + Neon::DataUse::HOST_DEVICE, + Neon::MemoryLayout::arrayOfStructs); + + mData->blockConnectivity.getGrid().template newContainer( + "blockConnectivityInit", + [&](Neon::set::Loader& loader) { + auto blockConnectivity = loader.load(mData->blockConnectivity); + return [&, blockConnectivity](auto const& idx) mutable { + for (int8_t k = 0; k < 3; k++) { + for (int8_t j = 0; j < 3; j++) { + for (int8_t i = 0; i < 3; i++) { + auto targetDirection = i + 3 * j + 3 * 3 * k; + BlockIdx blockNghIdx = Span::getInvalidBlockId(); + typename 
decltype(blockConnectivity)::Idx nghIdx; + Neon::int8_3d stencilPoint(i - int8_t(1), + j - int8_t(1), + k - int8_t(1)); + bool isValid = blockConnectivity.getNghIndex(idx, stencilPoint, nghIdx); + if (isValid) { + blockNghIdx = static_cast(nghIdx.helpGet()); + } + blockConnectivity(idx, targetDirection) = blockNghIdx; + } + } + } + }; + }) + .run(Neon::Backend::mainStreamIdx); + mData->blockConnectivity.updateDeviceData(Neon::Backend::mainStreamIdx); + } + + // Initialization of the SPAN table + mData->spanTable.forEachConfiguration([&](Neon::Execution execution, + Neon::SetIdx setIdx, + Neon::DataView dw, + Span& span) { + span.mDataView = dw; + switch (dw) { + case Neon::DataView::STANDARD: { + span.mFirstDataBlockOffset = 0; + span.mDataView = dw; + span.mActiveMask = mData->activeBitField.getPartition(execution, setIdx, dw).mem(); + break; + } + case Neon::DataView::BOUNDARY: { + span.mFirstDataBlockOffset = mData->partitioner1D.getSpanClassifier().countInternal(setIdx); + span.mDataView = dw; + span.mActiveMask = mData->activeBitField.getPartition(execution, setIdx, dw).mem(); + + break; + } + case Neon::DataView::INTERNAL: { + span.mFirstDataBlockOffset = 0; + span.mDataView = dw; + span.mActiveMask = mData->activeBitField.getPartition(execution, setIdx, dw).mem(); + break; + } + default: { + NeonException exc("dFieldDev"); + NEON_THROW(exc); + } + } + }); + + { // Stencil Idx to 3d offset + auto nPoints = backend.devSet().newDataSet(stencil.nNeighbours()); + mData->stencilIdTo3dOffset = backend.devSet().template newMemSet(Neon::DataUse::HOST_DEVICE, + 1, + backend.getMemoryOptions(), + nPoints); + for (int i = 0; i < stencil.nNeighbours(); ++i) { + for (int devIdx = 0; devIdx < backend.devSet().setCardinality(); devIdx++) { + index_3d pLong = stencil.neighbours()[i]; + Neon::int8_3d pShort = pLong.newType(); + mData->stencilIdTo3dOffset.eRef(devIdx, i) = pShort; + } + } + mData->stencilIdTo3dOffset.updateDeviceData(backend, Neon::Backend::mainStreamIdx); + } 
+ // Init the base grid + bGrid::GridBase::init(gridName.str(), + backend, + domainSize, + Neon::domain::Stencil(), + mData->mNumActiveVoxel, + SBlock::memBlockSize3D.template newType(), + spacingData, + origin, + encoderType, + defaultKernelBlockSize); + { // setting launchParameters + mData->launchParametersTable.forEachSeq([&](Neon::DataView dw, + Neon::set::LaunchParameters& bLaunchParameters) { + auto defEGridBlock = mData->blockViewGrid.getDefaultBlock(); + auto eGridParams = mData->blockViewGrid.getLaunchParameters(dw, defEGridBlock, 0); + eGridParams.forEachSeq([&](Neon::SetIdx setIdx, Neon::sys::GpuLaunchInfo const& launchSingleDev) { + auto eDomainGridSize = launchSingleDev.domainGrid(); + assert(eDomainGridSize.y == 1); + assert(eDomainGridSize.z == 1); + int nBlocks = static_cast(eDomainGridSize.x); + bLaunchParameters.get(setIdx).set(Neon::sys::GpuLaunchInfo::mode_e::cudaGridMode, + nBlocks, SBlock::memBlockSize3D.template newType(), 0); + }); + }); + } +} + +template +template +auto bGrid::newField(const std::string name, + int cardinality, + T inactiveValue, + Neon::DataUse dataUse, + Neon::MemoryOptions memoryOptions) const -> Field +{ + memoryOptions = this->getDevSet().sanitizeMemoryOption(memoryOptions); + Field field(name, dataUse, memoryOptions, *this, cardinality, inactiveValue); + + return field; +} + +template +template +auto bGrid::newBlockViewField(const std::string name, + int cardinality, + T inactiveValue, + Neon::DataUse dataUse, + Neon::MemoryOptions memoryOptions) const -> BlockView::Field +{ + memoryOptions = this->getDevSet().sanitizeMemoryOption(memoryOptions); + BlockView::Field blockViewField = mData->blockViewGrid.template newField(name, cardinality, inactiveValue, dataUse, memoryOptions); + return blockViewField; +} + +template +template +auto bGrid::newContainer(const std::string& name, + index_3d blockSize, + size_t sharedMem, + LoadingLambda lambda) const -> Neon::set::Container +{ + Neon::set::Container kContainer = 
Neon::set::Container::factory(name, + Neon::set::internal::ContainerAPI::DataViewSupport::on, + *this, + lambda, + blockSize, + [sharedMem](const Neon::index_3d&) { return sharedMem; }); + return kContainer; +} + +template +template +auto bGrid::newContainer(const std::string& name, + LoadingLambda lambda) const -> Neon::set::Container +{ + const Neon::index_3d& defaultBlockSize = this->getDefaultBlock(); + Neon::set::Container kContainer = Neon::set::Container::factory(name, + Neon::set::internal::ContainerAPI::DataViewSupport::on, + *this, + lambda, + defaultBlockSize, + [](const Neon::index_3d&) { return 0; }); + return kContainer; +} + +template +auto bGrid:: + getBlockViewGrid() + const -> BlockView::Grid& +{ + return mData->blockViewGrid; +} + +template +auto bGrid:: + getActiveBitMask() + const -> BlockView::Field& +{ + return mData->activeBitField; +} + +/** + * Helper function to retrieve the discrete index spacing used for the multi-resolution + */ +template +template +auto bGrid::helGetMultiResDiscreteIdxSpacing() const + -> std::enable_if_t +{ + return mData->mMultiResDiscreteIdxSpacing; +} + +template +auto bGrid:: + helpGetBlockConnectivity() + const -> BlockView::Field& +{ + return mData->blockConnectivity; +} +template +auto bGrid:: + helpGetDataBlockOriginField() + const -> Neon::aGrid::Field& +{ + return mData->mDataBlockOriginField; +} +template +auto bGrid::getSpan(Neon::Execution execution, + SetIdx setIdx, + Neon::DataView dataView) -> const bGrid::Span& +{ + return mData->spanTable.getSpan(execution, setIdx, dataView); +} + +template +bGrid::~bGrid() +{ +} +template +auto bGrid::getSetIdx(const index_3d& idx) const -> int32_t +{ + typename GridBaseTemplate::CellProperties cellProperties; + + cellProperties.setIsInside(this->isInsideDomain(idx)); + if (!cellProperties.isInside()) { + return -1; + } + Neon::SetIdx setIdx = cellProperties.getSetIdx(); + return setIdx; +} +template +auto bGrid::getLaunchParameters(Neon::DataView dataView, + const 
index_3d&, + const size_t& sharedMem) const -> Neon::set::LaunchParameters +{ + auto res = mData->launchParametersTable.get(dataView); + res.forEachSeq([&](SetIdx const& /*setIdx*/, + Neon::set::LaunchParameters::launchInfo_e& launchParams) -> void { + launchParams.setShm(sharedMem); + }); + return res; +} + +template +auto bGrid:: + helpGetStencilIdTo3dOffset() + const -> Neon::set::MemSet& +{ + return mData->stencilIdTo3dOffset; +} + +template +auto bGrid::isInsideDomain(const index_3d& idx) const -> bool +{ + // 1. check if the block is active + const BlockView::index_3d blockIdx3d = idx / (SBlock::memBlockSize3D.template newType() * mData->mMultiResDiscreteIdxSpacing); + auto blockProperties = mData->blockViewGrid.getProperties(blockIdx3d); + + if (!blockProperties.isInside()) { + return false; + } + // 2. The block is active, check the element in the block + typename SBlock::BitMask const& bitMask = mData->activeBitField.getReference(blockIdx3d, 0); + + bool isActive = bitMask.isActive((idx.x / mData->mMultiResDiscreteIdxSpacing) % SBlock::memBlockSize3D.x, + (idx.y / mData->mMultiResDiscreteIdxSpacing) % SBlock::memBlockSize3D.y, + (idx.z / mData->mMultiResDiscreteIdxSpacing) % SBlock::memBlockSize3D.z); + return isActive; +} + +template +auto bGrid::getProperties(const index_3d& idx) + const -> typename GridBaseTemplate::CellProperties +{ + typename GridBaseTemplate::CellProperties cellProperties; + + cellProperties.setIsInside(this->isInsideDomain(idx)); + if (!cellProperties.isInside()) { + return cellProperties; + } + + if (this->getDevSet().setCardinality() == 1) { + cellProperties.init(0, DataView::INTERNAL); + } else { + const index_3d blockIdx3d = idx / SBlock::memBlockSize3D.template newType(); + auto blockViewProperty = mData->blockViewGrid.getProperties(blockIdx3d); + + cellProperties.init(blockViewProperty.getSetIdx(), + blockViewProperty.getDataView()); + } + return cellProperties; +} + +template +auto 
bGrid::helpGetSetIdxAndGridIdx(Neon::index_3d idx) + const -> std::tuple +{ + const index_3d blockIdx3d = idx / (SBlock::memBlockSize3D.template newType() * mData->mMultiResDiscreteIdxSpacing); + auto [setIdx, bvGridIdx] = mData->blockViewGrid.helpGetSetIdxAndGridIdx(blockIdx3d); + Idx bIdx; + bIdx.mDataBlockIdx = bvGridIdx.helpGet(); + bIdx.mInDataBlockIdx.x = static_cast((idx.x / mData->mMultiResDiscreteIdxSpacing) % SBlock::memBlockSize3D.x); + bIdx.mInDataBlockIdx.y = static_cast((idx.y / mData->mMultiResDiscreteIdxSpacing) % SBlock::memBlockSize3D.y); + bIdx.mInDataBlockIdx.z = static_cast((idx.z / mData->mMultiResDiscreteIdxSpacing) % SBlock::memBlockSize3D.z); + + return {setIdx, bIdx}; +} + +template +auto bGrid::helpGetPartitioner1D() -> Neon::domain::tool::Partitioner1D& +{ + return mData->partitioner1D; +} + +} // namespace Neon::domain::details::bGrid \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bIndex.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bIndex.h new file mode 100644 index 00000000..0452cfc9 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bIndex.h @@ -0,0 +1,142 @@ +#pragma once + +#include "Neon/core/core.h" + + +namespace Neon::domain::details::bGridMgpu { + +// Common forward declarations +template +class bGrid; +template +class bSpan; +template +class bPartition; + +class MicroIndex +{ + public: + using TrayIdx = int32_t; + using InTrayIdx = int8_3d; + + NEON_CUDA_HOST_DEVICE inline explicit MicroIndex() + : MicroIndex(0, 0, 0, 0) + { + } + + NEON_CUDA_HOST_DEVICE inline explicit MicroIndex(const TrayIdx& blockIdx, + const InTrayIdx::Integer& x, + const InTrayIdx::Integer& y, + const InTrayIdx::Integer& z) + { + mTrayBlockIdx = blockIdx; + mInTrayBlockIdx.x = x; + mInTrayBlockIdx.y = y; + mInTrayBlockIdx.z = z; + } + + NEON_CUDA_HOST_DEVICE inline auto getInTrayBlockIdx() const -> InTrayIdx const& + { + return mInTrayBlockIdx; + } + + 
NEON_CUDA_HOST_DEVICE inline auto getTrayBlockIdx() const -> TrayIdx const& + { + return mTrayBlockIdx; + } + + NEON_CUDA_HOST_DEVICE inline auto setInTrayBlockIdx(InTrayIdx const& inTrayIdx) -> void + { + mInTrayBlockIdx = inTrayIdx; + } + + NEON_CUDA_HOST_DEVICE inline auto setTrayBlockIdx(TrayIdx const& trayIdx) -> void + { + mTrayBlockIdx = trayIdx; + } + + InTrayIdx mInTrayBlockIdx; + TrayIdx mTrayBlockIdx{}; +}; + +template +class bIndex +{ + public: + template + friend class bSpan; + using OuterIdx = bIndex; + + using NghIdx = int8_3d; + template + friend class bPartition; + + template + friend class bField; + + template + friend class bSpan; + template + friend class bGrid; + + + using TrayIdx = MicroIndex::TrayIdx; + using InTrayIdx = MicroIndex::InTrayIdx; + + using DataBlockCount = std::make_unsigned_t; + using DataBlockIdx = std::make_unsigned_t; + using InDataBlockIdx = InTrayIdx; + + bIndex() = default; + ~bIndex() = default; + + NEON_CUDA_HOST_DEVICE inline explicit bIndex(const DataBlockIdx& blockIdx, + const InDataBlockIdx::Integer& x, + const InDataBlockIdx::Integer& y, + const InDataBlockIdx::Integer& z); + + NEON_CUDA_HOST_DEVICE inline auto getMicroIndex() -> MicroIndex; + NEON_CUDA_HOST_DEVICE inline auto init(MicroIndex const&) -> void; + + NEON_CUDA_HOST_DEVICE inline auto getInDataBlockIdx() const -> InDataBlockIdx const&; + NEON_CUDA_HOST_DEVICE inline auto getDataBlockIdx() const -> DataBlockIdx const&; + NEON_CUDA_HOST_DEVICE inline auto setInDataBlockIdx(InDataBlockIdx const&) -> void; + NEON_CUDA_HOST_DEVICE inline auto setDataBlockIdx(DataBlockIdx const&) -> void; + NEON_CUDA_HOST_DEVICE inline auto isActive() const -> bool; + // the local index within the block + InDataBlockIdx mInDataBlockIdx; + DataBlockIdx mDataBlockIdx{}; +}; + +template +NEON_CUDA_HOST_DEVICE auto bIndex::setDataBlockIdx(const bIndex::DataBlockIdx& dataBlockIdx) -> void +{ + mDataBlockIdx = dataBlockIdx; +} + +template +NEON_CUDA_HOST_DEVICE auto 
bIndex::setInDataBlockIdx(const bIndex::InDataBlockIdx& inDataBlockIdx) -> void +{ + mInDataBlockIdx = inDataBlockIdx; +} + +template +NEON_CUDA_HOST_DEVICE auto bIndex::getDataBlockIdx() const -> const bIndex::DataBlockIdx& +{ + return mDataBlockIdx; +} +template +NEON_CUDA_HOST_DEVICE auto bIndex::getInDataBlockIdx() const -> const bIndex::InDataBlockIdx& +{ + return mInDataBlockIdx; +} + +template +NEON_CUDA_HOST_DEVICE auto bIndex::isActive() const -> bool +{ + return mDataBlockIdx != std::numeric_limits::max(); +} + +} // namespace Neon::domain::details::bGrid + +#include "Neon/domain/details/bGrid/bIndex_imp.h" diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bIndex_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bIndex_imp.h new file mode 100644 index 00000000..9da43b62 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bIndex_imp.h @@ -0,0 +1,67 @@ +#pragma once +#include "Neon/domain/details/bGridDisgMgpu/bIndex.h" + +namespace Neon::domain::details::bGridMgpu { + +template +NEON_CUDA_HOST_DEVICE inline bIndex:: + bIndex(const DataBlockIdx& blockIdx, + const InDataBlockIdx::Integer& x, + const InDataBlockIdx::Integer& y, + const InDataBlockIdx::Integer& z) +{ + mDataBlockIdx = blockIdx; + mInDataBlockIdx.x = x; + mInDataBlockIdx.y = y; + mInDataBlockIdx.z = z; +} + + +template +NEON_CUDA_HOST_DEVICE inline auto bIndex::getMicroIndex() -> MicroIndex +{ + + + TrayIdx const exBlockOffset = mDataBlockIdx * (SBlock::blockRatioX * SBlock::blockRatioY * SBlock::blockRatioZ); + TrayIdx const exTrayOffset = [&] { + TrayIdx const trayBlockIdxX = mInDataBlockIdx.x / SBlock::userBlockSizeX; + TrayIdx const trayBlockIdxY = mInDataBlockIdx.y / SBlock::userBlockSizeY; + TrayIdx const trayBlockIdxZ = mInDataBlockIdx.z / SBlock::userBlockSizeZ; + + TrayIdx const res = trayBlockIdxX + trayBlockIdxY * SBlock::blockRatioX + + trayBlockIdxZ * (SBlock::blockRatioX * SBlock::blockRatioY); + return res; + }(); + 
MicroIndex res; + res.setTrayBlockIdx(exBlockOffset + exTrayOffset); + res.setInTrayBlockIdx({static_cast(mInDataBlockIdx.x % SBlock::userBlockSizeX), + static_cast(mInDataBlockIdx.y % SBlock::userBlockSizeY), + static_cast(mInDataBlockIdx.z % SBlock::userBlockSizeZ)}); + return res; +} + + +template +NEON_CUDA_HOST_DEVICE inline auto bIndex::init(MicroIndex const& microIndex) -> void +{ + constexpr uint32_t memBlockSize = SBlock::memBlockSizeX * SBlock::memBlockSizeY * SBlock::memBlockSizeZ; + constexpr uint32_t userBlockSize = SBlock::userBlockSizeX * SBlock::userBlockSizeY * SBlock::userBlockSizeZ; + constexpr uint32_t blockRatioSize = memBlockSize / userBlockSize; + + constexpr uint32_t blockRatioX = SBlock::memBlockSizeX / SBlock::userBlockSizeX; + constexpr uint32_t blockRatioY = SBlock::memBlockSizeY / SBlock::userBlockSizeY; + + mDataBlockIdx = microIndex.getTrayBlockIdx() / (blockRatioSize); + + uint32_t reminder = microIndex.getTrayBlockIdx() % (blockRatioSize); + + const uint32_t reminderInZ = reminder / (blockRatioX * blockRatioY); + mInDataBlockIdx.z = static_cast(microIndex.getInTrayBlockIdx().z + reminderInZ * SBlock::userBlockSizeZ); + reminder = reminder % (blockRatioX * blockRatioY); + const uint32_t reminderInY = reminder / (blockRatioX); + mInDataBlockIdx.y = static_cast(microIndex.getInTrayBlockIdx().y + reminderInY * SBlock::userBlockSizeY); + const uint32_t reminderInX = reminder % blockRatioX; + mInDataBlockIdx.x = static_cast(microIndex.getInTrayBlockIdx().x + reminderInX * SBlock::userBlockSizeX); +} + +} // namespace Neon::domain::details::bGrid \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bPartition.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bPartition.h new file mode 100644 index 00000000..666f813b --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bPartition.h @@ -0,0 +1,218 @@ +#pragma once + +#include 
"Neon/domain/details/bGridDisgMgpu//bIndex.h" +#include "Neon/domain/details/bGridDisgMgpu//bSpan.h" + +#include "Neon/domain/interface/NghData.h" + +#include "Neon/sys/memory/CUDASharedMemoryUtil.h" + +namespace Neon::domain::details::bGridMgpu { + +template +class bSpan; + +template +class bPartition +{ + public: + enum Sectors + { + bUp = 0, + bDw = 1, + gUp = 2, + gDw = 3, + after = 4, + first = bUp + }; + + using Span = bSpan; + using Idx = bIndex; + using NghIdx = typename Idx::NghIdx; + using Type = T; + using NghData = Neon::domain::NghData; + + using BlockViewGrid = Neon::domain::tool::GridTransformer::Grid; + using BlockViewGridIdx = BlockViewGrid::Idx; + + public: + bPartition(); + + ~bPartition() = default; + + explicit bPartition(int setIdx, + int mCardinality, + T* mMem, + typename Idx::DataBlockIdx* mBlockConnectivity, + typename SBlock::BitMask const* NEON_RESTRICT mMask, + Neon::int32_3d* mOrigin, + NghIdx* mStencilNghIndex, + Neon::int32_3d mDomainSize, + typename Idx::DataBlockCount mFirstDataBUP, + typename Idx::DataBlockCount mFirstDataBDW, + typename Idx::DataBlockCount mFirstDataGUP, + typename Idx::DataBlockCount mFirstDataGDW, + typename Idx::DataBlockCount mLastDataGDW); + + /** + * Retrieve the cardinality of the field. + */ + inline NEON_CUDA_HOST_DEVICE auto + cardinality() + const -> int; + + /** + * Gets the field metadata at a cartesian point. + */ + inline NEON_CUDA_HOST_DEVICE auto + operator()(const Idx& cell, + int card) + -> T&; + + /** + * Gets the field metadata at a cartesian point. + */ + inline NEON_CUDA_HOST_DEVICE auto + operator()(const Idx& cell, + int card) + const -> const T&; + + /** + * Gets the field metadata at a neighbour cartesian point. + */ + NEON_CUDA_HOST_DEVICE inline auto + getNghData(const Idx& cell, + const NghIdx& offset, + const int card) + const -> NghData; + + /** + * Gets the field metadata at a neighbour cartesian point. 
+ */ + NEON_CUDA_HOST_DEVICE inline auto + getNghData(const Idx& eId, + uint8_t nghID, + int card) + const -> NghData; + + /** + * Gets the field metadata at a neighbour cartesian point. + */ + template + NEON_CUDA_HOST_DEVICE inline auto + getNghData(const Idx& eId, + int card) + const -> NghData; + + /** + * Gets the field metadata at a neighbour cartesian point. + */ + template + NEON_CUDA_HOST_DEVICE inline auto + getNghData(const Idx& eId, + int card, + T defaultValue) + const -> NghData; + + template + NEON_CUDA_HOST_DEVICE inline auto + getNghData(const Idx& gidx, + int card, + LambdaVALID funIfValid, + LambdaNOTValid funIfNOTValid = nullptr) + const -> std::enable_if_t && (std::is_invocable_v || std::is_same_v), void>; + + template + NEON_CUDA_HOST_DEVICE inline auto + writeNghData(const Idx& gidx, + int card, + T value) + -> bool; + + /** + * Gets the global coordinates of the cartesian point. + */ + NEON_CUDA_HOST_DEVICE inline auto + getGlobalIndex(const Idx& cell) + const -> Neon::index_3d; + + NEON_CUDA_HOST_DEVICE inline auto + isActive(const Idx& cell, + const typename SBlock::BitMask* mask = nullptr) const -> bool; + + + NEON_CUDA_HOST_DEVICE inline auto + getDomainSize() + const -> Neon::index_3d; + + NEON_CUDA_HOST_DEVICE + auto mem() const -> T const*; + + /** + * Gets the Idx for in the block view space. 
+ */ + NEON_CUDA_HOST_DEVICE inline auto + getBlockViewIdx(const Idx& cell) + const -> BlockViewGridIdx; + + + NEON_CUDA_HOST_DEVICE inline auto + helpGetPitch(const Idx& cell, int card) + const -> uint32_t; + + NEON_CUDA_HOST_DEVICE inline auto + helpGetValidIdxPitchExplicit(const Idx& idx, int card) + const -> uint32_t; + + NEON_CUDA_HOST_DEVICE inline auto + helpNghPitch(const Idx& nghIdx, int card) + const -> std::tuple; + + NEON_CUDA_HOST_DEVICE inline auto + helpGetNghIdx(const Idx& idx, const NghIdx& offset) + const -> Idx; + + template + NEON_CUDA_HOST_DEVICE inline auto + helpGetNghIdx(const Idx& idx) + const -> Idx; + + NEON_CUDA_HOST_DEVICE inline auto + helpGetNghIdx(const Idx& idx, const NghIdx& offset, const typename Idx::DataBlockIdx* blockConnectivity) + const -> Idx; + + template + NEON_CUDA_HOST_DEVICE inline auto + helpGetNghIdx(const Idx& idx, const typename Idx::DataBlockIdx* blockConnectivity) + const -> Idx; + + auto NEON_CUDA_HOST_DEVICE helpGetSectorFirstBlock(Sectors sector) const + -> typename Idx::DataBlockCount; + + auto NEON_CUDA_HOST_DEVICE helpGetSectorLength(Sectors sector) const + -> typename Idx::DataBlockCount; + + int mCardinality; + T* mMem; + NghIdx const* NEON_RESTRICT mStencilNghIndex; + typename Idx::DataBlockIdx const* NEON_RESTRICT mBlockConnectivity; + typename SBlock::BitMask const* NEON_RESTRICT mMask; + Neon::int32_3d const* NEON_RESTRICT mOrigin; + int mSetIdx; + int mMultiResDiscreteIdxSpacing = 1; + Neon::int32_3d mDomainSize; + + + + typename Idx::DataBlockCount mSectorFirstBlockIdx[Sectors::after + 1]; +}; + +} // namespace Neon::domain::details::bGridMgpu + +#include "Neon/domain/details/bGridDisgMgpu//bPartition_imp.h" \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bPartition_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bPartition_imp.h new file mode 100644 index 00000000..7fdf8bc4 --- /dev/null +++ 
b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bPartition_imp.h @@ -0,0 +1,517 @@ +#pragma once + +#include "Neon/domain/details/bGridDisgMgpu//bGrid.h" +#include "Neon/domain/details/bGridDisgMgpu/bSpan.h" + +namespace Neon::domain::details::bGridMgpu { + +template +bPartition::bPartition() + : mCardinality(0), + mMem(nullptr), + mStencilNghIndex(), + mBlockConnectivity(nullptr), + mMask(nullptr), + mOrigin(0), + mSetIdx(0) +{ +} + +template +bPartition:: + bPartition(int setIdx, + int cardinality, + T* mem, + typename Idx::DataBlockIdx* blockConnectivity, + typename SBlock::BitMask const* NEON_RESTRICT mask, + Neon::int32_3d* origin, + NghIdx* stencilNghIndex, + Neon::int32_3d mDomainSize, + typename Idx::DataBlockCount mFirstDataBUP, + typename Idx::DataBlockCount mFirstDataBDW, + typename Idx::DataBlockCount mFirstDataGUP, + typename Idx::DataBlockCount mFirstDataGDW, + typename Idx::DataBlockCount mLastDataGDW) + : mCardinality(cardinality), + mMem(mem), + mStencilNghIndex(stencilNghIndex), + mBlockConnectivity(blockConnectivity), + mMask(mask), + mOrigin(origin), + mSetIdx(setIdx), + mDomainSize(mDomainSize) +{ + mSectorFirstBlockIdx[Sectors::bUp] = mFirstDataBUP; + mSectorFirstBlockIdx[Sectors::bDw] = mFirstDataBDW; + mSectorFirstBlockIdx[Sectors::gUp] = mFirstDataGUP; + mSectorFirstBlockIdx[Sectors::gDw] = mFirstDataGDW; + mSectorFirstBlockIdx[Sectors::after] = mLastDataGDW; +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getGlobalIndex(const Idx& gidx) + const -> Neon::index_3d +{ + auto location = mOrigin[gidx.mDataBlockIdx]; + location.x += gidx.mInDataBlockIdx.x; + location.y += gidx.mInDataBlockIdx.y; + location.z += gidx.mInDataBlockIdx.z; + if constexpr (SBlock::isMultiResMode) { + return location * mMultiResDiscreteIdxSpacing; + } + return location; +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getDomainSize() + const -> Neon::index_3d +{ + return mDomainSize; +} + +template +NEON_CUDA_HOST_DEVICE 
inline auto bPartition:: + getBlockViewIdx(const Idx& gidx) + const -> BlockViewGridIdx +{ + BlockViewGridIdx res; + res.manualSet(gidx.getDataBlockIdx()); + return res; +} + +template +inline NEON_CUDA_HOST_DEVICE auto bPartition:: + cardinality() + const -> int +{ + return mCardinality; +} + +template +inline NEON_CUDA_HOST_DEVICE auto bPartition:: +operator()(const Idx& cell, + int card) -> T& +{ + return mMem[helpGetPitch(cell, card)]; +} + +template +inline NEON_CUDA_HOST_DEVICE auto bPartition:: +operator()(const Idx& cell, + int card) const -> const T& +{ + return mMem[helpGetPitch(cell, card)]; +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + mem() const -> T const* +{ + return mMem; +} + +template +inline NEON_CUDA_HOST_DEVICE auto bPartition:: + helpGetPitch(const Idx& idx, int card) + const -> uint32_t +{ + uint32_t const pitch = helpGetValidIdxPitchExplicit(idx, card); + return pitch; +} + +template +inline NEON_CUDA_HOST_DEVICE auto bPartition:: + helpGetValidIdxPitchExplicit(const Idx& idx, int card) + const -> uint32_t +{ + // we are in the internal sector, so we have the standard AoSoA + if (idx.getDataBlockIdx() < mSectorFirstBlockIdx[Sectors::first]) { + + uint32_t constexpr blockPitchByCard = SBlock::memBlockSizeX * SBlock::memBlockSizeY * SBlock::memBlockSizeZ; + uint32_t const inBlockInCardPitch = idx.mInDataBlockIdx.x + + SBlock::memBlockSizeX * idx.mInDataBlockIdx.y + + (SBlock::memBlockSizeX * SBlock::memBlockSizeY) * idx.mInDataBlockIdx.z; + uint32_t const blockAdnCardPitch = (idx.mDataBlockIdx * mCardinality + card) * blockPitchByCard; + uint32_t const pitch = blockAdnCardPitch + inBlockInCardPitch; + return pitch; + } + + // We switch to the other sector where we have a SoA + int sectorLenght = 0; + int myFirstBlock = 0; + + auto const myBlockIdx = idx.getDataBlockIdx(); + for (int mySector = Sectors::gDw; mySector >= 0; mySector--) { + if (myBlockIdx >= mSectorFirstBlockIdx[mySector]) { + sectorLenght = 
mSectorFirstBlockIdx[mySector + 1] - mSectorFirstBlockIdx[mySector]; + myFirstBlock = mSectorFirstBlockIdx[mySector]; + break; + } + } + int const denseX = (myBlockIdx - myFirstBlock) * SBlock::memBlockSizeX + idx.getInDataBlockIdx().x; + int const denseY = idx.mInDataBlockIdx.y; + int const denseZ = idx.mInDataBlockIdx.z; + + int const xStride = 1; + int const yStride = SBlock::memBlockSizeX * sectorLenght; + int const zStride = SBlock::memBlockSizeY * yStride; + + int const pitch = (SBlock::memBlockSizeX * SBlock::memBlockSizeY * SBlock::memBlockSizeZ) * + card * + sectorLenght + + denseX * xStride + + denseY * yStride + + denseZ * zStride; + + int const fullPitch = myFirstBlock * + SBlock::memBlockSizeX * + SBlock::memBlockSizeY * + SBlock::memBlockSizeZ * + cardinality() + + pitch; + return fullPitch; +} + +template +inline NEON_CUDA_HOST_DEVICE auto bPartition:: + helpNghPitch(const Idx& nghIdx, int card) + const -> std::tuple +{ + if (nghIdx.mDataBlockIdx == Span::getInvalidBlockId()) { + return {false, 0}; + } + + const bool isActive = mMask[nghIdx.mDataBlockIdx].isActive(nghIdx.mInDataBlockIdx.x, nghIdx.mInDataBlockIdx.y, nghIdx.mInDataBlockIdx.z); + if (!isActive) { + return {false, 0}; + } + auto const offset = helpGetValidIdxPitchExplicit(nghIdx, card); + return {true, offset}; +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + helpGetNghIdx(const Idx& idx, + const NghIdx& offset) + const -> Idx +{ + return this->helpGetNghIdx(idx, offset, mBlockConnectivity); +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + helpGetNghIdx(const Idx& idx, + const NghIdx& offset, + const typename Idx::DataBlockIdx* blockConnectivity) + const -> Idx +{ + + typename Idx::InDataBlockIdx ngh(idx.mInDataBlockIdx.x + offset.x, + idx.mInDataBlockIdx.y + offset.y, + idx.mInDataBlockIdx.z + offset.z); + + /** + * 0 if no offset on the direction + * 1 positive offset + * -1 negative offset + */ + const int xFlag = ngh.x < 0 ? 
-1 : (ngh.x >= SBlock::memBlockSizeX ? +1 : 0); + const int yFlag = ngh.y < 0 ? -1 : (ngh.y >= SBlock::memBlockSizeX ? +1 : 0); + const int zFlag = ngh.z < 0 ? -1 : (ngh.z >= SBlock::memBlockSizeX ? +1 : 0); + + const bool isLocal = (xFlag | yFlag | zFlag) == 0; + if (!(isLocal)) { + typename Idx::InDataBlockIdx remoteInBlockOffset; + /** + * Example + * - 8 block (1D case) + * Case 1: + * |0,1,2,3|0,1,2,3|0,1,2,3| + * ^ ^ + * -3 starting point + * + * - idx.inBlock = 2 + * - offset = -1 + * - remote.x = (2-3) - ((-1) * 4) = -1 + 4 = 3 + * Case 2: + * |0,1,2,3|0,1,2,3|0,1,2,3| + * ^ ^ + * starting point +3 from 3 + * + * - idx.inBlock = 3 + * - offset = (+3,0) + * - remote.x = (7+3) - ((+1) * 8) = 10 - 8 = 2 + * + * |0,1,2,3|0,1,2,3|0,1,2,3| + * ^ ^ + * -3 from 0 +3 from 3 + * + * NOTE: if in one direction the neighbour offet is zero, xFalg is 0; + * */ + + Idx remoteNghIdx; + remoteNghIdx.mInDataBlockIdx.x = ngh.x - xFlag * SBlock::memBlockSizeX; + remoteNghIdx.mInDataBlockIdx.y = ngh.y - yFlag * SBlock::memBlockSizeX; + remoteNghIdx.mInDataBlockIdx.z = ngh.z - zFlag * SBlock::memBlockSizeX; + + int connectivityJump = idx.mDataBlockIdx * 27 + + (xFlag + 1) + + (yFlag + 1) * 3 + + (zFlag + 1) * 9; + remoteNghIdx.mDataBlockIdx = blockConnectivity[connectivityJump]; + + return remoteNghIdx; + } else { + Idx localNghIdx; + localNghIdx.mDataBlockIdx = idx.mDataBlockIdx; + localNghIdx.mInDataBlockIdx = ngh; + return localNghIdx; + } +} + +template +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + helpGetNghIdx(const Idx& idx) + const -> Idx +{ + return this->helpGetNghIdx(idx, mBlockConnectivity); +} + +template +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + helpGetNghIdx(const Idx& idx, const typename Idx::DataBlockIdx* blockConnectivity) + const -> Idx +{ + + typename Idx::InDataBlockIdx ngh(idx.mInDataBlockIdx.x + xOff, + idx.mInDataBlockIdx.y + yOff, + idx.mInDataBlockIdx.z + zOff); + + /** + * 0 if no offset on the direction + * 1 positive 
offset + * -1 negative offset + */ + const int xFlag = [&] { + if constexpr (xOff == 0) { + return 0; + } else { + return ngh.x < 0 ? -1 : (ngh.x >= SBlock::memBlockSizeX ? +1 : 0); + } + }(); + + + const int yFlag = [&] { + if constexpr (yOff == 0) { + return 0; + } else { + return ngh.y < 0 ? -1 : (ngh.y >= SBlock::memBlockSizeX ? +1 : 0); + } + }(); + const int zFlag = [&] { + if constexpr (zOff == 0) { + return 0; + } else { + return ngh.z < 0 ? -1 : (ngh.z >= SBlock::memBlockSizeX ? +1 : 0); + } + }(); + + const bool isLocal = (xFlag | yFlag | zFlag) == 0; + if (!(isLocal)) { + typename Idx::InDataBlockIdx remoteInBlockOffset; + /** + * Example + * - 8 block (1D case) + * Case 1: + * |0,1,2,3|0,1,2,3|0,1,2,3| + * ^ ^ + * -3 starting point + * + * - idx.inBlock = 2 + * - offset = -1 + * - remote.x = (2-3) - ((-1) * 4) = -1 + 4 = 3 + * Case 2: + * |0,1,2,3|0,1,2,3|0,1,2,3| + * ^ ^ + * starting point +3 from 3 + * + * - idx.inBlock = 3 + * - offset = (+3,0) + * - remote.x = (7+3) - ((+1) * 8) = 10 - 8 = 2 + * + * |0,1,2,3|0,1,2,3|0,1,2,3| + * ^ ^ + * -3 from 0 +3 from 3 + * + * NOTE: if in one direction the neighbour offet is zero, xFalg is 0; + * */ + + Idx remoteNghIdx; + remoteNghIdx.mInDataBlockIdx.x = ngh.x - xFlag * SBlock::memBlockSizeX; + remoteNghIdx.mInDataBlockIdx.y = ngh.y - yFlag * SBlock::memBlockSizeX; + remoteNghIdx.mInDataBlockIdx.z = ngh.z - zFlag * SBlock::memBlockSizeX; + + int connectivityJump = idx.mDataBlockIdx * 27 + + (xFlag + 1) + + (yFlag + 1) * 3 + + (zFlag + 1) * 9; + remoteNghIdx.mDataBlockIdx = blockConnectivity[connectivityJump]; + + return remoteNghIdx; + } else { + Idx localNghIdx; + localNghIdx.mDataBlockIdx = idx.mDataBlockIdx; + localNghIdx.mInDataBlockIdx = ngh; + return localNghIdx; + } +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getNghData(const Idx& eId, + uint8_t nghID, + int card) + const -> NghData +{ + NghIdx nghOffset = mStencilNghIndex[nghID]; + return getNghData(eId, nghOffset, card); +} + 
+template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getNghData(const Idx& idx, + const NghIdx& offset, + const int card) + const -> NghData +{ + NghData result; + bIndex nghIdx = helpGetNghIdx(idx, offset); + auto [isValid, pitch] = helpNghPitch(nghIdx, card); + if (!isValid) { + result.invalidate(); + return result; + } + auto const value = mMem[pitch]; + result.set(value, true); + return result; +} + +template +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getNghData(const Idx& idx, + int card) + const -> NghData +{ + NghData result; + bIndex nghIdx = helpGetNghIdx(idx); + auto [isValid, pitch] = helpNghPitch(nghIdx, card); + if (!isValid) { + result.invalidate(); + return result; + } + auto const value = mMem[pitch]; + result.set(value, true); + return result; +} + +template +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getNghData(const Idx& idx, + int card, + T defaultValue) + const -> NghData +{ + NghData result; + bIndex nghIdx = helpGetNghIdx(idx); + auto [isValid, pitch] = helpNghPitch(nghIdx, card); + if (!isValid) { + result.set(defaultValue, false); + return result; + } + auto const value = mMem[pitch]; + result.set(value, true); + return result; +} + +template + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + getNghData(const Idx& gidx, + int card, + LambdaVALID funIfValid, + LambdaNOTValid funIfNOTValid) + const -> std::enable_if_t && (std::is_invocable_v || std::is_same_v), void> +{ + NghData result; + bIndex nghIdx = helpGetNghIdx(gidx); + auto [isValid, pitch] = helpNghPitch(nghIdx, card); + + if (isValid) { + auto const& value = mMem[pitch]; + funIfValid(value); + return; + } + + if constexpr (!std::is_same_v) { + funIfNOTValid(); + } + return; +} + +template +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + writeNghData(const Idx& gidx, + int card, + T value) + -> bool +{ + NghData result; + bIndex nghIdx = helpGetNghIdx(gidx); + auto [isValid, pitch] = helpNghPitch(nghIdx, card); + if 
(!isValid) { + return false; + } + mMem[pitch] = value; + return true; +} + +template +NEON_CUDA_HOST_DEVICE inline auto +bPartition::isActive(const Idx& cell, + const typename SBlock::BitMask* mask) const -> bool +{ + if (!mask) { + return mMask[cell.mDataBlockIdx].isActive(cell.mInDataBlockIdx.x, cell.mInDataBlockIdx.y, cell.mInDataBlockIdx.z); + } else { + return mask[cell.mDataBlockIdx].isActive(cell.mInDataBlockIdx.x, cell.mInDataBlockIdx.y, cell.mInDataBlockIdx.z); + } +} + +template +NEON_CUDA_HOST_DEVICE inline auto +bPartition::helpGetSectorFirstBlock(Sectors sector) const + -> typename Idx::DataBlockCount +{ + return mSectorFirstBlockIdx[sector]; +} + +template +NEON_CUDA_HOST_DEVICE inline auto +bPartition::helpGetSectorLength(Sectors sector) const + -> typename Idx::DataBlockCount +{ + return mSectorFirstBlockIdx[sector + 1] - mSectorFirstBlockIdx[sector]; +} +} // namespace Neon::domain::details::bGridMgpu \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bSpan.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bSpan.h new file mode 100644 index 00000000..397133c8 --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bSpan.h @@ -0,0 +1,55 @@ +#pragma once + +#include "Neon/domain/details/bGridDisgMgpu/bIndex.h" + +namespace Neon::domain::details::bGridMgpu { + +template +class bSpan +{ + public: + // bit mask information + using BitMaskWordType = uint64_t; + + static constexpr uint32_t bitMaskStorageBitWidth = 64; + static constexpr Neon::MemoryLayout activeMaskMemoryLayout = Neon::MemoryLayout::arrayOfStructs; + static constexpr uint32_t log2OfbitMaskWordSize = 6; + + using Idx = bIndex; + friend class bGrid; + + static constexpr int SpaceDim = 3; + + bSpan() = default; + virtual ~bSpan() = default; + + NEON_CUDA_HOST_DEVICE inline static auto getInvalidBlockId() + -> typename Idx::DataBlockIdx + { + return std::numeric_limits::max(); + } + + inline bSpan( + typename 
Idx::DataBlockCount mFirstDataBlockOffset, + typename SBlock::BitMask const* NEON_RESTRICT mActiveMask, + Neon::DataView mDataView); + + NEON_CUDA_HOST_DEVICE inline auto setAndValidateCPUDevice( + Idx& bidx, + uint32_t const& threadIdx, + uint32_t const& x, + uint32_t const& y, + uint32_t const& z) const -> bool; + + NEON_CUDA_HOST_DEVICE inline auto setAndValidateGPUDevice( + Idx& bidx) const -> bool; + + + // We don't need to have a count on active blocks + typename Idx::DataBlockCount mFirstDataBlockOffset; + typename SBlock::BitMask const* NEON_RESTRICT mActiveMask; + Neon::DataView mDataView; +}; +} // namespace Neon::domain::details::bGrid + +#include "Neon/domain/details/bGridDisgMgpu/bSpan_imp.h" \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bSpan_imp.h b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bSpan_imp.h new file mode 100644 index 00000000..45e40eeb --- /dev/null +++ b/libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/bSpan_imp.h @@ -0,0 +1,51 @@ +#include "Neon/domain/details/bGridDisgMgpu/bSpan.h" + +namespace Neon::domain::details::bGridMgpu { + +template +NEON_CUDA_HOST_DEVICE inline auto +bSpan::setAndValidateGPUDevice([[maybe_unused]] Idx& bidx) const -> bool +{ +#ifdef NEON_PLACE_CUDA_DEVICE + bidx.mDataBlockIdx = blockIdx.x + mFirstDataBlockOffset; + bidx.mInDataBlockIdx.x = threadIdx.x; + bidx.mInDataBlockIdx.y = threadIdx.y; + bidx.mInDataBlockIdx.z = threadIdx.z; + + const bool isActive = mActiveMask[bidx.mDataBlockIdx].isActive(bidx.mInDataBlockIdx.x, bidx.mInDataBlockIdx.y, bidx.mInDataBlockIdx.z); + + return isActive; +#else + NEON_THROW_UNSUPPORTED_OPERATION("Operation supported only on GPU"); +#endif +} + +template +NEON_CUDA_HOST_DEVICE inline auto +bSpan::setAndValidateCPUDevice(Idx& bidx, + uint32_t const& dataBlockIdx, + uint32_t const& x, + uint32_t const& y, + uint32_t const& z) const -> bool +{ + + bidx.mDataBlockIdx = dataBlockIdx + mFirstDataBlockOffset; + 
bidx.mInDataBlockIdx.x = static_cast(x); + bidx.mInDataBlockIdx.y = static_cast(y); + bidx.mInDataBlockIdx.z = static_cast(z); + const bool isActive = mActiveMask[dataBlockIdx].isActive(bidx.mInDataBlockIdx.x, bidx.mInDataBlockIdx.y, bidx.mInDataBlockIdx.z); + return isActive; +} + +template +bSpan::bSpan(typename Idx::DataBlockCount firstDataBlockOffset, + typename SBlock::BitMask const* NEON_RESTRICT activeMask, + Neon::DataView dataView) + : mFirstDataBlockOffset(firstDataBlockOffset), + mActiveMask(activeMask), + mDataView(dataView) +{ +} + + +} // namespace Neon::domain::details::bGrid \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/eGrid/eField_imp.h b/libNeonDomain/include/Neon/domain/details/eGrid/eField_imp.h index 2427dc57..7fab92b1 100644 --- a/libNeonDomain/include/Neon/domain/details/eGrid/eField_imp.h +++ b/libNeonDomain/include/Neon/domain/details/eGrid/eField_imp.h @@ -75,7 +75,7 @@ eField::eField(const std::string& fieldUserName, } #if 0 { // Setting Reduction information - mData->partitionTable.forEachConfigurationWithUserData( + mData->mPartitionTable.forEachConfigurationWithUserData( [&](Neon::Execution, Neon::SetIdx setIdx, Neon::DataView dw, @@ -322,7 +322,7 @@ auto eField::helpHaloUpdate(SetIdx setIdx, T* src = [&]() { auto southDevice = setId; - auto& partition = mData->partitionTable.getPartition(execution, + auto& partition = mData->mPartitionTable.getPartition(execution, southDevice, Neon::DataView::STANDARD); dIndex firstBoundaryNorthCell(0, 0, partition.dim.z - mData->zHaloDim); @@ -332,7 +332,7 @@ auto eField::helpHaloUpdate(SetIdx setIdx, T* dst = [&]() { auto northDevice = setId + 1; - auto& partition = mData->partitionTable.getPartition(execution, + auto& partition = mData->mPartitionTable.getPartition(execution, northDevice, Neon::DataView::STANDARD); dIndex firstBoundarySouthCell(0, 0, 0); @@ -355,7 +355,7 @@ auto eField::helpHaloUpdate(SetIdx setIdx, const size_t transferBytes = sizeof(T) * 
mData->zHaloDim * mData->pitch[setId].z; if (setId != setCardinality - 1) { // Addressing all partitions that needs to send data north - auto& partition = mData->partitionTable.getPartition(Neon::Execution::device, + auto& partition = mData->mPartitionTable.getPartition(Neon::Execution::device, setId, Neon::DataView::STANDARD); diff --git a/libNeonDomain/include/Neon/domain/tools/partitioning/SpanClassifier.h b/libNeonDomain/include/Neon/domain/tools/partitioning/SpanClassifier.h index 9ab62b43..ef52e7ef 100644 --- a/libNeonDomain/include/Neon/domain/tools/partitioning/SpanClassifier.h +++ b/libNeonDomain/include/Neon/domain/tools/partitioning/SpanClassifier.h @@ -272,7 +272,6 @@ namespace Neon::domain::tool::partitioning { if (activeCellLambda(globalId)) { isActiveBlock = true; if (whatdomain == ByDomain::bc) { - //std::cout << "HERE " << blockOrigin << std::endl; byDomain = ByDomain::bc; doBreak = true; } diff --git a/libNeonDomain/src/domain/details/bGridDisgMask/bFieldReduceKernels.cu b/libNeonDomain/src/domain/details/bGridDisgMask/bFieldReduceKernels.cu new file mode 100644 index 00000000..678565ea --- /dev/null +++ b/libNeonDomain/src/domain/details/bGridDisgMask/bFieldReduceKernels.cu @@ -0,0 +1,62 @@ +#include "Neon/domain/details/dGrid/dGrid.h" +#include "Neon/domain/patterns/ReduceKernels.cuh" + +namespace Neon::domain::details::bGridMask { +#if 0 +template +auto dFieldDev::dotCUB( + Neon::set::patterns::BlasSet& blasSet, + const dFieldDev& input, + Neon::set::MemDevSet& output, + const Neon::DataView& dataView) -> void +{ + Neon::domain::details::dotCUB(blasSet, + grid(), + *this, + input, + output, + dataView); +} + + +template +auto dFieldDev::norm2CUB( + Neon::set::patterns::BlasSet& blasSet, + Neon::set::MemDevSet& output, + const Neon::DataView& dataView) -> void +{ + Neon::domain::details::norm2CUB(blasSet, + grid(), + *this, + output, + dataView); +} + +template void dFieldDev::dotCUB(Neon::set::patterns::BlasSet&, + const dFieldDev&, + 
Neon::set::MemDevSet&, + const Neon::DataView&); + +template void dFieldDev::dotCUB(Neon::set::patterns::BlasSet&, + const dFieldDev&, + Neon::set::MemDevSet&, + const Neon::DataView&); + +template void dFieldDev::dotCUB(Neon::set::patterns::BlasSet&, + const dFieldDev&, + Neon::set::MemDevSet&, + const Neon::DataView&); + +template void dFieldDev::norm2CUB(Neon::set::patterns::BlasSet&, + Neon::set::MemDevSet&, + const Neon::DataView&); + +template void dFieldDev::norm2CUB(Neon::set::patterns::BlasSet&, + Neon::set::MemDevSet&, + const Neon::DataView&); + +template void dFieldDev::norm2CUB(Neon::set::patterns::BlasSet&, + Neon::set::MemDevSet&, + const Neon::DataView&); +#endif +} // namespace Neon::domain::details::dGrid \ No newline at end of file diff --git a/libNeonDomain/src/domain/details/bGridDisgMask/bGrid.cpp b/libNeonDomain/src/domain/details/bGridDisgMask/bGrid.cpp new file mode 100644 index 00000000..d5e3126d --- /dev/null +++ b/libNeonDomain/src/domain/details/bGridDisgMask/bGrid.cpp @@ -0,0 +1,8 @@ +#include "Neon/domain/details/bGridDisgMask/bGrid.h" + +namespace Neon::domain::details::disaggregated::bGridMask { + + +template class bGrid>; + +} // namespace Neon::domain::details::bGrid \ No newline at end of file diff --git a/libNeonDomain/src/domain/details/bGridDisgMgpu/bFieldReduceKernels.cu b/libNeonDomain/src/domain/details/bGridDisgMgpu/bFieldReduceKernels.cu new file mode 100644 index 00000000..e69de29b diff --git a/libNeonDomain/src/domain/details/bGridDisgMgpu/bGrid.cpp b/libNeonDomain/src/domain/details/bGridDisgMgpu/bGrid.cpp new file mode 100644 index 00000000..a54e64ca --- /dev/null +++ b/libNeonDomain/src/domain/details/bGridDisgMgpu/bGrid.cpp @@ -0,0 +1,7 @@ +#include "Neon/domain/details/bGridDisgMgpu/bGrid.h" + +namespace Neon::domain::details::bGridMgpu { + +template class bGrid>; + +} // namespace Neon::domain::details::bGrid \ No newline at end of file diff --git a/libNeonDomain/tests/domain-globalIdx/src/globalIdx.cu 
b/libNeonDomain/tests/domain-globalIdx/src/globalIdx.cu index e74c61bc..21c75926 100644 --- a/libNeonDomain/tests/domain-globalIdx/src/globalIdx.cu +++ b/libNeonDomain/tests/domain-globalIdx/src/globalIdx.cu @@ -102,5 +102,6 @@ template auto run(TestData&) - template auto run(TestData&) -> void; template auto run(TestData&) -> void; template auto run(TestData&) -> void; +template auto run(TestData&) -> void; } // namespace globalIdx \ No newline at end of file diff --git a/libNeonDomain/tests/domain-globalIdx/src/globalIdx.h b/libNeonDomain/tests/domain-globalIdx/src/globalIdx.h index 616c46f0..117fdcd5 100644 --- a/libNeonDomain/tests/domain-globalIdx/src/globalIdx.h +++ b/libNeonDomain/tests/domain-globalIdx/src/globalIdx.h @@ -19,6 +19,7 @@ extern template auto run(TestData(TestData&) -> void; extern template auto run(TestData&) -> void; extern template auto run(TestData&) -> void; +extern template auto run(TestData&) -> void; } // namespace globalIdx diff --git a/libNeonDomain/tests/domain-globalIdx/src/gtests.cpp b/libNeonDomain/tests/domain-globalIdx/src/gtests.cpp index 4e2f193d..d6b91edd 100644 --- a/libNeonDomain/tests/domain-globalIdx/src/gtests.cpp +++ b/libNeonDomain/tests/domain-globalIdx/src/gtests.cpp @@ -41,6 +41,15 @@ TEST(domain_globalIdx, dGridDisg) 1); } +TEST(domain_globalIdx, bGridMgpu) +{ + int nGpus = 3; + using Type = int64_t; + runAllTestConfiguration(std::function(globalIdx::run), + nGpus, + 1); +} + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/libNeonDomain/tests/domain-map-disg/src/gtests.cpp b/libNeonDomain/tests/domain-map-disg/src/gtests.cpp index b5b77c56..3664f646 100644 --- a/libNeonDomain/tests/domain-map-disg/src/gtests.cpp +++ b/libNeonDomain/tests/domain-map-disg/src/gtests.cpp @@ -4,7 +4,7 @@ #include "map.h" #include "runHelper.h" -TEST(domain_map, bGridDisg) +TEST(domain_map_disg, bGridDisg) { int nGpus = 1; using Type = int64_t; @@ -13,7 +13,7 @@ TEST(domain_map, bGridDisg) 1); } 
-TEST(domain_map_dataView, bGridDisg) +TEST(domain_map_disg_dataView, bGridDisg) { int nGpus = 1; using Type = int64_t; diff --git a/libNeonDomain/tests/domain-map-disg/src/map.cu b/libNeonDomain/tests/domain-map-disg/src/map.cu index 991ac8dd..434802ef 100644 --- a/libNeonDomain/tests/domain-map-disg/src/map.cu +++ b/libNeonDomain/tests/domain-map-disg/src/map.cu @@ -138,7 +138,7 @@ auto run(TestData& data) -> void if (!isInside) { return Neon::domain::details::disaggregated::bGrid::details::cGrid::ClassSelector::outside; } - if (idx.rSum() % 2 == 0) { + if (idx.x == 0 || idx.y == 0 || idx.z == 0 || idx.x == grid.getDimension().x - 1 || idx.y == grid.getDimension().y - 1 || idx.z == grid.getDimension().z - 1) { return Neon::domain::details::disaggregated::bGrid::details::cGrid::ClassSelector::beta; } return Neon::domain::details::disaggregated::bGrid::details::cGrid::ClassSelector::alpha; @@ -151,6 +151,7 @@ auto run(TestData& data) -> void grid = tmp; + grid.helpGetClassField().template ioToVtk("classField", "classField"); const std::string appName = TestInformation::fullName(grid.getImplementationName()); data.resetValuesToLinear(1, 100); diff --git a/libNeonDomain/tests/domain-map/src/gtests.cpp b/libNeonDomain/tests/domain-map/src/gtests.cpp index 6240ad66..f6977444 100644 --- a/libNeonDomain/tests/domain-map/src/gtests.cpp +++ b/libNeonDomain/tests/domain-map/src/gtests.cpp @@ -42,7 +42,7 @@ TEST(domain_map, bGrid) TEST(domain_map, dGridDisg) { - int nGpus = 1; + int nGpus = 3; using Type = int64_t; runAllTestConfiguration(std::function(map::run), nGpus, @@ -58,6 +58,16 @@ TEST(domain_map, dGridSoA) 1); } +TEST(domain_map, bGridMgpu) +{ + int nGpus = 3; + using Type = int64_t; + // extern template auto run(TestData&) -> void; + runAllTestConfiguration(std::function(map::run), + nGpus, + 1); +} + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/libNeonDomain/tests/domain-map/src/map.cu 
b/libNeonDomain/tests/domain-map/src/map.cu index 810cb067..d2dc5e5f 100644 --- a/libNeonDomain/tests/domain-map/src/map.cu +++ b/libNeonDomain/tests/domain-map/src/map.cu @@ -1,7 +1,7 @@ #include #include "Neon/domain/Grids.h" -#include "Neon/domain/details/dGridDisg/dGrid.h" #include "Neon/domain/details/bGridDisg/bGrid.h" +#include "Neon/domain/details/dGridDisg/dGrid.h" #include "Neon/domain/details/dGridSoA/dGridSoA.h" #include "Neon/domain/tools/TestData.h" @@ -36,15 +36,15 @@ auto mapContainer_axpy(int streamIdx, template auto mapContainer_add(int streamIdx, - typename Field::Type& val, - Field& fieldB) + typename Field::Type& val, + Field& fieldB) -> Neon::set::Container { const auto& grid = fieldB.getGrid(); return grid.newContainer( "mapContainer_axpy", [&, val](Neon::set::Loader& loader) { - auto b = loader.load(fieldB); + auto b = loader.load(fieldB); return [=] NEON_CUDA_HOST_DEVICE(const typename Field::Idx& e) mutable { for (int i = 0; i < b.cardinality(); i++) { @@ -101,6 +101,7 @@ template auto run(TestData&) - template auto run(TestData&) -> void; template auto run(TestData&) -> void; template auto run(TestData&) -> void; +template auto run(TestData&) -> void; namespace dataView { template @@ -151,6 +152,7 @@ template auto run(TestData&) - template auto run(TestData&) -> void; template auto run(TestData&) -> void; template auto run(TestData&) -> void; +template auto run(TestData&) -> void; } // namespace dataView } // namespace map \ No newline at end of file diff --git a/libNeonDomain/tests/domain-map/src/map.h b/libNeonDomain/tests/domain-map/src/map.h index f6f8bc7c..73038353 100644 --- a/libNeonDomain/tests/domain-map/src/map.h +++ b/libNeonDomain/tests/domain-map/src/map.h @@ -19,6 +19,7 @@ extern template auto run(TestData(TestData&) -> void; extern template auto run(TestData&) -> void; extern template auto run(TestData&) -> void; +extern template auto run(TestData&) -> void; namespace dataView { @@ -30,6 +31,7 @@ extern template auto 
run(TestData(TestData&) -> void; extern template auto run(TestData&) -> void; extern template auto run(TestData&) -> void; +extern template auto run(TestData&) -> void; } // namespace dataView diff --git a/libNeonDomain/tests/domain-map/src/runHelper.h b/libNeonDomain/tests/domain-map/src/runHelper.h index 593e31c2..4c622f82 100644 --- a/libNeonDomain/tests/domain-map/src/runHelper.h +++ b/libNeonDomain/tests/domain-map/src/runHelper.h @@ -73,7 +73,7 @@ void runAllTestConfiguration( Neon::Backend backend(ids, runtime); Neon::MemoryOptions memoryOptions = backend.getMemoryOptions(); - if constexpr (std::is_same_v) { + if constexpr (G::executionThreadSpan == Neon::set::details::ExecutionThreadSpan::d1b3) { if (dim.z < 8 * ngpu * 3) { dim.z = ngpu * 3 * 8; } diff --git a/libNeonDomain/tests/domain-neighbour-globalIdx/src/gtests.cpp b/libNeonDomain/tests/domain-neighbour-globalIdx/src/gtests.cpp index 3ea17257..2376b72f 100644 --- a/libNeonDomain/tests/domain-neighbour-globalIdx/src/gtests.cpp +++ b/libNeonDomain/tests/domain-neighbour-globalIdx/src/gtests.cpp @@ -49,6 +49,15 @@ TEST(domain_neighbour_globalIdx, dGridDisg) 1); } +TEST(domain_neighbour_globalIdx, bGridMgpu) +{ + int nGpus = 5; + using Type = int64_t; + runAllTestConfiguration(std::function(globalIdx::run), + nGpus, + 1); +} + /////////////////////////////////////////// TEST(domain_neighbour_globalIdx, dGrid_template) @@ -87,6 +96,17 @@ TEST(domain_neighbour_globalIdx, dGridSoA_template) 1); } +TEST(domain_neighbour_globalIdx,bGridMgpu_template) +{ + int nGpus = 5; + using Type = int64_t; + runAllTestConfiguration(std::function(globalIdx::runTemplate), + nGpus, + 1); +} + + + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/libNeonDomain/tests/domain-neighbour-globalIdx/src/testsAndContainers.cu b/libNeonDomain/tests/domain-neighbour-globalIdx/src/testsAndContainers.cu index 17f9f0ec..fe25f54e 100644 --- 
a/libNeonDomain/tests/domain-neighbour-globalIdx/src/testsAndContainers.cu +++ b/libNeonDomain/tests/domain-neighbour-globalIdx/src/testsAndContainers.cu @@ -407,6 +407,7 @@ template auto run(TestData&) - template auto run(TestData&) -> void; template auto run(TestData&) -> void; template auto run(TestData&) -> void; +template auto run(TestData&) -> void; template auto runTemplate(TestData&) -> void; @@ -414,5 +415,6 @@ template auto runTemplate(TestData(TestData&) -> void; template auto runTemplate(TestData&) -> void; template auto runTemplate(TestData&) -> void; +template auto runTemplate(TestData&) -> void; } // namespace globalIdx \ No newline at end of file diff --git a/libNeonDomain/tests/domain-neighbour-globalIdx/src/testsAndContainers.h b/libNeonDomain/tests/domain-neighbour-globalIdx/src/testsAndContainers.h index 03550960..92542034 100644 --- a/libNeonDomain/tests/domain-neighbour-globalIdx/src/testsAndContainers.h +++ b/libNeonDomain/tests/domain-neighbour-globalIdx/src/testsAndContainers.h @@ -22,11 +22,13 @@ extern template auto run(TestData(TestData&) -> void; extern template auto run(TestData&) -> void; extern template auto run(TestData&) -> void; +extern template auto run(TestData&) -> void; extern template auto runTemplate(TestData&) -> void; extern template auto runTemplate(TestData&) -> void; extern template auto runTemplate(TestData&) -> void; extern template auto runTemplate(TestData&) -> void; extern template auto runTemplate(TestData&) -> void; +extern template auto runTemplate(TestData&) -> void; } // namespace map diff --git a/libNeonDomain/tests/domain-stencil/src/gtests.cpp b/libNeonDomain/tests/domain-stencil/src/gtests.cpp index ada2bd6b..3d5f436c 100644 --- a/libNeonDomain/tests/domain-stencil/src/gtests.cpp +++ b/libNeonDomain/tests/domain-stencil/src/gtests.cpp @@ -94,6 +94,15 @@ TEST(domain_stencil, dGridDisg_Template) 1); } +TEST(domain_stencil, bGridDisgMgpu_Template) +{ + int nGpus = 5; + using Type = int64_t; + 
runAllTestConfiguration(std::function(map::runTemplate), + nGpus, + 1); +} + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/libNeonDomain/tests/domain-stencil/src/stencil.cu b/libNeonDomain/tests/domain-stencil/src/stencil.cu index 6deb55f7..4cf31f05 100644 --- a/libNeonDomain/tests/domain-stencil/src/stencil.cu +++ b/libNeonDomain/tests/domain-stencil/src/stencil.cu @@ -117,7 +117,7 @@ auto laplaceTemplate(const Field& filedA, count++; }); }); - + b(idx, i) = a(idx, i) - count * partial; } }; @@ -281,11 +281,13 @@ template auto runNoTemplate(TestData(TestData&) -> void; template auto runNoTemplate(TestData&) -> void; template auto runNoTemplate(TestData&) -> void; +template auto runNoTemplate(TestData&) -> void; template auto runTemplate(TestData&) -> void; template auto runTemplate(TestData&) -> void; template auto runTemplate(TestData&) -> void; template auto runTemplate(TestData&) -> void; template auto runTemplate(TestData&) -> void; +template auto runTemplate(TestData&) -> void; } // namespace map \ No newline at end of file diff --git a/libNeonDomain/tests/test-template/CMakeLists.txt b/libNeonDomain/tests/test-template/CMakeLists.txt new file mode 100644 index 00000000..c077f20f --- /dev/null +++ b/libNeonDomain/tests/test-template/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.19 FATAL_ERROR) + +set(APP_NAME domain-neighbour-globalIdx) +file(GLOB_RECURSE SrcFiles src/*.*) + +add_executable(${APP_NAME} ${SrcFiles}) + +target_link_libraries(${APP_NAME} + PUBLIC libNeonDomain + PUBLIC gtest_main) + +set_target_properties(${APP_NAME} PROPERTIES + CUDA_SEPARABLE_COMPILATION ON + CUDA_RESOLVE_DEVICE_SYMBOLS ON) + +set_target_properties(${APP_NAME} PROPERTIES FOLDER "libNeonDomain") +source_group(TREE ${CMAKE_CURRENT_LIST_DIR} PREFIX "${APP_NAME}" FILES ${SrcFiles}) + +add_test(NAME ${APP_NAME} COMMAND ${APP_NAME}) \ No newline at end of file diff --git 
a/libNeonDomain/tests/test-template/src/testsAndContainers.cu b/libNeonDomain/tests/test-template/src/testsAndContainers.cu new file mode 100644 index 00000000..91368f7c --- /dev/null +++ b/libNeonDomain/tests/test-template/src/testsAndContainers.cu @@ -0,0 +1,75 @@ +#include +#include "Neon/domain/Grids.h" +#include "Neon/domain/details/dGridDisg/dGrid.h" +#include "Neon/domain/details/dGridSoA/dGridSoA.h" + +#include "Neon/domain/tools/TestData.h" + +auto NEON_CUDA_HOST_DEVICE idxToInt(Neon::index_3d idx) +{ + return 33 * 1000000 + 10000 * idx.x + 100 * idx.y + idx.z; +}; + +int main() +{ + using Grid = Neon::bGridMgpu; + Neon::init(); + Neon::Backend bk({0, 0}, Neon::Runtime::openmp); + int blockSize = Neon::domain::details::bGridMgpu::defaultBlockSize; + Grid bGridMgpu( + bk, + {blockSize, blockSize, 6 * blockSize}, + [](Neon::index_3d idx) { return true; }, + Neon::domain::Stencil::s6_Jacobi_t(), + {1, 1, 1}, + {0, 0, 0}, + Neon::domain::tool::spaceCurves::EncoderType::sweep); + using Field = typename Grid::Field; + Field A = bGridMgpu.newField("A", 1, 0); + + auto setupOp = bGridMgpu.newContainer( + "setup", + [&A](Neon::set::Loader& loader) { + auto a = loader.load(A); + return [=] NEON_CUDA_HOST_DEVICE(const typename Field::Idx& gIdx) mutable { + auto const global_idx = a.getGlobalIndex(gIdx); + a(gIdx, 0) = idxToInt(global_idx); + }; + }); + + setupOp.run(0); + bk.sync(0); + + A.newHaloUpdate( + Neon::set::StencilSemantic::standard, + Neon::set::TransferMode::get, + Neon::Execution::device) + .run(0); + + bk.sync(0); + + auto stencilOp = bGridMgpu.newContainer( + "setup", + [&A](Neon::set::Loader& loader) { + auto a = loader.load(A); + return [=] NEON_CUDA_HOST_DEVICE(const typename Field::Idx& gIdx) mutable { + auto const global_idx = a.getGlobalIndex(gIdx); + auto expectedNgh = global_idx + Neon::index_3d(0, 0, 1); + auto val = a.getNghData<0, 0, 1>(gIdx, 0, -1).getData(); + if (val != -1 && + val != idxToInt(expectedNgh)) { + printf("ERROR: %d (%d %d 
%d) %d\n", val, + global_idx.x, global_idx.y, global_idx.z, + a(gIdx, 0)); + if (global_idx.x == 0 && global_idx.y == 0 && global_idx.z == 11) { + printf("ERROR: %d (%d %d %d) %d\n", val, + global_idx.x, global_idx.y, global_idx.z, + a(gIdx, 0)); + val = a.getNghData<0, 0, 1>(gIdx, 0, -1).getData(); + } + } + }; + }); + stencilOp.run(0); + bk.sync(0); +}; diff --git a/libNeonSet/include/Neon/set/DevSet.h b/libNeonSet/include/Neon/set/DevSet.h index 5e8b03b7..114408d3 100644 --- a/libNeonSet/include/Neon/set/DevSet.h +++ b/libNeonSet/include/Neon/set/DevSet.h @@ -401,10 +401,18 @@ class DevSet executor = (void*)Neon::set::details::blockSpan::launchLambdaOnSpanCUDAWithCompilerHints; } } - dev.kernel.template cudaLaunchKernel(gpuStreamSet[setIdx.idx()], - launchInfoSet[setIdx.idx()], - executor, - untypedParams); + auto launchInfo = launchInfoSet[setIdx.idx()]; + auto cudaGrid = launchInfo.cudaGrid(); + if (cudaGrid.x * cudaGrid.y * cudaGrid.z != 0) { + + dev.kernel.template cudaLaunchKernel(gpuStreamSet[setIdx.idx()], + launchInfo, + executor, + untypedParams); + } else { + NEON_WARNING("Cuda grid with zero number of element was detected. The kernel will be skipped."); + ; + } } #else NeonException exp("DevSet"); @@ -447,10 +455,18 @@ class DevSet } else { executor = (void*)Neon::set::details::blockSpan::launchLambdaOnSpanCUDA; } - dev.kernel.template cudaLaunchKernel(gpuStreamSet[setIdx.idx()], - launchInfoSet[setIdx.idx()], - executor, - untypedParams); + auto launchInfo = launchInfoSet[setIdx.idx()]; + auto cudaGrid = launchInfo.cudaGrid(); + if (cudaGrid.x * cudaGrid.y * cudaGrid.z != 0) { + + dev.kernel.template cudaLaunchKernel(gpuStreamSet[setIdx.idx()], + launchInfo, + executor, + untypedParams); + } else { + NEON_WARNING("Cuda grid with zero number of element was detected. 
The kernel will be skipped."); + ; + } } if (kernelConfig.runMode() == Neon::run_et::sync) { gpuStreamSet.sync(); @@ -502,9 +518,14 @@ class DevSet const Neon::Integer_3d blockSize(cudaBlock.x, cudaBlock.y, cudaBlock.z); const Neon::Integer_3d gridSize(cudaGrid.x, cudaGrid.y, cudaGrid.z); - Neon::set::details::blockSpan::launchLambdaOnSpanOMP(blockSize, gridSize, iterator, lambda); + if (cudaGrid.x * cudaGrid.y * cudaGrid.z != 0) { + + Neon::set::details::blockSpan::launchLambdaOnSpanOMP(blockSize, gridSize, iterator, lambda); + } else { + NEON_WARNING("Omp grid with zero number of element was detected. The kernel will be skipped."); + } } } } @@ -543,10 +564,14 @@ class DevSet auto const& cudaGrid = launchInfoSet[setIdx].cudaGrid(); const Neon::Integer_3d blockSize(cudaBlock.x, cudaBlock.y, cudaBlock.z); const Neon::Integer_3d gridSize(cudaGrid.x, cudaGrid.y, cudaGrid.z); + if (cudaGrid.x * cudaGrid.y * cudaGrid.z != 0) { Neon::set::details::blockSpan::launchLambdaOnSpanOMP(blockSize, gridSize, iterator, lambda); + } else { + NEON_WARNING("Omp grid with zero number of element was detected. The kernel will be skipped."); + } } return; }