Skip to content

Commit

Permalink
wip: adding new multi-gpu bGrid.
Browse files Browse the repository at this point in the history
  • Loading branch information
massimim committed Jan 5, 2024
1 parent 48acb7f commit 8e4adc8
Show file tree
Hide file tree
Showing 24 changed files with 2,713 additions and 8 deletions.
1 change: 1 addition & 0 deletions libNeonDomain/include/Neon/domain/Grids.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
#include "Neon/domain/bGrid.h"
#include "Neon/domain/dGridSoA.h"
#include "Neon/domain/bGridDisg.h"
#include "Neon/domain/bGridMgpuDisg.h"
6 changes: 6 additions & 0 deletions libNeonDomain/include/Neon/domain/bGridMgpuDisg.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#pragma once
#include "Neon/domain/details/bGridDisgMgpu//bGrid.h"

namespace Neon {
/// Public alias for the multi-GPU block grid, instantiated with
/// StaticBlock<8, 8, 8> as its block type.
using bGridMgpu =
    Neon::domain::details::bGridMgpu::bGrid<
        Neon::domain::details::bGridMgpu::StaticBlock<8, 8, 8>>;
}  // namespace Neon
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include "Neon/domain/details/bGridDisgMgpu//BlockView/BlockViewGrid.h"
#include "Neon/domain/tools/GridTransformer.h"

namespace Neon::domain::details::bGridMgpu {

/// Type bundle for the block-view layer: the grid and field types it is
/// built on, its memory layout tag, and a helper to address raw element
/// memory.
struct BlockView
{
    using Grid = Neon::domain::tool::GridTransformer<details::GridTransformation>::Grid;
    template <typename T, int C = 0>
    using Field = Grid::template Field<T, C>;
    using index_3d = Neon::index_3d;

    static constexpr Neon::MemoryLayout layout = Neon::MemoryLayout::arrayOfStructs;

    /// Returns a reference to the first component of element `idx` in `mem`.
    ///
    /// The per-element stride is the compile-time cardinality `C` when it is
    /// non-zero; when `C == 0` the run-time value `card` is used instead.
    ///
    /// @param mem  base pointer of the element storage
    /// @param idx  element index
    /// @param card run-time cardinality (only read when C == 0)
    template <typename T, int C = 0>
    static auto helpGetReference(T* mem, const int idx, const int card) -> T&
    {
        if constexpr (C == 0) {
            return mem[idx * card];
        } else {
            return mem[idx * C];
        }
    }
};

} // namespace Neon::domain::details::bGridMgpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#pragma once
#include <assert.h>

#include "Neon/core/core.h"
#include "Neon/core/types/DataUse.h"
#include "Neon/core/types/Macros.h"

#include "Neon/set/BlockConfig.h"
#include "Neon/set/Containter.h"
#include "Neon/set/DevSet.h"
#include "Neon/set/MemoryOptions.h"

#include "Neon/sys/memory/MemDevice.h"

#include "Neon/domain/aGrid.h"

#include "Neon/domain/interface/GridBaseTemplate.h"
#include "Neon/domain/interface/GridConcept.h"
#include "Neon/domain/interface/KernelConfig.h"
#include "Neon/domain/interface/LaunchConfig.h"
#include "Neon/domain/interface/Stencil.h"
#include "Neon/domain/interface/common.h"

#include "Neon/domain/tools/GridTransformer.h"
#include "Neon/domain/tools/SpanTable.h"

#include "Neon/domain/details/eGrid/eGrid.h"
#include "Neon/domain/patterns/PatternScalar.h"

#include "BlockViewPartition.h"

namespace Neon::domain::details::bGridMgpu {

namespace details {
struct GridTransformation
{
template <typename T, int C>
using Partition = BlockViewPartition<T, C>;
using Span = Neon::domain::details::eGrid::eSpan;
static constexpr Neon::set::internal::ContainerAPI::DataViewSupport dataViewSupport = Neon::set::internal::ContainerAPI::DataViewSupport::on;

using FoundationGrid = Neon::domain::details::eGrid::eGrid;
static constexpr Neon::set::details::ExecutionThreadSpan executionThreadSpan = FoundationGrid::executionThreadSpan;
using ExecutionThreadSpanIndexType = int32_t;
using Idx = FoundationGrid::Idx;

static auto getDefaultBlock(FoundationGrid& foundationGrid) -> Neon::index_3d const&
{
return foundationGrid.getDefaultBlock();
}

static auto initSpan(FoundationGrid& foundationGrid, Neon::domain::tool::SpanTable<Span>& spanTable) -> void
{
spanTable.forEachConfiguration([&](Neon::Execution execution,
Neon::SetIdx setIdx,
Neon::DataView dw,
Span& span) {
span = foundationGrid.getSpan(execution, setIdx, dw);
});
}

static auto initLaunchParameters(FoundationGrid& foundationGrid,
Neon::DataView dataView,
const Neon::index_3d& blockSize,
const size_t& shareMem) -> Neon::set::LaunchParameters
{
return foundationGrid.getLaunchParameters(dataView, blockSize, shareMem);
}

static auto helpGetGridIdx(FoundationGrid&,
Neon::SetIdx const&,
FoundationGrid::Idx const& fgIdx)
-> GridTransformation::Idx
{
GridTransformation::Idx tgIdx = fgIdx;
return tgIdx;
}

template <typename T, int C>
static auto initFieldPartition(FoundationGrid::Field<T, C>& foundationField,
Neon::domain::tool::PartitionTable<Partition<T, C>>& partitionTable) -> void
{
partitionTable.forEachConfiguration(
[&](Neon::Execution execution,
Neon::SetIdx setIdx,
Neon::DataView dw,
Partition<T, C>& partition) {
auto& foundationPartition = foundationField.getPartition(execution, setIdx, dw);
partition = Partition<T, C>(foundationPartition);
});
}
};
using BlockViewGrid = Neon::domain::tool::GridTransformer<details::GridTransformation>::Grid;

} // namespace details

} // namespace Neon::domain::details::bGridMgpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#pragma once
#include <assert.h>
#include "Neon/core/core.h"
#include "Neon/core/types/Macros.h"
#include "Neon/domain/details/eGrid/eGrid.h"
#include "Neon/domain/details/eGrid/eIndex.h"
#include "Neon/domain/interface/NghData.h"
#include "Neon/set/DevSet.h"
#include "Neon/sys/memory/CudaIntrinsics.h"
#include "cuda_fp16.h"

namespace Neon::domain::details::bGridMgpu {

/// Partition type used by the block-view grid: an eGrid partition extended
/// with a helper that builds in-block indices and an accessor for the number
/// of allocated elements.
///
/// @tparam T element type stored in the partition
/// @tparam C compile-time cardinality forwarded to the eGrid partition
template <typename T,
          int C = 1>
class BlockViewPartition : public Neon::domain::details::eGrid::ePartition<T, C>
{
   public:
    /// Creates a partition whose eGrid base is default-constructed.
    BlockViewPartition()
    {
    }

    /// Creates a block-view partition as a copy of an existing eGrid partition.
    BlockViewPartition(Neon::domain::details::eGrid::ePartition<T, C> ePartition)
        : Neon::domain::details::eGrid::ePartition<T, C>(ePartition)
    {
    }

    /// Builds an index of type `BlockIdexType` from a block-view index and a
    /// location inside that block.
    ///
    /// @param idx             block-view index; its `helpGet()` value is the
    ///                        first constructor argument of the result
    /// @param inBlockLocation location inside the block; second constructor
    ///                        argument of the result
    /// @return the constructed `BlockIdexType`
    template <class BlockIdexType>
    static auto getInBlockIdx(typename Neon::domain::details::eGrid::ePartition<T, C>::Idx const& idx,
                              uint8_3d const&                                                     inBlockLocation) -> BlockIdexType
    {
        // The previous implementation built this index and then returned
        // `inBlockLocation`, discarding the block component.
        BlockIdexType blockIdx(idx.helpGet(), inBlockLocation);
        return blockIdx;
    }

    /// Returns the value of the base partition's mCountAllocated member.
    auto getCountAllocated() const -> int32_t;
};

template <typename T, int C>
auto BlockViewPartition<T, C>::getCountAllocated() const -> int32_t
{
    // `this->` is required: mCountAllocated lives in a dependent base class.
    return this->mCountAllocated;
}
} // namespace Neon::domain::details::bGridMgpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
#pragma once

#include "Neon/domain/details//eGrid/ePartition.h"

namespace Neon::domain::details::bGridMgpu {


/// Returns the partition id stored in mPrtID.
template <typename T, int C>
NEON_CUDA_HOST_DEVICE auto ePartition<T, C>::prtID() const -> int
{
    return mPrtID;
}

/// Run-time cardinality overload, enabled when `dummy_ta == 0`:
/// returns the value stored in mCardinality.
template <typename T, int C>
template <int dummy_ta>
inline NEON_CUDA_HOST_DEVICE auto ePartition<T, C>::cardinality() const
    -> std::enable_if_t<dummy_ta == 0, int>
{
    return mCardinality;
}

/// Compile-time cardinality overload, enabled when `dummy_ta != 0`:
/// returns the template parameter C.
template <typename T, int C>
template <int dummy_ta>
constexpr inline NEON_CUDA_HOST_DEVICE auto ePartition<T, C>::cardinality() const
    -> std::enable_if_t<dummy_ta != 0, int>
{
    return C;
}

/// Read-only element access: returns a copy of the value stored for element
/// `eId` and component `cardinalityIdx`, located through getOffset().
template <typename T, int C>
NEON_CUDA_HOST_DEVICE inline auto
ePartition<T, C>::operator()(eIndex eId, int cardinalityIdx) const -> T
{
    return mMem[getOffset(eId, cardinalityIdx)];
}

/// Mutable element access: returns a reference to the value stored for
/// element `eId` and component `cardinalityIdx`, located through getOffset().
template <typename T, int C>
NEON_CUDA_HOST_DEVICE inline auto
ePartition<T, C>::operator()(eIndex eId, int cardinalityIdx) -> T&
{
    return mMem[getOffset(eId, cardinalityIdx)];
}

/// Reads component `card` of the neighbour of `eId` identified by `nghIdx`.
///
/// @param eId            element whose neighbour is requested
/// @param nghIdx         1D neighbour index
/// @param card           component to read
/// @param alternativeVal value reported when the neighbour is not valid
/// @return NghData holding the neighbour's value and `true`, or
///         `alternativeVal` and `false` when isValidNgh() rejects the neighbour
template <typename T, int C>
NEON_CUDA_HOST_DEVICE inline auto
ePartition<T, C>::getNghData(eIndex      eId,
                             NghIdx      nghIdx,
                             int         card,
                             const Type& alternativeVal) const
    -> NghData
{
    eIndex neighbour;
    if (isValidNgh(eId, nghIdx, neighbour)) {
        T fetched = this->operator()(neighbour, card);
        return NghData(fetched, true);
    }

    T fallback = alternativeVal;
    return NghData(fallback, false);
}

/// Reads component `card` of the neighbour of `eId` at 3D offset `ngh3dIdx`.
///
/// The 3D offset is shifted by mStencilRadius on each axis and linearised
/// with mStencilTableYPitch as the stride on both y and z; the resulting
/// position is looked up in mStencil3dTo1dOffset to obtain the 1D neighbour
/// index, and the request is forwarded to the NghIdx overload.
template <typename T, int C>
NEON_CUDA_HOST_DEVICE inline auto
ePartition<T, C>::getNghData(eIndex               eId,
                             const Neon::int8_3d& ngh3dIdx,
                             int                  card,
                             const Type&          alternativeVal) const
    -> NghData
{
    const auto shiftedX = ngh3dIdx.x + mStencilRadius;
    const auto shiftedY = ngh3dIdx.y + mStencilRadius;
    const auto shiftedZ = ngh3dIdx.z + mStencilRadius;

    int tableOffset = shiftedX +
                      shiftedY * mStencilTableYPitch +
                      shiftedZ * mStencilTableYPitch * mStencilTableYPitch;

    NghIdx nghIdx = mStencil3dTo1dOffset[tableOffset];
    return getNghData(eId, nghIdx, card, alternativeVal);
}

/// Looks up the neighbour `nghIdx` of element `eId` in the connectivity table.
///
/// The entry is read from mConnectivity at position
/// `mCountAllocated * nghIdx + eId.get()` and stored into `neighbourIdx`.
///
/// @param[out] neighbourIdx receives the index read from the table
/// @return true when the stored index is greater than -1
template <typename T, int C>
NEON_CUDA_HOST_DEVICE inline auto
ePartition<T, C>::isValidNgh(eIndex  eId,
                             NghIdx  nghIdx,
                             eIndex& neighbourIdx) const
    -> bool
{
    const eIndex::Offset tablePosition = mCountAllocated * nghIdx + eId.get();
    neighbourIdx.set() = NEON_CUDA_CONST_LOAD((mConnectivity + tablePosition));
    return neighbourIdx.mIdx > -1;
}

/// Returns the 3D location stored in mOrigins for the given element.
///
/// @param eIndex element whose entry is read; `eIndex.get()` is used as the
///               position in mOrigins
/// @return a copy of `mOrigins[eIndex.get()]`
template <typename T,
          int C>
NEON_CUDA_HOST_DEVICE inline auto
ePartition<T, C>::getGlobalIndex(eIndex eIndex) const
    -> Neon::index_3d
{
    // The unused `baseAddr` pointer and the default-constructed temporary
    // were dropped; the table entry is returned directly.
    return mOrigins[eIndex.get()];
}

/// Builds a partition from raw buffers and layout parameters.
///
/// Every argument is stored in the matching member; in addition
/// mStencilTableYPitch is set to `2 * stencilRadius + 1`.
///
/// @param prtId               partition id
/// @param mem                 element storage
/// @param pitch               pitch used by getOffset()
/// @param cardinality         run-time cardinality
/// @param countAllocated      number of allocated elements
/// @param connRaw             connectivity table
/// @param toGlobal            per-element 3D locations
/// @param stencil3dTo1dOffset table mapping 3D stencil offsets to 1D indices
/// @param stencilRadius       stencil radius
template <typename T, int C>
ePartition<T, C>::ePartition(int             prtId,
                             T*              mem,
                             ePitch          pitch,
                             int32_t         cardinality,
                             int32_t         countAllocated,
                             Offset*         connRaw,
                             Neon::index_3d* toGlobal,
                             int8_t*         stencil3dTo1dOffset,
                             int32_t         stencilRadius)
{
    // Identity and element storage.
    this->mPrtID = prtId;
    this->mMem = mem;
    this->mPitch = pitch;
    this->mCardinality = cardinality;
    this->mCountAllocated = countAllocated;

    // Topology tables.
    this->mConnectivity = connRaw;
    this->mOrigins = toGlobal;

    // Stencil lookup data.
    this->mStencil3dTo1dOffset = stencil3dTo1dOffset;
    this->mStencilTableYPitch = 2 * stencilRadius + 1;
    this->mStencilRadius = stencilRadius;
}

/// Returns a read-only pointer to the value stored for element `eId` and
/// component `cardinalityIdx`, i.e. mMem advanced by getOffset().
template <typename T, int C>
NEON_CUDA_HOST_DEVICE auto
ePartition<T, C>::pointer(eIndex eId, int cardinalityIdx) const -> const Type*
{
    return mMem + getOffset(eId, cardinalityIdx);
}

/// Computes the position in mMem of component `cardinalityIdx` of element
/// `eId`: `eId.get() * mPitch.x + cardinalityIdx * mPitch.y`.
template <typename T, int C>
NEON_CUDA_HOST_DEVICE inline auto
ePartition<T, C>::getOffset(eIndex eId, int cardinalityIdx) const -> Offset
{
    const auto elementTerm = eId.get() * mPitch.x;
    const auto componentTerm = cardinalityIdx * mPitch.y;
    return Offset(elementTerm + componentTerm);
}

/// Returns the mutable base pointer of the element storage (mMem).
template <typename T, int C>
NEON_CUDA_HOST_DEVICE inline auto ePartition<T, C>::mem() -> T*
{
    return mMem;
}

/// Returns the read-only base pointer of the element storage (mMem).
template <typename T, int C>
NEON_CUDA_HOST_DEVICE inline auto ePartition<T, C>::mem() const -> const T*
{
    return mMem;
}

} // namespace Neon::domain::details::bGridMgpu
Loading

0 comments on commit 8e4adc8

Please sign in to comment.