-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
24 changed files
with
2,713 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#pragma once | ||
#include "Neon/domain/details/bGridDisgMgpu//bGrid.h" | ||
|
||
namespace Neon { | ||
using bGridMgpu = Neon::domain::details::bGridMgpu::bGrid<Neon::domain::details::bGridMgpu::StaticBlock<8,8,8>>; | ||
} |
29 changes: 29 additions & 0 deletions
29
libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#include "Neon/domain/details/bGridDisgMgpu//BlockView/BlockViewGrid.h" | ||
#include "Neon/domain/tools/GridTransformer.h" | ||
|
||
namespace Neon::domain::details::bGridMgpu { | ||
|
||
struct BlockView | ||
{ | ||
public: | ||
using Grid = Neon::domain::tool::GridTransformer<details::GridTransformation>::Grid; | ||
template <typename T, int C = 0> | ||
using Field = Grid::template Field<T, C>; | ||
using index_3d = Neon::index_3d; | ||
|
||
template <typename T, int C = 0> | ||
static auto helpGetReference(T* mem, const int idx, const int card) -> std::enable_if_t<C == 0, T&> | ||
{ | ||
return mem[idx * card]; | ||
} | ||
|
||
template <typename T, int C = 0> | ||
static auto helpGetReference(T* mem, const int idx, const int card) -> std::enable_if_t<C != 0, T&> | ||
{ | ||
return mem[idx * C]; | ||
} | ||
|
||
static constexpr Neon::MemoryLayout layout = Neon::MemoryLayout::arrayOfStructs; | ||
}; | ||
|
||
} // namespace Neon::domain::details::bGridMgpu |
97 changes: 97 additions & 0 deletions
97
libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView/BlockViewGrid.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#pragma once | ||
#include <assert.h> | ||
|
||
#include "Neon/core/core.h" | ||
#include "Neon/core/types/DataUse.h" | ||
#include "Neon/core/types/Macros.h" | ||
|
||
#include "Neon/set/BlockConfig.h" | ||
#include "Neon/set/Containter.h" | ||
#include "Neon/set/DevSet.h" | ||
#include "Neon/set/MemoryOptions.h" | ||
|
||
#include "Neon/sys/memory/MemDevice.h" | ||
|
||
#include "Neon/domain/aGrid.h" | ||
|
||
#include "Neon/domain/interface/GridBaseTemplate.h" | ||
#include "Neon/domain/interface/GridConcept.h" | ||
#include "Neon/domain/interface/KernelConfig.h" | ||
#include "Neon/domain/interface/LaunchConfig.h" | ||
#include "Neon/domain/interface/Stencil.h" | ||
#include "Neon/domain/interface/common.h" | ||
|
||
#include "Neon/domain/tools/GridTransformer.h" | ||
#include "Neon/domain/tools/SpanTable.h" | ||
|
||
#include "Neon/domain/details/eGrid/eGrid.h" | ||
#include "Neon/domain/patterns/PatternScalar.h" | ||
|
||
#include "BlockViewPartition.h" | ||
|
||
namespace Neon::domain::details::bGridMgpu { | ||
|
||
namespace details { | ||
struct GridTransformation | ||
{ | ||
template <typename T, int C> | ||
using Partition = BlockViewPartition<T, C>; | ||
using Span = Neon::domain::details::eGrid::eSpan; | ||
static constexpr Neon::set::internal::ContainerAPI::DataViewSupport dataViewSupport = Neon::set::internal::ContainerAPI::DataViewSupport::on; | ||
|
||
using FoundationGrid = Neon::domain::details::eGrid::eGrid; | ||
static constexpr Neon::set::details::ExecutionThreadSpan executionThreadSpan = FoundationGrid::executionThreadSpan; | ||
using ExecutionThreadSpanIndexType = int32_t; | ||
using Idx = FoundationGrid::Idx; | ||
|
||
static auto getDefaultBlock(FoundationGrid& foundationGrid) -> Neon::index_3d const& | ||
{ | ||
return foundationGrid.getDefaultBlock(); | ||
} | ||
|
||
static auto initSpan(FoundationGrid& foundationGrid, Neon::domain::tool::SpanTable<Span>& spanTable) -> void | ||
{ | ||
spanTable.forEachConfiguration([&](Neon::Execution execution, | ||
Neon::SetIdx setIdx, | ||
Neon::DataView dw, | ||
Span& span) { | ||
span = foundationGrid.getSpan(execution, setIdx, dw); | ||
}); | ||
} | ||
|
||
static auto initLaunchParameters(FoundationGrid& foundationGrid, | ||
Neon::DataView dataView, | ||
const Neon::index_3d& blockSize, | ||
const size_t& shareMem) -> Neon::set::LaunchParameters | ||
{ | ||
return foundationGrid.getLaunchParameters(dataView, blockSize, shareMem); | ||
} | ||
|
||
static auto helpGetGridIdx(FoundationGrid&, | ||
Neon::SetIdx const&, | ||
FoundationGrid::Idx const& fgIdx) | ||
-> GridTransformation::Idx | ||
{ | ||
GridTransformation::Idx tgIdx = fgIdx; | ||
return tgIdx; | ||
} | ||
|
||
template <typename T, int C> | ||
static auto initFieldPartition(FoundationGrid::Field<T, C>& foundationField, | ||
Neon::domain::tool::PartitionTable<Partition<T, C>>& partitionTable) -> void | ||
{ | ||
partitionTable.forEachConfiguration( | ||
[&](Neon::Execution execution, | ||
Neon::SetIdx setIdx, | ||
Neon::DataView dw, | ||
Partition<T, C>& partition) { | ||
auto& foundationPartition = foundationField.getPartition(execution, setIdx, dw); | ||
partition = Partition<T, C>(foundationPartition); | ||
}); | ||
} | ||
}; | ||
using BlockViewGrid = Neon::domain::tool::GridTransformer<details::GridTransformation>::Grid; | ||
|
||
} // namespace details | ||
|
||
} // namespace Neon::domain::details::bGridMgpu |
42 changes: 42 additions & 0 deletions
42
libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView/BlockViewPartition.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#pragma once | ||
#include <assert.h> | ||
#include "Neon/core/core.h" | ||
#include "Neon/core/types/Macros.h" | ||
#include "Neon/domain/details/eGrid/eGrid.h" | ||
#include "Neon/domain/details/eGrid/eIndex.h" | ||
#include "Neon/domain/interface/NghData.h" | ||
#include "Neon/set/DevSet.h" | ||
#include "Neon/sys/memory/CudaIntrinsics.h" | ||
#include "cuda_fp16.h" | ||
|
||
namespace Neon::domain::details::bGridMgpu { | ||
|
||
template <typename T, | ||
int C = 1> | ||
class BlockViewPartition : public Neon::domain::details::eGrid::ePartition<T, C> | ||
{ | ||
public: | ||
BlockViewPartition() | ||
{ | ||
} | ||
BlockViewPartition(Neon::domain::details::eGrid::ePartition<T, C> ePartition) | ||
: Neon::domain::details::eGrid::ePartition<T, C>(ePartition) | ||
{ | ||
} | ||
|
||
template <class BlockIdexType> | ||
static auto getInBlockIdx(typename Neon::domain::details::eGrid::ePartition<T, C>::Idx const& idx, | ||
uint8_3d const& inBlockLocation) -> BlockIdexType | ||
{ | ||
BlockIdexType blockIdx(idx.helpGet(), inBlockLocation); | ||
return inBlockLocation; | ||
} | ||
|
||
auto getCountAllocated() const -> int32_t; | ||
}; | ||
template <typename T, int C> | ||
auto BlockViewPartition<T, C>::getCountAllocated() const -> int32_t | ||
{ | ||
return this->mCountAllocated; | ||
} | ||
} // namespace Neon::domain::details::bGridMgpu |
184 changes: 184 additions & 0 deletions
184
libNeonDomain/include/Neon/domain/details/bGridDisgMgpu/BlockView/BlockViewPartition_imp.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
#pragma once | ||
|
||
#include "Neon/domain/details//eGrid/ePartition.h" | ||
|
||
namespace Neon::domain::details::bGridMgpu { | ||
|
||
|
||
template <typename T, | ||
int C> | ||
NEON_CUDA_HOST_DEVICE auto | ||
ePartition<T, C>::prtID() const | ||
-> int | ||
{ | ||
return mPrtID; | ||
} | ||
|
||
template <typename T, | ||
int C> | ||
template <int dummy_ta> | ||
inline NEON_CUDA_HOST_DEVICE auto | ||
ePartition<T, C>::cardinality() const | ||
-> std::enable_if_t<dummy_ta == 0, int> | ||
{ | ||
return mCardinality; | ||
} | ||
|
||
template <typename T, | ||
int C> | ||
template <int dummy_ta> | ||
constexpr inline NEON_CUDA_HOST_DEVICE auto | ||
ePartition<T, C>::cardinality() const | ||
-> std::enable_if_t<dummy_ta != 0, int> | ||
{ | ||
return C; | ||
} | ||
|
||
template <typename T, | ||
int C> | ||
NEON_CUDA_HOST_DEVICE inline auto | ||
ePartition<T, C>::operator()(eIndex eId, int cardinalityIdx) const | ||
-> T | ||
{ | ||
Offset jump = getOffset(eId, cardinalityIdx); | ||
return mMem[jump]; | ||
} | ||
|
||
template <typename T, | ||
int C> | ||
NEON_CUDA_HOST_DEVICE inline auto | ||
ePartition<T, C>::operator()(eIndex eId, int cardinalityIdx) -> T& | ||
{ | ||
Offset jump = getOffset(eId, cardinalityIdx); | ||
return mMem[jump]; | ||
} | ||
|
||
template <typename T, | ||
int C> | ||
NEON_CUDA_HOST_DEVICE inline auto | ||
ePartition<T, C>::getNghData(eIndex eId, | ||
NghIdx nghIdx, | ||
int card, | ||
const Type& alternativeVal) | ||
const -> NghData | ||
{ | ||
eIndex eIdxNgh; | ||
const bool isValidNeighbour = isValidNgh(eId, nghIdx, eIdxNgh); | ||
T val = (isValidNeighbour) ? this->operator()(eIdxNgh, card) : alternativeVal; | ||
// printf("(prtId %d)getNghData id %d card %d eIdxNgh %d val %d\n", | ||
// mPrtID, eId.mIdx, card, eIdxNgh.mIdx, int(val)); | ||
return NghData(val, isValidNeighbour); | ||
} | ||
|
||
template <typename T, | ||
int C> | ||
NEON_CUDA_HOST_DEVICE inline auto | ||
ePartition<T, C>::getNghData(eIndex eId, | ||
const Neon::int8_3d& ngh3dIdx, | ||
int card, | ||
const Type& alternativeVal) | ||
const -> NghData | ||
{ | ||
int tablePithc = (ngh3dIdx.x + mStencilRadius) + | ||
(ngh3dIdx.y + mStencilRadius) * mStencilTableYPitch + | ||
(ngh3dIdx.z + mStencilRadius) * mStencilTableYPitch * mStencilTableYPitch; | ||
NghIdx nghIdx = mStencil3dTo1dOffset[tablePithc]; | ||
NghData res = getNghData(eId, nghIdx, card, alternativeVal); | ||
|
||
return res; | ||
} | ||
|
||
template <typename T, | ||
int C> | ||
NEON_CUDA_HOST_DEVICE inline auto | ||
ePartition<T, C>::isValidNgh(eIndex eId, | ||
NghIdx nghIdx, | ||
eIndex& neighbourIdx) const | ||
-> bool | ||
{ | ||
const eIndex::Offset connectivityJumo = mCountAllocated * nghIdx + eId.get(); | ||
neighbourIdx.set() = NEON_CUDA_CONST_LOAD((mConnectivity + connectivityJumo)); | ||
const bool isValidNeighbour = (neighbourIdx.mIdx > -1); | ||
// printf("(prtId %d) getNghData id %d eIdxNgh %d connectivityJumo %d\n", | ||
// mPrtID, | ||
// eId.mIdx, neighbourIdx.mIdx, connectivityJumo); | ||
return isValidNeighbour; | ||
} | ||
|
||
template <typename T, | ||
int C> | ||
NEON_CUDA_HOST_DEVICE inline auto | ||
ePartition<T, C>::getGlobalIndex(eIndex eIndex) const | ||
-> Neon::index_3d | ||
{ | ||
Neon::index_3d loc; | ||
const auto baseAddr = mOrigins + eIndex.get(); | ||
loc = mOrigins[eIndex.get()]; | ||
return loc; | ||
} | ||
|
||
template <typename T, | ||
int C> | ||
ePartition<T, C>::ePartition(int prtId, | ||
T* mem, | ||
ePitch pitch, | ||
int32_t cardinality, | ||
int32_t countAllocated, | ||
Offset* connRaw, | ||
Neon::index_3d* toGlobal, | ||
int8_t* stencil3dTo1dOffset, | ||
int32_t stencilRadius) | ||
{ | ||
mPrtID = prtId; | ||
mMem = mem; | ||
mPitch = pitch; | ||
mCardinality = cardinality; | ||
mCountAllocated = countAllocated; | ||
|
||
mConnectivity = connRaw; | ||
mOrigins = toGlobal; | ||
|
||
mStencil3dTo1dOffset = stencil3dTo1dOffset; | ||
mStencilTableYPitch = 2 * stencilRadius + 1; | ||
|
||
mStencilRadius = stencilRadius; | ||
} | ||
|
||
template <typename T, | ||
int C> | ||
NEON_CUDA_HOST_DEVICE auto | ||
ePartition<T, C>::pointer(eIndex eId, int cardinalityIdx) const | ||
-> const Type* | ||
{ | ||
Offset jump = getOffset(eId, cardinalityIdx); | ||
return mMem + jump; | ||
} | ||
|
||
template <typename T, | ||
int C> | ||
NEON_CUDA_HOST_DEVICE inline auto | ||
ePartition<T, C>::getOffset(eIndex eId, int cardinalityIdx) const | ||
-> Offset | ||
{ | ||
return Offset(eId.get() * mPitch.x + cardinalityIdx * mPitch.y); | ||
} | ||
|
||
template <typename T, | ||
int C> | ||
NEON_CUDA_HOST_DEVICE inline auto | ||
ePartition<T, C>::mem() | ||
-> T* | ||
{ | ||
return mMem; | ||
} | ||
|
||
template <typename T, | ||
int C> | ||
NEON_CUDA_HOST_DEVICE inline auto | ||
ePartition<T, C>::mem() const | ||
-> const T* | ||
{ | ||
return mMem; | ||
} | ||
|
||
} // namespace Neon::domain::details::bGridMgpu |
Oops, something went wrong.