Skip to content

Commit

Permalink
Merge "Merge branch 'amd-develop' into amd-master" into krussell/rocm…
Browse files Browse the repository at this point in the history
…-rel-1.5
  • Loading branch information
mangupta authored and Gerrit Code Review committed Apr 27, 2017
2 parents 5433d17 + 662f4f8 commit 262c2f5
Show file tree
Hide file tree
Showing 8 changed files with 500 additions and 141 deletions.
1 change: 0 additions & 1 deletion .vimrc

This file was deleted.

6 changes: 3 additions & 3 deletions include/hip/hcc_detail/hip_runtime_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -853,7 +853,7 @@ hipError_t hipEventQuery(hipEvent_t event) ;
*
* @see hipGetDeviceCount, hipGetDevice, hipSetDevice, hipChooseDevice
*/
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr);
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void* ptr);

/**
* @brief Allocate memory on the default accelerator
Expand All @@ -863,7 +863,7 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr)
*
* If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
*
* @return #hipSuccess
* @return #hipSuccess, #hipErrorMemoryAllocation, #hipErrorInvalidValue (bad context, null *ptr)
*
* @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray, hipHostFree, hipHostMalloc
*/
Expand Down Expand Up @@ -1922,7 +1922,7 @@ hipError_t hipModuleLoadData(hipModule_t *module, const void *image);
* @param [in] blockDimZ Z grid dimension specified in work-items
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The kernel can access this with HIP_DYNAMIC_SHARED.
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th default stream is used with associated synchronization rules.
* @param [in] kernelParams
* @param [in] kernelParams
* @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and must be in the memory layout and alignment expected by the kernel.
*
* @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
Expand Down
6 changes: 3 additions & 3 deletions include/hip/nvcc_detail/hip_complex.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ __device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hi
}

__device__ __host__ static inline float hipCabsf(hipFloatComplex z){
return cuCabsf(p, q);
return cuCabsf(z);
}

typedef cuDoubleComplex hipDoubleComplex;
Expand All @@ -85,7 +85,7 @@ __device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z){
return cuConj(z);
}

__device__ __host__ static inline hipDoubleComplex hipCsqabs(hipDoubleComplex z){
__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z){
return cuCabs(z) * cuCabs(z);
}

Expand Down Expand Up @@ -123,7 +123,7 @@ __device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q
return cuCfmaf(p, q, r);
}

__device__ __host__ static inline hipDoubleComplex hipCfma(hipComplex p, hipComplex q, hipComplex r){
__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, hipDoubleComplex r){
return cuCfma(p, q, r);
}

Expand Down
82 changes: 72 additions & 10 deletions src/hip_hcc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1765,20 +1765,24 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind,

if (HIP_FORCE_P2P_HOST & 0x1) {
*forceUnpinnedCopy = true;
tprintf (DB_COPY, "P2P. Copy engine (dev:%d agent=0x%lx) can see src and dst but HIP_FORCE_P2P_HOST=0, forcing copy through staging buffers.\n",
(*copyDevice)->getDeviceNum(), (*copyDevice)->getDevice()->_hsaAgent.handle);
tprintf (DB_COPY, "Copy engine (dev:%d agent=0x%lx) can see src and dst but HIP_FORCE_P2P_HOST=0, forcing copy through staging buffers.\n",
*copyDevice ? (*copyDevice)->getDeviceNum() : -1,
*copyDevice ? (*copyDevice)->getDevice()->_hsaAgent.handle : 0x0);

} else {
tprintf (DB_COPY, "P2P. Copy engine (dev:%d agent=0x%lx) can see src and dst.\n",
(*copyDevice)->getDeviceNum(), (*copyDevice)->getDevice()->_hsaAgent.handle);
tprintf (DB_COPY, "Copy engine (dev:%d agent=0x%lx) can see src and dst.\n",
*copyDevice ? (*copyDevice)->getDeviceNum() : -1,
*copyDevice ? (*copyDevice)->getDevice()->_hsaAgent.handle : 0x0);
}
} else {
*forceUnpinnedCopy = true;
tprintf (DB_COPY, "P2P: Copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n",
(*copyDevice)->getDeviceNum(), (*copyDevice)->getDevice()->_hsaAgent.handle);
*copyDevice ? (*copyDevice)->getDeviceNum() : -1,
*copyDevice ? (*copyDevice)->getDevice()->_hsaAgent.handle : 0x0);
if (HIP_FAIL_SOC & 0x2) {
fprintf (stderr, "HIP_FAIL_SOC: P2P: copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n",
(*copyDevice)->getDeviceNum(), (*copyDevice)->getDevice()->_hsaAgent.handle);
*copyDevice ? (*copyDevice)->getDeviceNum() : -1,
*copyDevice ? (*copyDevice)->getDevice()->_hsaAgent.handle : 0x0);
throw ihipException(hipErrorRuntimeOther);
}
}
Expand All @@ -1794,6 +1798,62 @@ void printPointerInfo(unsigned dbFlag, const char *tag, const void *ptr, const h
}


// the pointer-info as returned by HC refers to the allocation
// This routine modifies the pointer-info so it appears to refer to the specific ptr and sizeBytes.
// TODO -remove this when HCC uses HSA pointer info functions directly.
void tailorPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes)
{
const char *ptrc = static_cast<const char *> (ptr);
if (ptrInfo->_sizeBytes == 0) {
// invalid ptrInfo, don't modify
return;
} else if (ptrInfo->_isInDeviceMem) {
assert (ptrInfo->_devicePointer != nullptr);
std::ptrdiff_t diff = ptrc - static_cast<const char*> (ptrInfo->_devicePointer);

//TODO : assert-> runtime assert that only appears in debug mode
assert (diff >= 0);
assert (diff <= ptrInfo->_sizeBytes);

ptrInfo->_devicePointer = const_cast<void*> (ptr);

if (ptrInfo->_hostPointer != nullptr) {
ptrInfo->_hostPointer = static_cast<char*>(ptrInfo->_hostPointer) + diff;
}

} else {

assert (ptrInfo->_hostPointer != nullptr);
std::ptrdiff_t diff = ptrc - static_cast<const char*> (ptrInfo->_hostPointer);

//TODO : assert-> runtime assert that only appears in debug mode
assert (diff >= 0);
assert (diff <= ptrInfo->_sizeBytes);

ptrInfo->_hostPointer = const_cast<void*>(ptr);

if (ptrInfo->_devicePointer != nullptr) {
ptrInfo->_devicePointer = static_cast<char*>(ptrInfo->_devicePointer) + diff;
}
}

assert (sizeBytes <= ptrInfo->_sizeBytes);
ptrInfo->_sizeBytes = sizeBytes;
};


bool getTailoredPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes)
{
bool tracked = (hc::am_memtracker_getinfo(ptrInfo, ptr) == AM_SUCCESS);

if (tracked) {
tailorPtrInfo(ptrInfo, ptr, sizeBytes);
}

return tracked;
};


// TODO : For registered and host memory, if the portable flag is set, we need to recognize that and perform appropriate copy operation.
// What can happen now is that Portable memory is mapped into multiple devices but Peer access is not enabled. i
// The peer detection logic doesn't see that the memory is already mapped and so tries to use an unpinned copy algorithm. If this is PinInPlace, then an error can occur.
Expand All @@ -1812,8 +1872,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
hc::accelerator acc;
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
bool dstTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) == AM_SUCCESS);
bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS);
bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes);
bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes);


// Some code in HCC and in printPointerInfo uses _sizeBytes==0 as an indication ptr is not valid, so check it here:
Expand Down Expand Up @@ -1873,6 +1933,7 @@ void ihipStream_t::lockedSymbolCopySync(hc::accelerator &acc, void* dst, void* s

void ihipStream_t::lockedSymbolCopyAsync(hc::accelerator &acc, void* dst, void* src, size_t sizeBytes, size_t offset, unsigned kind)
{
// TODO - review - this looks broken , should not be adding pointers to tracker dynamically:
if(kind == hipMemcpyHostToDevice) {
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS);
Expand All @@ -1899,6 +1960,7 @@ void ihipStream_t::lockedSymbolCopyAsync(hc::accelerator &acc, void* dst, void*
}
}


void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind)
{

Expand Down Expand Up @@ -1926,8 +1988,8 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
hc::accelerator acc;
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
bool dstTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) == AM_SUCCESS);
bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS);
bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes);
bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes);


hc::hcCommandKind hcCopyDir;
Expand Down
26 changes: 15 additions & 11 deletions src/hip_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ void * allocAndSharePtr(const char *msg, size_t sizeBytes, ihipCtx_t *ctx, unsig
//_appAllocationFlags : These are flags provided by the user when allocation is performed. They are returned to user in hipHostGetFlags and other APIs.
// TODO - add more info here when available.
//
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr)
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void* ptr)
{
HIP_INIT_API(attributes, ptr);

Expand All @@ -149,10 +149,10 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr)
attributes->devicePointer = amPointerInfo._devicePointer;
attributes->isManaged = 0;
if(attributes->memoryType == hipMemoryTypeHost){
attributes->hostPointer = ptr;
attributes->hostPointer = (void*)ptr;
}
if(attributes->memoryType == hipMemoryTypeDevice){
attributes->devicePointer = ptr;
attributes->devicePointer = (void*)ptr;
}
attributes->allocationFlags = amPointerInfo._appAllocationFlags;
attributes->device = amPointerInfo._appId;
Expand Down Expand Up @@ -207,22 +207,26 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
HIP_INIT_API(ptr, sizeBytes);
HIP_SET_DEVICE();
hipError_t hip_status = hipSuccess;

auto ctx = ihipGetTlsDefaultCtx();
// return NULL pointer when malloc size is 0
if (sizeBytes == 0)
{
*ptr = NULL;
return ihipLogStatus(hipSuccess);
}
hip_status = hipSuccess;

auto ctx = ihipGetTlsDefaultCtx();
} else if ((ctx==nullptr) || (ptr == nullptr)) {
hip_status = hipErrorInvalidValue;

if (ctx) {
} else {
auto device = ctx->getWriteableDevice();
*ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, 0/*amFlags*/, 0/*hipFlags*/);

} else {
hip_status = hipErrorMemoryAllocation;
}
if(sizeBytes && (*ptr == NULL)){
hip_status = hipErrorMemoryAllocation;
}

}


return ihipLogStatus(hip_status);
Expand Down Expand Up @@ -1268,7 +1272,7 @@ hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned
hsa_amd_ipc_memory_attach((hsa_amd_ipc_memory_t*)&(iHandle->ipc_handle), iHandle->psize, crit->peerCnt(), crit->peerAgents(), devPtr);
if(hsa_status != HSA_STATUS_SUCCESS)
hipStatus = hipErrorMapBufferObjectFailed;
}
}
#else
hipStatus = hipErrorRuntimeOther;
#endif
Expand Down
Loading

0 comments on commit 262c2f5

Please sign in to comment.