diff --git a/docs/RuntimeAPI/html/Synchonization.html b/docs/RuntimeAPI/html/Synchonization.html new file mode 100644 index 0000000000..224350dc77 --- /dev/null +++ b/docs/RuntimeAPI/html/Synchonization.html @@ -0,0 +1,117 @@ + + +
+ + + +
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
The following commands are "host-asynchronous" - meaning they do not wait for any preceding commands to complete, and may return control to the host thread before the requested operation completes:
+"Host-synchronous" commands have the following properties:
+The following commands are "host-synchronous".
+The term "blocking" has two meanings in HIP.
+The first refers to synchronization commands (ie hipStreamSynchronize, hipEventSynchronize) that cause the host CPU to wait for GPU activity to complete. These can either use an active where the host CPU spin-waits on the synchronization variable, or can use an interrupt-based scheme where the core is interrupted when the wait completes. The second technique is referred to as "blocking" (ie hipDeviceBlockingSync, hipEventBlockingSync) while the first is referred to as "active". Active can be appropriate for short tasks where latency is critical, but comes at the expense of a CPU core dedicated to monitoring the event.
+Note CUDA_LAUNCH_BLOCKING does add any pre-serialization to the commands and does not affect the concurrent stream behavior. For example, even when CUDA_LAUNCH_BLOCKING is set, kernels or data copy commands launched to separate streams can execute concurrently. Use the NULL stream if additional stream synchronization is desired.
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
HCC always returns 0 for maxThreadsPerMultiProcessor
+HCC always returns 0 for regsPerBlock
+HCC always returns 0 for l2CacheSize
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for FakeMutex, including all inherited members.
+lock() (defined in FakeMutex) | FakeMutex | inline |
try_lock() (defined in FakeMutex) | FakeMutex | inline |
unlock() (defined in FakeMutex) | FakeMutex | inline |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Member Functions | |
+void | lock () |
+bool | try_lock () |
+void | unlock () |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for LockedAccessor< T >, including all inherited members.
+LockedAccessor(T &criticalData, bool autoUnlock=true) (defined in LockedAccessor< T >) | LockedAccessor< T > | inline |
operator->() (defined in LockedAccessor< T >) | LockedAccessor< T > | inline |
unlock() (defined in LockedAccessor< T >) | LockedAccessor< T > | inline |
~LockedAccessor() (defined in LockedAccessor< T >) | LockedAccessor< T > | inline |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Member Functions | |
+ | LockedAccessor (T &criticalData, bool autoUnlock=true) |
+void | unlock () |
+T * | operator-> () |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for ihipDeviceCriticalBase_t< MUTEX_TYPE >, including all inherited members.
+_mutex (defined in LockedBase< MUTEX_TYPE >) | LockedBase< MUTEX_TYPE > | private |
addPeer(ihipDevice_t *peer) (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | |
addStream(ihipStream_t *stream) (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | |
const_streams() const (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | inline |
ihipDeviceCriticalBase_t() (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | inline |
incStreamId() (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | inline |
init(unsigned deviceCnt) (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | inline |
isPeer(const ihipDevice_t *peer) (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | |
lock() (defined in LockedBase< MUTEX_TYPE >) | LockedBase< MUTEX_TYPE > | inlineprivate |
LockedAccessor< ihipDeviceCriticalBase_t > (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | friend |
peerAgents() const (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | inline |
peerCnt() const (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | inline |
removePeer(ihipDevice_t *peer) (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | |
resetPeers(ihipDevice_t *thisDevice) (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | |
streams() (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | inline |
unlock() (defined in LockedBase< MUTEX_TYPE >) | LockedBase< MUTEX_TYPE > | inlineprivate |
~ihipDeviceCriticalBase_t() (defined in ihipDeviceCriticalBase_t< MUTEX_TYPE >) | ihipDeviceCriticalBase_t< MUTEX_TYPE > | inline |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Member Functions | |
+void | init (unsigned deviceCnt) |
+std::list< ihipStream_t * > & | streams () |
+const std::list< ihipStream_t * > & | const_streams () const |
+ihipStream_t::SeqNum_t | incStreamId () |
+bool | isPeer (const ihipDevice_t *peer) |
+bool | addPeer (ihipDevice_t *peer) |
+bool | removePeer (ihipDevice_t *peer) |
+void | resetPeers (ihipDevice_t *thisDevice) |
+void | addStream (ihipStream_t *stream) |
+uint32_t | peerCnt () const |
+hsa_agent_t * | peerAgents () const |
+Friends | |
+class | LockedAccessor< ihipDeviceCriticalBase_t > |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for ihipDevice_t, including all inherited members.
+_acc (defined in ihipDevice_t) | ihipDevice_t | |
_compute_units (defined in ihipDevice_t) | ihipDevice_t | |
_default_stream (defined in ihipDevice_t) | ihipDevice_t | |
_device_flags (defined in ihipDevice_t) | ihipDevice_t | |
_device_index (defined in ihipDevice_t) | ihipDevice_t | |
_hsa_agent (defined in ihipDevice_t) | ihipDevice_t | |
_props (defined in ihipDevice_t) | ihipDevice_t | |
_staging_buffer (defined in ihipDevice_t) | ihipDevice_t | |
criticalData() (defined in ihipDevice_t) | ihipDevice_t | inline |
ihipDevice_t() (defined in ihipDevice_t) | ihipDevice_t | inline |
init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags) (defined in ihipDevice_t) | ihipDevice_t | |
locked_addStream(ihipStream_t *s) (defined in ihipDevice_t) | ihipDevice_t | |
locked_removeStream(ihipStream_t *s) (defined in ihipDevice_t) | ihipDevice_t | |
locked_reset() (defined in ihipDevice_t) | ihipDevice_t | |
locked_syncDefaultStream(bool waitOnSelf) (defined in ihipDevice_t) | ihipDevice_t | |
locked_waitAllStreams() (defined in ihipDevice_t) | ihipDevice_t | |
~ihipDevice_t() (defined in ihipDevice_t) | ihipDevice_t |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Member Functions | |
+void | init (unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags) |
+void | locked_addStream (ihipStream_t *s) |
+void | locked_removeStream (ihipStream_t *s) |
+void | locked_reset () |
+void | locked_waitAllStreams () |
+void | locked_syncDefaultStream (bool waitOnSelf) |
+ihipDeviceCritical_t & | criticalData () |
+Public Attributes | |
+unsigned | _device_index |
+hipDeviceProp_t | _props |
+hc::accelerator | _acc |
+hsa_agent_t | _hsa_agent |
+ihipStream_t * | _default_stream |
+unsigned | _compute_units |
+StagingBuffer * | _staging_buffer [2] |
+unsigned | _device_flags |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for ihipException, including all inherited members.
+_code (defined in ihipException) | ihipException | |
ihipException(hipError_t e) (defined in ihipException) | ihipException | inline |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Member Functions | |
+ | ihipException (hipError_t e) |
+Public Attributes | |
+hipError_t | _code |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for ihipStreamCriticalBase_t< MUTEX_TYPE >, including all inherited members.
+_last_command_type (defined in ihipStreamCriticalBase_t< MUTEX_TYPE >) | ihipStreamCriticalBase_t< MUTEX_TYPE > | |
_last_copy_signal (defined in ihipStreamCriticalBase_t< MUTEX_TYPE >) | ihipStreamCriticalBase_t< MUTEX_TYPE > | |
_last_kernel_future (defined in ihipStreamCriticalBase_t< MUTEX_TYPE >) | ihipStreamCriticalBase_t< MUTEX_TYPE > | |
_mutex (defined in LockedBase< MUTEX_TYPE >) | LockedBase< MUTEX_TYPE > | |
_oldest_live_sig_id (defined in ihipStreamCriticalBase_t< MUTEX_TYPE >) | ihipStreamCriticalBase_t< MUTEX_TYPE > | |
_signalCursor (defined in ihipStreamCriticalBase_t< MUTEX_TYPE >) | ihipStreamCriticalBase_t< MUTEX_TYPE > | |
_signalPool (defined in ihipStreamCriticalBase_t< MUTEX_TYPE >) | ihipStreamCriticalBase_t< MUTEX_TYPE > | |
_stream_sig_id (defined in ihipStreamCriticalBase_t< MUTEX_TYPE >) | ihipStreamCriticalBase_t< MUTEX_TYPE > | |
ihipStreamCriticalBase_t() (defined in ihipStreamCriticalBase_t< MUTEX_TYPE >) | ihipStreamCriticalBase_t< MUTEX_TYPE > | inline |
lock() (defined in LockedBase< MUTEX_TYPE >) | LockedBase< MUTEX_TYPE > | inline |
mlock() (defined in ihipStreamCriticalBase_t< MUTEX_TYPE >) | ihipStreamCriticalBase_t< MUTEX_TYPE > | inline |
unlock() (defined in LockedBase< MUTEX_TYPE >) | LockedBase< MUTEX_TYPE > | inline |
~ihipStreamCriticalBase_t() (defined in ihipStreamCriticalBase_t< MUTEX_TYPE >) | ihipStreamCriticalBase_t< MUTEX_TYPE > | inline |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Member Functions | |
+ihipStreamCriticalBase_t +< StreamMutex > * | mlock () |
Public Member Functions inherited from LockedBase< MUTEX_TYPE > | |
+void | lock () |
+void | unlock () |
+Public Attributes | |
+ihipCommand_t | _last_command_type |
+ihipSignal_t * | _last_copy_signal |
+hc::completion_future | _last_kernel_future |
+int | _signalCursor |
+SIGSEQNUM | _oldest_live_sig_id |
+std::deque< ihipSignal_t > | _signalPool |
+SIGSEQNUM | _stream_sig_id |
Public Attributes inherited from LockedBase< MUTEX_TYPE > | |
+MUTEX_TYPE | _mutex |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for ihipStream_t, including all inherited members.
+_av (defined in ihipStream_t) | ihipStream_t | |
_flags (defined in ihipStream_t) | ihipStream_t | |
_id (defined in ihipStream_t) | ihipStream_t | |
allocSignal(LockedAccessor_StreamCrit_t &crit) (defined in ihipStream_t) | ihipStream_t | |
copyAsync(void *dst, const void *src, size_t sizeBytes, unsigned kind) (defined in ihipStream_t) | ihipStream_t | |
copySync(LockedAccessor_StreamCrit_t &crit, void *dst, const void *src, size_t sizeBytes, unsigned kind) (defined in ihipStream_t) | ihipStream_t | |
getDevice() const (defined in ihipStream_t) | ihipStream_t | |
ihipStream_t(unsigned device_index, hc::accelerator_view av, unsigned int flags) (defined in ihipStream_t) | ihipStream_t | |
lastCopySeqId(LockedAccessor_StreamCrit_t &crit) (defined in ihipStream_t) | ihipStream_t | inline |
lockclose_postKernelCommand(hc::completion_future &kernel_future) (defined in ihipStream_t) | ihipStream_t | |
locked_copySync(void *dst, const void *src, size_t sizeBytes, unsigned kind) (defined in ihipStream_t) | ihipStream_t | |
locked_lastCopySeqId() (defined in ihipStream_t) | ihipStream_t | inline |
locked_reclaimSignals(SIGSEQNUM sigNum) (defined in ihipStream_t) | ihipStream_t | |
locked_wait(bool assertQueueEmpty=false) (defined in ihipStream_t) | ihipStream_t | |
lockopen_preKernelCommand() (defined in ihipStream_t) | ihipStream_t | |
operator<< (defined in ihipStream_t) | ihipStream_t | friend |
preCopyCommand(LockedAccessor_StreamCrit_t &crit, ihipSignal_t *lastCopy, hsa_signal_t *waitSignal, ihipCommand_t copyType) (defined in ihipStream_t) | ihipStream_t | |
SeqNum_t typedef (defined in ihipStream_t) | ihipStream_t | |
wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty=false) (defined in ihipStream_t) | ihipStream_t | |
~ihipStream_t() (defined in ihipStream_t) | ihipStream_t |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Types | |
+typedef uint64_t | SeqNum_t |
+Public Member Functions | |
+ | ihipStream_t (unsigned device_index, hc::accelerator_view av, unsigned int flags) |
+void | copySync (LockedAccessor_StreamCrit_t &crit, void *dst, const void *src, size_t sizeBytes, unsigned kind) |
+void | locked_copySync (void *dst, const void *src, size_t sizeBytes, unsigned kind) |
+void | copyAsync (void *dst, const void *src, size_t sizeBytes, unsigned kind) |
+bool | lockopen_preKernelCommand () |
+void | lockclose_postKernelCommand (hc::completion_future &kernel_future) |
+int | preCopyCommand (LockedAccessor_StreamCrit_t &crit, ihipSignal_t *lastCopy, hsa_signal_t *waitSignal, ihipCommand_t copyType) |
+void | locked_reclaimSignals (SIGSEQNUM sigNum) |
+void | locked_wait (bool assertQueueEmpty=false) |
+SIGSEQNUM | locked_lastCopySeqId () |
+void | wait (LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty=false) |
+SIGSEQNUM | lastCopySeqId (LockedAccessor_StreamCrit_t &crit) |
+ihipSignal_t * | allocSignal (LockedAccessor_StreamCrit_t &crit) |
+ihipDevice_t * | getDevice () const |
+Public Attributes | |
+SeqNum_t | _id |
+hc::accelerator_view | _av |
+unsigned | _flags |
+Friends | |
+std::ostream & | operator<< (std::ostream &os, const ihipStream_t &s) |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Files | |
file | device_util.cpp |
file | hip_device.cpp |
file | hip_error.cpp |
file | hip_event.cpp |
file | hip_hcc.cpp |
file | hip_ldg.cpp |
file | hip_memory.cpp |
file | hip_peer.cpp |
file | hip_stream.cpp |
file | staging_buffer.cpp |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Files | |
file | hcc_acc.h [code] |
file | hip_hcc.h [code] |
file | hip_ldg.h [code] |
file | hip_runtime.h [code] |
Contains definitions of APIs for HIP runtime. | |
file | hip_runtime_api.h [code] |
Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language extensions (-hc mode) ; those functions in hip_runtime.h. | |
file | hip_texture.h [code] |
HIP C++ Texture API for hcc compiler. | |
file | hip_util.h [code] |
file | hip_vector_types.h [code] |
Defines the different newt vector types for HIP runtime. | |
file | host_defines.h [code] |
TODO-doc. | |
file | staging_buffer.h [code] |
file | trace_helper.h [code] |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Directories | |
directory | hcc_detail |
+Files | |
file | hcc.h [code] |
file | hip_common.h [code] |
file | hip_runtime.h [code] |
file | hip_runtime_api.h [code] |
file | hip_vector_types.h [code] |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
include | |
hcc_detail | |
hcc_acc.h | |
hip_hcc.h | |
hip_ldg.h | |
hip_runtime.h | Contains definitions of APIs for HIP runtime |
hip_runtime_api.h | Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language extensions (-hc mode) ; those functions in hip_runtime.h |
hip_texture.h | HIP C++ Texture API for hcc compiler |
hip_util.h | |
hip_vector_types.h | Defines the different newt vector types for HIP runtime |
host_defines.h | TODO-doc |
staging_buffer.h | |
trace_helper.h | |
hcc.h | |
hip_common.h | |
hip_runtime.h | |
hip_runtime_api.h | |
hip_vector_types.h | |
src | |
hip_hcc.cpp |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Modules | |
Device Management | |
Error Handling | |
Stream Management | |
Event Management | |
Memory Management | |
Device Memory Access | |
Management | |
Control | |
HCC-Specific Accessors | |
Texture Reference Management | |
Defines the HIP API. See the individual sections for more information.
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Functions | |
hipError_t | hipDeviceSynchronize (void) |
Blocks until the default device has completed all preceding requested tasks. More... | |
hipError_t | hipDeviceReset (void) |
Destroy all resources and reset all state on the default device in the current process. More... | |
hipError_t | hipSetDevice (int device) |
Set default device to be used for subsequent hip API calls from this thread. More... | |
hipError_t | hipGetDevice (int *device) |
Return the default device id for the calling host thread. More... | |
hipError_t | hipGetDeviceCount (int *count) |
Return number of compute-capable devices. More... | |
hipError_t | hipDeviceGetAttribute (int *pi, hipDeviceAttribute_t attr, int device) |
Query device attribute. More... | |
hipError_t | hipGetDeviceProperties (hipDeviceProp_t *prop, int device) |
Returns device properties. More... | |
hipError_t | hipDeviceSetCacheConfig (hipFuncCache cacheConfig) |
Set L1/Shared cache partition. More... | |
hipError_t | hipDeviceGetCacheConfig (hipFuncCache *cacheConfig) |
Set Cache configuration for a specific function. More... | |
hipError_t | hipFuncSetCacheConfig (hipFuncCache config) |
Set Cache configuration for a specific function. More... | |
hipError_t | hipDeviceGetSharedMemConfig (hipSharedMemConfig *pConfig) |
Get Shared memory bank configuration. More... | |
hipError_t | hipDeviceSetSharedMemConfig (hipSharedMemConfig config) |
Set Shared memory bank configuration. More... | |
hipError_t | hipSetDeviceFlags (unsigned flags) |
Set Device flags. More... | |
hipError_t hipDeviceGetAttribute | +( | +int * | +pi, | +
+ | + | hipDeviceAttribute_t | +attr, | +
+ | + | int | +device | +
+ | ) | ++ |
Query device attribute.
+[out] | pi | pointer to value to return |
[in] | attr | attribute to query |
[in] | device | which device to query for information |
hipError_t hipDeviceGetCacheConfig | +( | +hipFuncCache * | +cacheConfig | ) | ++ |
Set Cache configuration for a specific function.
+Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
+hipError_t hipDeviceGetSharedMemConfig | +( | +hipSharedMemConfig * | +pConfig | ) | ++ |
Get Shared memory bank configuration.
+Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
+hipError_t hipDeviceReset | +( | +void | +) | ++ |
Destroy all resources and reset all state on the default device in the current process.
+Explicity destroy all memory allocations, events, and queues associated with the default device in the current process.
+This function will reset the device immmediately, and then return after all resources have been freed. The caller must ensure that the device is not being accessed by any other host threads from the active process when this function is called.
+hipError_t hipDeviceSetCacheConfig | +( | +hipFuncCache | +cacheConfig | ) | ++ |
Set L1/Shared cache partition.
+Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
+hipError_t hipDeviceSetSharedMemConfig | +( | +hipSharedMemConfig | +config | ) | ++ |
Set Shared memory bank configuration.
+Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
+hipError_t hipDeviceSynchronize | +( | +void | +) | ++ |
Blocks until the default device has completed all preceding requested tasks.
+This function waits for all streams on the default device to complete execution, and then returns.
+hipError_t hipFuncSetCacheConfig | +( | +hipFuncCache | +cacheConfig | ) | ++ |
Set Cache configuration for a specific function.
+Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
+hipError_t hipGetDevice | +( | +int * | +device | ) | ++ |
Return the default device id for the calling host thread.
+[out] | device | *device is written with the default device |
HIP maintains an default device for each thread using thread-local-storage. This device is used implicitly for HIP runtime APIs called by this thread. hipGetDevice returns in * device
the default device for the calling host thread.
hipError_t hipGetDeviceCount | +( | +int * | +count | ) | ++ |
Return number of compute-capable devices.
+[output] | count Returns number of compute-capable devices. |
Returns in *count
the number of devices that have ability to run compute commands. If there are no such devices, then hipGetDeviceCount will return hipErrorNoDevice. If 1 or more devices can be found, then hipGetDeviceCount returns hipSuccess.
hipError_t hipGetDeviceProperties | +( | +hipDeviceProp_t * | +props, | +
+ | + | int | +device | +
+ | ) | ++ |
Returns device properties.
+[out] | prop | written with device properties |
[in] | device | which device to query for information |
Populates hipGetDeviceProperties with information for the specified device.
+HCC always returns 0 for maxThreadsPerMultiProcessor
+HCC always returns 0 for regsPerBlock
+HCC always returns 0 for l2CacheSize
+hipError_t hipSetDevice | +( | +int | +device | ) | ++ |
Set default device to be used for subsequent hip API calls from this thread.
+[in] | device | Valid device in range 0...hipGetDeviceCount(). |
Sets device
as the default device for the calling host thread. Valid device id's are 0... (hipGetDeviceCount()-1).
Many HIP APIs implicitly use the "default device" :
+This function may be called from any host thread. Multiple host threads may use the same device. This function does no synchronization with the previous or new device, and has very little runtime overhead. Applications can use hipSetDevice to quickly switch the default device before making a HIP runtime call which uses the default device.
+The default device is stored in thread-local-storage for each thread. Thread-pool implementations may inherit the default device of the previous thread. A good practice is to always call hipSetDevice at the start of HIP coding sequency to establish a known standard device.
+hipError_t hipSetDeviceFlags | +( | +unsigned | +flags | ) | ++ |
Set Device flags.
+Note: Only hipDeviceScheduleAuto and hipDeviceMapHost are supported
+ +
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Functions | |
hipError_t | hipGetLastError (void) |
Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess. More... | |
hipError_t | hipPeekAtLastError (void) |
Return last error returned by any HIP runtime API call. More... | |
const char * | hipGetErrorName (hipError_t hip_error) |
Return name of the specified error code in text form. More... | |
const char * | hipGetErrorString (hipError_t hip_error) |
Return handy text string message to explain the error which occurred. More... | |
const char* hipGetErrorName | +( | +hipError_t | +hip_error | ) | ++ |
Return name of the specified error code in text form.
+hip_error | Error code to convert to name. |
const char* hipGetErrorString | +( | +hipError_t | +hip_error | ) | ++ |
Return handy text string message to explain the error which occurred.
+hip_error | Error code to convert to string. |
hipError_t hipGetLastError | +( | +void | +) | ++ |
Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess.
+Returns the last error that has been returned by any of the runtime calls in the same host thread, and then resets the saved error to hipSuccess.
+hipError_t hipPeekAtLastError | +( | +void | +) | ++ |
Return last error returned by any HIP runtime API call.
+Returns the last error that has been returned by any of the runtime calls in the same host thread. Unlike hipGetLastError, this function does not reset the saved error code.
+ +
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Functions | |
hipError_t | hipEventCreateWithFlags (hipEvent_t *event, unsigned flags) |
Create an event with the specified flags. More... | |
hipError_t | hipEventCreate (hipEvent_t *event) |
hipError_t | hipEventRecord (hipEvent_t event, hipStream_t stream) |
Record an event in the specified stream. More... | |
hipError_t | hipEventDestroy (hipEvent_t event) |
Destroy the specified event. More... | |
hipError_t | hipEventSynchronize (hipEvent_t event) |
: Wait for an event to complete. More... | |
hipError_t | hipEventElapsedTime (float *ms, hipEvent_t start, hipEvent_t stop) |
Return the elapsed time between two events. More... | |
hipError_t | hipEventQuery (hipEvent_t event) |
Query event status. More... | |
hipError_t hipEventCreate | +( | +hipEvent_t * | +event | ) | ++ |
Create an event
+[in,out] | event | Returns the newly created event. |
hipError_t hipEventCreateWithFlags | +( | +hipEvent_t * | +event, | +
+ | + | unsigned | +flags | +
+ | ) | ++ |
Create an event with the specified flags.
+[in,out] | event | Returns the newly created event. |
[in] | flags | Flags to control event behavior. hipEventDefault, hipEventBlockingSync, hipEventDisableTiming, hipEventInterprocess |
hipError_t hipEventDestroy | +( | +hipEvent_t | +event | ) | ++ |
Destroy the specified event.
+[in] | event | Event to destroy. |
Releases memory associated with the event. If the event is recording but has not completed recording when hipEventDestroy is called, the function will return immediately and the completion_future resources will be released later, when the hipDevice is synchronized.
+ +hipError_t hipEventElapsedTime | +( | +float * | +ms, | +
+ | + | hipEvent_t | +start, | +
+ | + | hipEvent_t | +stop | +
+ | ) | ++ |
Return the elapsed time between two events.
+[out] | ms : Return time between start and stop in ms. | |
[in] | start | : Start event. |
[in] | stop | : Stop event. |
Computes the elapsed time between two events. Time is computed in ms, with a resolution of approximately 1 us.
+Events which are recorded in a NULL stream will block until all commands on all other streams complete execution, and then record the timestamp.
+Events which are recorded in a non-NULL stream will record their timestamp when they reach the head of the specified stream, after all previous commands in that stream have completed executing. Thus the time that the event recorded may be significantly after the host calls hipEventRecord.
+If hipEventRecord has not been called on either event, then hipErrorInvalidResourceHandle is returned. If hipEventRecord has been called on both events, but the timestamp has not yet been recorded on one or both events (that is, hipEventQuery would return hipErrorNotReady on at least one of the events), then hipErrorNotReady is returned.
+ +hipError_t hipEventQuery | +( | +hipEvent_t | +event | ) | ++ |
Query event status.
+[in] | event | Event to query. |
Query the status of the specified event. This function will return hipErrorNotReady if all commands in the appropriate stream (specified to hipEventRecord) have completed. If that work has not completed, or if hipEventRecord was not called on the event, then hipSuccess is returned.
+ +hipError_t hipEventRecord | +( | +hipEvent_t | +event, | +
+ | + | hipStream_t | +stream | +
+ | ) | ++ |
Record an event in the specified stream.
+[in] | event | event to record. |
[in] | stream | stream in which to record event. |
hipEventQuery or hipEventSynchronize must be used to determine when the event transitions from "recording" (after eventRecord is called) to "recorded" (when timestamps are set, if requested).
+Events which are recorded in a non-NULL stream will transition to from recording to "recorded" state when they reach the head of the specified stream, after all previous commands in that stream have completed executing.
+If hipEventRecord has been previously called aon event, then this call will overwrite any existing state in event.
+If this function is called on a an event that is currently being recorded, results are undefined - either outstanding recording may save state into the event, and the order is not guaranteed. This shoul be avoided.
+hipError_t hipEventSynchronize | +( | +hipEvent_t | +event | ) | ++ |
: Wait for an event to complete.
+This function will block until the event is ready, waiting for all previous work in the stream specified when event was recorded with hipEventRecord.
+If hipEventRecord has not been called on event
, this function returns immediately.
TODO-hcc - This function needs to support hipEventBlockingSync parameter.
+[in] | event | Event on which to wait. |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Classes | |
struct | dim3 |
+Macros | |
#define | hipStreamDefault 0x00 |
Flags that can be used with hipStreamCreateWithFlags. More... | |
+#define | hipStreamNonBlocking 0x01 |
Stream does not implicitly synchronize with null stream. | |
#define | hipEventDefault 0x0 |
Flags that can be used with hipEventCreateWithFlags: More... | |
+#define | hipEventBlockingSync 0x1 |
Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency. | |
+#define | hipEventDisableTiming 0x2 |
Disable event's capability to record timing information. May improve performance. | |
#define | hipEventInterprocess 0x4 |
Event can support IPC. More... | |
+#define | hipHostMallocDefault 0x0 |
Flags that can be used with hipHostMalloc. | |
+#define | hipHostMallocPortable 0x1 |
+#define | hipHostMallocMapped 0x2 |
+#define | hipHostMallocWriteCombined 0x4 |
#define | hipHostRegisterDefault 0x0 |
Flags that can be used with hipHostRegister. More... | |
+#define | hipHostRegisterPortable 0x1 |
Memory is considered registered by all contexts. HIP only supports one context so this is always assumed true. | |
+#define | hipHostRegisterMapped 0x2 |
Map the allocation into the address space for the current device. The device pointer can be obtained with hipHostGetDevicePointer. | |
+#define | hipHostRegisterIoMemory 0x4 |
Not supported. | |
+#define | hipDeviceScheduleAuto 0x0 |
+#define | hipDeviceScheduleSpin 0x1 |
+#define | hipDeviceScheduleYield 0x2 |
+#define | hipDeviceBlockingSync 0x4 |
+#define | hipDeviceMapHost 0x8 |
+#define | hipDeviceLmemResizeToMax 0x16 |
+Typedefs | |
+typedef enum hipError_t | hipError_t |
+typedef enum hipDeviceAttribute_t | hipDeviceAttribute_t |
typedef enum hipFuncCache | hipFuncCache |
typedef enum hipSharedMemConfig | hipSharedMemConfig |
typedef struct dim3 | dim3 |
typedef enum hipMemcpyKind | hipMemcpyKind |
+Enumerations | |
enum | hipError_t { + hipSuccess = 0, +hipErrorMemoryAllocation, +hipErrorLaunchOutOfResources, +hipErrorInvalidValue, + + hipErrorInvalidResourceHandle, +hipErrorInvalidDevice, +hipErrorInvalidMemcpyDirection, +hipErrorInvalidDevicePointer, + + hipErrorInitializationError, +hipErrorNoDevice, +hipErrorNotReady, +hipErrorUnknown, + + hipErrorPeerAccessNotEnabled, +hipErrorPeerAccessAlreadyEnabled, +hipErrorRuntimeMemory, +hipErrorRuntimeOther, + + hipErrorHostMemoryAlreadyRegistered, +hipErrorHostMemoryNotRegistered, +hipErrorTbd + + } |
enum | hipDeviceAttribute_t { + hipDeviceAttributeMaxThreadsPerBlock, +hipDeviceAttributeMaxBlockDimX, +hipDeviceAttributeMaxBlockDimY, +hipDeviceAttributeMaxBlockDimZ, + + hipDeviceAttributeMaxGridDimX, +hipDeviceAttributeMaxGridDimY, +hipDeviceAttributeMaxGridDimZ, +hipDeviceAttributeMaxSharedMemoryPerBlock, + + hipDeviceAttributeTotalConstantMemory, +hipDeviceAttributeWarpSize, +hipDeviceAttributeMaxRegistersPerBlock, +hipDeviceAttributeClockRate, + + hipDeviceAttributeMemoryClockRate, +hipDeviceAttributeMemoryBusWidth, +hipDeviceAttributeMultiprocessorCount, +hipDeviceAttributeComputeMode, + + hipDeviceAttributeL2CacheSize, +hipDeviceAttributeMaxThreadsPerMultiProcessor, +hipDeviceAttributeComputeCapabilityMajor, +hipDeviceAttributeComputeCapabilityMinor, + + hipDeviceAttributeConcurrentKernels, +hipDeviceAttributePciBusId, +hipDeviceAttributePciDeviceId, +hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, + + hipDeviceAttributeIsMultiGpuBoard + + } |
enum | hipFuncCache { hipFuncCachePreferNone, +hipFuncCachePreferShared, +hipFuncCachePreferL1, +hipFuncCachePreferEqual + } |
enum | hipSharedMemConfig { hipSharedMemBankSizeDefault, +hipSharedMemBankSizeFourByte, +hipSharedMemBankSizeEightByte + } |
enum | hipMemcpyKind { + hipMemcpyHostToHost = 0, +hipMemcpyHostToDevice = 1, +hipMemcpyDeviceToHost = 2, +hipMemcpyDeviceToDevice =3, + + hipMemcpyDefault = 4 + + } |
#define hipEventDefault 0x0 | +
Flags that can be used with hipEventCreateWithFlags:
+Default flags
+ +#define hipEventInterprocess 0x4 | +
Event can support IPC.
+#define hipHostRegisterDefault 0x0 | +
Flags that can be used with hipHostRegister.
+Memory is Mapped and Portable
+ +#define hipStreamDefault 0x00 | +
Flags that can be used with hipStreamCreateWithFlags.
+Default stream creation flags. These are used with hipStreamCreate().
+ +typedef enum hipFuncCache hipFuncCache | +
typedef enum hipMemcpyKind hipMemcpyKind | +
Memory copy types
+ +typedef enum hipSharedMemConfig hipSharedMemConfig | +
enum hipDeviceAttribute_t | +
Enumerator | |
---|---|
hipDeviceAttributeMaxThreadsPerBlock |
+ Maximum number of threads per block. + |
hipDeviceAttributeMaxBlockDimX |
+ Maximum x-dimension of a block. + |
hipDeviceAttributeMaxBlockDimY |
+ Maximum y-dimension of a block. + |
hipDeviceAttributeMaxBlockDimZ |
+ Maximum z-dimension of a block. + |
hipDeviceAttributeMaxGridDimX |
+ Maximum x-dimension of a grid. + |
hipDeviceAttributeMaxGridDimY |
+ Maximum y-dimension of a grid. + |
hipDeviceAttributeMaxGridDimZ |
+ Maximum z-dimension of a grid. + |
hipDeviceAttributeMaxSharedMemoryPerBlock |
+ Maximum shared memory available per block in bytes. + |
hipDeviceAttributeTotalConstantMemory |
+ Constant memory size in bytes. + |
hipDeviceAttributeWarpSize |
+ Warp size in threads. + |
hipDeviceAttributeMaxRegistersPerBlock |
+ Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor. + |
hipDeviceAttributeClockRate |
+ Peak clock frequency in kilohertz. + |
hipDeviceAttributeMemoryClockRate |
+ Peak memory clock frequency in kilohertz. + |
hipDeviceAttributeMemoryBusWidth |
+ Global memory bus width in bits. + |
hipDeviceAttributeMultiprocessorCount |
+ Number of multiprocessors on the device. + |
hipDeviceAttributeComputeMode |
+ Compute mode that device is currently in. + |
hipDeviceAttributeL2CacheSize |
+ Size of L2 cache in bytes. 0 if the device doesn't have L2 cache. + |
hipDeviceAttributeMaxThreadsPerMultiProcessor |
+ Maximum resident threads per multiprocessor. + |
hipDeviceAttributeComputeCapabilityMajor |
+ Major compute capability version number. + |
hipDeviceAttributeComputeCapabilityMinor |
+ Minor compute capability version number. + |
hipDeviceAttributeConcurrentKernels |
+ Device can possibly execute multiple kernels concurrently. + |
hipDeviceAttributePciBusId |
+ PCI Bus ID. + |
hipDeviceAttributePciDeviceId |
+ PCI Device ID. + |
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor |
+ Maximum Shared Memory Per Multiprocessor. + |
hipDeviceAttributeIsMultiGpuBoard |
+ Multiple GPU devices. + |
enum hipError_t | +
Enumerator | |
---|---|
hipSuccess |
+ Successful completion. + |
hipErrorMemoryAllocation |
+ Memory allocation error. + |
hipErrorLaunchOutOfResources |
+ Out of resources error. + |
hipErrorInvalidValue |
+ One or more of the parameters passed to the API call is NULL or not in an acceptable range. + |
hipErrorInvalidResourceHandle |
+ Resource handle (hipEvent_t or hipStream_t) invalid. + |
hipErrorInvalidDevice |
+ DeviceID must be in range 0...#compute-devices. + |
hipErrorInvalidMemcpyDirection |
+ Invalid memory copy direction. + |
hipErrorInvalidDevicePointer |
+ Invalid Device Pointer. + |
hipErrorInitializationError |
+ TODO comment from hipErrorInitializationError. + |
hipErrorNoDevice |
+ Call to hipGetDeviceCount returned 0 devices. + |
hipErrorNotReady |
+ Indicates that asynchronous operations enqueued earlier are not ready. This is not actually an error, but is used to distinguish from hipSuccess (which indicates completion). APIs that return this error include hipEventQuery and hipStreamQuery. + |
hipErrorUnknown |
+ Unknown error. + |
hipErrorPeerAccessNotEnabled |
+ Peer access was never enabled from the current device. + |
hipErrorPeerAccessAlreadyEnabled |
+ Peer access was already enabled from the current device. + |
hipErrorRuntimeMemory |
+ HSA runtime memory call returned error. Typically not seen in production systems. + |
hipErrorRuntimeOther |
+ HSA runtime call other than memory returned error. Typically not seen in production systems. + |
hipErrorHostMemoryAlreadyRegistered |
+ Produced when trying to lock a page-locked memory. + |
hipErrorHostMemoryNotRegistered |
+ Produced when trying to unlock a non-page-locked memory. + |
hipErrorTbd |
+ Marker that more error codes are needed. + |
enum hipFuncCache | +
Enumerator | |
---|---|
hipFuncCachePreferNone |
+ no preference for shared memory or L1 (default) + |
hipFuncCachePreferShared |
+ prefer larger shared memory and smaller L1 cache + |
hipFuncCachePreferL1 |
+ prefer larger L1 cache and smaller shared memory + |
hipFuncCachePreferEqual |
+ prefer equal size L1 cache and shared memory + |
enum hipMemcpyKind | +
Memory copy types
+Enumerator | |
---|---|
hipMemcpyHostToHost |
+ Host-to-Host Copy. + |
hipMemcpyHostToDevice |
+ Host-to-Device Copy. + |
hipMemcpyDeviceToHost |
+ Device-to-Host Copy. + |
hipMemcpyDeviceToDevice |
+ Device-to-Device Copy. + |
hipMemcpyDefault |
+ Runtime will automatically determine copy-kind based on virtual addresses. + |
enum hipSharedMemConfig | +
Enumerator | |
---|---|
hipSharedMemBankSizeDefault |
+ The compiler selects a device-specific value for the banking. + |
hipSharedMemBankSizeFourByte |
+ Shared mem is banked at 4-bytes intervals and performs best when adjacent threads access data 4 bytes apart. + |
hipSharedMemBankSizeEightByte |
+ Shared mem is banked at 8-byte intervals and performs best when adjacent threads access data 4 bytes apart. + |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
The following calls are only supported when compiler HIP with HCC. To produce portable code, use of these calls must be guarded #ifdef checks:
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Functions | |
hipError_t | hipPointerGetAttributes (hipPointerAttribute_t *attributes, void *ptr) |
Return attributes for the specified pointer. More... | |
hipError_t | hipMalloc (void **ptr, size_t size) |
Allocate memory on the default accelerator. More... | |
hipError_t | hipMallocHost (void **ptr, size_t size) __attribute__((deprecated("use hipHostMalloc instead"))) |
Allocate pinned host memory. More... | |
hipError_t | hipHostMalloc (void **ptr, size_t size, unsigned int flags) |
Allocate device accessible page locked host memory. More... | |
+hipError_t | hipHostAlloc (void **ptr, size_t size, unsigned int flags) __attribute__((deprecated("use hipHostMalloc instead"))) |
hipError_t | hipHostGetDevicePointer (void **devPtr, void *hstPtr, unsigned int flags) |
Get Device pointer from Host Pointer allocated through hipHostAlloc. More... | |
hipError_t | hipHostGetFlags (unsigned int *flagsPtr, void *hostPtr) |
Get flags associated with host pointer. More... | |
hipError_t | hipHostRegister (void *hostPtr, size_t sizeBytes, unsigned int flags) |
Register host memory so it can be accessed from the current device. More... | |
hipError_t | hipHostUnregister (void *hostPtr) |
Un-register host pointer. More... | |
hipError_t | hipFree (void *ptr) |
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSynchronize() call. If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. More... | |
hipError_t | hipFreeHost (void *ptr) __attribute__((deprecated("use hipHostFree instead"))) |
Free memory allocated by the hcc hip host memory allocation API. [Deprecated.]. More... | |
hipError_t | hipHostFree (void *ptr) |
Free memory allocated by the hcc hip host memory allocation API This API performs an implicit hipDeviceSynchronize() call. If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. More... | |
hipError_t | hipMemcpy (void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind) |
Copy data from src to dst. More... | |
hipError_t | hipMemcpyToSymbol (const char *symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind) |
Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset bytes from the start of symbol symbol . More... | |
hipError_t | hipMemcpyAsync (void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) |
Copy data from src to dst asynchronously. More... | |
hipError_t | hipMemset (void *dst, int value, size_t sizeBytes) |
Copy data from src to dst asynchronously. More... | |
hipError_t | hipMemsetAsync (void *dst, int value, size_t sizeBytes, hipStream_t stream) |
Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value value. More... | |
hipError_t | hipMemGetInfo (size_t *free, size_t *total) |
Query memory info. Return snapshot of free memory, and total allocatable memory on the device. More... | |
The following CUDA APIs are not currently supported:
+hipError_t hipFree | +( | +void * | +ptr | ) | ++ |
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSynchronize() call. If pointer is NULL, the hip runtime is initialized and hipSuccess is returned.
+[in] | ptr | Pointer to memory to be freed |
hipError_t hipFreeHost | +( | +void * | +ptr | ) | ++ |
Free memory allocated by the hcc hip host memory allocation API. [Deprecated.].
+hipError_t hipHostFree | +( | +void * | +ptr | ) | ++ |
Free memory allocated by the hcc hip host memory allocation API This API performs an implicit hipDeviceSynchronize() call. If pointer is NULL, the hip runtime is initialized and hipSuccess is returned.
+[in] | ptr | Pointer to memory to be freed |
hipError_t hipHostGetDevicePointer | +( | +void ** | +devPtr, | +
+ | + | void * | +hstPtr, | +
+ | + | unsigned int | +flags | +
+ | ) | ++ |
Get Device pointer from Host Pointer allocated through hipHostAlloc.
+[out] | dstPtr | Device Pointer mapped to passed host pointer |
[in] | hstPtr | Host Pointer allocated through hipHostAlloc |
[in] | flags | Flags to be passed for extension |
hipError_t hipHostGetFlags | +( | +unsigned int * | +flagsPtr, | +
+ | + | void * | +hostPtr | +
+ | ) | ++ |
Get flags associated with host pointer.
+[out] | flagsPtr | Memory location to store flags |
[in] | hostPtr | Host Pointer allocated through hipHostMalloc |
hipError_t hipHostMalloc | +( | +void ** | +ptr, | +
+ | + | size_t | +size, | +
+ | + | unsigned int | +flags | +
+ | ) | ++ |
Allocate device accessible page locked host memory.
+[out] | ptr | Pointer to the allocated host pinned memory |
[in] | size | Requested memory size |
[in] | flags | Type of host memory allocation |
hipError_t hipHostRegister | +( | +void * | +hostPtr, | +
+ | + | size_t | +sizeBytes, | +
+ | + | unsigned int | +flags | +
+ | ) | ++ |
Register host memory so it can be accessed from the current device.
+[out] | hostPtr | Pointer to host memory to be registered. |
[in] | sizeBytes | size of the host memory |
[in] | flags. | See below. |
Flags:
+After registering the memory, use hipHostGetDevicePointer to obtain the mapped device pointer. On many systems, the mapped device pointer will have a different value than the mapped host pointer. Applications must use the device pointer in device code, and the host pointer in device code.
+On some systems, registered memory is pinned. On some systems, registered memory may not be actually be pinned but uses OS or hardware facilities to all GPU access to the host memory.
+Developers are strongly encouraged to register memory blocks which are aligned to the host cache-line size. (typically 64-bytes but can be obtains from the CPUID instruction).
+If registering non-aligned pointers, the application must take care when register pointers from the same cache line on different devices. HIP's coarse-grained synchronization model does not guarantee correct results if different devices write to different parts of the same cache block - typically one of the writes will "win" and overwrite data from the other registered memory region.
+hipError_t hipHostUnregister | +( | +void * | +hostPtr | ) | ++ |
Un-register host pointer.
+[in] | hostPtr | Host pointer previously registered with hipHostRegister |
hipError_t hipMalloc | +( | +void ** | +ptr, | +
+ | + | size_t | +sizeBytes | +
+ | ) | ++ |
Allocate memory on the default accelerator.
+[out] | ptr | Pointer to the allocated memory |
[in] | size | Requested memory size |
hipError_t hipMallocHost | +( | +void ** | +ptr, | +
+ | + | size_t | +size | +
+ | ) | ++ |
Allocate pinned host memory.
+[out] | ptr | Pointer to the allocated host pinned memory |
[in] | size | Requested memory size |
hipError_t hipMemcpy | +( | +void * | +dst, | +
+ | + | const void * | +src, | +
+ | + | size_t | +sizeBytes, | +
+ | + | hipMemcpyKind | +kind | +
+ | ) | ++ |
Copy data from src to dst.
+It supports memory from host to device, device to host, device to device and host to host The src and dst must not overlap.
+For hipMemcpy, the copy is always performed by the current device (set by hipSetDevice). For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the device where the src data is physically located. For optimal peer-to-peer copies, the copy device must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess with copy agent as the current device and src/dest as the peerDevice argument. if this is not done, the hipMemcpy will still work, but will perform the copy using a staging buffer on the host.
+[out] | dst | Data being copy to |
[in] | src | Data being copy from |
[in] | sizeBytes | Data size in bytes |
[in] | copyType | Memory copy type |
hipError_t hipMemcpyAsync | +( | +void * | +dst, | +
+ | + | const void * | +src, | +
+ | + | size_t | +sizeBytes, | +
+ | + | hipMemcpyKind | +kind, | +
+ | + | hipStream_t | +stream | +
+ | ) | ++ |
Copy data from src to dst asynchronously.
+For hipMemcpy, the copy is always performed by the device associated with the specified stream.
+For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is a attached to the device where the src data is physically located. For optimal peer-to-peer copies, the copy device must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess with copy agent as the current device and src/dest as the peerDevice argument. if this is not done, the hipMemcpy will still work, but will perform the copy using a staging buffer on the host.
+[out] | dst | Data being copy to |
[in] | src | Data being copy from |
[in] | sizeBytes | Data size in bytes |
[in] | accelerator_view | Accelerator view which the copy is being enqueued |
hipError_t hipMemcpyToSymbol | +( | +const char * | +symbolName, | +
+ | + | const void * | +src, | +
+ | + | size_t | +sizeBytes, | +
+ | + | size_t | +offset, | +
+ | + | hipMemcpyKind | +kind | +
+ | ) | ++ |
Copies sizeBytes
bytes from the memory area pointed to by src
to the memory area pointed to by offset
bytes from the start of symbol symbol
.
The memory areas may not overlap. Symbol can either be a variable that resides in global or constant memory space, or it can be a character string, naming a variable that resides in global or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use hipErrorUnknown for now.
+[in] | symbolName | - Symbol destination on device |
[in] | src | - Data being copy from |
[in] | sizeBytes | - Data size in bytes |
[in] | offset | - Offset from start of symbol in bytes |
[in] | kind | - Type of transfer |
hipError_t hipMemGetInfo | +( | +size_t * | +free, | +
+ | + | size_t * | +total | +
+ | ) | ++ |
Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
+Returns in *free a snapshot of the current free memory o
+ +hipError_t hipMemset | +( | +void * | +dst, | +
+ | + | int | +value, | +
+ | + | size_t | +sizeBytes | +
+ | ) | ++ |
Copy data from src to dst asynchronously.
+It supports memory from host to device, device to host, device to device and host to host.
+[out] | dst | Data being copy to |
[in] | src | Data being copy from |
[in] | sizeBytes | Data size in bytes |
[in] | accelerator_view | Accelerator view which the copy is being enqueued |
hipError_t hipMemsetAsync | +( | +void * | +dst, | +
+ | + | int | +value, | +
+ | + | size_t | +sizeBytes, | +
+ | + | hipStream_t | +stream | +
+ | ) | ++ |
Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value value.
+hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the memset is complete. The operation can optionally be associated to a stream by passing a non-zero stream argument. If stream is non-zero, the operation may overlap with operations in other streams.
+[out] | dst | Pointer to device memory |
[in] | value | - Value to set for each byte of specified memory |
[in] | sizeBytes | - Size in bytes to set |
[in] | stream | - Stream identifier |
hipError_t hipPointerGetAttributes | +( | +hipPointerAttribute_t * | +attributes, | +
+ | + | void * | +ptr | +
+ | ) | ++ |
Return attributes for the specified pointer.
+ + +
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Functions | |
hipError_t | hipDeviceCanAccessPeer (int *canAccessPeer, int deviceId, int peerDeviceId) |
Determine if a device can access a peer's memory. More... | |
hipError_t | hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) |
Enable direct access from current device's virtual address space to memory allocations physically located on a peer device. More... | |
hipError_t | hipDeviceDisablePeerAccess (int peerDeviceId) |
Disable direct access from current device's virtual address space to memory allocations physically located on a peer device. More... | |
hipError_t hipDeviceCanAccessPeer | +( | +int * | +canAccessPeer, | +
+ | + | int | +deviceId, | +
+ | + | int | +peerDeviceId | +
+ | ) | ++ |
Determine if a device can access a peer's memory.
+[out] | canAccessPeer | Returns the peer access capability (0 or 1) |
[in] | device | - device from where memory may be accessed. |
[in] | peerDevice | - device where memory is physically located |
Returns "1" in canAccessPeer
if the specified device
is capable of directly accessing memory physically located on peerDevice , or "0" if not.
Returns "0" in canAccessPeer
if deviceId == peerDeviceId, and both are valid devices : a device is not a peer of itself.
HCC returns 0 in *canAccessPeer ; Need to update this function when RT supports P2P
+ +hipError_t hipDeviceDisablePeerAccess | +( | +int | +peerDeviceId | ) | ++ |
Disable direct access from current device's virtual address space to memory allocations physically located on a peer device.
+Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been enabled from the current device.
+[in] | peerDeviceId |
hipError_t hipDeviceEnablePeerAccess | +( | +int | +peerDeviceId, | +
+ | + | unsigned int | +flags | +
+ | ) | ++ |
Enable direct access from current device's virtual address space to memory allocations physically located on a peer device.
+Memory which already allocated on peer device will be mapped into the address space of the current device. In addition, all future memory allocations on peerDeviceId will be mapped into the address space of the current device when the memory is allocated. The peer memory remains accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset.
+[in] | peerDeviceId | |
[in] | flags | Returns hipSuccess, hipErrorInvalidDevice, hipErrorInvalidValue, |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
On AMD platforms, hipProfilerStart and hipProfilerStop require installation of AMD's GPU perf counter API and defining GPU_PERF
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Functions | |
hipError_t | hipStreamCreateWithFlags (hipStream_t *stream, unsigned int flags) |
Create an asynchronous stream. More... | |
hipError_t | hipStreamCreate (hipStream_t *stream) |
Create an asynchronous stream. More... | |
hipError_t | hipStreamWaitEvent (hipStream_t stream, hipEvent_t event, unsigned int flags) |
Make the specified compute stream wait for an event. More... | |
hipError_t | hipStreamSynchronize (hipStream_t stream) |
Wait for all commands in stream to complete. More... | |
hipError_t | hipStreamDestroy (hipStream_t stream) |
Destroys the specified stream. More... | |
hipError_t | hipStreamGetFlags (hipStream_t stream, unsigned int *flags) |
Return flags associated with this stream. More... | |
The following Stream APIs are not (yet) supported in HIP:
+hipError_t hipStreamCreate | +( | +hipStream_t * | +stream | ) | ++ |
Create an asynchronous stream.
+[in,out] | stream | Valid pointer to hipStream_t. This function writes the memory with the newly created stream. |
Create a new asynchronous stream. stream
returns an opaque handle that can be used to reference the newly created stream in subsequent hipStream* commands. The stream is allocated on the heap and will remain allocated even if the handle goes out-of-scope. To release the memory used by the stream, applicaiton must call hipStreamDestroy.
hipError_t hipStreamCreateWithFlags | +( | +hipStream_t * | +stream, | +
+ | + | unsigned int | +flags | +
+ | ) | ++ |
Create an asynchronous stream.
+[in,out] | stream | Pointer to new stream |
[in] | flags | to control stream creation. |
Create a new asynchronous stream. stream
returns an opaque handle that can be used to reference the newly created stream in subsequent hipStream* commands. The stream is allocated on the heap and will remain allocated
even if the handle goes out-of-scope. To release the memory used by the stream, applicaiton must call hipStreamDestroy. Flags controls behavior of the stream. See hipStreamDefault, hipStreamNonBlocking. hipStream_t are under development - with current HIP use the NULL stream.
+ +hipError_t hipStreamDestroy | +( | +hipStream_t | +stream | ) | ++ |
Destroys the specified stream.
+[in,out] | stream | Valid pointer to hipStream_t. This function writes the memory with the newly created stream. |
Destroys the specified stream.
+If commands are still executing on the specified stream, some may complete execution before the queue is deleted.
+The queue may be destroyed while some commands are still inflight, or may wait for all commands queued to the stream before destroying it.
+ + +hipError_t hipStreamGetFlags | +( | +hipStream_t | +stream, | +
+ | + | unsigned int * | +flags | +
+ | ) | ++ |
Return flags associated with this stream.
+[in] | stream | |
[in,out] | flags |
Return flags associated with this stream in *flags
.
hipError_t hipStreamSynchronize | +( | +hipStream_t | +stream | ) | ++ |
Wait for all commands in stream to complete.
+If the null stream is specified, this command blocks until all
+This command honors the hipDeviceLaunchBlocking flag, which controls whether the wait is active or blocking.
+This command is host-synchronous : the host will block until the stream is empty.
+TODO
+ +hipError_t hipStreamWaitEvent | +( | +hipStream_t | +stream, | +
+ | + | hipEvent_t | +event, | +
+ | + | unsigned int | +flags | +
+ | ) | ++ |
Make the specified compute stream wait for an event.
+[in] | stream | stream to make wait. |
[in] | event | event to wait on |
[in] | flags | control operation [must be 0] |
This function inserts a wait operation into the specified stream. All future work submitted to stream
will wait until event
reports completion before beginning execution. This function is host-asynchronous and the function may return before the wait has completed.
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Functions | |
+template<class T > | |
hipChannelFormatDesc | hipCreateChannelDesc () |
+template<class T , int dim, enum hipTextureReadMode readMode> | |
hipError_t | hipBindTexture (size_t *offset, struct texture< T, dim, readMode > &tex, const void *devPtr, const struct hipChannelFormatDesc *desc, size_t size=UINT_MAX) |
+template<class T , int dim, enum hipTextureReadMode readMode> | |
hipError_t | hipBindTexture (size_t *offset, struct texture< T, dim, readMode > &tex, const void *devPtr, size_t size=UINT_MAX) |
+template<class T , int dim, enum hipTextureReadMode readMode> | |
hipError_t | hipUnbindTexture (struct texture< T, dim, readMode > *tex) |
The HIP texture support is intended to allow use of texture cache on hardware where this is beneficial.
+The following CUDA APIs are not currently supported:
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Functions | |
hipError_t | hipDriverGetVersion (int *driverVersion) |
Returns the approximate HIP driver version. More... | |
hipError_t hipDriverGetVersion | +( | +int * | +driverVersion | ) | ++ |
Returns the approximate HIP driver version.
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
Contains definitions of APIs for HIP runtime. +More...
+#include <cmath>
#include <string.h>
#include <stddef.h>
#include <hip/hip_runtime_api.h>
#include <grid_launch.h>
#include <hip/hcc_detail/host_defines.h>
Go to the source code of this file.
++Macros | |
+#define | CUDA_SUCCESS hipSuccess |
+#define | hipLaunchParm grid_launch_parm |
+#define | __launch_bounds__(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) |
+#define | clock_t long long int |
#define | hipThreadIdx_x (amp_get_local_id(2)) |
+#define | hipThreadIdx_y (amp_get_local_id(1)) |
+#define | hipThreadIdx_z (amp_get_local_id(0)) |
+#define | hipBlockIdx_x (hc_get_group_id(2)) |
+#define | hipBlockIdx_y (hc_get_group_id(1)) |
+#define | hipBlockIdx_z (hc_get_group_id(0)) |
+#define | hipBlockDim_x (amp_get_local_size(2)) |
+#define | hipBlockDim_y (amp_get_local_size(1)) |
+#define | hipBlockDim_z (amp_get_local_size(0)) |
+#define | hipGridDim_x (hc_get_num_groups(2)) |
+#define | hipGridDim_y (hc_get_num_groups(1)) |
+#define | hipGridDim_z (hc_get_num_groups(0)) |
+#define | __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) |
+#define | HIP_KERNEL_NAME(...) __VA_ARGS__ |
#define | HIP_DYNAMIC_SHARED(type, var) |
+Functions | |
+__device__ float | acosf (float x) |
+__device__ float | acoshf (float x) |
+__device__ float | asinf (float x) |
+__device__ float | asinhf (float x) |
+__device__ float | atan2f (float y, float x) |
+__device__ float | atanf (float x) |
+__device__ float | atanhf (float x) |
+__device__ float | cbrtf (float x) |
+__device__ float | ceilf (float x) |
+__device__ float | copysignf (float x, float y) |
+__device__ float | cosf (float x) |
+__device__ float | coshf (float x) |
+__device__ float | cyl_bessel_i0f (float x) |
+__device__ float | cyl_bessel_i1f (float x) |
+__device__ float | erfcf (float x) |
+__device__ float | erfcinvf (float y) |
+__device__ float | erfcxf (float x) |
+__device__ float | erff (float x) |
+__device__ float | erfinvf (float y) |
+__device__ float | exp10f (float x) |
+__device__ float | exp2f (float x) |
+__device__ float | expf (float x) |
+__device__ float | expm1f (float x) |
+__device__ float | fabsf (float x) |
+__device__ float | fdimf (float x, float y) |
+__device__ __host__ float | fdividef (float x, float y) |
+__device__ float | floorf (float x) |
+__device__ float | fmaf (float x, float y, float z) |
+__device__ float | fmaxf (float x, float y) |
+__device__ float | fminf (float x, float y) |
+__device__ float | fmodf (float x, float y) |
+__device__ float | frexpf (float x, float y) |
+__device__ float | hypotf (float x, float y) |
+__device__ float | ilogbf (float x) |
+__host__ __device__ unsigned | isfinite (float a) |
+__device__ unsigned | isinf (float a) |
+__device__ unsigned | isnan (float a) |
+__device__ float | j0f (float x) |
+__device__ float | j1f (float x) |
+__device__ float | jnf (int n, float x) |
+__device__ float | ldexpf (float x, int exp) |
+__device__ float | lgammaf (float x) |
+__device__ long long int | llrintf (float x) |
+__device__ long long int | llroundf (float x) |
+__device__ float | log10f (float x) |
+__device__ float | log1pf (float x) |
+__device__ float | log2f (float x) |
+__device__ float | logbf (float x) |
+__device__ float | logf (float x) |
+__device__ long int | lrintf (float x) |
+__device__ long int | lroundf (float x) |
+__device__ float | modff (float x, float *iptr) |
+__device__ float | nanf (const char *tagp) |
+__device__ float | nearbyintf (float x) |
+__device__ float | nextafterf (float x, float y) |
+__device__ float | norm3df (float a, float b, float c) |
+__device__ float | norm4df (float a, float b, float c, float d) |
+__device__ float | normcdff (float y) |
+__device__ float | normcdfinvf (float y) |
+__device__ float | normf (int dim, const float *a) |
+__device__ float | powf (float x, float y) |
+__device__ float | rcbrtf (float x) |
+__device__ float | remainderf (float x, float y) |
+__device__ float | remquof (float x, float y, int *quo) |
+__device__ float | rhypotf (float x, float y) |
+__device__ float | rintf (float x) |
+__device__ float | rnorm3df (float a, float b, float c) |
+__device__ float | rnorm4df (float a, float b, float c, float d) |
+__device__ float | rnormf (int dim, const float *a) |
+__device__ float | roundf (float x) |
+__device__ float | rsqrtf (float x) |
+__device__ float | scalblnf (float x, long int n) |
+__device__ float | scalbnf (float x, int n) |
+__host__ __device__ unsigned | signbit (float a) |
+__device__ void | sincosf (float x, float *sptr, float *cptr) |
+__device__ void | sincospif (float x, float *sptr, float *cptr) |
+__device__ float | sinf (float x) |
+__device__ float | sinhf (float x) |
+__device__ float | sinpif (float x) |
+__device__ float | sqrtf (float x) |
+__device__ float | tanf (float x) |
+__device__ float | tanhf (float x) |
+__device__ float | tgammaf (float x) |
+__device__ float | truncf (float x) |
+__device__ float | y0f (float x) |
+__device__ float | y1f (float x) |
+__device__ float | ynf (int n, float x) |
+__host__ __device__ float | cospif (float x) |
+__device__ double | acos (double x) |
+__device__ double | acosh (double x) |
+__device__ double | asin (double x) |
+__device__ double | asinh (double x) |
+__device__ double | atan (double x) |
+__device__ double | atan2 (double y, double x) |
+__device__ double | atanh (double x) |
+__device__ double | cbrt (double x) |
+__device__ double | ceil (double x) |
+__device__ double | copysign (double x, double y) |
+__device__ double | cos (double x) |
+__device__ double | cosh (double x) |
+__host__ __device__ double | cospi (double x) |
+__device__ double | cyl_bessel_i0 (double x) |
+__device__ double | cyl_bessel_i1 (double x) |
+__device__ double | erf (double x) |
+__device__ double | erfc (double x) |
+__device__ double | erfcinv (double y) |
+__device__ double | erfcx (double x) |
+__device__ double | erfinv (double x) |
+__device__ double | exp (double x) |
+__device__ double | exp10 (double x) |
+__device__ double | exp2 (double x) |
+__device__ double | expm1 (double x) |
+__device__ double | fabs (double x) |
+__device__ double | fdim (double x, double y) |
+__device__ double | fdivide (double x, double y) |
+__device__ double | floor (double x) |
+__device__ double | fma (double x, double y, double z) |
+__device__ double | fmax (double x, double y) |
+__device__ double | fmin (double x, double y) |
+__device__ double | fmod (double x, double y) |
+__device__ double | frexp (double x, int *nptr) |
+__device__ double | hypot (double x, double y) |
+__device__ double | ilogb (double x) |
+__host__ __device__ unsigned | isfinite (double x) |
+__device__ unsigned | isinf (double x) |
+__device__ unsigned | isnan (double x) |
+__device__ double | j0 (double x) |
+__device__ double | j1 (double x) |
+__device__ double | jn (int n, double x) |
+__device__ double | ldexp (double x, int exp) |
+__device__ double | lgamma (double x) |
+__device__ long long | llrint (double x) |
+__device__ long long | llround (double x) |
+__device__ double | log (double x) |
+__device__ double | log10 (double x) |
+__device__ double | log1p (double x) |
+__device__ double | log2 (double x) |
+__device__ double | logb (double x) |
+__device__ long int | lrint (double x) |
+__device__ long int | lround (double x) |
+__device__ double | modf (double x, double *iptr) |
+__device__ double | nan (const char *tagp) |
+__device__ double | nearbyint (double x) |
+__device__ double | nextafter (double x, double y) |
+__device__ double | norm (int dim, const double *t) |
+__device__ double | norm3d (double a, double b, double c) |
+__device__ double | norm4d (double a, double b, double c, double d) |
+__device__ double | normcdf (double y) |
+__device__ double | normcdfinv (double y) |
+__device__ double | pow (double x, double y) |
+__device__ double | rcbrt (double x) |
+__device__ double | remainder (double x, double y) |
+__device__ double | remquo (double x, double y, int *quo) |
+__device__ double | rhypot (double x, double y) |
+__device__ double | rint (double x) |
+__device__ double | rnorm (int dim, const double *t) |
+__device__ double | rnorm3d (double a, double b, double c) |
+__device__ double | rnorm4d (double a, double b, double c, double d) |
+__device__ double | round (double x) |
+__host__ __device__ double | rsqrt (double x) |
+__device__ double | scalbln (double x, long int n) |
+__device__ double | scalbn (double x, int n) |
+__host__ __device__ unsigned | signbit (double a) |
+__device__ double | sin (double a) |
+__device__ void | sincos (double x, double *sptr, double *cptr) |
+__device__ void | sincospi (double x, double *sptr, double *cptr) |
+__device__ double | sinh (double x) |
+__host__ __device__ double | sinpi (double x) |
+__device__ double | sqrt (double x) |
+__device__ double | tan (double x) |
+__device__ double | tanh (double x) |
+__device__ double | tgamma (double x) |
+__device__ double | trunc (double x) |
+__device__ double | y0 (double x) |
+__device__ double | y1 (double y) |
+__device__ double | yn (int n, double x) |
+__device__ long long int | clock64 () |
+__device__ clock_t | clock () |
+__device__ int | atomicAdd (int *address, int val) |
+__device__ unsigned int | atomicAdd (unsigned int *address, unsigned int val) |
+__device__ unsigned long long int | atomicAdd (unsigned long long int *address, unsigned long long int val) |
+__device__ float | atomicAdd (float *address, float val) |
+__device__ int | atomicSub (int *address, int val) |
+__device__ unsigned int | atomicSub (unsigned int *address, unsigned int val) |
+__device__ int | atomicExch (int *address, int val) |
+__device__ unsigned int | atomicExch (unsigned int *address, unsigned int val) |
+__device__ unsigned long long int | atomicExch (unsigned long long int *address, unsigned long long int val) |
+__device__ float | atomicExch (float *address, float val) |
+__device__ int | atomicMin (int *address, int val) |
+__device__ unsigned int | atomicMin (unsigned int *address, unsigned int val) |
+__device__ unsigned long long int | atomicMin (unsigned long long int *address, unsigned long long int val) |
+__device__ int | atomicMax (int *address, int val) |
+__device__ unsigned int | atomicMax (unsigned int *address, unsigned int val) |
+__device__ unsigned long long int | atomicMax (unsigned long long int *address, unsigned long long int val) |
+__device__ int | atomicCAS (int *address, int compare, int val) |
+__device__ unsigned int | atomicCAS (unsigned int *address, unsigned int compare, unsigned int val) |
+__device__ unsigned long long int | atomicCAS (unsigned long long int *address, unsigned long long int compare, unsigned long long int val) |
+__device__ int | atomicAnd (int *address, int val) |
+__device__ unsigned int | atomicAnd (unsigned int *address, unsigned int val) |
+__device__ unsigned long long int | atomicAnd (unsigned long long int *address, unsigned long long int val) |
+__device__ int | atomicOr (int *address, int val) |
+__device__ unsigned int | atomicOr (unsigned int *address, unsigned int val) |
+__device__ unsigned long long int | atomicOr (unsigned long long int *address, unsigned long long int val) |
+__device__ int | atomicXor (int *address, int val) |
+__device__ unsigned int | atomicXor (unsigned int *address, unsigned int val) |
+__device__ unsigned long long int | atomicXor (unsigned long long int *address, unsigned long long int val) |
+__device__ unsigned int | atomicInc (unsigned int *address, unsigned int val) |
+__device__ unsigned int | atomicDec (unsigned int *address, unsigned int val) |
+__device__ unsigned int | __popc (unsigned int input) |
+__device__ unsigned int | __popcll (unsigned long long int input) |
+__device__ unsigned int | __clz (unsigned int input) |
+__device__ unsigned int | __clzll (unsigned long long int input) |
+__device__ unsigned int | __clz (int input) |
+__device__ unsigned int | __clzll (long long int input) |
+__device__ unsigned int | __ffs (unsigned int input) |
+__device__ unsigned int | __ffsll (unsigned long long int input) |
+__device__ unsigned int | __ffs (int input) |
+__device__ unsigned int | __ffsll (long long int input) |
+__device__ unsigned int | __brev (unsigned int input) |
+__device__ unsigned long long int | __brevll (unsigned long long int input) |
+__device__ int | __all (int input) |
+__device__ int | __any (int input) |
+__device__ unsigned long long int | __ballot (int input) |
+__device__ int | __shfl (int input, int lane, int width) |
+__device__ int | __shfl_up (int input, unsigned int lane_delta, int width) |
+__device__ int | __shfl_down (int input, unsigned int lane_delta, int width) |
+__device__ int | __shfl_xor (int input, int lane_mask, int width) |
+__device__ float | __shfl (float input, int lane, int width) |
+__device__ float | __shfl_up (float input, unsigned int lane_delta, int width) |
+__device__ float | __shfl_down (float input, unsigned int lane_delta, int width) |
+__device__ float | __shfl_xor (float input, int lane_mask, int width) |
+__host__ __device__ int | min (int arg1, int arg2) |
+__host__ __device__ int | max (int arg1, int arg2) |
+__device__ | __attribute__ ((address_space(3))) void *__get_dynamicgroupbaseptr() |
+__device__ float | __cosf (float x) |
+__device__ float | __expf (float x) |
+__device__ float | __frsqrt_rn (float x) |
+__device__ float | __fsqrt_rd (float x) |
+__device__ float | __fsqrt_rn (float x) |
+__device__ float | __fsqrt_ru (float x) |
+__device__ float | __fsqrt_rz (float x) |
+__device__ float | __log10f (float x) |
+__device__ float | __log2f (float x) |
+__device__ float | __logf (float x) |
+__device__ float | __powf (float base, float exponent) |
+__device__ void | __sincosf (float x, float *s, float *c) |
+__device__ float | __sinf (float x) |
+__device__ float | __tanf (float x) |
+__device__ float | __dsqrt_rd (double x) |
+__device__ float | __dsqrt_rn (double x) |
+__device__ float | __dsqrt_ru (double x) |
+__device__ float | __dsqrt_rz (double x) |
+Variables | |
+int | HIP_TRACE_API |
+const int | warpSize |
Contains definitions of APIs for HIP runtime.
+#define HIP_DYNAMIC_SHARED | +( | ++ | type, | +
+ | + | + | var | +
+ | ) | ++ |
extern shared
+ +#define hipThreadIdx_x (amp_get_local_id(2)) | +
Kernel launching
+ +
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language extensions (-hc mode) ; those functions in hip_runtime.h. +More...
+#include <stdint.h>
#include <stddef.h>
#include <hip/hcc_detail/host_defines.h>
#include <hip/hip_runtime_api.h>
Go to the source code of this file.
++Classes | |
struct | hipEvent_t |
struct | dim3 |
+Macros | |
#define | hipStreamDefault 0x00 |
Flags that can be used with hipStreamCreateWithFlags. More... | |
+#define | hipStreamNonBlocking 0x01 |
Stream does not implicitly synchronize with null stream. | |
#define | hipEventDefault 0x0 |
Flags that can be used with hipEventCreateWithFlags: More... | |
+#define | hipEventBlockingSync 0x1 |
Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency. | |
+#define | hipEventDisableTiming 0x2 |
Disable event's capability to record timing information. May improve performance. | |
#define | hipEventInterprocess 0x4 |
Event can support IPC. More... | |
+#define | hipHostMallocDefault 0x0 |
Flags that can be used with hipHostMalloc. | |
+#define | hipHostMallocPortable 0x1 |
+#define | hipHostMallocMapped 0x2 |
+#define | hipHostMallocWriteCombined 0x4 |
#define | hipHostRegisterDefault 0x0 |
Flags that can be used with hipHostRegister. More... | |
+#define | hipHostRegisterPortable 0x1 |
Memory is considered registered by all contexts. HIP only supports one context so this is always assumed true. | |
+#define | hipHostRegisterMapped 0x2 |
Map the allocation into the address space for the current device. The device pointer can be obtained with hipHostGetDevicePointer. | |
+#define | hipHostRegisterIoMemory 0x4 |
Not supported. | |
+#define | hipDeviceScheduleAuto 0x0 |
+#define | hipDeviceScheduleSpin 0x1 |
+#define | hipDeviceScheduleYield 0x2 |
+#define | hipDeviceBlockingSync 0x4 |
+#define | hipDeviceMapHost 0x8 |
+#define | hipDeviceLmemResizeToMax 0x16 |
+Typedefs | |
+typedef struct ihipStream_t * | hipStream_t |
+typedef struct hipEvent_t | hipEvent_t |
typedef enum hipFuncCache | hipFuncCache |
typedef enum hipSharedMemConfig | hipSharedMemConfig |
typedef struct dim3 | dim3 |
typedef enum hipMemcpyKind | hipMemcpyKind |
+Enumerations | |
enum | hipFuncCache { hipFuncCachePreferNone, +hipFuncCachePreferShared, +hipFuncCachePreferL1, +hipFuncCachePreferEqual + } |
enum | hipSharedMemConfig { hipSharedMemBankSizeDefault, +hipSharedMemBankSizeFourByte, +hipSharedMemBankSizeEightByte + } |
enum | hipMemcpyKind { + hipMemcpyHostToHost = 0, +hipMemcpyHostToDevice = 1, +hipMemcpyDeviceToHost = 2, +hipMemcpyDeviceToDevice =3, + + hipMemcpyDefault = 4 + + } |
+Functions | |
hipError_t | hipDeviceSynchronize (void) |
Blocks until the default device has completed all preceding requested tasks. More... | |
hipError_t | hipDeviceReset (void) |
Destroy all resources and reset all state on the default device in the current process. More... | |
hipError_t | hipSetDevice (int device) |
Set default device to be used for subsequent hip API calls from this thread. More... | |
hipError_t | hipGetDevice (int *device) |
Return the default device id for the calling host thread. More... | |
hipError_t | hipGetDeviceCount (int *count) |
Return number of compute-capable devices. More... | |
hipError_t | hipDeviceGetAttribute (int *pi, hipDeviceAttribute_t attr, int device) |
Query device attribute. More... | |
hipError_t | hipGetDeviceProperties (hipDeviceProp_t *prop, int device) |
Returns device properties. More... | |
hipError_t | hipDeviceSetCacheConfig (hipFuncCache cacheConfig) |
Set L1/Shared cache partition. More... | |
hipError_t | hipDeviceGetCacheConfig (hipFuncCache *cacheConfig) |
Set Cache configuration for a specific function. More... | |
hipError_t | hipFuncSetCacheConfig (hipFuncCache config) |
Set Cache configuration for a specific function. More... | |
hipError_t | hipDeviceGetSharedMemConfig (hipSharedMemConfig *pConfig) |
Get Shared memory bank configuration. More... | |
hipError_t | hipDeviceSetSharedMemConfig (hipSharedMemConfig config) |
Set Shared memory bank configuration. More... | |
hipError_t | hipSetDeviceFlags (unsigned flags) |
Set Device flags. More... | |
hipError_t | hipGetLastError (void) |
Return last error returned by any HIP runtime API call and resets the stored error code to hipSuccess. More... | |
hipError_t | hipPeekAtLastError (void) |
Return last error returned by any HIP runtime API call. More... | |
const char * | hipGetErrorName (hipError_t hip_error) |
Return name of the specified error code in text form. More... | |
const char * | hipGetErrorString (hipError_t hip_error) |
Return handy text string message to explain the error which occurred. More... | |
hipError_t | hipStreamCreateWithFlags (hipStream_t *stream, unsigned int flags) |
Create an asynchronous stream. More... | |
hipError_t | hipStreamCreate (hipStream_t *stream) |
Create an asynchronous stream. More... | |
hipError_t | hipStreamWaitEvent (hipStream_t stream, hipEvent_t event, unsigned int flags) |
Make the specified compute stream wait for an event. More... | |
hipError_t | hipStreamSynchronize (hipStream_t stream) |
Wait for all commands in stream to complete. More... | |
hipError_t | hipStreamDestroy (hipStream_t stream) |
Destroys the specified stream. More... | |
hipError_t | hipStreamGetFlags (hipStream_t stream, unsigned int *flags) |
Return flags associated with this stream. More... | |
hipError_t | hipEventCreateWithFlags (hipEvent_t *event, unsigned flags) |
Create an event with the specified flags. More... | |
hipError_t | hipEventCreate (hipEvent_t *event) |
hipError_t | hipEventRecord (hipEvent_t event, hipStream_t stream) |
Record an event in the specified stream. More... | |
hipError_t | hipEventDestroy (hipEvent_t event) |
Destroy the specified event. More... | |
hipError_t | hipEventSynchronize (hipEvent_t event) |
: Wait for an event to complete. More... | |
hipError_t | hipEventElapsedTime (float *ms, hipEvent_t start, hipEvent_t stop) |
Return the elapsed time between two events. More... | |
hipError_t | hipEventQuery (hipEvent_t event) |
Query event status. More... | |
hipError_t | hipPointerGetAttributes (hipPointerAttribute_t *attributes, void *ptr) |
Return attributes for the specified pointer. More... | |
hipError_t | hipMalloc (void **ptr, size_t size) |
Allocate memory on the default accelerator. More... | |
hipError_t | hipMallocHost (void **ptr, size_t size) __attribute__((deprecated("use hipHostMalloc instead"))) |
Allocate pinned host memory. More... | |
hipError_t | hipHostMalloc (void **ptr, size_t size, unsigned int flags) |
Allocate device accessible page locked host memory. More... | |
+hipError_t | hipHostAlloc (void **ptr, size_t size, unsigned int flags) __attribute__((deprecated("use hipHostMalloc instead"))) |
hipError_t | hipHostGetDevicePointer (void **devPtr, void *hstPtr, unsigned int flags) |
Get Device pointer from Host Pointer allocated through hipHostAlloc. More... | |
hipError_t | hipHostGetFlags (unsigned int *flagsPtr, void *hostPtr) |
Get flags associated with host pointer. More... | |
hipError_t | hipHostRegister (void *hostPtr, size_t sizeBytes, unsigned int flags) |
Register host memory so it can be accessed from the current device. More... | |
hipError_t | hipHostUnregister (void *hostPtr) |
Un-register host pointer. More... | |
hipError_t | hipFree (void *ptr) |
Free memory allocated by the hcc hip memory allocation API. This API performs an implicit hipDeviceSynchronize() call. If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. More... | |
hipError_t | hipFreeHost (void *ptr) __attribute__((deprecated("use hipHostFree instead"))) |
Free memory allocated by the hcc hip host memory allocation API. [Deprecated.]. More... | |
hipError_t | hipHostFree (void *ptr) |
Free memory allocated by the hcc hip host memory allocation API This API performs an implicit hipDeviceSynchronize() call. If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. More... | |
hipError_t | hipMemcpy (void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind) |
Copy data from src to dst. More... | |
hipError_t | hipMemcpyToSymbol (const char *symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind) |
Copies sizeBytes bytes from the memory area pointed to by src to the memory area pointed to by offset bytes from the start of symbol symbol . More... | |
hipError_t | hipMemcpyAsync (void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) |
Copy data from src to dst asynchronously. More... | |
hipError_t | hipMemset (void *dst, int value, size_t sizeBytes) |
Copy data from src to dst asynchronously. More... | |
hipError_t | hipMemsetAsync (void *dst, int value, size_t sizeBytes, hipStream_t stream) |
Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant byte value value. More... | |
hipError_t | hipMemGetInfo (size_t *free, size_t *total) |
Query memory info. Return snapshot of free memory, and total allocatable memory on the device. More... | |
hipError_t | hipDeviceCanAccessPeer (int *canAccessPeer, int deviceId, int peerDeviceId) |
Determine if a device can access a peer's memory. More... | |
hipError_t | hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) |
Enable direct access from current device's virtual address space to memory allocations physically located on a peer device. More... | |
hipError_t | hipDeviceDisablePeerAccess (int peerDeviceId) |
Disable direct access from current device's virtual address space to memory allocations physically located on a peer device. More... | |
hipError_t | hipDriverGetVersion (int *driverVersion) |
Returns the approximate HIP driver version. More... | |
Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language extensions (-hc mode) ; those functions in hip_runtime.h.
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
Defines the different newt vector types for HIP runtime. +More...
+#include "hip/hcc_detail/host_defines.h"
Go to the source code of this file.
++Macros | |
+#define | __HIP_DEVICE__ __device__ __host__ |
+Typedefs | |
+typedef hc::short_vector::char1 | char1 |
+typedef hc::short_vector::char2 | char2 |
+typedef hc::short_vector::char3 | char3 |
+typedef hc::short_vector::char4 | char4 |
+typedef hc::short_vector::short1 | short1 |
+typedef hc::short_vector::short2 | short2 |
+typedef hc::short_vector::short3 | short3 |
+typedef hc::short_vector::short4 | short4 |
+typedef hc::short_vector::int1 | int1 |
+typedef hc::short_vector::int2 | int2 |
+typedef hc::short_vector::int3 | int3 |
+typedef hc::short_vector::int4 | int4 |
+typedef hc::short_vector::long1 | long1 |
+typedef hc::short_vector::long2 | long2 |
+typedef hc::short_vector::long3 | long3 |
+typedef hc::short_vector::long4 | long4 |
+typedef hc::short_vector::longlong1 | longlong1 |
+typedef hc::short_vector::longlong2 | longlong2 |
+typedef hc::short_vector::longlong3 | longlong3 |
+typedef hc::short_vector::longlong4 | longlong4 |
+typedef hc::short_vector::uchar1 | uchar1 |
+typedef hc::short_vector::uchar2 | uchar2 |
+typedef hc::short_vector::uchar3 | uchar3 |
+typedef hc::short_vector::uchar4 | uchar4 |
+typedef hc::short_vector::ushort1 | ushort1 |
+typedef hc::short_vector::ushort2 | ushort2 |
+typedef hc::short_vector::ushort3 | ushort3 |
+typedef hc::short_vector::ushort4 | ushort4 |
+typedef hc::short_vector::uint1 | uint1 |
+typedef hc::short_vector::uint2 | uint2 |
+typedef hc::short_vector::uint3 | uint3 |
+typedef hc::short_vector::uint4 | uint4 |
+typedef hc::short_vector::ulong1 | ulong1 |
+typedef hc::short_vector::ulong2 | ulong2 |
+typedef hc::short_vector::ulong3 | ulong3 |
+typedef hc::short_vector::ulong4 | ulong4 |
+typedef +hc::short_vector::ulonglong1 | ulonglong1 |
+typedef +hc::short_vector::ulonglong2 | ulonglong2 |
+typedef +hc::short_vector::ulonglong3 | ulonglong3 |
+typedef +hc::short_vector::ulonglong4 | ulonglong4 |
+typedef hc::short_vector::float1 | float1 |
+typedef hc::short_vector::float2 | float2 |
+typedef hc::short_vector::float3 | float3 |
+typedef hc::short_vector::float4 | float4 |
+typedef hc::short_vector::double1 | double1 |
+typedef hc::short_vector::double2 | double2 |
+typedef hc::short_vector::double3 | double3 |
+typedef hc::short_vector::double4 | double4 |
+Functions | |
+__HIP_DEVICE__ char1 | make_char1 (signed char) |
+__HIP_DEVICE__ char2 | make_char2 (signed char, signed char) |
+__HIP_DEVICE__ char3 | make_char3 (signed char, signed char, signed char) |
+__HIP_DEVICE__ char4 | make_char4 (signed char, signed char, signed char, signed char) |
+__HIP_DEVICE__ short1 | make_short1 (short) |
+__HIP_DEVICE__ short2 | make_short2 (short, short) |
+__HIP_DEVICE__ short3 | make_short3 (short, short, short) |
+__HIP_DEVICE__ short4 | make_short4 (short, short, short, short) |
+__HIP_DEVICE__ int1 | make_int1 (int) |
+__HIP_DEVICE__ int2 | make_int2 (int, int) |
+__HIP_DEVICE__ int3 | make_int3 (int, int, int) |
+__HIP_DEVICE__ int4 | make_int4 (int, int, int, int) |
+__HIP_DEVICE__ long1 | make_long1 (long) |
+__HIP_DEVICE__ long2 | make_long2 (long, long) |
+__HIP_DEVICE__ long3 | make_long3 (long, long, long) |
+__HIP_DEVICE__ long4 | make_long4 (long, long, long, long) |
+__HIP_DEVICE__ longlong1 | make_longlong1 (long long) |
+__HIP_DEVICE__ longlong2 | make_longlong2 (long long, long long) |
+__HIP_DEVICE__ longlong3 | make_longlong3 (long long, long long, long long) |
+__HIP_DEVICE__ longlong4 | make_longlong4 (long long, long long, long long, long long) |
+__HIP_DEVICE__ uchar1 | make_uchar1 (unsigned char) |
+__HIP_DEVICE__ uchar2 | make_uchar2 (unsigned char, unsigned char) |
+__HIP_DEVICE__ uchar3 | make_uchar3 (unsigned char, unsigned char, unsigned char) |
+__HIP_DEVICE__ uchar4 | make_uchar4 (unsigned char, unsigned char, unsigned char, unsigned char) |
+__HIP_DEVICE__ ushort1 | make_ushort1 (unsigned short) |
+__HIP_DEVICE__ ushort2 | make_ushort2 (unsigned short, unsigned short) |
+__HIP_DEVICE__ ushort3 | make_ushort3 (unsigned short, unsigned short, unsigned short) |
+__HIP_DEVICE__ ushort4 | make_ushort4 (unsigned short, unsigned short, unsigned short, unsigned short) |
+__HIP_DEVICE__ uint1 | make_uint1 (unsigned int) |
+__HIP_DEVICE__ uint2 | make_uint2 (unsigned int, unsigned int) |
+__HIP_DEVICE__ uint3 | make_uint3 (unsigned int, unsigned int, unsigned int) |
+__HIP_DEVICE__ uint4 | make_uint4 (unsigned int, unsigned int, unsigned int, unsigned int) |
+__HIP_DEVICE__ ulong1 | make_ulong1 (unsigned long) |
+__HIP_DEVICE__ ulong2 | make_ulong2 (unsigned long, unsigned long) |
+__HIP_DEVICE__ ulong3 | make_ulong3 (unsigned long, unsigned long, unsigned long) |
+__HIP_DEVICE__ ulong4 | make_ulong4 (unsigned long, unsigned long, unsigned long, unsigned long) |
+__HIP_DEVICE__ ulonglong1 | make_ulonglong1 (unsigned long long) |
+__HIP_DEVICE__ ulonglong2 | make_ulonglong2 (unsigned long long, unsigned long long) |
+__HIP_DEVICE__ ulonglong3 | make_ulonglong3 (unsigned long long, unsigned long long, unsigned long long) |
+__HIP_DEVICE__ ulonglong4 | make_ulonglong4 (unsigned long long, unsigned long long, unsigned long long, unsigned long long) |
+__HIP_DEVICE__ float1 | make_float1 (float) |
+__HIP_DEVICE__ float2 | make_float2 (float, float) |
+__HIP_DEVICE__ float3 | make_float3 (float, float, float) |
+__HIP_DEVICE__ float4 | make_float4 (float, float, float, float) |
+__HIP_DEVICE__ double1 | make_double1 (double) |
+__HIP_DEVICE__ double2 | make_double2 (double, double) |
+__HIP_DEVICE__ double3 | make_double3 (double, double, double) |
+__HIP_DEVICE__ double4 | make_double4 (double, double, double, double) |
Defines the different newt vector types for HIP runtime.
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
dim3 | |
exception | |
ihipException | |
FakeMutex | |
hipChannelFormatDesc | |
hipDeviceArch_t | |
hipDeviceProp_t | |
hipEvent_t | |
hipPointerAttribute_t | |
ihipDevice_t | |
ihipEvent_t | |
ihipSignal_t | |
ihipStream_t | |
LockedAccessor< T > | |
LockedBase< MUTEX_TYPE > | |
ihipDeviceCriticalBase_t< MUTEX_TYPE > | |
ihipStreamCriticalBase_t< MUTEX_TYPE > | |
LockedBase< DeviceMutex > | |
ihipDeviceCriticalBase_t< DeviceMutex > | |
LockedBase< StreamMutex > | |
ihipStreamCriticalBase_t< StreamMutex > | |
StagingBuffer | |
textureReference |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
#include <assert.h>
#include <stdint.h>
#include <iostream>
#include <sstream>
#include <list>
#include <sys/types.h>
#include <unistd.h>
#include <deque>
#include <vector>
#include <algorithm>
#include <hc.hpp>
#include <hc_am.hpp>
#include "hip_runtime.h"
#include "hcc_detail/hip_hcc.h"
#include "hsa_ext_amd.h"
#include "hsakmt.h"
#include "hcc_detail/trace_helper.h"
+Macros | |
+#define | DeviceErrorCheck(x) if (x != HSA_STATUS_SUCCESS) { return hipErrorInvalidDevice; } |
+#define | ErrorCheck(x) error_check(x, __LINE__, __FILE__) |
#define | READ_ENV_I(_build, _ENV_VAR, _ENV_VAR2, _description) |
+Functions | |
+const char * | ihipErrorString (hipError_t hip_error) |
+bool | ihipIsValidDevice (unsigned deviceIndex) |
+ihipDevice_t * | getDevice (unsigned deviceIndex) |
+void | error_check (hsa_status_t hsa_error_code, int line_num, std::string str) |
+hsa_status_t | get_region_info (hsa_region_t region, void *data) |
+void | ihipReadEnv_I (int *var_ptr, const char *var_name1, const char *var_name2, const char *description) |
+void | ihipInit () |
+ihipDevice_t * | ihipGetTlsDefaultDevice () |
+ihipDevice_t * | ihipGetDevice (int deviceId) |
+hipStream_t | ihipSyncAndResolveStream (hipStream_t stream) |
+hipStream_t | ihipPreLaunchKernel (hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp) |
+hipStream_t | ihipPreLaunchKernel (hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp) |
+hipStream_t | ihipPreLaunchKernel (hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp) |
+hipStream_t | ihipPreLaunchKernel (hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp) |
+void | ihipPostLaunchKernel (hipStream_t stream, grid_launch_parm &lp) |
+void | ihipSetTs (hipEvent_t e) |
hipError_t | hipHccGetAccelerator (int deviceId, hc::accelerator *acc) |
hipError_t | hipHccGetAcceleratorView (hipStream_t stream, hc::accelerator_view **av) |
+Variables | |
+const int | release = 1 |
+int | HIP_LAUNCH_BLOCKING = 0 |
+int | HIP_PRINT_ENV = 0 |
+int | HIP_TRACE_API = 0 |
+int | HIP_ATP_MARKER = 0 |
+int | HIP_DB = 0 |
+int | HIP_STAGING_SIZE = 64 |
+int | HIP_STAGING_BUFFERS = 2 |
+int | HIP_PININPLACE = 0 |
+int | HIP_STREAM_SIGNALS = 2 |
+int | HIP_VISIBLE_DEVICES = 0 |
+int | HIP_DISABLE_HW_KERNEL_DEP = 0 |
+int | HIP_DISABLE_HW_COPY_DEP = 0 |
+thread_local int | tls_defaultDevice = 0 |
+thread_local hipError_t | tls_lastHipError = hipSuccess |
+std::once_flag | hip_initialized |
+ihipDevice_t * | g_devices |
+bool | g_visible_device = false |
+unsigned | g_deviceCnt |
+std::vector< int > | g_hip_visible_devices |
+hsa_agent_t | g_cpu_agent |
Contains definitions for functions that are large enough that we don't want to inline them everywhere. This file is compiled and linked into apps running HIP / HCC path.
+#define READ_ENV_I | +( | ++ | _build, | +
+ | + | + | _ENV_VAR, | +
+ | + | + | _ENV_VAR2, | +
+ | + | + | _description | +
+ | ) | ++ |
hipError_t hipHccGetAccelerator | +( | +int | +deviceId, | +
+ | + | hc::accelerator * | +acc | +
+ | ) | ++ |
hipError_t hipHccGetAcceleratorView | +( | +hipStream_t | +stream, | +
+ | + | hc::accelerator_view ** | +av | +
+ | ) | ++ |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
HIP C++ Texture API for hcc compiler. +More...
+#include <limits.h>
#include <hip/hip_runtime.h>
Go to the source code of this file.
++Classes | |
struct | hipChannelFormatDesc |
struct | textureReference |
+Macros | |
+#define | hipTextureType1D 1 |
+#define | tex1Dfetch(_tex, _addr) (_tex._dataPtr[_addr]) |
+Typedefs | |
+typedef struct hipChannelFormatDesc | hipChannelFormatDesc |
+typedef enum hipTextureReadMode | hipTextureReadMode |
+typedef enum hipTextureFilterMode | hipTextureFilterMode |
+Enumerations | |
enum | hipTextureReadMode { hipReadModeElementType + } |
enum | hipTextureFilterMode { hipFilterModePoint + } |
+Functions | |
+template<class T > | |
hipChannelFormatDesc | hipCreateChannelDesc () |
+template<class T , int dim, enum hipTextureReadMode readMode> | |
hipError_t | hipBindTexture (size_t *offset, struct texture< T, dim, readMode > &tex, const void *devPtr, const struct hipChannelFormatDesc *desc, size_t size=UINT_MAX) |
+template<class T , int dim, enum hipTextureReadMode readMode> | |
hipError_t | hipBindTexture (size_t *offset, struct texture< T, dim, readMode > &tex, const void *devPtr, size_t size=UINT_MAX) |
+template<class T , int dim, enum hipTextureReadMode readMode> | |
hipError_t | hipUnbindTexture (struct texture< T, dim, readMode > *tex) |
HIP C++ Texture API for hcc compiler.
+enum hipTextureFilterMode | +
enum hipTextureReadMode | +
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
TODO-doc. +More...
+ +Go to the source code of this file.
++Macros | |
#define | __host__ __attribute__((cpu)) |
+#define | __device__ __attribute__((hc)) |
+#define | __global__ __attribute__((hc_grid_launch)) |
+#define | __noinline__ __attribute__((noinline)) |
+#define | __forceinline__ __attribute__((always_inline)) |
+#define | __shared__ tile_static |
+#define | __constant__ __attribute__((address_space(2))) |
TODO-doc.
+#define __host__ __attribute__((cpu)) | +
Function and kernel markers
+ +
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
The HIP interface makes it very easy to port existing CUDA apps to run on AMD GPUs, or to develop new apps that can run on either CUDA or AMD GPUs from a common source base.
+t |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
Synchonization | |
Bug List |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for LockedBase< MUTEX_TYPE >, including all inherited members.
+_mutex (defined in LockedBase< MUTEX_TYPE >) | LockedBase< MUTEX_TYPE > | |
lock() (defined in LockedBase< MUTEX_TYPE >) | LockedBase< MUTEX_TYPE > | inline |
unlock() (defined in LockedBase< MUTEX_TYPE >) | LockedBase< MUTEX_TYPE > | inline |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Member Functions | |
+void | lock () |
+void | unlock () |
+Public Attributes | |
+MUTEX_TYPE | _mutex |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for StagingBuffer, including all inherited members.
+_max_buffers (defined in StagingBuffer) | StagingBuffer | static |
CopyDeviceToHost(void *dst, const void *src, size_t sizeBytes, hsa_signal_t *waitFor) (defined in StagingBuffer) | StagingBuffer | |
CopyDeviceToHostPinInPlace(void *dst, const void *src, size_t sizeBytes, hsa_signal_t *waitFor) (defined in StagingBuffer) | StagingBuffer | |
CopyHostToDevice(void *dst, const void *src, size_t sizeBytes, hsa_signal_t *waitFor) (defined in StagingBuffer) | StagingBuffer | |
CopyHostToDevicePinInPlace(void *dst, const void *src, size_t sizeBytes, hsa_signal_t *waitFor) (defined in StagingBuffer) | StagingBuffer | |
CopyPeerToPeer(void *dst, hsa_agent_t dstAgent, const void *src, hsa_agent_t srcAgent, size_t sizeBytes, hsa_signal_t *waitFor) (defined in StagingBuffer) | StagingBuffer | |
StagingBuffer(hsa_agent_t hsaAgent, hsa_region_t systemRegion, size_t bufferSize, int numBuffers) (defined in StagingBuffer) | StagingBuffer | |
~StagingBuffer() (defined in StagingBuffer) | StagingBuffer |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Member Functions | |
+ | StagingBuffer (hsa_agent_t hsaAgent, hsa_region_t systemRegion, size_t bufferSize, int numBuffers) |
+void | CopyHostToDevice (void *dst, const void *src, size_t sizeBytes, hsa_signal_t *waitFor) |
+void | CopyHostToDevicePinInPlace (void *dst, const void *src, size_t sizeBytes, hsa_signal_t *waitFor) |
+void | CopyDeviceToHost (void *dst, const void *src, size_t sizeBytes, hsa_signal_t *waitFor) |
+void | CopyDeviceToHostPinInPlace (void *dst, const void *src, size_t sizeBytes, hsa_signal_t *waitFor) |
+void | CopyPeerToPeer (void *dst, hsa_agent_t dstAgent, const void *src, hsa_agent_t srcAgent, size_t sizeBytes, hsa_signal_t *waitFor) |
+Static Public Attributes | |
+static const int | _max_buffers = 4 |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for dim3, including all inherited members.
+x | dim3 | |
y | dim3 | |
z | dim3 |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
#include <hip_runtime_api.h>
+Public Attributes | |
+uint32_t | x |
x | |
+uint32_t | y |
y | |
+uint32_t | z |
z | |
Struct for data in 3D
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for hipChannelFormatDesc, including all inherited members.
+_dummy (defined in hipChannelFormatDesc) | hipChannelFormatDesc |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Attributes | |
+int | _dummy |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for hipDeviceArch_t, including all inherited members.
+has3dGrid | hipDeviceArch_t | |
hasDoubles | hipDeviceArch_t | |
hasDynamicParallelism | hipDeviceArch_t | |
hasFloatAtomicAdd | hipDeviceArch_t | |
hasFunnelShift | hipDeviceArch_t | |
hasGlobalFloatAtomicExch | hipDeviceArch_t | |
hasGlobalInt32Atomics | hipDeviceArch_t | |
hasGlobalInt64Atomics | hipDeviceArch_t | |
hasSharedFloatAtomicExch | hipDeviceArch_t | |
hasSharedInt32Atomics | hipDeviceArch_t | |
hasSharedInt64Atomics | hipDeviceArch_t | |
hasSurfaceFuncs | hipDeviceArch_t | |
hasSyncThreadsExt | hipDeviceArch_t | |
hasThreadFenceSystem | hipDeviceArch_t | |
hasWarpBallot | hipDeviceArch_t | |
hasWarpShuffle | hipDeviceArch_t | |
hasWarpVote | hipDeviceArch_t |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Attributes | |
+unsigned | hasGlobalInt32Atomics: 1 |
32-bit integer atomics for global memory. | |
+unsigned | hasGlobalFloatAtomicExch: 1 |
32-bit float atomic exch for global memory. | |
+unsigned | hasSharedInt32Atomics: 1 |
32-bit integer atomics for shared memory. | |
+unsigned | hasSharedFloatAtomicExch: 1 |
32-bit float atomic exch for shared memory. | |
+unsigned | hasFloatAtomicAdd: 1 |
32-bit float atomic add in global and shared memory. | |
+unsigned | hasGlobalInt64Atomics: 1 |
64-bit integer atomics for global memory. | |
+unsigned | hasSharedInt64Atomics: 1 |
64-bit integer atomics for shared memory. | |
+unsigned | hasDoubles: 1 |
Double-precision floating point. | |
+unsigned | hasWarpVote: 1 |
Warp vote instructions (__any, __all). | |
+unsigned | hasWarpBallot: 1 |
Warp ballot instructions (__ballot). | |
+unsigned | hasWarpShuffle: 1 |
Warp shuffle operations. (__shfl_*). | |
+unsigned | hasFunnelShift: 1 |
Funnel two words into one with shift&mask caps. | |
+unsigned | hasThreadFenceSystem: 1 |
__threadfence_system. | |
+unsigned | hasSyncThreadsExt: 1 |
__syncthreads_count, syncthreads_and, syncthreads_or. | |
+unsigned | hasSurfaceFuncs: 1 |
Surface functions. | |
+unsigned | has3dGrid: 1 |
Grid and group dims are 3D (rather than 2D). | |
+unsigned | hasDynamicParallelism: 1 |
Dynamic parallelism. | |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for hipDeviceProp_t, including all inherited members.
+arch | hipDeviceProp_t | |
canMapHostMemory | hipDeviceProp_t | |
clockInstructionRate | hipDeviceProp_t | |
clockRate | hipDeviceProp_t | |
computeMode | hipDeviceProp_t | |
concurrentKernels | hipDeviceProp_t | |
isMultiGpuBoard | hipDeviceProp_t | |
l2CacheSize | hipDeviceProp_t | |
major | hipDeviceProp_t | |
maxGridSize | hipDeviceProp_t | |
maxSharedMemoryPerMultiProcessor | hipDeviceProp_t | |
maxThreadsDim | hipDeviceProp_t | |
maxThreadsPerBlock | hipDeviceProp_t | |
maxThreadsPerMultiProcessor | hipDeviceProp_t | |
memoryBusWidth | hipDeviceProp_t | |
memoryClockRate | hipDeviceProp_t | |
minor | hipDeviceProp_t | |
multiProcessorCount | hipDeviceProp_t | |
name | hipDeviceProp_t | |
pciBusID | hipDeviceProp_t | |
pciDeviceID | hipDeviceProp_t | |
regsPerBlock | hipDeviceProp_t | |
sharedMemPerBlock | hipDeviceProp_t | |
totalConstMem | hipDeviceProp_t | |
totalGlobalMem | hipDeviceProp_t | |
warpSize | hipDeviceProp_t |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
#include <hip_runtime_api.h>
+Public Attributes | |
+char | name [256] |
Device name. | |
+size_t | totalGlobalMem |
Size of global memory region (in bytes). | |
+size_t | sharedMemPerBlock |
Size of shared memory region (in bytes). | |
+int | regsPerBlock |
Registers per block. | |
+int | warpSize |
Warp size. | |
+int | maxThreadsPerBlock |
Max work items per work group or workgroup max size. | |
+int | maxThreadsDim [3] |
Max number of threads in each dimension (XYZ) of a block. | |
+int | maxGridSize [3] |
Max grid dimensions (XYZ). | |
+int | clockRate |
Max clock frequency of the multiProcessors in khz. | |
+int | memoryClockRate |
Max global memory clock frequency in khz. | |
+int | memoryBusWidth |
Global memory bus width in bits. | |
+size_t | totalConstMem |
Size of shared memory region (in bytes). | |
+int | major |
Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps. | |
+int | minor |
Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps. | |
+int | multiProcessorCount |
Number of multi-processors (compute units). | |
+int | l2CacheSize |
L2 cache size. | |
+int | maxThreadsPerMultiProcessor |
Maximum resident threads per multi-processor. | |
+int | computeMode |
Compute mode. | |
+int | clockInstructionRate |
Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP. | |
+hipDeviceArch_t | arch |
Architectural feature flags. New for HIP. | |
+int | concurrentKernels |
Device can possibly execute multiple kernels concurrently. | |
+int | pciBusID |
PCI Bus ID. | |
+int | pciDeviceID |
PCI Device ID. | |
+size_t | maxSharedMemoryPerMultiProcessor |
Maximum Shared Memory Per Multiprocessor. | |
+int | isMultiGpuBoard |
1 if device is on a multi-GPU board, 0 if not. | |
+int | canMapHostMemory |
Check whether HIP can map host memory. | |
hipDeviceProp
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for hipEvent_t, including all inherited members.
+_handle (defined in hipEvent_t) | hipEvent_t |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Attributes | |
+struct ihipEvent_t * | _handle |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for hipPointerAttribute_t, including all inherited members.
+allocationFlags (defined in hipPointerAttribute_t) | hipPointerAttribute_t | |
device (defined in hipPointerAttribute_t) | hipPointerAttribute_t | |
devicePointer (defined in hipPointerAttribute_t) | hipPointerAttribute_t | |
hostPointer (defined in hipPointerAttribute_t) | hipPointerAttribute_t | |
isManaged (defined in hipPointerAttribute_t) | hipPointerAttribute_t | |
memoryType (defined in hipPointerAttribute_t) | hipPointerAttribute_t |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
#include <hip_runtime_api.h>
+Public Attributes | |
+enum hipMemoryType | memoryType |
+int | device |
+void * | devicePointer |
+void * | hostPointer |
+int | isManaged |
+unsigned | allocationFlags |
Pointer attributes
+
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for ihipEvent_t, including all inherited members.
+_copy_seq_id (defined in ihipEvent_t) | ihipEvent_t | |
_flags (defined in ihipEvent_t) | ihipEvent_t | |
_marker (defined in ihipEvent_t) | ihipEvent_t | |
_state (defined in ihipEvent_t) | ihipEvent_t | |
_stream (defined in ihipEvent_t) | ihipEvent_t | |
_timestamp (defined in ihipEvent_t) | ihipEvent_t |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Attributes | |
+hipEventStatus_t | _state |
+hipStream_t | _stream |
+unsigned | _flags |
+hc::completion_future | _marker |
+uint64_t | _timestamp |
+SIGSEQNUM | _copy_seq_id |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for ihipSignal_t, including all inherited members.
+_hsa_signal (defined in ihipSignal_t) | ihipSignal_t | |
_index (defined in ihipSignal_t) | ihipSignal_t | |
_sig_id (defined in ihipSignal_t) | ihipSignal_t | |
ihipSignal_t() (defined in ihipSignal_t) | ihipSignal_t | |
release() (defined in ihipSignal_t) | ihipSignal_t | |
~ihipSignal_t() (defined in ihipSignal_t) | ihipSignal_t |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Member Functions | |
+void | release () |
+Public Attributes | |
+hsa_signal_t | _hsa_signal |
+int | _index |
+SIGSEQNUM | _sig_id |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
This is the complete list of members for textureReference, including all inherited members.
+channelDesc (defined in textureReference) | textureReference | |
filterMode (defined in textureReference) | textureReference | |
normalized (defined in textureReference) | textureReference |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+
+Public Attributes | |
+hipTextureFilterMode | filterMode |
+bool | normalized |
+hipChannelFormatDesc | channelDesc |
+ HIP: Heterogenous-computing Interface for Portability
+
+ |
+