From a3e04104ac5344a4ad7debb07eaa26d480a4b862 Mon Sep 17 00:00:00 2001
From: jiabaxie <jiabaxie@amd.com>
Date: Mon, 13 Nov 2023 20:52:07 -0500
Subject: [PATCH] SWDEV-427855, SWDEV-306642 - hip change for profiler and add
 texture/surface/device capabilities device struct entries

Change-Id: Id992356b05931a1112fa7de7148efbb7b8b1bb15
---
 include/hip/hip_deprecated.h  |  94 +++++++++++++
 include/hip/hip_runtime_api.h | 239 ++++++++++++++++++++++------------
 2 files changed, 251 insertions(+), 82 deletions(-)
 create mode 100644 include/hip/hip_deprecated.h

diff --git a/include/hip/hip_deprecated.h b/include/hip/hip_deprecated.h
new file mode 100644
index 0000000000..f6d5b0eb68
--- /dev/null
+++ b/include/hip/hip_deprecated.h
@@ -0,0 +1,94 @@
+#pragma once
+
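+// Declares older HIP functions and structs kept for the versioning system; the deprecated functions and structs are found in hip_device.cpp.
+// NOTE(review): no #include here - hipError_t and hipDeviceArch_t must already be declared by the including header.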
+
+// Older (R0000) hipDeviceProp_t layout; this struct is also kept in hip_device.cpp
+typedef struct hipDeviceProp_tR0000 {
+  char name[256];            ///< Device name.
+  size_t totalGlobalMem;     ///< Size of global memory region (in bytes).
+  size_t sharedMemPerBlock;  ///< Size of shared memory region (in bytes).
+  int regsPerBlock;          ///< Registers per block.
+  int warpSize;              ///< Warp size.
+  int maxThreadsPerBlock;    ///< Max work items per work group or workgroup max size.
+  int maxThreadsDim[3];      ///< Max number of threads in each dimension (XYZ) of a block.
+  int maxGridSize[3];        ///< Max grid dimensions (XYZ).
+  int clockRate;             ///< Max clock frequency of the multiProcessors in khz.
+  int memoryClockRate;       ///< Max global memory clock frequency in khz.
+  int memoryBusWidth;        ///< Global memory bus width in bits.
+  size_t totalConstMem;      ///< Size of constant memory region (in bytes).
+  int major;  ///< Major compute capability.  On HCC, this is an approximation and features may
+              ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
+              ///< feature caps.
+  int minor;  ///< Minor compute capability.  On HCC, this is an approximation and features may
+              ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
+              ///< feature caps.
+  int multiProcessorCount;          ///< Number of multi-processors (compute units).
+  int l2CacheSize;                  ///< L2 cache size.
+  int maxThreadsPerMultiProcessor;  ///< Maximum resident threads per multi-processor.
+  int computeMode;                  ///< Compute mode.
+  int clockInstructionRate;  ///< Frequency in khz of the timer used by the device-side "clock*"
+                             ///< instructions.  New for HIP.
+  hipDeviceArch_t arch;      ///< Architectural feature flags.  New for HIP.
+  int concurrentKernels;     ///< Device can possibly execute multiple kernels concurrently.
+  int pciDomainID;           ///< PCI Domain ID
+  int pciBusID;              ///< PCI Bus ID.
+  int pciDeviceID;           ///< PCI Device ID.
+  size_t maxSharedMemoryPerMultiProcessor;  ///< Maximum Shared Memory Per Multiprocessor.
+  int isMultiGpuBoard;                      ///< 1 if device is on a multi-GPU board, 0 if not.
+  int canMapHostMemory;                     ///< Check whether HIP can map host memory
+  int gcnArch;                              ///< DEPRECATED: use gcnArchName instead
+  char gcnArchName[256];                    ///< AMD GCN Arch Name.
+  int integrated;                           ///< APU vs dGPU
+  int cooperativeLaunch;                    ///< HIP device supports cooperative launch
+  int cooperativeMultiDeviceLaunch;         ///< HIP device supports cooperative launch on multiple
+                                            ///< devices
+  int maxTexture1DLinear;                   ///< Maximum size for 1D textures bound to linear memory
+  int maxTexture1D;                         ///< Maximum number of elements in 1D images
+  int maxTexture2D[2];  ///< Maximum dimensions (width, height) of 2D images, in image elements
+  int maxTexture3D[3];  ///< Maximum dimensions (width, height, depth) of 3D images, in image
+                        ///< elements
+  unsigned int* hdpMemFlushCntl;  ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register
+  unsigned int* hdpRegFlushCntl;  ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register
+  size_t memPitch;                ///< Maximum pitch in bytes allowed by memory copies
+  size_t textureAlignment;        ///< Alignment requirement for textures
+  size_t texturePitchAlignment;   ///< Pitch alignment requirement for texture references bound to
+                                  ///< pitched memory
+  int kernelExecTimeoutEnabled;   ///< Run time limit for kernels executed on the device
+  int ECCEnabled;                 ///< Device has ECC support enabled
+  int tccDriver;                  ///< 1:If device is Tesla device using TCC driver, else 0
+  int cooperativeMultiDeviceUnmatchedFunc;       ///< HIP device supports cooperative launch on
+                                                 ///< multiple
+                                                 ///< devices with unmatched functions
+  int cooperativeMultiDeviceUnmatchedGridDim;    ///< HIP device supports cooperative launch on
+                                                 ///< multiple
+                                                 ///< devices with unmatched grid dimensions
+  int cooperativeMultiDeviceUnmatchedBlockDim;   ///< HIP device supports cooperative launch on
+                                                 ///< multiple
+                                                 ///< devices with unmatched block dimensions
+  int cooperativeMultiDeviceUnmatchedSharedMem;  ///< HIP device supports cooperative launch on
+                                                 ///< multiple
+                                                 ///< devices with unmatched shared memories
+  int isLargeBar;                                ///< 1: if it is a large PCI bar device, else 0
+  int asicRevision;                              ///< Revision of the GPU in this device
+  int managedMemory;                   ///< Device supports allocating managed memory on this system
+  int directManagedMemAccessFromHost;  ///< Host can directly access managed memory on the device
+                                       ///< without migration
+  int concurrentManagedAccess;  ///< Device can coherently access managed memory concurrently with
+                                ///< the CPU
+  int pageableMemoryAccess;     ///< Device supports coherently accessing pageable memory
+                                ///< without calling hipHostRegister on it
+  int pageableMemoryAccessUsesHostPageTables;  ///< Device accesses pageable memory via the host's
+                                               ///< page tables
+} hipDeviceProp_tR0000;
+
+#ifdef __cplusplus
+extern "C" {  // give the declarations below C linkage when compiled as C++
+#endif
+// R0000-suffixed declarations that take hipDeviceProp_tR0000; their definitions are not in this header.
+hipError_t hipGetDevicePropertiesR0000(hipDeviceProp_tR0000* prop, int device);
+hipError_t hipChooseDeviceR0000(int* device, const hipDeviceProp_tR0000* prop);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
diff --git a/include/hip/hip_runtime_api.h b/include/hip/hip_runtime_api.h
index 0eb81cb718..e072dc9a24 100644
--- a/include/hip/hip_runtime_api.h
+++ b/include/hip/hip_runtime_api.h
@@ -88,76 +88,151 @@ typedef struct hipUUID_t {
 //---
 // Common headers for both NVCC and HCC paths:
 
+#define hipGetDeviceProperties hipGetDevicePropertiesR0600  // unversioned name expands to the R0600 symbol
+#define hipDeviceProp_t hipDeviceProp_tR0600  // also renames the struct tag and typedef that follow
+#define hipChooseDevice hipChooseDeviceR0600  // unversioned name expands to the R0600 symbol
+
 /**
  * hipDeviceProp
  *
  */
 typedef struct hipDeviceProp_t {
-    char name[256];            ///< Device name.
-    size_t totalGlobalMem;     ///< Size of global memory region (in bytes).
-    size_t sharedMemPerBlock;  ///< Size of shared memory region (in bytes).
-    int regsPerBlock;          ///< Registers per block.
-    int warpSize;              ///< Warp size.
-    int maxThreadsPerBlock;    ///< Max work items per work group or workgroup max size.
-    int maxThreadsDim[3];      ///< Max number of threads in each dimension (XYZ) of a block.
-    int maxGridSize[3];        ///< Max grid dimensions (XYZ).
-    int clockRate;             ///< Max clock frequency of the multiProcessors in khz.
-    int memoryClockRate;       ///< Max global memory clock frequency in khz.
-    int memoryBusWidth;        ///< Global memory bus width in bits.
-    size_t totalConstMem;      ///< Size of shared memory region (in bytes).
+    char name[256];                   ///< Device name.
+    hipUUID uuid;                     ///< UUID of a device
+    char luid[8];                     ///< 8-byte unique identifier. Only valid on windows
+    unsigned int luidDeviceNodeMask;  ///< LUID node mask
+    size_t totalGlobalMem;            ///< Size of global memory region (in bytes).
+    size_t sharedMemPerBlock;         ///< Size of shared memory region (in bytes).
+    int regsPerBlock;                 ///< Registers per block.
+    int warpSize;                     ///< Warp size.
+    size_t memPitch;                  ///< Maximum pitch in bytes allowed by memory
+                                      ///< copies
+    int maxThreadsPerBlock;           ///< Max work items per work group or workgroup max size.
+    int maxThreadsDim[3];             ///< Max number of threads in each dimension (XYZ) of a block.
+    int maxGridSize[3];               ///< Max grid dimensions (XYZ).
+    int clockRate;                    ///< Max clock frequency of the multiProcessors in khz.
+    size_t totalConstMem;             ///< Size of constant memory region (in bytes).
     int major;  ///< Major compute capability.  On HCC, this is an approximation and features may
                 ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
                 ///< feature caps.
     int minor;  ///< Minor compute capability.  On HCC, this is an approximation and features may
                 ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
                 ///< feature caps.
-    int multiProcessorCount;          ///< Number of multi-processors (compute units).
-    int l2CacheSize;                  ///< L2 cache size.
-    int maxThreadsPerMultiProcessor;  ///< Maximum resident threads per multi-processor.
-    int computeMode;                  ///< Compute mode.
+    size_t textureAlignment;       ///< Alignment requirement for textures
+    size_t texturePitchAlignment;  ///< Pitch alignment for texture references bound to pitched memory
+    int deviceOverlap;             ///< Deprecated. Use asyncEngineCount instead
+    int multiProcessorCount;       ///< Number of multi-processors (compute units).
+    int kernelExecTimeoutEnabled;  ///< Run time limit for kernels executed on the device
+    int integrated;                ///< APU vs dGPU
+    int canMapHostMemory;          ///< Check whether HIP can map host memory
+    int computeMode;               ///< Compute mode.
+    int maxTexture1D;              ///< Maximum number of elements in 1D images
+    int maxTexture1DMipmap;        ///< Maximum 1D mipmap texture size
+    int maxTexture1DLinear;        ///< Maximum size for 1D textures bound to linear memory
+    int maxTexture2D[2];  ///< Maximum dimensions (width, height) of 2D images, in image elements
+    int maxTexture2DMipmap[2];  ///< Maximum number of elements in 2D array mipmap of images
+    int maxTexture2DLinear[3];  ///< Maximum 2D tex dimensions if tex are bound to pitched memory
+    int maxTexture2DGather[2];  ///< Maximum 2D tex dimensions if gather has to be performed
+    int maxTexture3D[3];  ///< Maximum dimensions (width, height, depth) of 3D images, in image
+    ///< elements
+    int maxTexture3DAlt[3];           ///< Maximum alternate 3D texture dims
+    int maxTextureCubemap;            ///< Maximum cubemap texture dims
+    int maxTexture1DLayered[2];       ///< Maximum number of elements in 1D array images
+    int maxTexture2DLayered[3];       ///< Maximum number of elements in 2D array images
+    int maxTextureCubemapLayered[2];  ///< Maximum cubemaps layered texture dims
+    int maxSurface1D;                 ///< Maximum 1D surface size
+    int maxSurface2D[2];              ///< Maximum 2D surface size
+    int maxSurface3D[3];              ///< Maximum 3D surface size
+    int maxSurface1DLayered[2];       ///< Maximum 1D layered surface size
+    int maxSurface2DLayered[3];       ///< Maximum 2D layered surface size
+    int maxSurfaceCubemap;            ///< Maximum cubemap surface size
+    int maxSurfaceCubemapLayered[2];  ///< Maximum cubemap layered surface size
+    size_t surfaceAlignment;          ///< Alignment requirement for surface
+    int concurrentKernels;         ///< Device can possibly execute multiple kernels concurrently.
+    int ECCEnabled;                ///< Device has ECC support enabled
+    int pciBusID;                  ///< PCI Bus ID.
+    int pciDeviceID;               ///< PCI Device ID.
+    int pciDomainID;               ///< PCI Domain ID
+    int tccDriver;                 ///< 1:If device is Tesla device using TCC driver, else 0
+    int asyncEngineCount;          ///< Number of async engines
+    int unifiedAddressing;         ///< Does device and host share unified address space
+    int memoryClockRate;           ///< Max global memory clock frequency in khz.
+    int memoryBusWidth;            ///< Global memory bus width in bits.
+    int l2CacheSize;               ///< L2 cache size.
+    int persistingL2CacheMaxSize;  ///< Device's max L2 persisting lines in bytes
+    int maxThreadsPerMultiProcessor;    ///< Maximum resident threads per multi-processor.
+    int streamPrioritiesSupported;      ///< Device supports stream priority
+    int globalL1CacheSupported;         ///< Indicates globals are cached in L1
+    int localL1CacheSupported;          ///< Locals are cached in L1
+    size_t sharedMemPerMultiprocessor;  ///< Amount of shared memory available per multiprocessor.
+    int regsPerMultiprocessor;          ///< registers available per multiprocessor
+    int managedMemory;         ///< Device supports allocating managed memory on this system
+    int isMultiGpuBoard;       ///< 1 if device is on a multi-GPU board, 0 if not.
+    int multiGpuBoardGroupID;  ///< Unique identifier for a group of devices on same multiboard GPU
+    int hostNativeAtomicSupported;         ///< Link between host and device supports native atomics
+    int singleToDoublePrecisionPerfRatio;  ///< Deprecated. CUDA only.
+    int pageableMemoryAccess;              ///< Device supports coherently accessing pageable memory
+                                           ///< without calling hipHostRegister on it
+    int concurrentManagedAccess;  ///< Device can coherently access managed memory concurrently with
+                                  ///< the CPU
+    int computePreemptionSupported;         ///< Is compute preemption supported on the device
+    int canUseHostPointerForRegisteredMem;  ///< Device can access host registered memory with same
+                                            ///< address as the host
+    int cooperativeLaunch;                  ///< HIP device supports cooperative launch
+    int cooperativeMultiDeviceLaunch;       ///< HIP device supports cooperative launch on multiple
+                                            ///< devices
+    size_t
+        sharedMemPerBlockOptin;  ///< Per-device max shared mem per block usable by special opt in
+    int pageableMemoryAccessUsesHostPageTables;  ///< Device accesses pageable memory via the host's
+                                                 ///< page tables
+    int directManagedMemAccessFromHost;  ///< Host can directly access managed memory on the device
+                                         ///< without migration
+    int maxBlocksPerMultiProcessor;      ///< Max number of blocks on CU
+    int accessPolicyMaxWindowSize;       ///< Max value of access policy window
+    size_t reservedSharedMemPerBlock;    ///< Shared memory reserved by driver per block
+    int hostRegisterSupported;           ///< Device supports hipHostRegister
+    int sparseHipArraySupported;         ///< Indicates if device supports sparse hip arrays
+    int hostRegisterReadOnlySupported;   ///< Device supports using the hipHostRegisterReadOnly flag
+                                         ///< with hipHostRegister
+    int timelineSemaphoreInteropSupported;  ///< Indicates external timeline semaphore support
+    int memoryPoolsSupported;  ///< Indicates if device supports hipMallocAsync and hipMemPool APIs
+    int gpuDirectRDMASupported;                    ///< Indicates device support of RDMA APIs
+    unsigned int gpuDirectRDMAFlushWritesOptions;  ///< Bitmask to be interpreted according to
+                                                   ///< hipFlushGPUDirectRDMAWritesOptions
+    int gpuDirectRDMAWritesOrdering;               ///< value of hipGPUDirectRDMAWritesOrdering
+    unsigned int
+        memoryPoolSupportedHandleTypes;  ///< Bitmask of handle types support with mempool based IPC
+    int deferredMappingHipArraySupported;  ///< Device supports deferred mapping HIP arrays and HIP
+                                           ///< mipmapped arrays
+    int ipcEventSupported;                 ///< Device supports IPC events
+    int clusterLaunch;                     ///< Device supports cluster launch
+    int unifiedFunctionPointers;           ///< Indicates device supports unified function pointers
+    int reserved[63];                      ///< CUDA Reserved.
+
+    int hipReserved[32];  ///< Reserved for adding new entries for HIP/CUDA.
+
+    /* HIP Only struct members */
+    char gcnArchName[256];                    ///< AMD GCN Arch Name. HIP Only.
+    size_t maxSharedMemoryPerMultiProcessor;  ///< Maximum Shared Memory Per CU. HIP Only.
     int clockInstructionRate;  ///< Frequency in khz of the timer used by the device-side "clock*"
                                ///< instructions.  New for HIP.
     hipDeviceArch_t arch;      ///< Architectural feature flags.  New for HIP.
-    int concurrentKernels;     ///< Device can possibly execute multiple kernels concurrently.
-    int pciDomainID;           ///< PCI Domain ID
-    int pciBusID;              ///< PCI Bus ID.
-    int pciDeviceID;           ///< PCI Device ID.
-    size_t maxSharedMemoryPerMultiProcessor;  ///< Maximum Shared Memory Per Multiprocessor.
-    int isMultiGpuBoard;                      ///< 1 if device is on a multi-GPU board, 0 if not.
-    int canMapHostMemory;                     ///< Check whether HIP can map host memory
-    int gcnArch;                              ///< DEPRECATED: use gcnArchName instead
-    char gcnArchName[256];                    ///< AMD GCN Arch Name.
-    int integrated;            ///< APU vs dGPU
-    int cooperativeLaunch;            ///< HIP device supports cooperative launch
-    int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices
-    int maxTexture1DLinear;    ///< Maximum size for 1D textures bound to linear memory
-    int maxTexture1D;          ///< Maximum number of elements in 1D images
-    int maxTexture2D[2];       ///< Maximum dimensions (width, height) of 2D images, in image elements
-    int maxTexture3D[3];       ///< Maximum dimensions (width, height, depth) of 3D images, in image elements
-    unsigned int* hdpMemFlushCntl;      ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
-    unsigned int* hdpRegFlushCntl;      ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
-    size_t memPitch;                 ///<Maximum pitch in bytes allowed by memory copies
-    size_t textureAlignment;         ///<Alignment requirement for textures
-    size_t texturePitchAlignment;    ///<Pitch alignment requirement for texture references bound to pitched memory
-    int kernelExecTimeoutEnabled;    ///<Run time limit for kernels executed on the device
-    int ECCEnabled;                  ///<Device has ECC support enabled
-    int tccDriver;                   ///< 1:If device is Tesla device using TCC driver, else 0
-    int cooperativeMultiDeviceUnmatchedFunc;        ///< HIP device supports cooperative launch on multiple
-                                                    ///devices with unmatched functions
-    int cooperativeMultiDeviceUnmatchedGridDim;     ///< HIP device supports cooperative launch on multiple
-                                                    ///devices with unmatched grid dimensions
-    int cooperativeMultiDeviceUnmatchedBlockDim;    ///< HIP device supports cooperative launch on multiple
-                                                    ///devices with unmatched block dimensions
-    int cooperativeMultiDeviceUnmatchedSharedMem;   ///< HIP device supports cooperative launch on multiple
-                                                    ///devices with unmatched shared memories
-    int isLargeBar;                  ///< 1: if it is a large PCI bar device, else 0
-    int asicRevision;                ///< Revision of the GPU in this device
-    int managedMemory;               ///< Device supports allocating managed memory on this system
-    int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device without migration
-    int concurrentManagedAccess;     ///< Device can coherently access managed memory concurrently with the CPU
-    int pageableMemoryAccess;        ///< Device supports coherently accessing pageable memory
-                                     ///< without calling hipHostRegister on it
-    int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's page tables
+    unsigned int* hdpMemFlushCntl;            ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register
+    unsigned int* hdpRegFlushCntl;            ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register
+    int cooperativeMultiDeviceUnmatchedFunc;  ///< HIP device supports cooperative launch on
+                                              ///< multiple
+                                              ///< devices with unmatched functions
+    int cooperativeMultiDeviceUnmatchedGridDim;    ///< HIP device supports cooperative launch on
+                                                   ///< multiple
+                                                   ///< devices with unmatched grid dimensions
+    int cooperativeMultiDeviceUnmatchedBlockDim;   ///< HIP device supports cooperative launch on
+                                                   ///< multiple
+                                                   ///< devices with unmatched block dimensions
+    int cooperativeMultiDeviceUnmatchedSharedMem;  ///< HIP device supports cooperative launch on
+                                                   ///< multiple
+                                                   ///< devices with unmatched shared memories
+    int isLargeBar;                                ///< 1: if it is a large PCI bar device, else 0
+    int asicRevision;                              ///< Revision of the GPU in this device
 } hipDeviceProp_t;
 
  /**
@@ -335,33 +410,34 @@ typedef enum hipDeviceAttribute_t {
 
     hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled.
     hipDeviceAttributeAccessPolicyMaxWindowSize,        ///< Cuda only. The maximum size of the window policy in bytes.
-    hipDeviceAttributeAsyncEngineCount,                 ///< Cuda only. Asynchronous engines number.
+    hipDeviceAttributeAsyncEngineCount,                 ///< Asynchronous engines number.
     hipDeviceAttributeCanMapHostMemory,                 ///< Whether host memory can be mapped into device address space
-    hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Cuda only. Device can access host registered memory
+    hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Device can access host registered memory
                                                         ///< at the same virtual address as the CPU
     hipDeviceAttributeClockRate,                        ///< Peak clock frequency in kilohertz.
     hipDeviceAttributeComputeMode,                      ///< Compute mode that device is currently in.
-    hipDeviceAttributeComputePreemptionSupported,       ///< Cuda only. Device supports Compute Preemption.
+    hipDeviceAttributeComputePreemptionSupported,       ///< Device supports Compute Preemption.
     hipDeviceAttributeConcurrentKernels,                ///< Device can possibly execute multiple kernels concurrently.
     hipDeviceAttributeConcurrentManagedAccess,          ///< Device can coherently access managed memory concurrently with the CPU
     hipDeviceAttributeCooperativeLaunch,                ///< Support cooperative launch
     hipDeviceAttributeCooperativeMultiDeviceLaunch,     ///< Support cooperative launch on multiple devices
-    hipDeviceAttributeDeviceOverlap,                    ///< Cuda only. Device can concurrently copy memory and execute a kernel.
+    hipDeviceAttributeHostRegisterSupported,            ///< Can device support host memory registration via hipHostRegister
+    hipDeviceAttributeDeviceOverlap,                    ///< Device can concurrently copy memory and execute a kernel.
                                                         ///< Deprecated. Use instead asyncEngineCount.
     hipDeviceAttributeDirectManagedMemAccessFromHost,   ///< Host can directly access managed memory on
                                                         ///< the device without migration
-    hipDeviceAttributeGlobalL1CacheSupported,           ///< Cuda only. Device supports caching globals in L1
-    hipDeviceAttributeHostNativeAtomicSupported,        ///< Cuda only. Link between the device and the host supports native atomic operations
+    hipDeviceAttributeGlobalL1CacheSupported,           ///< Device supports caching globals in L1
+    hipDeviceAttributeHostNativeAtomicSupported,        ///< Link between the device and the host supports native atomic operations
     hipDeviceAttributeIntegrated,                       ///< Device is integrated GPU
     hipDeviceAttributeIsMultiGpuBoard,                  ///< Multiple GPU devices.
     hipDeviceAttributeKernelExecTimeout,                ///< Run time limit for kernels executed on the device
     hipDeviceAttributeL2CacheSize,                      ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
     hipDeviceAttributeLocalL1CacheSupported,            ///< caching locals in L1 is supported
-    hipDeviceAttributeLuid,                             ///< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
-    hipDeviceAttributeLuidDeviceNodeMask,               ///< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms
+    hipDeviceAttributeLuid,                             ///< 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
+    hipDeviceAttributeLuidDeviceNodeMask,               ///< Luid device node mask. Undefined on TCC and non-Windows platforms
     hipDeviceAttributeComputeCapabilityMajor,           ///< Major compute capability version number.
     hipDeviceAttributeManagedMemory,                    ///< Device supports allocating managed memory on this system
-    hipDeviceAttributeMaxBlocksPerMultiProcessor,       ///< Cuda only. Max block size per multiprocessor
+    hipDeviceAttributeMaxBlocksPerMultiProcessor,       ///< Max number of blocks per multiprocessor
     hipDeviceAttributeMaxBlockDimX,                     ///< Max block size in width.
     hipDeviceAttributeMaxBlockDimY,                     ///< Max block size in height.
     hipDeviceAttributeMaxBlockDimZ,                     ///< Max block size in depth.
@@ -376,22 +452,22 @@ typedef enum hipDeviceAttribute_t {
     hipDeviceAttributeMaxSurfaceCubemap,                ///< Cuda only. Maximum dimensions of Cubemap surface.
     hipDeviceAttributeMaxSurfaceCubemapLayered,         ///< Cuda only. Maximum dimension of Cubemap layered surface.
     hipDeviceAttributeMaxTexture1DWidth,                ///< Maximum size of 1D texture.
-    hipDeviceAttributeMaxTexture1DLayered,              ///< Cuda only. Maximum dimensions of 1D layered texture.
+    hipDeviceAttributeMaxTexture1DLayered,              ///< Maximum dimensions of 1D layered texture.
     hipDeviceAttributeMaxTexture1DLinear,               ///< Maximum number of elements allocatable in a 1D linear texture.
                                                         ///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
-    hipDeviceAttributeMaxTexture1DMipmap,               ///< Cuda only. Maximum size of 1D mipmapped texture.
+    hipDeviceAttributeMaxTexture1DMipmap,               ///< Maximum size of 1D mipmapped texture.
     hipDeviceAttributeMaxTexture2DWidth,                ///< Maximum dimension width of 2D texture.
     hipDeviceAttributeMaxTexture2DHeight,               ///< Maximum dimension hight of 2D texture.
-    hipDeviceAttributeMaxTexture2DGather,               ///< Cuda only. Maximum dimensions of 2D texture if gather operations  performed.
-    hipDeviceAttributeMaxTexture2DLayered,              ///< Cuda only. Maximum dimensions of 2D layered texture.
-    hipDeviceAttributeMaxTexture2DLinear,               ///< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
-    hipDeviceAttributeMaxTexture2DMipmap,               ///< Cuda only. Maximum dimensions of 2D mipmapped texture.
+    hipDeviceAttributeMaxTexture2DGather,               ///< Maximum dimensions of 2D texture if gather operations  performed.
+    hipDeviceAttributeMaxTexture2DLayered,              ///< Maximum dimensions of 2D layered texture.
+    hipDeviceAttributeMaxTexture2DLinear,               ///< Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
+    hipDeviceAttributeMaxTexture2DMipmap,               ///< Maximum dimensions of 2D mipmapped texture.
     hipDeviceAttributeMaxTexture3DWidth,                ///< Maximum dimension width of 3D texture.
     hipDeviceAttributeMaxTexture3DHeight,               ///< Maximum dimension height of 3D texture.
     hipDeviceAttributeMaxTexture3DDepth,                ///< Maximum dimension depth of 3D texture.
-    hipDeviceAttributeMaxTexture3DAlt,                  ///< Cuda only. Maximum dimensions of alternate 3D texture.
-    hipDeviceAttributeMaxTextureCubemap,                ///< Cuda only. Maximum dimensions of Cubemap texture
-    hipDeviceAttributeMaxTextureCubemapLayered,         ///< Cuda only. Maximum dimensions of Cubemap layered texture.
+    hipDeviceAttributeMaxTexture3DAlt,                  ///< Maximum dimensions of alternate 3D texture.
+    hipDeviceAttributeMaxTextureCubemap,                ///< Maximum dimensions of Cubemap texture
+    hipDeviceAttributeMaxTextureCubemapLayered,         ///< Maximum dimensions of Cubemap layered texture.
     hipDeviceAttributeMaxThreadsDim,                    ///< Maximum dimension of a block
     hipDeviceAttributeMaxThreadsPerBlock,               ///< Maximum number of threads per block.
     hipDeviceAttributeMaxThreadsPerMultiProcessor,      ///< Maximum resident threads per multiprocessor.
@@ -399,7 +475,7 @@ typedef enum hipDeviceAttribute_t {
     hipDeviceAttributeMemoryBusWidth,                   ///< Global memory bus width in bits.
     hipDeviceAttributeMemoryClockRate,                  ///< Peak memory clock frequency in kilohertz.
     hipDeviceAttributeComputeCapabilityMinor,           ///< Minor compute capability version number.
-    hipDeviceAttributeMultiGpuBoardGroupID,             ///< Cuda only. Unique ID of device group on the same multi-GPU board
+    hipDeviceAttributeMultiGpuBoardGroupID,             ///< Unique ID of device group on the same multi-GPU board
     hipDeviceAttributeMultiprocessorCount,              ///< Number of multiprocessors on the device.
     hipDeviceAttributeUnused1,                          ///< Previously hipDeviceAttributeName
     hipDeviceAttributePageableMemoryAccess,             ///< Device supports coherently accessing pageable memory
@@ -408,17 +484,17 @@ typedef enum hipDeviceAttribute_t {
     hipDeviceAttributePciBusId,                         ///< PCI Bus ID.
     hipDeviceAttributePciDeviceId,                      ///< PCI Device ID.
     hipDeviceAttributePciDomainID,                      ///< PCI Domain ID.
-    hipDeviceAttributePersistingL2CacheMaxSize,         ///< Cuda11 only. Maximum l2 persisting lines capacity in bytes
+    hipDeviceAttributePersistingL2CacheMaxSize,         ///< Maximum l2 persisting lines capacity in bytes
     hipDeviceAttributeMaxRegistersPerBlock,             ///< 32-bit registers available to a thread block. This number is shared
                                                         ///< by all thread blocks simultaneously resident on a multiprocessor.
     hipDeviceAttributeMaxRegistersPerMultiprocessor,    ///< 32-bit registers available per block.
-    hipDeviceAttributeReservedSharedMemPerBlock,        ///< Cuda11 only. Shared memory reserved by CUDA driver per block.
+    hipDeviceAttributeReservedSharedMemPerBlock,        ///< Shared memory reserved by the driver per block.
     hipDeviceAttributeMaxSharedMemoryPerBlock,          ///< Maximum shared memory available per block in bytes.
-    hipDeviceAttributeSharedMemPerBlockOptin,           ///< Cuda only. Maximum shared memory per block usable by special opt in.
-    hipDeviceAttributeSharedMemPerMultiprocessor,       ///< Cuda only. Shared memory available per multiprocessor.
+    hipDeviceAttributeSharedMemPerBlockOptin,           ///< Maximum shared memory per block usable by special opt in.
+    hipDeviceAttributeSharedMemPerMultiprocessor,       ///< Shared memory available per multiprocessor.
     hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
-    hipDeviceAttributeStreamPrioritiesSupported,        ///< Cuda only. Whether to support stream priorities.
-    hipDeviceAttributeSurfaceAlignment,                 ///< Cuda only. Alignment requirement for surfaces
+    hipDeviceAttributeStreamPrioritiesSupported,        ///< Whether to support stream priorities.
+    hipDeviceAttributeSurfaceAlignment,                 ///< Alignment requirement for surfaces
     hipDeviceAttributeTccDriver,                        ///< Cuda only. Whether device is a Tesla device using TCC driver
     hipDeviceAttributeTextureAlignment,                 ///< Alignment requirement for textures
     hipDeviceAttributeTexturePitchAlignment,            ///< Pitch alignment requirement for 2D texture references bound to pitched memory;
@@ -429,7 +505,6 @@ typedef enum hipDeviceAttribute_t {
     hipDeviceAttributeWarpSize,                         ///< Warp size in threads.
     hipDeviceAttributeMemoryPoolsSupported,             ///< Device supports HIP Stream Ordered Memory Allocator
     hipDeviceAttributeVirtualMemoryManagementSupported, ///< Device supports HIP virtual memory management
-    hipDeviceAttributeHostRegisterSupported,            ///< Host memory registeration through device support
 
     hipDeviceAttributeCudaCompatibleEnd = 9999,
     hipDeviceAttributeAmdSpecificBegin = 10000,