From 5931eb632b29570990241b9b31db273b6ddd89f4 Mon Sep 17 00:00:00 2001 From: Adel Johar Date: Tue, 28 Jan 2025 17:13:25 +0100 Subject: [PATCH] Docs: Add xnack to unified memory page --- .../memory_management/unified_memory.rst | 135 ++++++++++++++++++ 1 file changed, 135 insertions(+) diff --git a/docs/how-to/hip_runtime_api/memory_management/unified_memory.rst b/docs/how-to/hip_runtime_api/memory_management/unified_memory.rst index c253416928..111f5272ec 100644 --- a/docs/how-to/hip_runtime_api/memory_management/unified_memory.rst +++ b/docs/how-to/hip_runtime_api/memory_management/unified_memory.rst @@ -113,6 +113,141 @@ allocator can be used. :sup:`1` Works only with ``XNACK=1`` and kernels with HMM support. First GPU access causes recoverable page-fault. +.. _xnack: + +XNACK +----- + +On specific GPU architectures (which is referenced in the previous table), there is an +option to automatically migrate pages of memory between host and device. This is important +for managed memory, where the locality of the data is important for performance. +Depending on the system, page migration may be disabled by default in which case managed +memory will act like pinned host memory and suffer degraded performance. + +**XNACK** describes the GPU's ability to retry memory accesses that failed due to a page fault +(which normally would lead to a memory access error), and instead retrieve the missing page. + +This also affects memory allocated by the system as indicated by the first table in +:ref:`unified memory allocators`. + +Below is a small example that demonstrates an explicit page fault and how **XNACK** affects +the page fault behavior. + +.. code-block:: cpp + + #include + #include + + #define HIP_CHECK(expression) \ + { \ + const hipError_t err = expression; \ + if(err != hipSuccess){ \ + std::cerr << "HIP error: " \ + << hipGetErrorString(err) \ + << " at " << __LINE__ << "\n"; \ + } \ + exit(EXIT_FAILURE); \ + } + + __global__ void write_to_memory(int* data, int size) + { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) + { + // Writing to memory that may not have been allocated in GPU memory + data[idx] = idx * 2; // Triggers a page fault if not resident + } + } + + int main() + { + const int N = 1024; // 1K elements + const int blocksize = 256; + int* data; + + // Allocate unified memory + HIP_CHECK(hipMallocManaged(&data, N * sizeof(int))); + + // Intentionally don't initialize or prefetch any part of the data + // No initialization: data is uninitialized but accessible + + // Launch kernel that writes to all elements + dim3 threads(blocksize); + dim3 grids(N / blocksize) + hipLaunchKernelGGL(write_to_memory, grids, threads, 0, 0, data, N); + + // Synchronize to ensure kernel execution and fault resolution + HIP_CHECK(hipDeviceSynchronize()); + + // Check results + bool pass = true; + for (int i = 0; i < 10; ++i) + { + if (data[i] != (i * 2)) + { + pass = false; + std::cout << "Failed at position" << i << " with value " << data[i] <