From 32aea3f5bf9a4a19b8ed2d2caeea143cbbcee4e0 Mon Sep 17 00:00:00 2001 From: Adel Johar Date: Thu, 23 Jan 2025 13:44:27 +0100 Subject: [PATCH] Docs: Consolidate environment variables into a single RST file, use a script to pick tables --- docs/.gitignore | 1 + docs/conf.py | 10 +- docs/data/env_variables_hip.rst | 251 +++++++++++++++++++++++++++++ docs/extension/__init__.py | 0 docs/extension/custom_directive.py | 59 +++++++ docs/how-to/debugging.rst | 3 +- docs/how-to/debugging_env.rst | 99 ------------ docs/reference/env_variables.rst | 154 ++---------------- 8 files changed, 331 insertions(+), 246 deletions(-) create mode 100644 docs/data/env_variables_hip.rst create mode 100644 docs/extension/__init__.py create mode 100644 docs/extension/custom_directive.py delete mode 100644 docs/how-to/debugging_env.rst diff --git a/docs/.gitignore b/docs/.gitignore index 53b7787fbd..76d890c082 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -6,3 +6,4 @@ /doxygen/html /doxygen/xml /sphinx/_toc.yml +__pycache__ diff --git a/docs/conf.py b/docs/conf.py index aed3ead08d..8261240fb0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -5,6 +5,8 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html import re +import sys +from pathlib import Path from typing import Any, Dict, List from rocm_docs import ROCmDocs @@ -38,7 +40,10 @@ for sphinx_var in ROCmDocs.SPHINX_VARS: globals()[sphinx_var] = getattr(docs_core, sphinx_var) -extensions += ["sphinxcontrib.doxylink"] +# Add the local 'extension' directory to Python's search path so the custom_directive extension can be imported +sys.path.append(str(Path(__file__).parent / 'extension')) + +extensions += ["sphinxcontrib.doxylink", "custom_directive"] cpp_id_attributes = ["__global__", "__device__", "__host__", "__forceinline__", "static"] cpp_paren_attributes = ["__declspec"] @@ -50,5 +55,6 @@ exclude_patterns = [ "doxygen/mainpage.md", "understand/glossary.md", - 'how-to/debugging_env.rst' + 'how-to/debugging_env.rst', + "data/env_variables_hip.rst" ] \ No newline at end of 
file diff --git a/docs/data/env_variables_hip.rst b/docs/data/env_variables_hip.rst new file mode 100644 index 0000000000..f7cf44ac18 --- /dev/null +++ b/docs/data/env_variables_hip.rst @@ -0,0 +1,251 @@ +.. meta:: + :description: HIP environment variables + :keywords: AMD, HIP, environment variables, environment + +.. _hip-env-iso: +.. list-table:: + :header-rows: 1 + :widths: 70,30 + + * - **Environment variable** + - **Value** + + * - | ``ROCR_VISIBLE_DEVICES`` + | A list of device indices or UUIDs that will be exposed to applications. + - Example: ``0,GPU-DEADBEEFDEADBEEF`` + + * - | ``GPU_DEVICE_ORDINAL`` + | Device indices exposed to OpenCL and HIP applications. + - Example: ``0,2`` + + * - | ``HIP_VISIBLE_DEVICES`` or ``CUDA_VISIBLE_DEVICES`` + | Device indices exposed to HIP applications. + - Example: ``0,2`` + +.. _hip-env-prof: +.. list-table:: + :header-rows: 1 + :widths: 70,30 + + * - **Environment variable** + - **Value** + + * - | ``HSA_CU_MASK`` + | Sets the mask on a lower level of queue creation in the driver, + | this mask will also be set for queues being profiled. + - Example: ``1:0-8`` + + * - | ``ROC_GLOBAL_CU_MASK`` + | Sets the mask on queues created by the HIP or the OpenCL runtimes, + | this mask will also be set for queues being profiled. + - Example: ``0xf``, enables only 4 CUs + + * - | ``HIP_FORCE_QUEUE_PROFILING`` + | Used to run the app as if it were run in rocprof. Forces command queue + | profiling on by default. + - | 0: Disable + | 1: Enable + +.. _hip-env-debug: +.. list-table:: + :header-rows: 1 + :widths: 35,14,51 + + * - **Environment variable** + - **Default value** + - **Value** + + * - | ``AMD_LOG_LEVEL`` + | Enables HIP logging at various levels. + - ``0`` + - | 0: Disable log. + | 1: Enables error logs. + | 2: Enables warning logs next to lower-level logs. + | 3: Enables information logs next to lower-level logs. + | 4: Enables debug logs next to lower-level logs. + | 5: Enables debug extra logs next to lower-level logs. 
+ + * - | ``AMD_LOG_LEVEL_FILE`` + | Sets output file for ``AMD_LOG_LEVEL``. + - stderr output + - + + * - | ``AMD_LOG_MASK`` + | Specifies HIP log filters. Here is the ` complete list of log masks `_. + - ``0x7FFFFFFF`` + - | 0x1: Log API calls. + | 0x2: Kernel and copy commands and barriers. + | 0x4: Synchronization and waiting for commands to finish. + | 0x8: Decode and display AQL packets. + | 0x10: Queue commands and queue contents. + | 0x20: Signal creation, allocation, pool. + | 0x40: Locks and thread-safety code. + | 0x80: Kernel creations and arguments, etc. + | 0x100: Copy debug. + | 0x200: Detailed copy debug. + | 0x400: Resource allocation, performance-impacting events. + | 0x800: Initialization and shutdown. + | 0x1000: Misc debug, not yet classified. + | 0x2000: Show raw bytes of AQL packet. + | 0x4000: Show code creation debug. + | 0x8000: More detailed command info, including barrier commands. + | 0x10000: Log message location. + | 0x20000: Memory allocation. + | 0x40000: Memory pool allocation, including memory in graphs. + | 0x80000: Timestamp details. + | 0xFFFFFFFF: Log always even mask flag is zero. + + * - | ``HIP_LAUNCH_BLOCKING`` + | Used for serialization on kernel execution. + - ``0`` + - | 0: Disable. Kernel executes normally. + | 1: Enable. Serializes kernel enqueue, behaves the same as ``AMD_SERIALIZE_KERNEL``. + + * - | ``HIP_VISIBLE_DEVICES`` (or ``CUDA_VISIBLE_DEVICES``) + | Only devices whose index is present in the sequence are visible to HIP + - Unset by default. + - 0,1,2: Depending on the number of devices on the system. + + * - | ``GPU_DUMP_CODE_OBJECT`` + | Dump code object. + - ``0`` + - | 0: Disable + | 1: Enable + + * - | ``AMD_SERIALIZE_KERNEL`` + | Serialize kernel enqueue. + - ``0`` + - | 0: Disable + | 1: Wait for completion before enqueue. + | 2: Wait for completion after enqueue. + | 3: Both + + * - | ``AMD_SERIALIZE_COPY`` + | Serialize copies + - ``0`` + - | 0: Disable + | 1: Wait for completion before enqueue. 
+ | 2: Wait for completion after enqueue. + | 3: Both + + * - | ``AMD_DIRECT_DISPATCH`` + | Enable direct kernel dispatch (Currently for Linux; under development for Windows). + - ``1`` + - | 0: Disable + | 1: Enable + + * - | ``GPU_MAX_HW_QUEUES`` + | The maximum number of hardware queues allocated per device. + - ``4`` + - The variable controls how many independent hardware queues HIP runtime can create per process, + per device. If an application allocates more HIP streams than this number, then HIP runtime reuses + the same hardware queues for the new streams in a round-robin manner. Note that this maximum + number does not apply to hardware queues that are created for CU-masked HIP streams, or + cooperative queues for HIP Cooperative Groups (single queue per device). + +.. _hip-env-memory: +.. list-table:: + :header-rows: 1 + :widths: 35,14,51 + + * - **Environment variable** + - **Default value** + - **Value** + + * - | ``HIP_HIDDEN_FREE_MEM`` + | Amount of memory to hide from the free memory reported by hipMemGetInfo. + - ``0`` + - | 0: Disable + | Unit: megabyte (MB) + + * - | ``HIP_HOST_COHERENT`` + | Specifies if the memory is coherent between the host and GPU in ``hipHostMalloc``. + - ``0`` + - | 0: Memory is not coherent. + | 1: Memory is coherent. + | The environment variable takes effect if the following conditions are satisfied: + | - One of the ``hipHostMallocDefault``, ``hipHostMallocPortable``, ``hipHostMallocWriteCombined`` or ``hipHostMallocNumaUser`` flags is set to 1. + | - The ``hipHostMallocCoherent``, ``hipHostMallocNonCoherent`` and ``hipHostMallocMapped`` flags are set to 0. + + * - | ``HIP_INITIAL_DM_SIZE`` + | Set initial heap size for device malloc. + - ``8388608`` + - | Unit: Byte + | The default value corresponds to 8 MB. + + * - | ``HIP_MEM_POOL_SUPPORT`` + | Enables memory pool support in HIP. + - ``0`` + - | 0: Disable + | 1: Enable + + * - | ``HIP_MEM_POOL_USE_VM`` + | Enables memory pool support in HIP. 
+ - | ``0``: other OS + | ``1``: Windows + - | 0: Disable + | 1: Enable + + * - | ``HIP_VMEM_MANAGE_SUPPORT`` + | Virtual Memory Management Support. + - ``1`` + - | 0: Disable + | 1: Enable + + * - | ``GPU_MAX_HEAP_SIZE`` + | Set maximum size of the GPU heap to % of board memory. + - ``100`` + - | Unit: Percentage + + * - | ``GPU_MAX_REMOTE_MEM_SIZE`` + | Maximum size that allows device memory substitution with system. + - ``2`` + - | Unit: kilobyte (KB) + + * - | ``GPU_NUM_MEM_DEPENDENCY`` + | Number of memory objects for dependency tracking. + - ``256`` + - + + * - | ``GPU_STREAMOPS_CP_WAIT`` + | Force the stream memory operation to wait on CP. + - ``0`` + - | 0: Disable + | 1: Enable + + * - | ``HSA_LOCAL_MEMORY_ENABLE`` + | Enable HSA device local memory usage. + - ``1`` + - | 0: Disable + | 1: Enable + + * - | ``PAL_ALWAYS_RESIDENT`` + | Force memory resources to become resident at allocation time. + - ``0`` + - | 0: Disable + | 1: Enable + + * - | ``PAL_PREPINNED_MEMORY_SIZE`` + | Size of prepinned memory. + - ``64`` + - | Unit: kilobyte (KB) + + * - | ``REMOTE_ALLOC`` + | Use remote memory for the global heap allocation. + - ``0`` + - | 0: Disable + | 1: Enable + +.. _hip-env-other: +.. list-table:: + :header-rows: 1 + :widths: 35,14,51 + + * - **Environment variable** + - **Default value** + - **Value** + + * - | ``HIPRTC_COMPILE_OPTIONS_APPEND`` + | Sets compile options needed for ``hiprtc`` compilation. 
+ - None + - ``--gpu-architecture=gfx906:sramecc+:xnack``, ``-fgpu-rdc`` diff --git a/docs/extension/__init__.py b/docs/extension/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/extension/custom_directive.py b/docs/extension/custom_directive.py new file mode 100644 index 0000000000..3c18396d7e --- /dev/null +++ b/docs/extension/custom_directive.py @@ -0,0 +1,59 @@ +import os +import re +from docutils.parsers.rst import Directive +from docutils.statemachine import StringList + +class TableInclude(Directive): + required_arguments = 1 + optional_arguments = 0 + final_argument_whitespace = True + option_spec = { + 'table': str + } + + def run(self): + # First directive argument: path of the RST data file, relative to the Sphinx source dir + file_path = self.arguments[0] + + # Resolve the path against env.srcdir rather than the including document's own directory + env = self.state.document.settings.env + src_dir = os.path.abspath(env.srcdir) + full_file_path = os.path.join(src_dir, file_path) + + # Report a missing data file as a directive error + if not os.path.exists(full_file_path): + raise self.error(f"RST file {full_file_path} does not exist.") + + # Read the entire file content. NOTE(review): the file is never registered via env.note_dependency(), so incremental builds may not pick up edits to it (confirm) + with open(full_file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Capture each '.. _label:' target (group 1) and the list-table right after it (group 2). NOTE(review): the '..' before 'list-table' is unescaped, so it matches any two characters + table_pattern = r'(?:^\.\.\ _(.+?):\n)(.. 
list-table::.*?(?:\n\s*\*\s*-.*?)+)(?=\n\n|\Z)' + table_matches = list(re.finditer(table_pattern, content, re.MULTILINE | re.DOTALL)) + + # Get the specific table name from options + table_name = self.options.get('table') + + # ':table:' is mandatory: no merging is done, a missing value is a directive error + if not table_name: + raise self.error("The ':table:' option is required to specify which table to include.") + + # Keep only the tables whose label equals the requested name + matching_tables = [ + match for match in table_matches + if match.group(1).strip() == table_name + ] + + if not matching_tables: + raise self.error(f"Table '{table_name}' not found in {full_file_path}") + + # Take the first match; group 2 is the table without its label line + table_content = matching_tables[0].group(2) + + # Feed the table lines back to the parser so they are processed as part of the current document + self.state_machine.insert_input(table_content.splitlines(), full_file_path) + return [] + +def setup(app): + app.add_directive('include-table', TableInclude) diff --git a/docs/how-to/debugging.rst b/docs/how-to/debugging.rst index 433d31de10..6d5ff2ff24 100644 --- a/docs/how-to/debugging.rst +++ b/docs/how-to/debugging.rst @@ -273,7 +273,8 @@ HIP environment variable summary Here are some of the more commonly used environment variables: -.. include:: ../how-to/debugging_env.rst +.. include-table:: data/env_variables_hip.rst + :table: hip-env-debug General debugging tips ====================================================== diff --git a/docs/how-to/debugging_env.rst b/docs/how-to/debugging_env.rst deleted file mode 100644 index b3544a967f..0000000000 --- a/docs/how-to/debugging_env.rst +++ /dev/null @@ -1,99 +0,0 @@ -.. meta:: - :description: Debug environment variables for HIP. - :keywords: AMD, ROCm, HIP, debugging, Environment variables, ROCgdb - -.. list-table:: - :header-rows: 1 - :widths: 35,14,51 - - * - **Environment variable** - - **Default value** - - **Value** - - * - | ``AMD_LOG_LEVEL`` - | Enables HIP log on various level. - - ``0`` - - | 0: Disable log. - | 1: Enables error logs. 
- | 2: Enables warning logs next to lower-level logs. - | 3: Enables information logs next to lower-level logs. - | 4: Enables debug logs next to lower-level logs. - | 5: Enables debug extra logs next to lower-level logs. - - * - | ``AMD_LOG_LEVEL_FILE`` - | Sets output file for ``AMD_LOG_LEVEL``. - - stderr output - - - - * - | ``AMD_LOG_MASK`` - | Specifies HIP log filters. Here is the ` complete list of log masks `_. - - ``0x7FFFFFFF`` - - | 0x1: Log API calls. - | 0x2: Kernel and copy commands and barriers. - | 0x4: Synchronization and waiting for commands to finish. - | 0x8: Decode and display AQL packets. - | 0x10: Queue commands and queue contents. - | 0x20: Signal creation, allocation, pool. - | 0x40: Locks and thread-safety code. - | 0x80: Kernel creations and arguments, etc. - | 0x100: Copy debug. - | 0x200: Detailed copy debug. - | 0x400: Resource allocation, performance-impacting events. - | 0x800: Initialization and shutdown. - | 0x1000: Misc debug, not yet classified. - | 0x2000: Show raw bytes of AQL packet. - | 0x4000: Show code creation debug. - | 0x8000: More detailed command info, including barrier commands. - | 0x10000: Log message location. - | 0x20000: Memory allocation. - | 0x40000: Memory pool allocation, including memory in graphs. - | 0x80000: Timestamp details. - | 0xFFFFFFFF: Log always even mask flag is zero. - - * - | ``HIP_LAUNCH_BLOCKING`` - | Used for serialization on kernel execution. - - ``0`` - - | 0: Disable. Kernel executes normally. - | 1: Enable. Serializes kernel enqueue, behaves the same as ``AMD_SERIALIZE_KERNEL``. - - * - | ``HIP_VISIBLE_DEVICES`` (or ``CUDA_VISIBLE_DEVICES``) - | Only devices whose index is present in the sequence are visible to HIP - - Unset by default. - - 0,1,2: Depending on the number of devices on the system. - - * - | ``GPU_DUMP_CODE_OBJECT`` - | Dump code object. - - ``0`` - - | 0: Disable - | 1: Enable - - * - | ``AMD_SERIALIZE_KERNEL`` - | Serialize kernel enqueue. 
- - ``0`` - - | 0: Disable - | 1: Wait for completion before enqueue. - | 2: Wait for completion after enqueue. - | 3: Both - - * - | ``AMD_SERIALIZE_COPY`` - | Serialize copies - - ``0`` - - | 0: Disable - | 1: Wait for completion before enqueue. - | 2: Wait for completion after enqueue. - | 3: Both - - * - | ``AMD_DIRECT_DISPATCH`` - | Enable direct kernel dispatch (Currently for Linux; under development for Windows). - - ``1`` - - | 0: Disable - | 1: Enable - - * - | ``GPU_MAX_HW_QUEUES`` - | The maximum number of hardware queues allocated per device. - - ``4`` - - The variable controls how many independent hardware queues HIP runtime can create per process, - per device. If an application allocates more HIP streams than this number, then HIP runtime reuses - the same hardware queues for the new streams in a round-robin manner. Note that this maximum - number does not apply to hardware queues that are created for CU-masked HIP streams, or - cooperative queues for HIP Cooperative Groups (single queue per device). diff --git a/docs/reference/env_variables.rst b/docs/reference/env_variables.rst index 7d336c0b4e..8c46d34ed1 100644 --- a/docs/reference/env_variables.rst +++ b/docs/reference/env_variables.rst @@ -15,24 +15,8 @@ GPU isolation variables The GPU isolation environment variables in HIP are collected in the next table. For more information, check :doc:`GPU isolation page `. -.. list-table:: - :header-rows: 1 - :widths: 70,30 - - * - **Environment variable** - - **Value** - - * - | ``ROCR_VISIBLE_DEVICES`` - | A list of device indices or UUIDs that will be exposed to applications. - - Example: ``0,GPU-DEADBEEFDEADBEEF`` - - * - | ``GPU_DEVICE_ORDINAL`` - | Devices indices exposed to OpenCL and HIP applications. - - Example: ``0,2`` - - * - | ``HIP_VISIBLE_DEVICES`` or ``CUDA_VISIBLE_DEVICES`` - | Device indices exposed to HIP applications. - - Example: ``0,2`` +.. 
include-table:: data/env_variables_hip.rst + :table: hip-env-iso Profiling variables ================================================================================ @@ -40,28 +24,8 @@ Profiling variables The profiling environment variables in HIP are collected in the next table. For more information, check :doc:`setting the number of CUs page `. -.. list-table:: - :header-rows: 1 - :widths: 70,30 - - * - **Environment variable** - - **Value** - - * - | ``HSA_CU_MASK`` - | Sets the mask on a lower level of queue creation in the driver, - | this mask will also be set for queues being profiled. - - Example: ``1:0-8`` - - * - | ``ROC_GLOBAL_CU_MASK`` - | Sets the mask on queues created by the HIP or the OpenCL runtimes, - | this mask will also be set for queues being profiled. - - Example: ``0xf``, enables only 4 CUs - - * - | ``HIP_FORCE_QUEUE_PROFILING`` - | Used to run the app as if it were run in rocprof. Forces command queue - | profiling on by default. - - | 0: Disable - | 1: Enable +.. include-table:: data/env_variables_hip.rst + :table: hip-env-prof Debug variables ================================================================================ @@ -69,7 +33,8 @@ Debug variables The debugging environment variables in HIP are collected in the next table. For more information, check :ref:`debugging_with_hip`. -.. include:: ../how-to/debugging_env.rst +.. include-table:: data/env_variables_hip.rst + :table: hip-env-debug Memory management related variables ================================================================================ @@ -77,97 +42,8 @@ Memory management related variables The memory management related environment variables in HIP are collected in the next table. -.. list-table:: - :header-rows: 1 - :widths: 35,14,51 - - * - **Environment variable** - - **Default value** - - **Value** - - * - | ``HIP_HIDDEN_FREE_MEM`` - | Amount of memory to hide from the free memory reported by hipMemGetInfo. 
- - ``0`` - - | 0: Disable - | Unit: megabyte (MB) - - * - | ``HIP_HOST_COHERENT`` - | Specifies if the memory is coherent between the host and GPU in ``hipHostMalloc``. - - ``0`` - - | 0: Memory is not coherent. - | 1: Memory is coherent. - | Environment variable has effect, if the following conditions are statisfied: - | - One of the ``hipHostMallocDefault``, ``hipHostMallocPortable``, ``hipHostMallocWriteCombined`` or ``hipHostMallocNumaUser`` flag set to 1. - | - ``hipHostMallocCoherent``, ``hipHostMallocNonCoherent`` and ``hipHostMallocMapped`` flags set to 0. - - * - | ``HIP_INITIAL_DM_SIZE`` - | Set initial heap size for device malloc. - - ``8388608`` - - | Unit: Byte - | The default value corresponds to 8 MB. - - * - | ``HIP_MEM_POOL_SUPPORT`` - | Enables memory pool support in HIP. - - ``0`` - - | 0: Disable - | 1: Enable - - * - | ``HIP_MEM_POOL_USE_VM`` - | Enables memory pool support in HIP. - - | ``0``: other OS - | ``1``: Windows - - | 0: Disable - | 1: Enable - - * - | ``HIP_VMEM_MANAGE_SUPPORT`` - | Virtual Memory Management Support. - - ``1`` - - | 0: Disable - | 1: Enable - - * - | ``GPU_MAX_HEAP_SIZE`` - | Set maximum size of the GPU heap to % of board memory. - - ``100`` - - | Unit: Percentage - - * - | ``GPU_MAX_REMOTE_MEM_SIZE`` - | Maximum size that allows device memory substitution with system. - - ``2`` - - | Unit: kilobyte (KB) - - * - | ``GPU_NUM_MEM_DEPENDENCY`` - | Number of memory objects for dependency tracking. - - ``256`` - - - - * - | ``GPU_STREAMOPS_CP_WAIT`` - | Force the stream memory operation to wait on CP. - - ``0`` - - | 0: Disable - | 1: Enable - - * - | ``HSA_LOCAL_MEMORY_ENABLE`` - | Enable HSA device local memory usage. - - ``1`` - - | 0: Disable - | 1: Enable - - * - | ``PAL_ALWAYS_RESIDENT`` - | Force memory resources to become resident at allocation time. - - ``0`` - - | 0: Disable - | 1: Enable - - * - | ``PAL_PREPINNED_MEMORY_SIZE`` - | Size of prepinned memory. 
- - ``64`` - - | Unit: kilobyte (KB) - - * - | ``REMOTE_ALLOC`` - | Use remote memory for the global heap allocation. - - ``0`` - - | 0: Disable - | 1: Enable +.. include-table:: data/env_variables_hip.rst + :table: hip-env-memory Other useful variables ================================================================================ @@ -175,15 +51,5 @@ Other useful variables The following table lists environment variables that are useful but relate to different features. -.. list-table:: - :header-rows: 1 - :widths: 35,14,51 - - * - **Environment variable** - - **Default value** - - **Value** - - * - | ``HIPRTC_COMPILE_OPTIONS_APPEND`` - | Sets compile options needed for ``hiprtc`` compilation. - - None - - ``--gpu-architecture=gfx906:sramecc+:xnack``, ``-fgpu-rdc`` +.. include-table:: data/env_variables_hip.rst + :table: hip-env-other