From 5ed6128f9285e61cfee73d780b663c9d780f20c7 Mon Sep 17 00:00:00 2001 From: guyueh1 <140554423+guyueh1@users.noreply.github.com> Date: Thu, 23 Jan 2025 10:35:19 -0800 Subject: [PATCH] Add nsys command argument to profile cuda graph workload (#138) * Add nsys command argument to profile cuda graph workload; fix nsys profile export path Signed-off-by: Guyue Huang * Revert a change to nsys profile path because the bug is fixed in ToT (top of tree) Signed-off-by: Guyue Huang * Fix test_slurm nsys command Signed-off-by: Guyue Huang --------- Signed-off-by: Guyue Huang Signed-off-by: Guyue Huang Co-authored-by: Guyue Huang --- src/nemo_run/core/execution/base.py | 1 + test/core/execution/test_slurm.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/nemo_run/core/execution/base.py b/src/nemo_run/core/execution/base.py index 2504fd6..cd86498 100644 --- a/src/nemo_run/core/execution/base.py +++ b/src/nemo_run/core/execution/base.py @@ -50,6 +50,7 @@ def get_nsys_prefix(self, profile_dir: str) -> Optional[list[str]]: "true", "--capture-range=cudaProfilerApi", "--capture-range-end=stop", + "--cuda-graph-trace=node", ] return args diff --git a/test/core/execution/test_slurm.py b/test/core/execution/test_slurm.py index c280dbe..8ab4cad 100644 --- a/test/core/execution/test_slurm.py +++ b/test/core/execution/test_slurm.py @@ -508,6 +508,7 @@ def test_dummy_batch_request_nsys( "true", "--capture-range=cudaProfilerApi", "--capture-range-end=stop", + "--cuda-graph-trace=node", ] def test_dummy_batch_request_warn(