diff --git a/src/nemo_run/core/execution/base.py b/src/nemo_run/core/execution/base.py index 2504fd6..cd86498 100644 --- a/src/nemo_run/core/execution/base.py +++ b/src/nemo_run/core/execution/base.py @@ -50,6 +50,7 @@ def get_nsys_prefix(self, profile_dir: str) -> Optional[list[str]]: "true", "--capture-range=cudaProfilerApi", "--capture-range-end=stop", + "--cuda-graph-trace=node", ] return args diff --git a/test/core/execution/test_slurm.py b/test/core/execution/test_slurm.py index c280dbe..8ab4cad 100644 --- a/test/core/execution/test_slurm.py +++ b/test/core/execution/test_slurm.py @@ -508,6 +508,7 @@ def test_dummy_batch_request_nsys( "true", "--capture-range=cudaProfilerApi", "--capture-range-end=stop", + "--cuda-graph-trace=node", ] def test_dummy_batch_request_warn(