From f8c9f314ffe7eddfbd3645a94143301e610f68de Mon Sep 17 00:00:00 2001
From: hj-wei
Date: Tue, 7 Jan 2025 01:38:19 +0800
Subject: [PATCH] [BUG FIX]: fix get torch.version.cuda error when cuda is None in rocm (#6909)

Hi, I found an error when using DeepSpeed with ROCm builds of PyTorch:

```
torch_cuda_version = ".".join(torch.version.cuda.split('.')[:2])
```

raises an AttributeError when torch.version.cuda is None. This happens
because the CUDA version in rocm-torch/version.py is always set to None,
which leads to runtime errors in environments where ROCm is used.

---------

Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
---
 op_builder/builder.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/op_builder/builder.py b/op_builder/builder.py
index 461281d4a569..ab26054bda7d 100644
--- a/op_builder/builder.py
+++ b/op_builder/builder.py
@@ -415,10 +415,11 @@ def cpu_arch(self):
             return '-mcpu=native'
         return '-march=native'
 
-    def is_cuda_enable(self):
+    def get_cuda_compile_flag(self):
         try:
-            assert_no_cuda_mismatch(self.name)
-            return '-D__ENABLE_CUDA__'
+            if not self.is_rocm_pytorch():
+                assert_no_cuda_mismatch(self.name)
+            return "-D__ENABLE_CUDA__"
         except MissingCUDAException:
             print(f"{WARNING} {self.name} cuda is missing or is incompatible with installed torch, "
                   "only cpu ops can be compiled!")
@@ -839,7 +840,7 @@ def cxx_args(self):
 
         CPU_ARCH = self.cpu_arch()
         SIMD_WIDTH = self.simd_width()
-        CUDA_ENABLE = self.is_cuda_enable()
+        CUDA_ENABLE = self.get_cuda_compile_flag()
         args += [
             CPU_ARCH,
             '-fopenmp',
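
For context, here is a minimal standalone sketch of the failure mode and the guard this patch relies on. It is illustrative only (the torch_cuda_version variable name is hypothetical, not part of the patch), assuming a ROCm or CPU-only build of PyTorch where torch.version.cuda is None:

```python
import torch

# On ROCm builds of PyTorch, torch.version.cuda is None (torch.version.hip is
# populated instead), so torch.version.cuda.split('.') raises AttributeError.
# Check for a real CUDA build before reading the CUDA toolkit version.
if torch.version.cuda is not None:
    # CUDA build: keep only the major.minor part, e.g. "12.1"
    torch_cuda_version = ".".join(torch.version.cuda.split('.')[:2])
else:
    # ROCm (or CPU-only) build: no CUDA toolkit version is available
    torch_cuda_version = None
```

The patch applies the same idea inside get_cuda_compile_flag() by skipping assert_no_cuda_mismatch() when is_rocm_pytorch() is true, so the -D__ENABLE_CUDA__ flag can still be emitted on ROCm without touching torch.version.cuda.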