From 1e5fd43f2ce128220aef44cd813af25818429c15 Mon Sep 17 00:00:00 2001 From: cloudhan Date: Tue, 16 Jan 2024 23:35:57 +0800 Subject: [PATCH] ci: add additional tests for LTS releases (#215) * ci: add missing tests for old LTS releases * test: remove add_prefix to fix tests for bazel 5 --- .bazelrc | 2 - .github/workflows/build-tests.yaml | 36 +++++++++++++++++ examples/.bazelrc | 2 - examples/WORKSPACE.bazel | 3 -- examples/WORKSPACE.bzlmod | 3 -- examples/nccl/nccl-tests.BUILD | 16 ++++---- examples/nccl/nccl-tests.bzl | 4 +- examples/nccl/nccl.BUILD | 62 +++++++++++++++--------------- examples/nccl/nccl.bzl | 8 ++-- 9 files changed, 85 insertions(+), 51 deletions(-) diff --git a/.bazelrc b/.bazelrc index 873ec8e7..cad08b16 100644 --- a/.bazelrc +++ b/.bazelrc @@ -16,5 +16,3 @@ build:clang --//cuda:compiler=clang # https://github.com/bazel-contrib/rules_cuda/issues/1 # build --ui_event_filters=-INFO - -common:bzlmod --enable_bzlmod diff --git a/.github/workflows/build-tests.yaml b/.github/workflows/build-tests.yaml index e1fe1625..8cd32fed 100644 --- a/.github/workflows/build-tests.yaml +++ b/.github/workflows/build-tests.yaml @@ -56,6 +56,9 @@ jobs: echo "build --config=clang" > $HOME/.bazelrc echo "build:clang --@rules_cuda//cuda:archs=sm_80" >> $HOME/.bazelrc + # Check https://bazel.build/release#support-matrix, manually unroll the strategy matrix to avoid exploding + # the combinations. 
+ # Use Bazel with version specified in .bazelversion # out of @examples repo build requires WORKSPACE-based external dependency system - run: bazelisk build --noenable_bzlmod @rules_cuda_examples//basic:all @@ -68,3 +71,36 @@ jobs: - run: cd examples && bazelisk build //if_cuda:main - run: cd examples && bazelisk build //if_cuda:main --enable_cuda=False - run: bazelisk shutdown + + # Use Bazel 6 + - run: echo "USE_BAZEL_VERSION=6.4.0" >> $GITHUB_ENV + if: ${{ !startsWith(matrix.cases.os, 'windows') }} + - run: echo "USE_BAZEL_VERSION=6.4.0" >> $env:GITHUB_ENV + if: ${{ startsWith(matrix.cases.os, 'windows') }} + + - run: bazelisk build @rules_cuda_examples//basic:all + - run: bazelisk build @rules_cuda_examples//rdc:all + - run: bazelisk build @rules_cuda_examples//if_cuda:main + - run: bazelisk build @rules_cuda_examples//if_cuda:main --enable_cuda=False + - run: cd examples && bazelisk build --enable_bzlmod //basic:all + - run: cd examples && bazelisk build --enable_bzlmod //rdc:all + - run: cd examples && bazelisk build --enable_bzlmod //if_cuda:main + - run: cd examples && bazelisk build --enable_bzlmod //if_cuda:main --enable_cuda=False + - run: bazelisk shutdown + + # Use Bazel 5 + - run: echo "USE_BAZEL_VERSION=5.4.1" >> $GITHUB_ENV + if: ${{ !startsWith(matrix.cases.os, 'windows') }} + - run: echo "USE_BAZEL_VERSION=5.4.1" >> $env:GITHUB_ENV + if: ${{ startsWith(matrix.cases.os, 'windows') }} + + - run: bazelisk build @rules_cuda_examples//basic:all + - run: bazelisk build @rules_cuda_examples//rdc:all + - run: bazelisk build @rules_cuda_examples//if_cuda:main + - run: bazelisk build @rules_cuda_examples//if_cuda:main --enable_cuda=False + # bzlmod is not covered, our separate @rules_cuda_examples repo setup doesn't play well with it + # - run: cd examples && bazelisk build --experimental_enable_bzlmod //basic:all + # - run: cd examples && bazelisk build --experimental_enable_bzlmod //rdc:all + # - run: cd examples && bazelisk build 
--experimental_enable_bzlmod //if_cuda:main + # - run: cd examples && bazelisk build --experimental_enable_bzlmod //if_cuda:main --enable_cuda=False + - run: bazelisk shutdown diff --git a/examples/.bazelrc b/examples/.bazelrc index d38c59e8..7ebc8e4a 100644 --- a/examples/.bazelrc +++ b/examples/.bazelrc @@ -15,5 +15,3 @@ build:clang --@rules_cuda//cuda:compiler=clang # https://github.com/bazel-contrib/rules_cuda/issues/1 # build --ui_event_filters=-INFO - -common:bzlmod --enable_bzlmod diff --git a/examples/WORKSPACE.bazel b/examples/WORKSPACE.bazel index 7b6b5cb3..d95d8b03 100644 --- a/examples/WORKSPACE.bazel +++ b/examples/WORKSPACE.bazel @@ -25,7 +25,6 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") http_archive( name = "nccl", - add_prefix = "nccl", build_file = "@rules_cuda_examples//nccl:nccl.BUILD", sha256 = "83b299cfc2dfe63887dadf3590b3ac2b8b2fd68ec5515b6878774eda39a697d2", strip_prefix = "nccl-9814c75eea18fc7374cde884592233b6b7dc055b", @@ -34,10 +33,8 @@ http_archive( http_archive( name = "nccl-tests", - add_prefix = "nccl-tests", build_file = "@rules_cuda_examples//nccl:nccl-tests.BUILD", patch_args = [ - "--directory=nccl-tests", "-p1", ], patches = ["@rules_cuda_examples//nccl:nccl-tests-clang.patch"], diff --git a/examples/WORKSPACE.bzlmod b/examples/WORKSPACE.bzlmod index 97c6ca7f..8809e4e6 100644 --- a/examples/WORKSPACE.bzlmod +++ b/examples/WORKSPACE.bzlmod @@ -9,7 +9,6 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") # For WORK http_archive( name = "nccl", - add_prefix = "nccl", build_file = "@rules_cuda_examples//nccl:nccl.BUILD", sha256 = "83b299cfc2dfe63887dadf3590b3ac2b8b2fd68ec5515b6878774eda39a697d2", strip_prefix = "nccl-9814c75eea18fc7374cde884592233b6b7dc055b", @@ -18,10 +17,8 @@ http_archive( http_archive( name = "nccl-tests", - add_prefix = "nccl-tests", build_file = "@rules_cuda_examples//nccl:nccl-tests.BUILD", patch_args = [ - "--directory=nccl-tests", "-p1", ], patches = 
["@rules_cuda_examples//nccl:nccl-tests-clang.patch"], diff --git a/examples/nccl/nccl-tests.BUILD b/examples/nccl/nccl-tests.BUILD index f482e6db..5fe3f187 100644 --- a/examples/nccl/nccl-tests.BUILD +++ b/examples/nccl/nccl-tests.BUILD @@ -1,19 +1,21 @@ load("@rules_cuda//cuda:defs.bzl", "cuda_library") load("@rules_cuda_examples//nccl:nccl-tests.bzl", "nccl_tests_binary") +# NOTE: all paths in this file relative to @nccl-tests repo root. + cc_library( name = "nccl_tests_include", - hdrs = glob(["nccl-tests/src/*.h"]), - includes = ["nccl-tests/src"], + hdrs = glob(["src/*.h"]), + includes = ["src"], ) cuda_library( name = "common_cuda", srcs = [ - "nccl-tests/src/common.cu", - "nccl-tests/verifiable/verifiable.cu", + "src/common.cu", + "verifiable/verifiable.cu", ] + glob([ - "nccl-tests/**/*.h", + "**/*.h", ]), deps = [ ":nccl_tests_include", @@ -23,8 +25,8 @@ cuda_library( cc_library( name = "common_cc", - srcs = ["nccl-tests/src/timer.cc"], - hdrs = ["nccl-tests/src/timer.h"], + srcs = ["src/timer.cc"], + hdrs = ["src/timer.h"], alwayslink = 1, ) diff --git a/examples/nccl/nccl-tests.bzl b/examples/nccl/nccl-tests.bzl index 48229031..fd0aae71 100644 --- a/examples/nccl/nccl-tests.bzl +++ b/examples/nccl/nccl-tests.bzl @@ -1,9 +1,11 @@ load("@rules_cuda//cuda:defs.bzl", "cuda_library") +# NOTE: all paths in this file relative to @nccl-tests repo root. 
+ def nccl_tests_binary(name, cc_deps = [], cuda_deps = []): cuda_library( name = name, - srcs = ["nccl-tests/src/{}.cu".format(name)], + srcs = ["src/{}.cu".format(name)], deps = [ "@nccl//:nccl_shared", ":common_cuda", diff --git a/examples/nccl/nccl.BUILD b/examples/nccl/nccl.BUILD index 98f36117..6f9a47e5 100644 --- a/examples/nccl/nccl.BUILD +++ b/examples/nccl/nccl.BUILD @@ -2,9 +2,11 @@ load("@bazel_skylib//rules:expand_template.bzl", "expand_template") load("@rules_cuda//cuda:defs.bzl", "cuda_library", "cuda_objects") load("@rules_cuda_examples//nccl:nccl.bzl", "if_cuda_clang", "if_cuda_nvcc", "nccl_primitive") +# NOTE: all paths in this file relative to @nccl repo root. + expand_template( name = "nccl_h", - out = "nccl/src/include/nccl.h", + out = "src/include/nccl.h", substitutions = { "${nccl:Major}": "2", "${nccl:Minor}": "18", @@ -13,7 +15,7 @@ expand_template( # NCCL_VERSION(X,Y,Z) ((X) * 10000 + (Y) * 100 + (Z)) "${nccl:Version}": "21803", }, - template = "nccl/src/nccl.h.in", + template = "src/nccl.h.in", ) cc_library( @@ -21,24 +23,24 @@ cc_library( hdrs = [ ":nccl_h", ] + glob([ - "nccl/src/include/**/*.h", - "nccl/src/include/**/*.hpp", + "src/include/**/*.h", + "src/include/**/*.hpp", ]), includes = [ # this will add both nccl/src/include in repo and # bazel-out//bin/nccl/src/include to include paths # so the previous expand_template generate nccl.h to the very path! 
- "nccl/src/include", + "src/include", ], ) cuda_objects( name = "nccl_device_common", srcs = [ - "nccl/src/collectives/device/functions.cu", - "nccl/src/collectives/device/onerank_reduce.cu", + "src/collectives/device/functions.cu", + "src/collectives/device/onerank_reduce.cu", ] + glob([ - "nccl/src/collectives/device/**/*.h", + "src/collectives/device/**/*.h", ]), copts = if_cuda_nvcc(["--extended-lambda"]), ptxasopts = ["-maxrregcount=96"], @@ -51,21 +53,21 @@ USE_BF16 = True filegroup( name = "collective_dev_hdrs", srcs = [ - "nccl/src/collectives/device/all_gather.h", - "nccl/src/collectives/device/all_reduce.h", - "nccl/src/collectives/device/broadcast.h", - "nccl/src/collectives/device/common.h", - "nccl/src/collectives/device/common_kernel.h", - "nccl/src/collectives/device/gen_rules.sh", - "nccl/src/collectives/device/op128.h", - "nccl/src/collectives/device/primitives.h", - "nccl/src/collectives/device/prims_ll.h", - "nccl/src/collectives/device/prims_ll128.h", - "nccl/src/collectives/device/prims_simple.h", - "nccl/src/collectives/device/reduce.h", - "nccl/src/collectives/device/reduce_kernel.h", - "nccl/src/collectives/device/reduce_scatter.h", - "nccl/src/collectives/device/sendrecv.h", + "src/collectives/device/all_gather.h", + "src/collectives/device/all_reduce.h", + "src/collectives/device/broadcast.h", + "src/collectives/device/common.h", + "src/collectives/device/common_kernel.h", + "src/collectives/device/gen_rules.sh", + "src/collectives/device/op128.h", + "src/collectives/device/primitives.h", + "src/collectives/device/prims_ll.h", + "src/collectives/device/prims_ll128.h", + "src/collectives/device/prims_simple.h", + "src/collectives/device/reduce.h", + "src/collectives/device/reduce_kernel.h", + "src/collectives/device/reduce_scatter.h", + "src/collectives/device/sendrecv.h", ], ) @@ -131,16 +133,16 @@ cc_binary( name = "nccl", srcs = glob( [ - "nccl/src/*.cc", - "nccl/src/collectives/*.cc", - "nccl/src/graph/*.cc", - "nccl/src/graph/*.h", - 
"nccl/src/misc/*.cc", - "nccl/src/transport/*.cc", + "src/*.cc", + "src/collectives/*.cc", + "src/graph/*.cc", + "src/graph/*.h", + "src/misc/*.cc", + "src/transport/*.cc", ], exclude = [ # https://github.com/NVIDIA/nccl/issues/658 - "nccl/src/enhcompat.cc", + "src/enhcompat.cc", ], ), copts = if_cuda_clang(["-xcu"]), diff --git a/examples/nccl/nccl.bzl b/examples/nccl/nccl.bzl index e2758f27..6679430c 100644 --- a/examples/nccl/nccl.bzl +++ b/examples/nccl/nccl.bzl @@ -1,6 +1,8 @@ load("@bazel_skylib//rules:copy_file.bzl", "copy_file") load("@rules_cuda//cuda:defs.bzl", "cuda_library", "cuda_objects") +# NOTE: all paths in this file relative to @nccl repo root. + def if_cuda_nvcc(if_true, if_false = []): return select({ "@rules_cuda//cuda:compiler_is_nvcc": if_true, @@ -25,8 +27,8 @@ def nccl_primitive(name, hdrs = [], deps = [], use_bf16 = True): name_op_dt = "{}_{}_{}".format(name, op, dt) copy_file( name = name_op_dt + "_rename", - src = "nccl/src/collectives/device/{}.cu".format(name), - out = "nccl/src/collectives/device/{}.cu".format(name_op_dt), + src = "src/collectives/device/{}.cu".format(name), + out = "src/collectives/device/{}.cu".format(name_op_dt), ) cuda_objects( @@ -36,7 +38,7 @@ def nccl_primitive(name, hdrs = [], deps = [], use_bf16 = True): deps = deps, ptxasopts = ["-maxrregcount=96"], defines = ["NCCL_OP={}".format(opn), "NCCL_TYPE={}".format(dtn)], - includes = ["nccl/src/collectives/device"], + includes = ["src/collectives/device"], ) intermediate_targets.append(":" + name_op_dt)