From 09556dd63d2aeeb4abc18769ca38a19b08453c52 Mon Sep 17 00:00:00 2001 From: Cloud Han Date: Fri, 18 Aug 2023 23:27:15 +0800 Subject: [PATCH] Build on windows, nsight compute does not support profiling programs in WSL Output with symlink_prefix --- gemm/.bazelrc | 10 ++++++++++ gemm/.gitignore | 1 + gemm/README.md | 5 ++++- gemm/cpu/BUILD.bazel | 1 + gemm/cuda/BUILD.bazel | 8 ++++++++ gemm/cuda/benchmark_driver.py | 17 ++++++++++++++--- gemm/cuda/pybind_matmul.cpp | 8 +++++++- gemm/cuda/test_driver.py | 18 ++++++++++++++---- 8 files changed, 59 insertions(+), 9 deletions(-) diff --git a/gemm/.bazelrc b/gemm/.bazelrc index fbba8a7..116e022 100644 --- a/gemm/.bazelrc +++ b/gemm/.bazelrc @@ -1,5 +1,15 @@ +common:linux --symlink_prefix=linux/bazel- build:linux --cxxopt=-std=c++17 build:linux --cxxopt=-march=native +build:linux --@rules_cuda//cuda:copts=-std=c++17 +build:linux --spawn_strategy=local + +common:windows --symlink_prefix=windows/bazel- +build:windows --cxxopt=/std:c++17 +build:windows --cxxopt=/permissive- +build:windows --@rules_cuda//cuda:copts=-std=c++17 + +build:profile --@rules_cuda//cuda:copts=-lineinfo test:benchmark --cache_test_results=no test:benchmark --test_output=streamed diff --git a/gemm/.gitignore b/gemm/.gitignore index 3ca2fae..981df57 100644 --- a/gemm/.gitignore +++ b/gemm/.gitignore @@ -1,4 +1,5 @@ *.nsys-rep +/nsight ### Automatically added by Hedron's Bazel Compile Commands Extractor: https://github.com/hedronvision/bazel-compile-commands-extractor # Ignore the `external` link (that is added by `bazel-compile-commands-extractor`). The link differs between macOS/Linux and Windows, so it shouldn't be checked in. The pattern must not end with a trailing `/` because it's a symlink on macOS/Linux. diff --git a/gemm/README.md b/gemm/README.md index fb1b74e..31afaa7 100644 --- a/gemm/README.md +++ b/gemm/README.md @@ -1,7 +1,10 @@ ## Build -``` +```bash bazel build -c opt --config=linux cpu/... 
+ +# Generate compile_commands.json +bazel run @hedron_compile_commands//:refresh_all --symlink_prefix=linux/bazel- -- --config=linux ``` ## Test diff --git a/gemm/cpu/BUILD.bazel b/gemm/cpu/BUILD.bazel index fa9a3bf..50b4d05 100644 --- a/gemm/cpu/BUILD.bazel +++ b/gemm/cpu/BUILD.bazel @@ -22,6 +22,7 @@ configure_make( lib_name = "libblis", lib_source = "@com_github_flame_blis//:blis_all_files", linkopts = ["-lpthread"], + target_compatible_with = ["@platforms//os:linux"], ) cc_library( diff --git a/gemm/cuda/BUILD.bazel b/gemm/cuda/BUILD.bazel index 48766fc..e577ffb 100644 --- a/gemm/cuda/BUILD.bazel +++ b/gemm/cuda/BUILD.bazel @@ -27,3 +27,11 @@ pybind_extension( srcs = ["pybind_matmul.cpp"], deps = [":matmul_impl"], ) + +genrule( + name = "matmul_copy", + srcs = [":matmul.so"], + outs = ["matmul.pyd"], + cmd_bat = "copy /Y $< $@", + target_compatible_with = ["@platforms//os:windows"], +) diff --git a/gemm/cuda/benchmark_driver.py b/gemm/cuda/benchmark_driver.py index b206d50..e638264 100644 --- a/gemm/cuda/benchmark_driver.py +++ b/gemm/cuda/benchmark_driver.py @@ -1,8 +1,19 @@ import os import sys -if not os.path.exists(os.path.join(os.path.dirname(__file__), "../bazel-bin/cuda/matmul.so")): - raise EnvironmentError("bazel build -c opt --config=linux '//cuda:matmul.so'") -sys.path.append(os.path.join(os.path.dirname(__file__), "../bazel-bin/cuda")) +if os.name == "posix": + if not os.path.exists(os.path.join(os.path.dirname(__file__), "../linux/bazel-bin/cuda/matmul.so")): + raise EnvironmentError("bazel build -c opt --config=linux '//cuda:matmul.so'") + sys.path.append(os.path.join(os.path.dirname(__file__), "../linux/bazel-bin/cuda")) +if os.name == "nt": + if not os.path.exists(os.path.join(os.path.dirname(__file__), "../windows/bazel-bin/cuda/matmul.pyd")): + raise EnvironmentError("bazel build -c opt --config=windows '//cuda:matmul.pyd'") + sys.path.append(os.path.join(os.path.dirname(__file__), "../windows/bazel-bin/cuda")) + if tuple(sys.version_info) > 
(3, 8): + # Work around the Windows DLL lookup behaviour of Python 3.8+, see https://stackoverflow.com/a/64472088/2091555 + # Always use winmode=0 and preload the library, so that add_dll_directory does not have to be used + import ctypes + matmul_lib = ctypes.CDLL( + os.path.join(os.path.dirname(__file__), "../windows/bazel-bin/cuda/matmul.pyd"), winmode=0) from dataclasses import dataclass, field import numpy as np diff --git a/gemm/cuda/pybind_matmul.cpp b/gemm/cuda/pybind_matmul.cpp index e021f6b..355efb8 100644 --- a/gemm/cuda/pybind_matmul.cpp +++ b/gemm/cuda/pybind_matmul.cpp @@ -10,7 +10,6 @@ namespace py = pybind11; #define REGISTER(name) \ - MATMUL_SIGNATURE(name); \ m.def( \ #name, \ [&](py::array_t a, py::array_t b, py::array_t c, int repeats = 1) { \ @@ -61,6 +60,13 @@ namespace py = pybind11; ); namespace column_major { +// MSVC does not accept function-local forward declarations, so declare them at namespace scope +MATMUL_SIGNATURE(matmul_reference); +MATMUL_SIGNATURE(launch_matmul_kernel_naive_cta16x16); +MATMUL_SIGNATURE(launch_matmul_kernel_naive_cta16x32); +MATMUL_SIGNATURE(launch_matmul_kernel_naive_cta32x16); +MATMUL_SIGNATURE(launch_matmul_kernel_naive_cta32x32); + PYBIND11_MODULE(matmul, m) { REGISTER(matmul_reference); REGISTER(launch_matmul_kernel_naive_cta16x16); diff --git a/gemm/cuda/test_driver.py b/gemm/cuda/test_driver.py index 8dc005d..cd0415e 100644 --- a/gemm/cuda/test_driver.py +++ b/gemm/cuda/test_driver.py @@ -1,13 +1,23 @@ import os import sys -if not os.path.exists(os.path.join(os.path.dirname(__file__), "../bazel-bin/cuda/matmul.so")): - raise EnvironmentError("bazel build -c opt --config=linux '//cuda:matmul.so'") -sys.path.append(os.path.join(os.path.dirname(__file__), "../bazel-bin/cuda")) +if os.name == "posix": + if not os.path.exists(os.path.join(os.path.dirname(__file__), "../linux/bazel-bin/cuda/matmul.so")): + raise EnvironmentError("bazel build -c opt --config=linux '//cuda:matmul.so'") + sys.path.append(os.path.join(os.path.dirname(__file__), "../linux/bazel-bin/cuda")) +if os.name == "nt": + if 
not os.path.exists(os.path.join(os.path.dirname(__file__), "../windows/bazel-bin/cuda/matmul.pyd")): + raise EnvironmentError("bazel build -c opt --config=windows '//cuda:matmul.pyd'") + sys.path.append(os.path.join(os.path.dirname(__file__), "../windows/bazel-bin/cuda")) + if tuple(sys.version_info) > (3, 8): + # Work around the Windows DLL lookup behaviour of Python 3.8+, see https://stackoverflow.com/a/64472088/2091555 + # Always use winmode=0 and preload the library, so that add_dll_directory does not have to be used + import ctypes + matmul_lib = ctypes.CDLL(os.path.join(os.path.dirname(__file__), "../windows/bazel-bin/cuda/matmul.pyd"), winmode=0) import numpy as np import pytest import matmul - +print(matmul.__file__) def get_bound(dtype: str, a: np.ndarray, b: np.ndarray, c: np.ndarray, transa: bool, transb: bool): k = b.shape[1] if transb else b.shape[0]