diff --git a/.lintrunner.toml b/.lintrunner.toml index b7375092a..7d48258bc 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -2,7 +2,7 @@ merge_base_with = "origin/main" [[linter]] code = 'RUFF' -include_patterns = ['test/smoke_test/*.py', 's3_management/*.py'] +include_patterns = ['test/smoke_test/*.py', 's3_management/*.py', 'aarch64_linux/*.py'] command = [ 'python3', 'tools/linter/adapters/ruff_linter.py', diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index 3b772847c..a57dab545 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -2,7 +2,7 @@ # encoding: UTF-8 import os -import subprocess +from subprocess import check_output from pygit2 import Repository from typing import List @@ -11,7 +11,7 @@ def list_dir(path: str) -> List[str]: '''' Helper for getting paths for Python ''' - return subprocess.check_output(["ls", "-1", path]).decode().split("\n") + return check_output(["ls", "-1", path]).decode().split("\n") def build_ArmComputeLibrary(git_clone_flags: str = "") -> None: @@ -19,10 +19,12 @@ def build_ArmComputeLibrary(git_clone_flags: str = "") -> None: Using ArmComputeLibrary for aarch64 PyTorch ''' print('Building Arm Compute Library') + acl_build_flags=" ".join(["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0", + "arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"]) os.system("cd / && mkdir /acl") os.system(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.08 {git_clone_flags}") os.system("cd ComputeLibrary; export acl_install_dir=/acl; " - "scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8a multi_isa=1 fixed_format_kernels=1 build=native build_dir=$acl_install_dir/build; " + f"scons Werror=1 -j8 {acl_build_flags} build_dir=$acl_install_dir/build; " "cp -r arm_compute $acl_install_dir; " "cp -r include $acl_install_dir; " "cp -r utils $acl_install_dir; " @@ 
-86,13 +88,12 @@ def parse_arguments(): if override_package_version is not None: version = override_package_version build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 " - else: - if branch in ['nightly', 'master']: - build_date = subprocess.check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '') - version = subprocess.check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2] - build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " - if branch.startswith("v1.") or branch.startswith("v2."): - build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " + elif branch in ['nightly', 'master']: + build_date = check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '') + version = check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2] + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " + elif branch.startswith(("v1.", "v2.")): + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " if enable_mkldnn: build_ArmComputeLibrary(git_clone_flags) @@ -105,9 +106,10 @@ def parse_arguments(): else: print("build pytorch without mkldnn backend") - # work around to fix Raspberry pie crash - print("Applying mkl-dnn patch to fix readdir crash") - os.system("cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/aarch64-fix-readdir-crash.patch") + # patch mkldnn to fix aarch64 mac and aws lambda crash + print("Applying mkl-dnn patch to fix crash due to /sys not accesible") + os.system("cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/fix-xbyak-failure.patch") + os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") pytorch_wheel_name = complete_wheel("pytorch") print(f"Build Compelete. 
Created {pytorch_wheel_name}..") diff --git a/aarch64_linux/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py index 9efd2e6ae..23d8e29d7 100755 --- a/aarch64_linux/build_aarch64_wheel.py +++ b/aarch64_linux/build_aarch64_wheel.py @@ -2,9 +2,10 @@ # This script is for building AARCH64 wheels using AWS EC2 instances. # To generate binaries for the release follow these steps: -# 1. Update mappings for each of the Domain Libraries by adding new row to a table like this: "v1.11.0": ("0.11.0", "rc1"), -# 2. Run script with following arguments for each of the supported python versions and specify required RC tag for example: v1.11.0-rc3: -# build_aarch64_wheel.py --key-name --use-docker --python 3.8 --branch +# 1. Update mappings for each of the Domain Libraries by adding new row to a table like this: +# "v1.11.0": ("0.11.0", "rc1"), +# 2. Run script with following arguments for each of the supported python versions and required tag, for example: +# build_aarch64_wheel.py --key-name --use-docker --python 3.8 --branch v1.11.0-rc3 import boto3 @@ -177,7 +178,7 @@ def wait_for_connection(addr, port, timeout=15, attempt_cnt=5): try: with socket.create_connection((addr, port), timeout=timeout): return - except (ConnectionRefusedError, socket.timeout): + except (ConnectionRefusedError, socket.timeout): # noqa: PERF203 if i == attempt_cnt - 1: raise time.sleep(timeout) @@ -203,7 +204,7 @@ def install_condaforge(host: RemoteHost, if host.using_docker(): host.run_cmd("echo 'PATH=$HOME/miniforge3/bin:$PATH'>>.bashrc") else: - host.run_cmd(['sed', '-i', '\'/^# If not running interactively.*/i PATH=$HOME/miniforge3/bin:$PATH\'', '.bashrc']) + host.run_cmd(['sed', '-i', '\'/^# If not running interactively.*/i PATH=$HOME/miniforge3/bin:$PATH\'', '.bashrc']) # noqa: E501 def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None: @@ -221,12 +222,13 @@ def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None: print('Building OpenBLAS') 
host.run_cmd(f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.25 {git_clone_flags}") make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8" - host.run_cmd(f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS") + host.run_cmd(f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS") # noqa: E501 def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None: print('Building Arm Compute Library') - acl_build_flags="debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8a multi_isa=1 fixed_format_kernels=1 build=native" + acl_build_flags=" ".join(["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0", + "arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"]) host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.08 {git_clone_flags}") host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}") @@ -301,7 +303,7 @@ def build_torchvision(host: RemoteHost, *, # Remove .so files to force static linking host.run_cmd("rm miniforge3/lib/libpng.so miniforge3/lib/libpng16.so miniforge3/lib/libjpeg.so") # And patch setup.py to include libz dependency for libpng - host.run_cmd(['sed -i -e \'s/image_link_flags\.append("png")/image_link_flags += ["png", "z"]/\' vision/setup.py']) + host.run_cmd(['sed -i -e \'s/image_link_flags\\.append("png")/image_link_flags += ["png", "z"]/\' vision/setup.py']) # noqa: E501 build_vars = "" if branch == "nightly": @@ -525,7 +527,7 @@ def start_build(host: RemoteHost, *, if host.using_docker(): print("Move libgfortant.a into a standard location") # HACK: pypa gforntran.a is compiled without PIC, which leads to the following error - # libgfortran.a(error.o)(.text._gfortrani_st_printf+0x34): unresolvable R_AARCH64_ADR_PREL_PG_HI21 relocation against symbol `__stack_chk_guard@@GLIBC_2.17' + # 
libgfortran.a(error.o)(.text._gfortrani_st_printf+0x34): unresolvable R_AARCH64_ADR_PREL_PG_HI21 relocation against symbol `__stack_chk_guard@@GLIBC_2.17' # noqa: E501 # Workaround by copying gfortran library from the host host.run_ssh_cmd("sudo apt-get install -y gfortran-8") host.run_cmd("mkdir -p /usr/lib/gcc/aarch64-linux-gnu/8") @@ -543,10 +545,10 @@ def start_build(host: RemoteHost, *, # Breakpad build fails on aarch64 build_vars = "USE_BREAKPAD=0 " if branch == 'nightly': - build_date = host.check_output("cd pytorch && git log --pretty=format:%s -1").strip().split()[0].replace("-", "") + build_date = host.check_output("cd pytorch && git log --pretty=format:%s -1").strip().split()[0].replace("-", "") # noqa: E501 version = host.check_output("cat pytorch/version.txt").strip()[:-2] build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1" - if branch.startswith("v1.") or branch.startswith("v2."): + if branch.startswith(("v1.", "v2.")): build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1" if host.using_docker(): build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" @@ -554,11 +556,12 @@ def start_build(host: RemoteHost, *, build_ArmComputeLibrary(host, git_clone_flags) print("build pytorch with mkldnn+acl backend") build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" - host.run_cmd(f"cd $HOME && git clone https://github.com/pytorch/builder.git") - host.run_cmd(f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}") + host.run_cmd("cd $HOME && git clone https://github.com/pytorch/builder.git && cd builder && git checkout release/2.2") # noqa: E501 + host.run_cmd("cd $HOME/pytorch/third_party/ideep/mkl-dnn && patch -p1 < $HOME/builder/mkldnn_fix/fix-xbyak-failure.patch") # noqa: E501 + host.run_cmd(f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py 
bdist_wheel{build_opts}") # noqa: E501 print('Repair the wheel') pytorch_wheel_name = host.list_dir("pytorch/dist")[0] - host.run_cmd(f"export LD_LIBRARY_PATH=$HOME/acl/build:$HOME/pytorch/build/lib && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}") + host.run_cmd(f"export LD_LIBRARY_PATH=$HOME/acl/build:$HOME/pytorch/build/lib && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}") # noqa: E501 print('replace the original wheel with the repaired one') pytorch_repaired_wheel_name = host.list_dir("wheelhouse")[0] host.run_cmd(f"cp $HOME/wheelhouse/{pytorch_repaired_wheel_name} $HOME/pytorch/dist/{pytorch_wheel_name}") @@ -706,7 +709,7 @@ def parse_arguments(): parser.add_argument("--build-only", action="store_true") parser.add_argument("--test-only", type=str) parser.add_argument("--os", type=str, choices=list(os_amis.keys()), default='ubuntu20_04') - parser.add_argument("--python-version", type=str, choices=['3.6', '3.7', '3.8', '3.9', '3.10', '3.11'], default=None) + parser.add_argument("--python-version", type=str, choices=[f'3.{d}' for d in range(6, 12)], default=None) parser.add_argument("--alloc-instance", action="store_true") parser.add_argument("--list-instances", action="store_true") parser.add_argument("--pytorch-only", action="store_true") diff --git a/aarch64_linux/embed_library.py b/aarch64_linux/embed_library.py index 978970d45..be6bb048f 100644 --- a/aarch64_linux/embed_library.py +++ b/aarch64_linux/embed_library.py @@ -13,7 +13,7 @@ def replace_tag(filename): - with open(filename, 'r') as f: + with open(filename) as f: lines = f.read().split("\\n") for i,line in enumerate(lines): if not line.startswith("Tag: "): @@ -42,7 +42,7 @@ def embed_library(whl_path, lib_soname, update_tag=False): torchlib_path = os.path.join(ctx._tmpdir.name, 'torch', 'lib') ctx.out_wheel=tmp_whl_name new_lib_path, new_lib_soname = None, None - for filename, elf in elf_file_filter(ctx.iter_files()): + for filename, _ in elf_file_filter(ctx.iter_files()): 
if not filename.startswith('torch/lib'): continue libtree = lddtree(filename) diff --git a/mkldnn_fix/aarch64-fix-readdir-crash.patch b/mkldnn_fix/aarch64-fix-readdir-crash.patch deleted file mode 100644 index 81d46d406..000000000 --- a/mkldnn_fix/aarch64-fix-readdir-crash.patch +++ /dev/null @@ -1,14 +0,0 @@ -diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl.cpp b/src/cpu/aarch64/xbyak_aarch64/src/util_impl.cpp -index cb800b2509..5516373b90 100644 ---- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl.cpp -+++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl.cpp -@@ -170,6 +170,8 @@ int Cpu::getFilePathMaxTailNumPlus1(const char *path) { - fflush(stdout); - - DIR *dir = opendir(dir_path); -+ if (dir == NULL) -+ return 0; - struct dirent *dp; - - dp = readdir(dir); - diff --git a/mkldnn_fix/fix-xbyak-failure.patch b/mkldnn_fix/fix-xbyak-failure.patch new file mode 100644 index 000000000..2ad278f0b --- /dev/null +++ b/mkldnn_fix/fix-xbyak-failure.patch @@ -0,0 +1,96 @@ +cpu: aarch64: fix xbyak functions for /sys access failures + +There are platforms with /sys not mounted. skip handling HW caps +for such platforms. + +This fixes the issue# pytorch/pytorch#115482 +--- + .../xbyak_aarch64/src/util_impl_linux.h | 24 ++++++++++++++----- + .../aarch64/xbyak_aarch64/src/util_impl_mac.h | 9 ++++--- + 2 files changed, 24 insertions(+), 9 deletions(-) + +diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h +index 2c7b28e58b..860a05700f 100644 +--- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h ++++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h +@@ -144,8 +144,13 @@ private: + regex_t regexBuf; + regmatch_t match[1]; + +- if (regcomp(&regexBuf, regex, REG_EXTENDED) != 0) +- throw ERR_INTERNAL; ++ if (regcomp(&regexBuf, regex, REG_EXTENDED) != 0) { ++ /* There are platforms with /sys not mounted. 
return empty buffers ++ * in these scenarios ++ */ ++ buf[0] = '\0'; ++ return 0; ++ } + + const int retVal = regexec(&regexBuf, path, 1, match, 0); + regfree(&regexBuf); +@@ -187,8 +192,12 @@ private: + regex_t regexBuf; + regmatch_t match[2]; + +- if (regcomp(&regexBuf, "index[0-9]*$", REG_EXTENDED) != 0) +- throw ERR_INTERNAL; ++ if (regcomp(&regexBuf, "index[0-9]*$", REG_EXTENDED) != 0) { ++ /* There are platforms with /sys not mounted. return gracefully ++ * in these scenarios ++ */ ++ goto init_and_return_false; ++ } + + if (regexec(&regexBuf, dp->d_name, 1, match, 0) == 0) { // Found index[1-9][0-9]. directory + char *dir_name = buf0; +@@ -438,12 +447,15 @@ private: + + FILE *file = fopen(path_midr_el1, "r"); + if (file == nullptr) { +- throw Error(ERR_INTERNAL); ++ /* There are platforms with /sys not mounted. return empty buffer ++ * in these scenarios ++ */ ++ cacheInfo_.midr_el1 = 0xFE << 24; + return; + } + + if (fread(buf, sizeof(char), 64, file) == 0) { +- throw Error(ERR_INTERNAL); ++ cacheInfo_.midr_el1 = 0xFE << 24; + return; + } + +diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h +index ebd6dba7c0..93bdae1d7a 100644 +--- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h ++++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h +@@ -102,18 +102,21 @@ private: + size_t val = 0; + size_t len = sizeof(val); + ++ /* There are platforms with /sys not mounted. skip ++ * handling HW caps for such platforms. ++ */ + if (sysctlbyname(hw_opt_atomics, &val, &len, NULL, 0) != 0) +- throw Error(ERR_INTERNAL); ++ type_ = 0; + else + type_ |= (val == 1) ? (Type)XBYAK_AARCH64_HWCAP_ATOMIC : 0; + + if (sysctlbyname(hw_opt_fp, &val, &len, NULL, 0) != 0) +- throw Error(ERR_INTERNAL); ++ type_ = 0; + else + type_ |= (val == 1) ? (Type)XBYAK_AARCH64_HWCAP_FP : 0; + + if (sysctlbyname(hw_opt_neon, &val, &len, NULL, 0) != 0) +- throw Error(ERR_INTERNAL); ++ type_ = 0; + else + type_ |= (val == 1) ? 
(Type)XBYAK_AARCH64_HWCAP_ADVSIMD : 0; + } +-- +2.34.1 +