diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml
index beecf5f..ca0399a 100644
--- a/.github/workflows/ci-build.yml
+++ b/.github/workflows/ci-build.yml
@@ -58,7 +58,7 @@ jobs:
       - name: py311 build whl and upload
         run: |
           cd "$env:GITHUB_WORKSPACE/jax"
-          ../bazel-build-cpu.ps1 -bazel_path $env:BAZEL_PATH
+          ../build-jaxlib.ps1 cpu -bazel_path $env:BAZEL_PATH
           az storage blob upload-batch --overwrite -d unstable -s "$env:GITHUB_WORKSPACE/jax/bazel-dist" --pattern '*.whl'
           rm -Recurse -Force "$env:GITHUB_WORKSPACE/jax/bazel-dist"
 
@@ -74,7 +74,7 @@ jobs:
       - name: py310 build whl and upload
         run: |
           cd "$env:GITHUB_WORKSPACE/jax"
-          ../bazel-build-cpu.ps1 -bazel_path $env:BAZEL_PATH
+          ../build-jaxlib.ps1 cpu -bazel_path $env:BAZEL_PATH
           az storage blob upload-batch --overwrite -d unstable -s "$env:GITHUB_WORKSPACE/jax/bazel-dist" --pattern '*.whl'
           rm -Recurse -Force "$env:GITHUB_WORKSPACE/jax/bazel-dist"
 
@@ -90,7 +90,7 @@ jobs:
       - name: py39 build whl and upload
         run: |
           cd "$env:GITHUB_WORKSPACE/jax"
-          ../bazel-build-cpu.ps1 -bazel_path $env:BAZEL_PATH
+          ../build-jaxlib.ps1 cpu -bazel_path $env:BAZEL_PATH
           az storage blob upload-batch --overwrite -d unstable -s "$env:GITHUB_WORKSPACE/jax/bazel-dist" --pattern '*.whl'
           rm -Recurse -Force "$env:GITHUB_WORKSPACE/jax/bazel-dist"
 
@@ -106,12 +106,14 @@ jobs:
       - name: py38 build whl and upload
         run: |
           cd "$env:GITHUB_WORKSPACE/jax"
-          ../bazel-build-cpu.ps1 -bazel_path $env:BAZEL_PATH
+          ../build-jaxlib.ps1 cpu -bazel_path $env:BAZEL_PATH
           az storage blob upload-batch --overwrite -d unstable -s "$env:GITHUB_WORKSPACE/jax/bazel-dist" --pattern '*.whl'
           rm -Recurse -Force "$env:GITHUB_WORKSPACE/jax/bazel-dist"
 
-  build-cuda111:
-    if: ${{ false }} # disable for now
+  build-cuda:
+    strategy:
+      matrix:
+        cuda-version: ["11.8", "12.1"]
     runs-on: windows-2019
     env:
       AZURE_STORAGE_CONNECTION_STRING: ${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}
@@ -119,6 +121,8 @@ jobs:
       TEMP: C:\\Users\\runneradmin\\Temp
       TMP: C:\\Users\\runneradmin\\Temp
       PYTHONUNBUFFERED: '1'
+      USE_BAZEL_VERSION: '5.1.1'
+
     steps:
       - name: Show user home
         run: ls ~
@@ -146,12 +150,12 @@ jobs:
       - name: Download Bazelisk
         run: curl -k -L https://github.com/bazelbuild/bazelisk/releases/download/v1.17.0/bazelisk-windows-amd64.exe -o $env:BAZEL_PATH
 
-      - name: Install CUDA 11.1
+      - name: Install CUDA ${{ matrix.cuda-version }}
         run: |
-          curl -k -L https://whls.blob.core.windows.net/ci-files/v11.1.7z -o cuda.7z
+          curl -k -L https://whls.blob.core.windows.net/ci-files/v${{ matrix.cuda-version }}.7z -o cuda.7z
           7z x cuda.7z -o'D:/CUDA'
           rm cuda.7z
-          ls D:/CUDA/v11.1
+          ls D:/CUDA/v${{ matrix.cuda-version }}
 
       - uses: actions/cache@v2
         with:
@@ -172,7 +176,7 @@ jobs:
       - name: py311 build whl and upload
         run: |
           cd "$env:GITHUB_WORKSPACE/jax"
-          ../bazel-build-cuda.ps1 -bazel_path $env:BAZEL_PATH -cuda_version '11.1' -cuda_prefix 'D:/CUDA'
+          ../build-jaxlib.ps1 cuda -bazel_path $env:BAZEL_PATH -cuda_version '${{ matrix.cuda-version }}' -cuda_prefix 'D:/CUDA' -symlink_python
           az storage blob upload-batch --overwrite -d unstable -s "$env:GITHUB_WORKSPACE/jax/bazel-dist" --pattern '*.whl'
           rm -Recurse -Force "$env:GITHUB_WORKSPACE/jax/bazel-dist"
 
@@ -188,7 +192,7 @@ jobs:
       - name: py310 build whl and upload
         run: |
           cd "$env:GITHUB_WORKSPACE/jax"
-          ../bazel-build-cuda.ps1 -bazel_path $env:BAZEL_PATH -cuda_version '11.1' -cuda_prefix 'D:/CUDA'
+          ../build-jaxlib.ps1 cuda -bazel_path $env:BAZEL_PATH -cuda_version '${{ matrix.cuda-version }}' -cuda_prefix 'D:/CUDA' -symlink_python
           az storage blob upload-batch --overwrite -d unstable -s "$env:GITHUB_WORKSPACE/jax/bazel-dist" --pattern '*.whl'
           rm -Recurse -Force "$env:GITHUB_WORKSPACE/jax/bazel-dist"
 
@@ -204,7 +208,7 @@ jobs:
       - name: py39 build whl and upload
         run: |
           cd "$env:GITHUB_WORKSPACE/jax"
-          ../bazel-build-cuda.ps1 -bazel_path $env:BAZEL_PATH -cuda_version '11.1' -cuda_prefix 'D:/CUDA'
+          ../build-jaxlib.ps1 cuda -bazel_path $env:BAZEL_PATH -cuda_version '${{ matrix.cuda-version }}' -cuda_prefix 'D:/CUDA' -symlink_python
           az storage blob upload-batch --overwrite -d unstable -s "$env:GITHUB_WORKSPACE/jax/bazel-dist" --pattern '*.whl'
           rm -Recurse -Force "$env:GITHUB_WORKSPACE/jax/bazel-dist"
 
@@ -220,7 +224,7 @@ jobs:
       - name: py38 build whl and upload
         run: |
           cd "$env:GITHUB_WORKSPACE/jax"
-          ../bazel-build-cuda.ps1 -bazel_path $env:BAZEL_PATH -cuda_version '11.1' -cuda_prefix 'D:/CUDA'
+          ../build-jaxlib.ps1 cuda -bazel_path $env:BAZEL_PATH -cuda_version '${{ matrix.cuda-version }}' -cuda_prefix 'D:/CUDA' -symlink_python
           az storage blob upload-batch --overwrite -d unstable -s "$env:GITHUB_WORKSPACE/jax/bazel-dist" --pattern '*.whl'
           rm -Recurse -Force "$env:GITHUB_WORKSPACE/jax/bazel-dist"
 
@@ -228,7 +232,7 @@ jobs:
     if: ${{ ! cancelled() }}
     needs:
       - build-cpu
-      # - build-cuda111
+      - build-cuda
     runs-on: windows-2019
     env:
       AZURE_STORAGE_CONNECTION_STRING: ${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}
diff --git a/.gitmodules b/.gitmodules
index 872dc3a..fb799d3 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "jax"]
 	path = jax
 	url = https://github.com/google/jax.git
+[submodule "triton"]
+	path = triton
+	url = https://github.com/cloudhan/triton.git
diff --git a/bazel-build-cpu.ps1 b/bazel-build-cpu.ps1
deleted file mode 100644
index 4310949..0000000
--- a/bazel-build-cpu.ps1
+++ /dev/null
@@ -1,66 +0,0 @@
-param(
-  [Parameter(Mandatory = $true)]
-  [String]$bazel_path
-)
-
-. (Join-Path (Split-Path $MyInvocation.MyCommand.Path) functions.ps1)
-$ErrorActionPreference = "Stop"
-
-# path for patch.exe and realpath.exe
-$msys2_path = "C:\msys64\usr\bin"
-
-[System.Collections.ArrayList]$new_path = `
-  'C:\Windows\System32', `
-  'C:\Windows', `
-  'C:\Windows\System32\Wbem', `
-  'C:\Windows\System32\WindowsPowerShell\v1.0'
-
-Push-Environment
-Push-Location
-
-try {
-  # insert your path here
-  $new_path.Insert(0, 'C:\Program Files\Git\cmd')
-  $new_path.Insert(0, 'C:\tools\bazelisk')
-  $new_path.Insert(0, "$msys2_path")
-
-  if ($env:pythonLocation) {
-    # bring github actions python into path
-    $new_path.Insert(0, "$env:pythonLocation")
-    $new_path.Insert(0, "$env:pythonLocation/Scripts")
-  }
-
-  $env:PATH = $new_path -join ";"
-
-  Set-VSEnv
-
-  # bring conda python into environment, this supersede MSYS2's python and
-  # maybe VS's python
-  # conda activate $conda_env
-
-  echo 'try-import %workspace%/../windows_configure.bazelrc' > .bazelrc.user
-
-  mkdir ~/bzl_out -ErrorAction Continue
-  New-Item -Type Junction -Target (Resolve-Path ~/bzl_out) -Path D:/bzl_out -ErrorAction Continue
-
-  python .\build\build.py `
-    --noenable_cuda `
-    --bazel_path="$bazel_path" `
-    --bazel_startup_options="--output_user_root=D:/bzl_out"
-
-  if ($LASTEXITCODE -ne 0) {
-    throw "last command exit with $LASTEXITCODE"
-  }
-
-  if ((ls dist).Count -ne 1) {
-    throw "number of whl files != 1"
-  }
-  $name = (ls dist)[0].Name
-
-  mkdir "bazel-dist/cpu" -ErrorAction 0
-  mv -Force "dist/$name" "bazel-dist/cpu/$name"
-}
-finally {
-  Pop-Location
-  Pop-Environment
-}
diff --git a/bazel-build-cuda.ps1 b/bazel-build-cuda.ps1
deleted file mode 100644
index c08bc8b..0000000
--- a/bazel-build-cuda.ps1
+++ /dev/null
@@ -1,108 +0,0 @@
-param(
-  [Parameter(Mandatory = $true)]
-  [String]$bazel_path,
-
-  # [Parameter(Mandatory = $true)]
-  # [String]$conda_env,
-
-  [Parameter(Mandatory = $true)]
-  [ValidateSet('11.2', '11.1', '10.1')]
-  $cuda_version,
-
-  [Parameter(Mandatory = $false)]
-  [String]$cuda_prefix = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA"
-)
-
-. (Join-Path (Split-Path $MyInvocation.MyCommand.Path) functions.ps1)
-$ErrorActionPreference = "Stop"
-
-# path for patch.exe and realpath.exe
-$msys2_path = "C:\msys64\usr\bin"
-
-switch ($cuda_version) {
-  '11.2' {
-    $cudnn_version = '8.2.2'
-  }
-  '11.1' {
-    $cudnn_version = '8.2.2'
-  }
-}
-
-$cuda_version = [System.Version]$cuda_version
-$cudnn_version = [System.Version]$cudnn_version
-
-$cuda_path = "$cuda_prefix/v$cuda_version"
-$cudnn_path = $cuda_path
-
-[System.Collections.ArrayList]$new_path = `
-  'C:\Windows\System32', `
-  'C:\Windows', `
-  'C:\Windows\System32\Wbem', `
-  'C:\Windows\System32\WindowsPowerShell\v1.0'
-
-Push-Environment
-Push-Location
-
-try {
-  # https://github.com/tensorflow/tensorflow/blob/9e2743271dd09609e8726edaffdd7c6762d3bf05/third_party/gpus/find_cuda_config.py#L26-L33
-  # and tf 2.0 release note
-  if ($cuda_path -eq $cudnn_path) {
-    # https://github.com/tensorflow/tensorflow/issues/51040
-    $env:TF_CUDA_PATHS="$cuda_path"
-  }
-  else {
-    $env:TF_CUDA_PATHS="$cuda_path,$cudnn_path"
-  }
-
-  # insert your path here
-  $new_path.Insert(0, 'C:\Program Files\Git\cmd')
-  $new_path.Insert(0, 'C:\tools\bazelisk')
-  $new_path.Insert(0, "$msys2_path")
-
-  if ($env:pythonLocation) {
-    # bring github actions python into path
-    $new_path.Insert(0, "$env:pythonLocation")
-    $new_path.Insert(0, "$env:pythonLocation/Scripts")
-  }
-
-  $env:PATH = $new_path -join ";"
-
-  Set-VSEnv
-
-  # bring conda python into environment, this supersede MSYS2's python and
-  # maybe VS's python
-  # conda activate $conda_env
-
-  echo 'try-import %workspace%/../windows_configure.bazelrc' > .bazelrc.user
-
-  mkdir ~/bzl_out -ErrorAction Continue
-  New-Item -Type Junction -Target (Resolve-Path ~/bzl_out) -Path D:/bzl_out -ErrorAction Continue
-
-  python .\build\build.py `
-    --enable_cuda `
-    --cuda_version="$cuda_version" `
-    --cuda_path="$cuda_path" `
-    --cudnn_version="$cudnn_version" `
-    --cudnn_path="$cudnn_path" `
-    --bazel_path="$bazel_path" `
-    --bazel_startup_options="--output_user_root=D:/bzl_out"
-
-  if ($LASTEXITCODE -ne 0) {
-    throw "last command exit with $LASTEXITCODE"
-  }
-
-  if ((ls dist).Count -ne 1) {
-    throw "number of whl files != 1"
-  }
-  $name = (ls dist)[0].Name
-  $cuda_dir = "cuda$($cuda_version.Major)$($cuda_version.Minor)"
-  $cuda_cudnn_tag = "cuda$($cuda_version.Major).cudnn$($cudnn_version.Major)$($cudnn_version.Minor)"
-  $new_name = $name.Insert($name.IndexOf("-", $name.IndexOf("-") + 1), "+$cuda_cudnn_tag")
-
-  mkdir "bazel-dist/$cuda_dir" -ErrorAction 0
-  mv -Force "dist/$name" "bazel-dist/$cuda_dir/$new_name"
-}
-finally {
-  Pop-Location
-  Pop-Environment
-}
diff --git a/build-jaxlib.ps1 b/build-jaxlib.ps1
new file mode 100644
index 0000000..2199d87
--- /dev/null
+++ b/build-jaxlib.ps1
@@ -0,0 +1,243 @@
+# .SYNOPSIS
+#   Build a jaxlib wheel on Windows (CPU or CUDA) and move it into bazel-dist/.
+#
+# .DESCRIPTION
+#   Run this from the root of the jax checkout: it writes .bazelrc.user into the
+#   current directory and invokes .\build\build.py relative to it. The single
+#   wheel produced in dist/ is moved to bazel-dist/cpu/ for cpu builds. For cuda
+#   builds it is moved to bazel-dist/cudaXY/ (CUDA X.Y) and "+cudaX.cudnnAB"
+#   (cuDNN A.B) is inserted into its file name.
+#
+# .PARAMETER build_type
+#   'cpu' or 'cuda'; selects --noenable_cuda or --enable_cuda for build.py.
+#
+# .PARAMETER bazel_path
+#   Passed to build.py as --bazel_path.
+#
+# .PARAMETER bazel_jobs
+#   When greater than 0, "build --jobs=N" is appended to .bazelrc.user.
+#
+# .PARAMETER conda_env
+#   When non-empty, "conda activate" is run with it before the build.
+#
+# .PARAMETER cuda_version
+#   CUDA toolkit version; it also selects the cuDNN version (12.1 -> 8.9.1,
+#   11.8 -> 8.6.0). The toolkit path is "$cuda_prefix/v$cuda_version".
+#
+# .PARAMETER cuda_prefix
+#   Directory that contains the versioned CUDA toolkit directories.
+#
+# .PARAMETER bazel_output_root
+#   Passed to bazel as --output_user_root.
+#
+# .PARAMETER vs_version
+#   When non-empty, Set-VSEnv is called with this Visual Studio version.
+#
+# .PARAMETER bazel_vc_full_version
+#   When non-empty, exported as $env:BAZEL_VC_FULL_VERSION.
+#
+# .PARAMETER xla_submodule
+#   Path to an xla checkout; if it exists it overrides the "xla" repository.
+#
+# .PARAMETER triton_submodule
+#   Path to a triton checkout; if it exists it overrides the "triton" repository.
+#
+# .PARAMETER symlink_python
+#   Build against python through a fixed symlink (python_symlinked next to this
+#   script) so that CI can switch python versions without a full rebuild.
+param(
+  [Parameter(Position=0, Mandatory = $true)]
+  [ValidateSet('cpu', 'cuda')]
+  [String]$build_type,
+
+  [Parameter(Mandatory = $false)]
+  [String]$bazel_path = "bazel",
+
+  [Parameter(Mandatory = $false)]
+  [int]$bazel_jobs = -1,
+
+  [Parameter(Mandatory = $false)]
+  [String]$conda_env = "",
+
+  [Parameter(Mandatory = $false)]
+  [ValidateSet('12.1', '11.8')]
+  [String]$cuda_version = "12.1",
+
+  [Parameter(Mandatory = $false)]
+  [String]$cuda_prefix = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA",
+
+  [Parameter(Mandatory = $false)]
+  [String]$bazel_output_root = "C:/bazel_output_root",
+
+  [Parameter(Mandatory = $false)]
+  [ValidateSet("2022", "2019")]
+  [String]$vs_version = "",
+
+  [Parameter(Mandatory = $false)]
+  [String]$bazel_vc_full_version = "",
+
+  [Parameter(Mandatory = $false)]
+  [String]$xla_submodule = (Join-Path $PSScriptRoot xla),
+
+  [Parameter(Mandatory = $false)]
+  [String]$triton_submodule = (Join-Path $PSScriptRoot triton),
+
+  # For CI to avoid full rebuild when changing python version
+  [switch]$symlink_python
+)
+
+. (Join-Path (Split-Path $MyInvocation.MyCommand.Path) functions.ps1)
+$ErrorActionPreference = "Stop"
+
+# path for patch.exe and realpath.exe
+$msys2_path = "C:\msys64\usr\bin"
+
+switch ($cuda_version) {
+  '12.1' {
+    $cudnn_version = '8.9.1'
+  }
+  '11.8' {
+    $cudnn_version = '8.6.0'
+  }
+}
+
+# $cuda_version is declared [String] in param(), so assigning a [System.Version]
+# back to it would be converted to a string again and .Major/.Minor would be
+# $null. Keep the parsed versions in separate, untyped variables instead.
+$cuda_ver = [System.Version]$cuda_version
+$cudnn_ver = [System.Version]$cudnn_version
+
+$cuda_path = "$cuda_prefix/v$cuda_version"
+$cudnn_path = $cuda_path
+
+if ($xla_submodule -ne (Join-Path $PSScriptRoot xla)) {
+  $xla_submodule = Resolve-Path $xla_submodule
+}
+
+if ($triton_submodule -ne (Join-Path $PSScriptRoot triton)) {
+  $triton_submodule = Resolve-Path $triton_submodule
+}
+
+[System.Collections.ArrayList]$new_path = `
+  'C:\tools', `
+  'C:\Program Files\Git\cmd', `
+  'C:\Windows\System32', `
+  'C:\Windows', `
+  'C:\Windows\System32\Wbem', `
+  'C:\Windows\System32\WindowsPowerShell\v1.0'
+
+Push-Environment
+Push-Location
+
+try {
+  if ($cuda_path -eq $cudnn_path) {
+    $env:TF_CUDA_PATHS="$cuda_path"
+  }
+  else {
+    $env:TF_CUDA_PATHS="$cuda_path,$cudnn_path"
+  }
+
+  # insert your path here
+  $new_path.Insert(0, "$msys2_path")
+
+  # bring github actions python into path
+  if ($env:pythonLocation) {
+    $new_path.Insert(0, "$env:pythonLocation")
+    $new_path.Insert(0, "$env:pythonLocation/Scripts")
+  }
+
+  $env:PATH = $new_path -join ";"
+
+  if ($vs_version -ne "") {
+    Set-VSEnv $vs_version
+  }
+  if ($bazel_vc_full_version -ne "") {
+    $env:BAZEL_VC_FULL_VERSION = $bazel_vc_full_version
+  }
+
+  # bring conda python into environment, this supersedes MSYS2's python and
+  # maybe VS's python
+  if ($conda_env -ne "") {
+    conda activate $conda_env
+  }
+
+  echo 'try-import %workspace%/../windows_configure.bazelrc' > .bazelrc.user
+
+  if ($bazel_jobs -gt 0) {
+    echo "build --jobs=${bazel_jobs}" >> .bazelrc.user
+  }
+
+  if (Test-Path $xla_submodule) {
+    Write-Host -ForegroundColor Yellow "Use xla submodule " $xla_submodule
+    echo ('build:windows --override_repository=xla=' + $xla_submodule.Replace("\", "/")) >> .bazelrc.user
+  }
+
+  if (Test-Path $triton_submodule) {
+    Write-Host -ForegroundColor Yellow "Use triton submodule " $triton_submodule
+    echo ('build:windows --override_repository=triton=' + $triton_submodule.Replace("\", "/")) >> .bazelrc.user
+  }
+
+  $python_bin_path = ""
+  if ($symlink_python) {
+    $python_symlinked_home = Join-Path $PSScriptRoot python_symlinked
+    Remove-Item $python_symlinked_home -Force -ErrorAction 0
+    New-Item -Type SymbolicLink $python_symlinked_home -Target (Split-Path (Get-Command python).Source) -Force
+    # NOTE(review): $env:PATH was already assembled from $new_path above, so
+    # this insertion does not change PATH; confirm whether that is intended.
+    $new_path.Insert(0, $python_symlinked_home)
+
+    $python_bin_path = Join-Path $python_symlinked_home python.exe
+
+    # We use it to trigger the repository rule when python is changed
+    $python_lib_path = (Get-Item $python_symlinked_home).Target.Replace("\", "/")
+    Write-Host -ForegroundColor Yellow "Use PYTHON_LIB_PATH " $python_lib_path
+    echo ('build:windows --repo_env PYTHON_LIB_PATH="' + $python_lib_path + '"') >> .bazelrc.user
+  }
+
+  # NOTE: to debug a build failure, run bazel manually with --output_user_root=$bazel_output_root
+  if ($build_type -eq 'cpu') {
+    python .\build\build.py `
+      --python_bin_path="$python_bin_path" `
+      --noenable_cuda `
+      --bazel_path="$bazel_path" `
+      --bazel_startup_options="--output_user_root=$bazel_output_root"
+  } elseif ($build_type -eq 'cuda') {
+    python .\build\build.py `
+      --python_bin_path="$python_bin_path" `
+      --enable_cuda `
+      --cuda_version="$cuda_version" `
+      --cuda_path="$cuda_path" `
+      --cudnn_version="$cudnn_version" `
+      --cudnn_path="$cudnn_path" `
+      --bazel_path="$bazel_path" `
+      --bazel_startup_options="--output_user_root=$bazel_output_root"
+  }
+
+  if ($LASTEXITCODE -ne 0) {
+    throw "last command exit with $LASTEXITCODE"
+  }
+
+  if ((ls dist).Count -ne 1) {
+    throw "number of whl files != 1"
+  }
+
+  $name = (ls dist)[0].Name
+
+  if ($build_type -eq 'cpu') {
+    mkdir "bazel-dist/cpu" -ErrorAction 0
+    mv -Force "dist/$name" "bazel-dist/cpu/$name"
+  } elseif ($build_type -eq 'cuda') {
+    $cuda_dir = "cuda$($cuda_ver.Major)$($cuda_ver.Minor)"
+    $cuda_cudnn_tag = "cuda$($cuda_ver.Major).cudnn$($cudnn_ver.Major)$($cudnn_ver.Minor)"
+    # Insert the tag in front of the second '-' of the wheel file name, i.e.
+    # directly after the version field.
+    $new_name = $name.Insert($name.IndexOf("-", $name.IndexOf("-") + 1), "+$cuda_cudnn_tag")
+
+    mkdir "bazel-dist/$cuda_dir" -ErrorAction 0
+    mv -Force "dist/$name" "bazel-dist/$cuda_dir/$new_name"
+  }
+}
+finally {
+  Pop-Location
+  Pop-Environment
+}
diff --git a/functions.ps1 b/functions.ps1
index 279a2b9..d96a93d 100644
--- a/functions.ps1
+++ b/functions.ps1
@@ -19,7 +19,7 @@ function Set-VSEnv {
     param (
         [parameter(Mandatory = $false)]
         [ValidateSet(2022, 2019, 2017)]
-        [int]$Version = 2019,
+        [int]$Version = 2022,
 
         [parameter(Mandatory = $false)]
         [ValidateSet("all", "x86", "x64")]
diff --git a/jax b/jax
index 21fc6e0..48ad9a6 160000
--- a/jax
+++ b/jax
@@ -1 +1 @@
-Subproject commit 21fc6e0229e0f5f1cb5f1f69d2c3daa2e5c2ca11
+Subproject commit 48ad9a6f3e72f19f5b86f3bb014587e6bc6c0c4c
diff --git a/triton b/triton
new file mode 160000
index 0000000..8ecb6c9
--- /dev/null
+++ b/triton
@@ -0,0 +1 @@
+Subproject commit 8ecb6c9cd198d1322f1c0d52ac49d7a8486bd735
diff --git a/windows_configure.bazelrc b/windows_configure.bazelrc
index 18cddbd..8ab71c3 100644
--- a/windows_configure.bazelrc
+++ b/windows_configure.bazelrc
@@ -3,9 +3,6 @@ build:windows --features=compiler_param_file
 build:windows --copt=/d2ReducedOptimizeHugeFunctions
 build:windows --host_copt=/d2ReducedOptimizeHugeFunctions
 
-# build:windows --@org_tensorflow//tensorflow/compiler/xla/python:enable_tpu=false
-
 build:windows --copt=/arch:AVX
 build:windows --copt=/arch:AVX2
 build:windows --copt=/DTF_COMPILE_LIBRARY
-