Skip to content

Commit

Permalink
Merge branch 'master' into hjiang/remove-stdout-stderr
Browse files Browse the repository at this point in the history
  • Loading branch information
edoakes authored Feb 8, 2025
2 parents c5b4e06 + 2756c07 commit 64f5e43
Show file tree
Hide file tree
Showing 189 changed files with 4,646 additions and 2,056 deletions.
2 changes: 1 addition & 1 deletion .buildkite/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

This directory contains buildkite pipelines used to start CI tests.

Each step contains a buildkite step that is parsed and executed according to the
Each step contains a buildkite step that is parsed and executed according to the
[Buildkite pipeline specification](https://buildkite.com/docs/pipelines).

## Conditions
Expand Down
5 changes: 3 additions & 2 deletions .buildkite/base.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ steps:
- "3.12"
env:
PYTHON: "{{matrix}}"

- name: oss-ci-base_build
wanda: ci/docker/base.build.py39.wanda.yaml
depends_on: oss-ci-base_test
Expand All @@ -31,7 +31,7 @@ steps:
- name: oss-ci-base_test-aarch64
wanda: ci/docker/base.test.aarch64.wanda.yaml
instance_type: builder-arm64

- name: oss-ci-base_build-aarch64
wanda: ci/docker/base.build.aarch64.wanda.yaml
depends_on: oss-ci-base_test-aarch64
Expand All @@ -45,6 +45,7 @@ steps:
label: "wanda: oss-ci-base_ml-py{{matrix}}"
wanda: ci/docker/base.ml.wanda.yaml
matrix:
- "3.11"
- "3.12"
env:
PYTHON: "{{matrix}}"
Expand Down
6 changes: 3 additions & 3 deletions .buildkite/bisect/bisect.rayci.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
group: bisect
depends_on:
depends_on:
- forge
steps:
- name: macos test
if: build.env("RAYCI_TEST_TYPE") != null && build.env("RAYCI_TEST_TYPE") == "macos_test"
commands:
- if [[ "$(buildkite-agent meta-data get test-type)" != "macos_test" ]]; then exit 0; fi
- RAYCI_BISECT_RUN=1 ./ci/ray_ci/macos/macos_ci.sh bisect "$(buildkite-agent meta-data get test-name)"
- RAYCI_BISECT_RUN=1 ./ci/ray_ci/macos/macos_ci.sh bisect "$(buildkite-agent meta-data get test-name)"
"$(buildkite-agent meta-data get passing-commit)" "$(buildkite-agent meta-data get failing-commit)"
mount_buildkite_agent: true
job_env: MACOS
Expand All @@ -17,7 +17,7 @@ steps:
if: build.env("RAYCI_TEST_TYPE") != null && (build.env("RAYCI_TEST_TYPE") == "linux_test" || build.env("RAYCI_TEST_TYPE") == "windows_test")
commands:
- if [[ "$(buildkite-agent meta-data get test-type)" != "linux_test" && "$(buildkite-agent meta-data get test-type)" != "windows_test" ]]; then exit 0; fi
- bazel run //ci/ray_ci/bisect:bisect_test "$(buildkite-agent meta-data get test-name)"
- bazel run //ci/ray_ci/bisect:bisect_test "$(buildkite-agent meta-data get test-name)"
"$(buildkite-agent meta-data get passing-commit)" "$(buildkite-agent meta-data get failing-commit)"
mount_buildkite_agent: true
priority: 10
4 changes: 2 additions & 2 deletions .buildkite/hooks/post-command
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ if [ -d "/tmp/artifacts/test-summaries" ] && [ "$(ls -A /tmp/artifacts/test-summ
docker run --rm -v /tmp/artifacts:/artifact-mount alpine:latest /bin/sh -c 'rm -rf /artifact-mount/test-summaries' || true
fi

# clean up bazel logs if any, this only has effect when the bazel test runs in the same
# environment as the buildkite job commands, and has no effect when the tests run
# clean up bazel logs if any, this only has effect when the bazel test runs in the same
# environment as the buildkite job commands, and has no effect when the tests run
# inside another test container
rm -rf /tmp/bazel_event_logs
rm -rf /tmp/artifacts/test-summaries
4 changes: 2 additions & 2 deletions .buildkite/llm.rayci.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
group: llm tests
depends_on:
- forge
- oss-ci-base_ml
- oss-ci-base_ml-multipy
steps:
- name: llmbuild
wanda: ci/docker/llm.build.wanda.yaml

- label: "llm tests"
key: "llm-tests"
tags:
Expand Down
57 changes: 30 additions & 27 deletions .buildkite/ml.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ steps:
instance_type: large
parallelism: 2
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... ml
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... ml
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
--except-tags gpu_only,gpu,minimal,tune,doctest,needs_credentials,train_v2
depends_on: [ "mlbuild", "forge" ]
Expand All @@ -75,7 +75,7 @@ steps:

- label: ":train: ml: {{matrix.python}} tests ({{matrix.worker_id}})"
if: build.pull_request.labels includes "continuous-build" || pipeline.id == "0189e759-8c96-4302-b6b5-b4274406bf89" || pipeline.id == "018f4f1e-1b73-4906-9802-92422e3badaa"
tags:
tags:
- python
- train
- tune
Expand All @@ -86,17 +86,17 @@ steps:
--workers 4 --worker-id {{matrix.worker_id}} --parallelism-per-worker 3
--python-version {{matrix.python}}
--except-tags gpu_only,gpu,minimal,doctest,needs_credentials,soft_imports,rllib,multinode
depends_on:
depends_on:
- mlbuild-multipy
- forge
job_env:
job_env:
matrix:
setup:
python: ["3.12"]
worker_id: ["0", "1", "2", "3"]

- label: ":train: ml: train gpu tests"
tags:
tags:
- train
- gpu
instance_type: gpu-large
Expand All @@ -110,7 +110,7 @@ steps:

- label: ":train: ml: train gpu {{matrix.python}} tests ({{matrix.worker_id}})"
if: build.pull_request.labels includes "continuous-build" || pipeline.id == "0189e759-8c96-4302-b6b5-b4274406bf89" || pipeline.id == "018f4f1e-1b73-4906-9802-92422e3badaa"
tags:
tags:
- train
- gpu
instance_type: gpu-large
Expand All @@ -135,9 +135,9 @@ steps:
- oss
instance_type: medium
commands:
- pip install -U boto3==1.28.70 awscli==1.29.70
- pip install -U boto3==1.28.70 awscli==1.29.70
- $(python ci/env/setup_credentials.py)
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... ml
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... ml
--parallelism-per-worker 3
--only-tags needs_credentials
--test-env=WANDB_API_KEY --test-env=COMET_API_KEY
Expand All @@ -147,7 +147,7 @@ steps:
tags: tune
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/tune/... ml
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/tune/... ml
--parallelism-per-worker 3
--except-tags soft_imports,gpu_only,rllib,multinode
depends_on: [ "mlbuild", "forge" ]
Expand All @@ -157,18 +157,18 @@ steps:
instance_type: small
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/tune/... ml
--only-tags soft_imports
--only-tags soft_imports
--build-name oss-ci-base_build
depends_on: [ "oss-ci-base_build", "forge" ]

- label: ":train: ml: air tests"
tags: ml
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/air/... ml
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/air/... ml
--parallelism-per-worker 3
--except-tags gpu
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/data/... ml
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/data/... ml
--parallelism-per-worker 3
--only-tags ray_air
--skip-ray-installation
Expand All @@ -178,19 +178,19 @@ steps:
tags: train
instance_type: medium
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... ml
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... ml
--parallelism-per-worker 3
--only-tags tune
--except-tags gpu_only,ray_air,gpu,doctest,needs_credentials
depends_on: [ "mlbuild", "forge" ]

- label: ":train: ml: rllib+tune tests"
tags:
tags:
- tune
- rllib
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/tune/... ml
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/tune/... ml
--parallelism-per-worker 3
--only-tags rllib
--except-tags gpu_only
Expand All @@ -203,7 +203,7 @@ steps:
- release_tests
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //release/... ml
- bazel run //ci/ray_ci:test_in_docker -- //release/... ml
--parallelism-per-worker 3
depends_on: [ "mlbuild", "forge" ]

Expand All @@ -225,52 +225,55 @@ steps:
- bazel run //ci/ray_ci:build_in_docker -- docker
--platform cpu --image-type ray --canonical-tag multinode
- python ./ci/build/build-multinode-image.py rayproject/ray:multinode rayproject/ray:multinode
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/tune/... ml
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/tune/... ml
--only-tags multinode
--test-env=RAY_HAS_SSH=1
--test-env=RAY_DOCKER_IMAGE=rayproject/ray:multinode
--test-env=RAY_TEMPDIR="/ray-mount"
--test-env=RAY_HOSTDIR="$${RAYCI_CHECKOUT_DIR}"
--test-env=RAY_TESTHOST="rayci.localhost"
depends_on:
depends_on:
- manylinux
- forge
- raycpubase
- mlbuild

- label: ":train: ml: doc tests"
tags:
tags:
- train
- tune
- doc
instance_type: large
parallelism: 2
commands:
# doc tests
- bazel run //ci/ray_ci:test_in_docker -- python/ray/... //doc/... ml
- bazel run //ci/ray_ci:test_in_docker -- python/ray/... //doc/... ml
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
--only-tags doctest
--except-tags gpu
--parallelism-per-worker 3
# doc examples
- bazel run //ci/ray_ci:test_in_docker -- //doc/... ml
- bazel run //ci/ray_ci:test_in_docker -- //doc/... ml
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
--except-tags gpu,post_wheel_build,doctest,highly_parallel
--parallelism-per-worker 3
--skip-ray-installation
depends_on: [ "mlbuild", "forge" ]

- label: ":train: ml: train gpu lightning 2.0 tests"
tags:
tags:
- train
- gpu
instance_type: gpu-large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... ml
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... ml
--build-name mllightning2gpubuild
--only-tags ptl_v2
depends_on: [ "mllightning2gpubuild", "forge" ]

- label: ":train: ml: flaky tests"
key: ml_flaky_tests
tags:
tags:
- train
- skip-on-premerge
instance_type: large
Expand All @@ -289,7 +292,7 @@ steps:
- oss
instance_type: medium
commands:
- pip install -U boto3==1.28.70 awscli==1.29.70
- pip install -U boto3==1.28.70 awscli==1.29.70
- $(python ci/env/setup_credentials.py)
- bazel run //ci/ray_ci:test_in_docker -- //... ml --run-flaky-tests
--parallelism-per-worker 3
Expand All @@ -300,7 +303,7 @@ steps:

- label: ":train: ml: train gpu flaky tests"
key: ml_flaky_gpu_tests
tags:
tags:
- train
- skip-on-premerge
- gpu
Expand Down
4 changes: 2 additions & 2 deletions .buildkite/release-automation/pre_release.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ steps:
key: check-commit-hash
commands:
- bash .buildkite/release-automation/check-commit-hash.sh

- label: "Build update version binary"
key: build-update-version-zip
instance_type: default
Expand All @@ -37,7 +37,7 @@ steps:
env:
RAYCI_RELEASE: 1
RAYCI_FULL_PLATFORM_RELEASE: "${RAYCI_FULL_PLATFORM_RELEASE}"

- label: "Trigger Postmerge nightly build & test"
if: build.env("RAYCI_WEEKLY_RELEASE_NIGHTLY") == "1"
trigger: "postmerge"
Expand Down
8 changes: 4 additions & 4 deletions .buildkite/release-automation/wheels.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ group: Upload & Validate wheels
steps:
- block: "Upload wheels from S3 to TestPyPI"
key: block-upload-wheels-testpypi
depends_on:
depends_on:
- forge

- label: "Upload wheels from S3 to TestPyPI"
Expand All @@ -15,7 +15,7 @@ steps:
- export RAY_VERSION="$RAY_VERSION"
- export RAY_COMMIT="$RAY_COMMIT"
- source .buildkite/release-automation/set-ray-version.sh
- bazel run //ci/ray_ci/automation:upload_wheels_pypi --
- bazel run //ci/ray_ci/automation:upload_wheels_pypi --
--ray_version="$$RAY_VERSION" --commit_hash="$$RAY_COMMIT"
--pypi_env=test

Expand Down Expand Up @@ -65,7 +65,7 @@ steps:
- block: "Download & validate Ray wheels from TestPyPI Mac"
key: block-validate-macos-wheels
depends_on: []

- label: "MacOS x86_64"
key: validate-macos-x86_64-wheels
depends_on:
Expand Down Expand Up @@ -98,6 +98,6 @@ steps:
- export RAY_VERSION="$RAY_VERSION"
- export RAY_COMMIT="$RAY_COMMIT"
- source .buildkite/release-automation/set-ray-version.sh
- bazel run //ci/ray_ci/automation:upload_wheels_pypi --
- bazel run //ci/ray_ci/automation:upload_wheels_pypi --
--ray_version="$$RAY_VERSION" --commit_hash="$$RAY_COMMIT"
--pypi_env=prod
Loading

0 comments on commit 64f5e43

Please sign in to comment.