diff --git a/.github/workflows/_build_container.yml b/.github/workflows/_build_container.yml
new file mode 100644
index 00000000..c4a17c39
--- /dev/null
+++ b/.github/workflows/_build_container.yml
@@ -0,0 +1,71 @@
+# Reusable workflow that builds the NeMo Curator image and pushes it to the registry.
+name: Build NeMo Curator container
+on:
+  workflow_call:
+    inputs:
+      ref:
+        description: Git ref to checkout
+        default: ${{ github.sha }}
+        required: false
+        type: string
+
+defaults:
+  run:
+    shell: bash -x -e -u -o pipefail {0}
+
+jobs:
+  main:
+    runs-on: self-hosted
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          path: ${{ github.run_id }}
+          # Check out the ref requested by the caller via the `ref` input.
+          ref: ${{ inputs.ref }}
+
+      - name: Clean runner cache
+        run: |
+          docker system prune --filter "until=24h" --force
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          # We use `docker` driver as this speeds things up for
+          # trivial (non-multi-stage) builds.
+          driver: docker
+
+      # Best-effort warm-up: `|| true` tolerates cache tags that do not exist yet.
+      - name: Pull cache images
+        run: |
+          docker pull nemoci.azurecr.io/nemo_curator_container:${{ github.event.pull_request.number || 'buildcache' }} || true
+          docker pull nemoci.azurecr.io/nemo_curator_container:buildcache || true
+
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          file: Dockerfile
+          push: true
+          # NOTE(review): confirm the Dockerfile declares an `ALIGNER_COMMIT`
+          # build argument; the name does not mention Curator.
+          build-args: |
+            MAX_JOBS=32
+            ALIGNER_COMMIT=${{ github.event.pull_request.head.sha || github.sha }}
+          cache-from: |
+            nemoci.azurecr.io/nemo_curator_container:${{ github.event.pull_request.number || 'buildcache' }}
+            nemoci.azurecr.io/nemo_curator_container:buildcache
+          cache-to: type=inline
+          tags: nemoci.azurecr.io/nemo_curator_container:${{ github.run_id }}
+
+      # Publish the run image under the PR number, the tag used as a cache source above.
+      - name: Update PR image
+        if: github.event_name == 'pull_request'
+        run: |
+          docker tag nemoci.azurecr.io/nemo_curator_container:${{ github.run_id }} nemoci.azurecr.io/nemo_curator_container:${{ github.event.pull_request.number }}
+          docker push nemoci.azurecr.io/nemo_curator_container:${{ github.event.pull_request.number }}
+
+      - name: Update buildcache image
+        if: github.ref == 'refs/heads/main'
+        run: |
+          docker tag nemoci.azurecr.io/nemo_curator_container:${{ github.run_id }} nemoci.azurecr.io/nemo_curator_container:buildcache
+          docker push nemoci.azurecr.io/nemo_curator_container:buildcache
diff --git a/.github/workflows/gpuci.yml b/.github/workflows/gpuci.yml
index c27ade49..3e019d01 100644
--- a/.github/workflows/gpuci.yml
+++ b/.github/workflows/gpuci.yml
@@ -4,11 +4,18 @@ on:
   pull_request:
     branches:
       - 'main'
-      - 'r**'
+      - '[rv][0-9].[0-9].[0-9]'
+      - '[rv][0-9].[0-9].[0-9]rc[0-9]'
     types: [ labeled ]
 
 jobs:
+  # Build and push the container image before the GPU tests run.
+  build-container:
+    if: ${{ github.event.label.name == 'gpuci' }}
+    uses: ./.github/workflows/_build_container.yml
+
   gpu-test:
+    needs: build-container
     runs-on: self-hosted
     if: ${{ github.event.label.name == 'gpuci' }}
 
@@ -19,18 +26,34 @@ jobs:
           nvidia-smi
 
       - name: Checkout the PR code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      # The build job pushes the image under the run ID; `buildcache` is only
+      # refreshed on main, so pulling it would test a stale image.
+      - name: Pull the Docker image built in the previous step
+        run: docker pull nemoci.azurecr.io/nemo_curator_container:${{ github.run_id }}
 
-      - name: Set up Python 3.10
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.10'
+      # `--gpus all` requests GPU access for the container that runs the GPU tests.
+      - name: Run Docker container with Python 3.10
+        run: |
+          docker run --gpus all --name nemo-curator-container -d nemoci.azurecr.io/nemo_curator_container:${{ github.run_id }} bash -c "sleep infinity"
+
+      - name: Install Python 3.10 inside the container
+        run: |
+          docker exec nemo-curator-container apt-get update
+          docker exec nemo-curator-container apt-get install -y python3.10 python3.10-venv python3.10-dev
+          docker exec nemo-curator-container python3.10 -m venv /opt/py310
+          docker exec nemo-curator-container /opt/py310/bin/pip install --upgrade pip
 
-      - name: Install NeMo-Curator and PyTest
+      # NOTE(review): `.[cuda12x]` resolves relative to the container's
+      # working directory - confirm the image holds the repository there.
+      - name: Install NeMo-Curator and PyTest dependencies
         run: |
-          pip install cython setuptools pip --upgrade
-          pip install --extra-index-url https://pypi.nvidia.com ".[cuda12x]"
-          pip install pytest
+          docker exec nemo-curator-container /opt/py310/bin/pip install cython pytest setuptools pip --upgrade
+          docker exec nemo-curator-container /opt/py310/bin/pip install --extra-index-url https://pypi.nvidia.com ".[cuda12x]"
 
       - name: Verify installation
         run: |
@@ -38,4 +61,10 @@ jobs:
 
       - name: Run PyTests with GPU mark
         run: |
-          python -m pytest -m gpu
+          docker exec nemo-curator-container /opt/py310/bin/pytest -m gpu
+
+      # Run even when an earlier step fails so the fixed container name is
+      # free for the next job on this self-hosted runner.
+      - name: Cleanup
+        if: always()
+        run: docker rm --force nemo-curator-container