-
Notifications
You must be signed in to change notification settings - Fork 95
133 lines (107 loc) · 3.64 KB
/
pr_tests.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
---
# CI workflow: runs the optimum-nvidia GPU-enabled test suites on pull
# requests targeting `main` and on pushes to `ci-*` staging branches.
name: GPU Enabled Integration Test on PRs

on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - ci-*

# Cancel any in-flight run for the same ref so only the newest commit
# of a PR is tested; `run_id` keeps non-PR pushes from grouping together.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

env:
  # Quoted so the values stay the literal string "ON"; a bare `ON` is a
  # YAML 1.1 boolean and would reach the jobs as the string "true".
  OPTIMUM_NVIDIA_IS_CI: "ON"
  HF_HUB_ENABLE_HF_TRANSFER: "ON"

jobs:
run_gpu_tests:
strategy:
fail-fast: false
matrix:
config:
- name: Optimum-Nvidia Test Suite
image: nvidia/cuda:12.6.3-devel-ubuntu24.04
gpu_target: ["aws-g6-12xlarge-cache", "aws-g5-12xlarge-cache"]
name: ${{ matrix.config.name }}
runs-on:
group: "${{matrix.gpu_target}}"
container:
image: ${{ matrix.config.image }}
options: --mount type=tmpfs,destination=/tmp --privileged --shm-size 64gb --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/
env:
HF_TOKEN: ${{ secrets.OPTIMUM_NVIDIA_HUB_READ_TOKEN }}
defaults:
run:
shell: bash
steps:
- name: Change ownership of /github/home
run: chown -R $(whoami) /github/home
- uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Checkout optimum-nvidia
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Install dependencies
run: |
apt update && apt install -y libmpich-dev libopenmpi-dev openmpi-bin git
python3 -m pip install --upgrade -e .[quality,tests] --extra-index-url https://pypi.nvidia.com
- name: Run nvidia-smi
run: |
nvidia-smi
- name: Print TensorRT-LLM version
run: |
python -c "from tensorrt_llm import __version__; print(__version__)"
- name: Run optimum-nvidia test-suite
run: |
pytest -s -v -p no:warnings -o log_cli=true --ignore=tests/cli tests/
run_optimum_cli_tests:
strategy:
fail-fast: false
matrix:
config:
- name: Optimum-Nvidia CLI Test Suite
image: nvidia/cuda:12.6.3-devel-ubuntu24.04
gpu_target: ["aws-g6-12xlarge-cache", "aws-g5-12xlarge-cache"]
name: ${{ matrix.config.name }}
runs-on:
group: "${{matrix.gpu_target}}"
container:
image: ${{ matrix.config.image }}
options: --mount type=tmpfs,destination=/tmp --privileged --shm-size 64gb --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/
env:
HF_TOKEN: ${{ secrets.OPTIMUM_NVIDIA_HUB_READ_TOKEN }}
defaults:
run:
shell: bash
steps:
- name: Change ownership of /github/home
run: chown -R $(whoami) /github/home
- uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Checkout optimum-nvidia
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Install dependencies
run: |
apt update && apt install -y openmpi-bin libopenmpi-dev git
python3 -m pip install --upgrade -e .[quality,tests] --extra-index-url https://pypi.nvidia.com
- name: Run nvidia-smi
run: |
nvidia-smi
- name: Print TensorRT-LLM version
run: |
python -c "from tensorrt_llm import __version__; print(__version__)"
- name: Run optimum-nvidia cli test-suite
run: |
pytest -rx -s -p no:warnings tests/cli
- name: Tailscale Wait
if: ${{ failure() || runner.debug == '1' }}
uses: huggingface/tailscale-action@main
with:
waitForSSH: true
authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}