Skip to content

Make float8 quantization back in the game. #22

Make float8 quantization back in the game.

Make float8 quantization back in the game. #22

Workflow file for this run

# Runs the fast optimum-nvidia test suite for pull requests targeting `main`
# and for pushes to branches matching `ci-*`.
name: Test Suite on PRs

on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - ci-*

# Runs are grouped by workflow name plus the PR head ref, falling back to the
# run id when no head ref is set; a newer run in the same group cancels the
# one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

# Values are quoted so that every YAML loader yields the literal string "ON"
# instead of interpreting it as a YAML 1.1 boolean.
env:
  OPTIMUM_NVIDIA_IS_CI: "ON"
  RUN_CPU_ONLY: "ON"

jobs:
  run_fast_tests:
    strategy:
      # Do not cancel the remaining matrix entries when one of them fails.
      fail-fast: false
      matrix:
        config:
          - name: Fast Optimum-Nvidia Test Suite
            runner: [ci, nvidia-gpu]
            image: huggingface/optimum-nvidia:devel
            report: dev
        # cuda_arch: [sm_86, sm_89]
        cuda_arch: [sm_89]

    name: ${{ matrix.config.name }}
    # The CUDA architecture from the matrix is appended as a runner label.
    runs-on: [ci, nvidia-gpu, multi-gpu, "${{ matrix.cuda_arch }}"]

    container:
      image: ${{ matrix.config.image }}
      options: --shm-size "16gb" --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/
      # NOTE(review): nesting was reconstructed from a copy that had lost its
      # indentation; `env` is assumed to belong to `container` rather than to
      # the job itself. Confirm against the repository.
      env:
        HF_TOKEN: ${{ secrets.OPTIMUM_NVIDIA_HUB_READ_TOKEN }}

    defaults:
      run:
        shell: bash

    steps:
      - name: Checkout optimum-nvidia
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Install dependencies
        # The extras spec is quoted so bash never treats `[quality,tests]` as
        # a glob bracket expression.
        run: |
          python -m pip install --upgrade -e ".[quality,tests]"

      - name: Run fast optimum-nvidia CPU tests
        run: |
          python -m pytest -n 4 -s -v -p no:warnings tests