-
Notifications
You must be signed in to change notification settings - Fork 96
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add quantization tests * Add some tooling for the CI * Added initial set of test for quantization and sharding * Use Protocol instead of ABC * Add utility to ensure we can run on GPU and define how much we need * Attempt to enable the workflow * Second attempt * Let's relax numpy version for now. * Disable a few things * Again * Again Again * Refactored tests and utils * Update command for unittests * Quality * Once more * One last? * Add Makefile to make it easier to run common commands
- Loading branch information
1 parent
f2ca732
commit fc57497
Showing
11 changed files
with
177 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# Runs the `tests` directory with pytest inside the optimum-nvidia container,
# with RUN_CPU_ONLY enabled in the environment.
name: CPU Only Test Suite on PRs

# Triggered by pull requests targeting `main` and by pushes to `ci-*` branches.
on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - ci-*

# Runs are grouped per workflow and head ref (falling back to the run id when
# `github.head_ref` is empty); a newer run in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

# Environment variables exported to every step of the workflow.
env:
  OPTIMUM_NVIDIA_IS_CI: ON
  RUN_CPU_ONLY: ON

jobs:
  run_fast_tests:
    strategy:
      # Keep the remaining matrix entries running when one of them fails.
      fail-fast: false
      matrix:
        config:
          # NOTE(review): the workflow is labelled "CPU only" but this entry targets
          # an `nvidia-gpu` runner and the container is started with `--gpus all` -
          # confirm this is intentional.
          - name: Fast Optimum-Nvidia Test Suite
            runner: [ci, nvidia-gpu]
            image: huggingface/optimum-nvidia
            report: cpu_only

    name: ${{ matrix.config.name }}
    runs-on: ${{ matrix.config.runner }}

    container:
      image: ${{ matrix.config.image }}
      # Host directory /mnt/hf_cache is mounted at /mnt/cache/ inside the container.
      options: --shm-size "16gb" --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/

    defaults:
      run:
        shell: bash

    steps:
      - name: Checkout optimum-nvidia
        uses: actions/checkout@v4
        with:
          # Shallow clone: only the latest commit is fetched.
          fetch-depth: 1

      # Editable install of the package with its `quality` and `tests` extras.
      - name: Install dependencies
        run: |
          python -m pip install --upgrade -e .[quality,tests]
      - name: Run fast optimum-nvidia CPU tests
        run: |
          python -m pytest -s -v -p no:warnings tests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Neither target produces a file: declare them phony so that a file or directory
# named `quality` / `fix-quality` can never make them look up to date.
.PHONY: fix-quality quality

# Lint, then reformat the sources in place.
# `ruff check` is invoked without `--fix`: lint violations are reported, not rewritten.
fix-quality:
	python3 -m ruff check examples scripts src tests
	python3 -m ruff format examples scripts src tests

# Lint and verify the formatting without modifying any file.
quality:
	python3 -m ruff check examples scripts src tests
	python3 -m ruff format examples scripts src tests --check
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .utils import nightly, requires_gpu, slow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import functools | ||
import os | ||
from distutils.util import strtobool | ||
|
||
import pytest | ||
|
||
from optimum.nvidia.utils.nvml import get_device_count | ||
|
||
|
||
# Integer form of a true flag (the value `strtobool` yields for true-like strings)
INT_TRUE_VALUE = 1

# Names of the environment variables controlling which tests are selected
ENVVAR_NAME_RUN_NIGHTLY = "RUN_NIGHTLY"
ENVVAR_NAME_RUN_SLOW = "RUN_SLOW"
ENVVAR_NAME_RUN_CPU_ONLY = "RUN_CPU_ONLY"
|
||
|
||
@functools.cache
def parse_flag_from_env(name: str, default: bool) -> bool:
    """
    Parse the environment variable `name` as a boolean.

    The accepted spellings are matched case-insensitively and mirror the ones of
    `distutils.util.strtobool`: `y`, `yes`, `t`, `true`, `on`, `1` evaluate to `True`,
    while `n`, `no`, `f`, `false`, `off`, `0` evaluate to `False`.

    The result is memoized for each `(name, default)` pair: a change of the environment
    variable after the first call is not observed by later calls in the same process.

    :param name: Name of target environment variable
    :param default: The default value to apply if `name` is not present
    :return: Boolean value
    :raises ValueError: If the value is not one of the accepted spellings
    """

    # Retrieve the value or the textual form of `default` if not present
    value = os.environ.get(name, str(default))

    # Parsed locally rather than through `distutils.util.strtobool`:
    # distutils is deprecated (PEP 632) and no longer shipped with Python 3.12+.
    normalized = value.lower()
    if normalized in ("y", "yes", "t", "true", "on", "1"):
        return True
    if normalized in ("n", "no", "f", "false", "off", "0"):
        return False

    raise ValueError(f"Failed to convert environment variable {name}={value} to a bool")
|
||
|
||
# Opt-in markers: the decorated test is skipped unless the matching RUN_* environment
# variable is set to a true value (both flags default to False, i.e. skipped).
nightly = pytest.mark.skipif(
    not parse_flag_from_env(ENVVAR_NAME_RUN_NIGHTLY, False),
    reason="Nightly test",
)
slow = pytest.mark.skipif(
    not parse_flag_from_env(ENVVAR_NAME_RUN_SLOW, False),
    reason="Slow test",
)
|
||
# Skip the decorated test when RUN_CPU_ONLY is enabled or when `get_device_count()` is falsy.
requires_gpu = pytest.mark.skipif(
    parse_flag_from_env(ENVVAR_NAME_RUN_CPU_ONLY, False) or not get_device_count(),
    reason=(
        f"RUN_CPU_ONLY={parse_flag_from_env(ENVVAR_NAME_RUN_CPU_ONLY, False)}"
        f" or no GPU detected (num_gpus={get_device_count()})"
    ),
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from parameterized import parameterized | ||
|
||
from optimum.nvidia.models.llama import LLamaForCausalLM as TrtLlamaForCausalLM | ||
from optimum.nvidia.utils.tests import requires_gpu | ||
|
||
|
||
@parameterized.expand(["float16", "bfloat16"])
@requires_gpu
def test_build_engine_7b_with_tp(dtype: str):
    # Load the checkpoint with the requested dtype; the test only checks that the
    # returned object is truthy.
    llama = TrtLlamaForCausalLM.from_pretrained("huggingface/llama-7b", dtype=dtype)
    assert llama
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
from tensorrt_llm.quantization import QuantMode | ||
|
||
from optimum.nvidia.configs import NO_QUANTIZATION, QuantizationConfig | ||
|
||
|
||
def test_no_quantization_has_quantization_step():
    """A config built from `NO_QUANTIZATION` must not report a quantization step."""
    assert not QuantizationConfig(NO_QUANTIZATION).has_quantization_step
|
||
|
||
def test_float8_quantization_has_quantization_step():
    """A config built from an FP8 QDQ + FP8 KV cache quant mode must report a quantization step."""
    fp8_mode = QuantMode.from_description(use_fp8_qdq=True, use_fp8_kv_cache=True)
    assert QuantizationConfig(fp8_mode).has_quantization_step
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
from unittest import TestCase | ||
|
||
import numpy as np | ||
from parameterized import parameterized | ||
|
||
from optimum.nvidia.weights import shard | ||
|
||
|
||
# Shape (dim 0, dim 1) of the random matrix built by the sharding test case
TENSOR_DIM_0 = 1024
TENSOR_DIM_1 = 4096
|
||
|
||
class MatrixShardingTestCase(TestCase):
    """Compare `shard` against plain NumPy slicing on a random (TENSOR_DIM_0, TENSOR_DIM_1) matrix."""

    def setUp(self):
        # A fresh random matrix is drawn before every test method
        self.tensor = np.random.rand(TENSOR_DIM_0, TENSOR_DIM_1)

    def test_no_sharding(self):
        # Rank 0 out of a single shard must give back the full matrix
        whole = shard(self.tensor, 0, 1, axis=0)
        self.assertTrue(np.array_equal(whole, self.tensor))

    @parameterized.expand([1, 2, 4, 8])
    def test_sharding_tensor_parallelism_axis_0(self, tp_degree: int):
        # Number of rows each rank is expected to receive
        rows_per_rank = TENSOR_DIM_0 // tp_degree

        pieces = [shard(self.tensor, rank, tp_degree, axis=0) for rank in range(tp_degree)]

        for rank, piece in enumerate(pieces):
            start, stop = rank * rows_per_rank, (rank + 1) * rows_per_rank
            self.assertTupleEqual(piece.shape, (rows_per_rank, TENSOR_DIM_1))
            self.assertTrue(np.array_equal(piece, self.tensor[start:stop]))

    @parameterized.expand([1, 2, 4, 8])
    def test_sharding_tensor_parallelism_axis_1(self, tp_degree: int):
        # Number of columns each rank is expected to receive
        cols_per_rank = TENSOR_DIM_1 // tp_degree

        pieces = [shard(self.tensor, rank, tp_degree, axis=1) for rank in range(tp_degree)]

        for rank, piece in enumerate(pieces):
            start, stop = rank * cols_per_rank, (rank + 1) * cols_per_rank
            self.assertTupleEqual(piece.shape, (TENSOR_DIM_0, cols_per_rank))
            self.assertTrue(np.array_equal(piece, self.tensor[:, start:stop]))