From fc57497db0a1bd43c0e8b3568a604619d77dcbc5 Mon Sep 17 00:00:00 2001
From: Funtowicz Morgan
Date: Thu, 14 Dec 2023 15:26:25 +0100
Subject: [PATCH] Initial set of unittests in CI (#43)

* Add quantization tests

* Add some tooling for the CI

* Added initial set of tests for quantization and sharding

* Use Protocol instead of ABC

* Add utility to ensure we can run on GPU and define how much we need

* Attempt to enable the workflow

* Second attempt

* Let's relax numpy version for now.

* Disable a few things

* Again

* Again Again

* Refactored tests and utils

* Update command for unittests

* Quality

* Once more

* One last?

* Add Makefile to make it easier to run common commands

---
 .github/workflows/pr_fast_tests.yml        | 53 ++++++++++++++++++++++
 Makefile                                   |  7 +++
 pyproject.toml                             |  8 +++-
 setup.py                                   |  2 +-
 src/optimum/nvidia/configs/__init__.py     |  2 +-
 src/optimum/nvidia/utils/tests/__init__.py |  1 +
 src/optimum/nvidia/utils/tests/utils.py    | 43 ++++++++++++++++++
 tests/models/test_llama.py                 | 11 +++++
 tests/quantization.py                      |  0
 tests/test_quantization.py                 | 14 ++++++
 tests/test_sharding.py                     | 39 ++++++++++++++++
 11 files changed, 177 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/pr_fast_tests.yml
 create mode 100644 Makefile
 create mode 100644 src/optimum/nvidia/utils/tests/__init__.py
 create mode 100644 src/optimum/nvidia/utils/tests/utils.py
 create mode 100644 tests/models/test_llama.py
 delete mode 100644 tests/quantization.py
 create mode 100644 tests/test_quantization.py
 create mode 100644 tests/test_sharding.py

diff --git a/.github/workflows/pr_fast_tests.yml b/.github/workflows/pr_fast_tests.yml
new file mode 100644
index 00000000..6acc23c8
--- /dev/null
+++ b/.github/workflows/pr_fast_tests.yml
@@ -0,0 +1,53 @@
+name: CPU Only Test Suite on PRs
+
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - ci-*
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+env:
+  OPTIMUM_NVIDIA_IS_CI: ON
+  RUN_CPU_ONLY: ON
+
+jobs:
+  run_fast_tests:
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - name: Fast Optimum-Nvidia Test Suite
+            runner: [ci, nvidia-gpu]
+            image: huggingface/optimum-nvidia
+            report: cpu_only
+
+    name: ${{ matrix.config.name }}
+    runs-on: ${{ matrix.config.runner }}
+
+    container:
+      image: ${{ matrix.config.image }}
+      options: --shm-size "16gb" --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/
+
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - name: Checkout optimum-nvidia
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade -e .[quality,tests]
+
+      - name: Run fast optimum-nvidia CPU tests
+        run: |
+          python -m pytest -s -v -p no:warnings tests
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..5e4634ec
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,7 @@
+fix-quality:
+	python3 -m ruff check --fix examples scripts src tests
+	python3 -m ruff format examples scripts src tests
+
+quality:
+	python3 -m ruff check examples scripts src tests
+	python3 -m ruff format examples scripts src tests --check
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index b20c0730..a5d800da 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,4 +24,10 @@
 indent-style = "space"
 skip-magic-trailing-comma = false
 # Like Black, automatically detect the appropriate line ending.
-line-ending = "auto" \ No newline at end of file +line-ending = "auto" + + +[tool.pytest.ini_options] +pythonpath = [ + "src" +] \ No newline at end of file diff --git a/setup.py b/setup.py index 928dd81b..d9d6dc60 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ "fsspec", "huggingface_hub >= 0.14.0", "hf-transfer", - "numpy >= 1.24.0", + "numpy >= 1.22.0", "onnx >= 1.12.0", "optimum >= 1.13.0", "transformers >= 4.32.1", diff --git a/src/optimum/nvidia/configs/__init__.py b/src/optimum/nvidia/configs/__init__.py index e8273a0b..1eac3d2b 100644 --- a/src/optimum/nvidia/configs/__init__.py +++ b/src/optimum/nvidia/configs/__init__.py @@ -15,4 +15,4 @@ from typing import Protocol from .base import ModelConfig, TransformersConfig -from .quantization import QuantizationConfig +from .quantization import NO_QUANTIZATION, QuantizationConfig diff --git a/src/optimum/nvidia/utils/tests/__init__.py b/src/optimum/nvidia/utils/tests/__init__.py new file mode 100644 index 00000000..b6d12cfe --- /dev/null +++ b/src/optimum/nvidia/utils/tests/__init__.py @@ -0,0 +1 @@ +from .utils import nightly, requires_gpu, slow diff --git a/src/optimum/nvidia/utils/tests/utils.py b/src/optimum/nvidia/utils/tests/utils.py new file mode 100644 index 00000000..925ad830 --- /dev/null +++ b/src/optimum/nvidia/utils/tests/utils.py @@ -0,0 +1,43 @@ +import functools +import os +from distutils.util import strtobool + +import pytest + +from optimum.nvidia.utils.nvml import get_device_count + + +INT_TRUE_VALUE = 1 + +# Environment variable controlling test set +ENVVAR_NAME_RUN_NIGHTLY = "RUN_NIGHTLY" +ENVVAR_NAME_RUN_SLOW = "RUN_SLOW" +ENVVAR_NAME_RUN_CPU_ONLY = "RUN_CPU_ONLY" + + +@functools.cache +def parse_flag_from_env(name: str, default: bool) -> bool: + """ + Parse the environment variable `name` as a boolean + :param name: Name of target environment variable + :param default: The default value to apply if `name` is not present + :return: Boolean value + """ + + # Retrieve the value or `default` if not present + value = os.environ.get(name, str(default)) + + try: + return strtobool(value) == INT_TRUE_VALUE + except ValueError: + raise ValueError(f"Failed to convert environment variable {name}={value} to a bool") + + +nightly = pytest.mark.skipif(parse_flag_from_env(ENVVAR_NAME_RUN_NIGHTLY, False), reason="Nightly test") +slow = pytest.mark.skipif(parse_flag_from_env(ENVVAR_NAME_RUN_SLOW, False), reason="Slow test") + +requires_gpu = pytest.mark.skipif( + parse_flag_from_env(ENVVAR_NAME_RUN_CPU_ONLY, False) or not get_device_count(), + reason=f"RUN_CPU_ONLY={parse_flag_from_env(ENVVAR_NAME_RUN_CPU_ONLY, False)} or " + f"no GPU detected (num_gpus={get_device_count()})", +) diff --git a/tests/models/test_llama.py b/tests/models/test_llama.py new file mode 100644 index 00000000..ab705b5c --- /dev/null +++ b/tests/models/test_llama.py @@ -0,0 +1,11 @@ +from parameterized import parameterized + +from optimum.nvidia.models.llama import LLamaForCausalLM as TrtLlamaForCausalLM +from optimum.nvidia.utils.tests import requires_gpu + + +@parameterized.expand(["float16", "bfloat16"]) +@requires_gpu +def test_build_engine_7b_with_tp(dtype: str): + model = TrtLlamaForCausalLM.from_pretrained("huggingface/llama-7b", dtype=dtype) + assert model diff --git a/tests/quantization.py b/tests/quantization.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/test_quantization.py b/tests/test_quantization.py new file mode 100644 index 00000000..1fb6e325 --- /dev/null +++ b/tests/test_quantization.py @@ -0,0 +1,14 @@ 
+from tensorrt_llm.quantization import QuantMode
+
+from optimum.nvidia.configs import NO_QUANTIZATION, QuantizationConfig
+
+
+def test_no_quantization_has_quantization_step():
+    qconfig = QuantizationConfig(NO_QUANTIZATION)
+    assert not qconfig.has_quantization_step
+
+
+def test_float8_quantization_has_quantization_step():
+    qconfig = QuantizationConfig(QuantMode.from_description(use_fp8_qdq=True, use_fp8_kv_cache=True))
+
+    assert qconfig.has_quantization_step
diff --git a/tests/test_sharding.py b/tests/test_sharding.py
new file mode 100644
index 00000000..63b956a4
--- /dev/null
+++ b/tests/test_sharding.py
@@ -0,0 +1,39 @@
+from unittest import TestCase
+
+import numpy as np
+from parameterized import parameterized
+
+from optimum.nvidia.weights import shard
+
+
+TENSOR_DIM_0 = 1024
+TENSOR_DIM_1 = 4096
+
+
+class MatrixShardingTestCase(TestCase):
+    def setUp(self):
+        self.tensor = np.random.rand(TENSOR_DIM_0, TENSOR_DIM_1)
+
+    def test_no_sharding(self):
+        sharded_tensor = shard(self.tensor, 0, 1, axis=0)
+        self.assertTrue(np.array_equal(sharded_tensor, self.tensor))
+
+    @parameterized.expand([1, 2, 4, 8])
+    def test_sharding_tensor_parallelism_axis_0(self, tp_degree: int):
+        shard_size = TENSOR_DIM_0 // tp_degree
+
+        shards = [shard(self.tensor, rank, tp_degree, axis=0) for rank in range(tp_degree)]
+
+        for rank, tensor in enumerate(shards):
+            self.assertTupleEqual(tensor.shape, (TENSOR_DIM_0 // tp_degree, TENSOR_DIM_1))
+            self.assertTrue(np.array_equal(tensor, self.tensor[rank * shard_size : (rank + 1) * shard_size]))
+
+    @parameterized.expand([1, 2, 4, 8])
+    def test_sharding_tensor_parallelism_axis_1(self, tp_degree: int):
+        shard_size = TENSOR_DIM_1 // tp_degree
+
+        shards = [shard(self.tensor, rank, tp_degree, axis=1) for rank in range(tp_degree)]
+
+        for rank, tensor in enumerate(shards):
+            self.assertTupleEqual(tensor.shape, (TENSOR_DIM_0, TENSOR_DIM_1 // tp_degree))
+            self.assertTrue(np.array_equal(tensor, self.tensor[:, rank * shard_size : (rank + 1) * shard_size]))
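-- 

Reviewer note: the sharding tests above pin down the contract of
`optimum.nvidia.weights.shard` without showing its body. A minimal sketch
consistent with that contract (the actual implementation in
`optimum.nvidia.weights` is not part of this patch and may differ):

    import numpy as np

    def shard(tensor: np.ndarray, rank: int, tp_degree: int, axis: int = 0) -> np.ndarray:
        # Return the `rank`-th of `tp_degree` equal slices of `tensor` along `axis`.
        # tp_degree == 1 degenerates to the identity, matching test_no_sharding.
        if tp_degree == 1:
            return tensor
        # np.split requires the axis length to divide evenly by tp_degree,
        # which holds for the 1024x4096 tensors used in the tests.
        return np.split(tensor, tp_degree, axis=axis)[rank]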
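Reviewer note: the workflow exports `RUN_CPU_ONLY: ON`, and `parse_flag_from_env`
delegates to `distutils.util.strtobool`, which lower-cases its input and maps
"on"/"1"/"true"/"yes" to 1, so that value parses as expected. A standalone
check (hypothetical, mirroring the `env:` block of pr_fast_tests.yml):

    import os
    from distutils.util import strtobool

    os.environ["RUN_CPU_ONLY"] = "ON"  # same value the workflow exports

    # Parses as True, so `requires_gpu`-marked tests are skipped on this runner.
    assert strtobool(os.environ["RUN_CPU_ONLY"]) == 1

Note that `distutils` is deprecated since Python 3.10 and removed in 3.12, so
this helper will eventually need a local replacement for `strtobool`.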