Skip to content

Commit

Permalink
Initial set of unit tests in CI (#43)
Browse files Browse the repository at this point in the history
* Add quantization tests

* Add some tooling for the CI

* Added initial set of tests for quantization and sharding

* Use Protocol instead of ABC

* Add utility to ensure we can run on GPU and define how much we need

* Attempt to enable the workflow

* Second attempt

* Let's relax numpy version for now.

* Disable a few things

* Again

* Again Again

* Refactored tests and utils

* Update command for unittests

* Quality

* Once more

* One last?

* Add Makefile to make it easier to run common commands
  • Loading branch information
mfuntowicz authored Dec 14, 2023
1 parent f2ca732 commit fc57497
Show file tree
Hide file tree
Showing 11 changed files with 177 additions and 3 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/pr_fast_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Workflow running the repository test suite inside the optimum-nvidia container.
name: CPU Only Test Suite on PRs

# Triggered by pull requests targeting `main` and by pushes to any `ci-*` branch.
on:
pull_request:
branches:
- main
push:
branches:
- ci-*

# One run per workflow and PR head ref (or per run id when there is no head ref);
# a newer run cancels the one still in progress for the same group.
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

# RUN_CPU_ONLY is read by optimum.nvidia.utils.tests to skip tests marked `requires_gpu`.
# NOTE(review): the consumer of OPTIMUM_NVIDIA_IS_CI is not visible in this commit - confirm where it is read.
env:
OPTIMUM_NVIDIA_IS_CI: ON
RUN_CPU_ONLY: ON

# Single-entry matrix: the job name, runner labels and container image all come from `config`.
# NOTE(review): the suite is named "CPU Only" yet it is scheduled on an `nvidia-gpu` runner
# and the container is started with `--gpus all` - confirm this is intentional.
jobs:
run_fast_tests:
strategy:
fail-fast: false
matrix:
config:
- name: Fast Optimum-Nvidia Test Suite
runner: [ci, nvidia-gpu]
image: huggingface/optimum-nvidia
report: cpu_only

name: ${{ matrix.config.name }}
runs-on: ${{ matrix.config.runner }}

# Container gets 16gb of shared memory, every GPU, the host IPC namespace,
# and the host directory /mnt/hf_cache mounted at /mnt/cache/.
container:
image: ${{ matrix.config.image }}
options: --shm-size "16gb" --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/

defaults:
run:
shell: bash

# Steps: shallow checkout (depth 1), editable install with the `quality` and `tests` extras,
# then pytest over the `tests` directory with the warnings plugin disabled.
steps:
- name: Checkout optimum-nvidia
uses: actions/checkout@v4
with:
fetch-depth: 1

- name: Install dependencies
run: |
python -m pip install --upgrade -e .[quality,tests]
- name: Run fast optimum-nvidia CPU tests
run: |
python -m pytest -s -v -p no:warnings tests
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Neither target produces a file, so never let a same-named file shadow them.
.PHONY: fix-quality quality

# Rewrite the sources in place: apply ruff's auto-fixable lint fixes, then reformat.
fix-quality:
	python3 -m ruff check examples scripts src tests --fix
	python3 -m ruff format examples scripts src tests

# Read-only check: report lint findings and formatting drift without touching files.
quality:
	python3 -m ruff check examples scripts src tests
	python3 -m ruff format examples scripts src tests --check
8 changes: 7 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,10 @@ indent-style = "space"
skip-magic-trailing-comma = false

# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"
line-ending = "auto"


[tool.pytest.ini_options]
pythonpath = [
"src"
]
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"fsspec",
"huggingface_hub >= 0.14.0",
"hf-transfer",
"numpy >= 1.24.0",
"numpy >= 1.22.0",
"onnx >= 1.12.0",
"optimum >= 1.13.0",
"transformers >= 4.32.1",
Expand Down
2 changes: 1 addition & 1 deletion src/optimum/nvidia/configs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@
from typing import Protocol

from .base import ModelConfig, TransformersConfig
from .quantization import QuantizationConfig
from .quantization import NO_QUANTIZATION, QuantizationConfig
1 change: 1 addition & 0 deletions src/optimum/nvidia/utils/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .utils import nightly, requires_gpu, slow
43 changes: 43 additions & 0 deletions src/optimum/nvidia/utils/tests/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import functools
import os
from distutils.util import strtobool

import pytest

from optimum.nvidia.utils.nvml import get_device_count


INT_TRUE_VALUE = 1

# Environment variable controlling test set
ENVVAR_NAME_RUN_NIGHTLY = "RUN_NIGHTLY"
ENVVAR_NAME_RUN_SLOW = "RUN_SLOW"
ENVVAR_NAME_RUN_CPU_ONLY = "RUN_CPU_ONLY"


# Spellings accepted for boolean environment flags, compared case-insensitively.
# They mirror the values historically accepted by `distutils.util.strtobool`.
_ENV_TRUE_VALUES = frozenset({"y", "yes", "t", "true", "on", "1"})
_ENV_FALSE_VALUES = frozenset({"n", "no", "f", "false", "off", "0"})


@functools.cache
def parse_flag_from_env(name: str, default: bool) -> bool:
    """
    Parse the environment variable `name` as a boolean.

    The result is memoized per `(name, default)` pair, so later changes to the
    environment are not observed for a pair that was already parsed.

    :param name: Name of target environment variable
    :param default: The default value to apply if `name` is not present
    :return: Boolean value
    :raises ValueError: If the variable holds a value that is not a recognised truth value
    """

    # Retrieve the value or `default` (stringified, i.e. "True"/"False") if not present
    value = os.environ.get(name, str(default))

    # Parsed locally instead of through `distutils.util.strtobool`:
    # `distutils` was removed from the standard library in Python 3.12.
    normalized = value.lower()
    if normalized in _ENV_TRUE_VALUES:
        return True
    if normalized in _ENV_FALSE_VALUES:
        return False

    raise ValueError(f"Failed to convert environment variable {name}={value} to a bool")


# Opt-in test sets: a test carrying one of these markers is skipped UNLESS the matching
# environment flag is enabled. `skipif` skips when its condition is true, so the flag
# has to be negated - otherwise enabling RUN_NIGHTLY / RUN_SLOW would disable those tests.
nightly = pytest.mark.skipif(not parse_flag_from_env(ENVVAR_NAME_RUN_NIGHTLY, False), reason="Nightly test")
slow = pytest.mark.skipif(not parse_flag_from_env(ENVVAR_NAME_RUN_SLOW, False), reason="Slow test")

# Evaluated once at import time and reused for both the skip condition and its message.
_cpu_only = parse_flag_from_env(ENVVAR_NAME_RUN_CPU_ONLY, False)
_num_gpus = get_device_count()

# Skip GPU tests when the run is forced to CPU-only or when no device is reported.
requires_gpu = pytest.mark.skipif(
    _cpu_only or not _num_gpus,
    reason=f"RUN_CPU_ONLY={_cpu_only} or no GPU detected (num_gpus={_num_gpus})",
)
11 changes: 11 additions & 0 deletions tests/models/test_llama.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from parameterized import parameterized

from optimum.nvidia.models.llama import LLamaForCausalLM as TrtLlamaForCausalLM
from optimum.nvidia.utils.tests import requires_gpu


@parameterized.expand(["float16", "bfloat16"])
@requires_gpu
def test_build_engine_7b_with_tp(dtype: str):
    # Load the checkpoint once per parameterized dtype; the test only requires
    # that the returned object is truthy.
    engine = TrtLlamaForCausalLM.from_pretrained(
        "huggingface/llama-7b",
        dtype=dtype,
    )
    assert engine
Empty file removed tests/quantization.py
Empty file.
14 changes: 14 additions & 0 deletions tests/test_quantization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from tensorrt_llm.quantization import QuantMode

from optimum.nvidia.configs import NO_QUANTIZATION, QuantizationConfig


def test_no_quantization_has_quantization_step():
    """A config built from `NO_QUANTIZATION` must not report a quantization step."""
    config = QuantizationConfig(NO_QUANTIZATION)
    needs_step = config.has_quantization_step
    assert not needs_step


def test_float8_quantization_has_quantization_step():
    """A config built from an FP8 QDQ + FP8 KV-cache mode must report a quantization step."""
    fp8_mode = QuantMode.from_description(use_fp8_qdq=True, use_fp8_kv_cache=True)
    config = QuantizationConfig(fp8_mode)

    assert config.has_quantization_step
39 changes: 39 additions & 0 deletions tests/test_sharding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from unittest import TestCase

import numpy as np
from parameterized import parameterized

from optimum.nvidia.weights import shard


TENSOR_DIM_0 = 1024
TENSOR_DIM_1 = 4096


class MatrixShardingTestCase(TestCase):
    """Checks `shard` against plain NumPy slicing of a random 2D matrix."""

    def setUp(self):
        # A fresh random (TENSOR_DIM_0, TENSOR_DIM_1) matrix for every test.
        self.tensor = np.random.rand(TENSOR_DIM_0, TENSOR_DIM_1)

    def test_no_sharding(self):
        # Rank 0 out of a single shard must be the full tensor.
        whole = shard(self.tensor, 0, 1, axis=0)
        self.assertTrue(np.array_equal(whole, self.tensor))

    @parameterized.expand([1, 2, 4, 8])
    def test_sharding_tensor_parallelism_axis_0(self, tp_degree: int):
        # Splitting along axis 0 hands each rank a contiguous band of rows.
        rows_per_rank = TENSOR_DIM_0 // tp_degree
        expected_shape = (rows_per_rank, TENSOR_DIM_1)

        pieces = [shard(self.tensor, rank, tp_degree, axis=0) for rank in range(tp_degree)]

        for rank, piece in enumerate(pieces):
            start = rank * rows_per_rank
            stop = (rank + 1) * rows_per_rank
            self.assertTupleEqual(piece.shape, expected_shape)
            self.assertTrue(np.array_equal(piece, self.tensor[start:stop]))

    @parameterized.expand([1, 2, 4, 8])
    def test_sharding_tensor_parallelism_axis_1(self, tp_degree: int):
        # Splitting along axis 1 hands each rank a contiguous band of columns.
        cols_per_rank = TENSOR_DIM_1 // tp_degree
        expected_shape = (TENSOR_DIM_0, cols_per_rank)

        pieces = [shard(self.tensor, rank, tp_degree, axis=1) for rank in range(tp_degree)]

        for rank, piece in enumerate(pieces):
            start = rank * cols_per_rank
            stop = (rank + 1) * cols_per_rank
            self.assertTupleEqual(piece.shape, expected_shape)
            self.assertTrue(np.array_equal(piece, self.tensor[:, start:stop]))

0 comments on commit fc57497

Please sign in to comment.