diff --git a/.github/workflows/nv-sd.yml b/.github/workflows/nv-sd.yml
new file mode 100644
index 000000000000..5ca159074a4d
--- /dev/null
+++ b/.github/workflows/nv-sd.yml
@@ -0,0 +1,74 @@
+# Stable Diffusion inference tests: run weekly, on demand, and on PRs that touch the SD code paths.
+name: nv-sd
+
+on:
+  schedule:
+    - cron: "0 0 * * 0"  # weekly: Sundays at 00:00 UTC
+  workflow_dispatch:
+  pull_request:
+    paths:
+      - "deepspeed/ops/transformer/inference/diffusers_**"
+      - "tests/unit/inference/test_stable_diffusion.py"
+      - "deepspeed/model_implementations/diffusers/unet.py"
+      - "deepspeed/model_implementations/diffusers/vae.py"
+      # Also re-run when the CI recipe itself or the SD dependency pins change.
+      - ".github/workflows/nv-sd.yml"
+      - "requirements/requirements-sd.txt"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+  issues: write  # used by the "Open GitHub issue" step below
+
+jobs:
+  sd-tests:
+    runs-on: [self-hosted, nvidia, a6000]
+    container:
+      image: nvcr.io/nvidia/pytorch:23.03-py3
+      ports:
+        - 80
+      options: --gpus all --shm-size "8G"
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Check container state
+        run: |
+          ldd --version
+          nvcc --version
+          nvidia-smi
+          python -c "import torch; print('torch:', torch.__version__, torch)"
+          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
+      - name: Install transformers
+        run: |
+          git clone https://github.com/huggingface/transformers
+          cd transformers
+          git rev-parse --short HEAD
+          python -m pip install .
+      - name: Install deepspeed
+        run: |
+          pip install image-similarity-measures
+          python -m pip install opencv-python==4.6.* --force-reinstall
+          python -m pip install docutils==0.18.1 jinja2==3.0 urllib3==1.26.11 ninja
+          python -m pip install .[dev,1bit,autotuning,sd]
+          ds_report
+      - name: Python environment
+        run: |
+          python -m pip list
+      - name: Unit tests
+        run: |
+          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
+          cd tests
+          python -m pytest --color=yes --durations=0 --verbose -rF -m 'stable_diffusion' -k "TestStableDiffusion" unit/ --torch_ver="2.0" --cuda_ver="12"
+
+      - name: Open GitHub issue if weekly CI fails
+        if: ${{ failure() && (github.event_name == 'schedule') }}
+        uses: JasonEtco/create-an-issue@v2
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
+          update_existing: true
diff --git a/requirements/requirements-sd.txt b/requirements/requirements-sd.txt
index 086a8e3f4879..cb679ae3771d 100644
--- a/requirements/requirements-sd.txt
+++ b/requirements/requirements-sd.txt
@@ -1,2 +1,2 @@
 diffusers
-triton
+triton>=2.1.0
diff --git a/tests/pytest.ini b/tests/pytest.ini
index cc6b6564daa8..8d043c8b3f9d 100644
--- a/tests/pytest.ini
+++ b/tests/pytest.ini
@@ -1,5 +1,5 @@
 [pytest]
-addopts = -m "not sequential and not nightly and not inference and not seq_inference and not inference_ops and not inference_v2 and not inference_v2_ops"
+addopts = -m "not sequential and not nightly and not inference and not seq_inference and not inference_ops and not inference_v2 and not inference_v2_ops and not stable_diffusion"
 markers =
     sequential:Tests that need to be run sequentially
     inference:Inference model tests
@@ -9,3 +9,4 @@ markers =
     seq_inference:Inference model tests to run sequentially
     nightly:Tests that should be run nightly
     world_size:Change world size of individual tests in a class
+    stable_diffusion:Tests that run Stable Diffusion
diff --git 
a/tests/unit/inference/test_stable_diffusion.py b/tests/unit/inference/test_stable_diffusion.py
new file mode 100644
index 000000000000..ac39b7ab12fa
--- /dev/null
+++ b/tests/unit/inference/test_stable_diffusion.py
@@ -0,0 +1,54 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# DeepSpeed Team
+
+import os
+import torch
+import pytest
+import deepspeed
+import numpy
+from unit.common import DistributedTest
+from deepspeed.accelerator import get_accelerator
+
+
+# Setup for these models is different from other pipelines, so we add a separate test
+@pytest.mark.stable_diffusion
+class TestStableDiffusion(DistributedTest):
+    """Check that DeepSpeed kernel injection leaves a Stable Diffusion image (nearly) unchanged."""
+
+    world_size = 1
+
+    def test(self):
+        """Render one prompt with the stock and the injected pipeline from the same seed and compare by RMSE."""
+        # Local imports: only this test needs these packages.
+        from diffusers import DiffusionPipeline
+        from image_similarity_measures.quality_metrics import rmse
+        generator = torch.Generator(device=get_accelerator().current_device())
+        seed = 0xABEDABE7
+        generator.manual_seed(seed)
+        prompt = "a dog on a rocket"
+        model = "prompthero/midjourney-v4-diffusion"
+        local_rank = int(os.getenv("LOCAL_RANK", "0"))
+        # Resolve the device name through the accelerator abstraction instead of hard-coding "cuda".
+        device = torch.device(get_accelerator().device_name(local_rank))
+
+        pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=torch.half)
+        pipe = pipe.to(device)
+        baseline_image = pipe(prompt, guidance_scale=7.5, generator=generator).images[0]
+
+        pipe = deepspeed.init_inference(
+            pipe,
+            mp_size=1,
+            dtype=torch.half,
+            replace_with_kernel_inject=True,
+            enable_cuda_graph=True,
+        )
+        # Re-seed so the second run starts from the same generator state as the baseline run.
+        generator.manual_seed(seed)
+        deepspeed_image = pipe(prompt, guidance_scale=7.5, generator=generator).images[0]
+
+        rmse_value = rmse(org_img=numpy.asarray(baseline_image), pred_img=numpy.asarray(deepspeed_image))
+
+        # RMSE threshold value is arbitrary, may need to adjust as needed
+        assert rmse_value <= 0.01, f"DeepSpeed image deviates from baseline image: RMSE={rmse_value}"