From 33e6ec5d4ffcf5b413c3fdc83b4d6ff7512ff1c4 Mon Sep 17 00:00:00 2001
From: grajguru
Date: Tue, 7 Nov 2023 13:28:26 +0530
Subject: [PATCH 1/3] diffuser model load using model and path params

---
 mii/legacy/models/providers/diffusers.py |  9 +++++++--
 mii/legacy/models/providers/utils.py     | 20 ++++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 mii/legacy/models/providers/utils.py

diff --git a/mii/legacy/models/providers/diffusers.py b/mii/legacy/models/providers/diffusers.py
index 721ac269..2985fd28 100644
--- a/mii/legacy/models/providers/diffusers.py
+++ b/mii/legacy/models/providers/diffusers.py
@@ -5,8 +5,11 @@
 import os
 import torch
 
+from .utils import attempt_load
+from mii.config import ModelConfig
 
-def diffusers_provider(model_config):
+
+def diffusers_provider(model_config: ModelConfig):
     from diffusers import DiffusionPipeline
 
     local_rank = int(os.getenv("LOCAL_RANK", "0"))
@@ -16,7 +19,9 @@ def diffusers_provider(model_config):
         kwargs["torch_dtype"] = torch.float16
         kwargs["revision"] = "fp16"
 
-    pipeline = DiffusionPipeline.from_pretrained(model_config.model_name, **kwargs)
+    pipeline = attempt_load(DiffusionPipeline.from_pretrained,
+                            model_config.model, model_config.model_path,
+                            kwargs=kwargs)
     pipeline = pipeline.to(f"cuda:{local_rank}")
     pipeline.set_progress_bar_config(disable=True)
     return pipeline
diff --git a/mii/legacy/models/providers/utils.py b/mii/legacy/models/providers/utils.py
new file mode 100644
index 00000000..e9da3bfc
--- /dev/null
+++ b/mii/legacy/models/providers/utils.py
@@ -0,0 +1,20 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# DeepSpeed Team
+
+from mii.utils import is_aml, mii_cache_path
+
+
+def attempt_load(load_fn, model_name, model_path, cache_path=None, kwargs={}):
+    try:
+        value = load_fn(model_name, **kwargs)
+    except OSError:
+        if is_aml():
+            print(f"Attempted load but failed, retrying using model_path={model_path}")
+            value = load_fn(model_path, **kwargs)
+        else:
+            cache_path = cache_path or mii_cache_path()
+            print(f"Attempted load but failed, retrying using cache_dir={cache_path}")
+            value = load_fn(model_name, cache_dir=cache_path, **kwargs)
+    return value

From 05e3453315705fc82e33e4e27ae1fb46fa629c31 Mon Sep 17 00:00:00 2001
From: grajguru
Date: Tue, 7 Nov 2023 18:21:43 +0530
Subject: [PATCH 2/3] for all exceptions

---
 mii/legacy/models/providers/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mii/legacy/models/providers/utils.py b/mii/legacy/models/providers/utils.py
index e9da3bfc..4a8e86e2 100644
--- a/mii/legacy/models/providers/utils.py
+++ b/mii/legacy/models/providers/utils.py
@@ -9,12 +9,12 @@
 def attempt_load(load_fn, model_name, model_path, cache_path=None, kwargs={}):
     try:
         value = load_fn(model_name, **kwargs)
-    except OSError:
+    except Exception as ex:
         if is_aml():
-            print(f"Attempted load but failed, retrying using model_path={model_path}")
+            print(f"Attempted load but failed - {str(ex)}, retrying using model_path={model_path}")
             value = load_fn(model_path, **kwargs)
         else:
             cache_path = cache_path or mii_cache_path()
-            print(f"Attempted load but failed, retrying using cache_dir={cache_path}")
+            print(f"Attempted load but failed - {str(ex)}, retrying using cache_dir={cache_path}")
             value = load_fn(model_name, cache_dir=cache_path, **kwargs)
     return value

From 03cb32f498345089b66dff366ebc1bf659d266b1 Mon Sep 17 00:00:00 2001
From: grajguru
Date: Tue, 7 Nov 2023 23:14:27 +0530
Subject: [PATCH 3/3] flake

---
 mii/legacy/models/providers/diffusers.py | 3 ++-
 mii/legacy/models/providers/utils.py     | 8 ++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/mii/legacy/models/providers/diffusers.py b/mii/legacy/models/providers/diffusers.py
index 2985fd28..15973d0e 100644
--- a/mii/legacy/models/providers/diffusers.py
+++ b/mii/legacy/models/providers/diffusers.py
@@ -20,7 +20,8 @@ def diffusers_provider(model_config: ModelConfig):
         kwargs["revision"] = "fp16"
 
     pipeline = attempt_load(DiffusionPipeline.from_pretrained,
-                            model_config.model, model_config.model_path,
+                            model_config.model,
+                            model_config.model_path,
                             kwargs=kwargs)
     pipeline = pipeline.to(f"cuda:{local_rank}")
     pipeline.set_progress_bar_config(disable=True)
diff --git a/mii/legacy/models/providers/utils.py b/mii/legacy/models/providers/utils.py
index 4a8e86e2..f9e237ba 100644
--- a/mii/legacy/models/providers/utils.py
+++ b/mii/legacy/models/providers/utils.py
@@ -11,10 +11,14 @@ def attempt_load(load_fn, model_name, model_path, cache_path=None, kwargs={}):
         value = load_fn(model_name, **kwargs)
     except Exception as ex:
         if is_aml():
-            print(f"Attempted load but failed - {str(ex)}, retrying using model_path={model_path}")
+            print(
+                f"Attempted load but failed - {str(ex)}, retrying using model_path={model_path}"
+            )
             value = load_fn(model_path, **kwargs)
         else:
             cache_path = cache_path or mii_cache_path()
-            print(f"Attempted load but failed - {str(ex)}, retrying using cache_dir={cache_path}")
+            print(
+                f"Attempted load but failed - {str(ex)}, retrying using cache_dir={cache_path}"
+            )
             value = load_fn(model_name, cache_dir=cache_path, **kwargs)
     return value
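
For illustration, a minimal standalone sketch of the fallback order the series gives attempt_load: try the model name first, then on failure fall back to model_path on AML or retry the name against the MII cache directory elsewhere. The names stub_loader, on_aml, and DEFAULT_CACHE are hypothetical stand-ins for DiffusionPipeline.from_pretrained, mii.utils.is_aml(), and mii_cache_path(); unlike the patched helper, this sketch takes kwargs=None rather than a shared mutable dict default.

# Hypothetical sketch of the fallback logic after this series; not the MII code.

DEFAULT_CACHE = "/tmp/mii_cache"  # stand-in for mii_cache_path()


def attempt_load(load_fn, model_name, model_path, cache_path=None, on_aml=False, kwargs=None):
    kwargs = kwargs or {}  # avoid the mutable-default pitfall of kwargs={}
    try:
        # First attempt: treat the argument as a model name (e.g. a Hub id).
        return load_fn(model_name, **kwargs)
    except Exception as ex:
        if on_aml:
            # On AML the model is mounted locally, so retry from model_path.
            print(f"Attempted load but failed - {ex}, retrying using model_path={model_path}")
            return load_fn(model_path, **kwargs)
        # Elsewhere, retry the same name against the MII cache directory.
        cache_path = cache_path or DEFAULT_CACHE
        print(f"Attempted load but failed - {ex}, retrying using cache_dir={cache_path}")
        return load_fn(model_name, cache_dir=cache_path, **kwargs)


def stub_loader(name_or_path, cache_dir=None, **kwargs):
    # Simulated loader: the default cache is empty, so loading by name fails
    # unless an explicit cache_dir is given; local paths always succeed.
    if cache_dir is None and not name_or_path.startswith("/"):
        raise OSError(f"{name_or_path} not found in default cache")
    return f"pipeline<{name_or_path}, cache_dir={cache_dir}>"


if __name__ == "__main__":
    # Non-AML path: first attempt fails, retried with cache_dir=DEFAULT_CACHE.
    print(attempt_load(stub_loader, "org/model", "/mnt/models/org-model"))
    # AML path: first attempt fails, retried with the mounted model_path.
    print(attempt_load(stub_loader, "org/model", "/mnt/models/org-model", on_aml=True))

Note that after patch 2 the broad except Exception means any loader failure triggers the retry, not only the missing-file OSError; the retry itself is unguarded, so a second failure propagates to the caller.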