From 33e6ec5d4ffcf5b413c3fdc83b4d6ff7512ff1c4 Mon Sep 17 00:00:00 2001
From: grajguru
Date: Tue, 7 Nov 2023 13:28:26 +0530
Subject: [PATCH 1/3] diffuser model load using model and path params

---
 mii/legacy/models/providers/diffusers.py |  9 +++++++--
 mii/legacy/models/providers/utils.py     | 20 ++++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 mii/legacy/models/providers/utils.py

diff --git a/mii/legacy/models/providers/diffusers.py b/mii/legacy/models/providers/diffusers.py
index 721ac269..2985fd28 100644
--- a/mii/legacy/models/providers/diffusers.py
+++ b/mii/legacy/models/providers/diffusers.py
@@ -5,8 +5,11 @@
 import os
 import torch
 
+from .utils import attempt_load
+from mii.config import ModelConfig
 
-def diffusers_provider(model_config):
+
+def diffusers_provider(model_config: ModelConfig):
     from diffusers import DiffusionPipeline
 
     local_rank = int(os.getenv("LOCAL_RANK", "0"))
@@ -16,7 +19,9 @@ def diffusers_provider(model_config):
         kwargs["torch_dtype"] = torch.float16
         kwargs["revision"] = "fp16"
 
-    pipeline = DiffusionPipeline.from_pretrained(model_config.model_name, **kwargs)
+    pipeline = attempt_load(DiffusionPipeline.from_pretrained,
+                            model_config.model, model_config.model_path,
+                            kwargs=kwargs)
     pipeline = pipeline.to(f"cuda:{local_rank}")
     pipeline.set_progress_bar_config(disable=True)
     return pipeline
diff --git a/mii/legacy/models/providers/utils.py b/mii/legacy/models/providers/utils.py
new file mode 100644
index 00000000..e9da3bfc
--- /dev/null
+++ b/mii/legacy/models/providers/utils.py
@@ -0,0 +1,20 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# DeepSpeed Team
+
+from mii.utils import is_aml, mii_cache_path
+
+
+def attempt_load(load_fn, model_name, model_path, cache_path=None, kwargs={}):
+    try:
+        value = load_fn(model_name, **kwargs)
+    except OSError:
+        if is_aml():
+            print(f"Attempted load but failed, retrying using model_path={model_path}")
+            value = load_fn(model_path, **kwargs)
+        else:
+            cache_path = cache_path or mii_cache_path()
+            print(f"Attempted load but failed, retrying using cache_dir={cache_path}")
+            value = load_fn(model_name, cache_dir=cache_path, **kwargs)
+    return value

From 05e3453315705fc82e33e4e27ae1fb46fa629c31 Mon Sep 17 00:00:00 2001
From: grajguru
Date: Tue, 7 Nov 2023 18:21:43 +0530
Subject: [PATCH 2/3] for all exceptions

---
 mii/legacy/models/providers/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mii/legacy/models/providers/utils.py b/mii/legacy/models/providers/utils.py
index e9da3bfc..4a8e86e2 100644
--- a/mii/legacy/models/providers/utils.py
+++ b/mii/legacy/models/providers/utils.py
@@ -9,12 +9,12 @@
 def attempt_load(load_fn, model_name, model_path, cache_path=None, kwargs={}):
     try:
         value = load_fn(model_name, **kwargs)
-    except OSError:
+    except Exception as ex:
         if is_aml():
-            print(f"Attempted load but failed, retrying using model_path={model_path}")
+            print(f"Attempted load but failed - {str(ex)}, retrying using model_path={model_path}")
             value = load_fn(model_path, **kwargs)
         else:
             cache_path = cache_path or mii_cache_path()
-            print(f"Attempted load but failed, retrying using cache_dir={cache_path}")
+            print(f"Attempted load but failed - {str(ex)}, retrying using cache_dir={cache_path}")
             value = load_fn(model_name, cache_dir=cache_path, **kwargs)
     return value

From 03cb32f498345089b66dff366ebc1bf659d266b1 Mon Sep 17 00:00:00 2001
From: grajguru
Date: Tue, 7 Nov 2023 23:14:27 +0530
Subject: [PATCH 3/3] flake

---
 mii/legacy/models/providers/diffusers.py | 3 ++-
 mii/legacy/models/providers/utils.py     | 8 ++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/mii/legacy/models/providers/diffusers.py b/mii/legacy/models/providers/diffusers.py
index 2985fd28..15973d0e 100644
--- a/mii/legacy/models/providers/diffusers.py
+++ b/mii/legacy/models/providers/diffusers.py
@@ -20,7 +20,8 @@ def diffusers_provider(model_config: ModelConfig):
         kwargs["revision"] = "fp16"
 
     pipeline = attempt_load(DiffusionPipeline.from_pretrained,
-                            model_config.model, model_config.model_path,
+                            model_config.model,
+                            model_config.model_path,
                             kwargs=kwargs)
     pipeline = pipeline.to(f"cuda:{local_rank}")
     pipeline.set_progress_bar_config(disable=True)
diff --git a/mii/legacy/models/providers/utils.py b/mii/legacy/models/providers/utils.py
index 4a8e86e2..f9e237ba 100644
--- a/mii/legacy/models/providers/utils.py
+++ b/mii/legacy/models/providers/utils.py
@@ -11,10 +11,14 @@ def attempt_load(load_fn, model_name, model_path, cache_path=None, kwargs={}):
         value = load_fn(model_name, **kwargs)
     except Exception as ex:
         if is_aml():
-            print(f"Attempted load but failed - {str(ex)}, retrying using model_path={model_path}")
+            print(
+                f"Attempted load but failed - {str(ex)}, retrying using model_path={model_path}"
+            )
             value = load_fn(model_path, **kwargs)
         else:
             cache_path = cache_path or mii_cache_path()
-            print(f"Attempted load but failed - {str(ex)}, retrying using cache_dir={cache_path}")
+            print(
+                f"Attempted load but failed - {str(ex)}, retrying using cache_dir={cache_path}"
+            )
             value = load_fn(model_name, cache_dir=cache_path, **kwargs)
     return value
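
For illustration, a minimal standalone sketch of the fallback order the series gives attempt_load: try the model name first, then on failure fall back to model_path on AML or retry the name against the MII cache directory elsewhere. The names stub_loader, on_aml, and DEFAULT_CACHE are hypothetical stand-ins for DiffusionPipeline.from_pretrained, mii.utils.is_aml(), and mii_cache_path(); unlike the patched helper, this sketch takes kwargs=None rather than a shared mutable dict default.

# Hypothetical sketch of the fallback logic after this series; not the MII code.

DEFAULT_CACHE = "/tmp/mii_cache"  # stand-in for mii_cache_path()


def attempt_load(load_fn, model_name, model_path, cache_path=None, on_aml=False, kwargs=None):
    kwargs = kwargs or {}  # avoid the mutable-default pitfall of kwargs={}
    try:
        # First attempt: treat the argument as a model name (e.g. a Hub id).
        return load_fn(model_name, **kwargs)
    except Exception as ex:
        if on_aml:
            # On AML the model is mounted locally, so retry from model_path.
            print(f"Attempted load but failed - {ex}, retrying using model_path={model_path}")
            return load_fn(model_path, **kwargs)
        # Elsewhere, retry the same name against the MII cache directory.
        cache_path = cache_path or DEFAULT_CACHE
        print(f"Attempted load but failed - {ex}, retrying using cache_dir={cache_path}")
        return load_fn(model_name, cache_dir=cache_path, **kwargs)


def stub_loader(name_or_path, cache_dir=None, **kwargs):
    # Simulated loader: the default cache is empty, so loading by name fails
    # unless an explicit cache_dir is given; local paths always succeed.
    if cache_dir is None and not name_or_path.startswith("/"):
        raise OSError(f"{name_or_path} not found in default cache")
    return f"pipeline<{name_or_path}, cache_dir={cache_dir}>"


if __name__ == "__main__":
    # Non-AML path: first attempt fails, retried with cache_dir=DEFAULT_CACHE.
    print(attempt_load(stub_loader, "org/model", "/mnt/models/org-model"))
    # AML path: first attempt fails, retried with the mounted model_path.
    print(attempt_load(stub_loader, "org/model", "/mnt/models/org-model", on_aml=True))

Note that after patch 2 the broad except Exception means any loader failure triggers the retry, not only the missing-file OSError; the retry itself is unguarded, so a second failure propagates to the caller.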