diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml index 5b93f80dcd..ce5577309b 100644 --- a/.github/workflows/build_docker_images.yml +++ b/.github/workflows/build_docker_images.yml @@ -10,6 +10,8 @@ concurrency: group: docker-image-builds cancel-in-progress: false +permissions: {} + env: CI_SLACK_CHANNEL: ${{ secrets.CI_DOCKER_CHANNEL }} diff --git a/.github/workflows/build_documentation.yml b/.github/workflows/build_documentation.yml index 1ff01d1a5e..42e7972bc2 100644 --- a/.github/workflows/build_documentation.yml +++ b/.github/workflows/build_documentation.yml @@ -7,6 +7,8 @@ on: - doc-builder* - v*-release +permissions: {} + jobs: build: uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml index 35ceab6e60..3fe27e8a04 100644 --- a/.github/workflows/build_pr_documentation.yml +++ b/.github/workflows/build_pr_documentation.yml @@ -7,6 +7,8 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true +permissions: {} + jobs: build: uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main diff --git a/.github/workflows/integrations_tests.yml b/.github/workflows/integrations_tests.yml index 38ab96246e..3d61c8d915 100644 --- a/.github/workflows/integrations_tests.yml +++ b/.github/workflows/integrations_tests.yml @@ -7,6 +7,8 @@ on: description: 'Branch to test on' required: true +permissions: {} + jobs: run_transformers_integration_tests: strategy: diff --git a/.github/workflows/nightly-bnb.yml b/.github/workflows/nightly-bnb.yml index 0fba12dfb9..bc68af80c8 100644 --- a/.github/workflows/nightly-bnb.yml +++ b/.github/workflows/nightly-bnb.yml @@ -12,6 +12,7 @@ env: NVIDIA_DISABLE_REQUIRE: "1" SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }} +permissions: {} jobs: run_all_tests_single_gpu: diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 7e6635b392..d578900489 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -12,6 +12,7 @@ env: NVIDIA_DISABLE_REQUIRE: "1" SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }} +permissions: {} jobs: run_all_tests_single_gpu: diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index fc65794663..054c4b53c4 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -4,6 +4,8 @@ on: schedule: - cron: "0 15 * * *" +permissions: {} + jobs: close_stale_issues: name: Close Stale Issues diff --git a/.github/workflows/test-docker-build.yml b/.github/workflows/test-docker-build.yml index 33a177bba2..558c5f74de 100644 --- a/.github/workflows/test-docker-build.yml +++ b/.github/workflows/test-docker-build.yml @@ -5,6 +5,9 @@ on: paths: # Run only when DockerFile files are modified - "docker/*/Dockerfile" + +permissions: {} + jobs: get_changed_files: name: "Build all modified docker images" diff --git a/.github/workflows/tests-main.yml b/.github/workflows/tests-main.yml index 1b06083e73..d614d547b7 100644 --- a/.github/workflows/tests-main.yml +++ b/.github/workflows/tests-main.yml @@ -6,6 +6,8 @@ on: paths-ignore: - 'docs/**' +permissions: {} + jobs: tests: runs-on: ubuntu-latest diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index fac7446184..36e6841f0e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,6 +12,8 @@ on: env: HF_HOME: .cache/huggingface +permissions: {} + 
jobs: check_code_quality: runs-on: ubuntu-latest diff --git a/.github/workflows/torch_compile_tests.yml b/.github/workflows/torch_compile_tests.yml index f93d3760d6..02243de643 100644 --- a/.github/workflows/torch_compile_tests.yml +++ b/.github/workflows/torch_compile_tests.yml @@ -17,6 +17,8 @@ env: # To be able to run tests on CUDA 12.2 NVIDIA_DISABLE_REQUIRE: "1" +permissions: {} + jobs: run_tests_with_compile: runs-on: diff --git a/.github/workflows/trufflehog.yml b/.github/workflows/trufflehog.yml index 9a613bb5b7..bdcdac7561 100644 --- a/.github/workflows/trufflehog.yml +++ b/.github/workflows/trufflehog.yml @@ -3,6 +3,8 @@ on: name: Secret Leaks +permissions: {} + jobs: trufflehog: runs-on: ubuntu-latest diff --git a/.github/workflows/upload_pr_documentation.yml b/.github/workflows/upload_pr_documentation.yml index 380f67550d..7659af7e5c 100644 --- a/.github/workflows/upload_pr_documentation.yml +++ b/.github/workflows/upload_pr_documentation.yml @@ -6,6 +6,8 @@ on: types: - completed +permissions: {} + jobs: build: uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main diff --git a/.github/workflows/zizmor.yaml b/.github/workflows/zizmor.yaml index c9ab11998c..502c4f8d62 100644 --- a/.github/workflows/zizmor.yaml +++ b/.github/workflows/zizmor.yaml @@ -3,13 +3,13 @@ name: CI security linting on: push: branches: ["main"] - paths: - - '.github/**' pull_request: branches: ["*"] paths: - '.github/**' +permissions: {} + jobs: zizmor: name: zizmor latest via Cargo diff --git a/.github/zizmor.yml b/.github/zizmor.yml index f83c7e2b12..1746cfe25f 100644 --- a/.github/zizmor.yml +++ b/.github/zizmor.yml @@ -3,3 +3,13 @@ rules: ignore: # this workflow is only triggered after maintainer approval - upload_pr_documentation.yml:3:1 + cache-poisoning: + ignore: + # the docker buildx binary is cached and zizmor warns about a cache poisoning attack. + # OTOH this cache would make us more resilient against an intrusion on docker-buildx' side. + # There is no obvious benefit so we leave it as it is. 
+ - build_docker_images.yml:37:9 + - build_docker_images.yml:70:9 + - build_docker_images.yml:103:9 + - build_docker_images.yml:136:9 + - build_docker_images.yml:169:9 diff --git a/examples/corda_finetuning/README.md b/examples/corda_finetuning/README.md index c248e99ae1..f07672f7a5 100644 --- a/examples/corda_finetuning/README.md +++ b/examples/corda_finetuning/README.md @@ -100,7 +100,12 @@ lora_config = LoraConfig( init_lora_weights="corda", corda_config=corda_config, ) + +# Call `preprocess_corda` first to collect covariance matrix and build SVD result for model +# For more details, please refer to documentation of `preprocess_corda` preprocess_corda(model, lora_config, run_model=run_model) + +# Call `get_peft_model` after preprocessing, or else you'll encounter error peft_model = get_peft_model(model, lora_config) peft_model.print_trainable_parameters() diff --git a/examples/corda_finetuning/preprocess.py b/examples/corda_finetuning/preprocess.py index 01721d296e..15bb18cb6b 100644 --- a/examples/corda_finetuning/preprocess.py +++ b/examples/corda_finetuning/preprocess.py @@ -21,7 +21,7 @@ from tqdm import tqdm from transformers import AutoModelForCausalLM, AutoTokenizer -from peft.mapping import get_peft_model +from peft import get_peft_model from peft.tuners.lora.config import CordaConfig, LoraConfig from peft.tuners.lora.corda import preprocess_corda diff --git a/src/peft/import_utils.py b/src/peft/import_utils.py index 1745f5bdde..7599aed35f 100644 --- a/src/peft/import_utils.py +++ b/src/peft/import_utils.py @@ -13,9 +13,11 @@ # limitations under the License. import importlib import importlib.metadata as importlib_metadata +import platform from functools import lru_cache import packaging.version +import torch @lru_cache @@ -111,3 +113,23 @@ def is_torchao_available(): f"but only versions above {TORCHAO_MINIMUM_VERSION} are supported" ) return True + + +@lru_cache +def is_xpu_available(check_device=False): + """ + Checks if XPU acceleration is available and potentially if a XPU is in the environment + """ + + system = platform.system() + if system == "Darwin": + return False + else: + if check_device: + try: + # Will raise a RuntimeError if no XPU is found + _ = torch.xpu.device_count() + return torch.xpu.is_available() + except RuntimeError: + return False + return hasattr(torch, "xpu") and torch.xpu.is_available() diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 62061a84e8..28f715a100 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -3022,3 +3022,19 @@ def check_irrgular(vals: list[bool | Literal["irregular"]]) -> bool | Literal["i devices=devices, ) return adapter_model_status + + +def __getattr__(name): + if name == "PEFT_TYPE_TO_MODEL_MAPPING": + # This is for backwards compatibility: In #2282, PEFT_TYPE_TO_MODEL_MAPPING was removed as it was redundant with + # PEFT_TYPE_TO_TUNER_MAPPING. However, third party code could still use this mapping, e.g.: + # https://github.com/AutoGPTQ/AutoGPTQ/blob/6689349625de973b9ee3016c28c11f32acf7f02c/auto_gptq/utils/peft_utils.py#L8 + # TODO: Remove after 2026-01 + msg = ( + "PEFT_TYPE_TO_MODEL_MAPPING is deprecated, please use `from peft import PEFT_TYPE_TO_TUNER_MAPPING` instead. " + "The deprecated variable will be removed in 2026." 
+ ) + warnings.warn(msg, category=DeprecationWarning) + return PEFT_TYPE_TO_TUNER_MAPPING + + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/src/peft/tuners/lora/config.py b/src/peft/tuners/lora/config.py index df1fc06958..b36de0c43a 100644 --- a/src/peft/tuners/lora/config.py +++ b/src/peft/tuners/lora/config.py @@ -145,6 +145,8 @@ class CordaConfig: use_float16_for_covariance (`bool`): If true, uses float16 for the covariance matrix. This can reduce the memory usage of the covariance matrix by half, but may lead to numerical instability. Defaults to `False`. + prune_temporary_fields (`bool`): + If true, temporary fields generated in CorDA preprocessing will be pruned. Defaults to `True`. """ cache_file: Optional[str] = field( @@ -189,6 +191,9 @@ class CordaConfig: ) }, ) + prune_temporary_fields: bool = field( + default=True, metadata={"help": "If true, temporary fields generated in CorDA preprocessing will be pruned."} + ) @dataclass diff --git a/src/peft/tuners/lora/corda.py b/src/peft/tuners/lora/corda.py index 0d1d70b1a8..8b991d276e 100644 --- a/src/peft/tuners/lora/corda.py +++ b/src/peft/tuners/lora/corda.py @@ -61,6 +61,10 @@ def preprocess_corda( """ Build necessary CorDA fields for a model. + For each `M * N` linear layer, a `M * M` covariance matrix will be built temporarily during the preprocessing + process, consuming roughly another `2 * MODEL_SIZE` memory for typical LLMs if model weight is FP16 and covariance + is FP32. If that's too much, consider specifying `use_float16_for_covariance` in `lora_config.corda_config`. + Args: model (`nn.Module`): Model to preprocess. @@ -68,17 +72,16 @@ def preprocess_corda( Lora configuration of the model. `lora_config.corda_config` should be set. run_model (`Optional[Callable[[], None]]`): Callback to run the model when building covariance. Typically you should run model inference on your sample - dataset in this callback. Experiments have shown 256 samples to be a good default dataset size. `run_model` - can be `None` only if covariance file in `lora_config.corda_config` is already created. + dataset in this callback. Experiments have shown that when token count per sample is 2048, hidden dimension + is 4096, collecting 256 distinct samples is enough. If you collect too few or too repetitive samples, the + covariance matrix may be low-ranked and unstabilize preprocessing. You can estimate sample count as + `HIDDEN_DIM / TOKEN_PER_SAMPLE * 128`. `run_model` can be `None` only if covariance file in + `lora_config.corda_config` is already created. hooked_model (`Optional[nn.Module]`): Model to hook when building covariance. If none, original model will be hooked. This is only useful when you want to hook a different model than the one you are training, typically you should leave this `None`. Upon completion, the following fields are set for each target module: - corda_method (`Literal["ipm", "kpm"]`): - CorDA method to apply. "ipm" for Instruction-Previewed Mode, "kpm" for Knowledge-Preserved Mode. - rank (`int`): - Rank of CorDA to apply. eigens.S_WC (`torch.Tensor`): Singular values of the weight matrix. 
eigens.U_WC (`torch.Tensor`): @@ -90,13 +93,12 @@ def preprocess_corda( covariance_file = lora_config.corda_config.covariance_file corda_method = lora_config.corda_config.corda_method verbose = lora_config.corda_config.verbose + prune_temporary_fields = lora_config.corda_config.prune_temporary_fields # If cache exists, skip building if cache_file is not None and os.path.exists(cache_file) and os.path.getsize(cache_file) > 0: cache = torch.load(cache_file, map_location=get_model_device(model)) for name, module in target_modules(model, lora_config): - module.corda_method = cache[f"{name}.corda_method"] - module.rank = cache[f"{name}.rank"] module.eigens = CordaEigens( S_WC=cache[f"{name}.eigens.S_WC"], U_WC=cache[f"{name}.eigens.U_WC"], @@ -123,12 +125,22 @@ def preprocess_corda( # Crop CorDA eigens so that there's less to save crop_corda_eigens(model, lora_config) + # Remove redundant fields if exist + if prune_temporary_fields: + for name, module in target_modules(model, lora_config): + if hasattr(module, "sample_count"): + del module.sample_count + if hasattr(module, "covariance_matrix"): + del module.covariance_matrix + if hasattr(module, "corda_method"): + del module.corda_method + if hasattr(module, "rank"): + del module.rank + # Save cache to disk if cache_file is not None: cache: dict[str, Any] = {} for name, module in target_modules(model, lora_config): - cache[f"{name}.corda_method"] = module.corda_method - cache[f"{name}.rank"] = module.rank cache[f"{name}.eigens.S_WC"] = module.eigens.S_WC cache[f"{name}.eigens.U_WC"] = module.eigens.U_WC cache[f"{name}.eigens.V_WC"] = module.eigens.V_WC @@ -174,15 +186,9 @@ def hook(module, input, output): "Invalid value found in covariance. Please file an issue at https://github.com/huggingface/peft/issues." 
) - # calculate mean and std - mean = input.mean(0) - std = input.std(0) - # add to module module.sample_count += 1 module.covariance_matrix += covariance - module.mean += mean - module.std += std # free memory del covariance, input @@ -191,8 +197,6 @@ def hook(module, input, output): for name, module in target_modules(hooked_model, config): module.sample_count = 0 module.covariance_matrix = 0 - module.mean = 0 - module.std = 0 handles.append(module.register_forward_hook(hook)) run_model() @@ -213,14 +217,10 @@ def hook(module, input, output): if name in targets: targets[name].sample_count = module.sample_count targets[name].covariance_matrix = module.covariance_matrix - targets[name].mean = module.mean - targets[name].std = module.std # Divide by sample count for name, module in target_modules(model, config): module.covariance_matrix /= module.sample_count - module.mean /= module.sample_count - module.std /= module.sample_count # Save covariance to disk if covariance_file is not None: diff --git a/src/peft/tuners/lora/layer.py b/src/peft/tuners/lora/layer.py index 20bef8ed10..557fcfd188 100644 --- a/src/peft/tuners/lora/layer.py +++ b/src/peft/tuners/lora/layer.py @@ -349,6 +349,9 @@ def corda_init(self, adapter_name, init_lora_weights): weight = weight.to(dtype) self.get_base_layer().weight.data = weight + # Remove redundant fields + del linear.eigens + def loftq_init(self, adapter_name): from peft.utils.loftq_utils import loftq_init @@ -1402,6 +1405,33 @@ def batch_first(self) -> bool: def head_dim(self) -> int: return self.get_base_layer().head_dim + @property + def in_proj_weight(self) -> nn.Parameter: + return self.get_base_layer().in_proj_weight + + @property + def in_proj_bias(self) -> nn.Parameter: + return self.get_base_layer().in_proj_bias + + @property + def out_proj(self) -> nn.Module: + return self.get_base_layer().out_proj.get_base_layer() + + @property + def bias_k(self) -> Optional[nn.Parameter]: + return self.get_base_layer().bias_k + + @property + def bias_v(self) -> Optional[nn.Parameter]: + return self.get_base_layer().bias_v + + def merge_masks(self, *args, **kwargs) -> tuple[Optional[torch.Tensor], Optional[int]]: + return self.get_base_layer().merge_masks(*args, **kwargs) + + @property + def add_zero_attn(self) -> bool: + return self.get_base_layer().add_zero_attn + def update_layer(self, *args, **kwargs) -> None: super().update_layer(*args, **kwargs) # Note: LoRA is applied to both in_proj and out_proj. There is currently no way to only specify one of them. diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index 32631647b2..2967b8da9c 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -444,6 +444,18 @@ def _enable_peft_forward_hooks(self, *args, **kwargs): if unexpected_adapters: raise ValueError(f"Trying to infer with non-existing adapter(s): {', '.join(sorted(unexpected_adapters))}") + # deal with beam search + num_beams = kwargs.get("num_beams", None) + uses_beam_search = isinstance(num_beams, int) and (num_beams > 1) + original_adapter_names = adapter_names[:] + if uses_beam_search: + if not isinstance(adapter_names, (list, tuple)): + raise TypeError(f"Got adapter names of type {type(adapter_names)}, expected a list of str.") + # When there is beam search, the inputs are repeated n times, thus we repeat each adapter name n times and + # then flatten the nested list. For encoder-decoder models, this extended list should not be applied to the + # encoder part. 
Further below, the original argument is thus restored for the encoder. + adapter_names = sum(([n] * kwargs["num_beams"] for n in adapter_names), []) + hook_handles = [] for module in self.modules(): if isinstance(module, LoraLayer) or isinstance(module, ModulesToSaveWrapper): @@ -451,6 +463,17 @@ def _enable_peft_forward_hooks(self, *args, **kwargs): handle = module.register_forward_pre_hook(pre_forward, with_kwargs=True) hook_handles.append(handle) + if uses_beam_search and hasattr(self.model, "get_encoder"): + # For encoder-decoder models, even when applying beam search, the encoder part of the model should not use + # the extended adapter_names. This is because the encoder still uses the original, non-extended samples. + for module in self.model.get_encoder().modules(): + if isinstance(module, LoraLayer) or isinstance(module, ModulesToSaveWrapper): + # Add another hook to overwrite the kwargs with the original adapter names -- this is easier than + # trying to exclude the encoder. + pre_forward = partial(_adapter_names_pre_forward_hook, adapter_names=original_adapter_names) + handle = module.register_forward_pre_hook(pre_forward, with_kwargs=True) + hook_handles.append(handle) + yield for handle in hook_handles: diff --git a/src/peft/utils/save_and_load.py b/src/peft/utils/save_and_load.py index 5337afeb9f..e42e4a5e8c 100644 --- a/src/peft/utils/save_and_load.py +++ b/src/peft/utils/save_and_load.py @@ -335,10 +335,11 @@ def set_peft_model_state_dict( state_dict = {} if getattr(model, "modules_to_save", None) is not None: for key, value in peft_model_state_dict.items(): - if any(module_name in key for module_name in model.modules_to_save): - for module_name in model.modules_to_save: - if module_name in key: - key = key.replace(module_name, f"{module_name}.modules_to_save.{adapter_name}") + if any(f".{module_name}." in key for module_name in model.modules_to_save): + # sort to make order deterministic, but should not affect overall logic + for module_name in sorted(model.modules_to_save): + if f".{module_name}." in key: + key = key.replace(f".{module_name}.", f".{module_name}.modules_to_save.{adapter_name}.") break state_dict[key] = value else: diff --git a/tests/test_common_gpu.py b/tests/test_common_gpu.py index 09b8b4e901..a92e2c8171 100644 --- a/tests/test_common_gpu.py +++ b/tests/test_common_gpu.py @@ -53,11 +53,12 @@ get_peft_model, prepare_model_for_kbit_training, ) -from peft.import_utils import is_bnb_4bit_available, is_bnb_available +from peft.import_utils import is_bnb_4bit_available, is_bnb_available, is_xpu_available from peft.tuners.lora.config import LoraRuntimeConfig from peft.utils import infer_device from .testing_utils import ( + device_count, require_bitsandbytes, require_multi_accelerator, require_non_cpu, @@ -99,6 +100,8 @@ def tearDown(self): gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() + elif is_xpu_available(): + torch.xpu.empty_cache() gc.collect() @require_bitsandbytes @@ -563,7 +566,7 @@ def test_ia3_bnb_4bit_quantization(self): assert isinstance(whisper_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear4bit) @pytest.mark.multi_gpu_tests - @require_torch_multi_gpu + @require_multi_accelerator def test_lora_causal_lm_multi_gpu_inference(self): r""" Test if LORA can be used for inference on multiple GPUs. 
@@ -580,7 +583,7 @@ def test_lora_causal_lm_multi_gpu_inference(self): model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, device_map="balanced") tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id) - assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count())) + assert set(model.hf_device_map.values()) == set(range(device_count)) model = get_peft_model(model, lora_config) assert isinstance(model, PeftModel) @@ -607,7 +610,7 @@ def test_lora_seq2seq_lm_multi_gpu_inference(self): ) tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id) - assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count())) + assert set(model.hf_device_map.values()) == set(range(device_count)) model = get_peft_model(model, lora_config) assert isinstance(model, PeftModel) @@ -706,7 +709,7 @@ def test_print_4bit_expected(self): assert trainable_params == EXPECTED_TRAINABLE_PARAMS assert all_params == EXPECTED_ALL_PARAMS - @require_torch_gpu + @require_non_cpu @pytest.mark.single_gpu_tests @require_bitsandbytes def test_modules_to_save_grad(self): @@ -742,7 +745,7 @@ def test_modules_to_save_grad(self): assert original_module.weight.grad is None assert modules_to_save.weight.grad is not None - @require_torch_gpu + @require_non_cpu @pytest.mark.single_gpu_tests @require_bitsandbytes def test_8bit_merge_lora(self): @@ -1408,7 +1411,7 @@ def test_apply_GS_hra_inference(self): assert not torch.allclose(logits_hra, logits_hra_GS) - @require_torch_gpu + @require_non_cpu @pytest.mark.single_gpu_tests def test_apply_GS_hra_conv2d_inference(self): # check for different result with and without apply_GS @@ -1434,7 +1437,7 @@ def test_apply_GS_hra_conv2d_inference(self): assert not torch.allclose(logits_hra, logits_hra_GS) - @require_torch_gpu + @require_non_cpu @pytest.mark.single_gpu_tests def test_r_odd_hra_inference(self): # check that an untrained HRA adapter can't be initialized as an identity tranformation @@ -1456,9 +1459,13 @@ def test_r_odd_hra_inference(self): assert not torch.allclose(logits, logits_hra) -@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires a CUDA GPU") +@pytest.mark.skipif( + not (torch.cuda.is_available() or is_xpu_available()), reason="test requires a hardware accelerator" +) @pytest.mark.single_gpu_tests class TestSameAdapterDifferentDevices: + device = infer_device() + # 1639 # The original issue comes down to the following problem: If the user has a base layer on CUDA, moves the adapter to # CPU, then adds another adapter (which will automatically be moved to CUDA), then the first adapter will also be @@ -1495,29 +1502,29 @@ def __init__(self): def test_lora_one_target_add_new_adapter_does_not_change_device(self, mlp): config = LoraConfig(target_modules=["lin0"]) model = get_peft_model(mlp, config) - model = model.cuda() + model = model.to(self.device) model.lin0.lora_A.cpu() model.lin0.lora_B.cpu() # check that the adapter is indeed on CPU and the base model on GPU assert model.lin0.lora_A.default.weight.device.type == "cpu" assert model.lin0.lora_B.default.weight.device.type == "cpu" - assert model.lin0.base_layer.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU assert model.lin0.lora_A.default.weight.device.type == "cpu" assert model.lin0.lora_B.default.weight.device.type == "cpu" # the rest should be on GPU - assert 
model.lin0.base_layer.weight.device.type == "cuda" - assert model.lin0.lora_A.other.weight.device.type == "cuda" - assert model.lin0.lora_B.other.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device + assert model.lin0.lora_A.other.weight.device.type == self.device + assert model.lin0.lora_B.other.weight.device.type == self.device def test_lora_multiple_targets_add_new_adapater_does_not_change_device(self, mlp): # same as the previous test, but targeting multiple layers config = LoraConfig(target_modules=["lin0", "lin1"]) model = get_peft_model(mlp, config) - model = model.cuda() + model = model.to(self.device) # move lin1 to CPU but leave lin0 on GPU model.lin1.lora_A.cpu() model.lin1.lora_B.cpu() @@ -1525,74 +1532,74 @@ def test_lora_multiple_targets_add_new_adapater_does_not_change_device(self, mlp # check that the adapter is indeed on CPU and the base model on GPU assert model.lin1.lora_A.default.weight.device.type == "cpu" assert model.lin1.lora_B.default.weight.device.type == "cpu" - assert model.lin1.base_layer.weight.device.type == "cuda" - assert model.lin0.lora_A.default.weight.device.type == "cuda" - assert model.lin0.lora_B.default.weight.device.type == "cuda" - assert model.lin0.base_layer.weight.device.type == "cuda" + assert model.lin1.base_layer.weight.device.type == self.device + assert model.lin0.lora_A.default.weight.device.type == self.device + assert model.lin0.lora_B.default.weight.device.type == self.device + assert model.lin0.base_layer.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU assert model.lin1.lora_A.default.weight.device.type == "cpu" assert model.lin1.lora_B.default.weight.device.type == "cpu" - assert model.lin1.base_layer.weight.device.type == "cuda" + assert model.lin1.base_layer.weight.device.type == self.device # the rest should be on GPU - assert model.lin0.lora_A.default.weight.device.type == "cuda" - assert model.lin0.lora_B.default.weight.device.type == "cuda" - assert model.lin0.base_layer.weight.device.type == "cuda" - assert model.lin0.lora_A.other.weight.device.type == "cuda" - assert model.lin0.lora_B.other.weight.device.type == "cuda" - assert model.lin1.lora_A.other.weight.device.type == "cuda" - assert model.lin1.lora_B.other.weight.device.type == "cuda" + assert model.lin0.lora_A.default.weight.device.type == self.device + assert model.lin0.lora_B.default.weight.device.type == self.device + assert model.lin0.base_layer.weight.device.type == self.device + assert model.lin0.lora_A.other.weight.device.type == self.device + assert model.lin0.lora_B.other.weight.device.type == self.device + assert model.lin1.lora_A.other.weight.device.type == self.device + assert model.lin1.lora_B.other.weight.device.type == self.device def test_lora_embedding_target_add_new_adapter_does_not_change_device(self, emb_conv1d): # same as first test, but targeting the embedding layer config = LoraConfig(target_modules=["emb"]) model = get_peft_model(emb_conv1d, config) - model = model.cuda() + model = model.to(self.device) model.emb.lora_embedding_A.cpu() model.emb.lora_embedding_B.cpu() # check that the adapter is indeed on CPU and the base model on GPU assert model.emb.lora_embedding_A.default.device.type == "cpu" assert model.emb.lora_embedding_B.default.device.type == "cpu" - assert model.emb.weight.device.type == "cuda" + assert model.emb.weight.device.type == self.device model.add_adapter("other", config) # check that after 
adding a new adapter, the old adapter is still on CPU assert model.emb.lora_embedding_A.default.device.type == "cpu" assert model.emb.lora_embedding_B.default.device.type == "cpu" # the rest should be on GPU - assert model.emb.weight.device.type == "cuda" - assert model.emb.lora_embedding_A.other.device.type == "cuda" - assert model.emb.lora_embedding_B.other.device.type == "cuda" + assert model.emb.weight.device.type == self.device + assert model.emb.lora_embedding_A.other.device.type == self.device + assert model.emb.lora_embedding_B.other.device.type == self.device def test_lora_conv1d_target_add_new_adapter_does_not_change_device(self, emb_conv1d): # same as first test, but targeting the Conv1D layer config = LoraConfig(target_modules=["conv1d"]) model = get_peft_model(emb_conv1d, config) - model = model.cuda() + model = model.to(self.device) model.conv1d.lora_A.cpu() model.conv1d.lora_B.cpu() # check that the adapter is indeed on CPU and the base model on GPU assert model.conv1d.lora_A.default.weight.device.type == "cpu" assert model.conv1d.lora_B.default.weight.device.type == "cpu" - assert model.conv1d.weight.device.type == "cuda" + assert model.conv1d.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU assert model.conv1d.lora_A.default.weight.device.type == "cpu" assert model.conv1d.lora_B.default.weight.device.type == "cpu" # the rest should be on GPU - assert model.conv1d.weight.device.type == "cuda" - assert model.conv1d.lora_A.other.weight.device.type == "cuda" - assert model.conv1d.lora_B.other.weight.device.type == "cuda" + assert model.conv1d.weight.device.type == self.device + assert model.conv1d.lora_A.other.weight.device.type == self.device + assert model.conv1d.lora_B.other.weight.device.type == self.device def test_lora_dora_add_new_adapter_does_not_change_device(self, mlp): # same as first test, but also using DoRA config = LoraConfig(target_modules=["lin0"], use_dora=True) model = get_peft_model(mlp, config) - model = model.cuda() + model = model.to(self.device) model.lin0.lora_A.cpu() model.lin0.lora_B.cpu() model.lin0.lora_magnitude_vector.cpu() @@ -1601,7 +1608,7 @@ def test_lora_dora_add_new_adapter_does_not_change_device(self, mlp): assert model.lin0.lora_A.default.weight.device.type == "cpu" assert model.lin0.lora_B.default.weight.device.type == "cpu" assert model.lin0.lora_magnitude_vector.default.weight.device.type == "cpu" - assert model.lin0.base_layer.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU @@ -1609,182 +1616,182 @@ def test_lora_dora_add_new_adapter_does_not_change_device(self, mlp): assert model.lin0.lora_B.default.weight.device.type == "cpu" assert model.lin0.lora_magnitude_vector.default.weight.device.type == "cpu" # the rest should be on GPU - assert model.lin0.base_layer.weight.device.type == "cuda" - assert model.lin0.lora_A.other.weight.device.type == "cuda" - assert model.lin0.lora_B.other.weight.device.type == "cuda" - assert model.lin0.lora_magnitude_vector.other.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device + assert model.lin0.lora_A.other.weight.device.type == self.device + assert model.lin0.lora_B.other.weight.device.type == self.device + assert model.lin0.lora_magnitude_vector.other.weight.device.type == self.device def 
test_adalora_add_new_adapter_does_not_change_device(self, mlp): # same as first test, but using AdaLORA # AdaLora does not like multiple trainable adapters, hence inference_mode=True config = AdaLoraConfig(target_modules=["lin0"], inference_mode=True) model = get_peft_model(mlp, config) - model = model.cuda() + model = model.to(self.device) model.lin0.lora_A.cpu() model.lin0.lora_E.cpu() # check that the adapter is indeed on CPU and the base model on GPU assert model.lin0.lora_A.default.device.type == "cpu" assert model.lin0.lora_E.default.device.type == "cpu" - assert model.lin0.base_layer.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU assert model.lin0.lora_A.default.device.type == "cpu" assert model.lin0.lora_E.default.device.type == "cpu" # the rest should be on GPU - assert model.lin0.base_layer.weight.device.type == "cuda" - assert model.lin0.lora_A.other.device.type == "cuda" - assert model.lin0.lora_E.other.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device + assert model.lin0.lora_A.other.device.type == self.device + assert model.lin0.lora_E.other.device.type == self.device def test_boft_add_new_adapter_does_not_change_device(self, mlp): # same as first test, but using BoFT config = BOFTConfig(target_modules=["lin0"]) model = get_peft_model(mlp, config) - model = model.cuda() + model = model.to(self.device) model.lin0.boft_R.cpu() model.lin0.boft_s.cpu() # check that the adapter is indeed on CPU and the base model on GPU assert model.lin0.boft_R.default.device.type == "cpu" assert model.lin0.boft_s.default.device.type == "cpu" - assert model.lin0.base_layer.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU assert model.lin0.boft_R.default.device.type == "cpu" assert model.lin0.boft_s.default.device.type == "cpu" # the rest should be on GPU - assert model.lin0.base_layer.weight.device.type == "cuda" - assert model.lin0.boft_R.other.device.type == "cuda" - assert model.lin0.boft_s.other.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device + assert model.lin0.boft_R.other.device.type == self.device + assert model.lin0.boft_s.other.device.type == self.device def test_ia3_add_new_adapter_does_not_change_device(self, mlp): # same as first test, but using IA3 config = IA3Config(target_modules=["lin0"], feedforward_modules=["lin0"]) model = get_peft_model(mlp, config) - model = model.cuda() + model = model.to(self.device) model.lin0.ia3_l.cpu() # check that the adapter is indeed on CPU and the base model on GPU assert model.lin0.ia3_l.default.device.type == "cpu" - assert model.lin0.base_layer.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU assert model.lin0.ia3_l.default.device.type == "cpu" # the rest should be on GPU - assert model.lin0.base_layer.weight.device.type == "cuda" - assert model.lin0.ia3_l.other.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device + assert model.lin0.ia3_l.other.device.type == self.device @pytest.mark.xfail(reason="LN Tuning handling of multiple adapters may not be correct", strict=True) def 
test_ln_tuning_add_new_adapter_does_not_change_device(self, mlp): # same as first test, but using LN tuning config = LNTuningConfig(target_modules=["lin0"]) model = get_peft_model(mlp, config) - model = model.cuda() + model = model.to(self.device) model.lin0.ln_tuning_layers.cpu() # check that the adapter is indeed on CPU and the base model on GPU assert model.lin0.ln_tuning_layers.default.weight.device.type == "cpu" - assert model.lin0.base_layer.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU assert model.lin0.ln_tuning_layers.default.weight.device.type == "cpu" # the rest should be on GPU - assert model.lin0.base_layer.weight.device.type == "cuda" - assert model.lin0.ln_tuning_layers.other.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device + assert model.lin0.ln_tuning_layers.other.weight.device.type == self.device def test_loha_add_new_adapter_does_not_change_device(self, mlp): # same as first test, but using LoHa config = LoHaConfig(target_modules=["lin0"]) model = get_peft_model(mlp, config) - model = model.cuda() + model = model.to(self.device) model.lin0.hada_w1_a.cpu() model.lin0.hada_w2_b.cpu() # check that the adapter is indeed on CPU and the base model on GPU assert model.lin0.hada_w1_a.default.device.type == "cpu" assert model.lin0.hada_w2_b.default.device.type == "cpu" - assert model.lin0.base_layer.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU assert model.lin0.hada_w1_a.default.device.type == "cpu" assert model.lin0.hada_w2_b.default.device.type == "cpu" # the rest should be on GPU - assert model.lin0.base_layer.weight.device.type == "cuda" - assert model.lin0.hada_w1_a.other.device.type == "cuda" - assert model.lin0.hada_w2_b.other.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device + assert model.lin0.hada_w1_a.other.device.type == self.device + assert model.lin0.hada_w2_b.other.device.type == self.device def test_lokr_add_new_adapter_does_not_change_device(self, mlp): # same as first test, but using LoKr config = LoKrConfig(target_modules=["lin0"]) model = get_peft_model(mlp, config) - model = model.cuda() + model = model.to(self.device) model.lin0.lokr_w1.cpu() model.lin0.lokr_w2.cpu() # check that the adapter is indeed on CPU and the base model on GPU assert model.lin0.lokr_w1.default.device.type == "cpu" assert model.lin0.lokr_w2.default.device.type == "cpu" - assert model.lin0.base_layer.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU assert model.lin0.lokr_w1.default.device.type == "cpu" assert model.lin0.lokr_w2.default.device.type == "cpu" # the rest should be on GPU - assert model.lin0.base_layer.weight.device.type == "cuda" - assert model.lin0.lokr_w1.other.device.type == "cuda" - assert model.lin0.lokr_w2.other.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device + assert model.lin0.lokr_w1.other.device.type == self.device + assert model.lin0.lokr_w2.other.device.type == self.device def test_oft_add_new_adapter_does_not_change_device(self, mlp): # same as first test, but using OFT config = 
OFTConfig(target_modules=["lin0"]) model = get_peft_model(mlp, config) - model = model.cuda() + model = model.to(self.device) model.lin0.oft_r.cpu() # check that the adapter is indeed on CPU and the base model on GPU assert model.lin0.oft_r.default.device.type == "cpu" - assert model.lin0.base_layer.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU assert model.lin0.oft_r.default.device.type == "cpu" # the rest should be on GPU - assert model.lin0.base_layer.weight.device.type == "cuda" - assert model.lin0.oft_r.other.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device + assert model.lin0.oft_r.other.device.type == self.device def test_vera_add_new_adapter_does_not_change_device(self, mlp): # same as first test, but using VERA config = VeraConfig(target_modules=["lin0"]) model = get_peft_model(mlp, config) - model = model.cuda() + model = model.to(self.device) model.lin0.vera_A.cpu() model.lin0.vera_lambda_d.cpu() # check that the adapter is indeed on CPU and the base model on GPU assert model.lin0.vera_A.default.device.type == "cpu" assert model.lin0.vera_lambda_d.default.device.type == "cpu" - assert model.lin0.base_layer.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU assert model.lin0.vera_A.default.device.type == "cpu" assert model.lin0.vera_lambda_d.default.device.type == "cpu" # the rest should be on GPU - assert model.lin0.base_layer.weight.device.type == "cuda" - assert model.lin0.vera_A.other.device.type == "cuda" - assert model.lin0.vera_lambda_d.other.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device + assert model.lin0.vera_A.other.device.type == self.device + assert model.lin0.vera_lambda_d.other.device.type == self.device def test_vblora_add_new_adapter_does_not_change_device(self, mlp): # same as first test, but using VBLoRA config = VBLoRAConfig(target_modules=["lin0"], vector_length=2) model = get_peft_model(mlp, config) - model = model.cuda() + model = model.to(self.device) model.lin0.vblora_logits_A.cpu() model.lin0.vblora_logits_B.cpu() model.lin0.vblora_vector_bank.cpu() @@ -1793,7 +1800,7 @@ def test_vblora_add_new_adapter_does_not_change_device(self, mlp): assert model.lin0.vblora_logits_A.default.device.type == "cpu" assert model.lin0.vblora_logits_B.default.device.type == "cpu" assert model.lin0.vblora_vector_bank.default.device.type == "cpu" - assert model.lin0.base_layer.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU @@ -1801,25 +1808,25 @@ def test_vblora_add_new_adapter_does_not_change_device(self, mlp): assert model.lin0.vblora_logits_B.default.device.type == "cpu" assert model.lin0.vblora_vector_bank.default.device.type == "cpu" # the rest should be on GPU - assert model.lin0.base_layer.weight.device.type == "cuda" - assert model.lin0.vblora_logits_A.other.device.type == "cuda" - assert model.lin0.vblora_logits_B.other.device.type == "cuda" - assert model.lin0.vblora_vector_bank.other.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device + assert model.lin0.vblora_logits_A.other.device.type == 
self.device + assert model.lin0.vblora_logits_B.other.device.type == self.device + assert model.lin0.vblora_vector_bank.other.device.type == self.device def test_hra_add_new_adapter_does_not_change_device(self, mlp): # same as first test, but using HRA config = HRAConfig(target_modules=["lin0"]) model = get_peft_model(mlp, config) - model = model.cuda() + model = model.to(self.device) model.lin0.hra_u.cpu() # check that the adapter is indeed on CPU and the base model on GPU assert model.lin0.hra_u.default.device.type == "cpu" - assert model.lin0.base_layer.weight.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device model.add_adapter("other", config) # check that after adding a new adapter, the old adapter is still on CPU assert model.lin0.hra_u.default.device.type == "cpu" # the rest should be on GPU - assert model.lin0.base_layer.weight.device.type == "cuda" - assert model.lin0.hra_u.other.device.type == "cuda" + assert model.lin0.base_layer.weight.device.type == self.device + assert model.lin0.hra_u.other.device.type == self.device diff --git a/tests/test_decoder_models.py b/tests/test_decoder_models.py index a1aca79b12..78b4947db6 100644 --- a/tests/test_decoder_models.py +++ b/tests/test_decoder_models.py @@ -303,6 +303,18 @@ def test_merge_layers_nan(self, test_name, model_id, config_cls, config_kwargs): def test_mixed_adapter_batches(self, test_name, model_id, config_cls, config_kwargs): self._test_mixed_adapter_batches(model_id, config_cls, config_kwargs) + @parameterized.expand( + PeftTestConfigManager.get_grid_parameters( + { + "model_ids": PEFT_DECODER_MODELS_TO_TEST, + "lora_kwargs": {"init_lora_weights": [False]}, + "task_type": "CAUSAL_LM", + }, + ) + ) + def test_generate_with_mixed_adapter_batches(self, test_name, model_id, config_cls, config_kwargs): + self._test_generate_with_mixed_adapter_batches_and_beam_search(model_id, config_cls, config_kwargs) + @parameterized.expand( PeftTestConfigManager.get_grid_parameters(FULL_GRID, filter_params_func=skip_oft_or_hra_and_gpt2) ) diff --git a/tests/test_encoder_decoder_models.py b/tests/test_encoder_decoder_models.py index e22f010089..8f8eb9c0dd 100644 --- a/tests/test_encoder_decoder_models.py +++ b/tests/test_encoder_decoder_models.py @@ -118,6 +118,18 @@ def test_merge_layers(self, test_name, model_id, config_cls, config_kwargs): def test_mixed_adapter_batches(self, test_name, model_id, config_cls, config_kwargs): self._test_mixed_adapter_batches(model_id, config_cls, config_kwargs) + @parameterized.expand( + PeftTestConfigManager.get_grid_parameters( + { + "model_ids": PEFT_ENCODER_DECODER_MODELS_TO_TEST, + "lora_kwargs": {"init_lora_weights": [False]}, + "task_type": "SEQ_2_SEQ_LM", + }, + ) + ) + def test_generate_with_mixed_adapter_batches(self, test_name, model_id, config_cls, config_kwargs): + self._test_generate_with_mixed_adapter_batches_and_beam_search(model_id, config_cls, config_kwargs) + # skip non lora models - generate does not work for prefix tuning, prompt tuning @parameterized.expand(PeftTestConfigManager.get_grid_parameters(FULL_GRID)) def test_generate(self, test_name, model_id, config_cls, config_kwargs): diff --git a/tests/test_gpu_examples.py b/tests/test_gpu_examples.py index 1d33631a05..3b0cde1444 100644 --- a/tests/test_gpu_examples.py +++ b/tests/test_gpu_examples.py @@ -70,6 +70,7 @@ replace_lora_weights_loftq, set_peft_model_state_dict, ) +from peft.import_utils import is_xpu_available from peft.tuners import boft from peft.utils import SAFETENSORS_WEIGHTS_NAME, 
infer_device from peft.utils.loftq_utils import NFQuantizer @@ -82,6 +83,7 @@ require_bitsandbytes, require_eetq, require_hqq, + require_multi_accelerator, require_non_cpu, require_non_xpu, require_optimum, @@ -2073,7 +2075,10 @@ def test_olora_with_quantized_model(self, bits): assert torch.isfinite(logits).all() -@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires a GPU") +@pytest.mark.skipif( + not (torch.cuda.is_available() or is_xpu_available()), reason="test requires a hardware accelerator" +) +@require_bitsandbytes class TestLoftQ: r""" Tests for LoftQ to ensure that it reduces the quantization error compared to normal LoRA quantization. @@ -2083,19 +2088,18 @@ class TestLoftQ: # quantization without LoftQ. Thus 1.03 means that the error should be decreased by 3% at least. This is a very # conservative value to prevent flakiness, in practice most gains are > 1.5 error_factor = 1.03 + device = infer_device() def get_input(self, model_id, device): tokenizer = AutoTokenizer.from_pretrained(model_id) inputs = tokenizer("All I want is", padding=True, return_tensors="pt") - if device == "cuda": - inputs = inputs.to("cuda") + inputs = inputs.to(self.device) return inputs def get_base_model(self, model_id, device, **kwargs): cls = AutoModelForSeq2SeqLM if "t5" in str(model_id) else AutoModelForCausalLM model = cls.from_pretrained(model_id, **kwargs).eval() - if device == "cuda": - model = model.to("cuda") + model = model.to(self.device) return model def get_logits(self, model, inputs): @@ -3808,28 +3812,30 @@ class TestBOFT: Test that we can correctly use half-precision models with BOFT. """ - @require_torch_gpu + device = infer_device() + + @require_non_cpu @pytest.mark.single_gpu_tests def test_boft_half_linear(self): # Check that we can use BoFT with model loaded in half precision - layer = torch.nn.Linear(160, 160).cuda() + layer = torch.nn.Linear(160, 160).to(self.device) layer = boft.layer.Linear(layer, "layer", boft_n_butterfly_factor=2).to(dtype=torch.bfloat16) - x = torch.randn(160, 160, device="cuda", dtype=torch.bfloat16) + x = torch.randn(160, 160, device=self.device, dtype=torch.bfloat16) layer(x) # does not raise - @require_torch_gpu + @require_non_cpu @pytest.mark.single_gpu_tests def test_boft_half_conv(self): - conv = torch.nn.Conv2d(1, 1, 4).cuda() + conv = torch.nn.Conv2d(1, 1, 4).to(self.device) conv = boft.layer.Conv2d(conv, "conv", boft_n_butterfly_factor=2).to(dtype=torch.bfloat16) - x = torch.randn(1, 160, 160, device="cuda", dtype=torch.bfloat16) + x = torch.randn(1, 160, 160, device=self.device, dtype=torch.bfloat16) conv(x) # does not raise -@require_torch_gpu class TestPTuningReproducibility: device = infer_device() + @require_non_cpu def test_p_tuning_exactly_reproducible_after_loading(self, tmp_path): # See: https://github.com/huggingface/peft/issues/2043#issuecomment-2321522577 # Ensure that after loading a p-tuning checkpoint, results are exactly reproducible (before the patch, they were @@ -3865,7 +3871,6 @@ def test_p_tuning_exactly_reproducible_after_loading(self, tmp_path): torch.testing.assert_close(gen_loaded, gen_peft) -@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires a GPU") @pytest.mark.single_gpu_tests class TestLowCpuMemUsageDifferentDevices: """Test for the low CPU memory usage option for loading PEFT models. 
@@ -3878,7 +3883,8 @@ class TestLowCpuMemUsageDifferentDevices: model_id = "hf-internal-testing/tiny-random-OPTForCausalLM" device = infer_device() - @pytest.mark.parametrize("device_model, device_sd", [("cpu", "cuda"), ("cuda", "cpu")]) + @require_non_cpu + @pytest.mark.parametrize("device_model, device_sd", [("cpu", infer_device()), (infer_device(), "cpu")]) def test_low_cpu_mem_usage_model_model_on_gpu_state_dict_on_cpu_works(self, device_model, device_sd): # specifically test diverging devices for the model and state_dict inputs = {"input_ids": torch.randint(0, 100, (1, 10)), "attention_mask": torch.ones(1, 10)} @@ -3914,6 +3920,7 @@ def test_low_cpu_mem_usage_model_model_on_gpu_state_dict_on_cpu_works(self, devi assert torch.allclose(logits_low_cpu_mem, logits_not_low_cpu_mem) assert {p.device.type for p in model.parameters()} == {device_model} + @require_bitsandbytes @pytest.mark.parametrize("quantization_method", ["bnb-4bit", "bnb-8bit"]) def test_low_cpu_mem_usage_with_quantization(self, quantization_method): # Ensure that low_cpu_mem_usage works with quantization @@ -3953,7 +3960,7 @@ class TestEvaInitializationGPU: MAX_LENGTH = 256 LORA_DIM = 8 LORA_ALPHA = 1 - DEVICE = "cuda" + DEVICE = infer_device() @pytest.fixture def tokenizer(self): @@ -4021,7 +4028,8 @@ def is_bnb_model(self, model): def collate_fn(examples): return {k: torch.stack([v[k] for v in examples], dim=0) for k in examples[0].keys()} - @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires a GPU") + @require_non_cpu + @require_bitsandbytes @pytest.mark.single_gpu_tests @pytest.mark.parametrize("model_fixture", ["model", "model_bnb"], indirect=True) def test_eva_initialization_consistency(self, model_fixture, dataset, peft_config): @@ -4059,14 +4067,16 @@ def test_eva_initialization_consistency(self, model_fixture, dataset, peft_confi ) -@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires a GPU") @pytest.mark.multi_gpu_tests class TestPrefixTuning: + device = infer_device() + + @require_multi_accelerator def test_prefix_tuning_multiple_devices_decoder_model(self): # See issue 2134 model_id = "hf-internal-testing/tiny-random-MistralForCausalLM" tokenizer = AutoTokenizer.from_pretrained(model_id, padding="left") - inputs = tokenizer(["A list of colors: red, blue"], return_tensors="pt").to("cuda") + inputs = tokenizer(["A list of colors: red, blue"], return_tensors="pt").to(self.device) device_map = { "model.embed_tokens": 0, @@ -4086,11 +4096,12 @@ def test_prefix_tuning_multiple_devices_decoder_model(self): model = get_peft_model(model, peft_config) model.generate(**inputs) # does not raise + @require_multi_accelerator def test_prefix_tuning_multiple_devices_encoder_decoder_model(self): # See issue 2134 model_id = "hf-internal-testing/tiny-random-T5Model" tokenizer = AutoTokenizer.from_pretrained(model_id, padding="left") - inputs = tokenizer(["A list of colors: red, blue"], return_tensors="pt").to("cuda") + inputs = tokenizer(["A list of colors: red, blue"], return_tensors="pt").to(self.device) device_map = { "shared": 0, "encoder.embed_tokens": 0, @@ -4120,7 +4131,9 @@ def test_prefix_tuning_multiple_devices_encoder_decoder_model(self): model.generate(**inputs) # does not raise -@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires a GPU") +@pytest.mark.skipif( + not (torch.cuda.is_available() or is_xpu_available()), reason="test requires a hardware accelerator" +) @pytest.mark.single_gpu_tests class TestHotSwapping: def 
test_hotswapping_compiled_model_does_not_trigger_recompilation(self): diff --git a/tests/test_initialization.py b/tests/test_initialization.py index 103fa6696d..510f12892e 100644 --- a/tests/test_initialization.py +++ b/tests/test_initialization.py @@ -1145,6 +1145,7 @@ def test_mha_with_dora_raises(self, mha_cls): get_peft_model(model, config) def test_mha_exposes_attributes(self, mha_cls): + # MHA requires a bunch of attributes to be exposed, try to check them exhaustively here model = mha_cls() embed_dim = model.mha.embed_dim kdim = model.mha.kdim @@ -1154,6 +1155,12 @@ def test_mha_exposes_attributes(self, mha_cls): dropout = model.mha.dropout batch_first = model.mha.batch_first head_dim = model.mha.head_dim + in_proj_weight = model.mha.in_proj_weight + in_proj_bias = model.mha.in_proj_bias + out_proj = model.mha.out_proj + bias_k = model.mha.bias_k + bias_v = model.mha.bias_v + add_zero_attn = model.mha.add_zero_attn config = LoraConfig(target_modules=["mha"]) peft_model = get_peft_model(model, config) @@ -1165,6 +1172,39 @@ def test_mha_exposes_attributes(self, mha_cls): assert peft_model.base_model.mha.dropout == dropout assert peft_model.base_model.mha.batch_first == batch_first assert peft_model.base_model.mha.head_dim == head_dim + if in_proj_weight is not None: + assert torch.allclose(peft_model.base_model.mha.in_proj_weight, in_proj_weight) + else: + assert peft_model.base_model.mha.in_proj_weight is None + if in_proj_bias is not None: + assert torch.allclose(peft_model.base_model.mha.in_proj_bias, in_proj_bias) + else: + assert peft_model.base_model.mha.in_proj_bias is None + assert peft_model.base_model.mha.out_proj is out_proj + if bias_k is not None: + assert torch.allclose(peft_model.base_model.mha.bias_k, bias_k) + else: + assert peft_model.base_model.mha.bias_k is None + if bias_v is not None: + assert torch.allclose(peft_model.base_model.mha.bias_v, bias_v) + else: + assert peft_model.base_model.mha.bias_v is None + assert peft_model.base_model.mha.add_zero_attn == add_zero_attn + + def test_mha_merge_masks_method(self, mha_cls): + # MHA requires a merge_masks method to be exposed, check that it works + model = mha_cls() + config = LoraConfig(target_modules=["mha"]) + peft_model = get_peft_model(model, config) + + attn_mask = torch.randint(0, 2, (10, 10)) + key_padding_mask = torch.randint(0, 2, (10, 10)) + query = torch.rand(10, 10, 10) + merged_mask0, mask_type0 = model.mha.merge_masks(attn_mask, key_padding_mask, query) + merged_mask1, mask_type1 = peft_model.base_model.mha.merge_masks(attn_mask, key_padding_mask, query) + + assert torch.allclose(merged_mask0, merged_mask1) + assert mask_type0 == mask_type1 def test_lora_with_bias_extra_params(self): # lora with lora_bias=True @@ -1916,6 +1956,38 @@ def data(self): torch.manual_seed(233) return torch.rand(1000, 1000).to(self.torch_device) + @pytest.mark.parametrize("corda_method", ("ipm", "kpm")) + def test_lora_corda_no_redundant_fields(self, data, corda_method): + original_model = self.get_model() + model = deepcopy(original_model) + + corda_config = CordaConfig( + corda_method=corda_method, + ) + config = LoraConfig( + init_lora_weights="corda", + target_modules=["linear"], + corda_config=corda_config, + ) + preprocess_corda( + model, + config, + run_model=lambda: model(data), + hooked_model=model, + ) + peft_model = get_peft_model(model, config) + + # check if the redundant fields are removed + assert not hasattr(peft_model.base_model.linear, "sample_count") + assert not hasattr(peft_model.base_model.linear, 
"covariance_matrix") + assert not hasattr(peft_model.base_model.linear, "corda_method") + assert not hasattr(peft_model.base_model.linear, "rank") + assert not hasattr(peft_model.base_model.linear, "eigens") + + # legacy debug fields + assert not hasattr(peft_model.base_model.linear, "mean") + assert not hasattr(peft_model.base_model.linear, "std") + @pytest.mark.parametrize("corda_method", ("ipm", "kpm")) def test_lora_corda_sample_count(self, data, corda_method): original_model = self.get_model() @@ -1923,6 +1995,7 @@ def test_lora_corda_sample_count(self, data, corda_method): corda_config = CordaConfig( corda_method=corda_method, + prune_temporary_fields=False, ) config = LoraConfig( init_lora_weights="corda", @@ -1960,6 +2033,7 @@ def hook(*args): corda_config = CordaConfig( corda_method=corda_method, + prune_temporary_fields=False, ) config = LoraConfig( init_lora_weights="corda", @@ -2961,3 +3035,25 @@ def test_hotswap_extra_key_raises(self, tmp_path): msg = f"Hot swapping the adapter did not succeed. Unexpected keys: {new_key}" with pytest.raises(RuntimeError, match=msg): hotswap_adapter(model, tmp_path / "adapter1", adapter_name="default") + + +def test_import_peft_type_to_model_mapping_deprecation_warning(recwarn): + # This is for backwards compatibility: In #2282, PEFT_TYPE_TO_MODEL_MAPPING was removed as it was redundant with + # PEFT_TYPE_TO_TUNER_MAPPING. However, third party code could still use this mapping, e.g.: + # https://github.com/AutoGPTQ/AutoGPTQ/blob/6689349625de973b9ee3016c28c11f32acf7f02c/auto_gptq/utils/peft_utils.py#L8 + # TODO: Remove after 2026-01 + + # first check that there is no warning under normal circumstances + from peft.peft_model import PeftModel # noqa + + expected = ( + "PEFT_TYPE_TO_MODEL_MAPPING is deprecated, please use `from peft import PEFT_TYPE_TO_TUNER_MAPPING` instead" + ) + warnings = (w.message.args[0] for w in recwarn.list) + assert not any(w.startswith(expected) for w in warnings) + + from peft.peft_model import PEFT_TYPE_TO_MODEL_MAPPING # noqa + + # check that there is a warning with this message after importing the variable + warnings = (w.message.args[0] for w in recwarn.list) + assert any(w.startswith(expected) for w in warnings) diff --git a/tests/test_other.py b/tests/test_other.py index 75d8a7565c..7ee521f1c3 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. + import pytest import torch from torch import nn from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification -from peft import LoraConfig, get_peft_model +from peft import LoraConfig, PeftModel, get_peft_model from peft.utils.other import ModulesToSaveWrapper @@ -199,3 +200,100 @@ def test_transient_attribute_access_non_existing_adapter(self, mlp): model.base_model.model.lin1._active_adapter = "does-not-exist" with pytest.raises(AttributeError, match="has no attribute 'weight'"): model.lin1.weight + + +class TestModulesToSaveNameSubstringBug: + """Test a bug that could occur with multiple modules to save where one adapter's name is a substring of another + adapter's name. + + This bug was the result of an error in the logic of modifying the state_dict for modules_to_save in + set_peft_model_state_dict. The error in the logic was that it was checked if an entry from modules_to_save (a set + of strings) is a substring of a key of the state_dict. 
If it was, a new name was assigned to that key in the
+    state_dict, which would allow the weight to be loaded later.
+
+    The issue that stems from the substring check occurs if there are multiple modules_to_save, and one of them has a
+    name that is a substring of another. So, e.g., if one is named "classifier" and the other is named "classifier2",
+    there could be a false match.
+
+    This bug was reported in #2289.
+
+    """
+
+    def get_model(self):
+        class MyModule(nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.lin = nn.Linear(5, 4)
+                # important: "classifier" is a substring of "classifier2", "classifier3", "classifier4"
+                self.classifier = nn.Linear(4, 2)
+                self.classifier2 = nn.Linear(4, 2)
+                self.classifier3 = nn.Linear(4, 2)
+                self.classifier4 = nn.Linear(4, 2)
+
+            def forward(self, x):
+                x = self.lin(x)
+                return self.classifier(x) + self.classifier2(x) + self.classifier3(x) + self.classifier4(x)
+
+        torch.manual_seed(0)
+        return MyModule()
+
+    @pytest.fixture
+    def path_merged_and_unmerged(self, tmp_path):
+        # Create 2 checkpoints:
+        # 1. merged: the model after calling merge_and_unload
+        # 2. unmerged: the PEFT model saved without calling merge_and_unload
+        lora_config = LoraConfig(
+            target_modules=["lin"],
+            # important: "classifier" is a substring of "classifier2", "classifier3", "classifier4"
+            modules_to_save=["classifier", "classifier2", "classifier3", "classifier4"],
+        )
+        model = get_peft_model(self.get_model(), lora_config)
+        # mock training
+        for _ in range(5):
+            optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
+            output = model(torch.randn(10, 5))
+            loss = output.sum()
+            loss.backward()
+            optimizer.step()
+
+        # save the peft model without merging
+        path_unmerged = tmp_path / "unmerged"
+        model.save_pretrained(path_unmerged)
+
+        # merge the model and save the state_dict
+        path_merged = tmp_path / "merged"
+        merged = model.merge_and_unload()
+        state_dict = merged.state_dict()
+        torch.save(state_dict, path_merged)
+
+        return path_merged, path_unmerged
+
+    def test_load_merged_and_unmerged_same_weights(self, path_merged_and_unmerged):
+        # Note that this test is quasi-flaky: it has a 1 in 4 chance of passing even without the bugfix. It passes
+        # when "classifier" happens to be the last element of the set model.modules_to_save, and the order of that
+        # set is random. It is not possible to just run this test multiple times to minimize the probability of this
+        # happening, because within the same process, the hash order is consistent. With the bug fix, this doesn't
+        # matter, as the test will always pass, but if there is a regression, there is a 1 in 4 chance of not
+        # catching it. Since the CI runs many tests, it is overall very unlikely that none of them will catch it.
+        # Thus, if you see this test failing in CI, be aware that some of the passing tests may just pass owing to
+        # randomness.
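+        # As an illustration of the underlying bug (a simplified sketch, not the actual PEFT code): a substring check
+        # like `any(name in key for name in modules_to_save)` matches "classifier" against a key such as
+        # "classifier2.weight", so the wrong module could be targeted. Matching on module boundaries instead, e.g.
+        # `key.startswith(name + ".")`, avoids this false positive.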
+        path_merged, path_unmerged = path_merged_and_unmerged
+
+        # load the merged model directly
+        state_dict = torch.load(path_merged, weights_only=True)
+        model = self.get_model()
+        model.load_state_dict(state_dict)
+        sd_merged = model.state_dict()
+        del model
+
+        # load the unmerged model and merge it
+        unmerged = PeftModel.from_pretrained(self.get_model(), path_unmerged)
+        sd_unmerged = unmerged.merge_and_unload().state_dict()
+
+        assert sd_merged.keys() == sd_unmerged.keys()
+        for key in sd_merged.keys():
+            param_merged = sd_merged[key]
+            param_unmerged = sd_unmerged[key]
+            assert torch.allclose(param_merged, param_unmerged)
diff --git a/tests/testing_common.py b/tests/testing_common.py
index fec265812b..a553b24747 100644
--- a/tests/testing_common.py
+++ b/tests/testing_common.py
@@ -941,6 +941,87 @@ def _test_mixed_adapter_batches(self, model_id, config_cls, config_kwargs):
         assert torch.allclose(logits_adapter0[1::3], logits_mixed[1::3], atol=atol, rtol=rtol)
         assert torch.allclose(logits_adapter1[2::3], logits_mixed[2::3], atol=atol, rtol=rtol)
 
+    def _test_generate_with_mixed_adapter_batches_and_beam_search(self, model_id, config_cls, config_kwargs):
+        # Test generating with beam search and mixing different adapters in a single batch by passing the
+        # adapter_names argument. See #2283.
+        if config_cls not in (LoraConfig,):
+            return pytest.skip(f"Mixed adapter batches not supported for {config_cls}")
+
+        config = config_cls(
+            base_model_name_or_path=model_id,
+            **config_kwargs,
+        )
+
+        torch.manual_seed(0)
+        model = self.transformers_class.from_pretrained(model_id)
+        model = get_peft_model(model, config, adapter_name="adapter0").eval()
+        model.add_adapter("adapter1", config)
+
+        # In contrast to forward, for generate, it can sometimes happen that we get the same results as the base model
+        # even with LoRA applied because the impact of LoRA is not big enough. Therefore, use this "trick" to make LoRA
+        # stronger.
+        for name, param in model.named_parameters():
+            if model.base_model.prefix in name:
+                param.data.mul_(10.0)
+
+        model = model.to(self.torch_device).eval()
+
+        dummy_input = self.prepare_inputs_for_testing()
+        # ensure that we have at least 3 samples for this test
+        dummy_input = {k: torch.cat([v for _ in range(3)]) for k, v in dummy_input.items()}
+
+        gen_kwargs = {**dummy_input, "max_length": 20, "num_beams": 10, "early_stopping": True}
+        with torch.inference_mode():
+            with model.disable_adapter():
+                gen_base = model.generate(**gen_kwargs)
+
+        model.set_adapter("adapter0")
+        with torch.inference_mode():
+            gen_adapter0 = model.generate(**gen_kwargs)
+
+        model.set_adapter("adapter1")
+        with torch.inference_mode():
+            gen_adapter1 = model.generate(**gen_kwargs)
+
+        def remove_padding(seq, pad_value):
+            lst = list(seq)
+            while lst and (lst[-1] == pad_value):
+                lst.pop()
+            return lst
+
+        def gens_are_same(gen0, gen1):
+            # Special function to compare generations. We cannot use torch.allclose because it would raise an error
+            # when the sequence lengths differ. Moreover, we need to remove the padding from the sequences. This is
+            # because, even though identical sequences should normally have the same length, when we do mixed adapter
+            # batches, each sample will be padded to the longest sequence in that mixed batch, which can be different
+            # from the longest sequence without mixed adapter batches.
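+            # For example (illustrative token ids, assuming 0 is the eos/pad id): [5, 7, 2, 0, 0] and [5, 7, 2, 0]
+            # count as the same generation once the trailing padding is stripped.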
+ pad_value = model.config.eos_token_id + for sample0, sample1 in zip(gen0, gen1): + sample0 = remove_padding(sample0, pad_value) + sample1 = remove_padding(sample1, pad_value) + if (len(sample0) != len(sample1)) or (sample0 != sample1): + # at least one sample differs, the generations are not identical + return False + return True + + # sanity check that there are enough outputs and that they are different + assert len(gen_base) == len(gen_adapter0) == len(gen_adapter1) + assert len(gen_adapter1) >= 3 + assert not gens_are_same(gen_base, gen_adapter0) + assert not gens_are_same(gen_base, gen_adapter1) + assert not gens_are_same(gen_adapter0, gen_adapter1) + + # alternate between base model, adapter0, and adapter1 + adapters = ["__base__", "adapter0", "adapter1"] + gen_kwargs["adapter_names"] = [adapters[i % 3] for i in (range(len(dummy_input["input_ids"])))] + + with torch.inference_mode(): + gen_mixed = model.generate(**gen_kwargs) + + assert gens_are_same(gen_base[::3], gen_mixed[::3]) + assert gens_are_same(gen_adapter0[1::3], gen_mixed[1::3]) + assert gens_are_same(gen_adapter1[2::3], gen_mixed[2::3]) + def _test_generate(self, model_id, config_cls, config_kwargs): model = self.transformers_class.from_pretrained(model_id) config = config_cls(