Merge branch 'main' into feature/ci-cache
githubnemo authored Jan 22, 2025
2 parents 0542f71 + 93d8046 commit 1f2179b
Showing 31 changed files with 596 additions and 140 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build_docker_images.yml
@@ -10,6 +10,8 @@ concurrency:
group: docker-image-builds
cancel-in-progress: false

permissions: {}

env:
CI_SLACK_CHANNEL: ${{ secrets.CI_DOCKER_CHANNEL }}

2 changes: 2 additions & 0 deletions .github/workflows/build_documentation.yml
@@ -7,6 +7,8 @@ on:
- doc-builder*
- v*-release

permissions: {}

jobs:
build:
uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
2 changes: 2 additions & 0 deletions .github/workflows/build_pr_documentation.yml
@@ -7,6 +7,8 @@ concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions: {}

jobs:
build:
uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
2 changes: 2 additions & 0 deletions .github/workflows/integrations_tests.yml
@@ -7,6 +7,8 @@ on:
description: 'Branch to test on'
required: true

permissions: {}

jobs:
run_transformers_integration_tests:
strategy:
1 change: 1 addition & 0 deletions .github/workflows/nightly-bnb.yml
@@ -12,6 +12,7 @@ env:
NVIDIA_DISABLE_REQUIRE: "1"
SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }}

permissions: {}

jobs:
run_all_tests_single_gpu:
1 change: 1 addition & 0 deletions .github/workflows/nightly.yml
@@ -12,6 +12,7 @@ env:
NVIDIA_DISABLE_REQUIRE: "1"
SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }}

permissions: {}

jobs:
run_all_tests_single_gpu:
2 changes: 2 additions & 0 deletions .github/workflows/stale.yml
@@ -4,6 +4,8 @@ on:
schedule:
- cron: "0 15 * * *"

permissions: {}

jobs:
close_stale_issues:
name: Close Stale Issues
3 changes: 3 additions & 0 deletions .github/workflows/test-docker-build.yml
@@ -5,6 +5,9 @@ on:
paths:
# Run only when DockerFile files are modified
- "docker/*/Dockerfile"

permissions: {}

jobs:
get_changed_files:
name: "Build all modified docker images"
2 changes: 2 additions & 0 deletions .github/workflows/tests-main.yml
@@ -6,6 +6,8 @@ on:
paths-ignore:
- 'docs/**'

permissions: {}

jobs:
tests:
runs-on: ubuntu-latest
2 changes: 2 additions & 0 deletions .github/workflows/tests.yml
@@ -12,6 +12,8 @@ on:
env:
HF_HOME: .cache/huggingface

permissions: {}

jobs:
check_code_quality:
runs-on: ubuntu-latest
2 changes: 2 additions & 0 deletions .github/workflows/torch_compile_tests.yml
@@ -17,6 +17,8 @@ env:
# To be able to run tests on CUDA 12.2
NVIDIA_DISABLE_REQUIRE: "1"

permissions: {}

jobs:
run_tests_with_compile:
runs-on:
2 changes: 2 additions & 0 deletions .github/workflows/trufflehog.yml
@@ -3,6 +3,8 @@ on:

name: Secret Leaks

permissions: {}

jobs:
trufflehog:
runs-on: ubuntu-latest
2 changes: 2 additions & 0 deletions .github/workflows/upload_pr_documentation.yml
@@ -6,6 +6,8 @@ on:
types:
- completed

permissions: {}

jobs:
build:
uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main
4 changes: 2 additions & 2 deletions .github/workflows/zizmor.yaml
@@ -3,13 +3,13 @@ name: CI security linting
on:
push:
branches: ["main"]
paths:
- '.github/**'
pull_request:
branches: ["*"]
paths:
- '.github/**'

permissions: {}

jobs:
zizmor:
name: zizmor latest via Cargo
10 changes: 10 additions & 0 deletions .github/zizmor.yml
@@ -3,3 +3,13 @@ rules:
ignore:
# this workflow is only triggered after maintainer approval
- upload_pr_documentation.yml:3:1
cache-poisoning:
ignore:
# the docker buildx binary is cached and zizmor warns about a cache poisoning attack.
# OTOH this cache would make us more resilient against an intrusion on docker-buildx' side.
# There is no obvious benefit so we leave it as it is.
- build_docker_images.yml:37:9
- build_docker_images.yml:70:9
- build_docker_images.yml:103:9
- build_docker_images.yml:136:9
- build_docker_images.yml:169:9
5 changes: 5 additions & 0 deletions examples/corda_finetuning/README.md
@@ -100,7 +100,12 @@ lora_config = LoraConfig(
init_lora_weights="corda",
corda_config=corda_config,
)

# Call `preprocess_corda` first to collect covariance matrix and build SVD result for model
# For more details, please refer to documentation of `preprocess_corda`
preprocess_corda(model, lora_config, run_model=run_model)

# Call `get_peft_model` after preprocessing, or else you'll encounter error
peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()

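For context, `run_model` in the README snippet above is defined earlier in that file. A minimal sketch of such a callback, assuming `torch`, `model`, and a small calibration dataloader already exist (the dataloader and the ~256-sample count are illustrative assumptions, not part of this commit), might look like:

    # Hypothetical calibration callback: run plain inference so the forward hooks
    # installed by `preprocess_corda` can accumulate covariance statistics.
    @torch.no_grad()
    def run_model():
        model.eval()
        for batch in calib_dataloader:  # assumed iterable of ~256 tokenized samples
            model(input_ids=batch["input_ids"].to(model.device))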
2 changes: 1 addition & 1 deletion examples/corda_finetuning/preprocess.py
@@ -21,7 +21,7 @@
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

from peft.mapping import get_peft_model
from peft import get_peft_model
from peft.tuners.lora.config import CordaConfig, LoraConfig
from peft.tuners.lora.corda import preprocess_corda

22 changes: 22 additions & 0 deletions src/peft/import_utils.py
@@ -13,9 +13,11 @@
# limitations under the License.
import importlib
import importlib.metadata as importlib_metadata
import platform
from functools import lru_cache

import packaging.version
import torch


@lru_cache
@@ -111,3 +113,23 @@ def is_torchao_available():
f"but only versions above {TORCHAO_MINIMUM_VERSION} are supported"
)
return True


@lru_cache
def is_xpu_available(check_device=False):
"""
Checks if XPU acceleration is available and potentially if a XPU is in the environment
"""

system = platform.system()
if system == "Darwin":
return False
else:
if check_device:
try:
# Will raise a RuntimeError if no XPU is found
_ = torch.xpu.device_count()
return torch.xpu.is_available()
except RuntimeError:
return False
return hasattr(torch, "xpu") and torch.xpu.is_available()
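As a usage note (not part of the diff): the new helper can complement the existing CUDA check when picking a device. A rough sketch, assuming it is imported from `peft.import_utils`:

    import torch
    from peft.import_utils import is_xpu_available

    # Illustrative device selection: prefer CUDA, fall back to XPU, else CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif is_xpu_available(check_device=True):  # also verifies a device is actually present
        device = torch.device("xpu")
    else:
        device = torch.device("cpu")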
16 changes: 16 additions & 0 deletions src/peft/peft_model.py
@@ -3022,3 +3022,19 @@ def check_irrgular(vals: list[bool | Literal["irregular"]]) -> bool | Literal["i
devices=devices,
)
return adapter_model_status


def __getattr__(name):
if name == "PEFT_TYPE_TO_MODEL_MAPPING":
# This is for backwards compatibility: In #2282, PEFT_TYPE_TO_MODEL_MAPPING was removed as it was redundant with
# PEFT_TYPE_TO_TUNER_MAPPING. However, third party code could still use this mapping, e.g.:
# https://github.com/AutoGPTQ/AutoGPTQ/blob/6689349625de973b9ee3016c28c11f32acf7f02c/auto_gptq/utils/peft_utils.py#L8
# TODO: Remove after 2026-01
msg = (
"PEFT_TYPE_TO_MODEL_MAPPING is deprecated, please use `from peft import PEFT_TYPE_TO_TUNER_MAPPING` instead. "
"The deprecated variable will be removed in 2026."
)
warnings.warn(msg, category=DeprecationWarning)
return PEFT_TYPE_TO_TUNER_MAPPING

raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
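The module-level `__getattr__` above keeps the old attribute access working while emitting a deprecation warning (PEP 562). A small illustrative sketch of what third-party code would observe:

    import warnings
    from peft import peft_model

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        mapping = peft_model.PEFT_TYPE_TO_MODEL_MAPPING  # resolved via module __getattr__
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)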
5 changes: 5 additions & 0 deletions src/peft/tuners/lora/config.py
@@ -145,6 +145,8 @@ class CordaConfig:
use_float16_for_covariance (`bool`):
If true, uses float16 for the covariance matrix. This can reduce the memory usage of the covariance matrix
by half, but may lead to numerical instability. Defaults to `False`.
prune_temporary_fields (`bool`):
If true, temporary fields generated in CorDA preprocessing will be pruned. Defaults to `True`.
"""

cache_file: Optional[str] = field(
@@ -189,6 +191,9 @@ class CordaConfig:
)
},
)
prune_temporary_fields: bool = field(
default=True, metadata={"help": "If true, temporary fields generated in CorDA preprocessing will be pruned."}
)


@dataclass
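To show the new field in context, a CorDA configuration that keeps the temporary fields for inspection and caches the SVD results could look roughly like this (names and values are illustrative assumptions, not taken from the diff):

    from peft.tuners.lora.config import CordaConfig, LoraConfig

    corda_config = CordaConfig(
        corda_method="kpm",            # "kpm" (knowledge-preserved) or "ipm" (instruction-previewed)
        cache_file="corda_cache.pt",   # reuse the SVD results on later runs
        prune_temporary_fields=False,  # keep covariance_matrix etc. around for debugging
    )
    lora_config = LoraConfig(init_lora_weights="corda", corda_config=corda_config)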
44 changes: 22 additions & 22 deletions src/peft/tuners/lora/corda.py
@@ -61,24 +61,27 @@ def preprocess_corda(
"""
Build necessary CorDA fields for a model.
For each `M * N` linear layer, a `M * M` covariance matrix will be built temporarily during the preprocessing
process, consuming roughly another `2 * MODEL_SIZE` memory for typical LLMs if model weight is FP16 and covariance
is FP32. If that's too much, consider specifying `use_float16_for_covariance` in `lora_config.corda_config`.
Args:
model (`nn.Module`):
Model to preprocess.
lora_config (`LoraConfig`):
Lora configuration of the model. `lora_config.corda_config` should be set.
run_model (`Optional[Callable[[], None]]`):
Callback to run the model when building covariance. Typically you should run model inference on your sample
dataset in this callback. Experiments have shown 256 samples to be a good default dataset size. `run_model`
can be `None` only if covariance file in `lora_config.corda_config` is already created.
dataset in this callback. Experiments have shown that when token count per sample is 2048, hidden dimension
is 4096, collecting 256 distinct samples is enough. If you collect too few or too repetitive samples, the
covariance matrix may be low-ranked and unstabilize preprocessing. You can estimate sample count as
`HIDDEN_DIM / TOKEN_PER_SAMPLE * 128`. `run_model` can be `None` only if covariance file in
`lora_config.corda_config` is already created.
hooked_model (`Optional[nn.Module]`):
Model to hook when building covariance. If none, original model will be hooked. This is only useful when
you want to hook a different model than the one you are training, typically you should leave this `None`.
Upon completion, the following fields are set for each target module:
corda_method (`Literal["ipm", "kpm"]`):
CorDA method to apply. "ipm" for Instruction-Previewed Mode, "kpm" for Knowledge-Preserved Mode.
rank (`int`):
Rank of CorDA to apply.
eigens.S_WC (`torch.Tensor`):
Singular values of the weight matrix.
eigens.U_WC (`torch.Tensor`):
@@ -90,13 +93,12 @@
covariance_file = lora_config.corda_config.covariance_file
corda_method = lora_config.corda_config.corda_method
verbose = lora_config.corda_config.verbose
prune_temporary_fields = lora_config.corda_config.prune_temporary_fields

# If cache exists, skip building
if cache_file is not None and os.path.exists(cache_file) and os.path.getsize(cache_file) > 0:
cache = torch.load(cache_file, map_location=get_model_device(model))
for name, module in target_modules(model, lora_config):
module.corda_method = cache[f"{name}.corda_method"]
module.rank = cache[f"{name}.rank"]
module.eigens = CordaEigens(
S_WC=cache[f"{name}.eigens.S_WC"],
U_WC=cache[f"{name}.eigens.U_WC"],
@@ -123,12 +125,22 @@
# Crop CorDA eigens so that there's less to save
crop_corda_eigens(model, lora_config)

# Remove redundant fields if exist
if prune_temporary_fields:
for name, module in target_modules(model, lora_config):
if hasattr(module, "sample_count"):
del module.sample_count
if hasattr(module, "covariance_matrix"):
del module.covariance_matrix
if hasattr(module, "corda_method"):
del module.corda_method
if hasattr(module, "rank"):
del module.rank

# Save cache to disk
if cache_file is not None:
cache: dict[str, Any] = {}
for name, module in target_modules(model, lora_config):
cache[f"{name}.corda_method"] = module.corda_method
cache[f"{name}.rank"] = module.rank
cache[f"{name}.eigens.S_WC"] = module.eigens.S_WC
cache[f"{name}.eigens.U_WC"] = module.eigens.U_WC
cache[f"{name}.eigens.V_WC"] = module.eigens.V_WC
@@ -174,15 +186,9 @@ def hook(module, input, output):
"Invalid value found in covariance. Please file an issue at https://github.com/huggingface/peft/issues."
)

# calculate mean and std
mean = input.mean(0)
std = input.std(0)

# add to module
module.sample_count += 1
module.covariance_matrix += covariance
module.mean += mean
module.std += std

# free memory
del covariance, input
@@ -191,8 +197,6 @@ def hook(module, input, output):
for name, module in target_modules(hooked_model, config):
module.sample_count = 0
module.covariance_matrix = 0
module.mean = 0
module.std = 0
handles.append(module.register_forward_hook(hook))

run_model()
@@ -213,14 +217,10 @@ def hook(module, input, output):
if name in targets:
targets[name].sample_count = module.sample_count
targets[name].covariance_matrix = module.covariance_matrix
targets[name].mean = module.mean
targets[name].std = module.std

# Divide by sample count
for name, module in target_modules(model, config):
module.covariance_matrix /= module.sample_count
module.mean /= module.sample_count
module.std /= module.sample_count

# Save covariance to disk
if covariance_file is not None:
