Merge branch 'main' into feature/ci-cache
githubnemo authored Jan 22, 2025
2 parents 0542f71 + 93d8046 commit 1f2179b
Showing 31 changed files with 596 additions and 140 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build_docker_images.yml
@@ -10,6 +10,8 @@ concurrency:
group: docker-image-builds
cancel-in-progress: false

permissions: {}

env:
CI_SLACK_CHANNEL: ${{ secrets.CI_DOCKER_CHANNEL }}

2 changes: 2 additions & 0 deletions .github/workflows/build_documentation.yml
@@ -7,6 +7,8 @@ on:
- doc-builder*
- v*-release

permissions: {}

jobs:
build:
uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
2 changes: 2 additions & 0 deletions .github/workflows/build_pr_documentation.yml
@@ -7,6 +7,8 @@ concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions: {}

jobs:
build:
uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
2 changes: 2 additions & 0 deletions .github/workflows/integrations_tests.yml
@@ -7,6 +7,8 @@ on:
description: 'Branch to test on'
required: true

permissions: {}

jobs:
run_transformers_integration_tests:
strategy:
1 change: 1 addition & 0 deletions .github/workflows/nightly-bnb.yml
@@ -12,6 +12,7 @@ env:
NVIDIA_DISABLE_REQUIRE: "1"
SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }}

permissions: {}

jobs:
run_all_tests_single_gpu:
1 change: 1 addition & 0 deletions .github/workflows/nightly.yml
@@ -12,6 +12,7 @@ env:
NVIDIA_DISABLE_REQUIRE: "1"
SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }}

permissions: {}

jobs:
run_all_tests_single_gpu:
2 changes: 2 additions & 0 deletions .github/workflows/stale.yml
@@ -4,6 +4,8 @@ on:
schedule:
- cron: "0 15 * * *"

permissions: {}

jobs:
close_stale_issues:
name: Close Stale Issues
3 changes: 3 additions & 0 deletions .github/workflows/test-docker-build.yml
@@ -5,6 +5,9 @@ on:
paths:
# Run only when DockerFile files are modified
- "docker/*/Dockerfile"

permissions: {}

jobs:
get_changed_files:
name: "Build all modified docker images"
2 changes: 2 additions & 0 deletions .github/workflows/tests-main.yml
@@ -6,6 +6,8 @@ on:
paths-ignore:
- 'docs/**'

permissions: {}

jobs:
tests:
runs-on: ubuntu-latest
2 changes: 2 additions & 0 deletions .github/workflows/tests.yml
@@ -12,6 +12,8 @@ on:
env:
HF_HOME: .cache/huggingface

permissions: {}

jobs:
check_code_quality:
runs-on: ubuntu-latest
2 changes: 2 additions & 0 deletions .github/workflows/torch_compile_tests.yml
@@ -17,6 +17,8 @@ env:
# To be able to run tests on CUDA 12.2
NVIDIA_DISABLE_REQUIRE: "1"

permissions: {}

jobs:
run_tests_with_compile:
runs-on:
2 changes: 2 additions & 0 deletions .github/workflows/trufflehog.yml
@@ -3,6 +3,8 @@ on:

name: Secret Leaks

permissions: {}

jobs:
trufflehog:
runs-on: ubuntu-latest
2 changes: 2 additions & 0 deletions .github/workflows/upload_pr_documentation.yml
@@ -6,6 +6,8 @@ on:
types:
- completed

permissions: {}

jobs:
build:
uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main
4 changes: 2 additions & 2 deletions .github/workflows/zizmor.yaml
@@ -3,13 +3,13 @@ name: CI security linting
on:
push:
branches: ["main"]
paths:
- '.github/**'
pull_request:
branches: ["*"]
paths:
- '.github/**'

permissions: {}

jobs:
zizmor:
name: zizmor latest via Cargo
10 changes: 10 additions & 0 deletions .github/zizmor.yml
@@ -3,3 +3,13 @@ rules:
ignore:
# this workflow is only triggered after maintainer approval
- upload_pr_documentation.yml:3:1
cache-poisoning:
ignore:
# the docker buildx binary is cached and zizmor warns about a cache poisoning attack.
# OTOH this cache would make us more resilient against an intrusion on docker-buildx' side.
# There is no obvious benefit so we leave it as it is.
- build_docker_images.yml:37:9
- build_docker_images.yml:70:9
- build_docker_images.yml:103:9
- build_docker_images.yml:136:9
- build_docker_images.yml:169:9
5 changes: 5 additions & 0 deletions examples/corda_finetuning/README.md
@@ -100,7 +100,12 @@ lora_config = LoraConfig(
init_lora_weights="corda",
corda_config=corda_config,
)

# Call `preprocess_corda` first to collect covariance matrix and build SVD result for model
# For more details, please refer to documentation of `preprocess_corda`
preprocess_corda(model, lora_config, run_model=run_model)

# Call `get_peft_model` after preprocessing, or else you'll encounter error
peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()

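For context, `run_model` in the README snippet above is defined earlier in that file. A minimal sketch of such a callback, assuming `torch`, `model`, and a small calibration dataloader already exist (the dataloader and the ~256-sample count are illustrative assumptions, not part of this commit), might look like:

    # Hypothetical calibration callback: run plain inference so the forward hooks
    # installed by `preprocess_corda` can accumulate covariance statistics.
    @torch.no_grad()
    def run_model():
        model.eval()
        for batch in calib_dataloader:  # assumed iterable of ~256 tokenized samples
            model(input_ids=batch["input_ids"].to(model.device))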
2 changes: 1 addition & 1 deletion examples/corda_finetuning/preprocess.py
@@ -21,7 +21,7 @@
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

from peft.mapping import get_peft_model
from peft import get_peft_model
from peft.tuners.lora.config import CordaConfig, LoraConfig
from peft.tuners.lora.corda import preprocess_corda

22 changes: 22 additions & 0 deletions src/peft/import_utils.py
@@ -13,9 +13,11 @@
# limitations under the License.
import importlib
import importlib.metadata as importlib_metadata
import platform
from functools import lru_cache

import packaging.version
import torch


@lru_cache
@@ -111,3 +113,23 @@ def is_torchao_available():
f"but only versions above {TORCHAO_MINIMUM_VERSION} are supported"
)
return True


@lru_cache
def is_xpu_available(check_device=False):
"""
Checks if XPU acceleration is available and potentially if a XPU is in the environment
"""

system = platform.system()
if system == "Darwin":
return False
else:
if check_device:
try:
# Will raise a RuntimeError if no XPU is found
_ = torch.xpu.device_count()
return torch.xpu.is_available()
except RuntimeError:
return False
return hasattr(torch, "xpu") and torch.xpu.is_available()
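As a usage note (not part of the diff): the new helper can complement the existing CUDA check when picking a device. A rough sketch, assuming it is imported from `peft.import_utils`:

    import torch
    from peft.import_utils import is_xpu_available

    # Illustrative device selection: prefer CUDA, fall back to XPU, else CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif is_xpu_available(check_device=True):  # also verifies a device is actually present
        device = torch.device("xpu")
    else:
        device = torch.device("cpu")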
16 changes: 16 additions & 0 deletions src/peft/peft_model.py
@@ -3022,3 +3022,19 @@ def check_irrgular(vals: list[bool | Literal["irregular"]]) -> bool | Literal["i
devices=devices,
)
return adapter_model_status


def __getattr__(name):
if name == "PEFT_TYPE_TO_MODEL_MAPPING":
# This is for backwards compatibility: In #2282, PEFT_TYPE_TO_MODEL_MAPPING was removed as it was redundant with
# PEFT_TYPE_TO_TUNER_MAPPING. However, third party code could still use this mapping, e.g.:
# https://github.com/AutoGPTQ/AutoGPTQ/blob/6689349625de973b9ee3016c28c11f32acf7f02c/auto_gptq/utils/peft_utils.py#L8
# TODO: Remove after 2026-01
msg = (
"PEFT_TYPE_TO_MODEL_MAPPING is deprecated, please use `from peft import PEFT_TYPE_TO_TUNER_MAPPING` instead. "
"The deprecated variable will be removed in 2026."
)
warnings.warn(msg, category=DeprecationWarning)
return PEFT_TYPE_TO_TUNER_MAPPING

raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
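The module-level `__getattr__` above keeps the old attribute access working while emitting a deprecation warning (PEP 562). A small illustrative sketch of what third-party code would observe:

    import warnings
    from peft import peft_model

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        mapping = peft_model.PEFT_TYPE_TO_MODEL_MAPPING  # resolved via module __getattr__
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)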
5 changes: 5 additions & 0 deletions src/peft/tuners/lora/config.py
@@ -145,6 +145,8 @@ class CordaConfig:
use_float16_for_covariance (`bool`):
If true, uses float16 for the covariance matrix. This can reduce the memory usage of the covariance matrix
by half, but may lead to numerical instability. Defaults to `False`.
prune_temporary_fields (`bool`):
If true, temporary fields generated in CorDA preprocessing will be pruned. Defaults to `True`.
"""

cache_file: Optional[str] = field(
@@ -189,6 +191,9 @@ class CordaConfig:
)
},
)
prune_temporary_fields: bool = field(
default=True, metadata={"help": "If true, temporary fields generated in CorDA preprocessing will be pruned."}
)


@dataclass
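To show the new field in context, a CorDA configuration that keeps the temporary fields for inspection and caches the SVD results could look roughly like this (names and values are illustrative assumptions, not taken from the diff):

    from peft.tuners.lora.config import CordaConfig, LoraConfig

    corda_config = CordaConfig(
        corda_method="kpm",            # "kpm" (knowledge-preserved) or "ipm" (instruction-previewed)
        cache_file="corda_cache.pt",   # reuse the SVD results on later runs
        prune_temporary_fields=False,  # keep covariance_matrix etc. around for debugging
    )
    lora_config = LoraConfig(init_lora_weights="corda", corda_config=corda_config)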
44 changes: 22 additions & 22 deletions src/peft/tuners/lora/corda.py
@@ -61,24 +61,27 @@ def preprocess_corda(
"""
Build necessary CorDA fields for a model.
For each `M * N` linear layer, a `M * M` covariance matrix will be built temporarily during the preprocessing
process, consuming roughly another `2 * MODEL_SIZE` memory for typical LLMs if model weight is FP16 and covariance
is FP32. If that's too much, consider specifying `use_float16_for_covariance` in `lora_config.corda_config`.
Args:
model (`nn.Module`):
Model to preprocess.
lora_config (`LoraConfig`):
Lora configuration of the model. `lora_config.corda_config` should be set.
run_model (`Optional[Callable[[], None]]`):
Callback to run the model when building covariance. Typically you should run model inference on your sample
dataset in this callback. Experiments have shown 256 samples to be a good default dataset size. `run_model`
can be `None` only if covariance file in `lora_config.corda_config` is already created.
dataset in this callback. Experiments have shown that when token count per sample is 2048, hidden dimension
is 4096, collecting 256 distinct samples is enough. If you collect too few or too repetitive samples, the
covariance matrix may be low-ranked and unstabilize preprocessing. You can estimate sample count as
`HIDDEN_DIM / TOKEN_PER_SAMPLE * 128`. `run_model` can be `None` only if covariance file in
`lora_config.corda_config` is already created.
hooked_model (`Optional[nn.Module]`):
Model to hook when building covariance. If none, original model will be hooked. This is only useful when
you want to hook a different model than the one you are training, typically you should leave this `None`.
Upon completion, the following fields are set for each target module:
corda_method (`Literal["ipm", "kpm"]`):
CorDA method to apply. "ipm" for Instruction-Previewed Mode, "kpm" for Knowledge-Preserved Mode.
rank (`int`):
Rank of CorDA to apply.
eigens.S_WC (`torch.Tensor`):
Singular values of the weight matrix.
eigens.U_WC (`torch.Tensor`):
@@ -90,13 +93,12 @@
covariance_file = lora_config.corda_config.covariance_file
corda_method = lora_config.corda_config.corda_method
verbose = lora_config.corda_config.verbose
prune_temporary_fields = lora_config.corda_config.prune_temporary_fields

# If cache exists, skip building
if cache_file is not None and os.path.exists(cache_file) and os.path.getsize(cache_file) > 0:
cache = torch.load(cache_file, map_location=get_model_device(model))
for name, module in target_modules(model, lora_config):
module.corda_method = cache[f"{name}.corda_method"]
module.rank = cache[f"{name}.rank"]
module.eigens = CordaEigens(
S_WC=cache[f"{name}.eigens.S_WC"],
U_WC=cache[f"{name}.eigens.U_WC"],
@@ -123,12 +125,22 @@
# Crop CorDA eigens so that there's less to save
crop_corda_eigens(model, lora_config)

# Remove redundant fields if exist
if prune_temporary_fields:
for name, module in target_modules(model, lora_config):
if hasattr(module, "sample_count"):
del module.sample_count
if hasattr(module, "covariance_matrix"):
del module.covariance_matrix
if hasattr(module, "corda_method"):
del module.corda_method
if hasattr(module, "rank"):
del module.rank

# Save cache to disk
if cache_file is not None:
cache: dict[str, Any] = {}
for name, module in target_modules(model, lora_config):
cache[f"{name}.corda_method"] = module.corda_method
cache[f"{name}.rank"] = module.rank
cache[f"{name}.eigens.S_WC"] = module.eigens.S_WC
cache[f"{name}.eigens.U_WC"] = module.eigens.U_WC
cache[f"{name}.eigens.V_WC"] = module.eigens.V_WC
@@ -174,15 +186,9 @@ def hook(module, input, output):
"Invalid value found in covariance. Please file an issue at https://github.com/huggingface/peft/issues."
)

# calculate mean and std
mean = input.mean(0)
std = input.std(0)

# add to module
module.sample_count += 1
module.covariance_matrix += covariance
module.mean += mean
module.std += std

# free memory
del covariance, input
@@ -191,8 +197,6 @@ def hook(module, input, output):
for name, module in target_modules(hooked_model, config):
module.sample_count = 0
module.covariance_matrix = 0
module.mean = 0
module.std = 0
handles.append(module.register_forward_hook(hook))

run_model()
@@ -213,14 +217,10 @@ def hook(module, input, output):
if name in targets:
targets[name].sample_count = module.sample_count
targets[name].covariance_matrix = module.covariance_matrix
targets[name].mean = module.mean
targets[name].std = module.std

# Divide by sample count
for name, module in target_modules(model, config):
module.covariance_matrix /= module.sample_count
module.mean /= module.sample_count
module.std /= module.sample_count

# Save covariance to disk
if covariance_file is not None:
