diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index ab6662a392..e44afe2e59 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -96,6 +96,10 @@ jobs:
           [ -f "$(which shasum)" ] && SHASUM=shasum
           find "${{ env.HF_HOME }}/hub" -type f -exec "$SHASUM" {} \; > cache_content_after
           diff -udp cache_content_initial cache_content_after || true
+      - name: Delete old model cache entries
+        run: |
+          # The script exits with 1 when there is nothing to delete; that must not fail the job.
+          python scripts/ci_clean_cache.py -d || true
       - name: Update model cache
         uses: actions/cache/save@v4
         # Only let one runner (preferably the one that covers most tests) update the model cache
diff --git a/scripts/ci_clean_cache.py b/scripts/ci_clean_cache.py
new file mode 100644
index 0000000000..e927c0bac3
--- /dev/null
+++ b/scripts/ci_clean_cache.py
@@ -0,0 +1,67 @@
+"""
+Utility to clean cache files that exceed a specific time in days according to their
+last access time recorded in the cache.
+
+Exit code:
+- 1 if no candidates are found
+- 0 if candidates are found
+
+Deletion can be enabled by passing `-d` parameter, otherwise it will only list the candidates.
+"""
+import sys
+import argparse
+from huggingface_hub import scan_cache_dir
+from datetime import datetime as dt
+
+
+def find_old_revisions(scan_results, max_age_days=30):
+    """Find commit hashes of objects in the cache. These objects need a last access time that
+    is above the passed `max_age_days` parameter. Returns an empty list if no objects are found.
+    Time measurement is based on the current time and the recorded last access time in the cache.
+
+    The age is counted in whole days, so a repo is only selected once more than `max_age_days`
+    full days have passed since its last access. The age is tracked per repo and all revisions
+    of an outdated repo are returned.
+    """
+    now = dt.now()
+    hashes = []
+    for repo in scan_results.repos:
+        age_days = (now - dt.fromtimestamp(repo.last_accessed)).days
+        if age_days > max_age_days:
+            hashes.extend(rev.commit_hash for rev in repo.revisions)
+
+    return hashes
+
+
+def delete_old_revisions(scan_results, delete_candidates, do_delete=False):
+    """Report what deleting `delete_candidates` (commit hashes) would free and, if `do_delete`
+    is set, delete those revisions from the cache described by `scan_results`.
+    """
+    delete_operation = scan_results.delete_revisions(*delete_candidates)
+    print(f'Would free {delete_operation.expected_freed_size_str}')
+    print(f'Candidates: {delete_candidates}')
+
+    if do_delete:
+        print("Deleting now.")
+        delete_operation.execute()
+    else:
+        print("Not deleting, pass the -d flag.")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-a', '--max-age', type=int, default=30, help="Max. age in days items in the cache may have.")
+    parser.add_argument('-d', '--delete', action='store_true', help=(
+        "Delete mode; Really delete items if there are candidates. Exit code = 0 when we found something to delete, 1 "
+        "otherwise."
+    ))
+    args = parser.parse_args()
+
+    scan_results = scan_cache_dir()
+
+    delete_candidates = find_old_revisions(scan_results, args.max_age)
+    if not delete_candidates:
+        print('No delete candidates found, not deleting anything.')
+        sys.exit(1)
+
+    delete_old_revisions(scan_results, delete_candidates, do_delete=args.delete)