-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
45dd257
commit 42a66d7
Showing
4 changed files
with
163 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"cSpell.words": [ | ||
"Rutkowski" | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
|
||
# Duplicate Guard | ||
|
||
**Duplicate Guard** is a lightweight GitHub Action designed to prevent duplicate files from being added or modified in a repository. This helps reduce repository bloat, minimize downloadable app sizes, and optimize asset management. Duplicate files can significantly increase the size of compressed artifacts (such as ZIP files) because archive formats like ZIP compress each file independently and do not deduplicate identical content. This action keeps your repository clean and efficient by detecting and blocking redundant files. | ||
|
||
--- | ||
|
||
## 🚀 Features | ||
- Detects and blocks unintentionally duplicated files in pull requests. | ||
- Helps reduce downloadable app sizes by eliminating redundant assets. | ||
- Supports .gitignore-like syntax to exclude specific files or directories. | ||
|
||
--- | ||
|
||
## 🛠️ Usage | ||
|
||
### 1. **Create an Ignore File** | ||
Add a `duplicate_guard.ignore` file to the root of your repository to define patterns for files or directories to exclude from duplicate checks. The syntax follows `.gitignore` conventions. | ||
|
||
**Example `duplicate_guard.ignore`:** | ||
```gitignore | ||
test/* | ||
logs/* | ||
*.log | ||
``` | ||
|
||
--- | ||
|
||
### 2. **Add the GitHub Action** | ||
Create a GitHub Actions workflow in `.github/workflows/duplicate_guard.yml`: | ||
|
||
```yaml | ||
name: Duplicate Guard | ||
on: | ||
pull_request: | ||
branches: | ||
- master | ||
workflow_dispatch: | ||
|
||
jobs: | ||
duplicate_guard: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: Duplicate Guard | ||
uses: chris-rutkowski/duplicate-guard@v1.0.0 | ||
``` | ||
--- | ||
## ⚙️ Configuration | ||
### **Specify a Custom Ignore File Path** | ||
If your `duplicate_guard.ignore` file is not in the root directory, specify its location using the `ignore_file` input: | ||
|
||
```yaml | ||
steps: | ||
- name: Duplicate Guard | ||
uses: chris-rutkowski/duplicate-guard@v1.0.0 | ||
with: | ||
ignore_file: ./my/path/my_duplicate_guard.ignore | ||
``` | ||
|
||
--- | ||
|
||
## 📄 License | ||
This project is licensed under the [MIT License](LICENSE). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
name: "Duplicate Guard"
description: "Blocks pull requests with unintentionally duplicated files"
author: "Chris Rutkowski"
inputs:
  ignore_file:
    description: "Path to the ignore file"
    required: true
    default: "./duplicate_guard.ignore"

runs:
  using: "composite"
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Get changed files
      id: changed-files
      uses: tj-actions/changed-files@v45
      with:
        separator: ","

    - name: Run Duplicate Guard
      # Untrusted values (the user-supplied input and file names taken from
      # the pull request) are handed to the shell through environment
      # variables instead of being expanded into the script text, so they
      # cannot inject shell commands and paths containing spaces stay intact.
      # NOTE(review): file names that themselves contain "," will still be
      # split incorrectly by the script's comma-separated argument format.
      env:
        IGNORE_FILE: ${{ inputs.ignore_file }}
        CHANGED_FILES: ${{ steps.changed-files.outputs.added_files }},${{ steps.changed-files.outputs.modified_files }}
      run: |
        python3 "${GITHUB_ACTION_PATH}/duplicate_guard.py" "$IGNORE_FILE" "$CHANGED_FILES"
      shell: bash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import fnmatch | ||
import hashlib | ||
import os | ||
import sys | ||
|
||
def load_ignore_patterns(ignore_file):
    """Read glob patterns from *ignore_file*, one per line.

    Surrounding whitespace is stripped from every line. Blank lines and
    comment lines (first non-blank character is ``#``) are skipped.

    Args:
        ignore_file: Path to the ignore file.

    Returns:
        A list of pattern strings in file order.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    patterns = []
    with open(ignore_file, "r", encoding="utf-8") as f:
        for raw_line in f:
            # Strip before testing for "#" so that indented comments are
            # recognised as comments instead of being returned as patterns.
            line = raw_line.strip()
            if line and not line.startswith("#"):
                patterns.append(line)
    return patterns
|
||
def should_ignore(file, patterns):
    """Tell whether *file* matches at least one of the glob *patterns*.

    Matching is delegated to ``fnmatch.fnmatch``; the first matching
    pattern ends the search.

    Args:
        file: Path string to test.
        patterns: Iterable of glob pattern strings.

    Returns:
        True if any pattern matches, otherwise False.
    """
    for glob in patterns:
        if fnmatch.fnmatch(file, glob):
            return True
    return False
|
||
def calculate_checksum(file_path):
    """Compute the SHA-256 digest of the file at *file_path*.

    The file is read in 4096-byte chunks so its whole content never has to
    be held in memory at once.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The digest as a hexadecimal string.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            chunk = stream.read(4096)
            if chunk == b"":
                break
            digest.update(chunk)
    return digest.hexdigest()
|
||
def get_all_repository_files(ignore_patterns):
    """Collect every non-ignored file below the current working directory.

    Directories named ``.git`` are never entered: they hold Git's internal
    bookkeeping files, not repository content, so hashing them is wasted
    work and could produce misleading duplicate reports.

    Args:
        ignore_patterns: Glob patterns; any file whose relative path matches
            one of them (per ``should_ignore``) is left out.

    Returns:
        A list of file paths relative to the current working directory.
    """
    repo_files = []
    for root, dirs, files in os.walk("."):
        # Assigning to dirs[:] prunes in place, which is how os.walk is told
        # not to descend into a directory during a top-down walk.
        dirs[:] = [name for name in dirs if name != ".git"]
        for file in files:
            relative_path = os.path.relpath(os.path.join(root, file), ".")
            if not should_ignore(relative_path, ignore_patterns):
                repo_files.append(relative_path)
    return repo_files
|
||
def main(argv):
    """Run the duplicate check and return the process exit code.

    Args:
        argv: Command-line vector: ``argv[1]`` is the ignore-file path and
            ``argv[2]`` is a comma-separated list of added/modified files.

    Returns:
        0 when no duplicates were found, 1 when at least one checked file
        duplicates another file, 2 when the arguments are missing.
    """
    if len(argv) < 3:
        print(f"Usage: {argv[0]} <ignore_file> <comma_separated_files>")
        return 2

    ignore_file = argv[1]
    files = argv[2].split(",")
    ignore_patterns = load_ignore_patterns(ignore_file)

    # Step 1: Build a checksum map for all existing repository files.
    # Each checksum maps to *every* path that has it. Keeping only one path
    # per checksum would make detection depend on directory walk order: a
    # changed file is itself on disk, so it could overwrite the entry of the
    # file it duplicates and then be mistaken for "only matches itself".
    print("Calculating checksums for all repository files...")
    checksums = {}
    for path in get_all_repository_files(ignore_patterns):
        checksums.setdefault(calculate_checksum(path), []).append(
            os.path.normpath(path)
        )
    print(f"Done, {len(checksums)} checksums")

    # Step 2: Check new/modified files against the repository and themselves
    exit_code = 0

    for file in files:
        # Empty entries come from joining empty lists; paths that are not
        # regular files on disk cannot be hashed.
        if not file or not os.path.isfile(file):
            continue

        if should_ignore(file, ignore_patterns):
            print(f"Ignoring: '{file}'")
            continue

        print(f"Processing: '{file}'")

        # Normalise so "./a/b" and "a/b" compare equal to the walked paths.
        normalized = os.path.normpath(file)
        same_content = checksums.setdefault(calculate_checksum(file), [])
        others = [path for path in same_content if path != normalized]

        if others:
            print(f"Error: '{file}' is a duplicate of '{others[0]}'")
            exit_code = 1

        if normalized not in same_content:
            same_content.append(normalized)

    return exit_code


if __name__ == "__main__":
    sys.exit(main(sys.argv))