From af03e6b50c8a77ba0839f74fe0d8bbd0e7d5488c Mon Sep 17 00:00:00 2001
From: Rob Miller
Date: Fri, 17 Nov 2023 17:58:54 +0000
Subject: [PATCH] implement and test --movedel option to move file and delete if duplicate detected

Add a `--movedel` flag and pass it through to `Phockup` as `movedel`.

When `movedel`, `move` and `skip_unknown` are all set and the source file
is byte-identical to the file already present at the target path, the
source file is removed (unless `dry_run` is set) and the progress message
reads "deleted, duplicated file" instead of "skipped, duplicated file".
In every other case duplicates are still skipped and left in place.

Review notes:
* `self.movedel` is stored as a plain bool. A trailing comma after
  `args.get("movedel", False)` would make it a one-element tuple, which is
  always truthy and would delete duplicates even without `--movedel`.
* `--movedel` is registered on `parser` rather than on the debug/silent
  mutually exclusive group, so it can be combined with those flags.
---
 phockup.py            | 11 +++++++++++
 src/phockup.py        |  8 +++++++-
 tests/test_phockup.py | 21 +++++++++++++++++++++
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/phockup.py b/phockup.py
index d1da7aa..20bd530 100755
--- a/phockup.py
+++ b/phockup.py
@@ -276,6 +276,16 @@ def parse_args(args=sys.argv[1:]):
 """,
     )
 
+    parser.add_argument(
+        '--movedel',
+        action='store_true',
+        default=False,
+        help="""\
+DELETE source files which are determined to be duplicates of files
+already transferred.  Only valid in conjunction with both `--move`
+and `--skip-unknown`.
+""",
+    )
     parser.add_argument(
         '--output_prefix',
         type=str,
@@ -365,6 +375,7 @@ def main(options):
         max_concurrency=options.max_concurrency,
         no_date_dir=options.no_date_dir,
         skip_unknown=options.skip_unknown,
+        movedel=options.movedel,
         output_prefix=options.output_prefix,
         output_suffix=options.output_suffix,
         from_date=options.from_date,
diff --git a/src/phockup.py b/src/phockup.py
index f685d40..4c2b89e 100755
--- a/src/phockup.py
+++ b/src/phockup.py
@@ -50,6 +50,7 @@ def __init__(self, input_dir, output_dir, **args):
         self.timestamp = args.get('timestamp', False)
         self.date_field = args.get('date_field', False)
         self.skip_unknown = args.get("skip_unknown", False)
+        self.movedel = args.get("movedel", False)
         self.dry_run = args.get('dry_run', False)
         self.progress = args.get('progress', False)
         self.max_depth = args.get('max_depth', -1)
@@ -297,7 +298,12 @@ def process_file(self, filename):
 
             if os.path.isfile(target_file):
                 if filename != target_file and filecmp.cmp(filename, target_file, shallow=False):
-                    progress = f'{progress} => skipped, duplicated file {target_file}'
+                    if self.movedel and self.move and self.skip_unknown:
+                        if not self.dry_run:
+                            os.remove(filename)
+                        progress = f'{progress} => deleted, duplicated file {target_file}'
+                    else:
+                        progress = f'{progress} => skipped, duplicated file {target_file}'
                     self.duplicates_found += 1
                     if self.progress:
                         self.pbar.write(progress)
diff --git a/tests/test_phockup.py b/tests/test_phockup.py
index 8268a73..aefa776 100644
--- a/tests/test_phockup.py
+++ b/tests/test_phockup.py
@@ -290,6 +290,27 @@ def test_process_move(mocker):
     shutil.rmtree('output', ignore_errors=True)
 
 
+def test_process_movedel(mocker, caplog):
+    shutil.rmtree('output', ignore_errors=True)
+    mocker.patch.object(Phockup, 'check_directories')
+    mocker.patch.object(Phockup, 'walk_directory')
+    mocker.patch.object(Exif, 'data')
+    Exif.data.return_value = {
+        "MIMEType": "image/jpeg"
+    }
+    phockup = Phockup('input', 'output', move=True, movedel=True, skip_unknown=True)
+    open("input/tmp_20170101_010101.jpg", "w").close()
+    open("input/sub_folder/tmp_20170101_010101.jpg", "w").close()
+    phockup.process_file("input/tmp_20170101_010101.jpg")
+    assert not os.path.isfile("input/tmp_20170101_010101.jpg")
+    assert os.path.isfile("output/2017/01/01/20170101-010101.jpg")
+    with caplog.at_level(logging.INFO):
+        phockup.process_file("input/sub_folder/tmp_20170101_010101.jpg")
+        assert 'deleted, duplicated file' in caplog.text
+    assert not os.path.isfile("input/sub_folder/tmp_20170101_010101.jpg")
+    shutil.rmtree('output', ignore_errors=True)
+
+
 def test_process_link(mocker):
     shutil.rmtree('output', ignore_errors=True)
     mocker.patch.object(Phockup, 'check_directories')