Skip to content

Commit

Permalink
Added from_date and to_date options to limit the processed files (#202)
Browse files Browse the repository at this point in the history
* Added from_date and to_date options to limit the processed files

* Removed a leftover debug print

* Added info for this feature in the readme. Replaced "_" with "-" in the arguments names

---------

Co-authored-by: Marco Dondero <[email protected]>
Co-authored-by: Ivan Dokov <[email protected]>
  • Loading branch information
3 people authored Nov 13, 2023
1 parent 83f2d88 commit 3f85d38
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 3 deletions.
24 changes: 23 additions & 1 deletion phockup.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,26 @@ def parse_args(args=sys.argv[1:]):
files by an additional level after sorting by date.
"""
)

parser.add_argument(
'--from-date',
type=str,
default=None,
help="""\
Limit the operations to the files that are newer than --from-date (inclusive).
The date must be specified in format YYYY-MM-DD. Files with unknown date won't be skipped.
"""
)

parser.add_argument(
'--to-date',
type=str,
default=None,
help="""\
Limit the operations to the files that are older than --to-date (inclusive).
The date must be specified in format YYYY-MM-DD. Files with unknown date won't be skipped.
"""
)

return parser.parse_args(args)

Expand Down Expand Up @@ -344,7 +364,9 @@ def main(options):
no_date_dir=options.no_date_dir,
skip_unknown=options.skip_unknown,
output_prefix=options.output_prefix,
output_suffix=options.output_suffix
output_suffix=options.output_suffix,
from_date=options.from_date,
to_date=options.to_date
)


Expand Down
23 changes: 23 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,29 @@ saving to the same central repository.

The two options above can be used to help sort/store images

#### Limit files processed by date
The `--from-date` flag limits the operations to files dated on or after the provided date (the date itself is included).
The date must be specified in the format YYYY-MM-DD. Files with an unknown date are not skipped.

For example:
```
phockup ~/Pictures/DCIM/NIKOND40 ~/Pictures/sorted --from-date="2017-01-02"
```
The `--to-date` flag limits the operations to files dated on or before the provided date (the date itself is included).
The date must be specified in the format YYYY-MM-DD. Files with an unknown date are not skipped.

For example:
```
phockup ~/Pictures/DCIM/NIKOND40 ~/Pictures/sorted --to-date="2017-01-02"
```

`--from-date` and `--to-date` can be combined for better control over the files that are processed.

For example:
```
phockup ~/Pictures/DCIM/NIKOND40 ~/Pictures/sorted --from-date="2017-01-02" --to-date="2017-01-03"
```

### Missing date information in EXIF
If any of the photos does not have date information you can use the `-r | --regex` option to specify date format for date extraction from filenames:
```
Expand Down
30 changes: 28 additions & 2 deletions src/phockup.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ def __init__(self, input_dir, output_dir, **args):
self.max_depth = args.get('max_depth', -1)
# default to concurrency of one to retain existing behavior
self.max_concurrency = args.get("max_concurrency", 1)

self.from_date = args.get("from_date", None)
self.to_date = args.get("to_date", None)
if self.from_date is not None:
self.from_date = Date.strptime(f"{self.from_date} 00:00:00", "%Y-%m-%d %H:%M:%S")
if self.to_date is not None:
self.to_date = Date.strptime(f"{self.to_date} 23:59:59", "%Y-%m-%d %H:%M:%S")

if self.max_concurrency > 1:
logger.info(f"Using {self.max_concurrency} workers to process files.")

Expand Down Expand Up @@ -249,7 +257,7 @@ def process_file(self, filename):

progress = f'{filename}'

output, target_file_name, target_file_path, target_file_type = self.get_file_name_and_path(filename)
output, target_file_name, target_file_path, target_file_type, file_date = self.get_file_name_and_path(filename)
suffix = 1
target_file = target_file_path

Expand All @@ -261,6 +269,7 @@ def process_file(self, filename):
logger.info(progress)
break

date_unknown = file_date == None or output.endswith(self.no_date_dir)
if self.skip_unknown and output.endswith(self.no_date_dir):
# Skip files that didn't generate a path from EXIF data
progress = f"{progress} => skipped, unknown date EXIF information for '{target_file_name}'"
Expand All @@ -270,6 +279,22 @@ def process_file(self, filename):
logger.info(progress)
break

if not date_unknown:
skip = False
if type(file_date) is dict:
file_date = file_date["date"]
if self.from_date is not None and file_date < self.from_date:
progress = f"{progress} => {filename} skipped: date {file_date} is older than --from-date {self.from_date}"
skip = True
if self.to_date is not None and file_date > self.to_date:
progress = f"{progress} => {filename} skipped: date {file_date} is newer than --to-date {self.to_date}"
skip = True
if skip:
if self.progress:
self.pbar.write(progress)
logger.info(progress)
break

if os.path.isfile(target_file):
if filename != target_file and filecmp.cmp(filename, target_file, shallow=False):
progress = f'{progress} => skipped, duplicated file {target_file}'
Expand Down Expand Up @@ -330,6 +355,7 @@ def get_file_name_and_path(self, filename):
if exif_data and 'MIMEType' in exif_data:
target_file_type = self.get_file_type(exif_data['MIMEType'])

date = None
if target_file_type in ['image', 'video']:
date = Date(filename).from_exif(exif_data, self.timestamp, self.date_regex,
self.date_field)
Expand All @@ -342,7 +368,7 @@ def get_file_name_and_path(self, filename):
target_file_name = os.path.basename(filename)

target_file_path = os.path.sep.join([output, target_file_name])
return output, target_file_name, target_file_path, target_file_type
return output, target_file_name, target_file_path, target_file_type, date

def process_xmp(self, original_filename, file_name, suffix, output):
"""
Expand Down
63 changes: 63 additions & 0 deletions tests/test_phockup.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,3 +473,66 @@ def test_skip_unknown():
assert len([name for name in os.listdir(dir4) if
os.path.isfile(os.path.join(dir4, name))]) == 1
shutil.rmtree('output', ignore_errors=True)

def test_from_date():
    """Run Phockup with only ``from_date`` set and check per-directory file counts.

    With ``from_date="2017-10-06"`` the 2017/01/01 directory must exist but
    hold no files, while 2017/10/06, 2018/01/01 and the unknown-date
    directory each hold exactly one file.
    """
    shutil.rmtree('output', ignore_errors=True)
    Phockup('input', 'output', from_date="2017-10-06")

    # (output directory, number of regular files expected inside it)
    expectations = (
        ('output/2017/01/01', 0),
        ('output/2017/10/06', 1),
        ('output/unknown', 1),
        ('output/2018/01/01/', 1),
    )

    # Every directory is checked for existence before any file is counted.
    for directory, _ in expectations:
        assert os.path.isdir(directory)

    for directory, expected_files in expectations:
        found = sum(
            1
            for entry in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, entry))
        )
        assert found == expected_files

    shutil.rmtree('output', ignore_errors=True)

def test_to_date():
    """Run Phockup with only ``to_date`` set and check per-directory file counts.

    With ``to_date="2017-10-06"`` the 2018/01/01 directory must exist but hold
    no files; 2017/01/01 holds three files, and 2017/10/06 and the
    unknown-date directory hold one file each.
    """
    shutil.rmtree('output', ignore_errors=True)
    Phockup('input', 'output', to_date="2017-10-06", progress=True)

    # (output directory, number of regular files expected inside it)
    expectations = (
        ('output/2017/01/01', 3),
        ('output/2017/10/06', 1),
        ('output/unknown', 1),
        ('output/2018/01/01/', 0),
    )

    # Every directory is checked for existence before any file is counted.
    for directory, _ in expectations:
        assert os.path.isdir(directory)

    for directory, expected_files in expectations:
        found = sum(
            1
            for entry in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, entry))
        )
        assert found == expected_files

    shutil.rmtree('output', ignore_errors=True)

def test_from_date_to_date():
    """Run Phockup with both date bounds set and check per-directory file counts.

    With ``from_date="2017-01-02"`` and ``to_date="2017-10-06"`` the
    2017/01/01 and 2018/01/01 directories must exist but hold no files, while
    2017/10/06 and the unknown-date directory hold one file each.
    """
    shutil.rmtree('output', ignore_errors=True)
    Phockup('input', 'output', to_date="2017-10-06", from_date="2017-01-02", progress=True)

    # (output directory, number of regular files expected inside it)
    expectations = (
        ('output/2017/01/01', 0),
        ('output/2017/10/06', 1),
        ('output/unknown', 1),
        ('output/2018/01/01/', 0),
    )

    # Every directory is checked for existence before any file is counted.
    for directory, _ in expectations:
        assert os.path.isdir(directory)

    for directory, expected_files in expectations:
        found = sum(
            1
            for entry in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, entry))
        )
        assert found == expected_files

    shutil.rmtree('output', ignore_errors=True)

0 comments on commit 3f85d38

Please sign in to comment.