Commit
Merge pull request #64 from ddebeau/config-max-parts
Add max_multipart_parts config option
ddebeau authored Mar 28, 2022
2 parents a024abb + 4ae9346 commit 0b475f3
Showing 5 changed files with 29 additions and 8 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,2 +1,3 @@
 config.cfg
-.idea/*
+.idea/*
+.venv/*
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+### Added
+
+- Add optional `max_multipart_parts` config option for supporting storage
+  providers that don't have the same 10,000 part limit as S3.
+  [#61](https://github.com/ddebeau/zfs_uploader/issues/61)
+
 ### Fixed

 - Fix bug where the snapshot size was incorrect due to the command missing the
2 changes: 2 additions & 0 deletions README.md
@@ -80,6 +80,8 @@ jobs can be set in one file.
 Maximum number of incremental backups per full backup.
 #### storage_class : str, default: STANDARD
 S3 storage class.
+#### max_multipart_parts : int, default: 10000
+Maximum number of parts to use in a multipart S3 upload.

 ### Examples
 #### Multiple full backups
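For orientation, here is a hedged example of how the new option could be set in a config file. The option names come from the `ZFSjob` parameters changed in this commit; the filesystem-named section, endpoint URL, and values are illustrative assumptions rather than text taken from the project's README.

```ini
[DEFAULT]
access_key = ACCESS_KEY
secret_key = SECRET_KEY
region = us-east-1
storage_class = STANDARD

# Hypothetical provider that caps multipart uploads at 1,000 parts
# instead of S3's 10,000.
[tank/data]
bucket_name = zfs-backups
endpoint = https://s3.example-provider.invalid
cron = 0 2 * * *
max_multipart_parts = 1000
```

With a lower part limit configured, the upload chunk size is derived from the snapshot send size so the part count stays under the provider's cap (see the `_get_transfer_config` change in zfs_uploader/job.py below).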
5 changes: 4 additions & 1 deletion zfs_uploader/config.py
@@ -77,7 +77,10 @@ def __init__(self, file_path=None):
                         v.getint('max_incremental_backups_per_full') or
                         default.getint('max_incremental_backups_per_full')),  # noqa
                     storage_class=(v.get('storage_class') or
-                                   default.get('storage_class'))
+                                   default.get('storage_class')),
+                    max_multipart_parts=(
+                        v.getint('max_multipart_parts') or
+                        default.getint('max_multipart_parts'))
                 )
             )

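As a minimal, runnable sketch of the lookup pattern above (section names and values are illustrative): the per-job value wins, otherwise the `[DEFAULT]` section is used, and `ZFSjob` finally falls back to 10,000.

```python
from configparser import ConfigParser

cfg = ConfigParser()
cfg.read_string("""
[DEFAULT]
bucket_name = zfs-backups

[tank/data]
max_multipart_parts = 1000
""")

default = cfg['DEFAULT']
job = cfg['tank/data']

# Mirrors config.py: getint() returns None when a key is absent, so the
# expression falls through to [DEFAULT]; ZFSjob applies the final 10000.
max_multipart_parts = (job.getint('max_multipart_parts') or
                       default.getint('max_multipart_parts'))
print(max_multipart_parts or 10000)  # -> 1000
```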
21 changes: 15 additions & 6 deletions zfs_uploader/job.py
@@ -15,7 +15,6 @@
 KB = 1024
 MB = KB * KB
 S3_MAX_CONCURRENCY = 20
-S3_MAX_PART_NUMBER = 10000


 class BackupError(Exception):
@@ -88,6 +87,11 @@ def storage_class(self):
         """ S3 storage class. """
         return self._storage_class

+    @property
+    def max_multipart_parts(self):
+        """ Maximum number of parts to use in a multipart S3 upload. """
+        return self._max_multipart_parts
+
     @property
     def backup_db(self):
         """ BackupDB """
@@ -101,7 +105,7 @@ def snapshot_db(self):
     def __init__(self, bucket_name, access_key, secret_key, filesystem,
                  region=None, cron=None, max_snapshots=None, max_backups=None,
                  max_incremental_backups_per_full=None, storage_class=None,
-                 endpoint=None):
+                 endpoint=None, max_multipart_parts=None):
         """ Create ZFSjob object.

         Parameters
Expand All @@ -128,6 +132,8 @@ def __init__(self, bucket_name, access_key, secret_key, filesystem,
Maximum number of incremental backups per full backup.
storage_class : str, default: STANDARD
S3 storage class.
max_multipart_parts : int, default: 10000
Maximum number of parts to use in a multipart S3 upload.
"""
self._bucket_name = bucket_name
@@ -150,6 +156,7 @@ def __init__(self, bucket_name, access_key, secret_key, filesystem,
         self._max_backups = max_backups
         self._max_incremental_backups_per_full = max_incremental_backups_per_full  # noqa
         self._storage_class = storage_class or 'STANDARD'
+        self._max_multipart_parts = max_multipart_parts or 10000
         self._logger = logging.getLogger(__name__)

         if max_snapshots and not max_snapshots >= 0:
@@ -284,7 +291,8 @@ def _backup_full(self):
         filesystem = snapshot.filesystem

         send_size = int(get_snapshot_send_size(filesystem, backup_time))
-        transfer_config = _get_transfer_config(send_size)
+        transfer_config = _get_transfer_config(send_size,
+                                               self._max_multipart_parts)

         s3_key = f'{filesystem}/{backup_time}.full'
         self._logger.info(f'filesystem={filesystem} '
@@ -331,7 +339,8 @@ def _backup_incremental(self, backup_time_full):
         send_size = int(get_snapshot_send_size_inc(filesystem,
                                                    backup_time_full,
                                                    backup_time))
-        transfer_config = _get_transfer_config(send_size)
+        transfer_config = _get_transfer_config(send_size,
+                                               self._max_multipart_parts)

         s3_key = f'{filesystem}/{backup_time}.inc'
         self._logger.info(f'filesystem={filesystem} '
@@ -544,10 +553,10 @@ def callback(self, transfer):
         self._time_0 = time_1


-def _get_transfer_config(send_size):
+def _get_transfer_config(send_size, max_multipart_parts):
     """ Get transfer config. """
     # should never get close to the max part number
-    chunk_size = send_size // (S3_MAX_PART_NUMBER - 100)
+    chunk_size = send_size // (max_multipart_parts - 100)
     # only set chunk size if greater than default value
     chunk_size = chunk_size if chunk_size > 8 * MB else 8 * MB
     return TransferConfig(max_concurrency=S3_MAX_CONCURRENCY,
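To make the chunk-size math concrete, here is a self-contained sketch of the updated helper plus a worked example. The `TransferConfig` call is truncated in the diff above, so passing the computed value as `multipart_chunksize` is an assumption; the constants mirror those at the top of job.py.

```python
from boto3.s3.transfer import TransferConfig

KB = 1024
MB = KB * KB
S3_MAX_CONCURRENCY = 20


def _get_transfer_config(send_size, max_multipart_parts):
    """ Get transfer config. """
    # Stay roughly 100 parts under the provider's part limit.
    chunk_size = send_size // (max_multipart_parts - 100)
    # Only raise the chunk size above the 8 MB default when needed.
    chunk_size = chunk_size if chunk_size > 8 * MB else 8 * MB
    return TransferConfig(max_concurrency=S3_MAX_CONCURRENCY,
                          multipart_chunksize=chunk_size)  # assumed keyword


# A 100 GiB send against a 1,000-part limit needs ~113 MiB parts:
# 100 * 1024 * MB // (1000 - 100) == 119_304_647 bytes.
print(_get_transfer_config(100 * 1024 * MB, 1000).multipart_chunksize // MB)   # -> 113

# A smaller 10 GiB send, even with the default 10,000-part limit,
# stays at the 8 MB floor.
print(_get_transfer_config(10 * 1024 * MB, 10000).multipart_chunksize // MB)   # -> 8
```

The 100-part headroom reflects the original comment's intent: the upload should never get close to the maximum part number, even if the stream turns out slightly larger than the estimated send size.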
