From c77ac3ed09eacbd33a41fb6fb47a2737305676e8 Mon Sep 17 00:00:00 2001 From: Erisa A Date: Sat, 26 Mar 2022 14:39:09 +0000 Subject: [PATCH 1/4] Add max_multipart_parts config option --- .gitignore | 3 ++- README.md | 20 ++++++++++++++++++++ zfs_uploader/config.py | 4 +++- zfs_uploader/job.py | 20 +++++++++++++------- 4 files changed, 38 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index f9ebde9..15b7477 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ config.cfg -.idea/* \ No newline at end of file +.idea/* +.venv/* \ No newline at end of file diff --git a/README.md b/README.md index 77e9850..ba6684b 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,8 @@ jobs can be set in one file. Maximum number of incremental backups per full backup. #### storage_class : str, default: STANDARD S3 storage class. +#### max_multipart_parts : int, default: 10000 + Maximum number of parts to use in a multipart S3 upload. ### Examples #### Multiple full backups @@ -119,6 +121,24 @@ max_incremental_backups_per_full = 6 max_backups = 7 ``` +#### Scaleway S3-compatible Object Storage, full backups +```ini +[DEFAULT] +bucket_name = BUCKET_NAME +region = fr-par +access_key = ACCESS_KEY +secret_key = SECRET_KEY +storage_class = STANDARD +endpoint = https://s3.fr-par.scw.cloud +max_multipart_parts = 1000 + +[pool/filesystem] +cron = 0 2 * * * +max_snapshots = 7 +max_incremental_backups_per_full = 6 +max_backups = 7 +``` + ##### Structure full backup (f), incremental backup (i) diff --git a/zfs_uploader/config.py b/zfs_uploader/config.py index e14d149..3723a8d 100644 --- a/zfs_uploader/config.py +++ b/zfs_uploader/config.py @@ -77,7 +77,9 @@ def __init__(self, file_path=None): v.getint('max_incremental_backups_per_full') or default.getint('max_incremental_backups_per_full')), # noqa storage_class=(v.get('storage_class') or - default.get('storage_class')) + default.get('storage_class')), + max_multipart_parts=(v.getint('max_multipart_parts') or + default.getint('max_multipart_parts')) ) ) diff --git a/zfs_uploader/job.py b/zfs_uploader/job.py index 1bbe376..23f637a 100644 --- a/zfs_uploader/job.py +++ b/zfs_uploader/job.py @@ -15,8 +15,6 @@ KB = 1024 MB = KB * KB S3_MAX_CONCURRENCY = 20 -S3_MAX_PART_NUMBER = 10000 - class BackupError(Exception): """ Baseclass for backup exceptions. """ @@ -88,6 +86,11 @@ def storage_class(self): """ S3 storage class. """ return self._storage_class + @property + def max_multipart_parts(self): + """ Maximum number of parts to use in a multipart S3 upload. """ + return self._max_multipart_parts + @property def backup_db(self): """ BackupDB """ @@ -101,7 +104,7 @@ def snapshot_db(self): def __init__(self, bucket_name, access_key, secret_key, filesystem, region=None, cron=None, max_snapshots=None, max_backups=None, max_incremental_backups_per_full=None, storage_class=None, - endpoint=None): + endpoint=None, max_multipart_parts=None): """ Create ZFSjob object. Parameters @@ -128,6 +131,8 @@ def __init__(self, bucket_name, access_key, secret_key, filesystem, Maximum number of incremental backups per full backup. storage_class : str, default: STANDARD S3 storage class. + max_multipart_parts : int, default: 10000 + Maximum number of parts to use in a multipart S3 upload. """ self._bucket_name = bucket_name @@ -150,6 +155,7 @@ def __init__(self, bucket_name, access_key, secret_key, filesystem, self._max_backups = max_backups self._max_incremental_backups_per_full = max_incremental_backups_per_full # noqa self._storage_class = storage_class or 'STANDARD' + self._max_multipart_parts = max_multipart_parts or 10000 self._logger = logging.getLogger(__name__) if max_snapshots and not max_snapshots >= 0: @@ -284,7 +290,7 @@ def _backup_full(self): filesystem = snapshot.filesystem send_size = int(get_snapshot_send_size(filesystem, backup_time)) - transfer_config = _get_transfer_config(send_size) + transfer_config = _get_transfer_config(send_size, self._max_multipart_parts) s3_key = f'{filesystem}/{backup_time}.full' self._logger.info(f'filesystem={filesystem} ' @@ -331,7 +337,7 @@ def _backup_incremental(self, backup_time_full): send_size = int(get_snapshot_send_size_inc(filesystem, backup_time_full, backup_time)) - transfer_config = _get_transfer_config(send_size) + transfer_config = _get_transfer_config(send_size, self._max_multipart_parts) s3_key = f'{filesystem}/{backup_time}.inc' self._logger.info(f'filesystem={filesystem} ' @@ -544,10 +550,10 @@ def callback(self, transfer): self._time_0 = time_1 -def _get_transfer_config(send_size): +def _get_transfer_config(send_size, max_multipart_parts): """ Get transfer config. """ # should never get close to the max part number - chunk_size = send_size // (S3_MAX_PART_NUMBER - 100) + chunk_size = send_size // (max_multipart_parts - 100) # only set chunk size if greater than default value chunk_size = chunk_size if chunk_size > 8 * MB else 8 * MB return TransferConfig(max_concurrency=S3_MAX_CONCURRENCY, From 778c617e590d28b391e07e808ed6d467d61ac583 Mon Sep 17 00:00:00 2001 From: Erisa A Date: Sun, 27 Mar 2022 18:31:59 +0100 Subject: [PATCH 2/4] Add changes from code review --- .gitignore | 2 +- README.md | 18 ------------------ 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 15b7477..16f6a07 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ config.cfg .idea/* -.venv/* \ No newline at end of file +.venv/* diff --git a/README.md b/README.md index ba6684b..2d9d255 100644 --- a/README.md +++ b/README.md @@ -121,24 +121,6 @@ max_incremental_backups_per_full = 6 max_backups = 7 ``` -#### Scaleway S3-compatible Object Storage, full backups -```ini -[DEFAULT] -bucket_name = BUCKET_NAME -region = fr-par -access_key = ACCESS_KEY -secret_key = SECRET_KEY -storage_class = STANDARD -endpoint = https://s3.fr-par.scw.cloud -max_multipart_parts = 1000 - -[pool/filesystem] -cron = 0 2 * * * -max_snapshots = 7 -max_incremental_backups_per_full = 6 -max_backups = 7 -``` - ##### Structure full backup (f), incremental backup (i) From 77d69d64c4cf73526ae7b96a53d3ac0be68ca193 Mon Sep 17 00:00:00 2001 From: David Debeau Date: Mon, 28 Mar 2022 17:55:06 -0500 Subject: [PATCH 3/4] Fix flake8 errors --- zfs_uploader/config.py | 5 +++-- zfs_uploader/job.py | 7 +++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/zfs_uploader/config.py b/zfs_uploader/config.py index 3723a8d..fdc70d7 100644 --- a/zfs_uploader/config.py +++ b/zfs_uploader/config.py @@ -78,8 +78,9 @@ def __init__(self, file_path=None): default.getint('max_incremental_backups_per_full')), # noqa storage_class=(v.get('storage_class') or default.get('storage_class')), - max_multipart_parts=(v.getint('max_multipart_parts') or - default.getint('max_multipart_parts')) + max_multipart_parts=( + v.getint('max_multipart_parts') or + default.getint('max_multipart_parts')) ) ) diff --git a/zfs_uploader/job.py b/zfs_uploader/job.py index 23f637a..d0bad5d 100644 --- a/zfs_uploader/job.py +++ b/zfs_uploader/job.py @@ -16,6 +16,7 @@ MB = KB * KB S3_MAX_CONCURRENCY = 20 + class BackupError(Exception): """ Baseclass for backup exceptions. """ @@ -290,7 +291,8 @@ def _backup_full(self): filesystem = snapshot.filesystem send_size = int(get_snapshot_send_size(filesystem, backup_time)) - transfer_config = _get_transfer_config(send_size, self._max_multipart_parts) + transfer_config = _get_transfer_config(send_size, + self._max_multipart_parts) s3_key = f'{filesystem}/{backup_time}.full' self._logger.info(f'filesystem={filesystem} ' @@ -337,7 +339,8 @@ def _backup_incremental(self, backup_time_full): send_size = int(get_snapshot_send_size_inc(filesystem, backup_time_full, backup_time)) - transfer_config = _get_transfer_config(send_size, self._max_multipart_parts) + transfer_config = _get_transfer_config(send_size, + self._max_multipart_parts) s3_key = f'{filesystem}/{backup_time}.inc' self._logger.info(f'filesystem={filesystem} ' From 4ae9346e0ced2788b8aee6bf68ec7227a57a38bd Mon Sep 17 00:00:00 2001 From: David Debeau Date: Mon, 28 Mar 2022 18:00:48 -0500 Subject: [PATCH 4/4] Append to changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 508c4b2..cff0460 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add optional `max_multipart_parts` config option for supporting storage + providers that don't have the same 10,000 part limit as S3. +[#61](https://github.com/ddebeau/zfs_uploader/issues/61) + ### Fixed - Fix bug where the snapshot size was incorrect due to the command missing the