feat: support arbitrary boto3 s3 config options in s3 backend (#1697)
* feat: support arbitrary boto3 s3 config options in s3 backend

* feat: update naming of boto config's prefix for clarity

* test: ensure boto3 props are used

* refact: ensure boto3 config params are applied to all instantiations of

* Update CHANGES.md

---------

Co-authored-by: Cooper Lees <[email protected]>
joel-aws and cooperlees authored Apr 8, 2024
1 parent 5101907 commit 5642501
Showing 4 changed files with 75 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
@@ -2,6 +2,7 @@

## New Features

- Add arbitrary configuration option for S3 Storage Backend Boto3 calls (PR #1697)
- Added HTTPS support in Docker Compose + Enabled bind-mount volume for Nginx config + add documentation in README.md (PR #1653)

## Documentation
4 changes: 4 additions & 0 deletions docs/storage_options.md
@@ -32,6 +32,7 @@ To enable S3 support the optional `s3` install must be done:

- `pip install bandersnatch[s3]`
- Add a `[s3]` section in the bandersnatch config file
- Prefix keys with `config_param_` to pass the key (with the prefix stripped) and its value as parameters to the underlying Boto3 S3 calls

You will need an [AWS account](https://aws.amazon.com/console/) and an [S3 bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/creating-bucket.html)

@@ -55,6 +56,9 @@ aws_secret_access_key = your s3 secret access key
endpoint_url = endpoint url
# Optional manual signature version for compatibility
signature_version = s3v4
# Optional example for overriding parameters in Boto3 S3 calls
config_param_ServerSideEncryption = aws:kms
config_param_SSEKMSKeyId = your KMS key ID
```
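
For illustration, here is a minimal standalone sketch (not code from this commit) of how such prefixed keys become Boto3 call parameters. Key case must survive config parsing (`optionxform = str` below), because Boto3 parameter names such as `ServerSideEncryption` are case-sensitive:

```python
import configparser

BOTO_CONFIG_PREFIX = "config_param_"

parser = configparser.ConfigParser()
parser.optionxform = str  # keep key case: Boto3 parameter names are case-sensitive
parser.read_string("""\
[s3]
endpoint_url = endpoint url
config_param_ServerSideEncryption = aws:kms
config_param_SSEKMSKeyId = your KMS key ID
""")

# Strip the prefix; the remainder is used verbatim as a Boto3 parameter name.
params = {
    key.removeprefix(BOTO_CONFIG_PREFIX): value
    for key, value in parser["s3"].items()
    if key.startswith(BOTO_CONFIG_PREFIX)
}
print(params)
# -> {'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'your KMS key ID'}
```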

### Serving your Mirror
39 changes: 39 additions & 0 deletions src/bandersnatch/tests/plugins/test_storage_plugin_s3.py
@@ -1,5 +1,6 @@
from datetime import datetime

import pytest
from s3path import S3Path

from bandersnatch.tests.mock_config import mock_config
@@ -198,6 +199,44 @@ def test_plugin_init(s3_mock: S3Path) -> None:
assert resource.meta.client.meta.endpoint_url == "http://localhost:9090"


def test_plugin_init_with_boto3_configs(s3_mock: S3Path) -> None:
config_loader = mock_config(
"""
[mirror]
directory = /tmp/pypi
json = true
master = https://pypi.org
timeout = 60
global-timeout = 18000
workers = 3
hash-index = true
stop-on-error = true
storage-backend = s3
verifiers = 3
keep_index_versions = 2
compare-method = hash
[s3]
region_name = us-east-1
aws_access_key_id = 123456
aws_secret_access_key = 123456
endpoint_url = http://localhost:9090
signature_version = s3v4
config_param_ServerSideEncryption = AES256
"""
)
backend = s3.S3Storage(config=config_loader.config)
backend.initialize_plugin()

assert backend.configuration_parameters["ServerSideEncryption"] == "AES256"

# Limitation of min.io, but tells us that the expected config param was used
with pytest.raises(ValueError) as execinfo:
backend.write_file(f"/{s3_mock.bucket}/file1", "test")
assert "KMS not configured for a server side encrypted objects" in str(
execinfo.value
)


def test_upload_time(s3_mock: S3Path) -> None:
backend = s3.S3Storage()
backend.PATH_BACKEND(f"/{s3_mock.bucket}/folder1/file1").touch()
43 changes: 31 additions & 12 deletions src/bandersnatch_storage_plugins/s3.py
@@ -123,6 +123,8 @@ class S3Storage(StoragePlugin):
PATH_BACKEND = S3Path
resource = None
UPLOAD_TIME_METADATA_KEY = "uploaded-at"
BOTO_CONFIG_PREFIX = "config_param_"
configuration_parameters: dict = {}

def get_config_value(
self, config_key: str, *env_keys: Any, default: str | None = None
@@ -147,6 +149,11 @@ def initialize_plugin(self) -> None:
aws_secret_access_key = self.get_config_value("aws_secret_access_key")
endpoint_url = self.get_config_value("endpoint_url")
signature_version = self.get_config_value("signature_version")
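# Collect every "config_param_"-prefixed option from the [s3] section,
# strip the prefix, and keep the remainder verbatim as the Boto3
# parameter name (e.g. config_param_ServerSideEncryption -> ServerSideEncryption).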
self.configuration_parameters = {
k.removeprefix(self.BOTO_CONFIG_PREFIX): v
for k, v in self.configuration["s3"].items()
if k.startswith(self.BOTO_CONFIG_PREFIX)
}
try:
mirror_base_path = PureS3Path(self.configuration.get("mirror", "directory"))
except (configparser.NoOptionError, configparser.NoSectionError) as e:
@@ -166,7 +173,17 @@ def initialize_plugin(self) -> None:
if signature_version:
s3_args["config"] = Config(signature_version=signature_version)
resource = boto3.resource("s3", **s3_args)
register_configuration_parameter(mirror_base_path, resource=resource)
register_configuration_parameter(
mirror_base_path,
resource=resource,
parameters=self.configuration_parameters,
)

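# Construct S3Path objects through this helper so each new path is
# registered with the same configuration parameters; operations routed
# through s3path then pass them along to the underlying Boto3 calls.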
def create_path_backend(self, path: PATH_TYPES) -> S3Path:
path = self.PATH_BACKEND(path)
register_configuration_parameter(path, parameters=self.configuration_parameters)

return path

def get_lock(self, path: str | None = None) -> S3FileLock:
if path is None:
@@ -175,7 +192,7 @@ def get_lock(self, path: str | None = None) -> S3FileLock:

def walk(self, root: PATH_TYPES, dirs: bool = True) -> list[S3Path]:
if not isinstance(root, self.PATH_BACKEND):
root = self.PATH_BACKEND(root)
root = self.create_path_backend(root)

results: list[S3Path] = []
for pth in root.iterdir():
@@ -203,7 +220,7 @@ def rewrite(
"""Rewrite an existing file atomically to avoid programs running in
parallel to have race conditions while reading."""
if not isinstance(filepath, self.PATH_BACKEND):
filepath = self.PATH_BACKEND(filepath)
filepath = self.create_path_backend(filepath)
with filepath.open(mode=mode, **kw) as fh:
yield fh

@@ -255,6 +272,7 @@ def copy_file(self, source: PATH_TYPES, dest: PATH_TYPES) -> None:
Key=dest.key,
CopySource={"Bucket": source.bucket, "Key": source.key},
Bucket=dest.bucket,
**self.configuration_parameters,
)
return

Expand All @@ -265,7 +283,7 @@ def write_file(
encoding: str | None = None,
) -> None:
if not isinstance(path, self.PATH_BACKEND):
path = self.PATH_BACKEND(path)
path = self.create_path_backend(path)
if isinstance(contents, str):
with path.open(mode="w", encoding=encoding) as fp:
fp.write(contents)
@@ -363,7 +381,7 @@ def rmdir(
If force is true, remove contents destructively.
"""
if not isinstance(path, self.PATH_BACKEND):
path = self.PATH_BACKEND(path)
path = self.create_path_backend(path)
log_prefix = "[DRY RUN] " if dry_run else ""
logger.info(f"{log_prefix}Removing file: {path!s}")
if not dry_run:
@@ -372,22 +390,22 @@

def exists(self, path: PATH_TYPES) -> bool:
if not isinstance(path, self.PATH_BACKEND):
path = self.PATH_BACKEND(path)
path = self.create_path_backend(path)
return bool(path.exists())

def is_dir(self, path: PATH_TYPES) -> bool:
if not isinstance(path, self.PATH_BACKEND):
path = self.PATH_BACKEND(path)
path = self.create_path_backend(path)
return bool(path.is_dir())

def is_file(self, path: PATH_TYPES) -> bool:
if not isinstance(path, self.PATH_BACKEND):
path = self.PATH_BACKEND(path)
path = self.create_path_backend(path)
return bool(path.is_file())

def is_symlink(self, path: PATH_TYPES) -> bool:
if not isinstance(path, self.PATH_BACKEND):
path = self.PATH_BACKEND(path)
path = self.create_path_backend(path)
return bool(path.is_symlink())

def get_hash(self, path: PATH_TYPES, function: str = "sha256") -> str:
@@ -403,12 +421,12 @@ def symlink(

def get_file_size(self, path: PATH_TYPES) -> int:
if not isinstance(path, self.PATH_BACKEND):
path = self.PATH_BACKEND(path)
path = self.create_path_backend(path)
return int(path.stat().st_size)

def get_upload_time(self, path: PATH_TYPES) -> datetime.datetime:
if not isinstance(path, self.PATH_BACKEND):
path = self.PATH_BACKEND(path)
path = self.create_path_backend(path)
resource, _ = path._accessor.configuration_map.get_configuration(path)
s3object = resource.Object(path.bucket, str(path.key))
ts = s3object.metadata.get(self.UPLOAD_TIME_METADATA_KEY, 0)
@@ -418,7 +436,7 @@ def get_upload_time(self, path: PATH_TYPES) -> datetime.datetime:

def set_upload_time(self, path: PATH_TYPES, time: datetime.datetime) -> None:
if not isinstance(path, self.PATH_BACKEND):
path = self.PATH_BACKEND(path)
path = self.create_path_backend(path)
resource, _ = path._accessor.configuration_map.get_configuration(path)
s3object = resource.Object(path.bucket, str(path.key))
s3object.metadata.update({self.UPLOAD_TIME_METADATA_KEY: str(time.timestamp())})
@@ -428,4 +446,5 @@ def set_upload_time(self, path: PATH_TYPES, time: datetime.datetime) -> None:
CopySource={"Bucket": path.bucket, "Key": str(path.key)},
Metadata=s3object.metadata,
MetadataDirective="REPLACE",
**self.configuration_parameters,
)
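
For context, a sketch of the effective Boto3 call under the documentation's example configuration (placeholder values, not part of the diff):

```python
# With config_param_ServerSideEncryption = aws:kms and config_param_SSEKMSKeyId
# set, the copy_from above effectively runs as:
s3object.copy_from(
    CopySource={"Bucket": path.bucket, "Key": str(path.key)},
    Metadata=s3object.metadata,
    MetadataDirective="REPLACE",
    ServerSideEncryption="aws:kms",
    SSEKMSKeyId="your KMS key ID",
)
```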
