diff --git a/src/databudgie/utils.py b/src/databudgie/utils.py
index 3521657..c1fac19 100644
--- a/src/databudgie/utils.py
+++ b/src/databudgie/utils.py
@@ -81,14 +81,25 @@ def join_paths(*components: Optional[str]) -> str:
     if len(real_components) == 1:
         return real_components[0]
 
-    first_component, *rest_components = real_components
+    bucket = None
 
     normalized_components = []
-    for c in rest_components:
+    for index, c in enumerate(real_components):
+        normalized_c = c
         if is_s3_path(c):
-            normalized_c = S3Location(c).key
+            s3_location = S3Location(c)
+            if bucket is None:
+                bucket = s3_location.bucket
+            else:
+                if bucket != s3_location.bucket:
+                    raise ValueError(f"Path contains two different buckets: {bucket}, {s3_location.bucket}")
+            normalized_c = s3_location.key
         else:
-            normalized_c = c.strip("/")
+            if index != 0:
+                normalized_c = c.lstrip("/")
         normalized_components.append(normalized_c)
 
-    return os.path.join(first_component, *normalized_components)
+    if bucket:
+        normalized_components.insert(0, f"s3://{bucket}")
+
+    return os.path.join(*normalized_components)
diff --git a/tests/adapter/test_fallback.py b/tests/adapter/test_fallback.py
index 0cc858e..df2102e 100644
--- a/tests/adapter/test_fallback.py
+++ b/tests/adapter/test_fallback.py
@@ -4,9 +4,9 @@
 from databudgie.backup import backup_all
 from databudgie.config import RootConfig
 from tests.adapter import test_postgres
-from tests.test_backup import _get_file_buffer, _validate_backup_contents
+from tests.test_backup import _validate_backup_contents
 from tests.test_restore import test_restore_one
-from tests.utils import s3_config
+from tests.utils import get_file_buffer, s3_config
 
 
 def test_backup(pg, mf, s3_resource):
@@ -23,7 +23,7 @@
     backup_all(pg, config.backup)
 
     _validate_backup_contents(
-        _get_file_buffer("s3://sample-bucket/public.customer/2021-04-26T09:00:00.csv", s3_resource),
+        get_file_buffer("s3://sample-bucket/public.customer/2021-04-26T09:00:00.csv", s3_resource),
         [customer],
     )
 
diff --git a/tests/mockmodels/factories.py b/tests/mockmodels/factories.py
index c947469..0e6659e 100644
--- a/tests/mockmodels/factories.py
+++ b/tests/mockmodels/factories.py
@@ -12,10 +12,9 @@
 
 
 @register_at("store")
-@autoincrement
-def create_store(autoincrement: int, name: Optional[str] = None):
+def create_store(name: Optional[str] = None, *, id: Optional[int] = None):
     name = name or fake.company()
-    return Store(id=autoincrement, name=name)
+    return Store(id=id, name=name)
 
 
 @register_at("product")
diff --git a/tests/test_backup.py b/tests/test_backup.py
index 4ca6196..af67ee1 100644
--- a/tests/test_backup.py
+++ b/tests/test_backup.py
@@ -10,9 +10,8 @@
 
 from databudgie.backup import backup_all
 from databudgie.config import RootConfig
-from databudgie.s3 import is_s3_path, S3Location
 from tests.mockmodels.models import Customer
-from tests.utils import s3_config
+from tests.utils import get_file_buffer, s3_config
 
 
 fake = faker.Faker()
@@ -139,7 +138,7 @@ def test_backup_local_file(pg, mf):
         config = RootConfig.from_dict({"tables": {"public.customer": {"location": f"{dir_name}/public.customer"}}})
         backup_all(pg, config.backup)
 
-        _validate_backup_contents(_get_file_buffer(f"{dir_name}/public.customer/2021-04-26T09:00:00.csv"), [customer])
+        _validate_backup_contents(get_file_buffer(f"{dir_name}/public.customer/2021-04-26T09:00:00.csv"), [customer])
 
 
 def test_backup_s3(pg, mf, s3_resource):
@@ -152,7 +151,7 @@
     backup_all(pg, config.backup)
 
     _validate_backup_contents(
_get_file_buffer("s3://sample/databudgie/test/public.customer/2021-04-26T09:00:00.csv", s3_resource), [customer] + get_file_buffer("s3://sample/databudgie/test/public.customer/2021-04-26T09:00:00.csv", s3_resource), [customer] ) @@ -178,21 +177,6 @@ def test_backup_failure(pg): assert console.call_count == 2 -def _get_file_buffer(filename, s3_resource=None): - buffer = io.BytesIO() - - if is_s3_path(filename) and s3_resource: - location = S3Location(filename) - uploaded_object = s3_resource.Object("sample-bucket", location.key) - uploaded_object.download_fileobj(buffer) - else: - with open(filename, "rb") as f: - buffer.write(f.read()) - buffer.seek(0) - - return buffer - - def _validate_backup_contents(buffer, expected_contents: List[Customer]): """Validate the contents of a backup file. Columns from the file will be raw.""" diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 62e4184..e6a26c1 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -9,10 +9,9 @@ from databudgie.storage import StorageBackend from tests.mockmodels.models import DatabudgieManifest from tests.test_backup import ( - _get_file_buffer, _validate_backup_contents, ) -from tests.utils import mock_s3_csv, s3_config +from tests.utils import get_file_buffer, mock_s3_csv, s3_config fake = faker.Faker() @@ -29,7 +28,7 @@ def test_manifest_backup(pg, mf, s3_resource): backup_all(pg, config.backup, storage) _validate_backup_contents( - _get_file_buffer("s3://sample/databudgie/test/public.customer/2021-04-26T09:00:00.csv", s3_resource), [customer] + get_file_buffer("s3://sample/databudgie/test/public.customer/2021-04-26T09:00:00.csv", s3_resource), [customer] ) row = pg.query(DatabudgieManifest).first() diff --git a/tests/test_root_location.py b/tests/test_root_location.py new file mode 100644 index 0000000..7698b27 --- /dev/null +++ b/tests/test_root_location.py @@ -0,0 +1,93 @@ +import tempfile + +import pytest + +from databudgie.backup import backup_all +from databudgie.config import RootConfig +from tests.utils import get_file_buffer, s3_config + + +def test_backup_local_file(pg, mf): + """Verify local files compose correctly with the 'root_location' option.""" + mf.store.new(id=1, name="example") + + with tempfile.TemporaryDirectory() as dir_name: + config = RootConfig.from_dict( + {"root_location": dir_name, "tables": {"public.store": {"location": "public.store/"}}} + ) + backup_all(pg, config.backup) + + content = get_file_buffer(f"{dir_name}/public.store/2021-04-26T09:00:00.csv").read() + assert content == b"id,name\n1,example\n" + + +def test_backup_s3_file_relative_local_path(pg, mf, s3_resource): + """Verify s3 files compose correctly with the 'root_location' option.""" + mf.store.new(id=1, name="example") + + config = RootConfig.from_dict( + { + "root_location": "s3://sample-bucket/root-path/", + "tables": {"public.store": {"location": "/public.store/"}}, + "sequences": False, + **s3_config, + } + ) + backup_all(pg, config.backup) + + content = get_file_buffer("s3://sample-bucket/root-path/public.store/2021-04-26T09:00:00.csv", s3_resource).read() + assert content == b"id,name\n1,example\n" + + +def test_backup_relative_root_absolute_location(pg, mf, s3_resource): + mf.store.new(id=1, name="example") + + config = RootConfig.from_dict( + { + "root_location": "root-path/", + "tables": {"public.store": {"location": "s3://sample-bucket/public.store/"}}, + "sequences": False, + **s3_config, + } + ) + backup_all(pg, config.backup) + + content = 
get_file_buffer("s3://sample-bucket/root-path/public.store/2021-04-26T09:00:00.csv", s3_resource).read() + assert content == b"id,name\n1,example\n" + + +def test_backup_s3_file_absolute_local_path(pg, mf, s3_resource): + """Verify s3 files compose correctly with the 'root_location' option. + + An absolute path to the same bucket should transparently compose the paths together. + """ + mf.store.new(id=1, name="example") + + config = RootConfig.from_dict( + { + "root_location": "s3://sample-bucket/root-path/", + "tables": {"public.store": {"location": "s3://sample-bucket/public.store/"}}, + **s3_config, + } + ) + backup_all(pg, config.backup) + + content = get_file_buffer("s3://sample-bucket/root-path/public.store/2021-04-26T09:00:00.csv", s3_resource).read() + assert content == b"id,name\n1,example\n" + + +def test_backup_s3_file_invalid_bucket(mf): + """Verify s3 files compose correctly with the 'root_location' option. + + Different buckets on the composed paths should fail. + """ + mf.store.new(id=1, name="example") + + with pytest.raises(ValueError): + RootConfig.from_dict( + { + "root_location": "s3://sample-bucket/root-path/", + "tables": {"public.store": {"location": "s3://other-bucket/public.store/"}}, + **s3_config, + } + ) diff --git a/tests/test_utils.py b/tests/test_utils.py index 4611e70..620b0b1 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -12,7 +12,7 @@ def test_first_component_absolute_path(self): def test_non_first_component_absolute_path(self): path = join_paths("first", "/2nd/") - assert path == "first/2nd" + assert path == "first/2nd/" def test_bad_first_component(self): path = join_paths(None, "/first") diff --git a/tests/utils.py b/tests/utils.py index 74814f5..d9f8bbf 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,12 +1,17 @@ import csv import gzip import io +import logging +from pathlib import Path from typing import List from mypy_boto3_s3.service_resource import S3ServiceResource +from databudgie.s3 import is_s3_path, S3Location from databudgie.utils import wrap_buffer +log = logging.getLogger(__name__) + s3_config = { "s3": { "aws_access_key_id": "foo", @@ -35,3 +40,29 @@ def mock_s3_csv(s3_resource: S3ServiceResource, key: str, data: List[dict], gzip bucket = s3_resource.Bucket("sample-bucket") buffer = mock_csv(data, gzipped=gzipped) bucket.put_object(Key=key, Body=buffer) + + +def get_file_buffer(filename, s3_resource=None): + buffer = io.BytesIO() + + if is_s3_path(filename): + assert s3_resource + location = S3Location(filename) + uploaded_object = s3_resource.Object("sample-bucket", location.key) + uploaded_object.download_fileobj(buffer) + else: + try: + with open(filename, "rb") as f: + buffer.write(f.read()) + except FileNotFoundError: + # For better error messaging, if the file doesnt exist + path = Path(filename) + suffix = path.suffix + parent = path.parent + log.info(list(parent.glob(f"*.{suffix}"))) + + raise + + buffer.seek(0) + + return buffer