Merge pull request #103 from schireson/dc/fix-s3-root_location-composition
DanCardin authored Jun 29, 2023
2 parents bdbeee6 + 51be9a7 commit 41e792b
Showing 8 changed files with 151 additions and 34 deletions.
21 changes: 16 additions & 5 deletions src/databudgie/utils.py
@@ -81,14 +81,25 @@ def join_paths(*components: Optional[str]) -> str:
     if len(real_components) == 1:
         return real_components[0]

-    first_component, *rest_components = real_components
+    bucket = None
     normalized_components = []
-    for c in rest_components:
+    for index, c in enumerate(real_components):
+        normalized_c = c
         if is_s3_path(c):
-            normalized_c = S3Location(c).key
+            s3_location = S3Location(c)
+            if bucket is None:
+                bucket = s3_location.bucket
+            else:
+                if bucket != s3_location.bucket:
+                    raise ValueError(f"Path contains two different buckets: {bucket}, {s3_location.bucket}")
+            normalized_c = s3_location.key
         else:
-            normalized_c = c.strip("/")
+            if index != 0:
+                normalized_c = c.lstrip("/")

         normalized_components.append(normalized_c)

-    return os.path.join(first_component, *normalized_components)
+    if bucket:
+        normalized_components.insert(0, f"s3://{bucket}")
+
+    return os.path.join(*normalized_components)
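
For reference, a brief sketch of the composition behavior implied by the new code and by the tests added further down; the exact return values are inferred from those tests rather than asserted here, so treat this as illustrative:

    from databudgie.utils import join_paths

    # Plain components: only non-first components have their leading slash stripped,
    # and trailing slashes are now preserved (see tests/test_utils.py below).
    join_paths("first", "/2nd/")  # -> "first/2nd/"

    # An s3 root location composes with a relative table location; the bucket is
    # pulled off the components and re-attached to the front of the joined key.
    join_paths("s3://sample-bucket/root-path/", "/public.store/")
    # -> "s3://sample-bucket/root-path/public.store/"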
6 changes: 3 additions & 3 deletions tests/adapter/test_fallback.py
@@ -4,9 +4,9 @@
 from databudgie.backup import backup_all
 from databudgie.config import RootConfig
 from tests.adapter import test_postgres
-from tests.test_backup import _get_file_buffer, _validate_backup_contents
+from tests.test_backup import _validate_backup_contents
 from tests.test_restore import test_restore_one
-from tests.utils import s3_config
+from tests.utils import get_file_buffer, s3_config


 def test_backup(pg, mf, s3_resource):
@@ -23,7 +23,7 @@ def test_backup(pg, mf, s3_resource):
     backup_all(pg, config.backup)

     _validate_backup_contents(
-        _get_file_buffer("s3://sample-bucket/public.customer/2021-04-26T09:00:00.csv", s3_resource),
+        get_file_buffer("s3://sample-bucket/public.customer/2021-04-26T09:00:00.csv", s3_resource),
         [customer],
     )

5 changes: 2 additions & 3 deletions tests/mockmodels/factories.py
@@ -12,10 +12,9 @@


 @register_at("store")
-@autoincrement
-def create_store(autoincrement: int, name: Optional[str] = None):
+def create_store(name: Optional[str] = None, *, id: Optional[int] = None):
     name = name or fake.company()
-    return Store(id=autoincrement, name=name)
+    return Store(id=id, name=name)


 @register_at("product")
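
A quick illustrative note on the factory change: stores are now created with an explicit keyword id rather than the removed @autoincrement decorator. The first call below mirrors the new tests; the second is a hypothetical example showing that id simply defaults to None:

    mf.store.new(id=1, name="example")   # explicit primary key, as in the new tests
    mf.store.new(name="example-store")   # hypothetical: id defaults to None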
22 changes: 3 additions & 19 deletions tests/test_backup.py
@@ -10,9 +10,8 @@

 from databudgie.backup import backup_all
 from databudgie.config import RootConfig
-from databudgie.s3 import is_s3_path, S3Location
 from tests.mockmodels.models import Customer
-from tests.utils import s3_config
+from tests.utils import get_file_buffer, s3_config

 fake = faker.Faker()

@@ -139,7 +138,7 @@ def test_backup_local_file(pg, mf):
         config = RootConfig.from_dict({"tables": {"public.customer": {"location": f"{dir_name}/public.customer"}}})
         backup_all(pg, config.backup)

-        _validate_backup_contents(_get_file_buffer(f"{dir_name}/public.customer/2021-04-26T09:00:00.csv"), [customer])
+        _validate_backup_contents(get_file_buffer(f"{dir_name}/public.customer/2021-04-26T09:00:00.csv"), [customer])


 def test_backup_s3(pg, mf, s3_resource):
@@ -152,7 +151,7 @@ def test_backup_s3(pg, mf, s3_resource):
     backup_all(pg, config.backup)

     _validate_backup_contents(
-        _get_file_buffer("s3://sample/databudgie/test/public.customer/2021-04-26T09:00:00.csv", s3_resource), [customer]
+        get_file_buffer("s3://sample/databudgie/test/public.customer/2021-04-26T09:00:00.csv", s3_resource), [customer]
     )


@@ -178,21 +177,6 @@ def test_backup_failure(pg):
     assert console.call_count == 2


-def _get_file_buffer(filename, s3_resource=None):
-    buffer = io.BytesIO()
-
-    if is_s3_path(filename) and s3_resource:
-        location = S3Location(filename)
-        uploaded_object = s3_resource.Object("sample-bucket", location.key)
-        uploaded_object.download_fileobj(buffer)
-    else:
-        with open(filename, "rb") as f:
-            buffer.write(f.read())
-    buffer.seek(0)
-
-    return buffer
-
-
 def _validate_backup_contents(buffer, expected_contents: List[Customer]):
     """Validate the contents of a backup file. Columns from the file will be raw."""

5 changes: 2 additions & 3 deletions tests/test_manifest.py
@@ -9,10 +9,9 @@
 from databudgie.storage import StorageBackend
 from tests.mockmodels.models import DatabudgieManifest
 from tests.test_backup import (
-    _get_file_buffer,
     _validate_backup_contents,
 )
-from tests.utils import mock_s3_csv, s3_config
+from tests.utils import get_file_buffer, mock_s3_csv, s3_config

 fake = faker.Faker()

@@ -29,7 +28,7 @@ def test_manifest_backup(pg, mf, s3_resource):
     backup_all(pg, config.backup, storage)

     _validate_backup_contents(
-        _get_file_buffer("s3://sample/databudgie/test/public.customer/2021-04-26T09:00:00.csv", s3_resource), [customer]
+        get_file_buffer("s3://sample/databudgie/test/public.customer/2021-04-26T09:00:00.csv", s3_resource), [customer]
     )

     row = pg.query(DatabudgieManifest).first()
93 changes: 93 additions & 0 deletions tests/test_root_location.py
@@ -0,0 +1,93 @@
import tempfile

import pytest

from databudgie.backup import backup_all
from databudgie.config import RootConfig
from tests.utils import get_file_buffer, s3_config


def test_backup_local_file(pg, mf):
    """Verify local files compose correctly with the 'root_location' option."""
    mf.store.new(id=1, name="example")

    with tempfile.TemporaryDirectory() as dir_name:
        config = RootConfig.from_dict(
            {"root_location": dir_name, "tables": {"public.store": {"location": "public.store/"}}}
        )
        backup_all(pg, config.backup)

        content = get_file_buffer(f"{dir_name}/public.store/2021-04-26T09:00:00.csv").read()
        assert content == b"id,name\n1,example\n"


def test_backup_s3_file_relative_local_path(pg, mf, s3_resource):
    """Verify s3 files compose correctly with the 'root_location' option."""
    mf.store.new(id=1, name="example")

    config = RootConfig.from_dict(
        {
            "root_location": "s3://sample-bucket/root-path/",
            "tables": {"public.store": {"location": "/public.store/"}},
            "sequences": False,
            **s3_config,
        }
    )
    backup_all(pg, config.backup)

    content = get_file_buffer("s3://sample-bucket/root-path/public.store/2021-04-26T09:00:00.csv", s3_resource).read()
    assert content == b"id,name\n1,example\n"


def test_backup_relative_root_absolute_location(pg, mf, s3_resource):
    mf.store.new(id=1, name="example")

    config = RootConfig.from_dict(
        {
            "root_location": "root-path/",
            "tables": {"public.store": {"location": "s3://sample-bucket/public.store/"}},
            "sequences": False,
            **s3_config,
        }
    )
    backup_all(pg, config.backup)

    content = get_file_buffer("s3://sample-bucket/root-path/public.store/2021-04-26T09:00:00.csv", s3_resource).read()
    assert content == b"id,name\n1,example\n"


def test_backup_s3_file_absolute_local_path(pg, mf, s3_resource):
    """Verify s3 files compose correctly with the 'root_location' option.
    An absolute path to the same bucket should transparently compose the paths together.
    """
    mf.store.new(id=1, name="example")

    config = RootConfig.from_dict(
        {
            "root_location": "s3://sample-bucket/root-path/",
            "tables": {"public.store": {"location": "s3://sample-bucket/public.store/"}},
            **s3_config,
        }
    )
    backup_all(pg, config.backup)

    content = get_file_buffer("s3://sample-bucket/root-path/public.store/2021-04-26T09:00:00.csv", s3_resource).read()
    assert content == b"id,name\n1,example\n"


def test_backup_s3_file_invalid_bucket(mf):
    """Verify s3 files compose correctly with the 'root_location' option.
    Different buckets on the composed paths should fail.
    """
    mf.store.new(id=1, name="example")

    with pytest.raises(ValueError):
        RootConfig.from_dict(
            {
                "root_location": "s3://sample-bucket/root-path/",
                "tables": {"public.store": {"location": "s3://other-bucket/public.store/"}},
                **s3_config,
            }
        )
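
The invalid-bucket failure above surfaces while the config is being built; as a minimal sketch, the same error can be triggered directly, assuming join_paths (edited above) is what performs the composition under the hood:

    import pytest

    from databudgie.utils import join_paths

    with pytest.raises(ValueError, match="two different buckets"):
        join_paths("s3://sample-bucket/root-path/", "s3://other-bucket/public.store/")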
2 changes: 1 addition & 1 deletion tests/test_utils.py
@@ -12,7 +12,7 @@ def test_first_component_absolute_path(self):

    def test_non_first_component_absolute_path(self):
        path = join_paths("first", "/2nd/")
-        assert path == "first/2nd"
+        assert path == "first/2nd/"

    def test_bad_first_component(self):
        path = join_paths(None, "/first")
31 changes: 31 additions & 0 deletions tests/utils.py
@@ -1,12 +1,17 @@
 import csv
 import gzip
 import io
+import logging
+from pathlib import Path
 from typing import List

 from mypy_boto3_s3.service_resource import S3ServiceResource

+from databudgie.s3 import is_s3_path, S3Location
 from databudgie.utils import wrap_buffer

+log = logging.getLogger(__name__)
+
 s3_config = {
     "s3": {
         "aws_access_key_id": "foo",
@@ -35,3 +40,29 @@ def mock_s3_csv(s3_resource: S3ServiceResource, key: str, data: List[dict], gzip
     bucket = s3_resource.Bucket("sample-bucket")
     buffer = mock_csv(data, gzipped=gzipped)
     bucket.put_object(Key=key, Body=buffer)
+
+
+def get_file_buffer(filename, s3_resource=None):
+    buffer = io.BytesIO()
+
+    if is_s3_path(filename):
+        assert s3_resource
+        location = S3Location(filename)
+        uploaded_object = s3_resource.Object("sample-bucket", location.key)
+        uploaded_object.download_fileobj(buffer)
+    else:
+        try:
+            with open(filename, "rb") as f:
+                buffer.write(f.read())
+        except FileNotFoundError:
+            # For better error messaging, if the file doesn't exist
+            path = Path(filename)
+            suffix = path.suffix
+            parent = path.parent
+            log.info(list(parent.glob(f"*.{suffix}")))
+
+            raise
+
+    buffer.seek(0)
+
+    return buffer
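
As a short hedged illustration of the shared helper's two branches (the file paths here are hypothetical, and the bucket name is hard-coded to "sample-bucket" to match the test fixtures):

    # Local path: reads the file directly; on FileNotFoundError it logs the sibling
    # files sharing the same suffix before re-raising, for easier debugging.
    local_buffer = get_file_buffer("/tmp/backups/public.store/2021-04-26T09:00:00.csv")

    # S3 path: an s3_resource is required (asserted) and the object is downloaded
    # from the hard-coded "sample-bucket".
    remote_buffer = get_file_buffer(
        "s3://sample-bucket/public.store/2021-04-26T09:00:00.csv", s3_resource
    )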
