Skip to content

Commit

Permalink
Enable flat output for Write-Only Replica Staging
Browse files Browse the repository at this point in the history
This commit modifies the Write-Only Replica Staging space to write
replicas into a flat directory, rather than Archivematica's typical
UUID quad directories. This enables more efficient pickup of replicas
stored in this space by offline storage systems.
  • Loading branch information
tw4l committed Jun 15, 2021
1 parent 446e6da commit 311b6ab
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 8 deletions.
31 changes: 31 additions & 0 deletions storage_service/common/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,3 +431,34 @@ def test_create_tar(
tarfile.is_tarfile.assert_any_call(tarpath)
if extension:
tarpath.endswith(utils.TAR_EXTENSION)


# Each entry is (path handed to the helper, path expected back from it).
STRIP_QUAD_DIRS_CASES = [
    # Ensure UUID quad dirs are removed.
    (
        "/var/archivematica/sharedDirectory/www/offlineReplicas/d8a4/d502/30b7/4902/b545/9c87/8242/f96c/uncompressed-test-d8a4d502-30b7-4902-b545-9c878242f96c",
        "/var/archivematica/sharedDirectory/www/offlineReplicas/uncompressed-test-d8a4d502-30b7-4902-b545-9c878242f96c/",
    ),
    (
        "/var/archivematica/sharedDirectory/www/offlineReplicas/d8a4/d502/30b7/4902/b545/9c87/8242/f96c/uncompressed-test-d8a4d502-30b7-4902-b545-9c878242f96c/",
        "/var/archivematica/sharedDirectory/www/offlineReplicas/uncompressed-test-d8a4d502-30b7-4902-b545-9c878242f96c/",
    ),
    (
        "/var/archivematica/sharedDirectory/www/offlineReplicas/2965/2761/a5b2/4da9/9af8/ffb4/bc06/2439/compressed-replica-29652761-a5b2-4da9-9af8-ffb4bc062439.7z",
        "/var/archivematica/sharedDirectory/www/offlineReplicas/compressed-replica-29652761-a5b2-4da9-9af8-ffb4bc062439.7z",
    ),
    # Ensure other directories are not removed.
    (
        "/var/archivematica/sharedDirectory/www/offlineReplicas/test-file.txt",
        "/var/archivematica/sharedDirectory/www/offlineReplicas/test-file.txt",
    ),
    (
        "/var/archivematica/sharedDirectory/www/offlineReplicas/test/package.tar.gz",
        "/var/archivematica/sharedDirectory/www/offlineReplicas/test/package.tar.gz",
    ),
]


@pytest.mark.parametrize(("dest_path", "flattened_path"), STRIP_QUAD_DIRS_CASES)
def test_strip_quad_dirs_from_path(dest_path, flattened_path):
    """Check that quad directories are dropped and other paths are untouched.

    The cases cover an uncompressed package with and without a trailing
    slash, a compressed package, and two paths that contain no quad
    directories at all.
    """
    result = utils.strip_quad_dirs_from_path(dest_path)
    assert result == flattened_path
21 changes: 21 additions & 0 deletions storage_service/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from lxml.builder import ElementMaker
import mimetypes
import os
import re
import shutil
import subprocess
import tarfile
Expand Down Expand Up @@ -682,6 +683,26 @@ def removedirs(relative_path, base=None):
head, tail = os.path.split(head)


# Name of one UUID "quad" directory: exactly four hexadecimal characters.
_UUID_QUAD_DIR = re.compile(r"[0-9a-f]{4}\Z", re.I)
# A UUID holds 32 hexadecimal characters, i.e. eight four-character quads.
_UUID_QUAD_DIR_COUNT = 8


def strip_quad_dirs_from_path(dest_path):
    """Return dest_path with UUID quad directories removed.

    The quad directories are only removed when the package's parent path
    ends in a complete run of eight four-character hexadecimal directories.
    A shorter run is left alone, so that an ordinary directory whose name
    merely looks like a quad (e.g. ``2021`` or ``beef``) is never stripped.

    Ensure that paths to uncompressed packages terminate in a trailing slash.
    A package is considered uncompressed when its name has no file extension;
    note that a directory name containing a dot is therefore treated as a
    file and gets no trailing slash.

    :param dest_path: Path to a package, with or without a trailing slash.
    :returns: Path with the quad directories removed (str).
    """
    parent_dir, package_name = os.path.split(dest_path.rstrip("/"))

    flat_dir = parent_dir
    for _ in range(_UUID_QUAD_DIR_COUNT):
        head, tail = os.path.split(flat_dir)
        if not _UUID_QUAD_DIR.match(tail):
            # Fewer than eight quads: this is not a UUID quad tree, so keep
            # the parent directory exactly as it was given.
            flat_dir = parent_dir
            break
        flat_dir = head

    output_path = os.path.join(flat_dir, package_name)
    _, file_extension = os.path.splitext(output_path)
    if not file_extension:
        # Joining with an empty component appends the trailing separator.
        return os.path.join(output_path, "")
    return output_path


def coerce_str(string):
"""Return string as a str, not a unicode, encoded in utf-8.
Expand Down
13 changes: 11 additions & 2 deletions storage_service/locations/models/replica_staging.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,21 @@ def move_to_storage_service(self, src_path, dest_path, dest_space):
_("Write-Only Offline Staging does not implement fetching packages")
)

def move_from_storage_service(self, src_path, dest_path, package=None):
""" Moves self.staging_path/src_path to dest_path."""
def move_from_storage_service(
    self, src_path, dest_path, package=None, flat_output=True
):
    """Moves self.staging_path/src_path to dest_path.

    When flat_output is True the UUID quad directories are stripped from
    dest_path first, so the replica is stored directly in the Replicator
    Location instead of inside quad directories.

    A source that the package reports as not yet packaged is handed to
    _store_tar_replica and that call's result is returned. Otherwise the
    source is moved with rsync and package.current_path is updated to the
    final destination.

    NOTE(review): package defaults to None but is dereferenced
    unconditionally, so callers must always supply it.
    """
    target_path = (
        utils.strip_quad_dirs_from_path(dest_path) if flat_output else dest_path
    )
    self.space.create_local_directory(target_path)

    if package.is_packaged(src_path):
        self.space.move_rsync(src_path, target_path)
        package.current_path = target_path
        return None

    return self._store_tar_replica(src_path, target_path, package)

def _store_tar_replica(self, src_path, dest_path, package):
"""Create and store TAR replica."""
Expand Down
10 changes: 4 additions & 6 deletions storage_service/locations/tests/test_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,10 +751,9 @@ def test_replicate_aip_offline_staging_uncompressed(self):

assert aip.replicas.count() == 1
assert replica is not None
expected_replica_path = os.path.join(
replication_dir, utils.uuid_to_path(replica.uuid), "working_bag.tar"
)
expected_replica_path = os.path.join(replication_dir, "working_bag.tar")
assert os.path.exists(expected_replica_path)
assert replica.current_path == expected_replica_path

# Ensure tar file and quad dirs in staging are cleaned up properly.
assert staging_files_count_initial == recursive_file_count(staging_dir)
Expand Down Expand Up @@ -789,10 +788,9 @@ def test_replicate_aip_offline_staging_compressed(self):

assert aip.replicas.count() == 1
assert replica is not None
expected_replica_path = os.path.join(
replication_dir, utils.uuid_to_path(replica.uuid), "working_bag.7z"
)
expected_replica_path = os.path.join(replication_dir, "working_bag.7z")
assert os.path.exists(expected_replica_path)
assert replica.current_path == expected_replica_path

def test_deletion_and_creation_of_replicas_compressed(self):
"""Ensure that when it is requested a replica be created, then
Expand Down

0 comments on commit 311b6ab

Please sign in to comment.