diff --git a/storage_service/locations/fixtures/package.json b/storage_service/locations/fixtures/package.json index 01dec828a..c01ddff29 100644 --- a/storage_service/locations/fixtures/package.json +++ b/storage_service/locations/fixtures/package.json @@ -114,7 +114,25 @@ "status": "Uploaded", "misc_attributes": "{}" } -},{ +}, +{ + "pk": 7, + "model": "locations.package", + "fields": { + "uuid": "88deec53-c7dc-4828-865c-7356386e9399", + "description": "Small bagged 7zipped package", + "origin_pipeline": null, + "current_location": "615103f0-0ee0-4a12-ba17-43192d1143ea", + "current_path": "working_bag.7z", + "pointer_file_location": "", + "pointer_file_path": "", + "size": 595, + "package_type": "AIP", + "status": "Uploaded", + "misc_attributes": "{}" + } +}, +{ "pk": 1, "model": "locations.file", "fields": { diff --git a/storage_service/locations/fixtures/working_bag.7z b/storage_service/locations/fixtures/working_bag.7z new file mode 100644 index 000000000..4556d1579 Binary files /dev/null and b/storage_service/locations/fixtures/working_bag.7z differ diff --git a/storage_service/locations/models/package.py b/storage_service/locations/models/package.py index 2780c3e1b..e6091a622 100644 --- a/storage_service/locations/models/package.py +++ b/storage_service/locations/models/package.py @@ -484,11 +484,12 @@ def extract_file(self, relative_path='', extract_path=None): Returns path to the extracted file and a temp dir that needs to be deleted. """ - if extract_path is None: - ss_internal = Location.active.get(purpose=Location.STORAGE_SERVICE_INTERNAL) - extract_path = tempfile.mkdtemp(dir=ss_internal.full_path) + ss_internal = Location.active.get(purpose=Location.STORAGE_SERVICE_INTERNAL) full_path = self.fetch_local_path() + if extract_path is None: + extract_path = tempfile.mkdtemp(dir=ss_internal.full_path) + # The basename is the base directory containing a package # like an AIP inside the compressed file. try: @@ -527,17 +528,26 @@ def extract_file(self, relative_path='', extract_path=None): if relative_path: command.append(relative_path) LOGGER.info('Extracting file with: %s to %s', command, output_path) - rc = subprocess.call(command) - LOGGER.debug('Extract file RC: %s', rc) - if rc: + rc = subprocess.check_output(command) + if 'No files extracted' in rc: raise StorageException(_('Extraction error')) else: - LOGGER.info('Copying AIP from: %s to %s', full_path, output_path) - shutil.copytree(full_path, output_path) + if relative_path: + #copy only one file out of aip + head, tail = os.path.split(full_path) + src = os.path.join(head, relative_path) + os.mkdir(os.path.join(extract_path, basename)) + shutil.copy(src, output_path) + else: + src = full_path + shutil.copytree(full_path, output_path) + + LOGGER.info('Copying from: %s to %s', src, output_path) + - if not relative_path: - self.local_path_location = ss_internal - self.local_path = output_path + # if not relative_path: + # self.local_path_location = ss_internal + # self.local_path = output_path return (output_path, extract_path) diff --git a/storage_service/locations/tests/test_package.py b/storage_service/locations/tests/test_package.py index df5baaa1d..ec24d890f 100644 --- a/storage_service/locations/tests/test_package.py +++ b/storage_service/locations/tests/test_package.py @@ -1,4 +1,7 @@ import os +import pytest +import shutil +import tempfile import vcr from django.test import TestCase @@ -25,6 +28,11 @@ def setUp(self): # Arkivum space points at fixtures directory models.Space.objects.filter(uuid='6fb34c82-4222-425e-b0ea-30acfd31f52e').update(path=FIXTURES_DIR) + self.tmp_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + def test_parsing_mets_data(self): mets_data = self.package._parse_mets(prefix=self.mets_path) assert mets_data['transfer_uuid'] == 'de1b31fa-97dd-48e0-8417-03be78359531' @@ -134,3 +142,38 @@ def test_fixity_force_local(self): assert failures == [] assert message == '' assert timestamp is None + + def test_extract_file_aip_from_uncompressed_aip(self): + """ It should return an aip """ + package = models.Package.objects.get(uuid='0d4e739b-bf60-4b87-bc20-67a379b28cea') + basedir = package.get_base_directory() + output_path, extract_path = package.extract_file(extract_path=self.tmp_dir) + assert output_path==os.path.join(self.tmp_dir, basedir) + assert os.path.join(output_path, 'manifest-md5.txt') + + def test_extract_file_file_from_uncompressed_aip(self): + """ It should return a single file from an uncompressed aip """ + package = models.Package.objects.get(uuid='0d4e739b-bf60-4b87-bc20-67a379b28cea') + basedir = package.get_base_directory() + output_path, extract_path = package.extract_file(relative_path='working_bag/manifest-md5.txt', extract_path=self.tmp_dir) + assert output_path == os.path.join(self.tmp_dir, basedir, 'manifest-md5.txt') + assert os.path.isfile(output_path) + + def test_extract_file_file_from_compressed_aip(self): + """ It should return a single file from a 7zip compressed aip """ + package = models.Package.objects.get(uuid='88deec53-c7dc-4828-865c-7356386e9399') + basedir = package.get_base_directory() + output_path, extract_path = package.extract_file(relative_path='working_bag/manifest-md5.txt', extract_path=self.tmp_dir) + assert output_path == os.path.join(extract_path, basedir, 'manifest-md5.txt') + assert os.path.isfile(output_path) + + def test_extract_file_file_does_not_exist_compressed(self): + """ It should raise an error because the requested file does not exist""" + package = models.Package.objects.get(uuid='88deec53-c7dc-4828-865c-7356386e9399') + with pytest.raises(Exception) as e_info: + output_path, extract_path = package.extract_file(relative_path='working_bag/manifest-sha512.txt', extract_path=self.tmp_dir) + + assert e_info.value.message == 'Extraction error' + + +