diff --git a/aips/create_dip.py b/aips/create_dip.py
index ce5214ae..09537ca9 100644
--- a/aips/create_dip.py
+++ b/aips/create_dip.py
@@ -239,10 +239,18 @@ def create_dip(aip_dir, aip_uuid, output_dir, mets_type, dip_type):
 
         premis = techmd.contents.document
         update_premis_ns(premis, namespaces, premis_map)
-        original_name = get_original_name(premis, namespaces)
 
-        # Move original file with original name and create parent folders
-        dip_file_path = os.path.join(to_zip_dir, original_name[27:])
+        original_name = get_premis_original_name(premis, namespaces)
+        if not original_name:
+            LOGGER.warning("Could not get original file name from premis:originalName")
+            continue
+
+        original_relpath = get_original_relpath(original_name)
+        if not original_relpath:
+            continue
+
+        # Move original file with original file name and create parent folders
+        dip_file_path = os.path.join(to_zip_dir, original_relpath)
         dip_dir_path = os.path.dirname(dip_file_path)
         if not os.path.exists(dip_dir_path):
             os.makedirs(dip_dir_path)
@@ -411,17 +419,32 @@ def update_premis_ns(premis, namespaces, premis_map):
     )
 
 
-def get_original_name(premis, namespaces):
-    """Get original filename from PREMIS record"""
+def get_premis_original_name(premis, namespaces):
+    """Get the original file name from a premis:originalName"""
+
     original_name = premis.findtext("premis:originalName", namespaces=namespaces)
     if not original_name:
         LOGGER.warning("premis:originalName could not be found")
-    string_start = "%transferDirectory%objects/"
-    if original_name[:27] != string_start:
-        LOGGER.warning("premis:originalName not starting with %s", string_start)
+        return None
+
     return original_name
 
 
+def get_original_relpath(original_name):
+    """Get the relative file path from a premis:originalName"""
+
+    path_prefixes = ["%transferDirectory%objects/", "%transferDirectory%data/"]
+    for prefix in path_prefixes:
+        if original_name.startswith(prefix):
+            return original_name[len(prefix) :]
+
+    LOGGER.warning(
+        '"%s" has an invalid path prefix, it must be one of ("%s")',
+        original_name,
+        '", "'.join(path_prefixes),
+    )
+
+
 if __name__ == "__main__":
 
     parser = argparse.ArgumentParser(
diff --git a/tests/test_create_dip.py b/tests/test_create_dip.py
index 813db5ae..65370a24 100644
--- a/tests/test_create_dip.py
+++ b/tests/test_create_dip.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python
-import zipfile
+
 import os
 import time
 import unittest
 import vcr
+import zipfile
 
 import amclient
 
@@ -134,3 +135,18 @@ def test_create_dip_fail_no_aip_dir(self):
             "bad_path", AIP_UUID, OUTPUT_DIR, "atom", "zipped-objects"
         )
         assert dip_dir is None
+
+    def test_get_original_relpath_objects_dir(self):
+        path = "%transferDirectory%objects/folder1/file5.txt"
+
+        assert create_dip.get_original_relpath(path) == "folder1/file5.txt"
+
+    def test_get_original_relpath_data_dir(self):
+        path = "%transferDirectory%data/folder1/file5.txt"
+
+        assert create_dip.get_original_relpath(path) == "folder1/file5.txt"
+
+    def test_get_original_relpath_warn_invalid_prefix(self):
+        path = "%transferDirectory%datas/folder1/file5.txt"
+
+        assert create_dip.get_original_relpath(path) is None