Patch summary (free text ahead of the first diff header; it is not part of any hunk):

* pyproject.toml: adds "tests.MCPClient.test_create_mets_v2" to a `module = [...]` list.
* tests/MCPClient/test_create_mets_v2.py: adds type annotations to the existing
  fixtures and tests, and appends three tests that run create_mets_v2.main and
  inspect the resulting METS amdSec:
    - bag metadata (create_mets_v2.Bag is mocked) -> transfer_metadata children
    - objects/metadata/transfers/transfer_metadata.xml -> mets:xmlData children
    - objects/metadata/transfers/sourceMD/file.xml -> a single mets:mdRef
* The two *_path fixtures are annotated as returning pathlib.Path because they
  return the path they create, and the transfer metadata fixture writes a
  closed element so the file is well-formed XML.

diff --git a/pyproject.toml b/pyproject.toml
index a36c8960b8..3826646624 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -76,6 +76,7 @@ module = [
     "tests.integration.test_oidc_auth",
     "tests.MCPClient.conftest",
     "tests.MCPClient.test_characterize_file",
+    "tests.MCPClient.test_create_mets_v2",
     "tests.MCPClient.test_has_packages",
     "tests.MCPClient.test_identify_file_format",
     "tests.MCPClient.test_normalize",
diff --git a/tests/MCPClient/test_create_mets_v2.py b/tests/MCPClient/test_create_mets_v2.py
index 1fd62bc338..701b1e8673 100644
--- a/tests/MCPClient/test_create_mets_v2.py
+++ b/tests/MCPClient/test_create_mets_v2.py
@@ -1,11 +1,15 @@
+import pathlib
 import uuid
 from contextlib import ExitStack as does_not_raise
 from unittest import mock
 
 import pytest
+import pytest_django
+from client.job import Job
 from create_mets_v2 import createDMDIDsFromCSVMetadata
 from create_mets_v2 import main
 from lxml import etree
+from main.models import SIP
 from main.models import DublinCore
 from main.models import File
 from main.models import MetadataAppliesToType
@@ -14,7 +18,9 @@
 
 
 @mock.patch("create_mets_v2.createDmdSecsFromCSVParsedMetadata", return_value=[])
-def test_createDMDIDsFromCSVMetadata_finds_non_ascii_paths(dmd_secs_creator_mock):
+def test_createDMDIDsFromCSVMetadata_finds_non_ascii_paths(
+    dmd_secs_creator_mock: mock.Mock,
+) -> None:
     state_mock = mock.Mock(
         **{
             "CSV_METADATA": {
@@ -38,7 +44,7 @@ def test_createDMDIDsFromCSVMetadata_finds_non_ascii_paths(dmd_secs_creator_mock
 
 
 @pytest.fixture()
-def objects_path(sip_directory_path):
+def objects_path(sip_directory_path: pathlib.Path) -> pathlib.Path:
     objects_path = sip_directory_path / "objects"
     objects_path.mkdir()
 
@@ -46,7 +52,7 @@ def objects_path(sip_directory_path):
 
 
 @pytest.fixture()
-def empty_dir_path(objects_path):
+def empty_dir_path(objects_path: pathlib.Path) -> pathlib.Path:
     empty_dir_path = objects_path / "empty_dir"
     empty_dir_path.mkdir()
 
@@ -54,7 +60,9 @@ def empty_dir_path(objects_path):
 
 
 @pytest.fixture()
-def metadata_csv(sip, sip_directory_path, objects_path):
+def metadata_csv(
+    sip: SIP, sip_directory_path: pathlib.Path, objects_path: pathlib.Path
+) -> File:
     (objects_path / "metadata").mkdir()
     metadata_csv = objects_path / "metadata" / "metadata.csv"
     metadata_csv.write_text("Filename,dc.title\nobjects/file1,File 1")
@@ -80,7 +88,7 @@ def metadata_csv(sip, sip_directory_path, objects_path):
 
 
 @pytest.fixture()
-def sip_dublincore(sip):
+def sip_dublincore(sip: SIP) -> DublinCore:
     return DublinCore.objects.create(
         metadataappliestotype_id=MetadataAppliesToType.SIP_TYPE,
         metadataappliestoidentifier=sip.pk,
@@ -91,7 +99,7 @@ def sip_dublincore(sip):
 
 
 @pytest.fixture()
-def file_path(objects_path):
+def file_path(objects_path: pathlib.Path) -> pathlib.Path:
     file_path = objects_path / "file1"
     file_path.write_text("Hello world")
 
@@ -99,7 +107,9 @@ def file_path(objects_path):
 
 
 @pytest.fixture()
-def sip_file(sip_file, sip_directory_path, file_path):
+def sip_file(
+    sip_file: File, sip_directory_path: pathlib.Path, file_path: pathlib.Path
+) -> File:
     sip_file.originallocation = (
         f"%transferDirectory%{file_path.relative_to(sip_directory_path)}".encode()
     )
@@ -112,7 +122,9 @@ def sip_file(sip_file, sip_directory_path, file_path):
 
 
 @pytest.mark.django_db
-def test_simple_mets(mcp_job, sip_directory_path, sip, sip_file):
+def test_simple_mets(
+    mcp_job: Job, sip_directory_path: pathlib.Path, sip: SIP, sip_file: File
+) -> None:
     mets_path = sip_directory_path / f"METS.{sip.uuid}.xml"
     main(
         mcp_job,
@@ -145,8 +157,12 @@ def test_simple_mets(mcp_job, sip_directory_path, sip, sip_file):
 
 @pytest.mark.django_db
 def test_aip_mets_includes_dublincore(
-    mcp_job, sip_directory_path, sip, sip_dublincore, sip_file
-):
+    mcp_job: Job,
+    sip_directory_path: pathlib.Path,
+    sip: SIP,
+    sip_dublincore: DublinCore,
+    sip_file: File,
+) -> None:
     mets_path = sip_directory_path / f"METS.{sip.uuid}.xml"
     main(
         mcp_job,
@@ -180,8 +196,12 @@ def test_aip_mets_includes_dublincore(
 
 @pytest.mark.django_db
 def test_aip_mets_includes_dublincore_via_metadata_csv(
-    mcp_job, sip_directory_path, sip, sip_file, metadata_csv
-):
+    mcp_job: Job,
+    sip_directory_path: pathlib.Path,
+    sip: SIP,
+    sip_file: File,
+    metadata_csv: File,
+) -> None:
     mets_path = sip_directory_path / f"METS.{sip.uuid}.xml"
     main(
         mcp_job,
@@ -211,8 +231,13 @@ def test_aip_mets_includes_dublincore_via_metadata_csv(
 
 @pytest.mark.django_db
 def test_aip_mets_normative_directory_structure(
-    mcp_job, sip_directory_path, sip, sip_file, metadata_csv, empty_dir_path
-):
+    mcp_job: Job,
+    sip_directory_path: pathlib.Path,
+    sip: SIP,
+    sip_file: File,
+    metadata_csv: File,
+    empty_dir_path: pathlib.Path,
+) -> None:
     mets_path = sip_directory_path / f"METS.{sip.uuid}.xml"
     main(
         mcp_job,
@@ -264,16 +289,16 @@ def test_aip_mets_normative_directory_structure(
 )
 @mock.patch("create_mets_v2.archivematicaCreateMETSMetadataXML.process_xml_metadata")
 def test_xml_validation_fail_on_error(
-    process_xml_metadata,
-    settings,
-    mcp_job,
-    sip_directory_path,
-    sip,
-    sip_file,
-    fail_on_error,
-    errors,
-    expectation,
-):
+    process_xml_metadata: mock.Mock,
+    settings: pytest_django.fixtures.SettingsWrapper,
+    mcp_job: Job,
+    sip_directory_path: pathlib.Path,
+    sip: SIP,
+    sip_file: File,
+    fail_on_error: bool,
+    errors: list[str],
+    expectation: does_not_raise,
+) -> None:
     mock_mets = mock.Mock(
         **{
             "serialize.return_value": etree.Element("tag"),
@@ -301,7 +326,7 @@ def test_xml_validation_fail_on_error(
 
 
 @pytest.fixture
-def arranged_sip_path(tmp_path):
+def arranged_sip_path(tmp_path: pathlib.Path) -> pathlib.Path:
     sip_path = tmp_path / "sip"
     sip_path.mkdir()
 
@@ -309,7 +334,7 @@ def arranged_sip_path(tmp_path):
 
 
 @pytest.fixture
-def create_arrangement(sip, arranged_sip_path):
+def create_arrangement(sip: SIP, arranged_sip_path: pathlib.Path) -> None:
     # Create the directory structure representing the new arrangement.
     objects_path = arranged_sip_path / "objects"
     objects_path.mkdir()
@@ -351,8 +376,8 @@ def create_arrangement(sip, arranged_sip_path):
 
 @pytest.mark.django_db
 def test_structmap_is_created_from_sip_arrangement(
-    mcp_job, create_arrangement, arranged_sip_path, sip
-):
+    mcp_job: Job, create_arrangement: None, arranged_sip_path: pathlib.Path, sip: SIP
+) -> None:
     mets_path = f"{arranged_sip_path}/METS.{sip.uuid}.xml"
 
     main(
@@ -391,3 +416,161 @@ def test_structmap_is_created_from_sip_arrangement(
     assert file3_div.attrib["TYPE"] == "File"
     assert subdir_second_div.attrib["TYPE"] == "Subseries"
     assert file4_div.attrib["TYPE"] == "File"
+
+
+@pytest.fixture
+def bag_path(sip_directory_path: pathlib.Path, sip: SIP) -> pathlib.Path:
+    result = (
+        sip_directory_path / "logs" / "transfers" / str(sip.uuid) / "logs" / "BagIt"
+    )
+    result.mkdir(parents=True)
+    (result / "bag-info.txt").touch()
+
+    return result
+
+
+@pytest.mark.django_db
+@mock.patch("create_mets_v2.Bag")
+def test_bag_metadata_is_recorded_in_a_amdsec(
+    bag_class: mock.Mock,
+    mcp_job: Job,
+    sip_directory_path: pathlib.Path,
+    sip: SIP,
+    sip_file: File,
+    bag_path: pathlib.Path,
+) -> None:
+    info = {"Bagging-Date": "2025-01-08", "Payload-Oxum": "0.2"}
+    bag_class.return_value = mock.Mock(info=info)
+    mets_path = sip_directory_path / f"METS.{sip.uuid}.xml"
+
+    main(
+        mcp_job,
+        sipType="SIP",
+        baseDirectoryPath=str(sip_directory_path),
+        XMLFile=str(mets_path),
+        sipUUID=sip.pk,
+        includeAmdSec=False,
+        createNormativeStructmap=False,
+    )
+
+    mets_xml = etree.parse(mets_path.open())
+    transfer_metadata = mets_xml.xpath(
+        ".//mets:amdSec//transfer_metadata/*",
+        namespaces=NSMAP,
+    )
+    assert {e.tag: e.text for e in transfer_metadata} == info
+
+
+@pytest.fixture()
+def transfer_metadata_xml_path(sip: SIP, sip_directory_path: pathlib.Path) -> pathlib.Path:
+    metadata_dir_path = sip_directory_path / "objects" / "metadata" / "transfers"
+    metadata_dir_path.mkdir(parents=True)
+
+    result = metadata_dir_path / "transfer_metadata.xml"
+    result.touch()
+
+    return result
+
+
+@pytest.fixture()
+def transfer_metadata_xml(
+    sip: SIP, sip_directory_path: pathlib.Path, transfer_metadata_xml_path: pathlib.Path
+) -> File:
+    return File.objects.create(
+        sip=sip,
+        currentlocation=f"%SIPDirectory%{transfer_metadata_xml_path.relative_to(sip_directory_path)}".encode(),
+        filegrpuse="metadata",
+    )
+
+
+@pytest.mark.django_db
+def test_transfer_metadata_xml_is_recorded_in_a_amdsec(
+    mcp_job: Job,
+    sip_directory_path: pathlib.Path,
+    sip: SIP,
+    sip_file: File,
+    transfer_metadata_xml_path: pathlib.Path,
+    transfer_metadata_xml: File,
+) -> None:
+    info = {"test": "foobar"}
+    for tag, value in info.items():
+        transfer_metadata_xml_path.write_text(f"<{tag}>{value}</{tag}>")
+    mets_path = sip_directory_path / f"METS.{sip.uuid}.xml"
+
+    main(
+        mcp_job,
+        sipType="SIP",
+        baseDirectoryPath=str(sip_directory_path),
+        XMLFile=str(mets_path),
+        sipUUID=sip.pk,
+        includeAmdSec=False,
+        createNormativeStructmap=False,
+    )
+
+    mets_xml = etree.parse(mets_path.open())
+    transfer_metadata = mets_xml.xpath(
+        ".//mets:amdSec//mets:xmlData/*",
+        namespaces=NSMAP,
+    )
+    assert {e.tag: e.text for e in transfer_metadata} == info
+
+
+@pytest.fixture()
+def source_metadata_xml_path(sip: SIP, sip_directory_path: pathlib.Path) -> pathlib.Path:
+    metadata_dir_path = (
+        sip_directory_path / "objects" / "metadata" / "transfers" / "sourceMD"
+    )
+    metadata_dir_path.mkdir(parents=True)
+
+    result = metadata_dir_path / "file.xml"
+    result.touch()
+
+    return result
+
+
+@pytest.fixture()
+def source_metadata_xml(
+    sip: SIP, sip_directory_path: pathlib.Path, source_metadata_xml_path: pathlib.Path
+) -> File:
+    return File.objects.create(
+        sip=sip,
+        currentlocation=f"%SIPDirectory%{source_metadata_xml_path.relative_to(sip_directory_path)}".encode(),
+        filegrpuse="metadata",
+    )
+
+
+@pytest.mark.django_db
+def test_source_metadata_xml_is_recorded_in_a_amdsec(
+    mcp_job: Job,
+    sip_directory_path: pathlib.Path,
+    sip: SIP,
+    sip_file: File,
+    source_metadata_xml_path: pathlib.Path,
+    source_metadata_xml: File,
+) -> None:
+    mets_path = sip_directory_path / f"METS.{sip.uuid}.xml"
+
+    main(
+        mcp_job,
+        sipType="SIP",
+        baseDirectoryPath=str(sip_directory_path),
+        XMLFile=str(mets_path),
+        sipUUID=sip.pk,
+        includeAmdSec=False,
+        createNormativeStructmap=False,
+    )
+
+    mets_xml = etree.parse(mets_path.open())
+    transfer_metadata = mets_xml.xpath(
+        ".//mets:amdSec//mets:mdRef",
+        namespaces=NSMAP,
+    )
+    assert len(transfer_metadata) == 1
+    assert transfer_metadata[0].attrib == {
+        f"{{{NSMAP['xlink']}}}href": str(
+            source_metadata_xml_path.relative_to(sip_directory_path)
+        ),
+        "MDTYPE": "OTHER",
+        "LOCTYPE": "OTHER",
+        "OTHERLOCTYPE": "SYSTEM",
+    }