diff --git a/hack/submodules/archivematica-acceptance-tests b/hack/submodules/archivematica-acceptance-tests index 2388fcec14..4bd7f46827 160000 --- a/hack/submodules/archivematica-acceptance-tests +++ b/hack/submodules/archivematica-acceptance-tests @@ -1 +1 @@ -Subproject commit 2388fcec14ad01da2c804822516a74491a8a172c +Subproject commit 4bd7f46827f2cfe2e7bc69e899f1c8bf4a01c501 diff --git a/src/MCPClient/lib/clientScripts/archivematicaCreateMETSRightsDspaceMDRef.py b/src/MCPClient/lib/clientScripts/archivematicaCreateMETSRightsDspaceMDRef.py index 229bad2b33..3dd04fd956 100755 --- a/src/MCPClient/lib/clientScripts/archivematicaCreateMETSRightsDspaceMDRef.py +++ b/src/MCPClient/lib/clientScripts/archivematicaCreateMETSRightsDspaceMDRef.py @@ -24,9 +24,6 @@ from django.core.exceptions import ValidationError from main.models import File -# dashboard -# archivematicaCommon - def createMDRefDMDSec(LABEL, itemdirectoryPath, directoryPathSTR): XPTR = "xpointer(id(" @@ -68,7 +65,7 @@ def archivematicaCreateMETSRightsDspaceMDRef( metsFileUUID = mets.uuid metsLoc = mets.currentlocation.decode().replace("%SIPDirectory%", "", 1) metsLocation = os.path.join(os.path.dirname(itemdirectoryPath), "mets.xml") - LABEL = "mets.xml-%s" % (metsFileUUID) + LABEL = f"mets.xml-{metsFileUUID}" ret.append(createMDRefDMDSec(LABEL, metsLocation, metsLoc)) base = os.path.dirname(os.path.dirname(itemdirectoryPath)) @@ -96,7 +93,7 @@ def archivematicaCreateMETSRightsDspaceMDRef( metsLoc = f.currentlocation.decode().replace("%SIPDirectory%", "", 1) metsLocation = os.path.join(fullDir, "mets.xml") job.pyprint(metsLocation) - LABEL = "mets.xml-" + metsFileUUID + LABEL = f"mets.xml-{metsFileUUID}" ret.append(createMDRefDMDSec(LABEL, metsLocation, metsLoc)) except Exception as inst: diff --git a/src/MCPClient/lib/clientScripts/create_mets_v2.py b/src/MCPClient/lib/clientScripts/create_mets_v2.py index 59ffa8f003..7049349ec1 100755 --- a/src/MCPClient/lib/clientScripts/create_mets_v2.py +++ 
b/src/MCPClient/lib/clientScripts/create_mets_v2.py @@ -1235,7 +1235,7 @@ def createFileSec( use = "submissionDocumentation" admidApplyTo = None if GROUPID == "": # is an AIP identifier - GROUPID = f.uuid + GROUPID = str(f.uuid) admidApplyTo = structMapDiv.getparent() label = "mets.xml-%s" % (GROUPID) diff --git a/tests/MCPClient/fixtures/dspace/mets_item1.xml b/tests/MCPClient/fixtures/dspace/mets_item1.xml new file mode 100644 index 0000000000..d524e78158 --- /dev/null +++ b/tests/MCPClient/fixtures/dspace/mets_item1.xml @@ -0,0 +1,363 @@ + + + + + 2429/0 + + + DSpace 1.7.0 + + + + + + + + author + + Vice President Research, Office of the + + + 2008-10-20T18:58:54Z + + + 2008-10-20T18:58:54Z + + + 2006-05 + + http://hdl.handle.net/2429/2700 + Removing just one species from an ecosystem can have radical results. Tony Sinclair's grand-scale biodviersity knockout experiment sets out to determine why. + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-08-25T18:55:48Z +No. of bitstreams: 1 +Species Showdown[1].pdf: 853914 bytes, checksum: d92e2f6f72c6c8852d7b32339871757f (MD5) + Rejected by Andy Torr(andy.torr@ubc.ca), reason: on 2008-10-20T17:40:57Z (GMT) + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-10-20T17:52:05Z +No. of bitstreams: 1 +Species Showdown[1].pdf: 857776 bytes, checksum: 100deac6fb09e416738c92ccaf03d1ed (MD5) + Approved for entry into archive by Andy Torr(andy.torr@ubc.ca) on 2008-10-20T18:58:54Z (GMT) No. of bitstreams: 1 +Species Showdown[1].pdf: 857776 bytes, checksum: 100deac6fb09e416738c92ccaf03d1ed (MD5) + Made available in DSpace on 2008-10-20T18:58:54Z (GMT). No. 
of bitstreams: 1 +Species Showdown[1].pdf: 857776 bytes, checksum: 100deac6fb09e416738c92ccaf03d1ed (MD5) + Previous issue date: 2006-05 + + 857776 bytes + + + application/pdf + + + eng + + + Office of the Vice President Research, The University of British Columbia + + frontier: a journal of research and discovery, issue 1, May 2006 + + Tony Sinclair + + + zoology + + + biodiversity + + + ecosystem + + + Beaty Biodiversity Research Centre + + + Roy Turkington + + + keystone species + + + Serengeti + + + Species Showdown + + text + + + + + + + Vice President Research, Office of the + 2008-10-20T18:58:54Z + 2008-10-20T18:58:54Z + 2006-05 + http://hdl.handle.net/2429/2700 + Removing just one species from an ecosystem can have radical results. Tony Sinclair's grand-scale biodviersity knockout experiment sets out to determine why. + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-08-25T18:55:48Z +No. of bitstreams: 1 +Species Showdown[1].pdf: 853914 bytes, checksum: d92e2f6f72c6c8852d7b32339871757f (MD5) + Rejected by Andy Torr(andy.torr@ubc.ca), reason: on 2008-10-20T17:40:57Z (GMT) + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-10-20T17:52:05Z +No. of bitstreams: 1 +Species Showdown[1].pdf: 857776 bytes, checksum: 100deac6fb09e416738c92ccaf03d1ed (MD5) + Approved for entry into archive by Andy Torr(andy.torr@ubc.ca) on 2008-10-20T18:58:54Z (GMT) No. of bitstreams: 1 +Species Showdown[1].pdf: 857776 bytes, checksum: 100deac6fb09e416738c92ccaf03d1ed (MD5) + Made available in DSpace on 2008-10-20T18:58:54Z (GMT). No. 
of bitstreams: 1 +Species Showdown[1].pdf: 857776 bytes, checksum: 100deac6fb09e416738c92ccaf03d1ed (MD5) + Previous issue date: 2006-05 + 857776 bytes + application/pdf + eng + Office of the Vice President Research, The University of British Columbia + frontier: a journal of research and discovery, issue 1, May 2006 + Tony Sinclair + zoology + biodiversity + ecosystem + Beaty Biodiversity Research Centre + Roy Turkington + keystone species + Serengeti + Species Showdown + text + article + VP Research + + + + + + + + + + + + + + + + + + + + + svpr@exchange.ubc.ca + hdl:2429/2700 + hdl:2429/1314 + + + + + + + + + + + + + + + + + + + + + + URL + https://circle-test.library.ubc.ca/bitstream/2429%2F2700/1/Species+Showdown%5B1%5D.pdf + + File + + + MD5 + 100deac6fb09e416738c92ccaf03d1ed + + 857776 + + + application/pdf + + + + Species Showdown[1].pdf + + + + + + + + + + + + + + + + + Species Showdown[1].pdf + Species Showdown[1].pdf + Adobe PDF + application/pdf + KNOWN + false + + + + + + + + + + + + + + + + + + + + + + URL + https://circle-test.library.ubc.ca/bitstream/2429%2F2700/2/license.txt + + File + + + MD5 + 3b7436e26ac9ade94081d820c5161c4f + + 3975 + + + text/html + + + + license.txt + + + + + + + + + + + + + + + + + license.txt + Written by org.dspace.content.Item + License + text/html + KNOWN + true + + + + + + + + + + + + + + + + + + + + + + URL + https://circle-test.library.ubc.ca/bitstream/2429%2F2700/3/Species+Showdown%5B1%5D.pdf.txt + + File + + + MD5 + 33b1ccb7a17d4f423770abe5fcc61ac6 + + 10714 + + + text/plain + + + + Species Showdown[1].pdf.txt + + + + + + + + + COLLECTION_hdl:2429/1314_ADMIN + + + + + + + + + Species Showdown[1].pdf.txt + Written by FormatFilter org.dspace.app.mediafilter.PDFFilter on 2009-12-04T12:14:31Z (GMT). + Extracted text + Text + text/plain + KNOWN + false + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+ +
+ +
+
+
diff --git a/tests/MCPClient/fixtures/dspace/mets_item2.xml b/tests/MCPClient/fixtures/dspace/mets_item2.xml new file mode 100644 index 0000000000..335ab7018a --- /dev/null +++ b/tests/MCPClient/fixtures/dspace/mets_item2.xml @@ -0,0 +1,371 @@ + + + + + 2429/0 + + + DSpace 1.7.0 + + + + + + + + author + + Vice President Research, Office of the + + + 2008-10-20T19:00:27Z + + + 2008-10-20T19:00:27Z + + + 2006-05 + + http://hdl.handle.net/2429/2701 + Melanie Jones and Dan Durall aren't looking to the treetops for clues about the "wood wide web." They're looking to the soil at fungi that are crucial to renewing our forests. + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-08-27T17:34:20Z +No. of bitstreams: 1 +Wood Wide Web[1].pdf: 114179 bytes, checksum: 28b548e21f6686bd16e0b7789982d980 (MD5) + Rejected by Andy Torr(andy.torr@ubc.ca), reason: on 2008-10-20T17:41:07Z (GMT) + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-10-20T17:52:48Z +No. of bitstreams: 1 +Wood Wide Web[1].pdf: 118031 bytes, checksum: 0124ee9d6a881589e011ead839761fc1 (MD5) + Approved for entry into archive by Andy Torr(andy.torr@ubc.ca) on 2008-10-20T19:00:27Z (GMT) No. of bitstreams: 1 +Wood Wide Web[1].pdf: 118031 bytes, checksum: 0124ee9d6a881589e011ead839761fc1 (MD5) + Made available in DSpace on 2008-10-20T19:00:27Z (GMT). No. 
of bitstreams: 1 +Wood Wide Web[1].pdf: 118031 bytes, checksum: 0124ee9d6a881589e011ead839761fc1 (MD5) + Previous issue date: 2006-05 + + 118031 bytes + + + application/pdf + + + eng + + + Office of the Vice President Research, The University of British Columbia + + frontier: a journal of research and discovery, issue 1, May 2006 + + Melanie Jones + + + Dan Durall + + + soil biology + + + mycorrhizal fungi + + + Species at Risk + + + Habitat Studies Centre + + + Denise Brooks + + + Canada Foundation for Innovation + + + CFI + + + UBC Okanagan + + + Wood Wide Web + + text + + + + + + + Vice President Research, Office of the + 2008-10-20T19:00:27Z + 2008-10-20T19:00:27Z + 2006-05 + http://hdl.handle.net/2429/2701 + Melanie Jones and Dan Durall aren't looking to the treetops for clues about the "wood wide web." They're looking to the soil at fungi that are crucial to renewing our forests. + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-08-27T17:34:20Z +No. of bitstreams: 1 +Wood Wide Web[1].pdf: 114179 bytes, checksum: 28b548e21f6686bd16e0b7789982d980 (MD5) + Rejected by Andy Torr(andy.torr@ubc.ca), reason: on 2008-10-20T17:41:07Z (GMT) + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-10-20T17:52:48Z +No. of bitstreams: 1 +Wood Wide Web[1].pdf: 118031 bytes, checksum: 0124ee9d6a881589e011ead839761fc1 (MD5) + Approved for entry into archive by Andy Torr(andy.torr@ubc.ca) on 2008-10-20T19:00:27Z (GMT) No. of bitstreams: 1 +Wood Wide Web[1].pdf: 118031 bytes, checksum: 0124ee9d6a881589e011ead839761fc1 (MD5) + Made available in DSpace on 2008-10-20T19:00:27Z (GMT). No. 
of bitstreams: 1 +Wood Wide Web[1].pdf: 118031 bytes, checksum: 0124ee9d6a881589e011ead839761fc1 (MD5) + Previous issue date: 2006-05 + 118031 bytes + application/pdf + eng + Office of the Vice President Research, The University of British Columbia + frontier: a journal of research and discovery, issue 1, May 2006 + Melanie Jones + Dan Durall + soil biology + mycorrhizal fungi + Species at Risk + Habitat Studies Centre + Denise Brooks + Canada Foundation for Innovation + CFI + UBC Okanagan + Wood Wide Web + text + article + VP Research + + + + + + + + + + + + + + + + + + + + + svpr@exchange.ubc.ca + hdl:2429/2701 + hdl:2429/1314 + + + + + + + + + + + + + + + + + + + + + + URL + https://circle-test.library.ubc.ca/bitstream/2429%2F2701/1/Wood+Wide+Web%5B1%5D.pdf + + File + + + MD5 + 0124ee9d6a881589e011ead839761fc1 + + 118031 + + + application/pdf + + + + Wood Wide Web[1].pdf + + + + + + + + + + + + + + + + + Wood Wide Web[1].pdf + Wood Wide Web[1].pdf + Adobe PDF + application/pdf + KNOWN + false + + + + + + + + + + + + + + + + + + + + + + URL + https://circle-test.library.ubc.ca/bitstream/2429%2F2701/2/license.txt + + File + + + MD5 + cdc58860dbfa551807059e5c744e8841 + + 3975 + + + text/html + + + + license.txt + + + + + + + + + + + + + + + + + license.txt + Written by org.dspace.content.Item + License + text/html + KNOWN + true + + + + + + + + + + + + + + + + + + + + + + URL + https://circle-test.library.ubc.ca/bitstream/2429%2F2701/3/Wood+Wide+Web%5B1%5D.pdf.txt + + File + + + MD5 + 979e05921f91661e7240b7e0335bc927 + + 7792 + + + text/plain + + + + Wood Wide Web[1].pdf.txt + + + + + + + + + COLLECTION_hdl:2429/1314_ADMIN + + + + + + + + + Wood Wide Web[1].pdf.txt + Written by FormatFilter org.dspace.app.mediafilter.PDFFilter on 2009-12-04T10:49:24Z (GMT). + Extracted text + Text + text/plain + KNOWN + false + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+ +
+ +
+
+
diff --git a/tests/MCPClient/test_archivematicaCreateMETSRightsDspaceMDRef.py b/tests/MCPClient/test_archivematicaCreateMETSRightsDspaceMDRef.py
new file mode 100644
index 0000000000..d743d48924
--- /dev/null
+++ b/tests/MCPClient/test_archivematicaCreateMETSRightsDspaceMDRef.py
@@ -0,0 +1,197 @@
+import pathlib
+from unittest import mock
+
+import namespaces as ns
+import pytest
+from archivematicaCreateMETSRightsDspaceMDRef import (
+    archivematicaCreateMETSRightsDspaceMDRef,
+)
+from client.job import Job
+from create_mets_v2 import MetsState
+from main import models
+
+
+@pytest.fixture
+def transfer_data(tmp_path, db):
+    """Create a transfer with two DSpace items on disk and in the database.
+
+    Each item directory holds an empty ``bitstream.pdf`` and a ``mets.xml``
+    copied from ``fixtures/dspace``; every file gets a ``File`` row.
+
+    The ``db`` fixture is requested because rows are created here: pytest
+    marks such as ``django_db`` have no effect when applied to a fixture.
+    """
+    fixtures_dir = pathlib.Path(__file__).parent / "fixtures" / "dspace"
+
+    transfer_dir = tmp_path / "transfer"
+    transfer_dir.mkdir()
+
+    transfer = models.Transfer.objects.create(currentlocation=transfer_dir)
+
+    objects_dir = transfer_dir / "objects"
+    objects_dir.mkdir()
+
+    # Add a DSpace item with a PDF and a METS file.
+
+    item1_dir = objects_dir / "ITEM_2429-2700"
+    item1_dir.mkdir()
+
+    item1_path = item1_dir / "bitstream.pdf"
+    item1_path.touch()
+
+    item1_file = models.File.objects.create(
+        transfer=transfer,
+        currentlocation=f"%SIPDirectory%{item1_path.relative_to(transfer_dir)}".encode(),
+    )
+
+    item1_mets_path = item1_dir / "mets.xml"
+    item1_mets_path.write_text((fixtures_dir / "mets_item1.xml").read_text())
+
+    item1_mets_file = models.File.objects.create(
+        transfer=transfer,
+        currentlocation=f"%SIPDirectory%{item1_mets_path.relative_to(transfer_dir)}".encode(),
+    )
+
+    # Add a second DSpace item with a PDF and a METS file.
+
+    item2_dir = objects_dir / "SOMEOTHERITEM"
+    item2_dir.mkdir()
+
+    item2_path = item2_dir / "bitstream.pdf"
+    item2_path.touch()
+
+    item2_file = models.File.objects.create(
+        transfer=transfer,
+        currentlocation=f"%SIPDirectory%{item2_path.relative_to(transfer_dir)}".encode(),
+    )
+
+    item2_mets_path = item2_dir / "mets.xml"
+    item2_mets_path.write_text((fixtures_dir / "mets_item2.xml").read_text())
+
+    item2_mets_file = models.File.objects.create(
+        transfer=transfer,
+        currentlocation=f"%SIPDirectory%{item2_mets_path.relative_to(transfer_dir)}".encode(),
+    )
+
+    return {
+        "item1_path": item1_path,
+        "item1_file": item1_file,
+        "item1_mets_file": item1_mets_file,
+        "item1_mets_path": item1_mets_path,
+        "item2_path": item2_path,
+        "item2_file": item2_file,
+        "item2_mets_file": item2_mets_file,
+        "item2_mets_path": item2_mets_path,
+        "transfer_dir": transfer_dir,
+        "transfer": transfer,
+    }
+
+
+@pytest.mark.django_db
+def test_archivematicaCreateMETSRightsDspaceMDRef(transfer_data):
+    """One mdRef is returned per METS file in the transfer, without errors."""
+    job = mock.Mock(spec=Job)
+    state = MetsState()
+    file_path = transfer_data["item1_path"].relative_to(transfer_data["transfer_dir"])
+
+    result = archivematicaCreateMETSRightsDspaceMDRef(
+        job,
+        transfer_data["item1_file"].uuid,
+        file_path,
+        transfer_data["transfer"].uuid,
+        transfer_data["item1_path"],
+        state,
+    )
+    assert state.error_accumulator.error_count == 0
+
+    # One dmdSec is created for each METS file.
+    assert len(result) == 2
+
+    # Verify the attributes of the returned mdRef elements.
+    dmd_secs = sorted([d.attrib for d in result], key=lambda d: d[f"{ns.xlinkBNS}href"])
+
+    assert dmd_secs == [
+        {
+            "LABEL": f"mets.xml-{transfer_data['item1_mets_file'].uuid}",
+            "MDTYPE": "OTHER",
+            "OTHERMDTYPE": "METSRIGHTS",
+            "LOCTYPE": "OTHER",
+            "OTHERLOCTYPE": "SYSTEM",
+            "XPTR": "xpointer(id('rightsMD_371 rightsMD_374 rightsMD_384 rightsMD_393 rightsMD_401 rightsMD_409 rightsMD_417 rightsMD_425'))",
+            f"{ns.xlinkBNS}href": str(
+                transfer_data["item1_mets_path"].relative_to(
+                    transfer_data["transfer_dir"]
+                )
+            ),
+        },
+        {
+            "LABEL": f"mets.xml-{transfer_data['item2_mets_file'].uuid}",
+            "MDTYPE": "OTHER",
+            "OTHERMDTYPE": "METSRIGHTS",
+            "LOCTYPE": "OTHER",
+            "OTHERLOCTYPE": "SYSTEM",
+            "XPTR": "xpointer(id('rightsMD_435 rightsMD_438 rightsMD_448 rightsMD_457 rightsMD_465 rightsMD_473 rightsMD_481 rightsMD_489'))",
+            f"{ns.xlinkBNS}href": str(
+                transfer_data["item2_mets_path"].relative_to(
+                    transfer_data["transfer_dir"]
+                )
+            ),
+        },
+    ]
+
+    job.pyprint.assert_has_calls(
+        [
+            mock.call(transfer_data["item1_file"].uuid, file_path),
+            mock.call(str(transfer_data["item1_path"].parent)),
+            mock.call(str(transfer_data["item2_path"].parent)),
+            mock.call(str(transfer_data["item2_mets_path"])),
+            mock.call("continue"),
+        ],
+        # os.listdir returns files in arbitrary order.
+        any_order=True,
+    )
+
+
+@pytest.mark.django_db
+@mock.patch(
+    "archivematicaCreateMETSRightsDspaceMDRef.createMDRefDMDSec",
+    side_effect=Exception("error"),
+)
+def test_archivematicaCreateMETSRightsDspaceMDRef_handle_exceptions(
+    createMDRefDMDSec, transfer_data
+):
+    """A failure while building the mdRef is recorded and yields no result."""
+    job = mock.Mock(spec=Job)
+    state = MetsState()
+    file_path = transfer_data["item1_path"].relative_to(transfer_data["transfer_dir"])
+
+    result = archivematicaCreateMETSRightsDspaceMDRef(
+        job,
+        transfer_data["item1_file"].uuid,
+        file_path,
+        transfer_data["transfer"].uuid,
+        transfer_data["item1_path"],
+        state,
+    )
+    assert state.error_accumulator.error_count == 1
+
+    assert len(result) == 0
+
+    assert job.pyprint.mock_calls == [
+        mock.call(transfer_data["item1_file"].uuid, file_path),
+        mock.call(
+            "Error creating mets dspace mdref",
+            transfer_data["item1_file"].uuid,
+            file_path,
+            file=mock.ANY,
+        ),
+        mock.call(mock.ANY, ("error",), file=mock.ANY),
+    ]
+
+    createMDRefDMDSec.assert_called_once_with(
+        f"mets.xml-{transfer_data['item1_mets_file'].uuid}",
+        str(transfer_data["item1_mets_path"]),
+        str(
+            transfer_data["item1_mets_path"].relative_to(transfer_data["transfer_dir"])
+        ),
+    )