From ed3af5e1efd6e6f383bef5419481725e9ec03421 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Tue, 30 Apr 2024 13:22:52 +0200 Subject: [PATCH 1/2] Fix Generate METS job for DSpace transfers This fixes file UUID serialization when multiple exports are included in a DSpace transfer. --- ...rchivematicaCreateMETSRightsDspaceMDRef.py | 7 +- .../lib/clientScripts/create_mets_v2.py | 2 +- .../MCPClient/fixtures/dspace/mets_item1.xml | 363 +++++++++++++++++ .../MCPClient/fixtures/dspace/mets_item2.xml | 371 ++++++++++++++++++ ...rchivematicaCreateMETSRightsDspaceMDRef.py | 190 +++++++++ 5 files changed, 927 insertions(+), 6 deletions(-) create mode 100644 tests/MCPClient/fixtures/dspace/mets_item1.xml create mode 100644 tests/MCPClient/fixtures/dspace/mets_item2.xml create mode 100644 tests/MCPClient/test_archivematicaCreateMETSRightsDspaceMDRef.py diff --git a/src/MCPClient/lib/clientScripts/archivematicaCreateMETSRightsDspaceMDRef.py b/src/MCPClient/lib/clientScripts/archivematicaCreateMETSRightsDspaceMDRef.py index 229bad2b33..3dd04fd956 100755 --- a/src/MCPClient/lib/clientScripts/archivematicaCreateMETSRightsDspaceMDRef.py +++ b/src/MCPClient/lib/clientScripts/archivematicaCreateMETSRightsDspaceMDRef.py @@ -24,9 +24,6 @@ from django.core.exceptions import ValidationError from main.models import File -# dashboard -# archivematicaCommon - def createMDRefDMDSec(LABEL, itemdirectoryPath, directoryPathSTR): XPTR = "xpointer(id(" @@ -68,7 +65,7 @@ def archivematicaCreateMETSRightsDspaceMDRef( metsFileUUID = mets.uuid metsLoc = mets.currentlocation.decode().replace("%SIPDirectory%", "", 1) metsLocation = os.path.join(os.path.dirname(itemdirectoryPath), "mets.xml") - LABEL = "mets.xml-%s" % (metsFileUUID) + LABEL = f"mets.xml-{metsFileUUID}" ret.append(createMDRefDMDSec(LABEL, metsLocation, metsLoc)) base = os.path.dirname(os.path.dirname(itemdirectoryPath)) @@ -96,7 +93,7 @@ def archivematicaCreateMETSRightsDspaceMDRef( metsLoc = f.currentlocation.decode().replace("%SIPDirectory%", "", 1) metsLocation = os.path.join(fullDir, "mets.xml") job.pyprint(metsLocation) - LABEL = "mets.xml-" + metsFileUUID + LABEL = f"mets.xml-{metsFileUUID}" ret.append(createMDRefDMDSec(LABEL, metsLocation, metsLoc)) except Exception as inst: diff --git a/src/MCPClient/lib/clientScripts/create_mets_v2.py b/src/MCPClient/lib/clientScripts/create_mets_v2.py index 59ffa8f003..7049349ec1 100755 --- a/src/MCPClient/lib/clientScripts/create_mets_v2.py +++ b/src/MCPClient/lib/clientScripts/create_mets_v2.py @@ -1235,7 +1235,7 @@ def createFileSec( use = "submissionDocumentation" admidApplyTo = None if GROUPID == "": # is an AIP identifier - GROUPID = f.uuid + GROUPID = str(f.uuid) admidApplyTo = structMapDiv.getparent() label = "mets.xml-%s" % (GROUPID) diff --git a/tests/MCPClient/fixtures/dspace/mets_item1.xml b/tests/MCPClient/fixtures/dspace/mets_item1.xml new file mode 100644 index 0000000000..d524e78158 --- /dev/null +++ b/tests/MCPClient/fixtures/dspace/mets_item1.xml @@ -0,0 +1,363 @@ + + + + + 2429/0 + + + DSpace 1.7.0 + + + + + + + + author + + Vice President Research, Office of the + + + 2008-10-20T18:58:54Z + + + 2008-10-20T18:58:54Z + + + 2006-05 + + http://hdl.handle.net/2429/2700 + Removing just one species from an ecosystem can have radical results. Tony Sinclair's grand-scale biodviersity knockout experiment sets out to determine why. + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-08-25T18:55:48Z +No. of bitstreams: 1 +Species Showdown[1].pdf: 853914 bytes, checksum: d92e2f6f72c6c8852d7b32339871757f (MD5) + Rejected by Andy Torr(andy.torr@ubc.ca), reason: on 2008-10-20T17:40:57Z (GMT) + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-10-20T17:52:05Z +No. of bitstreams: 1 +Species Showdown[1].pdf: 857776 bytes, checksum: 100deac6fb09e416738c92ccaf03d1ed (MD5) + Approved for entry into archive by Andy Torr(andy.torr@ubc.ca) on 2008-10-20T18:58:54Z (GMT) No. of bitstreams: 1 +Species Showdown[1].pdf: 857776 bytes, checksum: 100deac6fb09e416738c92ccaf03d1ed (MD5) + Made available in DSpace on 2008-10-20T18:58:54Z (GMT). No. of bitstreams: 1 +Species Showdown[1].pdf: 857776 bytes, checksum: 100deac6fb09e416738c92ccaf03d1ed (MD5) + Previous issue date: 2006-05 + + 857776 bytes + + + application/pdf + + + eng + + + Office of the Vice President Research, The University of British Columbia + + frontier: a journal of research and discovery, issue 1, May 2006 + + Tony Sinclair + + + zoology + + + biodiversity + + + ecosystem + + + Beaty Biodiversity Research Centre + + + Roy Turkington + + + keystone species + + + Serengeti + + + Species Showdown + + text + + + + + + + Vice President Research, Office of the + 2008-10-20T18:58:54Z + 2008-10-20T18:58:54Z + 2006-05 + http://hdl.handle.net/2429/2700 + Removing just one species from an ecosystem can have radical results. Tony Sinclair's grand-scale biodviersity knockout experiment sets out to determine why. + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-08-25T18:55:48Z +No. of bitstreams: 1 +Species Showdown[1].pdf: 853914 bytes, checksum: d92e2f6f72c6c8852d7b32339871757f (MD5) + Rejected by Andy Torr(andy.torr@ubc.ca), reason: on 2008-10-20T17:40:57Z (GMT) + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-10-20T17:52:05Z +No. of bitstreams: 1 +Species Showdown[1].pdf: 857776 bytes, checksum: 100deac6fb09e416738c92ccaf03d1ed (MD5) + Approved for entry into archive by Andy Torr(andy.torr@ubc.ca) on 2008-10-20T18:58:54Z (GMT) No. of bitstreams: 1 +Species Showdown[1].pdf: 857776 bytes, checksum: 100deac6fb09e416738c92ccaf03d1ed (MD5) + Made available in DSpace on 2008-10-20T18:58:54Z (GMT). No. of bitstreams: 1 +Species Showdown[1].pdf: 857776 bytes, checksum: 100deac6fb09e416738c92ccaf03d1ed (MD5) + Previous issue date: 2006-05 + 857776 bytes + application/pdf + eng + Office of the Vice President Research, The University of British Columbia + frontier: a journal of research and discovery, issue 1, May 2006 + Tony Sinclair + zoology + biodiversity + ecosystem + Beaty Biodiversity Research Centre + Roy Turkington + keystone species + Serengeti + Species Showdown + text + article + VP Research + + + + + + + + + + + + + + + + + + + + + svpr@exchange.ubc.ca + hdl:2429/2700 + hdl:2429/1314 + + + + + + + + + + + + + + + + + + + + + + URL + https://circle-test.library.ubc.ca/bitstream/2429%2F2700/1/Species+Showdown%5B1%5D.pdf + + File + + + MD5 + 100deac6fb09e416738c92ccaf03d1ed + + 857776 + + + application/pdf + + + + Species Showdown[1].pdf + + + + + + + + + + + + + + + + + Species Showdown[1].pdf + Species Showdown[1].pdf + Adobe PDF + application/pdf + KNOWN + false + + + + + + + + + + + + + + + + + + + + + + URL + https://circle-test.library.ubc.ca/bitstream/2429%2F2700/2/license.txt + + File + + + MD5 + 3b7436e26ac9ade94081d820c5161c4f + + 3975 + + + text/html + + + + license.txt + + + + + + + + + + + + + + + + + license.txt + Written by org.dspace.content.Item + License + text/html + KNOWN + true + + + + + + + + + + + + + + + + + + + + + + URL + https://circle-test.library.ubc.ca/bitstream/2429%2F2700/3/Species+Showdown%5B1%5D.pdf.txt + + File + + + MD5 + 33b1ccb7a17d4f423770abe5fcc61ac6 + + 10714 + + + text/plain + + + + Species Showdown[1].pdf.txt + + + + + + + + + COLLECTION_hdl:2429/1314_ADMIN + + + + + + + + + Species Showdown[1].pdf.txt + Written by FormatFilter org.dspace.app.mediafilter.PDFFilter on 2009-12-04T12:14:31Z (GMT). + Extracted text + Text + text/plain + KNOWN + false + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+ +
+ +
+
+
diff --git a/tests/MCPClient/fixtures/dspace/mets_item2.xml b/tests/MCPClient/fixtures/dspace/mets_item2.xml new file mode 100644 index 0000000000..335ab7018a --- /dev/null +++ b/tests/MCPClient/fixtures/dspace/mets_item2.xml @@ -0,0 +1,371 @@ + + + + + 2429/0 + + + DSpace 1.7.0 + + + + + + + + author + + Vice President Research, Office of the + + + 2008-10-20T19:00:27Z + + + 2008-10-20T19:00:27Z + + + 2006-05 + + http://hdl.handle.net/2429/2701 + Melanie Jones and Dan Durall aren't looking to the treetops for clues about the "wood wide web." They're looking to the soil at fungi that are crucial to renewing our forests. + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-08-27T17:34:20Z +No. of bitstreams: 1 +Wood Wide Web[1].pdf: 114179 bytes, checksum: 28b548e21f6686bd16e0b7789982d980 (MD5) + Rejected by Andy Torr(andy.torr@ubc.ca), reason: on 2008-10-20T17:41:07Z (GMT) + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-10-20T17:52:48Z +No. of bitstreams: 1 +Wood Wide Web[1].pdf: 118031 bytes, checksum: 0124ee9d6a881589e011ead839761fc1 (MD5) + Approved for entry into archive by Andy Torr(andy.torr@ubc.ca) on 2008-10-20T19:00:27Z (GMT) No. of bitstreams: 1 +Wood Wide Web[1].pdf: 118031 bytes, checksum: 0124ee9d6a881589e011ead839761fc1 (MD5) + Made available in DSpace on 2008-10-20T19:00:27Z (GMT). No. of bitstreams: 1 +Wood Wide Web[1].pdf: 118031 bytes, checksum: 0124ee9d6a881589e011ead839761fc1 (MD5) + Previous issue date: 2006-05 + + 118031 bytes + + + application/pdf + + + eng + + + Office of the Vice President Research, The University of British Columbia + + frontier: a journal of research and discovery, issue 1, May 2006 + + Melanie Jones + + + Dan Durall + + + soil biology + + + mycorrhizal fungi + + + Species at Risk + + + Habitat Studies Centre + + + Denise Brooks + + + Canada Foundation for Innovation + + + CFI + + + UBC Okanagan + + + Wood Wide Web + + text + + + + + + + Vice President Research, Office of the + 2008-10-20T19:00:27Z + 2008-10-20T19:00:27Z + 2006-05 + http://hdl.handle.net/2429/2701 + Melanie Jones and Dan Durall aren't looking to the treetops for clues about the "wood wide web." They're looking to the soil at fungi that are crucial to renewing our forests. + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-08-27T17:34:20Z +No. of bitstreams: 1 +Wood Wide Web[1].pdf: 114179 bytes, checksum: 28b548e21f6686bd16e0b7789982d980 (MD5) + Rejected by Andy Torr(andy.torr@ubc.ca), reason: on 2008-10-20T17:41:07Z (GMT) + Submitted by Janis Lai (svpr@exchange.ubc.ca) on 2008-10-20T17:52:48Z +No. of bitstreams: 1 +Wood Wide Web[1].pdf: 118031 bytes, checksum: 0124ee9d6a881589e011ead839761fc1 (MD5) + Approved for entry into archive by Andy Torr(andy.torr@ubc.ca) on 2008-10-20T19:00:27Z (GMT) No. of bitstreams: 1 +Wood Wide Web[1].pdf: 118031 bytes, checksum: 0124ee9d6a881589e011ead839761fc1 (MD5) + Made available in DSpace on 2008-10-20T19:00:27Z (GMT). No. of bitstreams: 1 +Wood Wide Web[1].pdf: 118031 bytes, checksum: 0124ee9d6a881589e011ead839761fc1 (MD5) + Previous issue date: 2006-05 + 118031 bytes + application/pdf + eng + Office of the Vice President Research, The University of British Columbia + frontier: a journal of research and discovery, issue 1, May 2006 + Melanie Jones + Dan Durall + soil biology + mycorrhizal fungi + Species at Risk + Habitat Studies Centre + Denise Brooks + Canada Foundation for Innovation + CFI + UBC Okanagan + Wood Wide Web + text + article + VP Research + + + + + + + + + + + + + + + + + + + + + svpr@exchange.ubc.ca + hdl:2429/2701 + hdl:2429/1314 + + + + + + + + + + + + + + + + + + + + + + URL + https://circle-test.library.ubc.ca/bitstream/2429%2F2701/1/Wood+Wide+Web%5B1%5D.pdf + + File + + + MD5 + 0124ee9d6a881589e011ead839761fc1 + + 118031 + + + application/pdf + + + + Wood Wide Web[1].pdf + + + + + + + + + + + + + + + + + Wood Wide Web[1].pdf + Wood Wide Web[1].pdf + Adobe PDF + application/pdf + KNOWN + false + + + + + + + + + + + + + + + + + + + + + + URL + https://circle-test.library.ubc.ca/bitstream/2429%2F2701/2/license.txt + + File + + + MD5 + cdc58860dbfa551807059e5c744e8841 + + 3975 + + + text/html + + + + license.txt + + + + + + + + + + + + + + + + + license.txt + Written by org.dspace.content.Item + License + text/html + KNOWN + true + + + + + + + + + + + + + + + + + + + + + + URL + https://circle-test.library.ubc.ca/bitstream/2429%2F2701/3/Wood+Wide+Web%5B1%5D.pdf.txt + + File + + + MD5 + 979e05921f91661e7240b7e0335bc927 + + 7792 + + + text/plain + + + + Wood Wide Web[1].pdf.txt + + + + + + + + + COLLECTION_hdl:2429/1314_ADMIN + + + + + + + + + Wood Wide Web[1].pdf.txt + Written by FormatFilter org.dspace.app.mediafilter.PDFFilter on 2009-12-04T10:49:24Z (GMT). + Extracted text + Text + text/plain + KNOWN + false + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+ +
+ +
+
+
diff --git a/tests/MCPClient/test_archivematicaCreateMETSRightsDspaceMDRef.py b/tests/MCPClient/test_archivematicaCreateMETSRightsDspaceMDRef.py new file mode 100644 index 0000000000..d743d48924 --- /dev/null +++ b/tests/MCPClient/test_archivematicaCreateMETSRightsDspaceMDRef.py @@ -0,0 +1,190 @@ +import pathlib +from unittest import mock + +import namespaces as ns +import pytest +from archivematicaCreateMETSRightsDspaceMDRef import ( + archivematicaCreateMETSRightsDspaceMDRef, +) +from client.job import Job +from create_mets_v2 import MetsState +from main import models + + +@pytest.mark.django_db +@pytest.fixture +def transfer_data(tmp_path): + fixtures_dir = pathlib.Path(__file__).parent / "fixtures" / "dspace" + + transfer_dir = tmp_path / "transfer" + transfer_dir.mkdir() + + transfer = models.Transfer.objects.create(currentlocation=transfer_dir) + + objects_dir = transfer_dir / "objects" + objects_dir.mkdir() + + # Add a DSpace item with a PDF and a METS file. + + item1_dir = objects_dir / "ITEM_2429-2700" + item1_dir.mkdir() + + item1_path = item1_dir / "bitstream.pdf" + item1_path.touch() + + item1_file = models.File.objects.create( + transfer=transfer, + currentlocation=f"%SIPDirectory%{item1_path.relative_to(transfer_dir)}".encode(), + ) + + item1_mets_path = item1_dir / "mets.xml" + with (fixtures_dir / "mets_item1.xml").open() as mets: + item1_mets_path.write_text(mets.read()) + + item1_mets_file = models.File.objects.create( + transfer=transfer, + currentlocation=f"%SIPDirectory%{item1_mets_path.relative_to(transfer_dir)}".encode(), + ) + + # Add a second DSpace item with a PDF and a METS file. + + item2_dir = objects_dir / "SOMEOTHERITEM" + item2_dir.mkdir() + + item2_path = item2_dir / "bitstream.pdf" + item2_path.touch() + + item2_file = models.File.objects.create( + transfer=transfer, + currentlocation=f"%SIPDirectory%{item2_path.relative_to(transfer_dir)}".encode(), + ) + + item2_mets_path = item2_dir / "mets.xml" + with (fixtures_dir / "mets_item2.xml").open() as mets: + item2_mets_path.write_text(mets.read()) + + item2_mets_file = models.File.objects.create( + transfer=transfer, + currentlocation=f"%SIPDirectory%{item2_mets_path.relative_to(transfer_dir)}".encode(), + ) + + return { + "item1_path": item1_path, + "item1_file": item1_file, + "item1_mets_file": item1_mets_file, + "item1_mets_path": item1_mets_path, + "item2_path": item2_path, + "item2_file": item2_file, + "item2_mets_file": item2_mets_file, + "item2_mets_path": item2_mets_path, + "transfer_dir": transfer_dir, + "transfer": transfer, + } + + +@pytest.mark.django_db +def test_archivematicaCreateMETSRightsDspaceMDRef(transfer_data): + job = mock.Mock(spec=Job) + state = MetsState() + file_path = transfer_data["item1_path"].relative_to(transfer_data["transfer_dir"]) + + result = archivematicaCreateMETSRightsDspaceMDRef( + job, + transfer_data["item1_file"].uuid, + file_path, + transfer_data["transfer"].uuid, + transfer_data["item1_path"], + state, + ) + assert state.error_accumulator.error_count == 0 + + # One dmdSec is created for each METS file. + assert len(result) == 2 + + # Verify the attributes of the returned mdRef elements. + dmd_secs = sorted([d.attrib for d in result], key=lambda d: d[f"{ns.xlinkBNS}href"]) + + assert dmd_secs == [ + { + "LABEL": f"mets.xml-{transfer_data['item1_mets_file'].uuid}", + "MDTYPE": "OTHER", + "OTHERMDTYPE": "METSRIGHTS", + "LOCTYPE": "OTHER", + "OTHERLOCTYPE": "SYSTEM", + "XPTR": "xpointer(id('rightsMD_371 rightsMD_374 rightsMD_384 rightsMD_393 rightsMD_401 rightsMD_409 rightsMD_417 rightsMD_425'))", + f"{ns.xlinkBNS}href": str( + transfer_data["item1_mets_path"].relative_to( + transfer_data["transfer_dir"] + ) + ), + }, + { + "LABEL": f"mets.xml-{transfer_data['item2_mets_file'].uuid}", + "MDTYPE": "OTHER", + "OTHERMDTYPE": "METSRIGHTS", + "LOCTYPE": "OTHER", + "OTHERLOCTYPE": "SYSTEM", + "XPTR": "xpointer(id('rightsMD_435 rightsMD_438 rightsMD_448 rightsMD_457 rightsMD_465 rightsMD_473 rightsMD_481 rightsMD_489'))", + f"{ns.xlinkBNS}href": str( + transfer_data["item2_mets_path"].relative_to( + transfer_data["transfer_dir"] + ) + ), + }, + ] + + job.pyprint.assert_has_calls( + [ + mock.call(transfer_data["item1_file"].uuid, file_path), + mock.call(str(transfer_data["item1_path"].parent)), + mock.call(str(transfer_data["item2_path"].parent)), + mock.call(str(transfer_data["item2_mets_path"])), + mock.call("continue"), + ], + # os.listdir returns files in arbitrary order. + any_order=True, + ) + + +@pytest.mark.django_db +@mock.patch( + "archivematicaCreateMETSRightsDspaceMDRef.createMDRefDMDSec", + side_effect=Exception("error"), +) +def test_archivematicaCreateMETSRightsDspaceMDRef_handle_exceptions( + createMDRefDMDSec, transfer_data +): + job = mock.Mock(spec=Job) + state = MetsState() + file_path = transfer_data["item1_path"].relative_to(transfer_data["transfer_dir"]) + + result = archivematicaCreateMETSRightsDspaceMDRef( + job, + transfer_data["item1_file"].uuid, + file_path, + transfer_data["transfer"].uuid, + transfer_data["item1_path"], + state, + ) + assert state.error_accumulator.error_count == 1 + + assert len(result) == 0 + + assert job.pyprint.mock_calls == [ + mock.call(transfer_data["item1_file"].uuid, file_path), + mock.call( + "Error creating mets dspace mdref", + transfer_data["item1_file"].uuid, + file_path, + file=mock.ANY, + ), + mock.call(mock.ANY, ("error",), file=mock.ANY), + ] + + createMDRefDMDSec.assert_called_once_with( + f"mets.xml-{transfer_data['item1_mets_file'].uuid}", + str(transfer_data["item1_mets_path"]), + str( + transfer_data["item1_mets_path"].relative_to(transfer_data["transfer_dir"]) + ), + ) From 17796664cdeb74be0efe155b1553d6413d1bab74 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Tue, 30 Apr 2024 13:27:30 +0200 Subject: [PATCH 2/2] Update archivematica-acceptance-tests submodule --- hack/submodules/archivematica-acceptance-tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/submodules/archivematica-acceptance-tests b/hack/submodules/archivematica-acceptance-tests index 2388fcec14..4bd7f46827 160000 --- a/hack/submodules/archivematica-acceptance-tests +++ b/hack/submodules/archivematica-acceptance-tests @@ -1 +1 @@ -Subproject commit 2388fcec14ad01da2c804822516a74491a8a172c +Subproject commit 4bd7f46827f2cfe2e7bc69e899f1c8bf4a01c501