From 4afa5d14cc277621c5a8c26b8c62185f1662172c Mon Sep 17 00:00:00 2001 From: Daniel Cosme Date: Mon, 18 Mar 2024 10:25:39 -0400 Subject: [PATCH 01/32] Add GHA to build and push images to docker hub --- .github/workflows/push-images.yml | 46 +++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 .github/workflows/push-images.yml diff --git a/.github/workflows/push-images.yml b/.github/workflows/push-images.yml new file mode 100644 index 0000000000..ea67a51825 --- /dev/null +++ b/.github/workflows/push-images.yml @@ -0,0 +1,46 @@ +name: "Push images to Docker Hub" +on: workflow_dispatch +jobs: + build: + name: "Builds images and push them to Docker Hub" + runs-on: "ubuntu-22.04" + steps: + - name: "Check out repository" + uses: "actions/checkout@v4" + - name: "Set up buildx" + uses: "docker/setup-buildx-action@v3" + with: + version: latest + driver-opts: image=moby/buildkit:v0.13.0 + - name: "Login to Docker Hub" + uses: docker/login-action@v3 + with: + username: artefactual + password: ${{ secrets.DOCKER_HUB_TOKEN }} + - name: "Build and Push Dashboard" + uses: docker/build-push-action@v5 + with: + context: . + push: true + load: true + file: ./hack/Dockerfile + target: "archivematica-dashboard" + tags: artefactual/archivematica-dashboard:latest + - name: "Build and Push MCP-client" + uses: docker/build-push-action@v5 + with: + context: . + push: true + load: true + file: ./hack/Dockerfile + target: "archivematica-mcp-client" + tags: artefactual/archivematica-mcp-client:latest + - name: "Build and Push MCP-server" + uses: docker/build-push-action@v5 + with: + context: . + push: true + load: true + file: ./hack/Dockerfile + target: "archivematica-mcp-server" + tags: artefactual/archivematica-mcp-server:latest \ No newline at end of file From 6b5dbc7cbc5c0ce96f6311c91274ad7acb482be6 Mon Sep 17 00:00:00 2001 From: Daniel Cosme Date: Mon, 18 Mar 2024 10:26:21 -0400 Subject: [PATCH 02/32] Make Docker images smaller --- hack/Dockerfile | 145 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 113 insertions(+), 32 deletions(-) diff --git a/hack/Dockerfile b/hack/Dockerfile index bcef69af74..9d9b346aa9 100644 --- a/hack/Dockerfile +++ b/hack/Dockerfile @@ -150,6 +150,35 @@ ARG USER_ID=1000 ARG GROUP_ID=1000 ARG PYENV_DIR=/pyenv +RUN set -ex \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + coreutils \ + clamav \ + libldap2-dev \ + libsasl2-dev \ + && rm -rf /var/lib/apt/lists/* /var/cache/apt/* + +RUN set -ex \ + && groupadd --gid ${GROUP_ID} --system archivematica \ + && useradd --uid ${USER_ID} --gid ${GROUP_ID} --home-dir /var/archivematica --system archivematica \ + && mkdir -p /var/archivematica/sharedDirectory \ + && chown -R archivematica:archivematica /var/archivematica + +# Download ClamAV virus signatures +RUN freshclam --quiet + +USER archivematica + +COPY --chown=${USER_ID}:${GROUP_ID} --from=pyenv-builder --link ${PYENV_DIR} ${PYENV_DIR} +COPY --chown=${USER_ID}:${GROUP_ID} --link . 
/src + +# ----------------------------------------------------------------------------- + +FROM base AS archivematica-mcp-client + +USER root + RUN set -ex \ && curl --retry 3 -fsSL https://packages.archivematica.org/1.16.x/key.asc | gpg --dearmor -o /etc/apt/keyrings/archivematica-1.16.x.gpg \ && echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/archivematica-1.16.x.gpg] http://packages.archivematica.org/1.16.x/ubuntu-externals jammy main" > /etc/apt/sources.list.d/archivematica-external.list \ @@ -159,66 +188,36 @@ RUN set -ex \ && apt-get install -y --no-install-recommends \ atool \ bulk-extractor \ - clamav \ - coreutils \ ffmpeg \ fits \ - g++ \ - gcc \ gearman \ - gettext \ ghostscript \ hashdeep \ imagemagick \ inkscape \ jhove \ - libffi-dev \ libimage-exiftool-perl \ - libldap2-dev \ - libmysqlclient-dev \ - libsasl2-dev \ - libssl-dev \ - libxml2-dev \ - libxslt1-dev \ logapp \ md5deep \ + nailgun \ mediaconch \ mediainfo \ - nailgun \ nfs-common \ openjdk-8-jre-headless \ p7zip-full \ pbzip2 \ pst-utils \ - python3-lxml \ rsync \ siegfried \ sleuthkit \ tesseract-ocr \ tree \ - unar \ unrar-free \ uuid \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/* -RUN set -ex \ - && groupadd --gid ${GROUP_ID} --system archivematica \ - && useradd --uid ${USER_ID} --gid ${GROUP_ID} --home-dir /var/archivematica --system archivematica \ - && mkdir -p /var/archivematica/sharedDirectory \ - && chown -R archivematica:archivematica /var/archivematica - -# Download ClamAV virus signatures -RUN freshclam --quiet - USER archivematica -COPY --chown=${USER_ID}:${GROUP_ID} --from=pyenv-builder --link ${PYENV_DIR} ${PYENV_DIR} -COPY --chown=${USER_ID}:${GROUP_ID} --link . /src - -# ----------------------------------------------------------------------------- - -FROM base AS archivematica-mcp-client - ENV DJANGO_SETTINGS_MODULE settings.common ENV PYTHONPATH /src/src/MCPClient/lib/:/src/src/MCPClient/lib/clientScripts:/src/src/archivematicaCommon/lib/:/src/src/dashboard/src/ ENV ARCHIVEMATICA_MCPCLIENT_ARCHIVEMATICACLIENTMODULES /src/src/MCPClient/lib/archivematicaClientModules @@ -251,6 +250,19 @@ ARG PYTHON_VERSION=3.9 USER root +RUN set -ex \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + gcc \ + g++ \ + gettext \ + libffi-dev \ + libxml2-dev \ + libxslt1-dev \ + unar \ + libssl-dev \ + && rm -rf /var/lib/apt/lists/* /var/cache/apt/* + RUN set -ex \ && internalDirs=' \ /src/src/dashboard/frontend \ @@ -284,7 +296,76 @@ ENTRYPOINT ["pyenv", "exec", "python3", "-m", "gunicorn", "--config=/src/src/das # ----------------------------------------------------------------------------- -FROM base AS archivematica-tests +FROM base-builder as archivematica-tests + +ARG USER_ID=1000 +ARG GROUP_ID=1000 +ARG PYENV_DIR=/pyenv + +RUN set -ex \ + && curl --retry 3 -fsSL https://packages.archivematica.org/1.16.x/key.asc | gpg --dearmor -o /etc/apt/keyrings/archivematica-1.16.x.gpg \ + && echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/archivematica-1.16.x.gpg] http://packages.archivematica.org/1.16.x/ubuntu-externals jammy main" > /etc/apt/sources.list.d/archivematica-external.list \ + && curl --retry 3 -so /tmp/repo-mediaarea_1.0-21_all.deb -L https://mediaarea.net/repo/deb/repo-mediaarea_1.0-21_all.deb \ + && dpkg -i /tmp/repo-mediaarea_1.0-21_all.deb \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + atool \ + bulk-extractor \ + clamav \ + coreutils \ + ffmpeg \ + fits \ + g++ \ + gcc \ + gearman \ + gettext \ + ghostscript \ + hashdeep \ + imagemagick \ + 
inkscape \ + jhove \ + libffi-dev \ + libimage-exiftool-perl \ + libldap2-dev \ + libmysqlclient-dev \ + libsasl2-dev \ + libssl-dev \ + libxml2-dev \ + libxslt1-dev \ + logapp \ + md5deep \ + mediaconch \ + mediainfo \ + nailgun \ + nfs-common \ + openjdk-8-jre-headless \ + p7zip-full \ + pbzip2 \ + pst-utils \ + python3-lxml \ + rsync \ + siegfried \ + sleuthkit \ + tesseract-ocr \ + tree \ + unar \ + unrar-free \ + uuid \ + && rm -rf /var/lib/apt/lists/* /var/cache/apt/* + +RUN set -ex \ + && groupadd --gid ${GROUP_ID} --system archivematica \ + && useradd --uid ${USER_ID} --gid ${GROUP_ID} --home-dir /var/archivematica --system archivematica \ + && mkdir -p /var/archivematica/sharedDirectory \ + && chown -R archivematica:archivematica /var/archivematica + +# Download ClamAV virus signatures +RUN freshclam --quiet + +USER archivematica + +COPY --chown=${USER_ID}:${GROUP_ID} --from=pyenv-builder --link ${PYENV_DIR} ${PYENV_DIR} +COPY --chown=${USER_ID}:${GROUP_ID} --link . /src # ----------------------------------------------------------------------------- From 550e541da9b70a9050594b3b4c8ea00883c13e68 Mon Sep 17 00:00:00 2001 From: Daniel Cosme Date: Wed, 3 Apr 2024 11:52:19 -0400 Subject: [PATCH 03/32] WIP: feedback from code review --- hack/Dockerfile | 73 ++++++++----------------------------------------- 1 file changed, 12 insertions(+), 61 deletions(-) diff --git a/hack/Dockerfile b/hack/Dockerfile index 9d9b346aa9..d642be3546 100644 --- a/hack/Dockerfile +++ b/hack/Dockerfile @@ -146,17 +146,14 @@ ENTRYPOINT ["npm", "run", "test-single-run"] FROM base-builder as base -ARG USER_ID=1000 -ARG GROUP_ID=1000 -ARG PYENV_DIR=/pyenv +ARG USER_ID +ARG GROUP_ID +ARG PYENV_DIR RUN set -ex \ && apt-get update \ && apt-get install -y --no-install-recommends \ coreutils \ - clamav \ - libldap2-dev \ - libsasl2-dev \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/* RUN set -ex \ @@ -165,9 +162,6 @@ RUN set -ex \ && mkdir -p /var/archivematica/sharedDirectory \ && chown -R archivematica:archivematica /var/archivematica -# Download ClamAV virus signatures -RUN freshclam --quiet - USER archivematica COPY --chown=${USER_ID}:${GROUP_ID} --from=pyenv-builder --link ${PYENV_DIR} ${PYENV_DIR} @@ -186,6 +180,7 @@ RUN set -ex \ && dpkg -i /tmp/repo-mediaarea_1.0-21_all.deb \ && apt-get update \ && apt-get install -y --no-install-recommends \ + clamav \ atool \ bulk-extractor \ ffmpeg \ @@ -216,6 +211,9 @@ RUN set -ex \ uuid \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/* +# Download ClamAV virus signatures +RUN freshclam --quiet + USER archivematica ENV DJANGO_SETTINGS_MODULE settings.common @@ -298,9 +296,9 @@ ENTRYPOINT ["pyenv", "exec", "python3", "-m", "gunicorn", "--config=/src/src/das FROM base-builder as archivematica-tests -ARG USER_ID=1000 -ARG GROUP_ID=1000 -ARG PYENV_DIR=/pyenv +ARG USER_ID +ARG GROUP_ID +ARG PYENV_DIR RUN set -ex \ && curl --retry 3 -fsSL https://packages.archivematica.org/1.16.x/key.asc | gpg --dearmor -o /etc/apt/keyrings/archivematica-1.16.x.gpg \ @@ -309,64 +307,17 @@ RUN set -ex \ && dpkg -i /tmp/repo-mediaarea_1.0-21_all.deb \ && apt-get update \ && apt-get install -y --no-install-recommends \ - atool \ - bulk-extractor \ - clamav \ - coreutils \ - ffmpeg \ - fits \ - g++ \ gcc \ - gearman \ - gettext \ - ghostscript \ - hashdeep \ - imagemagick \ - inkscape \ - jhove \ - libffi-dev \ - libimage-exiftool-perl \ - libldap2-dev \ - libmysqlclient-dev \ - libsasl2-dev \ - libssl-dev \ - libxml2-dev \ - libxslt1-dev \ - logapp \ - md5deep \ - mediaconch \ - mediainfo \ - 
nailgun \ - nfs-common \ - openjdk-8-jre-headless \ + media-types \ p7zip-full \ pbzip2 \ pst-utils \ python3-lxml \ rsync \ - siegfried \ - sleuthkit \ - tesseract-ocr \ - tree \ unar \ - unrar-free \ - uuid \ + tox \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/* -RUN set -ex \ - && groupadd --gid ${GROUP_ID} --system archivematica \ - && useradd --uid ${USER_ID} --gid ${GROUP_ID} --home-dir /var/archivematica --system archivematica \ - && mkdir -p /var/archivematica/sharedDirectory \ - && chown -R archivematica:archivematica /var/archivematica - -# Download ClamAV virus signatures -RUN freshclam --quiet - -USER archivematica - -COPY --chown=${USER_ID}:${GROUP_ID} --from=pyenv-builder --link ${PYENV_DIR} ${PYENV_DIR} -COPY --chown=${USER_ID}:${GROUP_ID} --link . /src - # ----------------------------------------------------------------------------- FROM ${TARGET} From 7221d265950c314c7fef0f657c9cd6ea0b7c91cd Mon Sep 17 00:00:00 2001 From: Daniel Cosme Date: Wed, 3 Apr 2024 11:53:37 -0400 Subject: [PATCH 04/32] WIP: fix typos in GH workflow --- .github/workflows/push-images.yml | 15 ++++++++++----- hack/Dockerfile | 1 - 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/push-images.yml b/.github/workflows/push-images.yml index ea67a51825..28b4d4a143 100644 --- a/.github/workflows/push-images.yml +++ b/.github/workflows/push-images.yml @@ -1,8 +1,13 @@ name: "Push images to Docker Hub" -on: workflow_dispatch +#on: workflow_dispatch +on: + push: + branches: + - "qa/**" + - "stable/**" jobs: build: - name: "Builds images and push them to Docker Hub" + name: "Build and push images" runs-on: "ubuntu-22.04" steps: - name: "Check out repository" @@ -26,7 +31,7 @@ jobs: file: ./hack/Dockerfile target: "archivematica-dashboard" tags: artefactual/archivematica-dashboard:latest - - name: "Build and Push MCP-client" + - name: "Build and Push MCPClient" uses: docker/build-push-action@v5 with: context: . @@ -35,7 +40,7 @@ jobs: file: ./hack/Dockerfile target: "archivematica-mcp-client" tags: artefactual/archivematica-mcp-client:latest - - name: "Build and Push MCP-server" + - name: "Build and Push MCPServer" uses: docker/build-push-action@v5 with: context: . 
@@ -43,4 +48,4 @@ jobs: load: true file: ./hack/Dockerfile target: "archivematica-mcp-server" - tags: artefactual/archivematica-mcp-server:latest \ No newline at end of file + tags: artefactual/archivematica-mcp-server:latest diff --git a/hack/Dockerfile b/hack/Dockerfile index d642be3546..e60d921010 100644 --- a/hack/Dockerfile +++ b/hack/Dockerfile @@ -315,7 +315,6 @@ RUN set -ex \ python3-lxml \ rsync \ unar \ - tox \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/* # ----------------------------------------------------------------------------- From d207c9151e4c6a0bae157ca5d0a65632fbb1b91d Mon Sep 17 00:00:00 2001 From: Daniel Cosme Date: Wed, 3 Apr 2024 12:45:30 -0400 Subject: [PATCH 05/32] WIP: trigger CI --- .github/workflows/push-images.yml | 3 +-- hack/Dockerfile | 12 ++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/push-images.yml b/.github/workflows/push-images.yml index 28b4d4a143..d912d1a7ad 100644 --- a/.github/workflows/push-images.yml +++ b/.github/workflows/push-images.yml @@ -3,8 +3,7 @@ name: "Push images to Docker Hub" on: push: branches: - - "qa/**" - - "stable/**" + - "dev/trim-dockerfile" jobs: build: name: "Build and push images" diff --git a/hack/Dockerfile b/hack/Dockerfile index e60d921010..80222a8aa9 100644 --- a/hack/Dockerfile +++ b/hack/Dockerfile @@ -19,6 +19,7 @@ ENV PYTHONUNBUFFERED 1 RUN set -ex \ && apt-get update \ && apt-get install -y --no-install-recommends \ + # coreutils \ ca-certificates \ curl \ git \ @@ -150,12 +151,6 @@ ARG USER_ID ARG GROUP_ID ARG PYENV_DIR -RUN set -ex \ - && apt-get update \ - && apt-get install -y --no-install-recommends \ - coreutils \ - && rm -rf /var/lib/apt/lists/* /var/cache/apt/* - RUN set -ex \ && groupadd --gid ${GROUP_ID} --system archivematica \ && useradd --uid ${USER_ID} --gid ${GROUP_ID} --home-dir /var/archivematica --system archivematica \ @@ -317,6 +312,11 @@ RUN set -ex \ unar \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/* +USER archivematica + +COPY --chown=${USER_ID}:${GROUP_ID} --from=pyenv-builder --link ${PYENV_DIR} ${PYENV_DIR} +COPY --chown=${USER_ID}:${GROUP_ID} --link . 
/src + # ----------------------------------------------------------------------------- FROM ${TARGET} From f6a5367b18e92801f7c44871f2318a93768027f2 Mon Sep 17 00:00:00 2001 From: Daniel Cosme Date: Wed, 3 Apr 2024 14:10:33 -0400 Subject: [PATCH 06/32] Remove coreutils comment --- hack/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/hack/Dockerfile b/hack/Dockerfile index 80222a8aa9..0283357b81 100644 --- a/hack/Dockerfile +++ b/hack/Dockerfile @@ -19,7 +19,6 @@ ENV PYTHONUNBUFFERED 1 RUN set -ex \ && apt-get update \ && apt-get install -y --no-install-recommends \ - # coreutils \ ca-certificates \ curl \ git \ From 1efafce32fc944fb6fa8d3bca2426627c2e8e8cf Mon Sep 17 00:00:00 2001 From: Daniel Cosme Date: Wed, 3 Apr 2024 14:38:26 -0400 Subject: [PATCH 07/32] Revert back Github action trigger to manual --- .github/workflows/push-images.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/push-images.yml b/.github/workflows/push-images.yml index d912d1a7ad..9eb3e565f7 100644 --- a/.github/workflows/push-images.yml +++ b/.github/workflows/push-images.yml @@ -1,9 +1,5 @@ name: "Push images to Docker Hub" -#on: workflow_dispatch -on: - push: - branches: - - "dev/trim-dockerfile" +on: workflow_dispatch jobs: build: name: "Build and push images" From b3f5b831a10ccd36776d4ef1d9e82b06d835ebd4 Mon Sep 17 00:00:00 2001 From: Daniel Cosme Date: Mon, 15 Apr 2024 11:44:58 -0400 Subject: [PATCH 08/32] Add SSH package to mcp-client --- hack/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/hack/Dockerfile b/hack/Dockerfile index 0283357b81..4c0e2999f6 100644 --- a/hack/Dockerfile +++ b/hack/Dockerfile @@ -203,6 +203,7 @@ RUN set -ex \ tree \ unrar-free \ uuid \ + ssh \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/* # Download ClamAV virus signatures From dabcad0150b9d19daa14ad588ea2e6cad620deac Mon Sep 17 00:00:00 2001 From: Daniel Cosme Date: Wed, 1 May 2024 12:06:47 -0400 Subject: [PATCH 09/32] WIP: trim build time dependencies from final images --- hack/Dockerfile | 50 ++++++++++++++++++++++++++++++++------ hack/build_for_registry.sh | 44 +++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 8 deletions(-) create mode 100755 hack/build_for_registry.sh diff --git a/hack/Dockerfile b/hack/Dockerfile index 4c0e2999f6..bb04d7325e 100644 --- a/hack/Dockerfile +++ b/hack/Dockerfile @@ -144,11 +144,38 @@ ENTRYPOINT ["npm", "run", "test-single-run"] # ----------------------------------------------------------------------------- -FROM base-builder as base +FROM ubuntu:${UBUNTU_VERSION} AS base +ARG PYENV_DIR=/pyenv ARG USER_ID ARG GROUP_ID -ARG PYENV_DIR + +ENV DEBIAN_FRONTEND noninteractive +ENV PYTHONUNBUFFERED 1 + +RUN set -ex \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + # gnupg \ + # curl \ + # git \ + # libldap2-dev \ + libmysqlclient-dev \ + # libsasl2-dev \ + # libsqlite3-dev \ + locales \ + # pkg-config \ + tzdata \ + && rm -rf /var/lib/apt/lists/* /var/cache/apt/* + +RUN locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 + +ENV PYENV_ROOT=${PYENV_DIR}/data +ENV PATH=$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH RUN set -ex \ && groupadd --gid ${GROUP_ID} --system archivematica \ @@ -161,12 +188,24 @@ USER archivematica COPY --chown=${USER_ID}:${GROUP_ID} --from=pyenv-builder --link ${PYENV_DIR} ${PYENV_DIR} COPY --chown=${USER_ID}:${GROUP_ID} --link . 
/src + # ----------------------------------------------------------------------------- FROM base AS archivematica-mcp-client USER root +RUN set -ex \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + gnupg \ + curl \ + && rm -rf /var/lib/apt/lists/* /var/cache/apt/* + +# Purge +# - gnupg +# - git + RUN set -ex \ && curl --retry 3 -fsSL https://packages.archivematica.org/1.16.x/key.asc | gpg --dearmor -o /etc/apt/keyrings/archivematica-1.16.x.gpg \ && echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/archivematica-1.16.x.gpg] http://packages.archivematica.org/1.16.x/ubuntu-externals jammy main" > /etc/apt/sources.list.d/archivematica-external.list \ @@ -174,7 +213,6 @@ RUN set -ex \ && dpkg -i /tmp/repo-mediaarea_1.0-21_all.deb \ && apt-get update \ && apt-get install -y --no-install-recommends \ - clamav \ atool \ bulk-extractor \ ffmpeg \ @@ -206,9 +244,6 @@ RUN set -ex \ ssh \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/* -# Download ClamAV virus signatures -RUN freshclam --quiet - USER archivematica ENV DJANGO_SETTINGS_MODULE settings.common @@ -246,8 +281,7 @@ USER root RUN set -ex \ && apt-get update \ && apt-get install -y --no-install-recommends \ - gcc \ - g++ \ + # g++ \ gettext \ libffi-dev \ libxml2-dev \ diff --git a/hack/build_for_registry.sh b/hack/build_for_registry.sh new file mode 100755 index 0000000000..9d8b915f5a --- /dev/null +++ b/hack/build_for_registry.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +set -xeo pipefail + +DIR=$(dirname "$(readlink -f "$0")") +ROOT_DIR=$(pwd) + +docker build \ + --target "archivematica-mcp-server" \ + --tag "archivematica-mcp-server" \ + --file $DIR/Dockerfile . + +docker build \ + --target "archivematica-mcp-client" \ + --tag "archivematica-mcp-client" \ + --file $DIR/Dockerfile . + +docker build \ + --target "archivematica-dashboard" \ + --tag "archivematica-dashboard" \ + --file $DIR/Dockerfile . + +if [[ ! 
-z "${PUSH_DOCKER}" ]] +then + docker image tag archivematica-mcp-server "artefactual/archivematica-mcp-server:latest" + docker image tag archivematica-mcp-client "artefactual/archivematica-mcp-client:latest" + docker image tag archivematica-dashboard "artefactual/archivematica-dashboard:latest" + + docker image tag archivematica-mcp-server "artefactual/archivematica-mcp-server:qa.1.x" + docker image tag archivematica-mcp-client "artefactual/archivematica-mcp-client:qa.1.x" + docker image tag archivematica-dashboard "artefactual/archivematica-dashboard:qa.1.x" + + # docker push --all-tags artefactual/archivematica-dashboard + # docker push --all-tags artefactual/archivematica-mcp-client + # docker push --all-tags artefactual/archivematica-mcp-server + docker push artefactual/archivematica-dashboard:qa.1.x + docker push artefactual/archivematica-mcp-client:qa.1.x + docker push artefactual/archivematica-mcp-server:qa.1.x + + docker push artefactual/archivematica-dashboard:latest + docker push artefactual/archivematica-mcp-client:latest + docker push artefactual/archivematica-mcp-server:latest +fi + From 3e1bd3b2388188482d610ea1efa7f6dbb4565492 Mon Sep 17 00:00:00 2001 From: Douglas Cerna Date: Thu, 2 May 2024 09:01:50 -0600 Subject: [PATCH 10/32] Extend elasticSearchFunctions test coverage --- .../test_index_aip_and_files_METS.xml | 1352 +++++++++++++++ .../test_elasticsearch_functions.py | 1492 +++++++++++------ 2 files changed, 2369 insertions(+), 475 deletions(-) create mode 100644 tests/archivematicaCommon/fixtures/test_index_aip_and_files_METS.xml diff --git a/tests/archivematicaCommon/fixtures/test_index_aip_and_files_METS.xml b/tests/archivematicaCommon/fixtures/test_index_aip_and_files_METS.xml new file mode 100644 index 0000000000..c4fc084e83 --- /dev/null +++ b/tests/archivematicaCommon/fixtures/test_index_aip_and_files_METS.xml @@ -0,0 +1,1352 @@ + + + + + + + + + UUID + d7a641c0-daa4-4ec6-b531-660deea920f6 + + pictures-with-dublincore-d7a641c0-daa4-4ec6-b531-660deea920f6 + + + + + + + + + Pictures with DublinCore + Archival Information Package + + + + + + + + + + + UUID + 8518d9a7-cb66-4530-953d-8ac2c554e5f2 + + + 0 + + sha256 + a469c730e705d757d66f53f38bb4455e89d5691a3d87fc7bc069b91fa2a50d46 + + 1361321 + + + JPEG + 1.01 + + + PRONOM + fmt/43 + + + + 2023-09-21T23:42:01Z + + + + + + + + + + + + + + + + + + + + + + + + + + 12.40 + [minor] Possibly incorrect maker notes offsets (fix by -74?) 
+ Landing_zone.jpg + /var/archivematica/sharedDirectory/watchedDirectories/workFlowDecisions/extractPackagesChoice/pictures-with-dublincore-f08c3cbb-3b39-4997-bc1c-74de2fa69eb0/objects + 1329 KiB + 2023:09:21 09:42:01-07:00 + 2024:04:28 20:50:25-07:00 + 2024:04:28 20:50:23-07:00 + -rwxrwxr-- + JPEG + jpg + image/jpeg + Little-endian (Intel, II) + 3648 + 2736 + Baseline DCT, Huffman coding + 8 + 3 + YCbCr4:2:0 (2 2) + 1.01 + inches + 72 + 72 + Panasonic + DMC-FS20 + Horizontal (normal) + 72 + 72 + inches + GIMP 2.4.7 + 2008:09:15 11:01:51 + Co-sited + 1/250 + 4.0 + Landscape + 100 + 0221 + 2008:09:10 16:51:18 + 2008:09:10 16:51:18 + Y, Cb, Cr, - + 4 + 0 + 3.3 + Multi-segment + Unknown + Off, Did not fire + 5.2 mm + 0100 + sRGB + 3648 + 2736 + One-chip color area + Digital Camera + Directly photographed + Normal + Auto + Auto + 0 + 30 mm + Standard + None + Normal + Normal + Normal + High + 0.1.1.2 + Auto + Auto + Tracking + On, Mode 2 + Off + Scenery + No + (Binary data 8200 bytes, use -b option to extract) + 0 + +AAAAAAAAAAAAAAAAAAAAAA== + + 0260 + n/a + Off + 00:00:52.22 + Off + 0 + High + Standard + Off + Horizontal (normal) + Enabled but Not Used + Normal + + Standard + Off + n/a + Medium + Destination + Off + n/a + 5 + Off + 0 + Inf 0.00015 + 2 + 4 0 14674 56 + 2 7 4 0 + 0121 + Scenery + No + No + 2030 + 1054 + 1800 + Off + Off + +Aw== + + R98 - DCF basic file (sRGB) + 0100 + 0250 + JPEG (old-style) + Horizontal (normal) + 180 + 180 + inches + 10050 + 8789 + Co-sited + (Binary data 8789 bytes, use -b option to extract) + 4.0 + 1.70778 + 3648x2736 + 10.0 + 1.925996 + 5.8 + 1/250 + Scenery (intelligent auto) + 0.005 mm + 61.9 deg + 5.2 mm (35 mm equivalent: 30.0 mm) + 1.30 m + 12.0 + + + + MediaInfoLib + + + 349 + 1 + General + General + 0 + 1 + JPEG + JPEG + JPEG + /var/archivematica/sharedDirectory/watchedDirectories/workFlowDecisions/extractPackagesChoice/pictures-with-dublincore-f08c3cbb-3b39-4997-bc1c-74de2fa69eb0/objects/Landing_zone.jpg + /var/archivematica/sharedDirectory/watchedDirectories/workFlowDecisions/extractPackagesChoice/pictures-with-dublincore-f08c3cbb-3b39-4997-bc1c-74de2fa69eb0/objects + Landing_zone.jpg + Landing_zone + jpg + JPEG + JPEG + h3d jpeg jpg jpe jps mpo + JPEG + image/jpeg + 1361321 + 1.30 MiB + 1 MiB + 1.3 MiB + 1.30 MiB + 1.298 MiB + 0 + 0.00 Byte (0%) + Byte0 + 0.0 Byte + 0.00 Byte + 0.000 Byte + 0.00 Byte (0%) + 0.00000 + 2023-09-21 16:42:01 UTC + 2023-09-21 09:42:01 + + + 124 + 1 + Image + Image + 0 + JPEG + JPEG + JPEG + image/jpeg + 3648 + 3 648 pixels + 2736 + 2 736 pixels + YUV + 4:2:0 + 8 + 8 bits + Lossy + Lossy + 1361321 + 1.30 MiB (100%) + 1 MiB + 1.3 MiB + 1.30 MiB + 1.298 MiB + 1.30 MiB (100%) + 1.00000 + + + + + + %transferDirectory%objects/Landing zone.jpg + + + + + + + + + + UUID + a03ca4bf-fd40-42d9-9972-a6bffd597095 + + ingestion + 2024-04-29T03:50:21.847564+00:00 + + + + + + + + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + 1270366a-19d4-449f-87f5-5c32f7816cda + + message digest calculation + 2024-04-29T03:50:21.915253+00:00 + + program="python"; module="hashlib.sha256()" + + + + + a469c730e705d757d66f53f38bb4455e89d5691a3d87fc7bc069b91fa2a50d46 + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + 9afc763e-1986-41c2-931d-2beb14769137 + + virus check + 2024-04-29T03:50:22.674126+00:00 + + program="ClamAV (clamd)"; version="ClamAV 
0.103.11"; virusDefinitions="27259/Sun Apr 28 08:22:36 2024" + + + Pass + + + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + 07b83b4e-f578-4e7f-a874-b6669ae2e4c0 + + filename change + 2024-04-29T03:50:23.816149+00:00 + + prohibited characters removed: program="change_names"; version="1.10.96655da16f29f2ac6ebd3eb4b48a689d82efe76d" + + + + + Original name="%transferDirectory%objects/Landing zone.jpg"; new name="%transferDirectory%objects/Landing_zone.jpg" + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + 4a54765f-8755-4a27-be3e-2c0e5d130bd4 + + format identification + 2024-04-29T03:50:25.776406+00:00 + + program="Siegfried"; version="1.9.6" + + + Positive + + fmt/43 + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + f4a7a90e-57b7-4492-8477-9da8b8044da8 + + validation + 2024-04-29T03:50:28.635310+00:00 + + program="JHOVE"; version="1.26" + + + pass + + format="JPEG"; version="1.01"; result="Well-Formed and valid" + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + preservation system + Archivematica-1.15.1 + + Archivematica + software + + + + + + + + + + repository code + AM qa/1.x SS qa/0x + + AM qa/1.x SS qa/0x + organization + + + + + + + + + + Archivematica user pk + 1 + + username="test", first_name="", last_name="" + Archivematica user + + + + + + + + + + + + UUID + 34c0e0c1-58f1-4025-91b9-2d58ba161593 + + + 0 + + sha256 + 3cd6050858efb22b71630eba9e99e6ffef68c59f4c03bb79e5ae6c4edf3c88d8 + + 4261562 + + + Tagged Image File Format + + + + + + + + + 2024-04-29T03:50:32Z + + + %SIPDirectory%objects/MARBLES-34c0e0c1-58f1-4025-91b9-2d58ba161593.tif + + derivation + has source + + UUID + f333c372-5cf6-4346-b342-ea40373d07d7 + + + UUID + fb445513-51aa-4cdd-b6a3-26fa70dcd77f + + + + + + + + + + + + UUID + d0b90430-8648-43d0-b954-8c295dd14260 + + creation + 2024-04-29T03:50:32.692103+00:00 + + + + + + + + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + c5aaaa36-be28-450d-8907-92c01d64af2c + + message digest calculation + 2024-04-29T03:50:32.733500+00:00 + + program="python"; module="hashlib.sha256()" + + + + + 3cd6050858efb22b71630eba9e99e6ffef68c59f4c03bb79e5ae6c4edf3c88d8 + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + a541b242-ee28-4ef7-90d9-03b99e8df139 + + validation + 2024-04-29T03:50:33.383765+00:00 + + program="JHOVE"; version="1.26" + + + pass + + format="TIFF"; version="5.0"; result="Well-Formed and valid" + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + preservation system + Archivematica-1.15.1 + + Archivematica + software + + + + + + + + + + repository code + AM qa/1.x SS qa/0x + + AM qa/1.x SS qa/0x + organization + + + + + + + + + + Archivematica user pk + 1 + + username="test", first_name="", last_name="" + Archivematica user + + + + + + + + + + + + UUID + f333c372-5cf6-4346-b342-ea40373d07d7 + + + 0 + + sha256 + 
91a5ddca3637590c2ddb50da5feb73ff0b8a98cd09a98afb79adc2cf70bc6220 + + 4261301 + + + Truevision TGA Bitmap + 2.0 + + + PRONOM + fmt/402 + + + + 2023-09-21T23:42:01Z + + + + + + + + + + + + + + + + + + + + + + + + + MediaInfoLib + + + 349 + 1 + General + General + 0 + 1 + Raw + Raw + Raw + /var/archivematica/sharedDirectory/watchedDirectories/workFlowDecisions/extractPackagesChoice/pictures-with-dublincore-f08c3cbb-3b39-4997-bc1c-74de2fa69eb0/objects/MARBLES.TGA + /var/archivematica/sharedDirectory/watchedDirectories/workFlowDecisions/extractPackagesChoice/pictures-with-dublincore-f08c3cbb-3b39-4997-bc1c-74de2fa69eb0/objects + MARBLES.TGA + MARBLES + TGA + TGA + TGA + tga + TGA + 2 + image/tga + 4261301 + 4.06 MiB + 4 MiB + 4.1 MiB + 4.06 MiB + 4.064 MiB + 2023-09-21 16:42:01 UTC + 2023-09-21 09:42:01 + + + 124 + 1 + Image + Image + 0 + Raw + Raw + Raw + 2 + 1419 + 1 419 pixels + 1001 + 1 001 pixels + RGB + 24 + 24 bits + + + + + + %transferDirectory%objects/MARBLES.TGA + + derivation + is source of + + UUID + 34c0e0c1-58f1-4025-91b9-2d58ba161593 + + + UUID + fb445513-51aa-4cdd-b6a3-26fa70dcd77f + + + + + + + + + + + + UUID + 114ee05e-7448-456e-802f-6cda5c2b00ab + + ingestion + 2024-04-29T03:50:21.857621+00:00 + + + + + + + + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + 2e0c7cb0-9b05-4a74-b741-d6bf9e550f58 + + message digest calculation + 2024-04-29T03:50:21.922880+00:00 + + program="python"; module="hashlib.sha256()" + + + + + 91a5ddca3637590c2ddb50da5feb73ff0b8a98cd09a98afb79adc2cf70bc6220 + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + a70c05a3-40bc-4cc7-a1f7-a5cc0a3789e7 + + virus check + 2024-04-29T03:50:22.686031+00:00 + + program="ClamAV (clamd)"; version="ClamAV 0.103.11"; virusDefinitions="27259/Sun Apr 28 08:22:36 2024" + + + Pass + + + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + e0eef09b-cf22-490b-bf54-ec00187712b5 + + format identification + 2024-04-29T03:50:25.738444+00:00 + + program="Siegfried"; version="1.9.6" + + + Positive + + fmt/402 + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + fb445513-51aa-4cdd-b6a3-26fa70dcd77f + + normalization + 2024-04-29T03:50:32.741862+00:00 + + ArchivematicaFPRCommandID="a34ddc9b-c922-4bb6-8037-bbe713332175"; program="convert"; version="Version: ImageMagick 6.9.11-60 Q16 x86_64 2021-01-25 https://imagemagick.org" + + + + + + %SIPDirectory%objects/MARBLES-34c0e0c1-58f1-4025-91b9-2d58ba161593.tif + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + preservation system + Archivematica-1.15.1 + + Archivematica + software + + + + + + + + + + repository code + AM qa/1.x SS qa/0x + + AM qa/1.x SS qa/0x + organization + + + + + + + + + + Archivematica user pk + 1 + + username="test", first_name="", last_name="" + Archivematica user + + + + + + + + + + + + UUID + adca4c81-7a50-4bcf-ae48-d13822c860bd + + + 0 + + sha256 + f3909d6cd379df21bdc432edee6318fc4d1132ddb1c8f5f68fe704c76c46f976 + + 65139 + + + XML + 1.0 + + + PRONOM + fmt/101 + + + + 2024-04-29T03:51:03Z + + + 
%SIPDirectory%objects/submissionDocumentation/transfer-pictures-with-dublincore-f08c3cbb-3b39-4997-bc1c-74de2fa69eb0/METS.xml + + + + + + + + + + UUID + 91b12ecb-2a79-4897-84e5-bd35821f73b0 + + ingestion + 2024-04-29T03:51:03.933994+00:00 + + + + + + + + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + e2e3a522-019b-4935-a7f8-554422a34af8 + + message digest calculation + 2024-04-29T03:51:03.968583+00:00 + + program="python"; module="hashlib.sha256()" + + + + + f3909d6cd379df21bdc432edee6318fc4d1132ddb1c8f5f68fe704c76c46f976 + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + 8d57f933-f420-4e53-91ea-4d24fb958119 + + virus check + 2024-04-29T03:51:05.319694+00:00 + + program="ClamAV (clamd)"; version="ClamAV 0.103.11"; virusDefinitions="27259/Sun Apr 28 08:22:36 2024" + + + Pass + + + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + UUID + c149c5a6-08f8-4bff-bb39-6c43d6a17896 + + format identification + 2024-04-29T03:51:06.790549+00:00 + + program="Siegfried"; version="1.9.6" + + + Positive + + fmt/101 + + + + preservation system + Archivematica-1.15.1 + + + repository code + AM qa/1.x SS qa/0x + + + Archivematica user pk + 1 + + + + + + + + + + + preservation system + Archivematica-1.15.1 + + Archivematica + software + + + + + + + + + + repository code + AM qa/1.x SS qa/0x + + AM qa/1.x SS qa/0x + organization + + + + + + + + + + Archivematica user pk + 1 + + username="test", first_name="", last_name="" + Archivematica user + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/archivematicaCommon/test_elasticsearch_functions.py b/tests/archivematicaCommon/test_elasticsearch_functions.py index 31c976a87c..b63033f8d7 100644 --- a/tests/archivematicaCommon/test_elasticsearch_functions.py +++ b/tests/archivematicaCommon/test_elasticsearch_functions.py @@ -1,510 +1,516 @@ +import datetime import os -import unittest +import pathlib +import uuid from unittest import mock -from unittest.mock import ANY -from unittest.mock import patch import elasticSearchFunctions import pytest +from components import helpers +from django.utils.timezone import make_aware from lxml import etree from main.models import Directory +from main.models import File from main.models import Identifier from main.models import SIP +from main.models import Transfer THIS_DIR = os.path.dirname(os.path.abspath(__file__)) -class TestElasticSearchFunctions(unittest.TestCase): - def setUp(self): - with mock.patch("elasticsearch.transport.Transport.perform_request"): - elasticSearchFunctions.setup("elasticsearch:9200") - self.client = elasticSearchFunctions.get_client() - self.aip_uuid = "b34521a3-1c63-43dd-b901-584416f36c91" - self.file_uuid = "268421a7-a986-4fa0-95c1-54176e508210" +@pytest.fixture +def es_client(): + with mock.patch("elasticsearch.transport.Transport.perform_request"): + elasticSearchFunctions.setup("elasticsearch:9200") + return elasticSearchFunctions.get_client() - @mock.patch( - "elasticsearch.transport.Transport.perform_request", - side_effect=[ - { - "took": 2, - "timed_out": False, - "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, - "hits": { - "total": 1, - "max_score": 0.2876821, - "hits": [ - { - "_index": "aips", - "_type": 
"_doc", - "_id": "lBsZBWgBn49OAVhMXeO8", - "_score": 0.2876821, - "_source": {"uuid": "b34521a3-1c63-43dd-b901-584416f36c91"}, - } - ], - }, - }, - { - "took": 8, - "timed_out": False, + +@mock.patch( + "elasticsearch.transport.Transport.perform_request", + side_effect=[ + { + "took": 2, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": { "total": 1, - "deleted": 1, - "batches": 1, - "version_conflicts": 0, - "noops": 0, - "retries": {"bulk": 0, "search": 0}, - "throttled_millis": 0, - "requests_per_second": -1.0, - "throttled_until_millis": 0, - "failures": [], - }, - { - "took": 0, - "timed_out": False, - "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, - "hits": {"total": 0, "max_score": None, "hits": []}, + "max_score": 0.2876821, + "hits": [ + { + "_index": "aips", + "_type": "_doc", + "_id": "lBsZBWgBn49OAVhMXeO8", + "_score": 0.2876821, + "_source": {"uuid": "b34521a3-1c63-43dd-b901-584416f36c91"}, + } + ], }, - ], + }, + { + "took": 8, + "timed_out": False, + "total": 1, + "deleted": 1, + "batches": 1, + "version_conflicts": 0, + "noops": 0, + "retries": {"bulk": 0, "search": 0}, + "throttled_millis": 0, + "requests_per_second": -1.0, + "throttled_until_millis": 0, + "failures": [], + }, + { + "took": 0, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": {"total": 0, "max_score": None, "hits": []}, + }, + ], +) +def test_delete_aip(perform_request, es_client): + aip_uuid = "b34521a3-1c63-43dd-b901-584416f36c91" + + # Verify AIP exists + results = es_client.search( + index="aips", + body={"query": {"term": {"uuid": aip_uuid}}}, + _source="uuid", ) - def test_delete_aip(self, perform_request): - # Verify AIP exists - results = self.client.search( - index="aips", - body={"query": {"term": {"uuid": self.aip_uuid}}}, - _source="uuid", - ) - assert results["hits"]["total"] == 1 - assert results["hits"]["hits"][0]["_source"]["uuid"] == self.aip_uuid - # Delete AIP - elasticSearchFunctions.delete_aip(self.client, self.aip_uuid) - # Verify AIP gone - results = self.client.search( - index="aips", - body={"query": {"term": {"uuid": self.aip_uuid}}}, - _source="uuid", - ) - assert results["hits"]["total"] == 0 + assert results["hits"]["total"] == 1 + assert results["hits"]["hits"][0]["_source"]["uuid"] == aip_uuid - @mock.patch( - "elasticsearch.transport.Transport.perform_request", - side_effect=[ - { - "took": 1, - "timed_out": False, - "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, - "hits": { - "total": 2, - "max_score": 0.2876821, - "hits": [ - { - "_index": "aipfiles", - "_type": "_doc", - "_id": "lRsZBWgBn49OAVhMXuMC", - "_score": 0.2876821, - "_source": { - "origin": "1a14043f-68ef-4bfe-a129-e2e4cdbe391b" - }, - }, - { - "_index": "aipfiles", - "_type": "_doc", - "_id": "lhsZBWgBn49OAVhMXuMh", - "_score": 0.2876821, - "_source": { - "origin": "1a14043f-68ef-4bfe-a129-e2e4cdbe391b" - }, - }, - ], - }, - }, - { - "took": 11, - "timed_out": False, + # Delete AIP + elasticSearchFunctions.delete_aip(es_client, "b34521a3-1c63-43dd-b901-584416f36c91") + + # Verify AIP gone + results = es_client.search( + index="aips", + body={"query": {"term": {"uuid": aip_uuid}}}, + _source="uuid", + ) + assert results["hits"]["total"] == 0 + + +@mock.patch( + "elasticsearch.transport.Transport.perform_request", + side_effect=[ + { + "took": 1, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": { "total": 2, - 
"deleted": 2, - "batches": 1, - "version_conflicts": 0, - "noops": 0, - "retries": {"bulk": 0, "search": 0}, - "throttled_millis": 0, - "requests_per_second": -1.0, - "throttled_until_millis": 0, - "failures": [], - }, - { - "took": 0, - "timed_out": False, - "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, - "hits": {"total": 0, "max_score": None, "hits": []}, + "max_score": 0.2876821, + "hits": [ + { + "_index": "aipfiles", + "_type": "_doc", + "_id": "lRsZBWgBn49OAVhMXuMC", + "_score": 0.2876821, + "_source": {"origin": "1a14043f-68ef-4bfe-a129-e2e4cdbe391b"}, + }, + { + "_index": "aipfiles", + "_type": "_doc", + "_id": "lhsZBWgBn49OAVhMXuMh", + "_score": 0.2876821, + "_source": {"origin": "1a14043f-68ef-4bfe-a129-e2e4cdbe391b"}, + }, + ], }, - ], + }, + { + "took": 11, + "timed_out": False, + "total": 2, + "deleted": 2, + "batches": 1, + "version_conflicts": 0, + "noops": 0, + "retries": {"bulk": 0, "search": 0}, + "throttled_millis": 0, + "requests_per_second": -1.0, + "throttled_until_millis": 0, + "failures": [], + }, + { + "took": 0, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": {"total": 0, "max_score": None, "hits": []}, + }, + ], +) +def test_delete_aip_files(perform_request, es_client): + aip_uuid = "b34521a3-1c63-43dd-b901-584416f36c91" + + # Verify AIP files exist + results = es_client.search( + index="aipfiles", body={"query": {"term": {"AIPUUID": aip_uuid}}} ) - def test_delete_aip_files(self, perform_request): - # Verify AIP files exist - results = self.client.search( - index="aipfiles", body={"query": {"term": {"AIPUUID": self.aip_uuid}}} - ) - assert results["hits"]["total"] == 2 - # Delete AIP files - elasticSearchFunctions.delete_aip_files(self.client, self.aip_uuid) - # Verify AIP files gone - results = self.client.search( - index="aipfiles", body={"query": {"term": {"AIPUUID": self.aip_uuid}}} - ) - assert results["hits"]["total"] == 0 + assert results["hits"]["total"] == 2 - assert perform_request.mock_calls == [ - mock.call( - "GET", - "/aipfiles/_search", - params={}, - body={ - "query": { - "term": {"AIPUUID": "b34521a3-1c63-43dd-b901-584416f36c91"} - } - }, - ), - mock.call( - "POST", - "/aipfiles/_delete_by_query", - params={}, - body={ - "query": { - "term": {"AIPUUID": "b34521a3-1c63-43dd-b901-584416f36c91"} - } - }, - ), - mock.call( - "GET", - "/aipfiles/_search", - params={}, - body={ - "query": { - "term": {"AIPUUID": "b34521a3-1c63-43dd-b901-584416f36c91"} - } - }, - ), - ] + # Delete AIP files + elasticSearchFunctions.delete_aip_files(es_client, aip_uuid) + # Verify AIP files gone + results = es_client.search( + index="aipfiles", body={"query": {"term": {"AIPUUID": aip_uuid}}} + ) + assert results["hits"]["total"] == 0 - @mock.patch( - "elasticsearch.transport.Transport.perform_request", - side_effect=[ - { - "took": 1, - "timed_out": False, - "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, - "hits": { - "total": 1, - "max_score": 0.6931472, - "hits": [ - { - "_index": "transferfiles", - "_type": "_doc", - "_id": "mBsZBWgBn49OAVhMh-OV", - "_score": 0.6931472, - "_source": { - "accessionid": "", - "status": "backlog", - "sipuuid": "17b168b6-cbba-4f43-8838-a53360238acb", - "tags": [], - "file_extension": "jpg", - "relative_path": "test-17b168b6-cbba-4f43-8838-a53360238acb/objects/Landing_zone.jpg", - "bulk_extractor_reports": [], - "origin": "1a14043f-68ef-4bfe-a129-e2e4cdbe391b", - "size": 1.2982568740844727, - "modification_date": "2018-12-11", - "created": 
1546273029.7313669, - "format": [], - "ingestdate": "2018-12-31", - "filename": "Landing_zone.jpg", - "fileuuid": "268421a7-a986-4fa0-95c1-54176e508210", - }, - } - ], - }, + assert perform_request.mock_calls == [ + mock.call( + "GET", + "/aipfiles/_search", + params={}, + body={ + "query": {"term": {"AIPUUID": "b34521a3-1c63-43dd-b901-584416f36c91"}} }, - { - "_index": "transferfiles", - "_type": "_doc", - "_id": "mBsZBWgBn49OAVhMh-OV", - "_version": 2, - "result": "updated", - "forced_refresh": True, - "_shards": {"total": 2, "successful": 1, "failed": 0}, - "_seq_no": 2, - "_primary_term": 1, + ), + mock.call( + "POST", + "/aipfiles/_delete_by_query", + params={}, + body={ + "query": {"term": {"AIPUUID": "b34521a3-1c63-43dd-b901-584416f36c91"}} }, - { - "took": 2, - "timed_out": False, - "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, - "hits": { - "total": 1, - "max_score": 0.47000363, - "hits": [ - { - "_index": "transferfiles", - "_type": "_doc", - "_id": "mBsZBWgBn49OAVhMh-OV", - "_score": 0.47000363, - "_source": {"tags": ["test"]}, - } - ], - }, + ), + mock.call( + "GET", + "/aipfiles/_search", + params={}, + body={ + "query": {"term": {"AIPUUID": "b34521a3-1c63-43dd-b901-584416f36c91"}} }, - ], - ) - def test_set_get_tags(self, perform_request): - elasticSearchFunctions.set_file_tags(self.client, self.file_uuid, ["test"]) - assert elasticSearchFunctions.get_file_tags(self.client, self.file_uuid) == [ - "test" - ] + ), + ] - assert perform_request.mock_calls == [ - mock.call( - "GET", - "/transferfiles/_search", - params={"size": "10000"}, - body={ - "query": { - "term": {"fileuuid": "268421a7-a986-4fa0-95c1-54176e508210"} + +@mock.patch( + "elasticsearch.transport.Transport.perform_request", + side_effect=[ + { + "took": 1, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": { + "total": 1, + "max_score": 0.6931472, + "hits": [ + { + "_index": "transferfiles", + "_type": "_doc", + "_id": "mBsZBWgBn49OAVhMh-OV", + "_score": 0.6931472, + "_source": { + "accessionid": "", + "status": "backlog", + "sipuuid": "17b168b6-cbba-4f43-8838-a53360238acb", + "tags": [], + "file_extension": "jpg", + "relative_path": "test-17b168b6-cbba-4f43-8838-a53360238acb/objects/Landing_zone.jpg", + "bulk_extractor_reports": [], + "origin": "1a14043f-68ef-4bfe-a129-e2e4cdbe391b", + "size": 1.2982568740844727, + "modification_date": "2018-12-11", + "created": 1546273029.7313669, + "format": [], + "ingestdate": "2018-12-31", + "filename": "Landing_zone.jpg", + "fileuuid": "268421a7-a986-4fa0-95c1-54176e508210", + }, } - }, - ), - mock.call( - "POST", - "/transferfiles/_doc/mBsZBWgBn49OAVhMh-OV/_update", - params={}, - body={"doc": {"tags": ["test"]}}, - ), - mock.call( - "GET", - "/transferfiles/_search", - params={"_source": b"tags"}, - body={ - "query": { - "term": {"fileuuid": "268421a7-a986-4fa0-95c1-54176e508210"} + ], + }, + }, + { + "_index": "transferfiles", + "_type": "_doc", + "_id": "mBsZBWgBn49OAVhMh-OV", + "_version": 2, + "result": "updated", + "forced_refresh": True, + "_shards": {"total": 2, "successful": 1, "failed": 0}, + "_seq_no": 2, + "_primary_term": 1, + }, + { + "took": 2, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": { + "total": 1, + "max_score": 0.47000363, + "hits": [ + { + "_index": "transferfiles", + "_type": "_doc", + "_id": "mBsZBWgBn49OAVhMh-OV", + "_score": 0.47000363, + "_source": {"tags": ["test"]}, } - }, - ), - ] + ], + }, + }, + ], +) +def 
test_set_get_tags(perform_request, es_client): + file_uuid = "268421a7-a986-4fa0-95c1-54176e508210" + elasticSearchFunctions.set_file_tags(es_client, file_uuid, ["test"]) + assert elasticSearchFunctions.get_file_tags(es_client, file_uuid) == ["test"] - @mock.patch( - "elasticsearch.transport.Transport.perform_request", - side_effect=[ - { - "took": 1, - "timed_out": False, - "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, - "hits": {"total": 0, "max_score": None, "hits": []}, - } - ], - ) - def test_list_tags_fails_when_file_cant_be_found(self, perform_request): - with pytest.raises(elasticSearchFunctions.EmptySearchResultError): - elasticSearchFunctions.get_file_tags(self.client, "no_such_file") - perform_request.assert_called_once_with( + assert perform_request.mock_calls == [ + mock.call( + "GET", + "/transferfiles/_search", + params={"size": "10000"}, + body={ + "query": {"term": {"fileuuid": "268421a7-a986-4fa0-95c1-54176e508210"}} + }, + ), + mock.call( + "POST", + "/transferfiles/_doc/mBsZBWgBn49OAVhMh-OV/_update", + params={}, + body={"doc": {"tags": ["test"]}}, + ), + mock.call( "GET", "/transferfiles/_search", params={"_source": b"tags"}, - body={"query": {"term": {"fileuuid": "no_such_file"}}}, - ) + body={ + "query": {"term": {"fileuuid": "268421a7-a986-4fa0-95c1-54176e508210"}} + }, + ), + ] + + +@mock.patch( + "elasticsearch.transport.Transport.perform_request", + side_effect=[ + { + "took": 1, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": {"total": 0, "max_score": None, "hits": []}, + } + ], +) +def test_list_tags_fails_when_file_cant_be_found(perform_request, es_client): + with pytest.raises(elasticSearchFunctions.EmptySearchResultError): + elasticSearchFunctions.get_file_tags(es_client, "no_such_file") + perform_request.assert_called_once_with( + "GET", + "/transferfiles/_search", + params={"_source": b"tags"}, + body={"query": {"term": {"fileuuid": "no_such_file"}}}, + ) - @mock.patch( - "elasticsearch.transport.Transport.perform_request", - side_effect=[ - { - "took": 0, - "timed_out": False, - "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, - "hits": {"total": 0, "max_score": None, "hits": []}, - } - ], + +@mock.patch( + "elasticsearch.transport.Transport.perform_request", + side_effect=[ + { + "took": 0, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": {"total": 0, "max_score": None, "hits": []}, + } + ], +) +def test_set_tags_fails_when_file_cant_be_found(perform_request, es_client): + with pytest.raises(elasticSearchFunctions.EmptySearchResultError): + elasticSearchFunctions.set_file_tags(es_client, "no_such_file", []) + perform_request.assert_called_once_with( + "GET", + "/transferfiles/_search", + params={"size": "10000"}, + body={"query": {"term": {"fileuuid": "no_such_file"}}}, ) - def test_set_tags_fails_when_file_cant_be_found(self, perform_request): - with pytest.raises(elasticSearchFunctions.EmptySearchResultError): - elasticSearchFunctions.set_file_tags(self.client, "no_such_file", []) - perform_request.assert_called_once_with( - "GET", - "/transferfiles/_search", - params={"size": "10000"}, - body={"query": {"term": {"fileuuid": "no_such_file"}}}, - ) - @pytest.mark.django_db - @mock.patch("elasticSearchFunctions.get_dashboard_uuid") - @mock.patch("elasticSearchFunctions.bulk") - def test_index_mets_file_metadata( - self, dummy_helpers_bulk, dummy_get_dashboard_uuid - ): - # Set up mocked functions - 
dummy_get_dashboard_uuid.return_value = "test-uuid" - indexed_data = {} - - def _bulk(client, actions, stats_only=False, *args, **kwargs): - for item in actions: - try: - dmd_section = item["_source"]["METS"]["dmdSec"] - metadata_container = dmd_section["mets:xmlData_dict"] - dc = metadata_container["dcterms:dublincore_dict"] - except (KeyError, IndexError): - dc = None - indexed_data[item["_source"]["filePath"]] = dc - - dummy_helpers_bulk.side_effect = _bulk - - # This METS file is a cut-down version of the AIP METS produced - # using the SampleTransfers/DemoTransfer - mets_file_path = os.path.join( - THIS_DIR, "fixtures", "test_index_metadata-METS.xml" - ) - mets_object_id = "771aa252-7930-4e68-b73e-f91416b1d4a4" - uuid = "f42a260a-9b53-4555-847e-8a4329c81662" - sipName = f"DemoTransfer-{uuid}" - identifiers = [] - elasticSearchFunctions._index_aip_files( - client=self.client, - uuid=uuid, - mets=etree.parse(mets_file_path).getroot(), - name=sipName, - identifiers=identifiers, - ) - assert dummy_helpers_bulk.call_count == 1 +@pytest.mark.django_db +@mock.patch("elasticSearchFunctions.get_dashboard_uuid") +@mock.patch("elasticSearchFunctions.bulk") +def test_index_mets_file_metadata(bulk, get_dashboard_uuid, es_client): + # Set up mocked functions + get_dashboard_uuid.return_value = "test-uuid" + indexed_data = {} - # ES should have indexed 12 files - # - 5 content files - # - 5 checksum and csv files in the metadata directory - # - 2 files generated in the transfer process - assert len(indexed_data) == 12 + def _bulk(client, actions, stats_only=False, *args, **kwargs): + for item in actions: + try: + dmd_section = item["_source"]["METS"]["dmdSec"] + metadata_container = dmd_section["mets:xmlData_dict"] + dc = metadata_container["dcterms:dublincore_dict"] + except (KeyError, IndexError): + dc = None + indexed_data[item["_source"]["filePath"]] = dc - # Metadata should have been indexed only for these content - # files because they are listed in the metadata.csv file - content_files_with_metadata = ( - { - "path": ( - "objects/View_from_lookout_over_Queenstown_" - "towards_the_Remarkables_in_spring.jpg" - ), - "title": ( - "Morning view from lookout over Queenstown " - "towards the Remarkables in spring" - ), - "creator": "Pseudopanax at English Wikipedia", - }, - { - "path": "objects/beihai.tif", - "title": "Beihai, Guanxi, China, 1988", - "creator": ( - "NASA/GSFC/METI/ERSDAC/JAROS and U.S./Japan " "ASTER Science Team" - ), - }, - { - "path": "objects/bird.mp3", - "title": "14000 Caen, France - Bird in my garden", - "creator": "Nicolas Germain", - }, - { - "path": "objects/ocr-image.png", - "title": "OCR image", - "creator": "Tesseract", - }, - ) - for file_metadata in content_files_with_metadata: - dc = indexed_data[file_metadata["path"]] - assert dc["dc:title"] == file_metadata["title"] - assert dc["dc:creator"] == file_metadata["creator"] - - # There is no metadata for this content file because - # it was not listed in the metadata.csv file - assert indexed_data["objects/piiTestDataCreditCardNumbers.txt"] is None - - # Checksum and csv files in the metadata directory - # won't have dublin core metadata indexed - files_in_metadata_directory = ( - "checksum.md5", - "checksum.sha1", - "checksum.sha256", - "metadata.csv", - "rights.csv", - ) - for filename in files_in_metadata_directory: - path = "objects/metadata/transfers/DemoTransfer-{}/{}".format( - mets_object_id, filename - ) - assert indexed_data[path] is None - - # Neither will the generated files during the transfer process - 
generated_files = ("dc.json", "directory_tree.txt") - for filename in generated_files: - path = "objects/metadata/transfers/DemoTransfer-{}/{}".format( - mets_object_id, filename - ) - assert indexed_data[path] is None - - @pytest.mark.django_db - @mock.patch("elasticSearchFunctions.bulk") - def test_index_mets_file_metadata_with_utf8(self, dummy_helpers_bulk): - def _bulk(client, actions, stats_only=False, *args, **kwargs): - pass - - dummy_helpers_bulk.side_effect = _bulk - mets_file_path = os.path.join( - THIS_DIR, "fixtures", "test_index_metadata-METS-utf8.xml" - ) - elasticSearchFunctions._index_aip_files( - client=self.client, - uuid="", - mets=etree.parse(mets_file_path).getroot(), - name="", - identifiers=[], - ) + bulk.side_effect = _bulk - @patch("elasticSearchFunctions.create_indexes_if_needed") - def test_default_setup(self, patch): - elasticSearchFunctions.setup("elasticsearch:9200") - patch.assert_called_with( - ANY, ["aips", "aipfiles", "transfers", "transferfiles"] - ) + # This METS file is a cut-down version of the AIP METS produced + # using the SampleTransfers/DemoTransfer + mets_file_path = os.path.join(THIS_DIR, "fixtures", "test_index_metadata-METS.xml") + mets_object_id = "771aa252-7930-4e68-b73e-f91416b1d4a4" + aip_uuid = "f42a260a-9b53-4555-847e-8a4329c81662" + sipName = f"DemoTransfer-{aip_uuid}" + identifiers = [] + elasticSearchFunctions._index_aip_files( + client=es_client, + uuid=aip_uuid, + mets=etree.parse(mets_file_path).getroot(), + name=sipName, + identifiers=identifiers, + ) - @patch("elasticSearchFunctions.create_indexes_if_needed") - def test_only_aips_setup(self, patch): - elasticSearchFunctions.setup("elasticsearch:9200", enabled=["aips"]) - patch.assert_called_with(ANY, ["aips", "aipfiles"]) - - @patch("elasticSearchFunctions.create_indexes_if_needed") - def test_only_transfers_setup(self, patch): - elasticSearchFunctions.setup("elasticsearch:9200", enabled=["transfers"]) - patch.assert_called_with(ANY, ["transfers", "transferfiles"]) - - @patch("elasticSearchFunctions.create_indexes_if_needed") - def test_no_indexes_setup(self, patch): - elasticSearchFunctions.setup("elasticsearch:9200", enabled=[]) - elasticSearchFunctions.setup("elasticsearch:9200", enabled=["unknown"]) - patch.assert_not_called() - - @patch("elasticsearch.client.indices.IndicesClient.create") - @patch("elasticsearch.client.indices.IndicesClient.exists", return_value=True) - def test_create_indexes_already_created(self, mock, patch): - elasticSearchFunctions.create_indexes_if_needed( - self.client, ["aips", "aipfiles", "transfers", "transferfiles"] + assert bulk.call_count == 1 + + # ES should have indexed 12 files + # - 5 content files + # - 5 checksum and csv files in the metadata directory + # - 2 files generated in the transfer process + assert len(indexed_data) == 12 + + # Metadata should have been indexed only for these content + # files because they are listed in the metadata.csv file + content_files_with_metadata = ( + { + "path": ( + "objects/View_from_lookout_over_Queenstown_" + "towards_the_Remarkables_in_spring.jpg" + ), + "title": ( + "Morning view from lookout over Queenstown " + "towards the Remarkables in spring" + ), + "creator": "Pseudopanax at English Wikipedia", + }, + { + "path": "objects/beihai.tif", + "title": "Beihai, Guanxi, China, 1988", + "creator": ( + "NASA/GSFC/METI/ERSDAC/JAROS and U.S./Japan " "ASTER Science Team" + ), + }, + { + "path": "objects/bird.mp3", + "title": "14000 Caen, France - Bird in my garden", + "creator": "Nicolas Germain", + }, + 
{ + "path": "objects/ocr-image.png", + "title": "OCR image", + "creator": "Tesseract", + }, + ) + for file_metadata in content_files_with_metadata: + dc = indexed_data[file_metadata["path"]] + assert dc["dc:title"] == file_metadata["title"] + assert dc["dc:creator"] == file_metadata["creator"] + + # There is no metadata for this content file because + # it was not listed in the metadata.csv file + assert indexed_data["objects/piiTestDataCreditCardNumbers.txt"] is None + + # Checksum and csv files in the metadata directory + # won't have dublin core metadata indexed + files_in_metadata_directory = ( + "checksum.md5", + "checksum.sha1", + "checksum.sha256", + "metadata.csv", + "rights.csv", + ) + for filename in files_in_metadata_directory: + path = "objects/metadata/transfers/DemoTransfer-{}/{}".format( + mets_object_id, filename ) - patch.assert_not_called() + assert indexed_data[path] is None - @patch("elasticsearch.client.indices.IndicesClient.create") - @patch("elasticsearch.client.indices.IndicesClient.exists", return_value=False) - def test_create_indexes_creation_calls(self, mock, patch): - elasticSearchFunctions.create_indexes_if_needed( - self.client, ["aips", "aipfiles", "transfers", "transferfiles"] + # Neither will the generated files during the transfer process + generated_files = ("dc.json", "directory_tree.txt") + for filename in generated_files: + path = "objects/metadata/transfers/DemoTransfer-{}/{}".format( + mets_object_id, filename ) - assert patch.call_count == 4 + assert indexed_data[path] is None - @patch("elasticsearch.client.indices.IndicesClient.create") - @patch("elasticsearch.client.indices.IndicesClient.exists", return_value=False) - def test_create_indexes_wrong_index(self, mock, patch): - elasticSearchFunctions.create_indexes_if_needed( - self.client, ["aips", "aipfiles", "unknown"] - ) - assert patch.call_count == 2 + +@pytest.mark.django_db +@mock.patch("elasticSearchFunctions.bulk") +def test_index_mets_file_metadata_with_utf8(bulk, es_client): + def _bulk(client, actions, stats_only=False, *args, **kwargs): + pass + + bulk.side_effect = _bulk + mets_file_path = os.path.join( + THIS_DIR, "fixtures", "test_index_metadata-METS-utf8.xml" + ) + elasticSearchFunctions._index_aip_files( + client=es_client, + uuid="", + mets=etree.parse(mets_file_path).getroot(), + name="", + identifiers=[], + ) + + +@mock.patch("elasticSearchFunctions.create_indexes_if_needed") +def test_default_setup(create_indexes_if_needed): + elasticSearchFunctions.setup("elasticsearch:9200") + create_indexes_if_needed.assert_called_with( + mock.ANY, ["aips", "aipfiles", "transfers", "transferfiles"] + ) + + +@mock.patch("elasticSearchFunctions.create_indexes_if_needed") +def test_only_aips_setup(create_indexes_if_needed): + elasticSearchFunctions.setup("elasticsearch:9200", enabled=["aips"]) + create_indexes_if_needed.assert_called_with(mock.ANY, ["aips", "aipfiles"]) + + +@mock.patch("elasticSearchFunctions.create_indexes_if_needed") +def test_only_transfers_setup(create_indexes_if_needed): + elasticSearchFunctions.setup("elasticsearch:9200", enabled=["transfers"]) + create_indexes_if_needed.assert_called_with( + mock.ANY, ["transfers", "transferfiles"] + ) + + +@mock.patch("elasticSearchFunctions.create_indexes_if_needed") +def test_no_indexes_setup(create_indexes_if_needed): + elasticSearchFunctions.setup("elasticsearch:9200", enabled=[]) + elasticSearchFunctions.setup("elasticsearch:9200", enabled=["unknown"]) + create_indexes_if_needed.assert_not_called() + + 
+@mock.patch("elasticsearch.client.indices.IndicesClient.create") +@mock.patch("elasticsearch.client.indices.IndicesClient.exists", return_value=True) +def test_create_indexes_already_created(exists, create, es_client): + elasticSearchFunctions.create_indexes_if_needed( + es_client, ["aips", "aipfiles", "transfers", "transferfiles"] + ) + create.assert_not_called() + + +@mock.patch("elasticsearch.client.indices.IndicesClient.create") +@mock.patch("elasticsearch.client.indices.IndicesClient.exists", return_value=False) +def test_create_indexes_creation_calls(exists, create, es_client): + elasticSearchFunctions.create_indexes_if_needed( + es_client, ["aips", "aipfiles", "transfers", "transferfiles"] + ) + assert create.call_count == 4 + + +@mock.patch("elasticsearch.client.indices.IndicesClient.create") +@mock.patch("elasticsearch.client.indices.IndicesClient.exists", return_value=False) +def test_create_indexes_wrong_index(exists, create, es_client): + elasticSearchFunctions.create_indexes_if_needed( + es_client, ["aips", "aipfiles", "unknown"] + ) + assert create.call_count == 2 fileuuid_premisv3 = ( @@ -570,12 +576,7 @@ def test_create_indexes_wrong_index(self, mock, patch): @mock.patch("elasticSearchFunctions.get_dashboard_uuid") @mock.patch("elasticSearchFunctions.bulk") def test_index_aipfile_fileuuid( - dummy_helpers_bulk, - dummy_get_dashboard_uuid, - metsfile, - fileuuid_dict, - aipuuid, - aipname, + bulk, get_dashboard_uuid, metsfile, fileuuid_dict, aipuuid, aipname ): """Check AIP file uuids are being correctly parsed from METS files. @@ -584,7 +585,7 @@ def test_index_aipfile_fileuuid( from the METS """ - dummy_get_dashboard_uuid.return_value = "test-uuid" + get_dashboard_uuid.return_value = "test-uuid" indexed_data = {} @@ -592,7 +593,7 @@ def _bulk(client, actions, stats_only=False, *args, **kwargs): for item in actions: indexed_data[item["_source"]["filePath"]] = item["_source"]["FILEUUID"] - dummy_helpers_bulk.side_effect = _bulk + bulk.side_effect = _bulk elasticSearchFunctions._index_aip_files( client=None, @@ -630,9 +631,7 @@ def _bulk(client, actions, stats_only=False, *args, **kwargs): ) @mock.patch("elasticSearchFunctions.get_dashboard_uuid") @mock.patch("elasticSearchFunctions.bulk") -def test_index_aipfile_dmdsec( - dummy_helpers_bulk, dummy_get_dashboard_uuid, metsfile, dmdsec_dict -): +def test_index_aipfile_dmdsec(bulk, get_dashboard_uuid, metsfile, dmdsec_dict): """Check AIP file dmdSec is correctly parsed from METS files. 
Mock _try_to_index() with a function that populates a dict @@ -640,7 +639,7 @@ def test_index_aipfile_dmdsec( from the METS """ - dummy_get_dashboard_uuid.return_value = "test-uuid" + get_dashboard_uuid.return_value = "test-uuid" indexed_data = {} @@ -654,7 +653,7 @@ def _bulk(client, actions, stats_only=False, *args, **kwargs): dc = None indexed_data[item["_source"]["filePath"]] = dc - dummy_helpers_bulk.side_effect = _bulk + bulk.side_effect = _bulk elasticSearchFunctions._index_aip_files( client=None, @@ -900,3 +899,546 @@ def test_get_metadata( ) == expected_metadata ) + + +def test_index_aip_and_files_logs_error_if_mets_does_not_exist( + es_client, tmp_path, caplog +): + printfn = mock.Mock() + aip_uuid = uuid.uuid4() + aip_stored_path = tmp_path / "aip.7z" + mets_staging_path = tmp_path / "mets.XML" + expected_error_message = f"METS file does not exist at: {mets_staging_path}" + + result = elasticSearchFunctions.index_aip_and_files( + es_client, + str(aip_uuid), + str(aip_stored_path), + str(mets_staging_path), + "aip", + 1024, + printfn=printfn, + ) + assert result == 1 + + assert [r.message for r in caplog.records] == [expected_error_message] + printfn.assert_called_once_with(expected_error_message, file=mock.ANY) + + +@pytest.mark.django_db +@mock.patch("elasticsearch.Elasticsearch.index") +@mock.patch( + "elasticsearch.client.cluster.ClusterClient.health", + return_value={"status": "green"}, +) +@mock.patch("elasticSearchFunctions._index_aip_files") +def test_index_aip_and_files(_index_aip_files, health, index, es_client, tmp_path): + dashboard_uuid = uuid.uuid4() + helpers.set_setting("dashboard_uuid", str(dashboard_uuid)) + printfn = mock.Mock() + aip_name = "aip" + aip_uuid = str(uuid.uuid4()) + aip_stored_path = tmp_path / "aip.7z" + mets_staging_path = pathlib.Path( + THIS_DIR, "fixtures", "test_index_aip_and_files_METS.xml" + ) + expected_file_count = 3 + accession_ids = "accession_ids" + _index_aip_files.return_value = (expected_file_count, accession_ids) + + result = elasticSearchFunctions.index_aip_and_files( + es_client, + str(aip_uuid), + str(aip_stored_path), + str(mets_staging_path), + aip_name, + 1024 * 1024 * 10, + printfn=printfn, + ) + assert result == 0 + + health.assert_called_once() + index.assert_called_once_with( + body={ + "AICID": None, + "accessionids": accession_ids, + "countAIPsinAIC": None, + "created": 1714362668, + "encrypted": False, + "filePath": str(aip_stored_path), + "file_count": expected_file_count, + "identifiers": [], + "isPartOf": "", + "location": "", + "name": aip_name, + "origin": str(dashboard_uuid), + "size": 10.0, + "status": elasticSearchFunctions.STATUS_UPLOADED, + "transferMetadata": [ + { + "__DIRECTORY_LABEL__": "objects", + "dc:title": "Pictures with DublinCore", + "dc:type": "Archival Information Package", + }, + ], + "uuid": str(aip_uuid), + }, + doc_type="_doc", + index="aips", + ) + assert printfn.mock_calls == [ + mock.call(f"AIP UUID: {aip_uuid}"), + mock.call("Indexing AIP files ..."), + mock.call(f"Files indexed: {expected_file_count}"), + mock.call("Indexing AIP ..."), + mock.call("Done."), + ] + + +def test_index_transfer_and_files_logs_error_if_transfer_path_does_not_exist( + es_client, tmp_path, caplog +): + printfn = mock.Mock() + transfer_uuid = uuid.uuid4() + transfer_path = tmp_path / "transfer" + expected_error_message = f"Transfer does not exist at: {transfer_path}" + + result = elasticSearchFunctions.index_transfer_and_files( + es_client, + str(transfer_uuid), + str(transfer_path), + 1024, + printfn=printfn, + 
) + assert result == 1 + + assert [r.message for r in caplog.records] == [expected_error_message] + printfn.assert_called_once_with(expected_error_message, file=mock.ANY) + + +@pytest.mark.django_db +@pytest.fixture +def transfer(tmp_path): + transfer_dir = tmp_path / "transfer" + transfer_dir.mkdir() + + (transfer_dir / "processingMCP.xml").touch() + + return Transfer.objects.create( + # The trailing slash is expected by index_transfer_and_files. + currentlocation=f"{transfer_dir}/", + accessionid="accession_id", + ) + + +@pytest.mark.django_db +@pytest.fixture +def transfer_file(transfer): + filename = "file.txt" + (pathlib.Path(transfer.currentlocation) / filename).touch() + + result = File.objects.create( + transfer=transfer, currentlocation=f"%transferDirectory%{filename}".encode() + ) + + # enteredsystem is an auto_now DateTimeField. This resets its value. + dt = make_aware(datetime.datetime(2024, 1, 1)) + result.enteredsystem = dt + result.modificationtime = dt + result.save() + + return result + + +@pytest.mark.django_db +@mock.patch("elasticsearch.Elasticsearch.index") +@mock.patch( + "elasticsearch.client.cluster.ClusterClient.health", + return_value={"status": "green"}, +) +def test_index_transfer_and_files(health, index, es_client, transfer, transfer_file): + dashboard_uuid = uuid.uuid4() + helpers.set_setting("dashboard_uuid", str(dashboard_uuid)) + printfn = mock.Mock() + expected_transfer_name = "transfer" + expected_file_count = 1 + expected_file_name = "file.txt" + expected_date = "2024-01-01" + expected_status = "backlog" + + result = elasticSearchFunctions.index_transfer_and_files( + es_client, + str(transfer.uuid), + str(transfer.currentlocation), + 1024, + printfn=printfn, + ) + assert result == 0 + + assert health.mock_calls == [mock.call(), mock.call()] + assert index.mock_calls == [ + mock.call( + body={ + "filename": expected_file_name, + "relative_path": f"{expected_transfer_name}/{expected_file_name}", + "fileuuid": str(transfer_file.uuid), + "sipuuid": str(transfer.uuid), + "accessionid": transfer.accessionid, + "status": expected_status, + "origin": str(dashboard_uuid), + "ingestdate": expected_date, + "created": mock.ANY, + "modification_date": expected_date, + "size": 0.0, + "tags": [], + "file_extension": "txt", + "bulk_extractor_reports": [], + "format": [], + "pending_deletion": False, + }, + index="transferfiles", + doc_type="_doc", + ), + mock.call( + body={ + "accessionid": transfer.accessionid, + "file_count": expected_file_count, + "ingest_date": expected_date, + "name": expected_transfer_name, + "pending_deletion": False, + "size": 1024, + "status": expected_status, + "uuid": str(transfer.uuid), + }, + doc_type="_doc", + index="transfers", + ), + ] + # Cannot compare using mock_calls here because the use of os.listdir in + # _list_files_in_dir returns files in arbitrary order. 
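+    # For example, _list_files_in_dir may yield file.txt before or after
+    # processingMCP.xml depending on the filesystem, so only the set of
+    # printfn calls is stable, not their order.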
+ printfn.assert_has_calls( + [ + mock.call(f"Transfer UUID: {transfer.uuid}"), + mock.call("Indexing Transfer files ..."), + mock.call( + f"Indexing {expected_transfer_name}/{expected_file_name} (UUID: {transfer_file.uuid})" + ), + mock.call(f"Skipping indexing {expected_transfer_name}/processingMCP.xml"), + mock.call(f"Files indexed: {expected_file_count}"), + mock.call("Indexing Transfer ..."), + mock.call("Done."), + ], + any_order=True, + ) + + +@mock.patch( + "elasticSearchFunctions.search_all_results", + return_value={ + "hits": { + "hits": [ + { + "_source": { + "filename": "file.txt", + "fileuuid": "f704ab10-d52e-482f-af4a-cc21111f8df4", + }, + } + ], + }, + }, +) +def test_get_transfer_file_info_when_search_returns_a_single_document( + search_all_results, es_client +): + field = "fileuuid" + value = "f704ab10-d52e-482f-af4a-cc21111f8df4" + + result = elasticSearchFunctions.get_transfer_file_info(es_client, field, value) + + assert result == { + "filename": "file.txt", + "fileuuid": "f704ab10-d52e-482f-af4a-cc21111f8df4", + } + + search_all_results.assert_called_once_with( + es_client, + body={"query": {"term": {field: value}}}, + index=elasticSearchFunctions.TRANSFER_FILES_INDEX, + ) + + +@mock.patch( + "elasticSearchFunctions.search_all_results", + return_value={ + "hits": { + "hits": [ + { + "_source": { + "fileuuid": "f704ab10-d52e-482f-af4a-cc21111f8df4", + "filename": "foo.txt", + } + }, + { + "_source": { + "fileuuid": "f704ab10-d52e-482f-af4a-cc21111f8df4", + "filename": "bar.txt", + } + }, + ], + }, + }, +) +def test_get_transfer_file_info_logs_multiple_results(search_all_results, es_client): + field = "fileuuid" + value = "f704ab10-d52e-482f-af4a-cc21111f8df4" + + result = elasticSearchFunctions.get_transfer_file_info(es_client, field, value) + + assert result == {field: value, "filename": "foo.txt"} + + search_all_results.assert_called_once_with( + es_client, + body={"query": {"term": {field: value}}}, + index=elasticSearchFunctions.TRANSFER_FILES_INDEX, + ) + + +@pytest.mark.django_db +@mock.patch("elasticSearchFunctions.get_client") +@mock.patch("elasticSearchFunctions._document_ids_from_field_query") +def test_remove_backlog_transfer_files( + _document_ids_from_field_query, client, transfer +): + file_doc_id = str(uuid.uuid4()) + _document_ids_from_field_query.return_value = [file_doc_id] + + elasticSearchFunctions.remove_backlog_transfer_files(client, transfer.uuid) + + _document_ids_from_field_query.assert_called_once_with( + client, elasticSearchFunctions.TRANSFER_FILES_INDEX, "sipuuid", transfer.uuid + ) + client.delete.assert_called_once_with( + index=elasticSearchFunctions.TRANSFER_FILES_INDEX, + doc_type=elasticSearchFunctions.DOC_TYPE, + id=file_doc_id, + ) + + +@pytest.mark.django_db +@mock.patch("elasticSearchFunctions.get_client") +@mock.patch("elasticSearchFunctions._document_ids_from_field_query") +def test_remove_sip_transfer_files( + _document_ids_from_field_query, client, transfer, transfer_file +): + file_doc_id = str(uuid.uuid4()) + _document_ids_from_field_query.return_value = [file_doc_id] + + elasticSearchFunctions.remove_sip_transfer_files(client, transfer.uuid) + + _document_ids_from_field_query.assert_called_once_with( + client, elasticSearchFunctions.TRANSFER_FILES_INDEX, "sipuuid", transfer.uuid + ) + client.delete.assert_called_once_with( + index=elasticSearchFunctions.TRANSFER_FILES_INDEX, + doc_type=elasticSearchFunctions.DOC_TYPE, + id=file_doc_id, + ) + + +@mock.patch("elasticSearchFunctions.get_client") 
+@mock.patch("elasticSearchFunctions._document_ids_from_field_query", return_value=[]) +def test_mark_aip_stored_logs_error(_document_ids_from_field_query, client, caplog): + aip_uuid = str(uuid.uuid4()) + + elasticSearchFunctions.mark_aip_stored(es_client, aip_uuid) + + _document_ids_from_field_query.assert_called_once_with( + es_client, + elasticSearchFunctions.AIPS_INDEX, + elasticSearchFunctions.ES_FIELD_UUID, + aip_uuid, + ) + assert [r.message for r in caplog.records] == [ + f"Unable to find document with UUID {aip_uuid} in index {elasticSearchFunctions.AIPS_INDEX}" + ] + + +@mock.patch("elasticSearchFunctions.get_client") +@mock.patch("elasticSearchFunctions._document_ids_from_field_query") +def test_mark_aip_stored(_document_ids_from_field_query, client): + aip_uuid = str(uuid.uuid4()) + aip_doc_id = str(uuid.uuid4()) + _document_ids_from_field_query.return_value = [aip_doc_id] + + elasticSearchFunctions.mark_aip_stored(client, aip_uuid) + + client.update.assert_called_once_with( + body={ + "doc": { + elasticSearchFunctions.ES_FIELD_STATUS: elasticSearchFunctions.STATUS_UPLOADED + } + }, + index=elasticSearchFunctions.AIPS_INDEX, + doc_type=elasticSearchFunctions.DOC_TYPE, + id=aip_doc_id, + ) + _document_ids_from_field_query.assert_called_once_with( + client, + elasticSearchFunctions.AIPS_INDEX, + elasticSearchFunctions.ES_FIELD_UUID, + aip_uuid, + ) + + +@pytest.mark.parametrize( + "helper,package_index,files_index,package_uuid_field,field,value", + ( + ( + elasticSearchFunctions.mark_aip_deletion_requested, + elasticSearchFunctions.AIPS_INDEX, + elasticSearchFunctions.AIP_FILES_INDEX, + "AIPUUID", + elasticSearchFunctions.ES_FIELD_STATUS, + elasticSearchFunctions.STATUS_DELETE_REQUESTED, + ), + ( + elasticSearchFunctions.mark_backlog_deletion_requested, + elasticSearchFunctions.TRANSFERS_INDEX, + elasticSearchFunctions.TRANSFER_FILES_INDEX, + "sipuuid", + "pending_deletion", + True, + ), + ( + elasticSearchFunctions.revert_aip_deletion_request, + elasticSearchFunctions.AIPS_INDEX, + elasticSearchFunctions.AIP_FILES_INDEX, + "AIPUUID", + elasticSearchFunctions.ES_FIELD_STATUS, + elasticSearchFunctions.STATUS_UPLOADED, + ), + ( + elasticSearchFunctions.revert_backlog_deletion_request, + elasticSearchFunctions.TRANSFERS_INDEX, + elasticSearchFunctions.TRANSFER_FILES_INDEX, + "sipuuid", + "pending_deletion", + False, + ), + ), + ids=[ + "mark_aip_deletion_requested", + "mark_backlog_deletion_requested", + "revert_aip_deletion_request", + "revert_backlog_deletion_request", + ], +) +@mock.patch("elasticSearchFunctions.get_client") +@mock.patch("elasticSearchFunctions._document_ids_from_field_query") +def test_update_helpers( + _document_ids_from_field_query, + client, + helper, + package_index, + files_index, + package_uuid_field, + field, + value, +): + package_uuid = str(uuid.uuid4()) + package_doc_id = str(uuid.uuid4()) + file_doc_id = str(uuid.uuid4()) + _document_ids_from_field_query.side_effect = [[package_doc_id], [file_doc_id]] + + helper(client, package_uuid) + + assert client.update.mock_calls == [ + mock.call( + body={"doc": {field: value}}, + index=package_index, + doc_type=elasticSearchFunctions.DOC_TYPE, + id=package_doc_id, + ), + mock.call( + body={"doc": {field: value}}, + index=files_index, + doc_type=elasticSearchFunctions.DOC_TYPE, + id=file_doc_id, + ), + ] + assert _document_ids_from_field_query.mock_calls == [ + mock.call( + client, package_index, elasticSearchFunctions.ES_FIELD_UUID, package_uuid + ), + mock.call(client, files_index, package_uuid_field, 
package_uuid), + ] + + +def test_augment_raw_search_results(): + raw_results = { + "hits": { + "hits": [ + {"_id": "123", "_source": {"filename": "foo.txt"}}, + {"_id": "456", "_source": {"filename": "bar.txt"}}, + ], + }, + } + + result = elasticSearchFunctions.augment_raw_search_results(raw_results) + + assert result == [ + {"document_id": "123", "filename": "foo.txt"}, + {"document_id": "456", "filename": "bar.txt"}, + ] + + +@mock.patch("elasticSearchFunctions.get_client") +def test_try_to_index_fails_with_invalid_maximum_retries(client): + with pytest.raises(ValueError, match="max_tries must be 1 or greater"): + elasticSearchFunctions._try_to_index( + client, {}, elasticSearchFunctions.AIPS_INDEX, max_tries=0 + ) + + +@mock.patch("elasticSearchFunctions.get_client") +def test_try_to_index_retries_after_error(client): + error = Exception("error") + client.index.side_effect = [error, None] + printfn = mock.Mock() + data = {} + index = elasticSearchFunctions.AIPS_INDEX + + elasticSearchFunctions._try_to_index( + client, data, index, wait_between_tries=0, printfn=printfn + ) + + assert client.index.mock_calls == [ + mock.call(body=data, index=index, doc_type=elasticSearchFunctions.DOC_TYPE), + mock.call(body=data, index=index, doc_type=elasticSearchFunctions.DOC_TYPE), + ] + assert printfn.mock_calls == [ + mock.call("ERROR: error trying to index."), + mock.call(error), + ] + + +@mock.patch("elasticSearchFunctions.get_client") +def test_try_to_index_raises_exception_after_retries(client): + error = Exception("error") + client.index.side_effect = [error, None] + printfn = mock.Mock() + data = {} + index = elasticSearchFunctions.AIPS_INDEX + + with pytest.raises(Exception, match="error"): + elasticSearchFunctions._try_to_index( + client, data, index, wait_between_tries=0, max_tries=1, printfn=printfn + ) + + assert client.index.mock_calls == [ + mock.call(body=data, index=index, doc_type=elasticSearchFunctions.DOC_TYPE), + ] + assert printfn.mock_calls == [ + mock.call("ERROR: error trying to index."), + mock.call(error), + ] From 58e30b371fd4351e2c059f6fd4f007207d1e10b0 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Tue, 30 Apr 2024 17:56:17 +0200 Subject: [PATCH 11/32] Remove comment from JHOVE validation migration --- src/dashboard/src/fpr/migrations/0040_update_jhove_validation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dashboard/src/fpr/migrations/0040_update_jhove_validation.py b/src/dashboard/src/fpr/migrations/0040_update_jhove_validation.py index c062de5566..adff577fb7 100644 --- a/src/dashboard/src/fpr/migrations/0040_update_jhove_validation.py +++ b/src/dashboard/src/fpr/migrations/0040_update_jhove_validation.py @@ -1,4 +1,3 @@ -"""Update commands and rules for Python 3 compatibility.""" from django.db import migrations JHOVE_TOOL_ID = "085d8690-93b7-4d31-84f7-2c5f4cbf6735" From ba0db52e575f8e14f3e26c82490ff495afb292d1 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Tue, 30 Apr 2024 17:56:46 +0200 Subject: [PATCH 12/32] Fix default thumbnail normalization command This adds a data migration for replacing the existing command with a fixed version of its Python script. 
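For reference, the replacement script does nothing more than decode an
embedded base64 JPEG and write it to the path passed by the MCP client.
A minimal smoke test of that behaviour, assuming the command body has
been saved to default_thumbnail.py (an illustrative name, not a file
shipped with this migration):

    # Hypothetical check, not part of this change.
    import subprocess
    import tempfile

    with tempfile.NamedTemporaryFile(suffix=".jpg") as thumb:
        # The MCP client passes the destination via --output-location.
        subprocess.run(
            ["python3", "default_thumbnail.py", "--output-location", thumb.name],
            check=True,
        )
        # Every JPEG starts with the ff d8 start-of-image marker.
        assert thumb.read(2) == b"\xff\xd8"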
--- .../0043_update_default_thumbnail_command.py | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 src/dashboard/src/fpr/migrations/0043_update_default_thumbnail_command.py diff --git a/src/dashboard/src/fpr/migrations/0043_update_default_thumbnail_command.py b/src/dashboard/src/fpr/migrations/0043_update_default_thumbnail_command.py new file mode 100644 index 0000000000..511b6122b3 --- /dev/null +++ b/src/dashboard/src/fpr/migrations/0043_update_default_thumbnail_command.py @@ -0,0 +1,107 @@ +from django.db import migrations + +OLD_DEFAULT_THUMBNAIL_CMD_UUID = "95149bc4-0620-4c20-964c-1d6c34b9400e" + +DEFAULT_THUMBNAIL_RULES = ("3a19f9a3-c5d5-4934-9286-13b3ad6c24d3",) + +NEW_DEFAULT_THUMBNAIL_CMD_UUID = "484d3a8f-9e59-4912-a5b8-f8a2deb3466a" +NEW_DEFAULT_THUMBNAIL_CMD = """ +import argparse +import base64 +import sys + +# http://i.imgur.com/ijwSkff.jpg +DEFAULT_THUMBNAIL = \"\"\" +/9j/2wBDAAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB +AQEBAQEBAQEBAQEBAQH/2wBDAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB +AQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/wAARCAAwADADASIAAhEBAxEB/8QAHAABAQACAgMA +AAAAAAAAAAAACAAFCQEKAwQG/8QANBAAAAUDAgIFCwUAAAAAAAAAAQMEBQYAAgcICRESExQ5eLcV +GSExV1iVmMHS1xZScYWn/8QAFgEBAQEAAAAAAAAAAAAAAAAAAAEC/8QAKBEAAgADBgUFAAAAAAAA +AAAAAAECESESMVFhkcEiQUJxoVJystHS/9oADAMBAAIRAxEAPwDelul5RzWOsiAYeh2d8yYlgjdp +mTZLBtxBOF+PVTjLXvKcmi7iufHJjLKXPye5mj7MQhQu5yxIymJVKhjKbTnl/vdQn13Pfvpa1vmN +mf30vNzvtGIj3KWDx0n9F2gML13Pfvpa1vmNmf314znLPJBJp9+tLWwNhJRhtwW6jJkNw2l2jfcF +oCYADcIAPABEA4+sQD01nq9VcUaciWEkcnTGpVBRPSCNpfSmFX2F842gNwWc4hzCACIW8RABH0UA +xdv3cZl2K2rHuPNW85dpbjLIrexrYhqAmLkscHTGsslRFi+yIZZkj48Ozu6QV6VKRvbMivKm0uBv +Sq9reDUuNFrVZivsa10WSpLlNjhDVBXjT4dJWxsjLVHFR9kua1qNyKbW5Mg6yY3AxKxKAy5PapAm ++43oLuXlPvEsDR3HbLOsLKUyksm0c5BaXVQx4zxmom+OnWTOVy2UQ6Mx56g8SGAqHDqKW2SxxQfL +LXOJ86VrugaBtURZrPeYisizHj2TWK1B8Tud9oxEe5SweOk/ou0otzvtGIj3KWDx0n9F2qCqqqoC +pC7VicpPuS5UEouwvrGkZ5PM5LQt5zByliEq6+7gAcbhAq0BuH0iAAHqAKPVI3a07SPJfc+efFjF +FZivg9z+MRVdF23RldzvtGIj3KWDx0n9F2tru4Lt+ah9ROf4VnnT9P8AEjGvb8PW4ik0fysErbiC +EjXNHmZNrwyuMXj8sFzNdTpGqRL0S1AyeRbWNIemWPnl5QQxC7zWe5H7TNH4f3OWPricarbXS3mr +O8SfgSzS12TDnVSM81nuSe0zR/8AGcr/AImq81nuSe0zR/8AGcr/AImqWn6ItYP0JLFefoOdIzaz +HjuSZND9uj94D/V8TXfWuB2styQfVk7SAH8PGVvriUaY+3ht4ahNNeoTIef8/wCQ8UyBxkGKQxdH +o9i79TuKe9O4yaNSNxdXVxkcah1zX5LuhyBMhQpkD/e9Xv7goUODEWxpEj1Ktw8LUnOtnBqSk3Wv +a+ook6pzWeKfNZH/2Q==\"\"\" + +def main(target): + with open(target, 'wb') as f: + f.write(base64.b64decode(DEFAULT_THUMBNAIL)) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--output-location', required=True) + args, _ = parser.parse_known_args() + + sys.exit(main(args.output_location)) +""" + + +def data_migration_up(apps, schema_editor): + FPCommand = apps.get_model("fpr", "FPCommand") + FPRule = apps.get_model("fpr", "FPRule") + + # Get the old command + old_command = FPCommand.objects.get(uuid=OLD_DEFAULT_THUMBNAIL_CMD_UUID) + + # Replace the existing command with the following + FPCommand.objects.create( + uuid=NEW_DEFAULT_THUMBNAIL_CMD_UUID, + replaces_id=OLD_DEFAULT_THUMBNAIL_CMD_UUID, + tool_id=old_command.tool_id, + enabled=old_command.enabled, + command=NEW_DEFAULT_THUMBNAIL_CMD, + script_type=old_command.script_type, + command_usage=old_command.command_usage, + description=old_command.description, + output_location=old_command.output_location, + output_format_id=old_command.output_format_id, + event_detail_command_id=old_command.event_detail_command_id, + 
verification_command_id=old_command.verification_command_id, + ) + + # Update existing rules + FPRule.objects.filter(uuid__in=DEFAULT_THUMBNAIL_RULES).update( + command_id=NEW_DEFAULT_THUMBNAIL_CMD_UUID, + ) + + # Disable the old command + old_command.enabled = False + old_command.save() + + +def data_migration_down(apps, schema_editor): + FPCommand = apps.get_model("fpr", "FPCommand") + FPRule = apps.get_model("fpr", "FPRule") + + # The order matters. We make sure that the rules point to the previous + # command before the latter is deleted. Otherwise our rules would be + # deleted by Django's on cascade mechanism + FPRule.objects.filter(uuid__in=DEFAULT_THUMBNAIL_RULES).update( + command_id=OLD_DEFAULT_THUMBNAIL_CMD_UUID, + ) + + # Enable the old command. At this point we do not know if the + # command was in fact enabled before the migration was run, so + # this may have unexpected consequences + old_command = FPCommand.objects.get(uuid=OLD_DEFAULT_THUMBNAIL_CMD_UUID) + old_command.enabled = True + old_command.save() + + # Delete the new command + FPCommand.objects.filter(uuid=NEW_DEFAULT_THUMBNAIL_CMD_UUID).delete() + + +class Migration(migrations.Migration): + dependencies = [("fpr", "0042_update_idtools")] + + operations = [migrations.RunPython(data_migration_up, data_migration_down)] From fb0990297b97e2cb9f9ade097055b72b222bbb83 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Tue, 7 May 2024 17:50:58 +0200 Subject: [PATCH 13/32] Fix SIP arrangement from ArchivesSpace pane --- src/dashboard/src/components/access/urls.py | 1 + src/dashboard/src/components/access/views.py | 4 +- tests/dashboard/test_access.py | 207 +++++++++++++++++++ 3 files changed, 210 insertions(+), 2 deletions(-) diff --git a/src/dashboard/src/components/access/urls.py b/src/dashboard/src/components/access/urls.py index 3cb41408ff..5e9937fa16 100644 --- a/src/dashboard/src/components/access/urls.py +++ b/src/dashboard/src/components/access/urls.py @@ -19,6 +19,7 @@ re_path( r"archivesspace/(?P[A-Za-z0-9-_]+)/copy_from_arrange/$", views.access_arrange_start_sip, + name="access_arrange_start_sip", ), re_path( r"archivesspace/(?P[A-Za-z0-9-_]+)/create_directory_within_arrange/$", diff --git a/src/dashboard/src/components/access/views.py b/src/dashboard/src/components/access/views.py index c47c873c42..e624f3ea09 100644 --- a/src/dashboard/src/components/access/views.py +++ b/src/dashboard/src/components/access/views.py @@ -97,7 +97,7 @@ def _get_sip(func): @wraps(func) def wrapper(request, mapping): arrange = SIPArrange.objects.get( - arrange_path=os.path.join(mapping.arrange_path, "") + arrange_path=os.path.join(mapping.arrange_path, "").encode() ) if arrange.sip is None: arrange.sip = SIP.objects.create(uuid=(uuid.uuid4()), currentpath=None) @@ -421,7 +421,7 @@ def access_arrange_start_sip(client, request, mapping): """ try: arrange = SIPArrange.objects.get( - arrange_path=os.path.join(mapping.arrange_path, "") + arrange_path=os.path.join(mapping.arrange_path, "").encode() ) except SIPArrange.DoesNotExist: response = { diff --git a/tests/dashboard/test_access.py b/tests/dashboard/test_access.py index abc5497e4f..718b6fb37a 100644 --- a/tests/dashboard/test_access.py +++ b/tests/dashboard/test_access.py @@ -1,7 +1,9 @@ import json import pathlib +from unittest import mock import archivematicaFunctions +import pytest from components import helpers from django.test import TestCase from django.test.client import Client @@ -56,3 +58,208 @@ def test_arrange_contents(self): in response_dict["entries"] ) 
assert len(response_dict["entries"]) == 1 + + +@pytest.mark.django_db +@pytest.fixture +def dashboard_uuid(): + helpers.set_setting("dashboard_uuid", "test-uuid") + + +def _encode_record_id(record_id): + return record_id.replace("/", "") + + +@pytest.mark.django_db +def test_access_arrange_start_sip_fails_if_arrange_mapping_does_not_exist( + dashboard_uuid, admin_client +): + record_id = "/repositories/2/archival_objects/1" + + response = admin_client.get( + reverse( + "access:access_arrange_start_sip", + kwargs={"record_id": _encode_record_id(record_id)}, + ) + ) + assert response.status_code == 404 + + result = json.loads(response.content.decode()) + assert result == { + "message": f"No SIP Arrange mapping exists for record {_encode_record_id(record_id)}", + "success": False, + } + + +@pytest.mark.django_db +@mock.patch("components.access.views.get_as_system_client") +def test_access_arrange_start_sip_fails_if_arrange_does_not_exist( + get_as_system_client, dashboard_uuid, admin_client +): + record_id = "/repositories/2/archival_objects/1" + models.SIPArrangeAccessMapping.objects.create( + arrange_path="/foobar", + system=models.SIPArrangeAccessMapping.ARCHIVESSPACE, + identifier=_encode_record_id(record_id), + ) + + response = admin_client.get( + reverse( + "access:access_arrange_start_sip", + kwargs={"record_id": _encode_record_id(record_id)}, + ) + ) + assert response.status_code == 404 + + result = json.loads(response.content.decode()) + assert result == { + "message": f"No SIP Arrange object exists for record {_encode_record_id(record_id)}", + "success": False, + } + + +@pytest.mark.django_db +@mock.patch( + "components.access.views.get_as_system_client", + return_value=mock.Mock( + **{ + "get_record.side_effect": [ + # Archival object. + {"resource": {"ref": "/repositories/2/resources/10"}}, + # Resource. + {"linked_agents": []}, + ] + } + ), +) +def test_access_arrange_start_sip_fails_if_resource_creators_cannot_be_fetched( + get_as_system_client, dashboard_uuid, admin_client +): + record_id = "/repositories/2/archival_objects/1" + mapping = models.SIPArrangeAccessMapping.objects.create( + arrange_path="/foobar", + system=models.SIPArrangeAccessMapping.ARCHIVESSPACE, + identifier=_encode_record_id(record_id), + ) + models.SIPArrange.objects.create( + arrange_path=f"{pathlib.Path(mapping.arrange_path)}/".encode() + ) + + response = admin_client.get( + reverse( + "access:access_arrange_start_sip", + kwargs={"record_id": _encode_record_id(record_id)}, + ) + ) + assert response.status_code == 502 + + result = json.loads(response.content.decode()) + assert result == { + "message": "Unable to fetch ArchivesSpace creator", + "success": False, + } + + +@pytest.mark.django_db +@mock.patch("components.filesystem_ajax.views.copy_from_arrange_to_completed_common") +@mock.patch("components.access.views.get_as_system_client") +def test_access_arrange_start_sip( + get_as_system_client, + copy_from_arrange_to_completed_common, + dashboard_uuid, + admin_client, + caplog, +): + # Mock expected responses from ArchivesSpace. 
+ archival_object = { + "resource": {"ref": "/repositories/2/resources/10"}, + "notes": [{"type": "odd", "subnotes": [{"content": "A note"}]}], + "display_string": "Object, 2024", + "parent": {"ref": "/repositories/2/resources/1"}, + } + resource = {"linked_agents": [{"ref": "/agents/people/3", "role": "creator"}]} + creator = {"display_name": {"sort_name": "Foo, Bar"}} + parent = {"title": "Parent resource"} + digital_object = {"id": "do"} + get_as_system_client.return_value = mock.Mock( + **{ + "get_record.side_effect": [archival_object, resource, creator, parent], + "add_digital_object.side_effect": [ + digital_object, + ], + } + ) + + # Mock interaction with copy_from_arrange_to_completed_common. + sip = models.SIP.objects.create() + expected_status_code = 201 + expected_response_content = {"message": "SIP created.", "sip_uuid": str(sip.uuid)} + copy_from_arrange_to_completed_common.return_value = ( + expected_status_code, + expected_response_content, + ) + + # Set database fixtures. + record_id = "/repositories/2/archival_objects/1" + mapping = models.SIPArrangeAccessMapping.objects.create( + arrange_path="/foobar", + system=models.SIPArrangeAccessMapping.ARCHIVESSPACE, + identifier=_encode_record_id(record_id), + ) + models.SIPArrange.objects.create( + arrange_path=f"{pathlib.Path(mapping.arrange_path)}/".encode() + ) + models.ArchivesSpaceDigitalObject.objects.create( + resourceid=_encode_record_id(record_id), started=False + ) + + response = admin_client.post( + reverse( + "access:access_arrange_start_sip", + kwargs={"record_id": _encode_record_id(record_id)}, + ), + data=json.dumps({}), + content_type="application/json", + ) + assert response.status_code == expected_status_code + + result = json.loads(response.content.decode()) + assert result == expected_response_content + + assert models.DublinCore.objects.count() == 1 + assert ( + models.DublinCore.objects.filter( + metadataappliestotype_id=models.MetadataAppliesToType.SIP_TYPE, + metadataappliestoidentifier=sip.uuid, + title=archival_object["display_string"], + creator=creator["display_name"]["sort_name"], + description=archival_object["notes"][0]["subnotes"][0]["content"], + rights=" ".join( + [ + "This content may be under copyright.", + "Researchers are responsible for determining the", + "appropriate use or reuse of materials.", + ] + ), + relation=parent["title"], + ).count() + == 1 + ) + + assert models.ArchivesSpaceDigitalObject.objects.count() == 1 + assert ( + models.ArchivesSpaceDigitalObject.objects.filter( + resourceid=_encode_record_id(record_id), + started=True, + remoteid=digital_object["id"], + sip_id=expected_response_content["sip_uuid"], + ).count() + == 1 + ) + + assert [r.message for r in caplog.records] == [ + f"archival object {archival_object}", + f"resource {resource}", + f"creator {creator}", + f"New SIP UUID {sip.uuid}", + ] From 79ab3d526345ef17d8d6ce9dae07cb60b7c06680 Mon Sep 17 00:00:00 2001 From: Douglas Cerna Date: Wed, 8 May 2024 15:40:16 -0600 Subject: [PATCH 14/32] Fix failure reports index --- src/dashboard/src/templates/_pager.html | 8 +++--- .../administration/test_administration.py | 25 ++++++++++++++++--- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/dashboard/src/templates/_pager.html b/src/dashboard/src/templates/_pager.html index e9eb32d0ae..e28353686c 100644 --- a/src/dashboard/src/templates/_pager.html +++ b/src/dashboard/src/templates/_pager.html @@ -4,11 +4,11 @@
{% if page.has_previous %} - {% ifnotequal page.previous_page_number 1 %} + {% if not page.previous_page_number == 1 %} {% trans "First" %} - {% endifnotequal %} + {% endif %} {% trans "Previous" %} @@ -34,11 +34,11 @@ {% trans "Next" %} - {% ifnotequal page.next_page_number page.paginator.num_pages %} + {% if not page.next_page_number == page.paginator.num_pages %} {% trans "Last" %} - {% endifnotequal %} + {% endif %} {% endif %} diff --git a/tests/dashboard/components/administration/test_administration.py b/tests/dashboard/components/administration/test_administration.py index 9cfb18b134..15abeb5fff 100644 --- a/tests/dashboard/components/administration/test_administration.py +++ b/tests/dashboard/components/administration/test_administration.py @@ -1,13 +1,19 @@ +import uuid + import pytest from components import helpers from django.urls import reverse from main.models import Report +@pytest.fixture @pytest.mark.django_db -def test_admin_set_language(admin_client): - helpers.set_setting("dashboard_uuid", "test-uuid") +def dashboard_uuid(): + helpers.set_setting("dashboard_uuid", str(uuid.uuid4())) + +@pytest.mark.django_db +def test_admin_set_language(dashboard_uuid, admin_client): response = admin_client.get(reverse("administration:admin_set_language")) assert response.status_code == 200 @@ -17,8 +23,7 @@ def test_admin_set_language(admin_client): @pytest.mark.django_db -def test_failure_report_delete(admin_client): - helpers.set_setting("dashboard_uuid", "test-uuid") +def test_failure_report_delete(dashboard_uuid, admin_client): report = Report.objects.create(content="my report") response = admin_client.post( @@ -30,3 +35,15 @@ def test_failure_report_delete(admin_client): assert "No reports found." in response.content.decode() assert Report.objects.count() == 0 + + +@pytest.mark.django_db +def test_failure_report(dashboard_uuid, admin_client): + report = Report.objects.create(content="my report") + + response = admin_client.get(reverse("administration:reports_failures_index")) + assert response.status_code == 200 + + content = response.content.decode() + assert "

Failure report
" in content + assert reverse("administration:failure_report", args=[report.pk]) in content From 34aa3c8031fa3731344ecb4216f54c433b3364c1 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Wed, 8 May 2024 20:54:48 +0200 Subject: [PATCH 15/32] Remove references to Binder --- src/MCPServer/lib/assets/workflow.json | 12 ++++++------ .../components/administration/forms_dip_upload.py | 10 +++++----- src/dashboard/src/media/js/ingest.js | 9 ++------- .../src/templates/administration/dips_atom_edit.html | 4 ++-- .../src/templates/administration/sidebar.html | 2 +- src/dashboard/src/templates/ingest/grid.html | 3 +-- 6 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/MCPServer/lib/assets/workflow.json b/src/MCPServer/lib/assets/workflow.json index 48c43dcc57..8f0ce2e563 100644 --- a/src/MCPServer/lib/assets/workflow.json +++ b/src/MCPServer/lib/assets/workflow.json @@ -69,9 +69,9 @@ }, "0fe9842f-9519-4067-a691-8a363132ae24": { "description": { - "en": "Upload DIP to AtoM/Binder", - "no": "Last opp DIP til AtoM/Binder", - "pt_BR": "Carregar DIP no AtoM/Binder" + "en": "Upload DIP to AtoM", + "no": "Last opp DIP til AtoM", + "pt_BR": "Carregar DIP no AtoM" }, "link_id": "7f975ba6-2185-434c-b507-2911f3c77213" }, @@ -6746,9 +6746,9 @@ "replacements": [] }, "description": { - "en": "Choose config for AtoM/Binder DIP upload", - "no": "Velg konfigurasjon for AtoM/Binder DIP opplasting", - "pt_BR": "Escolher configuração para carregar DIP no AtoM/Binder" + "en": "Choose config for AtoM DIP upload", + "no": "Velg konfigurasjon for AtoM DIP opplasting", + "pt_BR": "Escolher configuração para carregar DIP no AtoM" }, "exit_codes": { "0": { diff --git a/src/dashboard/src/components/administration/forms_dip_upload.py b/src/dashboard/src/components/administration/forms_dip_upload.py index 394125f87c..4b62fac4de 100644 --- a/src/dashboard/src/components/administration/forms_dip_upload.py +++ b/src/dashboard/src/components/administration/forms_dip_upload.py @@ -90,24 +90,24 @@ class AtomConfigForm(forms.Form): url = forms.CharField( label=_("Upload URL"), help_text=_( - "URL where the AtoM/Binder index.php frontend lives, SWORD services path will be appended." + "URL where the AtoM index.php frontend lives, SWORD services path will be appended." ), ) email = forms.CharField( label=_("Login email"), - help_text=_("E-mail account used to log into AtoM/Binder."), + help_text=_("E-mail account used to log into AtoM."), ) password = forms.CharField( - label=_("Login password"), help_text=_("Password used to log into AtoM/Binder.") + label=_("Login password"), help_text=_("Password used to log into AtoM.") ) version = forms.ChoiceField( - label=_("AtoM/Binder version"), choices=((1, "1.x"), (2, "2.x")) + label=_("AtoM version"), choices=((1, "1.x"), (2, "2.x")) ) rsync_target = forms.CharField( required=False, label=_("Rsync target"), help_text=_( - "The DIP can be sent with Rsync to a remote host before is deposited in AtoM/Binder. This is the destination value passed to Rsync (see man 1 rsync). For example: foobar.com:~/dips/." + "The DIP can be sent with Rsync to a remote host before is deposited in AtoM. This is the destination value passed to Rsync (see man 1 rsync). For example: foobar.com:~/dips/." 
), ) rsync_command = forms.CharField( diff --git a/src/dashboard/src/media/js/ingest.js b/src/dashboard/src/media/js/ingest.js index 18bbe20fb7..efac18ebc7 100644 --- a/src/dashboard/src/media/js/ingest.js +++ b/src/dashboard/src/media/js/ingest.js @@ -313,8 +313,8 @@ $(function() return false; } - // "Upload DIP to AtoM/Binder" chain matched by its UUID. - // If no identifier for the AtoM or Binder SWORD V1 deposit endpoint + // "Upload DIP to AtoM" chain matched by its UUID. + // If no identifier for the AtoM SWORD V1 deposit endpoint // provided at start of transfer, display a modal dialog to request // such here. if (chainId == '0fe9842f-9519-4067-a691-8a363132ae24') @@ -407,11 +407,6 @@ $(function() .modal('show'); } else { - // The access system ID that the user supplies at the start of - // transfer must contain the correct target prefix if the upload - // is to Binder, i.e., the 'ar:' prefix for an artwork record and - // the 'tr:' prefix for a technical record. This is explained in - // the modal dialog help text. See templates/ingest/grid.html. var xhr = $.ajax(url, { type: 'POST', data: {'target': this.model.sip.attributes.access_system_id}, diff --git a/src/dashboard/src/templates/administration/dips_atom_edit.html b/src/dashboard/src/templates/administration/dips_atom_edit.html index be8627ebf6..81c7e3d1c9 100644 --- a/src/dashboard/src/templates/administration/dips_atom_edit.html +++ b/src/dashboard/src/templates/administration/dips_atom_edit.html @@ -35,9 +35,9 @@
     {% csrf_token %}
 
-    {% trans "AtoM/Binder DIP upload" %}
+    {% trans "AtoM DIP upload" %}
 
-    {% trans "The settings below configure DIP uploading to AtoM/Binder." %}
+    {% trans "The settings below configure DIP uploading to AtoM." %}
 
{% include "_form.html" %} diff --git a/src/dashboard/src/templates/administration/sidebar.html b/src/dashboard/src/templates/administration/sidebar.html index 56fe1703d2..6826129f96 100644 --- a/src/dashboard/src/templates/administration/sidebar.html +++ b/src/dashboard/src/templates/administration/sidebar.html @@ -31,7 +31,7 @@
  • {% trans "DIP upload" %}
diff --git a/src/dashboard/src/templates/ingest/grid.html b/src/dashboard/src/templates/ingest/grid.html
index cf24aefd3c..575c47d7b6 100644
--- a/src/dashboard/src/templates/ingest/grid.html
+++ b/src/dashboard/src/templates/ingest/grid.html
@@ -155,10 +155,9 @@

    {% trans "Upload DIP" %}