From 0294d7429178e965798407a87943e91e17177a0a Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 7 Feb 2024 02:09:19 +0100 Subject: [PATCH 1/6] startup: avoid repeating file actions --- startup.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/startup.sh b/startup.sh index 7cf713d..135f0a3 100755 --- a/startup.sh +++ b/startup.sh @@ -1,4 +1,7 @@ #! /bin/bash +# avoid repeating file actions when restarting container: +if ! grep -q ^ocrd: /etc/passwd; then + cat /authorized_keys >>/.ssh/authorized_keys cat /id_rsa >>/.ssh/id_rsa @@ -23,7 +26,7 @@ EOF fi # turn off the login banner -touch /.hushlogin +> /.hushlogin set | fgrep -ve BASH >/.ssh/environment @@ -49,6 +52,9 @@ echo ocrd:*:19020:0:99999:7::: >>/etc/shadow /bin/sed -i '/imklog/s/^/#/' /etc/rsyslog.conf # rsyslog upd reception on port 514 /bin/sed -i '/imudp/s/^#//' /etc/rsyslog.conf + +fi + # start syslog service rsyslog start From 24fe211f7a121eba4161733033351ee657879c1b Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 7 Feb 2024 04:05:24 +0100 Subject: [PATCH 2/6] use admin@ instead of ocrd@controller to avoid worker semaphore --- ocrd_lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_lib.sh b/ocrd_lib.sh index 35c71d3..4a4f6f7 100644 --- a/ocrd_lib.sh +++ b/ocrd_lib.sh @@ -175,7 +175,7 @@ pre_clone_to_workdir() { pre_sync_workdir () { # copy the data explicitly from Manager to Controller - rsync -av -e "ssh -p $CONTROLLERPORT -l ocrd" "$WORKDIR/" $CONTROLLERHOST:/data/$REMOTEDIR + rsync -av -e "ssh -p $CONTROLLERPORT -l admin" "$WORKDIR/" $CONTROLLERHOST:/data/$REMOTEDIR } ocrd_validate_workflow () { From c919395ad86ca4508741909285bc211617055a9a Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 15 Feb 2024 02:09:53 +0100 Subject: [PATCH 3/6] force ssh tty to become interruptible --- ocrd_lib.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocrd_lib.sh b/ocrd_lib.sh index 4a4f6f7..fd09328 100644 --- a/ocrd_lib.sh +++ b/ocrd_lib.sh @@ -129,7 +129,7 @@ ocrd_exec() { for param in "$@"; do $param done - } | ssh -T -p "${CONTROLLERPORT}" ocrd@${CONTROLLERHOST} 2>&1 + } | ssh -tt -p "${CONTROLLERPORT}" ocrd@${CONTROLLERHOST} 2>&1 } pre_process_to_workdir() { @@ -175,6 +175,7 @@ pre_clone_to_workdir() { pre_sync_workdir () { # copy the data explicitly from Manager to Controller + # use admin instead of ocrd to avoid entering worker semaphore via sshrc rsync -av -e "ssh -p $CONTROLLERPORT -l admin" "$WORKDIR/" $CONTROLLERHOST:/data/$REMOTEDIR } From 53c41e8764a59c7f291e17f4bfa4674cd1fa2568 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 15 Feb 2024 02:10:20 +0100 Subject: [PATCH 4/6] use ocrd-import -j for speedup --- ocrd_lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_lib.sh b/ocrd_lib.sh index fd09328..1f521ce 100644 --- a/ocrd_lib.sh +++ b/ocrd_lib.sh @@ -105,7 +105,7 @@ ocrd_format_workflow() { # ocrd import from workdir ocrd_import_workdir() { echo "echo \$\$ > $REMOTEDIR/ocrd.pid" - echo "if test -f '$REMOTEDIR/mets.xml'; then OV=--overwrite; else OV=; ocrd-import -i '$REMOTEDIR'; fi" + echo "if test -f '$REMOTEDIR/mets.xml'; then OV=--overwrite; else OV=; ocrd-import -j 1 -i '$REMOTEDIR'; fi" echo "cd '$REMOTEDIR'" } From 48b22ff620fe0da5b84b2b17e612860986f20de2 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 15 Feb 2024 02:10:50 +0100 Subject: [PATCH 5/6] back-out of errors and interrupts by removing the data on the Controller --- ocrd_lib.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ocrd_lib.sh b/ocrd_lib.sh index 1f521ce..53aa86f 100644 --- a/ocrd_lib.sh +++ b/ocrd_lib.sh @@ -6,15 +6,21 @@ set -o pipefail TASK=$(basename $0) +cleanupremote() { + ssh -Tn -p "${CONTROLLERPORT}" admin@${CONTROLLERHOST} rm -fr /data/$REMOTEDIR +} + logerr() { logger -p user.info -t $TASK "terminating with error \$?=$? from ${BASH_COMMAND} on line $(caller)" + cleanupremote & kitodo_production_task_action_error_open } stopbg() { logger -p user.crit -t $TASK "passing SIGKILL to child $!" + cleanupremote # pass signal on to children - kill -KILL $! + kill -INT $! } # initialize variables, create ord-d work directory and exit if something is missing @@ -186,9 +192,9 @@ ocrd_validate_workflow () { post_sync_workdir () { # copy the results back from Controller to Manager - rsync -av -e "ssh -p $CONTROLLERPORT -l ocrd" $CONTROLLERHOST:/data/$REMOTEDIR/ "$WORKDIR" - # TODO: maybe also schedule cleanup (or have a cron job delete dirs in /data which are older than N days) - # e.g. `ssh --port $CONTROLLERPORT ocrd@$CONTROLLERHOST rm -fr /data/"$WORKDIR"` + rsync -av -e "ssh -p $CONTROLLERPORT -l admin" $CONTROLLERHOST:/data/$REMOTEDIR/ "$WORKDIR" + # schedule cleanup + cleanupremote } post_validate_workdir() { From 212326171e10f9784ca6f745a1ecabfd3d37fcaa Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 15 Feb 2024 23:16:13 +0100 Subject: [PATCH 6/6] enter REMOTEDIR before ocrd-import --- ocrd_lib.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ocrd_lib.sh b/ocrd_lib.sh index 53aa86f..d5b9402 100644 --- a/ocrd_lib.sh +++ b/ocrd_lib.sh @@ -110,15 +110,15 @@ ocrd_format_workflow() { # ocrd import from workdir ocrd_import_workdir() { - echo "echo \$\$ > $REMOTEDIR/ocrd.pid" - echo "if test -f '$REMOTEDIR/mets.xml'; then OV=--overwrite; else OV=; ocrd-import -j 1 -i '$REMOTEDIR'; fi" echo "cd '$REMOTEDIR'" + echo "echo \$\$ > ocrd.pid" + echo "if test -f mets.xml; then OV=--overwrite; else OV=; ocrd-import -j 1 -i; fi" } ocrd_enter_workdir() { - echo "echo \$\$ > $REMOTEDIR/ocrd.pid" - echo "if test -f '$REMOTEDIR/mets.xml'; then OV=--overwrite; else OV=; fi" echo "cd '$REMOTEDIR'" + echo "echo \$\$ > ocrd.pid" + echo "if test -f mets.xml; then OV=--overwrite; else OV=; fi" } ocrd_process_workflow() {