diff --git a/.env.example b/.env.example index f59065b..23a5fca 100644 --- a/.env.example +++ b/.env.example @@ -4,7 +4,7 @@ CONTROLLER_PORT_SSH=22 MANAGER_IMAGE=ghcr.io/slub/ocrd_manager:latest MANAGER_HOST=ocrd-manager MANAGER_PORT_SSH=22 - +MANAGER_PORT_WEB=4004 MANAGER_ENV_UID=1001 MANAGER_ENV_GID=1001 diff --git a/Dockerfile b/Dockerfile index c0ae792..ddc520c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,6 +22,10 @@ LABEL \ ARG KITODO_MQ_CLIENT_VERSION=0.3 +# Changelog https://www.mongodb.com/docs/mongodb-shell/changelog/ +# Supported MongoDB Version https://www.mongodb.com/docs/mongodb-shell/connect/#supported-mongodb-versions +ARG MONGODB_SHELL_VERSION=1.10.1 + ENV HOME=/ # make apt system functional @@ -55,11 +59,19 @@ RUN chmod go+r $ACTIVEMQ_CLIENT # configure ActiveMQ client queue ENV ACTIVEMQ_CLIENT_QUEUE FinalizeTaskQueue -# workaround for OCR-D/core#983 -RUN pip install ocrd -# install mets-mods2tei and page-to-alto +# install mets-mods2tei (for METS updates outside of OCR-D workspace) RUN pip install mets-mods2tei +# install page-to-alto (for ALTO conversion outside of OCR-D workflow) RUN pip install ocrd-page-to-alto +# install mongosh (for job information) +RUN wget https://downloads.mongodb.com/compass/mongodb-mongosh_${MONGODB_SHELL_VERSION}_amd64.deb +RUN dpkg -i mongodb-mongosh_${MONGODB_SHELL_VERSION}_amd64.deb +# install socat and sampo (for minimal REST API to CLI entrypoints) +RUN apt-get install socat +RUN wget -O /usr/bin/sampo.sh https://github.com/bertsky/sampo/raw/external-script-cgiopts/docker/sampo/sampo.sh +#COPY sampo.sh /usr/bin/ +COPY sampo.conf /usr/bin/ +RUN chmod +x /usr/bin/sampo.sh # run OpenSSH server RUN ssh-keygen -A diff --git a/docker-compose.yml b/docker-compose.yml index 8ae206b..1f5b4ce 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,15 +12,17 @@ services: hostname: ${MANAGER_HOST} environment: - - UID=${MANAGER_ENV_UID} - - GID=${MANAGER_ENV_GID} - - UMASK=${MANAGER_ENV_UMASK} - - "CONTROLLER=${CONTROLLER_HOST}:${CONTROLLER_PORT_SSH}" - - "ACTIVEMQ=${MQ_HOST}:${MQ_PORT}" - - ACTIVEMQ_CLIENT_QUEUE=${MQ_CLIENT_QUEUE} + UID: ${MANAGER_ENV_UID} + GID: ${MANAGER_ENV_GID} + UMASK: ${MANAGER_ENV_UMASK} + CONTROLLER: "${CONTROLLER_HOST}:${CONTROLLER_PORT_SSH}" + DB_CONNECTION: "mongodb://${DB_ROOT_USER:-root}:${DB_ROOT_PASSWORD:-root_password}@ocrd-database:27017" + ACTIVEMQ: "${MQ_HOST}:${MQ_PORT}" + ACTIVEMQ_CLIENT_QUEUE: "${MQ_CLIENT_QUEUE}" ports: - ${MANAGER_PORT_SSH}:22 + - ${MANAGER_PORT_WEB}:4004 # must have been started independently: # depends_on: diff --git a/ocrd_lib.sh b/ocrd_lib.sh index 6fd138d..26fb144 100644 --- a/ocrd_lib.sh +++ b/ocrd_lib.sh @@ -8,7 +8,6 @@ TASK=$(basename $0) logerr() { logger -p user.info -t $TASK "terminating with error \$?=$? from ${BASH_COMMAND} on line $(caller)" - kitodo_production_task_action_error_open } @@ -22,7 +21,7 @@ stopbg() { init() { trap logerr ERR trap stopbg INT TERM KILL - + PID=$$ cd /data @@ -68,25 +67,29 @@ init() { CONTROLLERHOST=${CONTROLLER%:*} CONTROLLERPORT=${CONTROLLER#*:} - # create stats for monitor - mkdir -p /run/lock/ocrd.jobs/ - { - echo PID=$PID - echo TIME_CREATED=$(date --rfc-3339=seconds) - echo PROCESS_ID=$PROCESS_ID - echo TASK_ID=$TASK_ID - echo PROCESS_DIR=$PROCESS_DIR - echo WORKDIR=$WORKDIR - echo REMOTEDIR=$REMOTEDIR - echo WORKFLOW=$WORKFLOW - echo CONTROLLER=$CONTROLLER - } > /run/lock/ocrd.jobs/$REMOTEDIR + # create job stats for monitor + HOME=/tmp mongosh --quiet --norc --eval "use ocrd" --eval "db.OcrdJob.insertOne( { + pid: $PID, + time_created: ISODate(\"$(date --rfc-3339=seconds)\"), + process_id: \"$PROCESS_ID\", + task_id: \"$TASK_ID\", + process_dir: \"$PROCESS_DIR\", + workdir: \"$WORKDIR\", + remotedir: \"$REMOTEDIR\", + workflow_file: \"$WORKFLOW\", + controller_address: \"$CONTROLLER\" + } )" $DB_CONNECTION | logger -p user.debug -t $TASK } logret() { - sed -i "1s/PID=.*/RETVAL=$?/" /run/lock/ocrd.jobs/$REMOTEDIR - sed -i "2a TIME_TERMINATED=$(date --rfc-3339=seconds)" /run/lock/ocrd.jobs/$REMOTEDIR + HOME=/tmp mongosh --quiet --norc --eval "use ocrd" --eval "db.OcrdJob.findOneAndUpdate( { + pid: $PID }, { \$set: { + time_terminated: ISODate(\"$(date --rfc-3339=seconds)\"), + return_code: $? + }, \$unset: { + pid: \"\" + }})" $DB_CONNECTION | logger -p user.debug -t $TASK } init_task() { @@ -238,21 +241,25 @@ post_process_to_mets() { kitodo_production_task_action() { ACTION="" MESSAGE="${2}" + JOBCOMPLETE=0 case ${1} in 1) ACTION="COMMENT" ;; 2) ACTION="ERROR_OPEN" + JOBCOMPLETE=1 ;; 3) ACTION="ERROR_CLOSE" + JOBCOMPLETE=1 ;; 4) ACTION="PROCESS" ;; 5) ACTION="CLOSE" + JOBCOMPLETE=1 ;; *) logger -p user.error -t $TASK "Unknown task action type" @@ -265,14 +272,16 @@ kitodo_production_task_action() { elif test "$ACTION" == "CLOSE"; then java -Dlog4j2.configurationFile=$ACTIVEMQ_CLIENT_LOG4J2 -jar "$ACTIVEMQ_CLIENT" "tcp://$ACTIVEMQ?closeAsync=false" "$ACTIVEMQ_CLIENT_QUEUE" $TASK_ID "$MESSAGE" fi + if ((JOBCOMPLETE)); then + logret # communicate retval 0 + fi fi - logret # communicate retval 0 } kitodo_production_task_action_comment() { if test -n "${1}"; then kitodo_production_task_action 1 "${1}" - else + else logger -p user.info -t $TASK "Could not send task info cause no message was specified" fi } @@ -309,6 +318,5 @@ close() { # become synchronous again logger -p user.info -t $TASK "ocr_exit in sync mode - wait until the processing is completed" wait $! - #rm -f /run/lock/ocrd.jobs/$REMOTEDIR fi } diff --git a/sampo.conf b/sampo.conf new file mode 100644 index 0000000..2b085ad --- /dev/null +++ b/sampo.conf @@ -0,0 +1,7 @@ +match_uri '^/$' list_endpoints + +match_uri '^/for_production|^/process_images' run_external_script for_production.sh + +match_uri '^/for_presentation|^/process_mets' run_external_script for_presentation.sh + +match_uri '^/cancel_job/(.*)$' run_external_script kill diff --git a/startup.sh b/startup.sh index cd0bfa5..7cf713d 100755 --- a/startup.sh +++ b/startup.sh @@ -35,7 +35,6 @@ echo "umask $UMASK" >>/.ssh/rc # removes read/write/execute permissions from group and others, but preserves whatever permissions the owner had chmod go-rwx /.ssh/* -chmod go+rwx /run/lock/ocrd.jobs # set owner and group chown -R $UID:$GID /.ssh @@ -46,17 +45,20 @@ echo ocrd:x:$UID:$GID:SSH user:/:/bin/bash >>/etc/passwd # save password informations echo ocrd:*:19020:0:99999:7::: >>/etc/shadow -# start ssh as daemon and send output to standard error -#/usr/sbin/sshd -D -e -service ssh start - # Replace imklog to prevent starting problems of rsyslog /bin/sed -i '/imklog/s/^/#/' /etc/rsyslog.conf - # rsyslog upd reception on port 514 /bin/sed -i '/imudp/s/^#//' /etc/rsyslog.conf - +# start syslog service rsyslog start +# start ssh as daemon and send output to standard error +#/usr/sbin/sshd -D -e +service ssh start + +# start REST webservice +socat -d -ly TCP-LISTEN:4004,reuseaddr,fork,pf=ip4 exec:sampo.sh & + sleep 2 +# connect syslog to container stdout tail -f /var/log/syslog