Skip to content

Commit

Permalink
Merge pull request #62 from bertsky/job-db
Browse files Browse the repository at this point in the history
use mongosh connection to ocrd-database for job infos
  • Loading branch information
markusweigelt authored Aug 10, 2023
2 parents 35d420e + 653ad46 commit c5d4f77
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 37 deletions.
2 changes: 1 addition & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ CONTROLLER_PORT_SSH=22
MANAGER_IMAGE=ghcr.io/slub/ocrd_manager:latest
MANAGER_HOST=ocrd-manager
MANAGER_PORT_SSH=22

MANAGER_PORT_WEB=4004

MANAGER_ENV_UID=1001
MANAGER_ENV_GID=1001
Expand Down
18 changes: 15 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ LABEL \

ARG KITODO_MQ_CLIENT_VERSION=0.3

# Changelog https://www.mongodb.com/docs/mongodb-shell/changelog/
# Supported MongoDB Version https://www.mongodb.com/docs/mongodb-shell/connect/#supported-mongodb-versions
ARG MONGODB_SHELL_VERSION=1.10.1

ENV HOME=/

# make apt system functional
Expand Down Expand Up @@ -55,11 +59,19 @@ RUN chmod go+r $ACTIVEMQ_CLIENT
# configure ActiveMQ client queue
ENV ACTIVEMQ_CLIENT_QUEUE FinalizeTaskQueue

# workaround for OCR-D/core#983
RUN pip install ocrd
# install mets-mods2tei and page-to-alto
# install mets-mods2tei (for METS updates outside of OCR-D workspace)
RUN pip install mets-mods2tei
# install page-to-alto (for ALTO conversion outside of OCR-D workflow)
RUN pip install ocrd-page-to-alto
# install mongosh (for job information)
RUN wget https://downloads.mongodb.com/compass/mongodb-mongosh_${MONGODB_SHELL_VERSION}_amd64.deb
RUN dpkg -i mongodb-mongosh_${MONGODB_SHELL_VERSION}_amd64.deb
# install socat and sampo (for minimal REST API to CLI entrypoints)
# -y: the image build has no terminal to answer apt's confirmation prompt
RUN apt-get install -y socat
RUN wget -O /usr/bin/sampo.sh https://github.com/bertsky/sampo/raw/external-script-cgiopts/docker/sampo/sampo.sh
#COPY sampo.sh /usr/bin/
COPY sampo.conf /usr/bin/
RUN chmod +x /usr/bin/sampo.sh

# run OpenSSH server
RUN ssh-keygen -A
Expand Down
14 changes: 8 additions & 6 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,17 @@ services:
hostname: ${MANAGER_HOST}

environment:
- UID=${MANAGER_ENV_UID}
- GID=${MANAGER_ENV_GID}
- UMASK=${MANAGER_ENV_UMASK}
- "CONTROLLER=${CONTROLLER_HOST}:${CONTROLLER_PORT_SSH}"
- "ACTIVEMQ=${MQ_HOST}:${MQ_PORT}"
- ACTIVEMQ_CLIENT_QUEUE=${MQ_CLIENT_QUEUE}
UID: ${MANAGER_ENV_UID}
GID: ${MANAGER_ENV_GID}
UMASK: ${MANAGER_ENV_UMASK}
CONTROLLER: "${CONTROLLER_HOST}:${CONTROLLER_PORT_SSH}"
DB_CONNECTION: "mongodb://${DB_ROOT_USER:-root}:${DB_ROOT_PASSWORD:-root_password}@ocrd-database:27017"
ACTIVEMQ: "${MQ_HOST}:${MQ_PORT}"
ACTIVEMQ_CLIENT_QUEUE: "${MQ_CLIENT_QUEUE}"

ports:
- ${MANAGER_PORT_SSH}:22
- ${MANAGER_PORT_WEB}:4004

# must have been started independently:
# depends_on:
Expand Down
48 changes: 28 additions & 20 deletions ocrd_lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ TASK=$(basename $0)

# Error handler (installed via `trap logerr ERR` in init).
# Logs the failing command, its exit status and the caller position to
# syslog under tag $TASK, then calls kitodo_production_task_action_error_open.
# Globals: TASK (read)
logerr() {
  # $? is expanded before $(caller) within the same word, so it still holds
  # the status of the command that triggered the handler
  logger -p user.info -t "$TASK" "terminating with error \$?=$? from ${BASH_COMMAND} on line $(caller)"

  kitodo_production_task_action_error_open
}

Expand All @@ -22,7 +21,7 @@ stopbg() {
init() {
trap logerr ERR
trap stopbg INT TERM KILL

PID=$$

cd /data
Expand Down Expand Up @@ -68,25 +67,29 @@ init() {
CONTROLLERHOST=${CONTROLLER%:*}
CONTROLLERPORT=${CONTROLLER#*:}

# create stats for monitor
mkdir -p /run/lock/ocrd.jobs/
{
echo PID=$PID
echo TIME_CREATED=$(date --rfc-3339=seconds)
echo PROCESS_ID=$PROCESS_ID
echo TASK_ID=$TASK_ID
echo PROCESS_DIR=$PROCESS_DIR
echo WORKDIR=$WORKDIR
echo REMOTEDIR=$REMOTEDIR
echo WORKFLOW=$WORKFLOW
echo CONTROLLER=$CONTROLLER
} > /run/lock/ocrd.jobs/$REMOTEDIR
# create job stats for monitor
HOME=/tmp mongosh --quiet --norc --eval "use ocrd" --eval "db.OcrdJob.insertOne( {
pid: $PID,
time_created: ISODate(\"$(date --rfc-3339=seconds)\"),
process_id: \"$PROCESS_ID\",
task_id: \"$TASK_ID\",
process_dir: \"$PROCESS_DIR\",
workdir: \"$WORKDIR\",
remotedir: \"$REMOTEDIR\",
workflow_file: \"$WORKFLOW\",
controller_address: \"$CONTROLLER\"
} )" $DB_CONNECTION | logger -p user.debug -t $TASK

}

# Record the termination of the current job for the monitor.
# Stores the exit status that was current when logret was called, together
# with the termination time, and removes the pid from the job record.
# Globals: PID, REMOTEDIR, DB_CONNECTION, TASK (all read)
logret() {
  # Capture immediately: every later command, including the $(date)
  # substitutions below, overwrites $? before it would be expanded.
  local retval=$?

  # NOTE(review): lock-file bookkeeping under /run/lock/ocrd.jobs; looks like
  # it is superseded by the database update below - confirm before removing.
  sed -i "1s/PID=.*/RETVAL=$retval/" "/run/lock/ocrd.jobs/$REMOTEDIR"
  sed -i "2a TIME_TERMINATED=$(date --rfc-3339=seconds)" "/run/lock/ocrd.jobs/$REMOTEDIR"
  # HOME=/tmp is set for mongosh only; output goes to syslog at debug level
  HOME=/tmp mongosh --quiet --norc --eval "use ocrd" --eval "db.OcrdJob.findOneAndUpdate( {
pid: $PID }, { \$set: {
time_terminated: ISODate(\"$(date --rfc-3339=seconds)\"),
return_code: $retval
}, \$unset: {
pid: \"\"
}})" "$DB_CONNECTION" | logger -p user.debug -t "$TASK"
}

init_task() {
Expand Down Expand Up @@ -238,21 +241,25 @@ post_process_to_mets() {
kitodo_production_task_action() {
ACTION=""
MESSAGE="${2}"
JOBCOMPLETE=0
case ${1} in
1)
ACTION="COMMENT"
;;
2)
ACTION="ERROR_OPEN"
JOBCOMPLETE=1
;;
3)
ACTION="ERROR_CLOSE"
JOBCOMPLETE=1
;;
4)
ACTION="PROCESS"
;;
5)
ACTION="CLOSE"
JOBCOMPLETE=1
;;
*)
logger -p user.error -t $TASK "Unknown task action type"
Expand All @@ -265,14 +272,16 @@ kitodo_production_task_action() {
elif test "$ACTION" == "CLOSE"; then
java -Dlog4j2.configurationFile=$ACTIVEMQ_CLIENT_LOG4J2 -jar "$ACTIVEMQ_CLIENT" "tcp://$ACTIVEMQ?closeAsync=false" "$ACTIVEMQ_CLIENT_QUEUE" $TASK_ID "$MESSAGE"
fi
if ((JOBCOMPLETE)); then
logret # communicate retval 0
fi
fi
logret # communicate retval 0
}

# Send a comment for the current task via kitodo_production_task_action
# (action type 1, which that function maps to COMMENT).
# Arguments: $1 - message text; if empty or missing, nothing is sent and a
#                 notice is written to syslog instead
# Globals:   TASK (read, syslog tag)
kitodo_production_task_action_comment() {
  if test -n "${1}"; then
    kitodo_production_task_action 1 "${1}"
  else
    logger -p user.info -t "$TASK" "Could not send task info cause no message was specified"
  fi
}
Expand Down Expand Up @@ -309,6 +318,5 @@ close() {
# become synchronous again
logger -p user.info -t $TASK "ocr_exit in sync mode - wait until the processing is completed"
wait $!
#rm -f /run/lock/ocrd.jobs/$REMOTEDIR
fi
}
7 changes: 7 additions & 0 deletions sampo.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
match_uri '^/$' list_endpoints

match_uri '^/for_production|^/process_images' run_external_script for_production.sh

match_uri '^/for_presentation|^/process_mets' run_external_script for_presentation.sh

match_uri '^/cancel_job/(.*)$' run_external_script kill
16 changes: 9 additions & 7 deletions startup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ echo "umask $UMASK" >>/.ssh/rc

# removes read/write/execute permissions from group and others, but preserves whatever permissions the owner had
chmod go-rwx /.ssh/*
chmod go+rwx /run/lock/ocrd.jobs

# set owner and group
chown -R $UID:$GID /.ssh
Expand All @@ -46,17 +45,20 @@ echo ocrd:x:$UID:$GID:SSH user:/:/bin/bash >>/etc/passwd
# save password information
echo ocrd:*:19020:0:99999:7::: >>/etc/shadow

# start ssh as daemon and send output to standard error
#/usr/sbin/sshd -D -e
service ssh start

# Comment out imklog to prevent startup problems of rsyslog
/bin/sed -i '/imklog/s/^/#/' /etc/rsyslog.conf

# enable rsyslog UDP reception on port 514
/bin/sed -i '/imudp/s/^#//' /etc/rsyslog.conf

# start syslog
service rsyslog start

# start ssh as daemon and send output to standard error
#/usr/sbin/sshd -D -e
service ssh start

# start REST webservice
socat -d -ly TCP-LISTEN:4004,reuseaddr,fork,pf=ip4 exec:sampo.sh &

sleep 2
# connect syslog to container stdout
tail -f /var/log/syslog

0 comments on commit c5d4f77

Please sign in to comment.