-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathprocess_images.sh
executable file
·109 lines (88 loc) · 3.6 KB
/
process_images.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/bin/bash
# OCR-D task, executed as the OCR script step of Kitodo.Production.
# Invoke it via the Manager once the images have been copied to the
# directory, for example:
#     ssh -Tn -p 9022 ocrd@ocrd-manager process_images.sh \
#         --lang deu --script Fraktur \
#         --img-subdir images --ocr-subdir ocr/alto \
#         --task-id 501543 --proc-id 3 \
#         /home/goobi/work/daten/501543
# The complete list of CLI options is printed by --help.

# Strict mode:
#   -E           functions and subshells inherit the ERR trap
#   -e           abort on any unhandled command failure
#   -u           treat expansion of unset variables as an error
#   -o pipefail  a pipeline fails if any of its stages fails
set -Eeuo pipefail
#######################################
# Parse the command line into global settings.
# Globals (written): LANGUAGE, SCRIPT, PROCESS_ID, TASK_ID, WORKFLOW,
#   VALIDATE, IMAGES_SUBDIR, RESULT_SUBDIR, PROCESS_DIR
# Globals (read): TASK (used as syslog tag; not defined in this file)
# Arguments: [OPTIONS] DIRECTORY -- see the --help text below
# Exits: 0 after printing --help; 1 when an option is missing its value
#   or when further arguments follow DIRECTORY
#######################################
parse_args() {
    # defaults, possibly overridden by the options below
    LANGUAGE=
    SCRIPT=
    PROCESS_ID=
    TASK_ID=
    WORKFLOW=/workflows/ocr-workflow-default.sh
    VALIDATE=1
    IMAGES_SUBDIR=images
    RESULT_SUBDIR=ocr/alto
    # NOTE(review): PROCESS_DIR is only assigned when a DIRECTORY argument is
    # present; without one it keeps whatever state it had before the call.
    while (($#)); do
        case "$1" in
            --help|-h)
                # unquoted delimiter: the defaults (or already parsed
                # overrides) are expanded into the help text
                cat <<EOF
SYNOPSIS:
$0 [OPTIONS] DIRECTORY
where OPTIONS can be any/all of:
--lang LANGUAGE overall language of the material to process via OCR
--script SCRIPT overall script of the material to process via OCR
--workflow FILE workflow file to use for processing, default:
$WORKFLOW
--no-validate skip comprehensive validation of workflow results
--img-subdir IMG name of the subdirectory to read images from, default:
$IMAGES_SUBDIR
--ocr-subdir OCR name of the subdirectory to write OCR results to, default:
$RESULT_SUBDIR
--proc-id ID process ID to communicate in ActiveMQ callback
--task-id ID task ID to communicate in ActiveMQ callback
--help show this message and exit
and DIRECTORY is the local path to process. The script will import
the images from DIRECTORY/IMG into a new (temporary) METS and
transfer this to the Controller for processing. After resyncing back
to the Manager, it will then extract OCR results and export them to
DIRECTORY/OCR.
If ActiveMQ is used, the script will exit directly after initialization,
and run processing in the background. Completion will then be signalled
via ActiveMQ network protocol (using the proc and task ID as message).
ENVIRONMENT VARIABLES:
CONTROLLER: host name and port of OCR-D Controller for processing
ACTIVEMQ: URL of ActiveMQ server for result callback (optional)
ACTIVEMQ_QUEUE: Protocol type handling result callbacks. Choose between "FinalizeTaskQueue" (default) or "TaskActionQueue" (optional)
ACTIVEMQ_CLIENT: path to ActiveMQ client library JAR file (optional)
EOF
                exit;;
            --no-validate)
                VALIDATE=0;;
            --lang|--script|--workflow|--img-subdir|--ocr-subdir|--proc-id|--task-id)
                # all of these consume the following argument; report a
                # missing value explicitly instead of failing on "$2"/shift
                if (($# < 2)); then
                    logger -p user.error -t "$TASK" "missing value for option $1"
                    exit 1
                fi
                case "$1" in
                    --lang) LANGUAGE="$2";;
                    --script) SCRIPT="$2";;
                    --workflow) WORKFLOW="$2";;
                    --img-subdir) IMAGES_SUBDIR="$2";;
                    --ocr-subdir) RESULT_SUBDIR="$2";;
                    --proc-id) PROCESS_ID="$2";;
                    --task-id) TASK_ID="$2";;
                esac
                shift;;
            *)
                # first non-option argument is DIRECTORY; stop parsing here
                # (without shifting, so "$1" is still DIRECTORY below)
                PROCESS_DIR="$1"
                break;;
        esac
        shift
    done
    # anything beyond DIRECTORY is an error; report only the surplus words
    if (($# > 1)); then
        logger -p user.error -t "$TASK" "invalid extra arguments ${*:2}"
        exit 1
    fi
}
# Load the shared helper library; the functions called below (init, init_task,
# ..., close) are not defined in this file -- presumably they come from there.
source ocrd_lib.sh
# hand over the full command line for initialisation
init "$@"

# run the workflow script on the Controller non-interactively and log its output locally
# subsequently validate and postprocess the results
# do all this in a subshell in the background, so we can return immediately
(
    init_task
    pre_process_to_workdir
    pre_sync_workdir
    kitodo_production_task_action_process
    ocrd_exec ocrd_import_workdir ocrd_validate_workflow ocrd_process_workflow
    post_sync_workdir
    # VALIDATE is cleared by --no-validate
    if ((VALIDATE)); then post_validate_workdir; fi
    post_process_to_procdir
    kitodo_production_task_action_close
    # stdout+stderr of the subshell are appended to the log file in the working
    # directory and forwarded to syslog; WORKDIR and TASK are quoted so that
    # values containing whitespace or glob characters stay single words
) |& tee -a "$WORKDIR/ocrd.log" 2>&1 | logger -p user.info -t "$TASK" &>/dev/null & # without output redirect, ssh will not close the connection upon exit, cf. #9

close