Skip to content

Commit

Permalink
rename for_production→process_images, for_presentation→process_mets
Browse files Browse the repository at this point in the history
  • Loading branch information
bertsky committed Feb 15, 2023
1 parent 3d468cb commit c247b2c
Show file tree
Hide file tree
Showing 6 changed files with 214 additions and 209 deletions.
7 changes: 4 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -89,17 +89,18 @@ $(DATA)/testdata-presentation:
test: test-production test-presentation

# run synchronous (without ActiveMQ)
test-production: SCRIPT = process_images.sh --proc-id 1 --lang deu --script Fraktur
test-production: $(DATA)/testdata-production
ifeq ($(NETWORK),bridge)
ssh -i $(PRIVATE) -Tn -p $(PORT) ocrd@localhost for_production.sh --proc-id 1 --lang deu --script Fraktur $(<F)
ssh -i $(PRIVATE) -Tn -p $(PORT) ocrd@localhost $(SCRIPT) $(<F)
else
docker exec -t -u ocrd `docker container ls -qf name=ocrd-manager` for_production.sh --proc-id 1 --lang deu --script Fraktur $(<F)
docker exec -t -u ocrd `docker container ls -qf name=ocrd-manager` $(SCRIPT) $(<F)
endif
test -d $</ocr/alto
test -s $</ocr/alto/00000009.tif.original.xml

test-presentation: PREFIX = https://digital.slub-dresden.de/data/kitodo/LankDres_1760234508
test-presentation: SCRIPT = for_presentation.sh --pages PHYS_0017..PHYS_0021 --img-grp ORIGINAL --url-prefix $(PREFIX)
test-presentation: SCRIPT = process_mets.sh --pages PHYS_0017..PHYS_0021 --img-grp ORIGINAL --url-prefix $(PREFIX)
test-presentation: $(DATA)/testdata-presentation
test-presentation:
ifeq ($(NETWORK),bridge)
Expand Down
14 changes: 8 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,13 @@ The data management will depend on which Kitodo context you want to integrate in

#### From image to ALTO files

For **Kitodo.Production**, there is a preconfigured script `for_production.sh` which takes the following arguments:
For **Kitodo.Production**, there is a preconfigured script `process_images.sh` (or `for_production.sh`)
which takes the following arguments:

```sh
SYNOPSIS:

for_production.sh [OPTIONS] DIRECTORY
process_images.sh [OPTIONS] DIRECTORY

where OPTIONS can be any/all of:
--lang LANGUAGE overall language of the material to process via OCR
Expand Down Expand Up @@ -144,17 +145,18 @@ It can be replaced with the (path) name of any workflow script mounted under `/d

For example (assuming `testdata` is a directory with image files mounted under `/data`):

ssh -T -p 9022 ocrd@manager for_production.sh --proc-id 1 --task-id 3 --lang deu --script Fraktur --workflow myocr.sh testdata
ssh -T -p 9022 ocrd@manager process_images.sh --proc-id 1 --task-id 3 --lang deu --script Fraktur --workflow myocr.sh testdata


#### From METS to METS file

For **Kitodo.Presentation**, there is a preconfigured script `for_presentation.sh` which takes the following arguments:
For **Kitodo.Presentation**, there is a preconfigured script `process_mets.sh` (or `for_presentation.sh`)
which takes the following arguments:

```sh
SYNOPSIS:

for_presentation.sh [OPTIONS] METS
process_mets.sh [OPTIONS] METS

where OPTIONS can be any/all of:
--workflow FILE workflow file to use for processing, default:
Expand Down Expand Up @@ -183,7 +185,7 @@ The same goes here for the `workflow parameter`.

For example (assuming `testdata` is a directory with image files mounted under `/data`):

ssh -T -p 9022 ocrd@manager for_presentation.sh --lang deu --script Fraktur --workflow myocr.sh testdata/mets.xml
ssh -T -p 9022 ocrd@manager process_mets.sh --lang deu --script Fraktur --workflow myocr.sh testdata/mets.xml


### Data transfer
Expand Down
97 changes: 0 additions & 97 deletions for_presentation.sh

This file was deleted.

1 change: 1 addition & 0 deletions for_presentation.sh
103 changes: 0 additions & 103 deletions for_production.sh

This file was deleted.

1 change: 1 addition & 0 deletions for_production.sh
103 changes: 103 additions & 0 deletions process_images.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/bin/bash
# OCR-D task to be run as OCR script step by Kitodo.Production
# To be called (after copying images to directory) via Manager, e.g.:
# ssh -Tn -p 9022 ocrd@ocrd-manager process_images.sh \
# --lang deu --script Fraktur \
# --img-subdir images --ocr-subdir ocr/alto \
# --task-id 501543 --proc-id 3 \
# /home/goobi/work/daten/501543
# full CLI options: see --help

set -Eeu
set -o pipefail

# Parse the command line of this script into global shell variables.
#
# Arguments: the script's positional parameters ("$@").
# Sets (globals):
#   LANGUAGE, SCRIPT, PROCESS_ID, TASK_ID  - empty unless given via option
#   WORKFLOW, IMAGES_SUBDIR, RESULT_SUBDIR - preset to defaults, overridable
#   PROCESS_DIR                            - first non-option argument
# Exits:
#   0 after printing the usage text for --help/-h
#   1 if an option is missing its value, or if anything follows DIRECTORY
#
# NOTE(review): PROCESS_DIR is not reset here, so when no DIRECTORY is
# given it keeps whatever value it had before (possibly unset) - confirm
# that the caller copes with this.
# NOTE(review): any unrecognised argument, including a mistyped option,
# is taken as DIRECTORY.
parse_args() {
    # Tag for syslog messages; fall back to the script name if TASK has
    # not been set yet (TASK is not assigned anywhere in this file).
    local tag="${TASK:-${0##*/}}"

    LANGUAGE=
    SCRIPT=
    PROCESS_ID=
    TASK_ID=
    WORKFLOW=ocr-workflow-default.sh
    IMAGES_SUBDIR=images
    RESULT_SUBDIR=ocr/alto
    while (($#)); do
        case "$1" in
            --help|-h) cat <<EOF
SYNOPSIS:
$0 [OPTIONS] DIRECTORY
where OPTIONS can be any/all of:
--lang LANGUAGE overall language of the material to process via OCR
--script SCRIPT overall script of the material to process via OCR
--workflow FILE workflow file to use for processing, default:
$WORKFLOW
--img-subdir IMG name of the subdirectory to read images from, default:
$IMAGES_SUBDIR
--ocr-subdir OCR name of the subdirectory to write OCR results to, default:
$RESULT_SUBDIR
--proc-id ID process ID to communicate in ActiveMQ callback
--task-id ID task ID to communicate in ActiveMQ callback
--help show this message and exit
and DIRECTORY is the local path to process. The script will import
the images from DIRECTORY/IMG into a new (temporary) METS and
transfer this to the Controller for processing. After resyncing back
to the Manager, it will then extract OCR results and export them to
DIRECTORY/OCR.
If ActiveMQ is used, the script will exit directly after initialization,
and run processing in the background. Completion will then be signalled
via ActiveMQ network protocol (using the proc and task ID as message).
ENVIRONMENT VARIABLES:
CONTROLLER: host name and port of OCR-D Controller for processing
ACTIVEMQ: URL of ActiveMQ server for result callback (optional)
ACTIVEMQ_CLIENT: path to ActiveMQ client library JAR file (optional)
EOF
                exit;;
            --lang|--script|--workflow|--img-subdir|--ocr-subdir|--proc-id|--task-id)
                # All of these take exactly one value. Check that it is
                # present instead of tripping over an unbound "$2".
                if (($# < 2)); then
                    echo "$0: option $1 requires an argument" >&2
                    logger -p user.error -t "$tag" "option $1 requires an argument"
                    exit 1
                fi
                case "$1" in
                    --lang) LANGUAGE="$2";;
                    --script) SCRIPT="$2";;
                    --workflow) WORKFLOW="$2";;
                    --img-subdir) IMAGES_SUBDIR="$2";;
                    --ocr-subdir) RESULT_SUBDIR="$2";;
                    --proc-id) PROCESS_ID="$2";;
                    --task-id) TASK_ID="$2";;
                esac
                # drop the option name here; the value is dropped by the
                # common shift at the end of the loop body
                shift;;
            *) PROCESS_DIR="$1";
                # stop without shifting, so DIRECTORY stays in "$@" and
                # the check below can detect trailing arguments
                break;;
        esac
        shift
    done
    # At most DIRECTORY itself may remain at this point.
    if (($#>1)); then
        logger -p user.error -t "$tag" "invalid extra arguments $*"
        exit 1
    fi
}

source ocrd_lib.sh

# Script driver. None of the functions called below are defined in this
# file; presumably they come from ocrd_lib.sh sourced above, and init is
# expected to provide TASK and WORKDIR - verify against that library.
init "$@"

# run the workflow script on the Controller non-interactively and log its output locally
# subsequently validate and postprocess the results
# do all this in a subshell in the background, so we can return immediately
(
    init_task

    pre_process_to_workdir

    pre_sync_workdir

    ocrd_exec ocrd_import_workdir ocrd_validate_workflow ocrd_process_workflow

    post_sync_workdir

    post_validate_workdir

    post_process_to_procdir

    close_task

    # stdout and stderr of the subshell are appended to the log file in
    # WORKDIR and forwarded to syslog; the expansions are quoted so that
    # paths or tags containing whitespace or glob characters stay intact
) |& tee -a "$WORKDIR/ocrd.log" | logger -p user.info -t "$TASK" &>/dev/null & # without output redirect, ssh will not close the connection upon exit, cf. #9

close
Loading

0 comments on commit c247b2c

Please sign in to comment.