forked from bertsky/ocrd_detectron2
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit aeca7e3
Showing
12 changed files
with
1,072 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
|
||
# vim tmp | ||
*.swp | ||
*.swo | ||
|
||
# emacs bkup | ||
*~ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
[MASTER] | ||
extension-pkg-whitelist=cv2 | ||
|
||
[MESSAGES CONTROL] | ||
disable = | ||
ungrouped-imports, | ||
bad-continuation, | ||
missing-docstring, | ||
no-self-use, | ||
superfluous-parens, | ||
invalid-name, | ||
line-too-long, | ||
too-many-arguments, | ||
too-many-branches, | ||
too-many-statements, | ||
too-many-locals, | ||
too-few-public-methods, | ||
too-many-nested-blocks, | ||
wrong-import-order, | ||
duplicate-code | ||
|
||
# allow non-snake-case identifiers: | ||
good-names=n,i |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
PYTHON = python3
PIP = pip3
PYTHONIOENCODING=utf8

# Print a short overview of the available targets and variables.
help:
	@echo
	@echo " Targets"
	@echo
	@echo " deps Install only Python deps via pip"
	@echo " install Install full Python package via pip"
	@echo
	@echo " Variables"
	@echo " PYTHON"
	@echo " CUDA_VERSION override detection of CUDA runtime version (e.g. 11.3)"

# Install Python deps via pip
# There is no prebuilt wheel for detectron2 on PyPI, and the wheels depend on CUDA and Torch version.
# See https://github.com/facebookresearch/detectron2/blob/main/INSTALL.md#install-pre-built-detectron2
# and https://github.com/facebookresearch/detectron2/issues/969
# While there is a web site which lists them, which works with `pip -f`, this unfortunately cannot
# be encapsulated via setuptools, see https://github.com/pypa/pip/issues/5898
# and https://stackoverflow.com/questions/3472430/how-can-i-make-setuptools-install-a-package-thats-not-on-pypi
# and https://github.com/pypa/pip/issues/4187
#
# The CUDA runtime version is taken from $CUDA_VERSION if set; otherwise it is
# probed from (in this order) /usr/local/cuda/version.txt, nvcc, nvidia-smi and
# pkg-config. The dots are then stripped (11.3 -> 113) to select the wheel index.
#
# Notes on the recipe:
# - It runs under make's default /bin/sh, so only POSIX constructs are used
#   (`>/dev/null 2>&1` instead of `&>/dev/null`, `tr -d .` instead of
#   `$${VAR//.}`). With a non-bash sh, `cmd &>/dev/null` would background
#   the command and make the test succeed unconditionally.
# - The index URL must be double-quoted, so that the shell actually expands
#   the version suffix.
# - The sed expression for nvidia-smi matches the "CUDA Version: X.Y" field of
#   its plain status header, hence nvidia-smi is called without options.
deps:
	if test -n "$$CUDA_VERSION"; then :; \
	elif test -s /usr/local/cuda/version.txt; then \
		CUDA_VERSION=$$(sed 's/^.* //;s/\([0-9]\+[.][0-9]\).*/\1/' /usr/local/cuda/version.txt); \
	elif command -v nvcc >/dev/null 2>&1; then \
		CUDA_VERSION=$$(nvcc --version | sed -n '/^Cuda/{s/.* release //;s/,.*//;p;}'); \
	elif command -v nvidia-smi >/dev/null 2>&1; then \
		CUDA_VERSION=$$(nvidia-smi | sed -n '/CUDA Version/{s/.*CUDA Version: *//;s/ .*//;p;}'); \
	elif command -v pkg-config >/dev/null 2>&1; then \
		CUDA_VERSION=$$(pkg-config --list-all | sed -n '/^cudart/{s/cudart-//;s/ .*//;p;q;}'); \
	else \
		echo >&2 "Cannot find CUDA runtime library"; false; \
	fi && \
	if test -z "$$CUDA_VERSION"; then \
		echo >&2 "Cannot determine CUDA runtime version"; false; \
	fi && \
	CUDA_SUFFIX=$$(echo "$$CUDA_VERSION" | tr -d .) && \
	$(PIP) install -r requirements.txt \
		-f "https://dl.fbaipublicfiles.com/detectron2/wheels/cu$$CUDA_SUFFIX/torch1.10/index.html"

# Install Python package via pip
install: deps
	$(PIP) install .

.PHONY: help deps install
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
[![PyPI version](https://badge.fury.io/py/ocrd-detectron2.svg)](https://badge.fury.io/py/ocrd-detectron2) | ||
|
||
# ocrd_detectron2 | ||
|
||
OCR-D wrapper for detectron2 based segmentation models | ||
|
||
* [Introduction](#introduction) | ||
* [Installation](#installation) | ||
* [Usage](#usage) | ||
* [OCR-D processor interface ocrd-detectron2-segment](#ocr-d-processor-interface-ocrd-detectron2-segment) | ||
* [Models](#models) | ||
* [TableBank](#tablebank) | ||
* [PubLayNet](#publaynet) | ||
* [PubLayNet](#publaynet-1) | ||
* [LayoutParser](#layoutparser) | ||
* [DocBank](#docbank) | ||
* [Testing](#testing) | ||
|
||
|
||
## Introduction | ||
|
||
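This offers [OCR-D](https://ocr-d.de) compliant [workspace processors](https://ocr-d.de/en/spec/cli) for document layout analysis, i.e. segmenting pages into regions, with models based on [Detectron2](https://github.com/facebookresearch/detectron2). | ||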
|
||
## Installation | ||
|
||
Create and activate a [virtual environment](https://packaging.python.org/tutorials/installing-packages/#creating-virtual-environments) as usual. | ||
|
||
To install Python dependencies: | ||
|
||
make deps | ||
|
||
Which is the equivalent of: | ||
|
||
pip install -r requirements.txt -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html # for CUDA 11.3 | ||
|
||
To install this module, then do: | ||
|
||
make install | ||
|
||
Which is the equivalent of: | ||
|
||
pip install . | ||
|
||
## Usage | ||
|
||
### [OCR-D processor](https://ocr-d.de/en/spec/cli) interface `ocrd-detectron2-segment` | ||
|
||
To be used with [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML) documents in an [OCR-D](https://ocr-d.de/en/about) annotation workflow. | ||
|
||
``` | ||
Usage: ocrd-detectron2-segment [OPTIONS] | ||
Detect regions with Detectron2 | ||
> Use detectron2 to segment each page into regions. | ||
> Open and deserialize PAGE input files and their respective images. | ||
> Fetch a raw and a binarized image for the page frame (possibly | ||
> cropped and deskewed). | ||
> Feed the raw image into the detectron2 predictor that has been used | ||
> to load the given model. Then, depending on the model capabilities | ||
> (whether it can do panoptic segmentation or only instance | ||
> segmentation, whether the latter can do masks or only bounding | ||
> boxes), post-process the predictions: | ||
> - panoptic segmentation: take the provided segment label map, and | ||
> apply the segment to class label map | ||
> - instance segmentation: find an optimal non-overlapping set (flat | ||
> map) of instances via non-maximum suppression; then extend / shrink | ||
> the surviving masks to fully include / exclude connected components | ||
> in the foreground that are on the boundary | ||
> Finally, find the convex hull polygon for each region, and map its | ||
> class id to a new PAGE region type (and subtype). | ||
> Produce a new output file by serialising the resulting hierarchy. | ||
Options: | ||
-I, --input-file-grp USE File group(s) used as input | ||
-O, --output-file-grp USE File group(s) used as output | ||
-g, --page-id ID Physical page ID(s) to process | ||
--overwrite Remove existing output pages/images | ||
(with --page-id, remove only those) | ||
-p, --parameter JSON-PATH Parameters, either verbatim JSON string | ||
or JSON file path | ||
-P, --param-override KEY VAL Override a single JSON object key-value pair, | ||
taking precedence over --parameter | ||
-m, --mets URL-PATH URL or file path of METS to process | ||
-w, --working-dir PATH Working directory of local workspace | ||
-l, --log-level [OFF|ERROR|WARN|INFO|DEBUG|TRACE] | ||
Log level | ||
-C, --show-resource RESNAME Dump the content of processor resource RESNAME | ||
-L, --list-resources List names of processor resources | ||
-J, --dump-json Dump tool description as JSON and exit | ||
-h, --help This help message | ||
-V, --version Show version | ||
Parameters: | ||
"categories" [array - ["TextRegion:paragraph", "TextRegion:heading", | ||
"TextRegion:list-label", "TableRegion", "ImageRegion"]] | ||
maps region category (position) to region type | ||
"min_confidence" [number - 0.5] | ||
confidence threshold for detections | ||
"model_config" [string - REQUIRED] | ||
path name of model config | ||
"model_weights" [string - REQUIRED] | ||
path name of model weights | ||
"device" [string - "cuda"] | ||
select computing device for Torch (e.g. cpu or cuda:0); will fall | ||
back to CPU if no GPU is available | ||
``` | ||
|
||
Example: | ||
|
||
ocrd resmgr download -n ocrd-detectron2-segment https://layoutlm.blob.core.windows.net/tablebank/model_zoo/detection/All_X152/All_X152.yaml | ||
ocrd resmgr download -n ocrd-detectron2-segment https://layoutlm.blob.core.windows.net/tablebank/model_zoo/detection/All_X152/model_final.pth | ||
ocrd-detectron2-segment -I OCR-D-BIN -O OCR-D-SEG-TAB -P categories '["TableRegion"]' -P model_config All_X152.yaml -P model_weights model_final.pth -P min_confidence 0.1 | ||
|
||
## Models | ||
|
||
> Note: These are just examples, no exhaustive search was done yet! | ||
> Note: Make sure you unpack first if the download link is an archive. Also, the filename suffix (.pth vs .pkl) of the weight file does matter! | ||
### [TableBank](https://github.com/doc-analysis/TableBank) | ||
|
||
X152-FPN [config](https://layoutlm.blob.core.windows.net/tablebank/model_zoo/detection/All_X152/All_X152.yaml)|[weights](https://layoutlm.blob.core.windows.net/tablebank/model_zoo/detection/All_X152/model_final.pth)|`["TableRegion"]` | ||
|
||
### [PubLayNet](https://github.com/hpanwar08/detectron2) | ||
|
||
R50-FPN [config](https://github.com/hpanwar08/detectron2/raw/master/configs/DLA_mask_rcnn_R_50_FPN_3x.yaml)|[weights](https://www.dropbox.com/sh/44ez171b2qaocd2/AAB0huidzzOXeo99QdplZRjua)|`["TextRegion:paragraph", "TextRegion:heading", "TextRegion:list-label", "TableRegion", "ImageRegion"]` | ||
|
||
R101-FPN [config](https://github.com/hpanwar08/detectron2/raw/master/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml)|[weights](https://www.dropbox.com/sh/wgt9skz67usliei/AAD9n6qbsyMz1Y3CwpZpHXCpa)|`["TextRegion:paragraph", "TextRegion:heading", "TextRegion:list-label", "TableRegion", "ImageRegion"]` | ||
|
||
X101-FPN [config](https://github.com/hpanwar08/detectron2/raw/master/configs/DLA_mask_rcnn_X_101_32x8d_FPN_3x.yaml)|[weights](https://www.dropbox.com/sh/1098ym6vhad4zi6/AABe16eSdY_34KGp52W0ruwha)|`["TextRegion:paragraph", "TextRegion:heading", "TextRegion:list-label", "TableRegion", "ImageRegion"]` | ||
|
||
### [PubLayNet](https://github.com/JPLeoRX/detectron2-publaynet) | ||
|
||
R50-FPN [config](https://github.com/facebookresearch/detectron2/blob/main/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml)|[weights](https://keybase.pub/jpleorx/detectron2-publaynet/mask_rcnn_R_50_FPN_3x/model_final.pth)|`["TextRegion:paragraph", "TextRegion:heading", "TextRegion:list-label", "TableRegion", "ImageRegion"]` | ||
|
||
R101-FPN [config](https://github.com/facebookresearch/detectron2/blob/main/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml)|[weights](https://keybase.pub/jpleorx/detectron2-publaynet/mask_rcnn_R_101_FPN_3x/model_final.pth)|`["TextRegion:paragraph", "TextRegion:heading", "TextRegion:list-label", "TableRegion", "ImageRegion"]` | ||
|
||
### [LayoutParser](https://github.com/Layout-Parser/layout-parser/blob/master/src/layoutparser/models/detectron2/catalog.py) | ||
|
||
provides different model variants of various depths for multiple datasets: | ||
- [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) (Medical Research Papers) | ||
- [TableBank](https://doc-analysis.github.io/tablebank-page/index.html) (Tables Computer Typesetting) | ||
- [PRImALayout](https://www.primaresearch.org/dataset/) (Various Computer Typesetting) | ||
- [HJDataset](https://dell-research-harvard.github.io/HJDataset/) (Historical Japanese Magazines) | ||
- [NewspaperNavigator](https://news-navigator.labs.loc.gov/) (Historical Newspapers) | ||
- [Math Formula Detection](http://transcriptorium.eu/~htrcontest/MathsICDAR2021/) | ||
|
||
See [here](https://github.com/Layout-Parser/layout-parser/blob/master/docs/notes/modelzoo.md) for an overview. You will have to adapt the label map to conform to [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML) region (sub)types accordingly. | ||
|
||
### [DocBank](https://github.com/doc-analysis/DocBank/blob/master/MODEL_ZOO.md) | ||
|
||
X101-FPN [archive](https://layoutlm.blob.core.windows.net/docbank/model_zoo/X101.zip) | ||
|
||
Proposed mappings: | ||
- `["TextRegion:heading", "TextRegion:credit", "TextRegion:caption", "TextRegion:other", "MathsRegion", "GraphicRegion", "TextRegion:footer", "TextRegion:floating", "TextRegion:paragraph", "TextRegion:endnote", "TextRegion:heading", "TableRegion", "TextRegion:heading"]` (using only predefined `@type`)
- `["TextRegion:abstract", "TextRegion:author", "TextRegion:caption", "TextRegion:date", "MathsRegion", "GraphicRegion", "TextRegion:footer", "TextRegion:list", "TextRegion:paragraph", "TextRegion:reference", "TextRegion:heading", "TableRegion", "TextRegion:title"]` (using `@custom` as well) | ||
|
||
## Testing | ||
|
||
none yet | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
ocrd_detectron2/ocrd-tool.json |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import click | ||
|
||
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor | ||
from .segment import Detectron2Segment | ||
|
||
# Command-line entry point for the `ocrd-detectron2-segment` processor.
# Note: deliberately no docstring here, since click would use it as help text.
@click.command()
@ocrd_cli_options
def ocrd_detectron2_segment(*args, **kwargs):
    # Forward all positional and keyword arguments unchanged to the OCR-D
    # wrapper, together with the processor class implemented by this package.
    processor_class = Detectron2Segment
    return ocrd_cli_wrap_processor(processor_class, *args, **kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
import json | ||
from pkg_resources import resource_string | ||
|
||
# Read the `ocrd-tool.json` resource located relative to this module,
# decode it as UTF-8 text and parse it into the OCRD_TOOL object.
_ocrd_tool_text = resource_string(__name__, 'ocrd-tool.json').decode('utf8')
OCRD_TOOL = json.loads(_ocrd_tool_text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
{ | ||
"git_url": "https://github.com/bertsky/ocrd_detectron2", | ||
"version": "0.1.0", | ||
"tools": { | ||
"ocrd-detectron2-segment": { | ||
"executable": "ocrd-detectron2-segment", | ||
"categories": ["Layout analysis"], | ||
"steps": ["layout/segmentation/region"], | ||
"description": "Detect regions with Detectron2", | ||
"input_file_grp": [ | ||
"OCR-D-IMG" | ||
], | ||
"output_file_grp": [ | ||
"OCR-D-SEG-REGION" | ||
], | ||
"parameters": { | ||
"categories": { | ||
"type": "array", | ||
"default": ["TextRegion:paragraph", "TextRegion:heading", "TextRegion:list-label", "TableRegion", "ImageRegion"], | ||
"description": "maps each region category (position) of the model to a PAGE region type (and subtype if separated by colon)" | ||
}, | ||
"min_confidence": { | ||
"type": "number", | ||
"format": "float", | ||
"default": 0.5, | ||
"description": "confidence threshold for detections" | ||
}, | ||
"model_config": { | ||
"type": "string", | ||
"format": "uri", | ||
"content-type": "text/yaml", | ||
"required": true, | ||
"description": "path name of model config" | ||
}, | ||
"model_weights": { | ||
"type": "string", | ||
"format": "uri", | ||
"content-type": "application/octet-stream", | ||
"required": true, | ||
"description": "path name of model weights" | ||
}, | ||
"device": { | ||
"type": "string", | ||
"default": "cuda", | ||
"description": "select computing device for Torch (e.g. cpu or cuda:0); will fall back to CPU if no GPU is available" | ||
} | ||
} | ||
} | ||
} | ||
} |
Oops, something went wrong.