Skip to content

Commit

Permalink
allow processing of rgb images
Browse files Browse the repository at this point in the history
  • Loading branch information
Julian Balling committed Feb 17, 2022
1 parent 0a4ebf6 commit 1acaac0
Showing 1 changed file with 37 additions and 35 deletions.
72 changes: 37 additions & 35 deletions ocrd_detectron2/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,45 +187,47 @@ class id to a new PAGE region type (and subtype).
else:
zoomed = 1.0

# for morphological post-processing, we will need the binarized image, too
page_image_bin, _, _ = self.workspace.image_from_page(
page, page_id,
feature_selector='binarized')
# workaround for OCR-D/core#687:
if 0 < abs(page_image_raw.width - page_image_bin.width) <= 2:
diff = page_image_raw.width - page_image_bin.width
if diff > 0:
page_image_raw = crop_image(
page_image_raw,
(int(np.floor(diff / 2)), 0,
page_image_raw.width - int(np.ceil(diff / 2)),
page_image_raw.height))
else:
page_image_bin = crop_image(
page_image_bin,
(int(np.floor(-diff / 2)), 0,
page_image_bin.width - int(np.ceil(-diff / 2)),
page_image_bin.height))
if 0 < abs(page_image_raw.height - page_image_bin.height) <= 2:
diff = page_image_raw.height - page_image_bin.height
if diff > 0:
page_image_raw = crop_image(
page_image_raw,
(0, int(np.floor(diff / 2)),
page_image_raw.width,
page_image_raw.height - int(np.ceil(diff / 2))))
else:
page_image_bin = crop_image(
page_image_bin,
(0, int(np.floor(-diff / 2)),
page_image_bin.width,
page_image_bin.height - int(np.ceil(-diff / 2))))
# check whether input image is binarized
if page_image_info.photometricInterpretation == "1":
# for morphological post-processing, we will need the binarized image, too
page_image_bin, _, _ = self.workspace.image_from_page(
page, page_id,
feature_selector='binarized')
# workaround for OCR-D/core#687:
if 0 < abs(page_image_raw.width - page_image_bin.width) <= 2:
diff = page_image_raw.width - page_image_bin.width
if diff > 0:
page_image_raw = crop_image(
page_image_raw,
(int(np.floor(diff / 2)), 0,
page_image_raw.width - int(np.ceil(diff / 2)),
page_image_raw.height))
else:
page_image_bin = crop_image(
page_image_bin,
(int(np.floor(-diff / 2)), 0,
page_image_bin.width - int(np.ceil(-diff / 2)),
page_image_bin.height))
if 0 < abs(page_image_raw.height - page_image_bin.height) <= 2:
diff = page_image_raw.height - page_image_bin.height
if diff > 0:
page_image_raw = crop_image(
page_image_raw,
(0, int(np.floor(diff / 2)),
page_image_raw.width,
page_image_raw.height - int(np.ceil(diff / 2))))
else:
page_image_bin = crop_image(
page_image_bin,
(0, int(np.floor(-diff / 2)),
page_image_bin.width,
page_image_bin.height - int(np.ceil(-diff / 2))))

# ensure RGB (if raw was merely grayscale)
if page_image_raw.mode == '1':
page_image_raw = page_image_raw.convert('L')
page_image_raw = page_image_raw.convert(mode='RGB')
page_image_bin = page_image_bin.convert(mode='1')
page_image_bin = page_image_raw.convert(mode='1')
# reduce resolution to 300 DPI max
if zoomed != 1.0:
page_image_bin = page_image_bin.resize(
Expand Down Expand Up @@ -267,7 +269,7 @@ def _process_page(self, page, ignore, page_coords, page_id, page_array_raw, page
#page.set_TextRegion([])
page.set_custom('coords=%s' % page_coords['transform'])
height, width, _ = page_array_raw.shape
# get connected components to estimate scale
# get connected components to estimate scale
_, components = cv2.connectedComponents(page_array_bin.astype(np.uint8))
# estimate glyph scale (roughly)
_, counts = np.unique(components, return_counts=True)
Expand Down

0 comments on commit 1acaac0

Please sign in to comment.