From ffdc7761928ec60cf03dbd408d94c4fddccbf08c Mon Sep 17 00:00:00 2001 From: vahid Date: Fri, 1 Jul 2022 03:10:01 +0200 Subject: [PATCH 1/7] hybrid cnn & transformer model is integrated --- requirements.txt | 2 +- sbb_binarize/cli.py | 2 +- sbb_binarize/sbb_binarize.py | 102 +++++++++++++++++++++++++++++++++-- 3 files changed, 101 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index a847f92..4bce704 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ numpy setuptools >= 41 opencv-python-headless ocrd >= 2.22.3 -tensorflow >= 2.4.0 +tensorflow == 2.4.* diff --git a/sbb_binarize/cli.py b/sbb_binarize/cli.py index 0176e20..b7eb574 100644 --- a/sbb_binarize/cli.py +++ b/sbb_binarize/cli.py @@ -1,7 +1,7 @@ """ sbb_binarize CLI """ - +import click from click import command, option, argument, version_option, types from .sbb_binarize import SbbBinarizer diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index 5424098..639847b 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -17,14 +17,72 @@ import tensorflow as tf from tensorflow.keras.models import load_model from tensorflow.python.keras import backend as tensorflow_backend +from tensorflow.keras import layers +import tensorflow.keras.losses +from tensorflow.keras.layers import * sys.stderr = stderr import logging + +projection_dim = 64 +patch_size = 1 +num_patches =14*14 + def resize_image(img_in, input_height, input_width): return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) + +class Patches(layers.Layer): + def __init__(self, **kwargs): + super(Patches, self).__init__() + self.patch_size = patch_size + + def call(self, images): + batch_size = tf.shape(images)[0] + patches = tf.image.extract_patches( + images=images, + sizes=[1, self.patch_size, self.patch_size, 1], + strides=[1, self.patch_size, self.patch_size, 1], + rates=[1, 1, 1, 1], + padding="VALID", + ) + patch_dims = 
patches.shape[-1] + patches = tf.reshape(patches, [batch_size, -1, patch_dims]) + return patches + def get_config(self): + + config = super().get_config().copy() + config.update({ + 'patch_size': self.patch_size, + }) + return config + + +class PatchEncoder(layers.Layer): + def __init__(self, **kwargs): + super(PatchEncoder, self).__init__() + self.num_patches = num_patches + self.projection = layers.Dense(units=projection_dim) + self.position_embedding = layers.Embedding( + input_dim=num_patches, output_dim=projection_dim + ) + + def call(self, patch): + positions = tf.range(start=0, limit=self.num_patches, delta=1) + encoded = self.projection(patch) + self.position_embedding(positions) + return encoded + def get_config(self): + + config = super().get_config().copy() + config.update({ + 'num_patches': self.num_patches, + 'projection': self.projection, + 'position_embedding': self.position_embedding, + }) + return config + class SbbBinarizer: def __init__(self, model_dir, logger=None): @@ -52,7 +110,10 @@ def end_session(self): del self.session def load_model(self, model_name): - model = load_model(join(self.model_dir, model_name), compile=False) + try: + model = load_model(join(self.model_dir, model_name), compile=False) + except: + model = load_model(join(self.model_dir, model_name) , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) model_height = model.layers[len(model.layers)-1].output_shape[1] model_width = model.layers[len(model.layers)-1].output_shape[2] n_classes = model.layers[len(model.layers)-1].output_shape[3] @@ -153,12 +214,47 @@ def predict(self, model_in, img, use_patches): index_y_d = img_h - model_height img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + h_res = int( img_patch.shape[0]/1.05) + w_res = int( img_patch.shape[1]/1.05) + + img_patch_resize = resize_image(img_patch, h_res, w_res) + + img_patch_resized_padded 
=np.ones((img_patch.shape[0],img_patch.shape[1],img_patch.shape[2])).astype(float)#self.do_padding() + + h_start=int( abs(img_patch.shape[0]-img_patch_resize.shape[0])/2. ) + + w_start=int( abs(img_patch.shape[1]-img_patch_resize.shape[1])/2. ) + + img_patch_resized_padded[h_start:h_start+img_patch_resize.shape[0],w_start:w_start+img_patch_resize.shape[1],:]=np.copy(img_patch_resize[:,:,:]) + + label_p_pred_padded = model.predict(img_patch_resized_padded.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) - seg = np.argmax(label_p_pred, axis=3)[0] + #seg = np.argmax(label_p_pred, axis=3)[0] + + #label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) - seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + seg = np.argmax(label_p_pred, axis=3)[0] + + + seg_padded = np.argmax(label_p_pred_padded, axis=3)[0] + + seg_padded_take_core = seg_padded[h_start:h_start+img_patch_resize.shape[0],w_start:w_start+img_patch_resize.shape[1]] + + seg_padded_take_core_org_size= resize_image(seg_padded_take_core, img_patch.shape[0], img_patch.shape[1]) + + #print(seg_padded_take_core_org_size,'sag padded') + #print(seg,'sag') + + seg_tot = seg_padded_take_core_org_size+0#seg + + seg_tot[seg_tot>1]=1 + + seg_color = np.repeat(seg_tot[:, :, np.newaxis], 3, axis=2) + + #seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) if i == 0 and j == 0: seg_color = seg_color[0:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :] From de89e7df12ba29c6f284ae95b502a37016cdb2b8 Mon Sep 17 00:00:00 2001 From: vahid Date: Thu, 18 Aug 2022 17:35:11 +0200 Subject: [PATCH 2/7] padding the whole image in order to avoid artifacts on the page boundaries --- requirements.txt | 2 +- sbb_binarize/sbb_binarize.py | 57 ++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 27 deletions(-) diff
--git a/requirements.txt b/requirements.txt index 4bce704..4bad5bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ numpy setuptools >= 41 opencv-python-headless ocrd >= 2.22.3 -tensorflow == 2.4.* +tensorflow-gpu >= 2.6.0 diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index 639847b..39e349c 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -112,8 +112,10 @@ def end_session(self): def load_model(self, model_name): try: model = load_model(join(self.model_dir, model_name), compile=False) + self.margin_percent = 0.1 except: model = load_model(join(self.model_dir, model_name) , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) + self.margin_percent = 0.15 model_height = model.layers[len(model.layers)-1].output_shape[1] model_width = model.layers[len(model.layers)-1].output_shape[2] n_classes = model.layers[len(model.layers)-1].output_shape[3] @@ -156,14 +158,31 @@ def predict(self, model_in, img, use_patches): index_start_w = 0 img_padded = np.copy(img) + img_org_h_pad = img_padded.shape[0] + img_org_w_pad = img_padded.shape[1] - img = np.copy(img_padded) + index_start_h_alw = 100 + index_start_w_alw = 100 + + img_padded_alw = np.zeros(( img_padded.shape[0]+2*index_start_h_alw, img.shape[1]+2*index_start_w_alw, img.shape[2] )) + + + img_padded_alw [ 0: index_start_h_alw, index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:index_start_h_alw,:,:] + img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], 0:index_start_w_alw, : ] = img_padded[:,0:index_start_w_alw,:] + + img_padded_alw [ img_padded_alw.shape[0]-index_start_h_alw: img_padded_alw.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[img_padded.shape[0]-index_start_h_alw:img_padded.shape[0],:,:] + img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0],img_padded_alw.shape[1]-index_start_w_alw: 
img_padded_alw.shape[1], : ] = img_padded[:,img_padded.shape[1]-index_start_w_alw:img_padded.shape[1],:] + + img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:,:,:] + + img = np.copy(img_padded_alw) + if use_patches: - margin = int(0.1 * model_width) + margin = int(self.margin_percent * model_width) width_mid = model_width - 2 * margin height_mid = model_height - 2 * margin @@ -215,20 +234,20 @@ def predict(self, model_in, img, use_patches): img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - h_res = int( img_patch.shape[0]/1.05) - w_res = int( img_patch.shape[1]/1.05) + #h_res = int( img_patch.shape[0]/1.05) + #w_res = int( img_patch.shape[1]/1.05) - img_patch_resize = resize_image(img_patch, h_res, w_res) + #img_patch_resize = resize_image(img_patch, h_res, w_res) - img_patch_resized_padded =np.ones((img_patch.shape[0],img_patch.shape[1],img_patch.shape[2])).astype(float)#self.do_padding() + #img_patch_resized_padded =np.ones((img_patch.shape[0],img_patch.shape[1],img_patch.shape[2])).astype(float)#self.do_padding() - h_start=int( abs(img_patch.shape[0]-img_patch_resize.shape[0])/2. ) + #h_start=int( abs(img_patch.shape[0]-img_patch_resize.shape[0])/2. ) - w_start=int( abs(img_patch.shape[1]-img_patch_resize.shape[1])/2. ) + #w_start=int( abs(img_patch.shape[1]-img_patch_resize.shape[1])/2. 
) - img_patch_resized_padded[h_start:h_start+img_patch_resize.shape[0],w_start:w_start+img_patch_resize.shape[1],:]=np.copy(img_patch_resize[:,:,:]) + #img_patch_resized_padded[h_start:h_start+img_patch_resize.shape[0],w_start:w_start+img_patch_resize.shape[1],:]=np.copy(img_patch_resize[:,:,:]) - label_p_pred_padded = model.predict(img_patch_resized_padded.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) + #label_p_pred_padded = model.predict(img_patch_resized_padded.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) @@ -237,22 +256,8 @@ def predict(self, model_in, img, use_patches): #label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) seg = np.argmax(label_p_pred, axis=3)[0] - - - seg_padded = np.argmax(label_p_pred_padded, axis=3)[0] - - seg_padded_take_core = seg_padded[h_start:h_start+img_patch_resize.shape[0],w_start:w_start+img_patch_resize.shape[1]] - - seg_padded_take_core_org_size= resize_image(seg_padded_take_core, img_patch.shape[0], img_patch.shape[1]) - - #print(seg_padded_take_core_org_size,'sag padded') - #print(seg,'sag') - - seg_tot = seg_padded_take_core_org_size+0#seg - - seg_tot[seg_tot>1]=1 - seg_color = np.repeat(seg_tot[:, :, np.newaxis], 3, axis=2) + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) #seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) @@ -320,7 +325,7 @@ def predict(self, model_in, img, use_patches): prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color - + prediction_true = prediction_true[index_start_h_alw: index_start_h_alw+img_org_h_pad, index_start_w_alw: index_start_w_alw+img_org_w_pad,:] prediction_true = prediction_true[index_start_h: index_start_h+img_org_h, index_start_w: index_start_w+img_org_w,:] prediction_true = 
prediction_true.astype(np.uint8) From 85f55077a432841afe5a0ae314106515fcba2052 Mon Sep 17 00:00:00 2001 From: vahid Date: Mon, 22 Aug 2022 13:25:23 +0200 Subject: [PATCH 3/7] padding is disabled for evaluation --- sbb_binarize/sbb_binarize.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index 39e349c..f7f7ea9 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -161,21 +161,21 @@ def predict(self, model_in, img, use_patches): img_org_h_pad = img_padded.shape[0] img_org_w_pad = img_padded.shape[1] - index_start_h_alw = 100 - index_start_w_alw = 100 + index_start_h_alw = 0#100 + index_start_w_alw = 0#100 - img_padded_alw = np.zeros(( img_padded.shape[0]+2*index_start_h_alw, img.shape[1]+2*index_start_w_alw, img.shape[2] )) + #img_padded_alw = np.zeros(( img_padded.shape[0]+2*index_start_h_alw, img.shape[1]+2*index_start_w_alw, img.shape[2] )) - img_padded_alw [ 0: index_start_h_alw, index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:index_start_h_alw,:,:] - img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], 0:index_start_w_alw, : ] = img_padded[:,0:index_start_w_alw,:] + #img_padded_alw [ 0: index_start_h_alw, index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:index_start_h_alw,:,:] + #img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], 0:index_start_w_alw, : ] = img_padded[:,0:index_start_w_alw,:] - img_padded_alw [ img_padded_alw.shape[0]-index_start_h_alw: img_padded_alw.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[img_padded.shape[0]-index_start_h_alw:img_padded.shape[0],:,:] - img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0],img_padded_alw.shape[1]-index_start_w_alw: img_padded_alw.shape[1], : ] = img_padded[:,img_padded.shape[1]-index_start_w_alw:img_padded.shape[1],:] + 
#img_padded_alw [ img_padded_alw.shape[0]-index_start_h_alw: img_padded_alw.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[img_padded.shape[0]-index_start_h_alw:img_padded.shape[0],:,:] + #img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0],img_padded_alw.shape[1]-index_start_w_alw: img_padded_alw.shape[1], : ] = img_padded[:,img_padded.shape[1]-index_start_w_alw:img_padded.shape[1],:] - img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:,:,:] + #img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:,:,:] - img = np.copy(img_padded_alw) + img = np.copy(img_padded) @@ -325,7 +325,7 @@ def predict(self, model_in, img, use_patches): prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color - prediction_true = prediction_true[index_start_h_alw: index_start_h_alw+img_org_h_pad, index_start_w_alw: index_start_w_alw+img_org_w_pad,:] + #prediction_true = prediction_true[index_start_h_alw: index_start_h_alw+img_org_h_pad, index_start_w_alw: index_start_w_alw+img_org_w_pad,:] prediction_true = prediction_true[index_start_h: index_start_h+img_org_h, index_start_w: index_start_w+img_org_w,:] prediction_true = prediction_true.astype(np.uint8) From fa354e1e0f7ba8b6accf8d8454a42624d368dad8 Mon Sep 17 00:00:00 2001 From: vahid Date: Thu, 4 May 2023 11:08:56 +0200 Subject: [PATCH 4/7] loading savedmodel --- requirements.txt | 6 +++--- sbb_binarize/sbb_binarize.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4bad5bb..f475316 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ numpy -setuptools >= 41 +setuptools >= 50 opencv-python-headless -ocrd >= 2.22.3 -tensorflow-gpu >= 2.6.0 +ocrd >= 2.23.3 
+tensorflow >= 2.6.0 diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py index f7f7ea9..b754b44 100644 --- a/sbb_binarize/sbb_binarize.py +++ b/sbb_binarize/sbb_binarize.py @@ -91,7 +91,8 @@ def __init__(self, model_dir, logger=None): self.start_new_session() - self.model_files = glob('%s/*.h5' % self.model_dir) + #self.model_files = glob('%s/*.h5' % self.model_dir) + self.model_files = glob(self.model_dir+"/*/", recursive = True) self.models = [] for model_file in self.model_files: From 0ba143f291764b30c1743af37f39ee72c7567060 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 15 May 2023 19:13:52 +0200 Subject: [PATCH 5/7] Update requirements.txt --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index f475316..2f57afe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ numpy -setuptools >= 50 +setuptools >= 41 opencv-python-headless -ocrd >= 2.23.3 -tensorflow >= 2.6.0 +ocrd >= 2.38.0 +tensorflow >= 2.4.0 From cb1916a707080c9cda234355ef56d2b5f91b6d15 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 15 May 2023 19:45:29 +0200 Subject: [PATCH 6/7] Update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2f57afe..594e791 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -numpy +numpy >=1.21 setuptools >= 41 opencv-python-headless ocrd >= 2.38.0 From 0254a91eb1d21fc7cfdf60d885229b7453f017c4 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 15 May 2023 19:59:50 +0200 Subject: [PATCH 7/7] Update config.yml --- .circleci/config.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8f02829..39f1875 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -10,6 +10,7 @@ jobs: - restore_cache: keys: - ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum 
"Makefile" }} + - run: python -m pip install --upgrade pip - run: make install - run: make model - save_cache: @@ -27,6 +28,7 @@ jobs: - restore_cache: keys: - ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }} + - run: python -m pip install --upgrade pip - run: make install - run: make model - save_cache: