From ffdc7761928ec60cf03dbd408d94c4fddccbf08c Mon Sep 17 00:00:00 2001
From: vahid <vahid@linux-x3no.fritz.box>
Date: Fri, 1 Jul 2022 03:10:01 +0200
Subject: [PATCH 1/7] hybrid cnn & transformer model is integrated

---
 requirements.txt             |   2 +-
 sbb_binarize/cli.py          |   2 +-
 sbb_binarize/sbb_binarize.py | 102 +++++++++++++++++++++++++++++++++--
 3 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index a847f92..4bce704 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,4 @@ numpy
 setuptools >= 41
 opencv-python-headless
 ocrd >= 2.22.3
-tensorflow >= 2.4.0
+tensorflow == 2.4.*
diff --git a/sbb_binarize/cli.py b/sbb_binarize/cli.py
index 0176e20..b7eb574 100644
--- a/sbb_binarize/cli.py
+++ b/sbb_binarize/cli.py
@@ -1,7 +1,7 @@
 """
 sbb_binarize CLI
 """
-
+import click
 from click import command, option, argument, version_option, types
 from .sbb_binarize import SbbBinarizer
 
diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py
index 5424098..639847b 100644
--- a/sbb_binarize/sbb_binarize.py
+++ b/sbb_binarize/sbb_binarize.py
@@ -17,14 +17,72 @@
 import tensorflow as tf
 from tensorflow.keras.models import load_model
 from tensorflow.python.keras import backend as tensorflow_backend
+from tensorflow.keras import layers
+import tensorflow.keras.losses
+from tensorflow.keras.layers import *
 sys.stderr = stderr
 
 
 import logging
 
+
+projection_dim = 64  # Dense units and Embedding output_dim used by PatchEncoder
+patch_size = 1  # side length of the square patches extracted by Patches
+num_patches =14*14  # Embedding input_dim / number of positions in PatchEncoder
+
 def resize_image(img_in, input_height, input_width):
     return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
 
+
+class Patches(layers.Layer):
+    """Split an image batch into flattened, non-overlapping square patches."""
+
+    def __init__(self, **kwargs):
+        # Configs written by get_config() carry 'patch_size'; pop it so only
+        # base-layer settings (name, dtype, ...) are forwarded to Layer.
+        size = kwargs.pop('patch_size', patch_size)
+        super(Patches, self).__init__(**kwargs)
+        self.patch_size = size
+
+    def call(self, images):
+        """Return a [batch, n_patches, patch_dims] tensor cut from `images`."""
+        batch_size = tf.shape(images)[0]
+        patches = tf.image.extract_patches(
+            images=images,
+            sizes=[1, self.patch_size, self.patch_size, 1],
+            strides=[1, self.patch_size, self.patch_size, 1],
+            rates=[1, 1, 1, 1],
+            padding="VALID",
+        )
+        return tf.reshape(patches, [batch_size, -1, patches.shape[-1]])
+
+    def get_config(self):
+        return {**super().get_config(), 'patch_size': self.patch_size}
+    
+    
+class PatchEncoder(layers.Layer):
+    """Project patches linearly and add a learned position embedding."""
+
+    def __init__(self, **kwargs):
+        # Drop the sub-layer entries that older get_config() output carried;
+        # both sub-layers are rebuilt below and Layer rejects unknown kwargs.
+        kwargs.pop('projection', None)
+        kwargs.pop('position_embedding', None)
+        n_positions = kwargs.pop('num_patches', num_patches)
+        super(PatchEncoder, self).__init__(**kwargs)
+        self.num_patches = n_positions
+        self.projection = layers.Dense(units=projection_dim)
+        self.position_embedding = layers.Embedding(
+            input_dim=n_positions, output_dim=projection_dim)
+
+    def call(self, patch):
+        """Return `patch` projected plus the per-position embedding."""
+        positions = tf.range(start=0, limit=self.num_patches, delta=1)
+        return self.projection(patch) + self.position_embedding(positions)
+
+    def get_config(self):
+        return {**super().get_config(), 'num_patches': self.num_patches}
+
 class SbbBinarizer:
 
     def __init__(self, model_dir, logger=None):
@@ -52,7 +110,10 @@ def end_session(self):
         del self.session
 
     def load_model(self, model_name):
-        model = load_model(join(self.model_dir, model_name), compile=False)
+        try:
+            model = load_model(join(self.model_dir, model_name), compile=False)
+        except:
+            model = load_model(join(self.model_dir, model_name) , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches})
         model_height = model.layers[len(model.layers)-1].output_shape[1]
         model_width = model.layers[len(model.layers)-1].output_shape[2]
         n_classes = model.layers[len(model.layers)-1].output_shape[3]
@@ -153,12 +214,47 @@ def predict(self, model_in, img, use_patches):
                         index_y_d = img_h - model_height
 
                     img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
+                    
+                    h_res = int( img_patch.shape[0]/1.05)
+                    w_res = int( img_patch.shape[1]/1.05)
+                    
+                    img_patch_resize = resize_image(img_patch, h_res, w_res)
+                    
+                    img_patch_resized_padded =np.ones((img_patch.shape[0],img_patch.shape[1],img_patch.shape[2])).astype(float)#self.do_padding()
+                    
+                    h_start=int( abs(img_patch.shape[0]-img_patch_resize.shape[0])/2. )
+                    
+                    w_start=int( abs(img_patch.shape[1]-img_patch_resize.shape[1])/2. )
+                    
+                    img_patch_resized_padded[h_start:h_start+img_patch_resize.shape[0],w_start:w_start+img_patch_resize.shape[1],:]=np.copy(img_patch_resize[:,:,:])
+                    
+                    label_p_pred_padded = model.predict(img_patch_resized_padded.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
 
                     label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
 
-                    seg = np.argmax(label_p_pred, axis=3)[0]
+                    #seg = np.argmax(label_p_pred, axis=3)[0]
+
+                    #label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
 
-                    seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
+                    seg = np.argmax(label_p_pred, axis=3)[0]
+                    
+                    
+                    seg_padded = np.argmax(label_p_pred_padded, axis=3)[0]
+                    
+                    seg_padded_take_core = seg_padded[h_start:h_start+img_patch_resize.shape[0],w_start:w_start+img_patch_resize.shape[1]]
+                    
+                    seg_padded_take_core_org_size= resize_image(seg_padded_take_core, img_patch.shape[0], img_patch.shape[1])
+                    
+                    #print(seg_padded_take_core_org_size,'sag padded')
+                    #print(seg,'sag')
+                    
+                    seg_tot  = seg_padded_take_core_org_size+0#seg
+                    
+                    seg_tot[seg_tot>1]=1
+
+                    seg_color = np.repeat(seg_tot[:, :, np.newaxis], 3, axis=2)
+
+                    #seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
 
                     if i == 0 and j == 0:
                         seg_color = seg_color[0:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :]

From de89e7df12ba29c6f284ae95b502a37016cdb2b8 Mon Sep 17 00:00:00 2001
From: vahid <vahid@linux-x3no.fritz.box>
Date: Thu, 18 Aug 2022 17:35:11 +0200
Subject: [PATCH 2/7] padding the whole image in order to avoid artifacts on
 the page boundaries

---
 requirements.txt             |  2 +-
 sbb_binarize/sbb_binarize.py | 57 ++++++++++++++++++++----------------
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 4bce704..4bad5bb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,4 @@ numpy
 setuptools >= 41
 opencv-python-headless
 ocrd >= 2.22.3
-tensorflow == 2.4.*
+tensorflow-gpu >= 2.6.0
diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py
index 639847b..39e349c 100644
--- a/sbb_binarize/sbb_binarize.py
+++ b/sbb_binarize/sbb_binarize.py
@@ -112,8 +112,10 @@ def end_session(self):
     def load_model(self, model_name):
         try:
             model = load_model(join(self.model_dir, model_name), compile=False)
+            self.margin_percent = 0.1
         except:
             model = load_model(join(self.model_dir, model_name) , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches})
+            self.margin_percent = 0.15
         model_height = model.layers[len(model.layers)-1].output_shape[1]
         model_width = model.layers[len(model.layers)-1].output_shape[2]
         n_classes = model.layers[len(model.layers)-1].output_shape[3]
@@ -156,14 +158,31 @@ def predict(self, model_in, img, use_patches):
             index_start_w  = 0
             img_padded = np.copy(img)
             
+        img_org_h_pad = img_padded.shape[0]
+        img_org_w_pad = img_padded.shape[1]
             
-        img = np.copy(img_padded)
+        index_start_h_alw = 100
+        index_start_w_alw = 100
+        
+        img_padded_alw = np.zeros(( img_padded.shape[0]+2*index_start_h_alw, img.shape[1]+2*index_start_w_alw, img.shape[2] ))
+        
+        
+        img_padded_alw [ 0: index_start_h_alw, index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:index_start_h_alw,:,:]
+        img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], 0:index_start_w_alw, : ] = img_padded[:,0:index_start_w_alw,:]
+        
+        img_padded_alw [ img_padded_alw.shape[0]-index_start_h_alw: img_padded_alw.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[img_padded.shape[0]-index_start_h_alw:img_padded.shape[0],:,:]
+        img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0],img_padded_alw.shape[1]-index_start_w_alw: img_padded_alw.shape[1], : ] = img_padded[:,img_padded.shape[1]-index_start_w_alw:img_padded.shape[1],:]
+            
+        img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:,:,:]
+        
+        img = np.copy(img_padded_alw)
+
         
             
 
         if use_patches:
 
-            margin = int(0.1 * model_width)
+            margin = int(self.margin_percent * model_width)
 
             width_mid = model_width - 2 * margin
             height_mid = model_height - 2 * margin
@@ -215,20 +234,20 @@ def predict(self, model_in, img, use_patches):
 
                     img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
                     
-                    h_res = int( img_patch.shape[0]/1.05)
-                    w_res = int( img_patch.shape[1]/1.05)
+                    #h_res = int( img_patch.shape[0]/1.05)
+                    #w_res = int( img_patch.shape[1]/1.05)
                     
-                    img_patch_resize = resize_image(img_patch, h_res, w_res)
+                    #img_patch_resize = resize_image(img_patch, h_res, w_res)
                     
-                    img_patch_resized_padded =np.ones((img_patch.shape[0],img_patch.shape[1],img_patch.shape[2])).astype(float)#self.do_padding()
+                    #img_patch_resized_padded =np.ones((img_patch.shape[0],img_patch.shape[1],img_patch.shape[2])).astype(float)#self.do_padding()
                     
-                    h_start=int( abs(img_patch.shape[0]-img_patch_resize.shape[0])/2. )
+                    #h_start=int( abs(img_patch.shape[0]-img_patch_resize.shape[0])/2. )
                     
-                    w_start=int( abs(img_patch.shape[1]-img_patch_resize.shape[1])/2. )
+                    #w_start=int( abs(img_patch.shape[1]-img_patch_resize.shape[1])/2. )
                     
-                    img_patch_resized_padded[h_start:h_start+img_patch_resize.shape[0],w_start:w_start+img_patch_resize.shape[1],:]=np.copy(img_patch_resize[:,:,:])
+                    #img_patch_resized_padded[h_start:h_start+img_patch_resize.shape[0],w_start:w_start+img_patch_resize.shape[1],:]=np.copy(img_patch_resize[:,:,:])
                     
-                    label_p_pred_padded = model.predict(img_patch_resized_padded.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
+                    #label_p_pred_padded = model.predict(img_patch_resized_padded.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
 
                     label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
 
@@ -237,22 +256,8 @@ def predict(self, model_in, img, use_patches):
                     #label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
 
                     seg = np.argmax(label_p_pred, axis=3)[0]
-                    
-                    
-                    seg_padded = np.argmax(label_p_pred_padded, axis=3)[0]
-                    
-                    seg_padded_take_core = seg_padded[h_start:h_start+img_patch_resize.shape[0],w_start:w_start+img_patch_resize.shape[1]]
-                    
-                    seg_padded_take_core_org_size= resize_image(seg_padded_take_core, img_patch.shape[0], img_patch.shape[1])
-                    
-                    #print(seg_padded_take_core_org_size,'sag padded')
-                    #print(seg,'sag')
-                    
-                    seg_tot  = seg_padded_take_core_org_size+0#seg
-                    
-                    seg_tot[seg_tot>1]=1
 
-                    seg_color = np.repeat(seg_tot[:, :, np.newaxis], 3, axis=2)
+                    seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
 
                     #seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
 
@@ -320,7 +325,7 @@ def predict(self, model_in, img, use_patches):
                         prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color
             
             
-            
+            prediction_true = prediction_true[index_start_h_alw: index_start_h_alw+img_org_h_pad, index_start_w_alw: index_start_w_alw+img_org_w_pad,:]
             prediction_true = prediction_true[index_start_h: index_start_h+img_org_h, index_start_w: index_start_w+img_org_w,:]
             prediction_true = prediction_true.astype(np.uint8)
 

From 85f55077a432841afe5a0ae314106515fcba2052 Mon Sep 17 00:00:00 2001
From: vahid <vahid@linux-x3no.fritz.box>
Date: Mon, 22 Aug 2022 13:25:23 +0200
Subject: [PATCH 3/7] padding is disabled for evaluation

---
 sbb_binarize/sbb_binarize.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py
index 39e349c..f7f7ea9 100644
--- a/sbb_binarize/sbb_binarize.py
+++ b/sbb_binarize/sbb_binarize.py
@@ -161,21 +161,21 @@ def predict(self, model_in, img, use_patches):
         img_org_h_pad = img_padded.shape[0]
         img_org_w_pad = img_padded.shape[1]
             
-        index_start_h_alw = 100
-        index_start_w_alw = 100
+        index_start_h_alw = 0#100
+        index_start_w_alw = 0#100
         
-        img_padded_alw = np.zeros(( img_padded.shape[0]+2*index_start_h_alw, img.shape[1]+2*index_start_w_alw, img.shape[2] ))
+        #img_padded_alw = np.zeros(( img_padded.shape[0]+2*index_start_h_alw, img.shape[1]+2*index_start_w_alw, img.shape[2] ))
         
         
-        img_padded_alw [ 0: index_start_h_alw, index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:index_start_h_alw,:,:]
-        img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], 0:index_start_w_alw, : ] = img_padded[:,0:index_start_w_alw,:]
+        #img_padded_alw [ 0: index_start_h_alw, index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:index_start_h_alw,:,:]
+        #img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], 0:index_start_w_alw, : ] = img_padded[:,0:index_start_w_alw,:]
         
-        img_padded_alw [ img_padded_alw.shape[0]-index_start_h_alw: img_padded_alw.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[img_padded.shape[0]-index_start_h_alw:img_padded.shape[0],:,:]
-        img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0],img_padded_alw.shape[1]-index_start_w_alw: img_padded_alw.shape[1], : ] = img_padded[:,img_padded.shape[1]-index_start_w_alw:img_padded.shape[1],:]
+        #img_padded_alw [ img_padded_alw.shape[0]-index_start_h_alw: img_padded_alw.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[img_padded.shape[0]-index_start_h_alw:img_padded.shape[0],:,:]
+        #img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0],img_padded_alw.shape[1]-index_start_w_alw: img_padded_alw.shape[1], : ] = img_padded[:,img_padded.shape[1]-index_start_w_alw:img_padded.shape[1],:]
             
-        img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:,:,:]
+        #img_padded_alw [ index_start_h_alw: index_start_h_alw+img_padded.shape[0], index_start_w_alw: index_start_w_alw+img_padded.shape[1], : ] = img_padded[:,:,:]
         
-        img = np.copy(img_padded_alw)
+        img = np.copy(img_padded)
 
         
             
@@ -325,7 +325,7 @@ def predict(self, model_in, img, use_patches):
                         prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color
             
             
-            prediction_true = prediction_true[index_start_h_alw: index_start_h_alw+img_org_h_pad, index_start_w_alw: index_start_w_alw+img_org_w_pad,:]
+            #prediction_true = prediction_true[index_start_h_alw: index_start_h_alw+img_org_h_pad, index_start_w_alw: index_start_w_alw+img_org_w_pad,:]
             prediction_true = prediction_true[index_start_h: index_start_h+img_org_h, index_start_w: index_start_w+img_org_w,:]
             prediction_true = prediction_true.astype(np.uint8)
 

From fa354e1e0f7ba8b6accf8d8454a42624d368dad8 Mon Sep 17 00:00:00 2001
From: vahid <vahid@linux-x3no.fritz.box>
Date: Thu, 4 May 2023 11:08:56 +0200
Subject: [PATCH 4/7] loading savedmodel

---
 requirements.txt             | 6 +++---
 sbb_binarize/sbb_binarize.py | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 4bad5bb..f475316 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 numpy
-setuptools >= 41
+setuptools >= 50
 opencv-python-headless
-ocrd >= 2.22.3
-tensorflow-gpu >= 2.6.0
+ocrd >= 2.23.3
+tensorflow >= 2.6.0
diff --git a/sbb_binarize/sbb_binarize.py b/sbb_binarize/sbb_binarize.py
index f7f7ea9..b754b44 100644
--- a/sbb_binarize/sbb_binarize.py
+++ b/sbb_binarize/sbb_binarize.py
@@ -91,7 +91,8 @@ def __init__(self, model_dir, logger=None):
 
         self.start_new_session()
 
-        self.model_files = glob('%s/*.h5' % self.model_dir)
+        #self.model_files = glob('%s/*.h5' % self.model_dir)
+        self.model_files = glob(self.model_dir+"/*/", recursive = True)
 
         self.models = []
         for model_file in self.model_files:

From 0ba143f291764b30c1743af37f39ee72c7567060 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad <vahid631983@gmail.com>
Date: Mon, 15 May 2023 19:13:52 +0200
Subject: [PATCH 5/7] Update requirements.txt

---
 requirements.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index f475316..2f57afe 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 numpy
-setuptools >= 50
+setuptools >= 41
 opencv-python-headless
-ocrd >= 2.23.3
-tensorflow >= 2.6.0
+ocrd >= 2.38.0
+tensorflow >= 2.4.0

From cb1916a707080c9cda234355ef56d2b5f91b6d15 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad <vahid631983@gmail.com>
Date: Mon, 15 May 2023 19:45:29 +0200
Subject: [PATCH 6/7] Update requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 2f57afe..594e791 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-numpy
+numpy >=1.21
 setuptools >= 41
 opencv-python-headless
 ocrd >= 2.38.0

From 0254a91eb1d21fc7cfdf60d885229b7453f017c4 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad <vahid631983@gmail.com>
Date: Mon, 15 May 2023 19:59:50 +0200
Subject: [PATCH 7/7] Update config.yml

---
 .circleci/config.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 8f02829..39f1875 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -10,6 +10,7 @@ jobs:
       - restore_cache:
           keys:
             - ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
+      - run: python -m pip install --upgrade pip
       - run: make install
       - run: make model
       - save_cache:
@@ -27,6 +28,7 @@ jobs:
       - restore_cache:
           keys:
             - ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
+      - run: python -m pip install --upgrade pip
       - run: make install
       - run: make model
       - save_cache: