From 71657c31bc0df9b8826c0e9d45ca646640055ddf Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sun, 5 May 2024 20:38:13 -0400 Subject: [PATCH 01/27] YOLO implementation Download the medium model to the models folder for testing wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt @alliomeria yolov8 as service. WIP but works --- nlpserver.py | 120 +++++++++++++++++++++++++++++++++++++++++++++-- requirements.txt | 10 ++++ 2 files changed, 125 insertions(+), 5 deletions(-) diff --git a/nlpserver.py b/nlpserver.py index 09e4343..6ffa077 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -15,11 +15,11 @@ default_data = {} default_data['web64'] = { 'app': 'nlpserver', - 'version': '1.0.2', - 'last_modified': '2022-04-21', - 'documentation': 'https://github.com/digitaldogsbody/nlpserver-fasttext/README.md', - 'github': 'https://github.com/digitaldogsbody/nlpserver-fasttext', - 'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn', '/fasttext'], + 'version': '1.1.0', + 'last_modified': '2024-05-05', + 'documentation': 'https://github.com/esmero/nlpserver-fasttext/README.md', + 'github': 'https://github.com/esmero/nlpserver-fasttext', + 'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn', '/fasttext', '/image/yolo'], } default_data['message'] = 'NLP Server by web64.com - with fasttext addition by digitaldogsbody' @@ -533,6 +533,116 @@ def fasttext(): return jsonify(data) +@app.route("/image/yolo", methods=['GET', 'POST']) +def yolo(): + # Import your Libraries + import torch + from torchvision import transforms + from PIL import Image, ImageDraw + from pathlib import Path + from ultralytics import YOLO + import pandas as pd + import numpy as np + from pathlib import Path + from ultralytics import YOLO + from PIL import Image + import requests + from io import BytesIO + from PIL import Image + from keras.preprocessing.image import img_to_array + + intermediate_features = [] + + def hook_fn(module, input, output): + intermediate_features.append(output) + + def extract_features(intermediate_features, model, img, layer_index=20):##Choose the layer that fit your application + hook = model.model.model[layer_index].register_forward_hook(hook_fn) + print(hook) + with torch.no_grad(): + model(img) + hook.remove() + return intermediate_features[0] # Access the first element of the list + def loadImage(url, size = 640): + try: + response = requests.get(url) + response.raise_for_status() + except requests.exceptions.HTTPError as err: + data['error'] = err.strerror + return jsonify(data) + + img_bytes = BytesIO(response.content) + img = Image.open(img_bytes) + img = img.convert('RGB') + img = img.resize((size,size), Image.NEAREST) + img = img_to_array(img) + return img + + data = dict(default_data) + data['message'] = "Yolo - Parameters: 'iiif_image_url', 'labels' a list of valid labels for object detection (default: face)" + data['yolo'] = {} + params = {} + + + if request.method == 'GET': + params['iiif_image_url'] = request.args.get('iiif_image_url') + params['labels'] = request.args.getlist('labels') + elif request.method == 'POST': + params = request.form # postdata + else: + data['error'] = 'Invalid request method' + return jsonify(data) + + if not params: + data['error'] = 'Missing parameters' + return jsonify(data) + + 
if not params['iiif_image_url']: + data['error'] = '[iiif_image_url] parameter not found' + return jsonify(data) + + if not params['labels']: + params['labels'] = ['face'] + + try: + model = YOLO('models/yolo/yolov8m.pt') + except ValueError: + data['error'] = 'models/yolo/yolov8m.pt' + return jsonify(data) + + if not model: + data['error'] = 'yolov8 model not initialized' + return jsonify(data) + + img = loadImage(params['iiif_image_url'], 640) + data['yolo']['objects'] = [] + data['yolo']['modelinfo'] = {} + object_detect_results = model(img, conf=0.1) + # model.names gives me the classes. + # We don't know if the user set tge obb model or the regular one, so we will have to iterate over both options, bbox and obb + for object_detect_result in object_detect_results: + if hasattr(object_detect_result, "obb") and object_detect_result.obb is not None: # Access the .obb attribute instead of .boxes + print('An obb model') + data['yolo']['objects'] = object_detect_result.tojson(True) + elif hasattr(object_detect_result, "boxes") and object_detect_result.boxes is not None: + print('Not an obb model') + data['yolo']['objects'] = object_detect_result.tojson(True) + else: + data['error'] = 'No features detected' + data['yolo']['objects'] = object_detect_result.tojson(True) + + data['yolo']['modelinfo'] = {'train_args': model.ckpt["train_args"], 'date': model.ckpt["date"], 'name': 'model.ckpt["name"]'} + + # features = extract_features(intermediate_features=intermediate_features,model=model, img = img) // More advanced. Step 2 + # The embed method is pretty new. + vector = model.embed(img, verbose=False)[0] + print(vector.shape[0]) + # Vector size for this layer (i think by default it will be numlayers - 2 so 20) is 576 + vector_string = str(vector.detach().tolist()) + data['yolo']['vector'] = vector_string + data['message'] = 'done' + + return jsonify(data) # @app.route("/tester", methods=['GET', 'POST']) # def tester(): # return render_template('form.html') diff --git a/requirements.txt b/requirements.txt index 652f2a8..0ca55e7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ +setuptools pyicu numpy Flask +flask polyglot morfessor langid @@ -13,5 +15,13 @@ BeautifulSoup4 afinn textblob fasttext @ git+https://github.com/facebookresearch/fastText.git +torch +pillow #Fork of PIL +ultralytics +pandas +scikit-learn #To use PCA +keras # keras.preprocessing.image +tensorflow +insightface # for face detection. Arcface used by Apple #summa #pattern \ No newline at end of file From f05d33259a5b7efa83f813bb15394486a289301c Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sun, 5 May 2024 21:12:22 -0400 Subject: [PATCH 02/27] Unit vector. If i am doing this right or not? 
Math from 1998 for me
---
 nlpserver.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/nlpserver.py b/nlpserver.py
index 6ffa077..5fe94ee 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -550,6 +550,7 @@ def yolo():
     from io import BytesIO
     from PIL import Image
     from keras.preprocessing.image import img_to_array
+    from sklearn import preprocessing
 
     intermediate_features = []
 
@@ -638,8 +639,11 @@ def loadImage(url, size = 640):
     vector = model.embed(img, verbose=False)[0]
     print(vector.shape[0])
     # Vector size for this layer (i think by default it will be numlayers - 2 so 20) is 576
-    vector_string = str(vector.detach().tolist())
-    data['yolo']['vector'] = vector_string
+    # array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample
+    # This "should" return a Unit Vector so we can use "Cosine" in Solr
+    X_l1 = preprocessing.normalize([vector.detach().tolist()], norm='l1')
+    # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html
+    data['yolo']['vector'] = str(X_l1[0])
     data['message'] = 'done'
 
     return jsonify(data)

From a01bc9fe69e602195f2a54669972e4fc5fb6dfb4 Mon Sep 17 00:00:00 2001
From: Diego Pino Navarro
Date: Sun, 5 May 2024 21:39:34 -0400
Subject: [PATCH 03/27] Allow the Yolo model to be set via an ENV, meaning via
 docker-compose ENVs

Of course you want to mount /models so you can deploy from the host
---
 nlpserver.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/nlpserver.py b/nlpserver.py
index 5fe94ee..0e52da4 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -11,6 +11,11 @@
 
 # configurations
 #app.config['var1'] = 'test'
+app.config["YOLO_MODEL_NAME"] = "yolov8m.pt"
+for variable, value in os.environ.items():
+    if variable == "YOLO_MODEL_NAME":
+        # Can be set via Docker ENV
+        app.config["YOLO_MODEL_NAME"] = value
 
 default_data = {}
 default_data['web64'] = {
@@ -606,9 +611,9 @@ def loadImage(url, size = 640):
         params['labels'] = ['face']
 
     try:
-        model = YOLO('models/yolo/yolov8m.pt')
+        model = YOLO('models/yolo/'+ app.config["YOLO_MODEL_NAME"])
     except ValueError:
-        data['error'] = 'models/yolo/yolov8m.pt'
+        data['error'] = 'models/yolo/'+ app.config["YOLO_MODEL_NAME"] + ' not found'
         return jsonify(data)
 
     if not model:

From ea0a0415f1fcfc8906aea77b7a5bfa601a4ae2fb Mon Sep 17 00:00:00 2001
From: Diego Pino Navarro
Date: Mon, 6 May 2024 09:43:34 -0400
Subject: [PATCH 04/27] Much better output. All JSON, no weird strings.
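
The detail behind this patch: Ultralytics' tojson() returns a JSON-encoded
string, not Python objects, so dropping its output straight into the response
dict meant jsonify() double-encoded it and clients got one big quoted blob
instead of an array. Running it through json.loads() first hands jsonify()
real objects. A minimal sketch of the difference (the raw payload below is
made up for illustration):

    import json
    from flask import Flask, jsonify

    app = Flask(__name__)

    # What tojson() hands back: a JSON *string*, not a list of dicts.
    raw = '[{"name": "person", "confidence": 0.87}]'

    @app.route("/demo")
    def demo():
        return jsonify({
            "as_string": raw,               # client receives a quoted blob
            "as_objects": json.loads(raw),  # client receives a real JSON array
        })

    app.run()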
--- nlpserver.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/nlpserver.py b/nlpserver.py index 0e52da4..675c3d2 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -556,6 +556,7 @@ def yolo(): from PIL import Image from keras.preprocessing.image import img_to_array from sklearn import preprocessing + import json intermediate_features = [] @@ -629,15 +630,15 @@ def loadImage(url, size = 640): for object_detect_result in object_detect_results: if hasattr(object_detect_result, "obb") and object_detect_result.obb is not None: # Access the .obb attribute instead of .boxes print('An obb model') - data['yolo']['objects'] = object_detect_result.tojson(True) + data['yolo']['objects'] = json.loads(object_detect_result.tojson(True)) elif hasattr(object_detect_result, "boxes") and object_detect_result.boxes is not None: print('Not an obb model') - data['yolo']['objects'] = object_detect_result.tojson(True) + data['yolo']['objects'] = json.loads(object_detect_result.tojson(True)) else: data['error'] = 'No features detected' - data['yolo']['objects'] = object_detect_result.tojson(True) + data['yolo']['objects'] = json.loads(object_detect_result.tojson(True)) - data['yolo']['modelinfo'] = {'train_args': model.ckpt["train_args"], 'date': model.ckpt["date"], 'name': 'model.ckpt["name"]'} + data['yolo']['modelinfo'] = {'train_args': model.ckpt["train_args"], 'date': model.ckpt["date"], 'version': model.ckpt["version"]} # features = extract_features(intermediate_features=intermediate_features,model=model, img = img) // More advanced. Step 2 # The embed method is pretty new. @@ -648,7 +649,7 @@ def loadImage(url, size = 640): # This "should" return a Unit Vector so we can use "Cosine" in Solr X_l1 = preprocessing.normalize([vector.detach().tolist()], norm='l1') # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html - data['yolo']['vector'] = str(X_l1[0]) + data['yolo']['vector'] = X_l1[0].tolist() data['message'] = 'done' return jsonify(data) From c00ac91b922637ea41747ca3c0bd971986a6c505 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 6 May 2024 14:13:56 -0400 Subject: [PATCH 05/27] Adds mobilenetv3 embedder (size 1024) via mediapipe (tensorflow/google) Some cleanups too --- nlpserver.py | 98 +++++++++++++++++++++++++++++++++++++++++++++--- requirements.txt | 1 + 2 files changed, 93 insertions(+), 6 deletions(-) diff --git a/nlpserver.py b/nlpserver.py index 675c3d2..e4b7f77 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -12,10 +12,13 @@ # configurations #app.config['var1'] = 'test' app.config["YOLO_MODEL_NAME"] = "yolov8m.pt" +app.config["MOBILENET_MODEL_NAME"] = "mobilenet_v3_small.tflite" for variable, value in os.environ.items(): if variable == "YOLO_MODEL_NAME": # Can be set via Docker ENV app.config["YOLO_MODEL_NAME"] = value + if variable == "MOBILENET_MODEL_NAME": + app.config["MOBILENET_MODEL_NAME"] = value default_data = {} default_data['web64'] = { @@ -24,7 +27,7 @@ 'last_modified': '2024-05-05', 'documentation': 'https://github.com/esmero/nlpserver-fasttext/README.md', 'github': 'https://github.com/esmero/nlpserver-fasttext', - 'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn', '/fasttext', '/image/yolo'], + 'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', 
'/readability', '/spacy/entities', '/afinn', '/fasttext', '/image/yolo', 'image/mobilenet'], } default_data['message'] = 'NLP Server by web64.com - with fasttext addition by digitaldogsbody' @@ -549,11 +552,8 @@ def yolo(): import pandas as pd import numpy as np from pathlib import Path - from ultralytics import YOLO - from PIL import Image import requests from io import BytesIO - from PIL import Image from keras.preprocessing.image import img_to_array from sklearn import preprocessing import json @@ -646,17 +646,103 @@ def loadImage(url, size = 640): print(vector.shape[0]) # Vector size for this layer (i think by default it will be numlayers - 2 so 20) is 576 # array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample - # This "should" return a Unit Vector so we can use "Cosine" in Solr + # This "should" return a Unit Vector so we can use "dot_product" in Solr X_l1 = preprocessing.normalize([vector.detach().tolist()], norm='l1') # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html data['yolo']['vector'] = X_l1[0].tolist() data['message'] = 'done' return jsonify(data) +@app.route("/image/mobilenet", methods=['GET', 'POST']) +def mobilenet(): + # Import your Libraries + from PIL import Image + from pathlib import Path + import pandas as pd + import numpy as np + from pathlib import Path + import requests + from io import BytesIO + from sklearn import preprocessing + import mediapipe as mp + from mediapipe.tasks import python + from mediapipe.tasks.python import vision + + intermediate_features = [] + + def loadImage(url, size = 480): + try: + response = requests.get(url) + response.raise_for_status() + except requests.exceptions.HTTPError as err: + data['error'] = err.strerror + return jsonify(data) + + img_bytes = BytesIO(response.content) + img = Image.open(img_bytes) + img = img.convert('RGB') + img = img.resize((size,size), Image.NEAREST) + # Media pipe uses a different format than YOLO, img here is PIL + img = np.asarray(img) + return img + + data = dict(default_data) + data['message'] = "mobilenet - Parameters: 'iiif_image_url" + data['mobilenet'] = {} + params = {} + + + if request.method == 'GET': + params['iiif_image_url'] = request.args.get('iiif_image_url') + elif request.method == 'POST': + params = request.form # postdata + else: + data['error'] = 'Invalid request method' + return jsonify(data) + + if not params: + data['error'] = 'Missing parameters' + return jsonify(data) + + if not params['iiif_image_url']: + data['error'] = '[iiif_image_url] parameter not found' + return jsonify(data) + try: + # Create options for Image Embedder + base_options = python.BaseOptions(model_asset_path='models/mobilenet/' + app.config["MOBILENET_MODEL_NAME"]) + l2_normalize = True #@param {type:"boolean"} + quantize = True #@param {type:"boolean"} + options = vision.ImageEmbedderOptions( + base_options=base_options, l2_normalize=l2_normalize, quantize=quantize) + + +# Create Image Embedder + with vision.ImageEmbedder.create_from_options(options) as embedder: + + # Format images for MediaPipe + img = loadImage(params['iiif_image_url'], 480) + image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img) + embedding_result = embedder.embed(image) + + except ValueError: + data['error'] = 'models/mobilenet/' + app.config["MOBILENET_MODEL_NAME"] + ' not found' + return jsonify(data) + + + vector = embedding_result.embeddings[0].embedding + 
print(embedding_result.embeddings[0].embedding.shape[0]) + # Vector size for this layer (inumlayers - 1) is 1024 + # This "should" return a Unit Vector so we can use "dot_product" in Solr + X_l1 = preprocessing.normalize([vector], norm='l1') + # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html + data['mobilenet']['vector'] = X_l1[0].tolist() + data['message'] = 'done' + return jsonify(data) + # @app.route("/tester", methods=['GET', 'POST']) # def tester(): # return render_template('form.html') -app.run(host='0.0.0.0', port=6400, debug=False) +app.run(host='0.0.0.0', port=6401, debug=False) diff --git a/requirements.txt b/requirements.txt index 0ca55e7..df5c93f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,5 +23,6 @@ scikit-learn #To use PCA keras # keras.preprocessing.image tensorflow insightface # for face detection. Arcface used by Apple +mediapipe # for mobilenetv3 embedder #summa #pattern \ No newline at end of file From 9e3b0e145755f66eb25729586be00ca88f2d5562 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 6 May 2024 14:40:22 -0400 Subject: [PATCH 06/27] adds Bert/sentence model loader and embedder --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index df5c93f..ca2b904 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,5 +24,6 @@ keras # keras.preprocessing.image tensorflow insightface # for face detection. Arcface used by Apple mediapipe # for mobilenetv3 embedder +sentence-transformers #For bert based text embeddings #summa #pattern \ No newline at end of file From f99f5b2f05a3e517b38b9783695c474e883be6bc Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 13 May 2024 22:39:31 -0400 Subject: [PATCH 07/27] return to 6400 for nlpserver.py --- nlpserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nlpserver.py b/nlpserver.py index e4b7f77..25980a1 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -743,6 +743,6 @@ def loadImage(url, size = 480): # def tester(): # return render_template('form.html') -app.run(host='0.0.0.0', port=6401, debug=False) +app.run(host='0.0.0.0', port=6400, debug=False) From 8dd8af9d2def3858c871802a4dc9cc1b06010943 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 13 May 2024 23:09:21 -0400 Subject: [PATCH 08/27] Ignore models when committing --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index fe06ad5..5ad9e9a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .vscode -*__pycache__ \ No newline at end of file +*__pycache__ +*.tflite +*.pt From 1023cdd031985a1c1f5f65074491c3c3158e37a6 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 22 May 2024 16:31:28 -0400 Subject: [PATCH 09/27] Smooth out the code --- nlpserver.py | 53 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/nlpserver.py b/nlpserver.py index 25980a1..0ab4d7b 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -13,12 +13,18 @@ #app.config['var1'] = 'test' app.config["YOLO_MODEL_NAME"] = "yolov8m.pt" app.config["MOBILENET_MODEL_NAME"] = "mobilenet_v3_small.tflite" +app.config["EFFICIENTDET_DETECT_MODEL_NAME"] = "efficientdet_lite2.tflite" +app.config["MOBILENET_DETECT_MODEL_NAME"] = "ssd_mobilenet_v2.tflite" for variable, value in os.environ.items(): if variable == "YOLO_MODEL_NAME": # Can be set via Docker ENV app.config["YOLO_MODEL_NAME"] = value if 
variable == "MOBILENET_MODEL_NAME": app.config["MOBILENET_MODEL_NAME"] = value + if variable == "EFFICIENTDET_MODEL_NAME": + app.config["EFFICIENTDET_MODEL_NAME"] = value + if variable == "MOBILENET_DETECT_MODEL_NAME": + app.config["MOBILENET_DETECT_MODEL_NAME"] = value default_data = {} default_data['web64'] = { @@ -574,7 +580,7 @@ def loadImage(url, size = 640): try: response = requests.get(url) response.raise_for_status() - except requests.exceptions.HTTPError as err: + except requests.exceptions.RequestException as err: data['error'] = err.strerror return jsonify(data) @@ -631,12 +637,13 @@ def loadImage(url, size = 640): if hasattr(object_detect_result, "obb") and object_detect_result.obb is not None: # Access the .obb attribute instead of .boxes print('An obb model') data['yolo']['objects'] = json.loads(object_detect_result.tojson(True)) - elif hasattr(object_detect_result, "boxes") and object_detect_result.boxes is not None: + elif hasattr(object_detect_result, "boxes") and object_detect_result.boxes is not None and object_detect_result.probs is not None: print('Not an obb model') - data['yolo']['objects'] = json.loads(object_detect_result.tojson(True)) + if type(object_detect_result) != 'NoneType': + data['yolo']['objects'] = json.loads(object_detect_result.tojson(True)) else: data['error'] = 'No features detected' - data['yolo']['objects'] = json.loads(object_detect_result.tojson(True)) + data['yolo']['objects'] = [] data['yolo']['modelinfo'] = {'train_args': model.ckpt["train_args"], 'date': model.ckpt["date"], 'version': model.ckpt["version"]} @@ -674,14 +681,14 @@ def loadImage(url, size = 480): try: response = requests.get(url) response.raise_for_status() - except requests.exceptions.HTTPError as err: + except requests.exceptions.RequestException as err: data['error'] = err.strerror return jsonify(data) img_bytes = BytesIO(response.content) img = Image.open(img_bytes) img = img.convert('RGB') - img = img.resize((size,size), Image.NEAREST) + img.thumbnail((size,size), Image.NEAREST) # Media pipe uses a different format than YOLO, img here is PIL img = np.asarray(img) return img @@ -689,7 +696,9 @@ def loadImage(url, size = 480): data = dict(default_data) data['message'] = "mobilenet - Parameters: 'iiif_image_url" data['mobilenet'] = {} + data['efficientdet'] = {} params = {} + objects = [] if request.method == 'GET': @@ -709,33 +718,49 @@ def loadImage(url, size = 480): return jsonify(data) try: # Create options for Image Embedder - base_options = python.BaseOptions(model_asset_path='models/mobilenet/' + app.config["MOBILENET_MODEL_NAME"]) + base_options_embedder = python.BaseOptions(model_asset_path='models/mobilenet/' + app.config["MOBILENET_MODEL_NAME"]) + base_options_detected = python.BaseOptions(model_asset_path='models/mobilenet/' + app.config["MOBILENET_DETECT_MODEL_NAME"]) l2_normalize = True #@param {type:"boolean"} quantize = True #@param {type:"boolean"} - options = vision.ImageEmbedderOptions( - base_options=base_options, l2_normalize=l2_normalize, quantize=quantize) + options_embedder = vision.ImageEmbedderOptions(base_options=base_options_embedder, l2_normalize=l2_normalize, quantize=quantize) + options_detector = vision.ObjectDetectorOptions(base_options=base_options_detected, score_threshold=0.5) + + # Create Image Embedder - with vision.ImageEmbedder.create_from_options(options) as embedder: + with vision.ImageEmbedder.create_from_options(options_embedder) as embedder: # Format images for MediaPipe - img = loadImage(params['iiif_image_url'], 480) + img = 
loadImage(params['iiif_image_url'], 640) image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img) embedding_result = embedder.embed(image) + with vision.ObjectDetector.create_from_options(options_detector) as detector: + detector_results = detector.detect(image) except ValueError: data['error'] = 'models/mobilenet/' + app.config["MOBILENET_MODEL_NAME"] + ' not found' return jsonify(data) - + if not detector_results.detections: + objects = [] + else: + # make the coordinates percentage based. + for ml_result_index in range(len(detector_results.detections)): + detector_results.detections[ml_result_index].bounding_box.origin_x = detector_results.detections[ml_result_index].bounding_box.origin_x/image.width + detector_results.detections[ml_result_index].bounding_box.origin_y = detector_results.detections[ml_result_index].bounding_box.origin_y/image.height + detector_results.detections[ml_result_index].bounding_box.width = detector_results.detections[ml_result_index].bounding_box.width/image.width + detector_results.detections[ml_result_index].bounding_box.height = detector_results.detections[ml_result_index].bounding_box.width/image.height + objects = detector_results.detections vector = embedding_result.embeddings[0].embedding - print(embedding_result.embeddings[0].embedding.shape[0]) + # print(embedding_result.embeddings[0].embedding.shape[0]) # Vector size for this layer (inumlayers - 1) is 1024 # This "should" return a Unit Vector so we can use "dot_product" in Solr X_l1 = preprocessing.normalize([vector], norm='l1') # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html data['mobilenet']['vector'] = X_l1[0].tolist() + + data['mobilenet']['objects'] = objects data['message'] = 'done' return jsonify(data) @@ -743,6 +768,6 @@ def loadImage(url, size = 480): # def tester(): # return render_template('form.html') -app.run(host='0.0.0.0', port=6400, debug=False) +app.run(host='0.0.0.0', port=6401, debug=False) From f8f97cd7465374245c1cec50b5a4f043dec4e5ee Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 22 May 2024 18:15:16 -0400 Subject: [PATCH 10/27] Keep making the same mistakes. Gosh. No more alpha releases. let's git this --- nlpserver.py | 2 +- requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/nlpserver.py b/nlpserver.py index 0ab4d7b..05c139a 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -768,6 +768,6 @@ def loadImage(url, size = 480): # def tester(): # return render_template('form.html') -app.run(host='0.0.0.0', port=6401, debug=False) +app.run(host='0.0.0.0', port=6400, debug=False) diff --git a/requirements.txt b/requirements.txt index ca2b904..c307f18 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,6 +22,7 @@ pandas scikit-learn #To use PCA keras # keras.preprocessing.image tensorflow +onnxruntime # for insightface > 0.2 insightface # for face detection. Arcface used by Apple mediapipe # for mobilenetv3 embedder sentence-transformers #For bert based text embeddings From 70c6150c7e9225d11ae543bbbdbe55a956cfe683 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 22 May 2024 20:09:18 -0400 Subject: [PATCH 11/27] Nop. 
probs are always None
---
 nlpserver.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/nlpserver.py b/nlpserver.py
index 05c139a..4f3a743 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -637,12 +637,11 @@ def loadImage(url, size = 640):
         if hasattr(object_detect_result, "obb") and object_detect_result.obb is not None: # Access the .obb attribute instead of .boxes
             print('An obb model')
             data['yolo']['objects'] = json.loads(object_detect_result.tojson(True))
-        elif hasattr(object_detect_result, "boxes") and object_detect_result.boxes is not None and object_detect_result.probs is not None:
+        elif hasattr(object_detect_result, "boxes") and object_detect_result.boxes is not None:
             print('Not an obb model')
             if type(object_detect_result) != 'NoneType':
                 data['yolo']['objects'] = json.loads(object_detect_result.tojson(True))
         else:
-            data['error'] = 'No features detected'
             data['yolo']['objects'] = []

From b700b5e5de79d98ae14ee3bb81777c192ac6b539 Mon Sep 17 00:00:00 2001
From: Diego Pino Navarro
Date: Thu, 23 May 2024 10:06:48 -0400
Subject: [PATCH 12/27] Trying with norm L2, because dot_product between same
 vector is not 1

On L1 it affects the score and who knows what else when searching with Solr
---
 nlpserver.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/nlpserver.py b/nlpserver.py
index 4f3a743..e8906d4 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -639,7 +639,7 @@ def loadImage(url, size = 640):
             data['yolo']['objects'] = json.loads(object_detect_result.tojson(True))
         elif hasattr(object_detect_result, "boxes") and object_detect_result.boxes is not None:
             print('Not an obb model')
-            if type(object_detect_result) != 'NoneType':
+            if type(object_detect_result) != 'NoneType' and object_detect_result.boxes[0].xywh == torch.Tensor:
                 data['yolo']['objects'] = json.loads(object_detect_result.tojson(True))
         else:
             data['yolo']['objects'] = []
@@ -653,8 +653,10 @@ def loadImage(url, size = 640):
     # Vector size for this layer (i think by default it will be numlayers - 2 so 20) is 576
     # array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample
     # This "should" return a Unit Vector so we can use "dot_product" in Solr
-    X_l1 = preprocessing.normalize([vector.detach().tolist()], norm='l1')
+    # Even if Norm L1 is better, dot product on Solr gives me less than 1 of itself. So will try with L2
+    X_l1 = preprocessing.normalize([vector.detach().tolist()], norm='l2')
     # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html
+    print(np.dot(X_l1[0], X_l1[0]));
     data['yolo']['vector'] = X_l1[0].tolist()
     data['message'] = 'done'
 
     return jsonify(data)
@@ -767,6 +769,6 @@ def loadImage(url, size = 480):
 # def tester():
 #     return render_template('form.html')
 
-app.run(host='0.0.0.0', port=6400, debug=False)
+app.run(host='0.0.0.0', port=6401, debug=False)

From 94c2be19d2daf043a95cd58ed907b8bb9eb8c8d7 Mon Sep 17 00:00:00 2001
From: Diego Pino Navarro
Date: Thu, 23 May 2024 10:13:15 -0400
Subject: [PATCH 13/27] Literally need to stop doing this

While testing, because i have two Servers running, i change to 6401. Then fetch and fail.
gosh --- nlpserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nlpserver.py b/nlpserver.py index e8906d4..2c9a377 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -769,6 +769,6 @@ def loadImage(url, size = 480): # def tester(): # return render_template('form.html') -app.run(host='0.0.0.0', port=6401, debug=False) +app.run(host='0.0.0.0', port=6400, debug=False) From 4b8a67578b7b80bfa46ddc9d596753dcfbccad1e Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 23 May 2024 10:39:57 -0400 Subject: [PATCH 14/27] torch Tensor stuff condition + empirical confidence of at least 0.3 Try again (i'm a :snake: but also not a :snake:) --- nlpserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nlpserver.py b/nlpserver.py index 2c9a377..c46fa53 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -630,7 +630,7 @@ def loadImage(url, size = 640): img = loadImage(params['iiif_image_url'], 640) data['yolo']['objects'] = [] data['yolo']['modelinfo'] = {} - object_detect_results = model(img, conf=0.1) + object_detect_results = model(img, conf=0.3) # model.names gives me the classes. # We don't know if the user set tge obb model or the regular one, so we will have to iterate over both options, bbox and obb for object_detect_result in object_detect_results: @@ -639,7 +639,7 @@ def loadImage(url, size = 640): data['yolo']['objects'] = json.loads(object_detect_result.tojson(True)) elif hasattr(object_detect_result, "boxes") and object_detect_result.boxes is not None: print('Not an obb model') - if type(object_detect_result) != 'NoneType' and len(object_detect_result.boxes) and object_detect_result.boxes[0].xywh == torch.Tensor: + if type(object_detect_result) != 'NoneType' and len(object_detect_result.boxes) and len(object_detect_result.boxes[0].xywh): data['yolo']['objects'] = json.loads(object_detect_result.tojson(True)) else: data['yolo']['objects'] = [] From f85591b7414e7a551a9b3aeaae44dd0ab2531c14 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 23 May 2024 11:24:42 -0400 Subject: [PATCH 15/27] Fix mobilenet "capabilities" check/key --- nlpserver.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/nlpserver.py b/nlpserver.py index c46fa53..abf00c3 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -33,7 +33,7 @@ 'last_modified': '2024-05-05', 'documentation': 'https://github.com/esmero/nlpserver-fasttext/README.md', 'github': 'https://github.com/esmero/nlpserver-fasttext', - 'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn', '/fasttext', '/image/yolo', 'image/mobilenet'], + 'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn', '/fasttext', '/image/yolo', '/image/mobilenet'], } default_data['message'] = 'NLP Server by web64.com - with fasttext addition by digitaldogsbody' @@ -653,11 +653,11 @@ def loadImage(url, size = 640): # Vector size for this layer (i think by default it will be numlayers - 2 so 20) is 576 # array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample # This "should" return a Unit Vector so we can use "dot_product" in Solr - # Even if Norm L1 is better, dot product on Solr gives me less than 1 of itself. 
So will try with L2 - X_l1 = preprocessing.normalize([vector.detach().tolist()], norm='l2') + # Even if Norm L1 is better for comparison, dot product on Solr gives me less than 1 of itself. So will try with L2 + normalized = preprocessing.normalize([vector.detach().tolist()], norm='l2') # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html - print(np.dot(X_l1[0], X_l1[0])); - data['yolo']['vector'] = X_l1[0].tolist() + # interesting, this is never 1 sharp... like 1.000000005 etc ... mmmm print(np.dot(normalized[0], normalized[0])); + data['yolo']['vector'] = normalized[0].tolist() data['message'] = 'done' return jsonify(data) @@ -751,15 +751,17 @@ def loadImage(url, size = 480): detector_results.detections[ml_result_index].bounding_box.origin_x = detector_results.detections[ml_result_index].bounding_box.origin_x/image.width detector_results.detections[ml_result_index].bounding_box.origin_y = detector_results.detections[ml_result_index].bounding_box.origin_y/image.height detector_results.detections[ml_result_index].bounding_box.width = detector_results.detections[ml_result_index].bounding_box.width/image.width - detector_results.detections[ml_result_index].bounding_box.height = detector_results.detections[ml_result_index].bounding_box.width/image.height + detector_results.detections[ml_result_index].bounding_box.height = detector_results.detections[ml_result_index].bounding_box.height/image.height objects = detector_results.detections vector = embedding_result.embeddings[0].embedding # print(embedding_result.embeddings[0].embedding.shape[0]) # Vector size for this layer (inumlayers - 1) is 1024 # This "should" return a Unit Vector so we can use "dot_product" in Solr - X_l1 = preprocessing.normalize([vector], norm='l1') + # in theory vision embedder here is already L2. But let's do it manually again. + normalized = preprocessing.normalize([vector], norm='l2') + print(np.dot(normalized[0], normalized[0])); # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html - data['mobilenet']['vector'] = X_l1[0].tolist() + data['mobilenet']['vector'] = normalized[0].tolist() data['mobilenet']['objects'] = objects data['message'] = 'done' From 067624661385f33aa0cd7aba63fa67dc261fa7b0 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 23 May 2024 11:31:00 -0400 Subject: [PATCH 16/27] WE ship with the large model now --- nlpserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nlpserver.py b/nlpserver.py index abf00c3..1027a8a 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -12,7 +12,7 @@ # configurations #app.config['var1'] = 'test' app.config["YOLO_MODEL_NAME"] = "yolov8m.pt" -app.config["MOBILENET_MODEL_NAME"] = "mobilenet_v3_small.tflite" +app.config["MOBILENET_MODEL_NAME"] = "mobilenet_v3_large.tflite" app.config["EFFICIENTDET_DETECT_MODEL_NAME"] = "efficientdet_lite2.tflite" app.config["MOBILENET_DETECT_MODEL_NAME"] = "ssd_mobilenet_v2.tflite" for variable, value in os.environ.items(): From 6fe9d47933fd7e0b6a8951bf61110425e0d0747b Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 23 May 2024 12:45:48 -0400 Subject: [PATCH 17/27] Because not convinced that L2 (even if correct) handles size/coverage correctly And we are doing research so i need all options available. ML processors will also get an option to define the requested norm. 
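
A toy check of what each of the three norms this patch exposes does to a
vector, and to its dot product with itself (the 2-d vector is made up for
illustration; only l2 yields the unit vector that Solr's dot_product
similarity expects):

    import numpy as np
    from sklearn import preprocessing

    v = np.array([[3.0, 4.0]])  # stand-in for an embedding

    for norm in ('l1', 'l2', 'max'):
        u = preprocessing.normalize(v, norm=norm)[0]
        print(norm, u, 'self dot product:', np.dot(u, u))

    # l1  -> [0.42857143 0.57142857]  self dot product: 0.5102...
    # l2  -> [0.6 0.8]                self dot product: 1.0
    # max -> [0.75 1.  ]              self dot product: 1.5625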
--- nlpserver.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/nlpserver.py b/nlpserver.py index 1027a8a..7563f26 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -12,7 +12,7 @@ # configurations #app.config['var1'] = 'test' app.config["YOLO_MODEL_NAME"] = "yolov8m.pt" -app.config["MOBILENET_MODEL_NAME"] = "mobilenet_v3_large.tflite" +app.config["MOBILENET_MODEL_NAME"] = "mobilenet_v3_small.tflite" app.config["EFFICIENTDET_DETECT_MODEL_NAME"] = "efficientdet_lite2.tflite" app.config["MOBILENET_DETECT_MODEL_NAME"] = "ssd_mobilenet_v2.tflite" for variable, value in os.environ.items(): @@ -571,7 +571,6 @@ def hook_fn(module, input, output): def extract_features(intermediate_features, model, img, layer_index=20):##Choose the layer that fit your application hook = model.model.model[layer_index].register_forward_hook(hook_fn) - print(hook) with torch.no_grad(): model(img) hook.remove() @@ -600,6 +599,7 @@ def loadImage(url, size = 640): if request.method == 'GET': params['iiif_image_url'] = request.args.get('iiif_image_url') params['labels'] = request.args.getlist('labels') + params['norm'] = request.args.getlist('norm') elif request.method == 'POST': params = request.form # postdata else: @@ -617,6 +617,9 @@ def loadImage(url, size = 640): if not params['labels']: params['labels'] = ['face'] + if params['norm'] not in ['l1','l2','max']: + params['norm'] = 'l2' + try: model = YOLO('models/yolo/'+ app.config["YOLO_MODEL_NAME"]) except ValueError: @@ -654,7 +657,7 @@ def loadImage(url, size = 640): # array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample # This "should" return a Unit Vector so we can use "dot_product" in Solr # Even if Norm L1 is better for comparison, dot product on Solr gives me less than 1 of itself. So will try with L2 - normalized = preprocessing.normalize([vector.detach().tolist()], norm='l2') + normalized = preprocessing.normalize([vector.detach().tolist()], norm=params['norm']) # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html # interesting, this is never 1 sharp... like 1.000000005 etc ... mmmm print(np.dot(normalized[0], normalized[0])); data['yolo']['vector'] = normalized[0].tolist() @@ -701,7 +704,7 @@ def loadImage(url, size = 480): params = {} objects = [] - + detect = True if request.method == 'GET': params['iiif_image_url'] = request.args.get('iiif_image_url') elif request.method == 'POST': @@ -717,6 +720,11 @@ def loadImage(url, size = 480): if not params['iiif_image_url']: data['error'] = '[iiif_image_url] parameter not found' return jsonify(data) + if params['norm'] not in ['l1','l2','max']: + params['norm'] = 'l2' + if params['detect'] == False : + detect = False + try: # Create options for Image Embedder base_options_embedder = python.BaseOptions(model_asset_path='models/mobilenet/' + app.config["MOBILENET_MODEL_NAME"]) @@ -758,11 +766,9 @@ def loadImage(url, size = 480): # Vector size for this layer (inumlayers - 1) is 1024 # This "should" return a Unit Vector so we can use "dot_product" in Solr # in theory vision embedder here is already L2. But let's do it manually again. 
- normalized = preprocessing.normalize([vector], norm='l2') - print(np.dot(normalized[0], normalized[0])); + normalized = preprocessing.normalize([vector], norm=params['norm']) # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html data['mobilenet']['vector'] = normalized[0].tolist() - data['mobilenet']['objects'] = objects data['message'] = 'done' return jsonify(data) From da1a18fc4b51ed2694851f4984c332f893128b7a Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 23 May 2024 12:56:47 -0400 Subject: [PATCH 18/27] Python driving me crazy --- nlpserver.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nlpserver.py b/nlpserver.py index 7563f26..e941a33 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -617,8 +617,12 @@ def loadImage(url, size = 640): if not params['labels']: params['labels'] = ['face'] - if params['norm'] not in ['l1','l2','max']: + if not params['norm']: + params['norm'] = 'l2' + + if params['norm'] and params['norm'] not in ['l1','l2','max']: params['norm'] = 'l2' + try: model = YOLO('models/yolo/'+ app.config["YOLO_MODEL_NAME"]) From 33cc504d273a146edf0fc08e75c214f3aa214193 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 23 May 2024 12:59:31 -0400 Subject: [PATCH 19/27] more argument checking --- nlpserver.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/nlpserver.py b/nlpserver.py index e941a33..ac30eb6 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -619,7 +619,7 @@ def loadImage(url, size = 640): if not params['norm']: params['norm'] = 'l2' - + if params['norm'] and params['norm'] not in ['l1','l2','max']: params['norm'] = 'l2' @@ -708,7 +708,6 @@ def loadImage(url, size = 480): params = {} objects = [] - detect = True if request.method == 'GET': params['iiif_image_url'] = request.args.get('iiif_image_url') elif request.method == 'POST': @@ -724,10 +723,12 @@ def loadImage(url, size = 480): if not params['iiif_image_url']: data['error'] = '[iiif_image_url] parameter not found' return jsonify(data) + if not params['norm']: + params['norm'] = 'l2' + if params['norm'] not in ['l1','l2','max']: params['norm'] = 'l2' - if params['detect'] == False : - detect = False + try: # Create options for Image Embedder From 85d9f5711c0b286ac07dcf43422ce41d1badac3e Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 23 May 2024 13:08:37 -0400 Subject: [PATCH 20/27] So distracted. 
Single argument not a list
---
 nlpserver.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/nlpserver.py b/nlpserver.py
index ac30eb6..b4d5023 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -599,7 +599,7 @@ def loadImage(url, size = 640):
     if request.method == 'GET':
         params['iiif_image_url'] = request.args.get('iiif_image_url')
         params['labels'] = request.args.getlist('labels')
-        params['norm'] = request.args.getlist('norm')
+        params['norm'] = request.args.get('norm')
     elif request.method == 'POST':
         params = request.form # postdata
     else:
@@ -707,9 +707,10 @@ def loadImage(url, size = 480):
     data['efficientdet'] = {}
     params = {}
     objects = []
-    
+
     if request.method == 'GET':
         params['iiif_image_url'] = request.args.get('iiif_image_url')
+        params['norm'] = request.args.get('norm')
     elif request.method == 'POST':
         params = request.form # postdata
     else:

From 0946d78b9ebdf3481fcdeb2286157be4d4886c0a Mon Sep 17 00:00:00 2001
From: Diego Pino Navarro
Date: Fri, 24 May 2024 11:16:59 -0400
Subject: [PATCH 21/27] First pass on using BGE-M3 (sentence transformer)

This is above my pay grade ...https://github.com/FlagOpen/FlagEmbedding
see bge-small-en-v1.5

So far i am letting hugging face download it on first call. Not ideal ... so
the app.config["SENTENCE_TRANSFORMER_MODEL_FOLDER"] has no use yet. But once
i move into building docker i will fetch the folder, which requires LFS
access via git (more bytes/apps, etc. Bananas)
---
 nlpserver.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 60 insertions(+), 1 deletion(-)

diff --git a/nlpserver.py b/nlpserver.py
index b4d5023..0d89357 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -15,6 +15,7 @@
 app.config["MOBILENET_MODEL_NAME"] = "mobilenet_v3_small.tflite"
 app.config["EFFICIENTDET_DETECT_MODEL_NAME"] = "efficientdet_lite2.tflite"
 app.config["MOBILENET_DETECT_MODEL_NAME"] = "ssd_mobilenet_v2.tflite"
+app.config["SENTENCE_TRANSFORMER_MODEL_FOLDER"] = "models/sentencetransformer/"
 for variable, value in os.environ.items():
     if variable == "YOLO_MODEL_NAME":
         # Can be set via Docker ENV
@@ -33,7 +34,7 @@
     'last_modified': '2024-05-05',
     'documentation': 'https://github.com/esmero/nlpserver-fasttext/README.md',
     'github': 'https://github.com/esmero/nlpserver-fasttext',
-    'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn', '/fasttext', '/image/yolo', '/image/mobilenet'],
+    'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn', '/fasttext', '/image/yolo', '/image/mobilenet', '/text/sentence_transformer'],
 }
 
 default_data['message'] = 'NLP Server by web64.com - with fasttext addition by digitaldogsbody'
@@ -773,12 +774,70 @@ def loadImage(url, size = 480):
     # This "should" return a Unit Vector so we can use "dot_product" in Solr
     # in theory vision embedder here is already L2. But let's do it manually again.
normalized = preprocessing.normalize([vector], norm=params['norm']) + print(vector.shape[0]) # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html data['mobilenet']['vector'] = normalized[0].tolist() data['mobilenet']['objects'] = objects + data['mobilenet']['modelinfo'] = {'version':app.config["MOBILENET_MODEL_NAME"]} data['message'] = 'done' return jsonify(data) +@app.route("/text/sentence_transformer", methods=['GET', 'POST']) +def sentence_transformer(): + from sentence_transformers import SentenceTransformer + + data = dict(default_data) + data['message'] = 'Sentence transformer (embedding)' + data['sentence_transformer'] = {} + params = {} + # For s2p(short query to long passage) retrieval task, each short query should start with an instruction (instructions see Model List...NOTE link leads to nothing good). But the instruction is not needed for passages. # + instructions = ['Generate a representation for this sentence that can be used to retrieve related articles:'] + + if request.method == 'GET': + params['text'] = request.args.get('text') + params['query'] = request.args.get('query') + elif request.method == 'POST': + params = request.form # postdata + else: + data['error'] = 'Invalid request method' + return jsonify(data) + + if not params: + data['error'] = 'Missing parameters' + return jsonify(data) + + if not params['text']: + data['error'] = '[text] parameter not found' + return jsonify(data) + + if params['query']: + # query so we add the instruction + params['text'] = instructions[0] + params['text']; + + print(params['text']) + + try: + model = SentenceTransformer('BAAI/bge-small-en-v1.5') + except ValueError: + data['error'] = app.config["SENTENCE_TRANSFORMER_MODEL_FOLDER"] + ' Failed' + return jsonify(data) + + if not model: + data['error'] = 'Sentence Transformer model not initialised' + return jsonify(data) + + # class sentence_transformers.SentenceTransformer(model_name_or_path: Optional[str] = None, modules: + # Optional[Iterable[torch.nn.modules.module.Module]] = None, device: Optional[str] = None, prompts: Optional[Dict[str, + #str]] = None, default_prompt_name: Optional[str] = None, cache_folder: Optional[str] = None, + #trust_remote_code: bool = False, revision: Optional[str] = None, token: Optional[Union[bool, str]] = None, use_auth_token: Optional[Union[bool, str]] = None, truncate_dim: Optional[int] = None) + # M3 does not call to the outside, but other hugging face stuff does... why hugging face. + embed = model.encode(params['text'],normalize_embeddings=True) + print(embed.shape[0]) + data['sentence_transformer']['vector'] = embed.tolist() + data['sentence_transformer']['modelinfo'] = model._model_config + data['message'] = "Done" + + return jsonify(data) # @app.route("/tester", methods=['GET', 'POST']) # def tester(): # return render_template('form.html') From f5e07bf7784efa48bf58ff1ffff04ffa9d353232 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sat, 25 May 2024 16:17:34 -0400 Subject: [PATCH 22/27] Adds InsightFace. Not an easy one For now i am letting the app to download the model on a first run but will eventually manage to get it upfront. 
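
One way to get it upfront would be a small warm-up script run while building
the image, so FaceAnalysis finds the 'buffalo_l' pack (its default) already
cached instead of downloading it on the first request. A sketch; the root
path is an assumption and has to match wherever the container keeps its
models:

    # prefetch_insightface.py - run once at image build time (hypothetical)
    from insightface.app import FaceAnalysis

    face_app = FaceAnalysis(name='buffalo_l', root='models/insightface',
                            providers=['CPUExecutionProvider'])
    face_app.prepare(ctx_id=0, det_size=(640, 640))
    print('buffalo_l cached under models/insightface/models')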
I just don't want a huge Docker Container

There is no communication going out to the world but i am not returning
Gender or Age (Unethical and also Spooky)

This model is the same one used internally by Apple (ArcFace)

I need to figure out (at the Archipelago side) how to make a single Processor
generate multiple Flavor Documents, so right now i am extracting the
embedding (l2) vector for only ONE face, the one with the highest score, and
normalizing the bounding box

@alliomeria (just pinging bc this is the lonely ML mountain and there is no
queen/king/dwarf under the mountain, not even a dragon)
---
 nlpserver.py | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 131 insertions(+), 3 deletions(-)

diff --git a/nlpserver.py b/nlpserver.py
index 0d89357..5702b64 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -34,7 +34,7 @@
     'last_modified': '2024-05-05',
     'documentation': 'https://github.com/esmero/nlpserver-fasttext/README.md',
     'github': 'https://github.com/esmero/nlpserver-fasttext',
-    'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn', '/fasttext', '/image/yolo', '/image/mobilenet', '/text/sentence_transformer'],
+    'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn', '/fasttext', '/image/yolo', '/image/mobilenet', '/image/insightface', '/text/sentence_transformer'],
 }
 
 default_data['message'] = 'NLP Server by web64.com - with fasttext addition by digitaldogsbody'
@@ -782,6 +782,136 @@ def loadImage(url, size = 480):
     data['message'] = 'done'
     return jsonify(data)
 
+@app.route("/image/insightface", methods=['GET', 'POST'])
+def insightface():
+    # Import your Libraries
+    import torch
+    from torchvision import transforms
+    from PIL import Image, ImageDraw
+    from pathlib import Path
+    import pandas as pd
+    import numpy as np
+    from pathlib import Path
+    import requests
+    import cv2
+    from io import BytesIO
+    from keras.preprocessing.image import img_to_array
+    import insightface
+    from insightface.app import FaceAnalysis
+    from sklearn import preprocessing
+    import json
+
+    intermediate_features = []
+
+    def loadImage(url, size = 640):
+        try:
+            response = requests.get(url)
+            response.raise_for_status()
+        except requests.exceptions.RequestException as err:
+            data['error'] = err.strerror
+            return False
+
+        img_bytes = BytesIO(response.content)
+        img = Image.open(img_bytes)
+        img = img.convert('RGB')
+        img = np.array(img)
+        img = img[:, :, ::-1].copy()
+        return img
+
+    data = dict(default_data)
+    data['message'] = "Insightface - Parameters: 'iiif_image_url'"
+    data['insightface'] = {}
+    data['insightface']['objects'] = []
+    data['insightface']['vector'] = []
+    data['insightface']['modelinfo'] = {}
+    params = {}
+
+
+    if request.method == 'GET':
+        params['iiif_image_url'] = request.args.get('iiif_image_url')
+        params['norm'] = request.args.get('norm')
+    elif request.method == 'POST':
+        params = request.form # postdata
+    else:
+        data['error'] = 'Invalid request method'
+        return jsonify(data)
+
+    if not params:
+        data['error'] = 'Missing parameters'
+        return jsonify(data)
+
+    if not params['iiif_image_url']:
+        data['error'] = '[iiif_image_url] parameter not found'
+        return jsonify(data)
+
+    if not params['norm']:
+        params['norm'] = 'l2'
+
+    if params['norm'] and params['norm'] not in ['l1','l2','max']:
+        params['norm'] = 'l2'
+ + try: + # providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] + img = loadImage(params['iiif_image_url'], 640) + if img is not False: + app = FaceAnalysis(providers=['CPUExecutionProvider']) + # This will get all models. Inclussive age, gender. (bad juju) etc. But we won't return those + # We could limit to just 'detection' and 'recognition' (last one provides the embeddings) + app.prepare(ctx_id=0, det_size=(640, 640)) + # img = ins_get_image('t1') + # by default will only get me bboxs and eyes, etc. + faces = app.get(img, max_num=1) + #faces = app.det_model.detect(img, max_num=1, metric='default') + for idx, face in enumerate(faces): + if face.embedding is None: + if face.bbox.shape[0] !=0: + faces[idx] = app.models['recognition'].get(img, face.kps) + + # face.normed_embedding() by default will be also L2. + # rimg = app.draw_on(img, faces) + # cv2.imwrite("./t1_output.jpg", rimg) + except ValueError: + data['error'] = 'Failed to execute Insigthface' + return jsonify(data) + + if not app: + data['error'] = ' Insigthface model not initialized' + return jsonify(data) + + if not faces or faces[0].bbox.shape[0] != 4: + data['message'] = 'No face detected' + return jsonify(data) + + # we need to bring data back to percentage + for idx in range(len(faces)): + faces[idx].bbox[0] = faces[idx].bbox[0].item()/img.shape[0] + faces[idx].bbox[1] = faces[idx].bbox[1].item()/img.shape[1] + faces[idx].bbox[2] = faces[idx].bbox[2].item()/img.shape[0] + faces[idx].bbox[3] = faces[idx].bbox[3].item()/img.shape[1] + # It was already normalized by ArcFace BUT the origal array is a list of Objects. This actuallty flattes it to float32. Values stay the same. + normalized = preprocessing.normalize([faces[0].normed_embedding], norm='l2') + + data['insightface']['objects'] = [{ "bbox": faces[0].bbox.tolist(), "score": faces[0].det_score.item()}] + data['insightface']['vector'] = normalized[0].tolist() + data['insightface']['modelinfo'] = {} + data['message'] = 'done' + + # features = extract_features(intermediate_features=intermediate_features,model=model, img = img) // More advanced. Step 2 + #vector = faces.embed(img, verbose=False)[0] + #print(vector.shape[0]) + # Vector size for this layer (i think by default it will be numlayers - 2 so 20) is 576 + # array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample + # This "should" return a Unit Vector so we can use "dot_product" in Solr + # Even if Norm L1 is better for comparison, dot product on Solr gives me less than 1 of itself. So will try with L2 + # normalized = preprocessing.normalize([vector.detach().tolist()], norm=params['norm']) + # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html + # interesting, this is never 1 sharp... like 1.000000005 etc ... 
mmmm print(np.dot(normalized[0], normalized[0])); + #data['insigthface']['vector'] = normalized[0].tolist() + #data['message'] = 'done' + + return jsonify(data) + @app.route("/text/sentence_transformer", methods=['GET', 'POST']) def sentence_transformer(): from sentence_transformers import SentenceTransformer @@ -813,8 +943,6 @@ def sentence_transformer(): if params['query']: # query so we add the instruction params['text'] = instructions[0] + params['text']; - - print(params['text']) try: model = SentenceTransformer('BAAI/bge-small-en-v1.5') From de1633225f16209402d0477406fbb28017640211 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 27 May 2024 23:04:09 -0400 Subject: [PATCH 23/27] Reserved Dimensions of Image --- nlpserver.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/nlpserver.py b/nlpserver.py index 5702b64..c22fa2b 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -884,11 +884,12 @@ def loadImage(url, size = 640): return jsonify(data) # we need to bring data back to percentage + # Shape dimensions are reversed ... [1] is X, [0] is Y for idx in range(len(faces)): - faces[idx].bbox[0] = faces[idx].bbox[0].item()/img.shape[0] - faces[idx].bbox[1] = faces[idx].bbox[1].item()/img.shape[1] - faces[idx].bbox[2] = faces[idx].bbox[2].item()/img.shape[0] - faces[idx].bbox[3] = faces[idx].bbox[3].item()/img.shape[1] + faces[idx].bbox[0] = faces[idx].bbox[0].item()/img.shape[1] + faces[idx].bbox[1] = faces[idx].bbox[1].item()/img.shape[0] + faces[idx].bbox[2] = faces[idx].bbox[2].item()/img.shape[1] + faces[idx].bbox[3] = faces[idx].bbox[3].item()/img.shape[0] # It was already normalized by ArcFace BUT the origal array is a list of Objects. This actuallty flattes it to float32. Values stay the same. normalized = preprocessing.normalize([faces[0].normed_embedding], norm='l2') From 695bef6c299f92a22ceffcc9228901044d163f38 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 24 Jun 2024 15:54:15 -0400 Subject: [PATCH 24/27] print the Embedding Size on the logs (for future data-scientist delights) and set a root/download folder for insightface, which will be pre-loaded by the docker container --- nlpserver.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/nlpserver.py b/nlpserver.py index c22fa2b..eb3ee20 100644 --- a/nlpserver.py +++ b/nlpserver.py @@ -657,12 +657,12 @@ def loadImage(url, size = 640): # features = extract_features(intermediate_features=intermediate_features,model=model, img = img) // More advanced. Step 2 # The embed method is pretty new. vector = model.embed(img, verbose=False)[0] - print(vector.shape[0]) # Vector size for this layer (i think by default it will be numlayers - 2 so 20) is 576 # array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample # This "should" return a Unit Vector so we can use "dot_product" in Solr # Even if Norm L1 is better for comparison, dot product on Solr gives me less than 1 of itself. So will try with L2 normalized = preprocessing.normalize([vector.detach().tolist()], norm=params['norm']) + print('Embedding size ' + str(normalized[0].shape[0])) # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html # interesting, this is never 1 sharp... like 1.000000005 etc ... 
From 695bef6c299f92a22ceffcc9228901044d163f38 Mon Sep 17 00:00:00 2001
From: Diego Pino Navarro
Date: Mon, 24 Jun 2024 15:54:15 -0400
Subject: [PATCH 24/27] Print the Embedding Size in the logs (for future
 data-scientist delights) and set a root/download folder for insightface,
 which will be pre-loaded by the Docker container

---
 nlpserver.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/nlpserver.py b/nlpserver.py
index c22fa2b..eb3ee20 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -657,12 +657,12 @@ def loadImage(url, size = 640):
     # features = extract_features(intermediate_features=intermediate_features,model=model, img = img) // More advanced. Step 2
     # The embed method is pretty new.
     vector = model.embed(img, verbose=False)[0]
-    print(vector.shape[0])
     # Vector size for this layer (i think by default it will be numlayers - 2 so 20) is 576
     # array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample
     # This "should" return a Unit Vector so we can use "dot_product" in Solr
     # Even if Norm L1 is better for comparison, dot product on Solr gives me less than 1 of itself. So will try with L2
     normalized = preprocessing.normalize([vector.detach().tolist()], norm=params['norm'])
+    print('Embedding size ' + str(normalized[0].shape[0]))
     # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html
     # interesting, this is never 1 sharp... like 1.000000005 etc ... check with print(np.dot(normalized[0], normalized[0]))
     data['yolo']['vector'] = normalized[0].tolist()
@@ -774,7 +774,7 @@ def loadImage(url, size = 480):
     # This "should" return a Unit Vector so we can use "dot_product" in Solr
     # in theory vision embedder here is already L2. But let's do it manually again.
     normalized = preprocessing.normalize([vector], norm=params['norm'])
-    print(vector.shape[0])
+    print('Embedding size ' + str(normalized[0].shape[0]))
     # see https://nightlies.apache.org/solr/draft-guides/solr-reference-guide-antora/solr/10_0/query-guide/dense-vector-search.html
     data['mobilenet']['vector'] = normalized[0].tolist()
     data['mobilenet']['objects'] = objects
@@ -849,13 +849,14 @@ def loadImage(url, size = 640):
     if params['norm'] and params['norm'] not in ['l1','l2','max']:
         params['norm'] = 'l2'
 
-
+    app = False
     try:
         # providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
         img = loadImage(params['iiif_image_url'], 640)
 
         if img is not False:
-            app = FaceAnalysis(providers=['CPUExecutionProvider'])
+            app = FaceAnalysis(name='buffalo_l', root='models/insightface', providers=['CPUExecutionProvider'])
             # This will get all models. Inclusive of age, gender (bad juju), etc. But we won't return those
             # We could limit to just 'detection' and 'recognition' (last one provides the embeddings)
             app.prepare(ctx_id=0, det_size=(640, 640))
@@ -876,7 +877,7 @@ def loadImage(url, size = 640):
         return jsonify(data)
 
     if not app:
-        data['error'] = ' Insightface model not initialized'
+        data['error'] = 'Insightface model not initialized'
         return jsonify(data)
 
     if not faces or faces[0].bbox.shape[0] != 4:
@@ -891,8 +892,8 @@ def loadImage(url, size = 640):
         faces[idx].bbox[2] = faces[idx].bbox[2].item()/img.shape[1]
         faces[idx].bbox[3] = faces[idx].bbox[3].item()/img.shape[0]
     # It was already normalized by ArcFace BUT the original array is a list of Objects. This actually flattens it to float32. Values stay the same.
-    normalized = preprocessing.normalize([faces[0].normed_embedding], norm='l2')
-
+    normalized = preprocessing.normalize([faces[0].normed_embedding], norm=params['norm'])
+    print('Embedding size ' + str(normalized[0].shape[0]))
     data['insightface']['objects'] = [{ "bbox": faces[0].bbox.tolist(), "score": faces[0].det_score.item()}]
     data['insightface']['vector'] = normalized[0].tolist()
     data['insightface']['modelinfo'] = {}
     data['message'] = 'done'
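Since FaceAnalysis fetches its model pack on first instantiation when it is missing from root, the Docker pre-load this commit message mentions can be as small as running the same constructor once at build time. A sketch under that assumption (insightface nests its own models/ folder under root, so the weights land below models/insightface):

from insightface.app import FaceAnalysis

# same arguments as the endpoint uses; run once (e.g. in a Dockerfile RUN step)
# so the buffalo_l weights already exist when the container starts serving
app = FaceAnalysis(name='buffalo_l', root='models/insightface',
                   providers=['CPUExecutionProvider'])
app.prepare(ctx_id=0, det_size=(640, 640))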
From a823aeb4770466a5bad59d65aa118e3285c7ba33 Mon Sep 17 00:00:00 2001
From: Diego Pino Navarro
Date: Thu, 11 Jul 2024 19:01:56 -0400
Subject: [PATCH 25/27] Keras is driving me nuts

---
 nlpserver.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/nlpserver.py b/nlpserver.py
index eb3ee20..d577d38 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -4,7 +4,8 @@
 # To run:
 # $ nohup python3 nlpserver.py >logs/nlpserver_out.log 2>logs/nlpserver_errors.log &
 #
-from flask import Flask, jsonify, abort, request, send_from_directory, render_template
+from flask import Flask, jsonify, request, send_from_directory, render_template
+from tensorflow.keras.utils import img_to_array
 import os
 
 app = Flask(__name__)
@@ -558,10 +559,8 @@ def yolo():
     from ultralytics import YOLO
     import pandas as pd
     import numpy as np
-    from pathlib import Path
     import requests
     from io import BytesIO
-    from keras.preprocessing.image import img_to_array
     from sklearn import preprocessing
     import json
 
@@ -676,7 +675,6 @@ def mobilenet():
     from pathlib import Path
     import pandas as pd
     import numpy as np
-    from pathlib import Path
     import requests
     from io import BytesIO
     from sklearn import preprocessing
@@ -785,7 +783,6 @@ def loadImage(url, size = 480):
 
 @app.route("/image/insightface", methods=['GET', 'POST'])
 def insightface():
     # Import your Libraries
-    import torch
     from torchvision import transforms
     from PIL import Image, ImageDraw
     from pathlib import Path
     import numpy as np
     from pathlib import Path
     import requests
-    import cv2
     from io import BytesIO
-    from keras.preprocessing.image import img_to_array
-    import insightface
     from insightface.app import FaceAnalysis
     from sklearn import preprocessing
     import json
@@ -871,7 +865,6 @@ def loadImage(url, size = 640):
 
         # face.normed_embedding() by default will also be L2.
         # rimg = app.draw_on(img, faces)
-        # cv2.imwrite("./t1_output.jpg", rimg)
     except ValueError:
         data['error'] = 'Failed to execute Insightface'
         return jsonify(data)
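For context on the commit subject: the keras.preprocessing.image import path was deprecated in favor of tensorflow.keras.utils in recent TensorFlow releases, which is the path this patch settles on. A minimal check, assuming TensorFlow and Pillow are installed:

from tensorflow.keras.utils import img_to_array
from PIL import Image

img = Image.new('RGB', (640, 640))
arr = img_to_array(img)  # float32 numpy array by default
print(arr.shape, arr.dtype)  # (640, 640, 3) float32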
From e7cb7ba20879a0b8d7d3527736c73da793a9e3a2 Mon Sep 17 00:00:00 2001
From: Diego Pino Navarro
Date: Wed, 15 Jan 2025 16:50:49 -0500
Subject: [PATCH 26/27] WIP ViT endpoint. Also Polyglot is having issues, so
 commenting out the check

---
 nlpserver.py | 125 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 118 insertions(+), 7 deletions(-)

diff --git a/nlpserver.py b/nlpserver.py
index d577d38..1241f3c 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -35,7 +35,7 @@
     'last_modified': '2024-05-05',
     'documentation': 'https://github.com/esmero/nlpserver-fasttext/README.md',
     'github': 'https://github.com/esmero/nlpserver-fasttext',
-    'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn', '/fasttext', '/image/yolo', '/image/mobilenet', '/image/insightface', '/text/sentence_transformer'],
+    'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn', '/fasttext', '/image/yolo', '/image/mobilenet', '/image/insightface','/image/vision_transformer','/text/sentence_transformer'],
 }
 
 default_data['message'] = 'NLP Server by web64.com - with fasttext addition by digitaldogsbody'
@@ -95,14 +95,14 @@ def status():
     except ImportError:
         data['missing_libraries'].append('polyglot')
     else:
-        from polyglot.downloader import Downloader
-        dwnld = Downloader()
+        #from polyglot.downloader import Downloader
+        #dwnld = Downloader()
 
         data['polyglot_lang_models'] = {}
 
-        for info in sorted(dwnld.collections(), key=str):
-            status = dwnld.status(info)
-            if info.id.startswith('LANG:') and status != 'not installed':
-                data['polyglot_lang_models'][info.id] = status
+        #for info in sorted(dwnld.collections(), key=str):
+        #    status = dwnld.status(info)
+        #    if info.id.startswith('LANG:') and status != 'not installed':
+        #        data['polyglot_lang_models'][info.id] = status
 
     try:
         import fasttext
@@ -961,6 +961,117 @@ def sentence_transformer():
     data['message'] = "Done"
     return jsonify(data)
 
+@app.route("/image/vision_transformer", methods=['GET', 'POST'])
+def vision_transformer():
+    # Import your Libraries
+    from transformers import ViTForImageClassification, ViTImageProcessor
+    import torch
+    from PIL import Image
+    from pathlib import Path
+    import pandas as pd
+    import numpy as np
+    import requests
+    from io import BytesIO
+    from sklearn import preprocessing
+
+    intermediate_features = []
+
+    def loadImage(url, size = 224):
+        try:
+            response = requests.get(url)
+            response.raise_for_status()
+        except requests.exceptions.RequestException as err:
+            data['error'] = str(err)
+            return jsonify(data)
+
+        img_bytes = BytesIO(response.content)
+        img = Image.open(img_bytes)
+        img = img.convert('RGB')
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
+        inputs = processor(images=img, return_tensors="pt").to(device)
+        pixel_values = inputs.pixel_values
+        return pixel_values
+
+    data = dict(default_data)
+    data['message'] = "ViT - Parameters: 'iiif_image_url', 'norm'"
+    data['vision_transformer'] = {}
+
+    params = {}
+    objects = []
+
+    if request.method == 'GET':
+        params['iiif_image_url'] = request.args.get('iiif_image_url')
+        params['norm'] = request.args.get('norm')
+    elif request.method == 'POST':
+        params = request.form # postdata
+    else:
+        data['error'] = 'Invalid request method'
+        return jsonify(data)
+
+    if not params:
+        data['error'] = 'Missing parameters'
+        return jsonify(data)
+
+    if not params['iiif_image_url']:
+        data['error'] = '[iiif_image_url] parameter not found'
+        return jsonify(data)
+    if not params['norm']:
+        params['norm'] = 'l2'
+
+    if params['norm'] not in ['l1','l2','max']:
+        params['norm'] = 'l2'
+
+
+    try:
+        # Create options for Image Embedder
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        # or google/vit-base-patch16-224-in21k which has 21,843 classes vs 1K on the normal one,
+        # but has no actual label/output, so requires "refining", see https://huggingface.co/google/vit-base-patch16-224-in21k
+        # We should also check https://huggingface.co/apple/DFN5B-CLIP-ViT-H-14-378
+        model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')
+        model.to(device)
+        img = loadImage(params['iiif_image_url'], 224)
+        objects = []
+        normalized = []
+        with torch.no_grad():
+            outputs = model(img, output_hidden_states=True)
+            logits = outputs.logits
+            best_prediction = logits.argmax(-1)
+            print("Best Predicted class:", model.config.id2label[best_prediction.item()])
+            # All predictions
+            allpredictions = (-logits).argsort(axis=-1)[:, :3]
+            for prediction in allpredictions[0]:
+                detection = {}
+                detection["bounding_box"] = {}
+                #TODO convert attention grey maps into actual annotations. For now 100% coverage
+                detection["bounding_box"]["origin_y"] = 0
+                detection["bounding_box"]["origin_x"] = 0
+                detection["bounding_box"]["width"] = 1
+                detection["bounding_box"]["height"] = 1
+                detection["label"] = model.config.id2label[prediction.item()]
+                # this is a tensor so we need to detach and then get the item
+                detection["score"] = torch.softmax(logits, 1)[0][prediction.item()].detach().item()
+                objects.append(detection)
+
+            vec1 = outputs.hidden_states[-1][0, 0, :].squeeze(0)
+            normalized = preprocessing.normalize([vec1.detach().tolist()], norm=params['norm'])
+            print('Embedding size ' + str(normalized[0].shape[0]))
+            normalized = normalized[0].tolist()
+
+        #print(output.pooler_output.shape)
+        #print(output.last_hidden_state.shape)
+    except ValueError:
+        data['error'] = 'Failed to execute ViT'
+        return jsonify(data)
+
+    data['vision_transformer']['vector'] = normalized
+    data['vision_transformer']['objects'] = objects
+    data['vision_transformer']['modelinfo'] = {'version':'vit-base-patch16-224'}
+    data['message'] = 'done'
+    return jsonify(data)
+
 # @app.route("/tester", methods=['GET', 'POST'])
 # def tester():
 #     return render_template('form.html')
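A quick smoke test for the WIP endpoint once the server is running; the host/port (localhost:6400) and the sample IIIF URL below are placeholders for whatever your deployment actually uses:

import requests

resp = requests.get(
    'http://localhost:6400/image/vision_transformer',
    params={
        'iiif_image_url': 'https://example.org/iiif/2/sample/full/full/0/default.jpg',
        'norm': 'l2',
    },
)
result = resp.json()
print(result['message'])  # 'done' on success
print(result['vision_transformer']['objects'])  # top 3 labels, full-coverage bboxes for now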
From 4ebe3f2e590f315f91d6e3a8e5629338bf1e864b Mon Sep 17 00:00:00 2001
From: Diego Pino Navarro
Date: Sat, 25 Jan 2025 15:15:07 -0500
Subject: [PATCH 27/27] Try to convert Greyscale to RGB via a buffer

---
 nlpserver.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/nlpserver.py b/nlpserver.py
index 1241f3c..c02d15d 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -587,8 +587,11 @@ def loadImage(url, size = 640):
     img = Image.open(img_bytes)
     img = img.convert('RGB')
     img = img.resize((size,size), Image.NEAREST)
-    img = img_to_array(img)
-    return img
+
+    rgbimg = Image.new("RGB", img.size)
+    rgbimg.paste(img)
+    rgbimg = img_to_array(rgbimg)
+    return rgbimg
 
 data = dict(default_data)
 data['message'] = "Yolo - Parameters: 'iiif_image_url', 'labels' a list of valid labels for object detection (default: face)"
@@ -696,9 +699,11 @@ def loadImage(url, size = 480):
     img = Image.open(img_bytes)
     img = img.convert('RGB')
     img.thumbnail((size,size), Image.NEAREST)
+    rgbimg = Image.new("RGB", img.size)
+    rgbimg.paste(img)
     # Media pipe uses a different format than YOLO, img here is PIL
-    img = np.asarray(img)
-    return img
+    rgbimg = np.asarray(rgbimg)
+    return rgbimg
 
 data = dict(default_data)
 data['message'] = "mobilenet - Parameters: 'iiif_image_url'"
@@ -808,9 +813,11 @@ def loadImage(url, size = 640):
     img_bytes = BytesIO(response.content)
     img = Image.open(img_bytes)
     img = img.convert('RGB')
-    img = np.array(img)
-    img = img[:, :, ::-1].copy()
-    return img
+    rgbimg = Image.new("RGB", img.size)
+    rgbimg.paste(img)
+    rgbimg = np.array(rgbimg)
+    rgbimg = rgbimg[:, :, ::-1].copy()
+    return rgbimg
 
 data = dict(default_data)
 data['message'] = "Insightface - Parameters: 'iiif_image_url'"
@@ -987,9 +994,11 @@ def loadImage(url, size = 224):
     img_bytes = BytesIO(response.content)
     img = Image.open(img_bytes)
     img = img.convert('RGB')
+    rgbimg = Image.new("RGB", img.size)
+    rgbimg.paste(img)
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
-    inputs = processor(images=img, return_tensors="pt").to(device)
+    inputs = processor(images=rgbimg, return_tensors="pt").to(device)
     pixel_values = inputs.pixel_values
     return pixel_values
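The paste-into-a-fresh-canvas trick this patch applies across all four loadImage helpers can be exercised on its own. A minimal sketch assuming only Pillow and numpy, with a synthetic greyscale image standing in for a IIIF fetch:

import numpy as np
from PIL import Image

img = Image.new('L', (640, 640), color=128)  # greyscale source
img = img.convert('RGB')
# pasting into a brand-new RGB canvas forces a clean 3-channel buffer even for
# palette ('P') or greyscale ('L'/'LA') sources where convert() alone has been flaky
rgbimg = Image.new('RGB', img.size)
rgbimg.paste(img)
arr = np.asarray(rgbimg)
print(arr.shape)  # (640, 640, 3) — always three channels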