Fixed PyTorch image classification with CPU and CUDA and preprocessed ImageNet on Linux and Windows
gfursin committed Dec 2, 2022
1 parent 2e090e3 commit 0626fa7
Showing 14 changed files with 203 additions and 67 deletions.
50 changes: 47 additions & 3 deletions cm-mlops/script/app-image-classification-torch-py/_cm.json
@@ -22,16 +22,48 @@
"tags": "get,python"
},
{
"tags": "get,dataset,preprocessed,image-classification"
"tags": "get,dataset,imagenet,image-classification,preprocessed"
},
{
"tags": "get,dataset-aux,image-classification"
"tags": "get,dataset-aux,imagenet-aux,image-classification"
},
{
"tags": "get,imagenet-helper"
},
{
"tags": "get,generic-python-lib,_torch"
"tags": "get,ml-model,image-classification,resnet50,_pytorch,_fp32"
},
{
"tags": "get,generic-python-lib,_torch",
"skip_if_env": {
"USE_CUDA": [
"yes"
]
}
},
{
"tags": "get,generic-python-lib,_torch_cuda",
"enable_if_env": {
"USE_CUDA": [
"yes"
]
}
},
{
"tags": "get,generic-python-lib,_torchvision",
"skip_if_env": {
"USE_CUDA": [
"yes"
]
}
},
{
"tags": "get,generic-python-lib,_torchvision_cuda",
"enable_if_env": {
"USE_CUDA": [
"yes"
]
}
}
],
"tags": [
@@ -40,5 +72,17 @@
"torch",
"python"
],
"variations": {
"cuda": {
"env": {
"USE_CUDA": "yes"
},
"deps": [
{
"tags": "get,cuda"
}
]
}
},
"uid": "e3986ae887b84ca8"
}
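The new skip_if_env / enable_if_env conditions are what switch the PyTorch dependencies between the CPU and CUDA builds. The sketch below is only an illustrative approximation of that gating (it is not the actual CM resolver): once the cuda variation sets USE_CUDA to "yes", the plain _torch dependency is skipped and the _torch_cuda one becomes active.

# Illustrative sketch (not the actual CM implementation) of how the
# skip_if_env / enable_if_env conditions in this _cm.json could be evaluated.
def dependency_is_active(dep, env):
    # Skip the dependency if any skip_if_env key matches one of its listed values.
    for key, values in dep.get('skip_if_env', {}).items():
        if env.get(key) in values:
            return False
    # Enable-only dependencies require every enable_if_env key to match.
    for key, values in dep.get('enable_if_env', {}).items():
        if env.get(key) not in values:
            return False
    return True

deps = [
    {'tags': 'get,generic-python-lib,_torch',
     'skip_if_env': {'USE_CUDA': ['yes']}},
    {'tags': 'get,generic-python-lib,_torch_cuda',
     'enable_if_env': {'USE_CUDA': ['yes']}},
]

# With the "cuda" variation selected, USE_CUDA is "yes",
# so only the _torch_cuda dependency remains active.
env = {'USE_CUDA': 'yes'}
print([d['tags'] for d in deps if dependency_is_active(d, env)])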
25 changes: 11 additions & 14 deletions cm-mlops/script/app-image-classification-torch-py/run.bat
@@ -1,20 +1,17 @@
rem connect CM portable scripts with CK env
set ML_MODEL_IMAGE_HEIGHT=224
set ML_MODEL_IMAGE_WIDTH=224
set CK_BATCH_SIZE=1
set CK_BATCH_COUNT=1
set CK_ENV_ONNX_MODEL_ONNX_FILEPATH=
set CK_ENV_DATASET_IMAGENET_PREPROCESSED_DIR=%CM_DATASET_PREPROCESSED_PATH%
set CK_ENV_DATASET_IMAGENET_PREPROCESSED_SUBSET_FOF=%CM_DATASET_PREPROCESSED_PATH%\names.txt
set CK_CAFFE_IMAGENET_SYNSET_WORDS_TXT=%CM_DATASET_AUX_PATH%\synsets.txt
set CK_ENV_DATASET_IMAGENET_PREPROCESSED_DATA_TYPE=float32
set CK_RESULTS_DIR=%CM_TMP_CURRENT_SCRIPT_PATH%\results
set ML_MODEL_DATA_TYPE=float32
set USE_LLVM=no

set CM_ML_TORCH_MODEL_NAME=resnet50
set CM_ML_MODEL_INPUT_DATA_TYPE=float32
set CM_ML_MODEL_IMAGE_HEIGHT=224
set CM_ML_MODEL_IMAGE_WIDTH=224

set CK_BATCH_SIZE=%CM_BATCH_SIZE%
set CK_BATCH_COUNT=%CM_BATCH_COUNT%
rem set CM_DATASET_IMAGENET_PREPROCESSED_DIR=%CM_DATASET_PREPROCESSED_PATH%

set CM_DATASET_IMAGENET_PREPROCESSED_DIR=%CM_DATASET_PREPROCESSED_FULL_PATH%
set CM_CAFFE_IMAGENET_SYNSET_WORDS_TXT=%CM_DATASET_AUX_PATH%\synset_words.txt
set CM_DATASET_IMAGENET_PREPROCESSED_DATA_TYPE=float32
set CM_RESULTS_DIR=%CM_TMP_CURRENT_SCRIPT_PATH%\results
set ML_MODEL_DATA_LAYOUT=NCHW

%CM_PYTHON_BIN_WITH_PATH% -m pip install -r %CM_TMP_CURRENT_SCRIPT_PATH%\requirements.txt
IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL%
25 changes: 9 additions & 16 deletions cm-mlops/script/app-image-classification-torch-py/run.sh
@@ -3,22 +3,15 @@
CM_TMP_CURRENT_SCRIPT_PATH=${CM_TMP_CURRENT_SCRIPT_PATH:-$PWD}

# connect CM intelligent components with CK env
export ML_MODEL_IMAGE_HEIGHT=224
export ML_MODEL_IMAGE_WIDTH=224
export CK_BATCH_SIZE=1
export CK_BATCH_COUNT=1
export CK_ENV_ONNX_MODEL_ONNX_FILEPATH=""
export CK_ENV_DATASET_IMAGENET_PREPROCESSED_DIR=${CM_DATASET_PREPROCESSED_PATH}
export CK_ENV_DATASET_IMAGENET_PREPROCESSED_SUBSET_FOF=${CM_DATASET_PREPROCESSED_PATH}/names.txt
export CK_CAFFE_IMAGENET_SYNSET_WORDS_TXT=${CM_DATASET_AUX_PATH}/synsets.txt
export CK_ENV_DATASET_IMAGENET_PREPROCESSED_DATA_TYPE=float32
export CK_RESULTS_DIR=${CM_TMP_CURRENT_SCRIPT_PATH}/results
export ML_MODEL_DATA_TYPE=float32
export USE_LLVM=no


export CK_BATCH_SIZE=${CM_BATCH_SIZE}
export CK_BATCH_COUNT=${CM_BATCH_COUNT}
export CM_ML_TORCH_MODEL_NAME=resnet50
export CM_ML_MODEL_INPUT_DATA_TYPE=float32
export CM_ML_MODEL_IMAGE_HEIGHT=224
export CM_ML_MODEL_IMAGE_WIDTH=224
export CM_DATASET_IMAGENET_PREPROCESSED_DIR=${CM_DATASET_PREPROCESSED_FULL_PATH}
export CM_CAFFE_IMAGENET_SYNSET_WORDS_TXT=${CM_DATASET_AUX_PATH}/synset_words.txt
export CM_DATASET_IMAGENET_PREPROCESSED_DATA_TYPE=float32
export CM_RESULTS_DIR=${CM_TMP_CURRENT_SCRIPT_PATH}/results
export ML_MODEL_DATA_LAYOUT=NCHW

${CM_PYTHON_BIN} -m pip install -r ${CM_TMP_CURRENT_SCRIPT_PATH}/requirements.txt
test $? -eq 0 || exit 1
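Both run.bat above and this run.sh now hand the dataset locations to the Python application purely through CM_* environment variables. A hypothetical sanity check of that handoff (not part of the commit; variable names are taken from the scripts above) could look like this:

# Hypothetical check of the environment handoff performed by run.sh / run.bat
# before the classifier is launched (illustrative only).
import os

preprocessed_dir = os.environ.get('CM_DATASET_IMAGENET_PREPROCESSED_DIR', '')
synset_words = os.environ.get('CM_CAFFE_IMAGENET_SYNSET_WORDS_TXT', '')

assert os.path.isdir(preprocessed_dir), 'preprocessed ImageNet directory not found'
assert os.path.isfile(synset_words), 'synset_words.txt not found'

# USE_CUDA is the switch the application script checks (see the diff below);
# it is set to "yes" only when the cuda variation is selected.
print('USE_CUDA =', os.environ.get('USE_CUDA', ''))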
@@ -6,23 +6,40 @@
import shutil
import numpy as np
import torch
import torchvision.models as models

import imagenet_helper
from imagenet_helper import (load_preprocessed_batch, image_list, class_labels, BATCH_SIZE)

TORCH_MODEL_NAME = os.getenv('ML_TORCH_MODEL_NAME', 'resnet50')

## Writing the results out:
#
RESULTS_DIR = os.getenv('CK_RESULTS_DIR')
FULL_REPORT = os.getenv('CK_SILENT_MODE', '0') in ('NO', 'no', 'OFF', 'off', '0')
RESULTS_DIR = os.getenv('CM_RESULTS_DIR')
FULL_REPORT = os.getenv('CM_SILENT_MODE', '0') in ('NO', 'no', 'OFF', 'off', '0')

## Processing by batches:
#
BATCH_COUNT = int(os.getenv('CK_BATCH_COUNT', 1))
BATCH_COUNT = int(os.getenv('CM_BATCH_COUNT', 1))

## Enabling GPU if available and not disabled:
#
USE_CUDA = torch.cuda.is_available() and (os.getenv('CK_DISABLE_CUDA', '0') in ('NO', 'no', 'OFF', 'off', '0'))
USE_CUDA = (os.getenv('USE_CUDA', '').strip()=='yes')


labels_path = os.environ['CM_CAFFE_IMAGENET_SYNSET_WORDS_TXT']

def load_labels(labels_filepath):
my_labels = []
input_file = open(labels_filepath, 'r')
for l in input_file:
my_labels.append(l.strip())
return my_labels


labels = load_labels(labels_path)


data_layout = os.environ['ML_MODEL_DATA_LAYOUT']



def main():
@@ -39,14 +56,11 @@ def main():
os.mkdir(RESULTS_DIR)

# Load the [cached] Torch model
torchvision_version = '' # master by default
try:
import torchvision
torchvision_version = ':v' + torchvision.__version__
except Exception:
pass

model = torch.hub.load('pytorch/vision' + torchvision_version, TORCH_MODEL_NAME, pretrained=True)
path_to_model_pth = os.environ['CM_ML_MODEL_FILE_WITH_PATH']

model=models.resnet50(pretrained=False)
model.load_state_dict(torch.load(path_to_model_pth))

model.eval()

# move the model to GPU for speed if available
@@ -63,13 +77,57 @@
first_classification_time = 0
images_loaded = 0

image_path = os.environ.get('CM_INPUT','')
if image_path !='':

normalize_data_bool=True
subtract_mean_bool=False

from PIL import Image

def load_and_resize_image(image_filepath, height, width):
pillow_img = Image.open(image_filepath).resize((width, height)) # sic! The order of dimensions in resize is (W,H)

input_data = np.float32(pillow_img)

# Normalize
if normalize_data_bool:
input_data = input_data/127.5 - 1.0

# Subtract mean value
if subtract_mean_bool:
if len(given_channel_means):
input_data -= given_channel_means
else:
input_data -= np.mean(input_data)

# print(np.array(pillow_img).shape)
nhwc_data = np.expand_dims(input_data, axis=0)

if data_layout == 'NHWC':
# print(nhwc_data.shape)
return nhwc_data
else:
nchw_data = nhwc_data.transpose(0,3,1,2)
# print(nchw_data.shape)
return nchw_data

BATCH_COUNT=1


for batch_index in range(BATCH_COUNT):
batch_number = batch_index+1
if FULL_REPORT or (batch_number % 10 == 0):
print("\nBatch {} of {}".format(batch_number, BATCH_COUNT))

begin_time = time.time()
batch_data, image_index = load_preprocessed_batch(image_list, image_index)

if image_path=='':
batch_data, image_index = load_preprocessed_batch(image_list, image_index)
else:
batch_data = load_and_resize_image(image_path, 224, 224)
image_index = 1

torch_batch = torch.from_numpy( batch_data )

load_time = time.time() - begin_time
@@ -101,11 +159,19 @@ def main():
for index_in_batch in range(BATCH_SIZE):
softmax_vector = batch_results[index_in_batch][bg_class_offset:] # skipping the background class on the left (if present)
global_index = batch_index * BATCH_SIZE + index_in_batch

res_file = os.path.join(RESULTS_DIR, image_list[global_index])

with open(res_file + '.txt', 'w') as f:
for prob in softmax_vector:
f.write('{}\n'.format(prob))


top5_indices = list(reversed(softmax_vector.argsort()))[:5]
for class_idx in top5_indices:
print("\t{}\t{}\t{}".format(class_idx, softmax_vector[class_idx], labels[class_idx]))
print("")


test_time = time.time() - test_time_begin

if BATCH_COUNT > 1:
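Taken together, the changes to this application script replace the torch.hub download with a locally cached ResNet-50 checkpoint (CM_ML_MODEL_FILE_WITH_PATH), add an optional single-image path via CM_INPUT, and print the top-5 labels. A condensed, self-contained sketch of that flow, with placeholder paths standing in for the CM_* variables, is:

# Condensed sketch of the new inference path (placeholder paths, not the CM wiring).
import numpy as np
import torch
import torchvision.models as models
from PIL import Image

model = models.resnet50(pretrained=False)                    # same constructor as in the diff
model.load_state_dict(torch.load('/path/to/resnet50.pth'))   # placeholder for CM_ML_MODEL_FILE_WITH_PATH
model.eval()

img = Image.open('/path/to/image.jpg').resize((224, 224))    # placeholder for CM_INPUT; resize takes (W, H)
data = np.float32(img) / 127.5 - 1.0                          # normalize to [-1, 1], as in load_and_resize_image
data = np.expand_dims(data, axis=0).transpose(0, 3, 1, 2)     # NHWC -> NCHW (ML_MODEL_DATA_LAYOUT)

with torch.no_grad():
    probs = torch.nn.functional.softmax(model(torch.from_numpy(data)), dim=1)[0]

for idx in probs.argsort(descending=True)[:5]:                # top-5, as printed by the script
    print(int(idx), float(probs[idx]))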
4 changes: 4 additions & 0 deletions cm-mlops/script/get-dataset-imagenet-helper/_cm.json
@@ -5,6 +5,10 @@
"category": "ML/AI datasets",
"category_sort":8500,
"cache": true,
"new_env_keys": [
"+PYTHONPATH",
"CM_DATASET_IMAGENET_HELPER_PATH"
],
"tags": [
"get",
"imagenet",
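Exporting +PYTHONPATH and CM_DATASET_IMAGENET_HELPER_PATH is what lets the application script above simply import imagenet_helper. An illustrative consumer is shown below; the explicit sys.path fallback is an assumption for standalone use, not part of the commit.

# Illustrative use of the helper once its path is exported by this script.
import os
import sys

helper_path = os.environ.get('CM_DATASET_IMAGENET_HELPER_PATH', '')
if helper_path and helper_path not in sys.path:
    sys.path.insert(0, helper_path)           # fallback if PYTHONPATH was not already extended

from imagenet_helper import load_preprocessed_batch, image_list, BATCH_SIZE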
@@ -62,10 +62,7 @@
## Preprocessed input images' properties:
#
IMAGE_DIR = os.getenv('CM_DATASET_IMAGENET_PREPROCESSED_DIR')
IMAGE_LIST_FILE_NAME = os.getenv('CM_DATASET_IMAGENET_PREPROCESSED_SUBSET_FOF')
IMAGE_LIST_FILE = os.path.join(IMAGE_DIR, IMAGE_LIST_FILE_NAME)
IMAGE_DATA_TYPE = os.getenv('CM_DATASET_IMAGENET_PREPROCESSED_DATA_TYPE', 'float32')
IMAGE_DATA_TYPE = "uint8"


def load_labels(labels_filepath):
@@ -79,12 +76,16 @@ def load_labels(labels_filepath):


# Load preprocessed image filenames:
with open(IMAGE_LIST_FILE, 'r') as f:
image_list = [ s.strip() for s in f ]

image_list = []
all_images = os.listdir(IMAGE_DIR)
for image_file in all_images:
if image_file.endswith('.npy'):
image_list.append(image_file)

def load_image_by_index_and_normalize(image_index):

img_file = os.path.join(IMAGE_DIR, image_list[image_index])

img = np.fromfile(img_file, np.dtype(IMAGE_DATA_TYPE))
#img = img.reshape((1,MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH, 3))
img.resize(224*224*3)
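The helper now enumerates the preprocessed .npy files itself instead of reading a names.txt list, and treats the data as uint8. A rough illustration of that reading pattern follows; the directory is a placeholder, and the slice-and-reshape mirrors the helper's 224x224x3 assumption rather than its exact resize call.

# Rough illustration of how one preprocessed sample is read (placeholder path).
import os
import numpy as np

IMAGE_DIR = '/path/to/preprocessed/imagenet/NHWC'   # placeholder for CM_DATASET_IMAGENET_PREPROCESSED_DIR
image_list = sorted(f for f in os.listdir(IMAGE_DIR) if f.endswith('.npy'))

img = np.fromfile(os.path.join(IMAGE_DIR, image_list[0]), dtype=np.uint8)
img = img[:224 * 224 * 3].reshape(224, 224, 3)      # uint8 HWC image, as assumed by the helper
print(image_list[0], img.shape, img.dtype)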
6 changes: 0 additions & 6 deletions cm-mlops/script/get-dataset-imagenet-helper/run.sh

This file was deleted.

4 changes: 2 additions & 2 deletions cm-mlops/script/get-mlperf-inference-src/customize.py
@@ -6,8 +6,8 @@ def preprocess(i):

os_info = i['os_info']

if os_info['platform'] == 'windows':
return {'return':1, 'error': 'Windows is not supported in this script yet'}
# if os_info['platform'] == 'windows':
# return {'return':1, 'error': 'Windows is not supported in this script yet'}

env = i['env']
meta = i['meta']
10 changes: 10 additions & 0 deletions cm-mlops/script/get-mlperf-inference-src/run.bat
@@ -0,0 +1,10 @@
@echo off

echo ******************************************************
echo Cloning Mlcommons from %CM_GIT_URL% with branch %CM_GIT_CHECKOUT% %CM_GIT_DEPTH% %CM_GIT_RECURSE_SUBMODULES% ...

git clone %CM_GIT_RECURSE_SUBMODULES% %CM_GIT_URL% %CM_GIT_DEPTH% inference
cd inference
git checkout -b "%CM_GIT_CHECKOUT%"

exit /b 0
2 changes: 1 addition & 1 deletion cm-mlops/script/get-mlperf-inference-src/run.sh
@@ -4,7 +4,7 @@ CUR_DIR=$PWD
SCRIPT_DIR=${CM_TMP_CURRENT_SCRIPT_PATH}

echo "******************************************************"
echo "Cloning Mlcommons from ${CM_GIT_URL} with branch ${CM_GIT_CHECKOUT} ${CM_GIT_DEPTH} ${CM_GIT_RECURSE_SUBMODULES}..."
echo "Cloning Mlcommons from ${CM_GIT_URL} with branch ${CM_GIT_CHECKOUT} ${CM_GIT_DEPTH} ${CM_GIT_RECURSE_SUBMODULES} ..."

if [ ! -d "inference" ]; then
if [ -z ${CM_GIT_SHA} ]; then
28 changes: 25 additions & 3 deletions cm-mlops/script/get-preprocessed-dataset-imagenet/customize.py
@@ -22,10 +22,32 @@ def preprocess(i):
shutil.copy(os.path.join(env['CM_DATASET_AUX_PATH'], "val.txt"), os.path.join(env['CM_DATASET_PATH'],
"val_map.txt"))

if not exists(os.path.join(env['CM_DATASET_PREPROCESSED_PATH'], "val_map.txt")):
shutil.copy(os.path.join(env['CM_DATASET_AUX_PATH'], "val.txt"), os.path.join(env['CM_DATASET_PREPROCESSED_PATH'],
"val_map.txt"))
preprocessed_path = env['CM_DATASET_PREPROCESSED_PATH']

if not exists(os.path.join(preprocessed_path, "val_map.txt")):
shutil.copy(os.path.join(env['CM_DATASET_AUX_PATH'], "val.txt"),
os.path.join(preprocessed_path, "val_map.txt"))

if env.get('CM_IMAGENET_QUANTIZED', "no") == "yes":
env['CM_QUANTIZE'] = "1"


return {'return': 0}

def postprocess(i):

env = i['env']

# finalize path
preprocessed_path = env['CM_DATASET_PREPROCESSED_PATH']
img_format = os.environ.get('CM_ML_MODEL_DATA_LAYOUT', 'NHWC')

full_preprocessed_path = os.path.join(preprocessed_path,
'preprocessed',
'imagenet',
img_format)

if os.path.isdir(full_preprocessed_path):
env['CM_DATASET_PREPROCESSED_FULL_PATH']=full_preprocessed_path

return {'return':0}
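The new postprocess() hook is where CM_DATASET_PREPROCESSED_FULL_PATH, consumed earlier by run.sh and run.bat, comes from. As a small worked example (placeholder root, NHWC layout assumed), the derived directory is:

# Worked example of the path derived by postprocess() (placeholder root).
import os

preprocessed_path = '/tmp/imagenet-preprocessed'     # placeholder for CM_DATASET_PREPROCESSED_PATH
img_format = 'NHWC'                                  # default when CM_ML_MODEL_DATA_LAYOUT is unset

full_preprocessed_path = os.path.join(preprocessed_path,
                                      'preprocessed', 'imagenet', img_format)
# -> /tmp/imagenet-preprocessed/preprocessed/imagenet/NHWC
print(full_preprocessed_path)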