Fixed PyTorch image classification with CPU and CUDA and preprocessed ImageNet on Linux and Windows
gfursin committed Dec 2, 2022
1 parent 2e090e3 commit 0626fa7
Showing 14 changed files with 203 additions and 67 deletions.
50 changes: 47 additions & 3 deletions cm-mlops/script/app-image-classification-torch-py/_cm.json
@@ -22,16 +22,48 @@
"tags": "get,python"
},
{
"tags": "get,dataset,preprocessed,image-classification"
"tags": "get,dataset,imagenet,image-classification,preprocessed"
},
{
"tags": "get,dataset-aux,image-classification"
"tags": "get,dataset-aux,imagenet-aux,image-classification"
},
{
"tags": "get,imagenet-helper"
},
{
"tags": "get,generic-python-lib,_torch"
"tags": "get,ml-model,image-classification,resnet50,_pytorch,_fp32"
},
{
"tags": "get,generic-python-lib,_torch",
"skip_if_env": {
"USE_CUDA": [
"yes"
]
}
},
{
"tags": "get,generic-python-lib,_torch_cuda",
"enable_if_env": {
"USE_CUDA": [
"yes"
]
}
},
{
"tags": "get,generic-python-lib,_torchvision",
"skip_if_env": {
"USE_CUDA": [
"yes"
]
}
},
{
"tags": "get,generic-python-lib,_torchvision_cuda",
"enable_if_env": {
"USE_CUDA": [
"yes"
]
}
}
],
"tags": [
@@ -40,5 +72,17 @@
"torch",
"python"
],
"variations": {
"cuda": {
"env": {
"USE_CUDA": "yes"
},
"deps": [
{
"tags": "get,cuda"
}
]
}
},
"uid": "e3986ae887b84ca8"
}
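The new skip_if_env / enable_if_env conditions are what switch the PyTorch dependencies between the CPU and CUDA builds. The sketch below is only an illustrative approximation of that gating (it is not the actual CM resolver): once the cuda variation sets USE_CUDA to "yes", the plain _torch dependency is skipped and the _torch_cuda one becomes active.

# Illustrative sketch (not the actual CM implementation) of how the
# skip_if_env / enable_if_env conditions in this _cm.json could be evaluated.
def dependency_is_active(dep, env):
    # Skip the dependency if any skip_if_env key matches one of its listed values.
    for key, values in dep.get('skip_if_env', {}).items():
        if env.get(key) in values:
            return False
    # Enable-only dependencies require every enable_if_env key to match.
    for key, values in dep.get('enable_if_env', {}).items():
        if env.get(key) not in values:
            return False
    return True

deps = [
    {'tags': 'get,generic-python-lib,_torch',
     'skip_if_env': {'USE_CUDA': ['yes']}},
    {'tags': 'get,generic-python-lib,_torch_cuda',
     'enable_if_env': {'USE_CUDA': ['yes']}},
]

# With the "cuda" variation selected, USE_CUDA is "yes",
# so only the _torch_cuda dependency remains active.
env = {'USE_CUDA': 'yes'}
print([d['tags'] for d in deps if dependency_is_active(d, env)])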
25 changes: 11 additions & 14 deletions cm-mlops/script/app-image-classification-torch-py/run.bat
@@ -1,20 +1,17 @@
rem connect CM portable scripts with CK env
set ML_MODEL_IMAGE_HEIGHT=224
set ML_MODEL_IMAGE_WIDTH=224
set CK_BATCH_SIZE=1
set CK_BATCH_COUNT=1
set CK_ENV_ONNX_MODEL_ONNX_FILEPATH=
set CK_ENV_DATASET_IMAGENET_PREPROCESSED_DIR=%CM_DATASET_PREPROCESSED_PATH%
set CK_ENV_DATASET_IMAGENET_PREPROCESSED_SUBSET_FOF=%CM_DATASET_PREPROCESSED_PATH%\names.txt
set CK_CAFFE_IMAGENET_SYNSET_WORDS_TXT=%CM_DATASET_AUX_PATH%\synsets.txt
set CK_ENV_DATASET_IMAGENET_PREPROCESSED_DATA_TYPE=float32
set CK_RESULTS_DIR=%CM_TMP_CURRENT_SCRIPT_PATH%\results
set ML_MODEL_DATA_TYPE=float32
set USE_LLVM=no

set CM_ML_TORCH_MODEL_NAME=resnet50
set CM_ML_MODEL_INPUT_DATA_TYPE=float32
set CM_ML_MODEL_IMAGE_HEIGHT=224
set CM_ML_MODEL_IMAGE_WIDTH=224

set CK_BATCH_SIZE=%CM_BATCH_SIZE%
set CK_BATCH_COUNT=%CM_BATCH_COUNT%
rem set CM_DATASET_IMAGENET_PREPROCESSED_DIR=%CM_DATASET_PREPROCESSED_PATH%

set CM_DATASET_IMAGENET_PREPROCESSED_DIR=%CM_DATASET_PREPROCESSED_FULL_PATH%
set CM_CAFFE_IMAGENET_SYNSET_WORDS_TXT=%CM_DATASET_AUX_PATH%\synset_words.txt
set CM_DATASET_IMAGENET_PREPROCESSED_DATA_TYPE=float32
set CM_RESULTS_DIR=%CM_TMP_CURRENT_SCRIPT_PATH%\results
set ML_MODEL_DATA_LAYOUT=NCHW

%CM_PYTHON_BIN_WITH_PATH% -m pip install -r %CM_TMP_CURRENT_SCRIPT_PATH%\requirements.txt
IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL%
25 changes: 9 additions & 16 deletions cm-mlops/script/app-image-classification-torch-py/run.sh
@@ -3,22 +3,15 @@
CM_TMP_CURRENT_SCRIPT_PATH=${CM_TMP_CURRENT_SCRIPT_PATH:-$PWD}

# connect CM intelligent components with CK env
export ML_MODEL_IMAGE_HEIGHT=224
export ML_MODEL_IMAGE_WIDTH=224
export CK_BATCH_SIZE=1
export CK_BATCH_COUNT=1
export CK_ENV_ONNX_MODEL_ONNX_FILEPATH=""
export CK_ENV_DATASET_IMAGENET_PREPROCESSED_DIR=${CM_DATASET_PREPROCESSED_PATH}
export CK_ENV_DATASET_IMAGENET_PREPROCESSED_SUBSET_FOF=${CM_DATASET_PREPROCESSED_PATH}/names.txt
export CK_CAFFE_IMAGENET_SYNSET_WORDS_TXT=${CM_DATASET_AUX_PATH}/synsets.txt
export CK_ENV_DATASET_IMAGENET_PREPROCESSED_DATA_TYPE=float32
export CK_RESULTS_DIR=${CM_TMP_CURRENT_SCRIPT_PATH}/results
export ML_MODEL_DATA_TYPE=float32
export USE_LLVM=no


export CK_BATCH_SIZE=${CM_BATCH_SIZE}
export CK_BATCH_COUNT=${CM_BATCH_COUNT}
export CM_ML_TORCH_MODEL_NAME=resnet50
export CM_ML_MODEL_INPUT_DATA_TYPE=float32
export CM_ML_MODEL_IMAGE_HEIGHT=224
export CM_ML_MODEL_IMAGE_WIDTH=224
export CM_DATASET_IMAGENET_PREPROCESSED_DIR=${CM_DATASET_PREPROCESSED_FULL_PATH}
export CM_CAFFE_IMAGENET_SYNSET_WORDS_TXT=${CM_DATASET_AUX_PATH}/synset_words.txt
export CM_DATASET_IMAGENET_PREPROCESSED_DATA_TYPE=float32
export CM_RESULTS_DIR=${CM_TMP_CURRENT_SCRIPT_PATH}/results
export ML_MODEL_DATA_LAYOUT=NCHW

${CM_PYTHON_BIN} -m pip install -r ${CM_TMP_CURRENT_SCRIPT_PATH}/requirements.txt
test $? -eq 0 || exit 1
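Both run.bat above and this run.sh now hand the dataset locations to the Python application purely through CM_* environment variables. A hypothetical sanity check of that handoff (not part of the commit; variable names are taken from the scripts above) could look like this:

# Hypothetical check of the environment handoff performed by run.sh / run.bat
# before the classifier is launched (illustrative only).
import os

preprocessed_dir = os.environ.get('CM_DATASET_IMAGENET_PREPROCESSED_DIR', '')
synset_words = os.environ.get('CM_CAFFE_IMAGENET_SYNSET_WORDS_TXT', '')

assert os.path.isdir(preprocessed_dir), 'preprocessed ImageNet directory not found'
assert os.path.isfile(synset_words), 'synset_words.txt not found'

# USE_CUDA is the switch the application script checks (see the diff below);
# it is set to "yes" only when the cuda variation is selected.
print('USE_CUDA =', os.environ.get('USE_CUDA', ''))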
@@ -6,23 +6,40 @@
import shutil
import numpy as np
import torch
import torchvision.models as models

import imagenet_helper
from imagenet_helper import (load_preprocessed_batch, image_list, class_labels, BATCH_SIZE)

TORCH_MODEL_NAME = os.getenv('ML_TORCH_MODEL_NAME', 'resnet50')

## Writing the results out:
#
RESULTS_DIR = os.getenv('CK_RESULTS_DIR')
FULL_REPORT = os.getenv('CK_SILENT_MODE', '0') in ('NO', 'no', 'OFF', 'off', '0')
RESULTS_DIR = os.getenv('CM_RESULTS_DIR')
FULL_REPORT = os.getenv('CM_SILENT_MODE', '0') in ('NO', 'no', 'OFF', 'off', '0')

## Processing by batches:
#
BATCH_COUNT = int(os.getenv('CK_BATCH_COUNT', 1))
BATCH_COUNT = int(os.getenv('CM_BATCH_COUNT', 1))

## Enabling GPU if available and not disabled:
#
USE_CUDA = torch.cuda.is_available() and (os.getenv('CK_DISABLE_CUDA', '0') in ('NO', 'no', 'OFF', 'off', '0'))
USE_CUDA = (os.getenv('USE_CUDA', '').strip()=='yes')


labels_path = os.environ['CM_CAFFE_IMAGENET_SYNSET_WORDS_TXT']

def load_labels(labels_filepath):
my_labels = []
input_file = open(labels_filepath, 'r')
for l in input_file:
my_labels.append(l.strip())
return my_labels


labels = load_labels(labels_path)


data_layout = os.environ['ML_MODEL_DATA_LAYOUT']



def main():
@@ -39,14 +56,11 @@ def main():
os.mkdir(RESULTS_DIR)

# Load the [cached] Torch model
torchvision_version = '' # master by default
try:
import torchvision
torchvision_version = ':v' + torchvision.__version__
except Exception:
pass

model = torch.hub.load('pytorch/vision' + torchvision_version, TORCH_MODEL_NAME, pretrained=True)
path_to_model_pth = os.environ['CM_ML_MODEL_FILE_WITH_PATH']

model=models.resnet50(pretrained=False)
model.load_state_dict(torch.load(path_to_model_pth))

model.eval()

# move the model to GPU for speed if available
@@ -63,13 +77,57 @@
first_classification_time = 0
images_loaded = 0

image_path = os.environ.get('CM_INPUT','')
if image_path !='':

normalize_data_bool=True
subtract_mean_bool=False

from PIL import Image

def load_and_resize_image(image_filepath, height, width):
pillow_img = Image.open(image_filepath).resize((width, height)) # sic! The order of dimensions in resize is (W,H)

input_data = np.float32(pillow_img)

# Normalize
if normalize_data_bool:
input_data = input_data/127.5 - 1.0

# Subtract mean value
if subtract_mean_bool:
if len(given_channel_means):
input_data -= given_channel_means
else:
input_data -= np.mean(input_data)

# print(np.array(pillow_img).shape)
nhwc_data = np.expand_dims(input_data, axis=0)

if data_layout == 'NHWC':
# print(nhwc_data.shape)
return nhwc_data
else:
nchw_data = nhwc_data.transpose(0,3,1,2)
# print(nchw_data.shape)
return nchw_data

BATCH_COUNT=1


for batch_index in range(BATCH_COUNT):
batch_number = batch_index+1
if FULL_REPORT or (batch_number % 10 == 0):
print("\nBatch {} of {}".format(batch_number, BATCH_COUNT))

begin_time = time.time()
batch_data, image_index = load_preprocessed_batch(image_list, image_index)

if image_path=='':
batch_data, image_index = load_preprocessed_batch(image_list, image_index)
else:
batch_data = load_and_resize_image(image_path, 224, 224)
image_index = 1

torch_batch = torch.from_numpy( batch_data )

load_time = time.time() - begin_time
@@ -101,11 +159,19 @@ def main():
for index_in_batch in range(BATCH_SIZE):
softmax_vector = batch_results[index_in_batch][bg_class_offset:] # skipping the background class on the left (if present)
global_index = batch_index * BATCH_SIZE + index_in_batch

res_file = os.path.join(RESULTS_DIR, image_list[global_index])

with open(res_file + '.txt', 'w') as f:
for prob in softmax_vector:
f.write('{}\n'.format(prob))


top5_indices = list(reversed(softmax_vector.argsort()))[:5]
for class_idx in top5_indices:
print("\t{}\t{}\t{}".format(class_idx, softmax_vector[class_idx], labels[class_idx]))
print("")


test_time = time.time() - test_time_begin

if BATCH_COUNT > 1:
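Taken together, the changes to this application script replace the torch.hub download with a locally cached ResNet-50 checkpoint (CM_ML_MODEL_FILE_WITH_PATH), add an optional single-image path via CM_INPUT, and print the top-5 labels. A condensed, self-contained sketch of that flow, with placeholder paths standing in for the CM_* variables, is:

# Condensed sketch of the new inference path (placeholder paths, not the CM wiring).
import numpy as np
import torch
import torchvision.models as models
from PIL import Image

model = models.resnet50(pretrained=False)                    # same constructor as in the diff
model.load_state_dict(torch.load('/path/to/resnet50.pth'))   # placeholder for CM_ML_MODEL_FILE_WITH_PATH
model.eval()

img = Image.open('/path/to/image.jpg').resize((224, 224))    # placeholder for CM_INPUT; resize takes (W, H)
data = np.float32(img) / 127.5 - 1.0                          # normalize to [-1, 1], as in load_and_resize_image
data = np.expand_dims(data, axis=0).transpose(0, 3, 1, 2)     # NHWC -> NCHW (ML_MODEL_DATA_LAYOUT)

with torch.no_grad():
    probs = torch.nn.functional.softmax(model(torch.from_numpy(data)), dim=1)[0]

for idx in probs.argsort(descending=True)[:5]:                # top-5, as printed by the script
    print(int(idx), float(probs[idx]))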
4 changes: 4 additions & 0 deletions cm-mlops/script/get-dataset-imagenet-helper/_cm.json
@@ -5,6 +5,10 @@
"category": "ML/AI datasets",
"category_sort":8500,
"cache": true,
"new_env_keys": [
"+PYTHONPATH",
"CM_DATASET_IMAGENET_HELPER_PATH"
],
"tags": [
"get",
"imagenet",
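Exporting +PYTHONPATH and CM_DATASET_IMAGENET_HELPER_PATH is what lets the application script above simply import imagenet_helper. An illustrative consumer is shown below; the explicit sys.path fallback is an assumption for standalone use, not part of the commit.

# Illustrative use of the helper once its path is exported by this script.
import os
import sys

helper_path = os.environ.get('CM_DATASET_IMAGENET_HELPER_PATH', '')
if helper_path and helper_path not in sys.path:
    sys.path.insert(0, helper_path)           # fallback if PYTHONPATH was not already extended

from imagenet_helper import load_preprocessed_batch, image_list, BATCH_SIZE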
@@ -62,10 +62,7 @@
## Preprocessed input images' properties:
#
IMAGE_DIR = os.getenv('CM_DATASET_IMAGENET_PREPROCESSED_DIR')
IMAGE_LIST_FILE_NAME = os.getenv('CM_DATASET_IMAGENET_PREPROCESSED_SUBSET_FOF')
IMAGE_LIST_FILE = os.path.join(IMAGE_DIR, IMAGE_LIST_FILE_NAME)
IMAGE_DATA_TYPE = os.getenv('CM_DATASET_IMAGENET_PREPROCESSED_DATA_TYPE', 'float32')
IMAGE_DATA_TYPE = "uint8"


def load_labels(labels_filepath):
@@ -79,12 +76,16 @@ def load_labels(labels_filepath):


# Load preprocessed image filenames:
with open(IMAGE_LIST_FILE, 'r') as f:
image_list = [ s.strip() for s in f ]

image_list = []
all_images = os.listdir(IMAGE_DIR)
for image_file in all_images:
if image_file.endswith('.npy'):
image_list.append(image_file)

def load_image_by_index_and_normalize(image_index):

img_file = os.path.join(IMAGE_DIR, image_list[image_index])

img = np.fromfile(img_file, np.dtype(IMAGE_DATA_TYPE))
#img = img.reshape((1,MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH, 3))
img.resize(224*224*3)
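The helper now enumerates the preprocessed .npy files itself instead of reading a names.txt list, and treats the data as uint8. A rough illustration of that reading pattern follows; the directory is a placeholder, and the slice-and-reshape mirrors the helper's 224x224x3 assumption rather than its exact resize call.

# Rough illustration of how one preprocessed sample is read (placeholder path).
import os
import numpy as np

IMAGE_DIR = '/path/to/preprocessed/imagenet/NHWC'   # placeholder for CM_DATASET_IMAGENET_PREPROCESSED_DIR
image_list = sorted(f for f in os.listdir(IMAGE_DIR) if f.endswith('.npy'))

img = np.fromfile(os.path.join(IMAGE_DIR, image_list[0]), dtype=np.uint8)
img = img[:224 * 224 * 3].reshape(224, 224, 3)      # uint8 HWC image, as assumed by the helper
print(image_list[0], img.shape, img.dtype)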
6 changes: 0 additions & 6 deletions cm-mlops/script/get-dataset-imagenet-helper/run.sh

This file was deleted.

4 changes: 2 additions & 2 deletions cm-mlops/script/get-mlperf-inference-src/customize.py
@@ -6,8 +6,8 @@ def preprocess(i):

os_info = i['os_info']

if os_info['platform'] == 'windows':
return {'return':1, 'error': 'Windows is not supported in this script yet'}
# if os_info['platform'] == 'windows':
# return {'return':1, 'error': 'Windows is not supported in this script yet'}

env = i['env']
meta = i['meta']
10 changes: 10 additions & 0 deletions cm-mlops/script/get-mlperf-inference-src/run.bat
@@ -0,0 +1,10 @@
@echo off

echo ******************************************************
echo Cloning Mlcommons from %CM_GIT_URL% with branch %CM_GIT_CHECKOUT% %CM_GIT_DEPTH% %CM_GIT_RECURSE_SUBMODULES% ...

git clone %CM_GIT_RECURSE_SUBMODULES% %CM_GIT_URL% %CM_GIT_DEPTH% inference
cd inference
git checkout -b "%CM_GIT_CHECKOUT%"

exit /b 0
2 changes: 1 addition & 1 deletion cm-mlops/script/get-mlperf-inference-src/run.sh
@@ -4,7 +4,7 @@ CUR_DIR=$PWD
SCRIPT_DIR=${CM_TMP_CURRENT_SCRIPT_PATH}

echo "******************************************************"
echo "Cloning Mlcommons from ${CM_GIT_URL} with branch ${CM_GIT_CHECKOUT} ${CM_GIT_DEPTH} ${CM_GIT_RECURSE_SUBMODULES}..."
echo "Cloning Mlcommons from ${CM_GIT_URL} with branch ${CM_GIT_CHECKOUT} ${CM_GIT_DEPTH} ${CM_GIT_RECURSE_SUBMODULES} ..."

if [ ! -d "inference" ]; then
if [ -z ${CM_GIT_SHA} ]; then
28 changes: 25 additions & 3 deletions cm-mlops/script/get-preprocessed-dataset-imagenet/customize.py
@@ -22,10 +22,32 @@ def preprocess(i):
shutil.copy(os.path.join(env['CM_DATASET_AUX_PATH'], "val.txt"), os.path.join(env['CM_DATASET_PATH'],
"val_map.txt"))

if not exists(os.path.join(env['CM_DATASET_PREPROCESSED_PATH'], "val_map.txt")):
shutil.copy(os.path.join(env['CM_DATASET_AUX_PATH'], "val.txt"), os.path.join(env['CM_DATASET_PREPROCESSED_PATH'],
"val_map.txt"))
preprocessed_path = env['CM_DATASET_PREPROCESSED_PATH']

if not exists(os.path.join(preprocessed_path, "val_map.txt")):
shutil.copy(os.path.join(env['CM_DATASET_AUX_PATH'], "val.txt"),
os.path.join(preprocessed_path, "val_map.txt"))

if env.get('CM_IMAGENET_QUANTIZED', "no") == "yes":
env['CM_QUANTIZE'] = "1"


return {'return': 0}

def postprocess(i):

env = i['env']

# finalize path
preprocessed_path = env['CM_DATASET_PREPROCESSED_PATH']
img_format = os.environ.get('CM_ML_MODEL_DATA_LAYOUT', 'NHWC')

full_preprocessed_path = os.path.join(preprocessed_path,
'preprocessed',
'imagenet',
img_format)

if os.path.isdir(full_preprocessed_path):
env['CM_DATASET_PREPROCESSED_FULL_PATH']=full_preprocessed_path

return {'return':0}
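The new postprocess() hook is where CM_DATASET_PREPROCESSED_FULL_PATH, consumed earlier by run.sh and run.bat, comes from. As a small worked example (placeholder root, NHWC layout assumed), the derived directory is:

# Worked example of the path derived by postprocess() (placeholder root).
import os

preprocessed_path = '/tmp/imagenet-preprocessed'     # placeholder for CM_DATASET_PREPROCESSED_PATH
img_format = 'NHWC'                                  # default when CM_ML_MODEL_DATA_LAYOUT is unset

full_preprocessed_path = os.path.join(preprocessed_path,
                                      'preprocessed', 'imagenet', img_format)
# -> /tmp/imagenet-preprocessed/preprocessed/imagenet/NHWC
print(full_preprocessed_path)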