Azure · rdondera-microsoft · Oct 29, 2024 · Oct 16, 2024 · Oct 22, 2024 · Oct 22, 2024
@@ -1,14 +1,14 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 type: command
 
-version: 0.0.18
+version: 0.0.19
 name: mmdetection_image_objectdetection_instancesegmentation_finetune
 display_name: Image Object Detection and Instance Segmentation MMDetection Model Finetune
 description: Component to finetune MMDetection models for image object detection and instance segmentation.
 
 is_deterministic: false
 
-environment: azureml://registries/azureml/environments/acft-mmdetection-image-gpu/versions/37
+environment: azureml://registries/azureml/environments/acft-mmdetection-image-gpu/versions/45
 
 code: ../../../src/finetune
 

@@ -1,14 +1,14 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 type: command
 
-version: 0.0.18
+version: 0.0.19
 name: mmdetection_image_objectdetection_instancesegmentation_model_import
 display_name: Image Object Detection and Instance Segmentation MMDetection Model Import
 description: Import PyTorch / MLflow model
 
 is_deterministic: True
 
-environment: azureml://registries/azureml/environments/acft-mmdetection-image-gpu/versions/37
+environment: azureml://registries/azureml/environments/acft-mmdetection-image-gpu/versions/45
 
 code: ../../../src/model_selector
 

@@ -263,7 +263,7 @@ jobs:
       compute_model_import: ${{parent.inputs.compute_model_import}}
       compute_finetune: ${{parent.inputs.compute_finetune}}
       task_name: ${{parent.inputs.task_type}}
-      user_column_names: image_url, label
+      user_column_names: image,label
       task_specific_extra_params: '"model_family=HuggingFaceImage;model_name=${{parent.inputs.model_name}};metric_for_best_model=${{parent.inputs.primary_metric}};number_of_epochs=${{parent.inputs.number_of_epochs}}"'
 
   framework_selector:

@@ -363,7 +363,7 @@ inputs:
 
   input_column_names:
     type: string
-    default: image_url
+    default: image
     optional: true
     description: Input column names in provided test dataset, for example column1. Add comma delimited values in case of multiple input columns, for example column1,column2.
 

@@ -1,7 +1,7 @@
 $schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json
 type: pipeline
 
-version: 0.0.20
+version: 0.0.21
 name: image_instance_segmentation_pipeline
 display_name: Image Instance Segmentation Pipeline
 description: Pipeline component for image instance segmentation.
@@ -300,20 +300,20 @@ jobs:
 
   finetune_common_validation:
     type: command
-    component: azureml:finetune_common_validation:0.0.5
+    component: azureml:finetune_common_validation:0.0.6
     compute: ${{parent.inputs.compute_model_import}}
     inputs:
       train_mltable_path: ${{parent.inputs.training_data}}
       validation_mltable_path: ${{parent.inputs.validation_data}}
       compute_model_import: ${{parent.inputs.compute_model_import}}
       compute_finetune: ${{parent.inputs.compute_finetune}}
       task_name: ${{parent.inputs.task_type}}
-      user_column_names: image_url, label
+      user_column_names: image,label
       task_specific_extra_params: '"model_family=MmDetectionImage;model_name=${{parent.inputs.model_name}};metric_for_best_model=${{parent.inputs.primary_metric}};number_of_epochs=${{parent.inputs.number_of_epochs}}"'
 
   framework_selector:
     type: command
-    component: azureml:image_framework_selector:0.0.17
+    component: azureml:image_framework_selector:0.0.18
     compute: ${{parent.inputs.compute_model_import}}
     inputs:
       task_type: ${{parent.inputs.task_type}}
@@ -369,7 +369,7 @@ jobs:
 
   mm_detection_model_import:
     type: command
-    component: azureml:mmdetection_image_objectdetection_instancesegmentation_model_import:0.0.18
+    component: azureml:mmdetection_image_objectdetection_instancesegmentation_model_import:0.0.19
     compute: ${{parent.inputs.compute_model_import}}
     inputs:
       model_family: 'MmDetectionImage'
@@ -379,7 +379,7 @@ jobs:
 
   mm_detection_finetune:
     type: command
-    component: azureml:mmdetection_image_objectdetection_instancesegmentation_finetune:0.0.18
+    component: azureml:mmdetection_image_objectdetection_instancesegmentation_finetune:0.0.19
     compute: ${{parent.inputs.compute_finetune}}
     distribution:
       type: pytorch
@@ -423,7 +423,7 @@ jobs:
 
   output_selector:
     type: command
-    component: azureml:image_model_output_selector:0.0.16
+    component: azureml:image_model_output_selector:0.0.17
     compute: ${{parent.inputs.compute_model_import}}
     inputs:
       mlflow_model_t: ${{parent.jobs.image_instance_segmentation_runtime_component.outputs.mlflow_model_folder}}

@@ -1,7 +1,7 @@
 $schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json
 type: pipeline
 
-version: 0.0.20
+version: 0.0.21
 name: mmdetection_image_objectdetection_instancesegmentation_pipeline
 display_name: Image Object Detection and Instance Segmentation MMDetection Pipeline
 description: Pipeline component for image object detection and instance segmentation using MMDetection models.
@@ -361,7 +361,7 @@ inputs:
 
   input_column_names:
     type: string
-    default: image_url
+    default: image,image_meta_info,text_prompt
     optional: true
     description: Input column names in provided test dataset, for example column1. Add comma delimited values in case of multiple input columns, for example column1,column2.
 
@@ -391,7 +391,7 @@ outputs:
 jobs:
   finetune_common_validation:
     type: command
-    component: azureml:finetune_common_validation:0.0.5
+    component: azureml:finetune_common_validation:0.0.6
     compute: ${{parent.inputs.compute_model_import}}
     inputs:
       mlflow_model_path: ${{parent.inputs.mlflow_model}}
@@ -405,7 +405,7 @@ jobs:
       compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}}
       task_name: ${{parent.inputs.task_name}}
       label_column_name: '${{parent.inputs.label_column_name}}'
-      user_column_names: '${{parent.inputs.input_column_names}}, ${{parent.inputs.label_column_name}}'
+      user_column_names: '${{parent.inputs.input_column_names}},${{parent.inputs.label_column_name}}'
       test_batch_size: ${{parent.inputs.test_batch_size}}
       device: auto
       evaluation_config: ${{parent.inputs.evaluation_config}}
@@ -414,7 +414,7 @@ jobs:
 
   image_od_is_model_import:
     type: command
-    component: azureml:mmdetection_image_objectdetection_instancesegmentation_model_import:0.0.18
+    component: azureml:mmdetection_image_objectdetection_instancesegmentation_model_import:0.0.19
     compute: ${{parent.inputs.compute_model_import}}
     inputs:
       model_family: ${{parent.inputs.model_family}}
@@ -426,7 +426,7 @@ jobs:
 
   image_od_is_finetune:
     type: command
-    component: azureml:mmdetection_image_objectdetection_instancesegmentation_finetune:0.0.18
+    component: azureml:mmdetection_image_objectdetection_instancesegmentation_finetune:0.0.19
     compute: ${{parent.inputs.compute_finetune}}
     distribution:
       type: pytorch
@@ -482,7 +482,7 @@ jobs:
 
   model_prediction:
     type: command
-    component: azureml:model_prediction:0.0.28
+    component: azureml:model_prediction:0.0.34
     compute: '${{parent.inputs.compute_model_evaluation}}'
     inputs:
       task: '${{parent.inputs.task_name}}'
@@ -497,7 +497,7 @@ jobs:
 
   compute_metrics:
     type: command
-    component: azureml:compute_metrics:0.0.28
+    component: azureml:compute_metrics:0.0.33
     compute: '${{parent.inputs.compute_model_evaluation}}'
     inputs:
       task: '${{parent.inputs.task_name}}'

@@ -319,7 +319,7 @@ jobs:
       compute_model_import: ${{parent.inputs.compute_model_import}}
       compute_finetune: ${{parent.inputs.compute_finetune}}
       task_name: ${{parent.inputs.task_name}}
-      user_column_names: image_url, label
+      user_column_names: image,label
       task_specific_extra_params: '"model_family=${{parent.inputs.model_family}};model_name=${{parent.inputs.model_name}};metric_for_best_model=${{parent.inputs.metric_for_best_model}};number_of_epochs=${{parent.inputs.number_of_epochs}}"'
 
   model_import:

@@ -1,7 +1,7 @@
 $schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json
 type: pipeline
 
-version: 0.0.20
+version: 0.0.21
 name: image_object_detection_pipeline
 display_name: Image Object Detection Pipeline
 description: Pipeline component for image object detection.
@@ -325,20 +325,20 @@ jobs:
 
   finetune_common_validation:
     type: command
-    component: azureml:finetune_common_validation:0.0.5
+    component: azureml:finetune_common_validation:0.0.6
     compute: ${{parent.inputs.compute_model_import}}
     inputs:
       train_mltable_path: ${{parent.inputs.training_data}}
       validation_mltable_path: ${{parent.inputs.validation_data}}
       compute_model_import: ${{parent.inputs.compute_model_import}}
       compute_finetune: ${{parent.inputs.compute_finetune}}
       task_name: ${{parent.inputs.task_type}}
-      user_column_names: image_url, label
+      user_column_names: image,label
       task_specific_extra_params: '"model_family=MmDetectionImage;model_name=${{parent.inputs.model_name}};metric_for_best_model=${{parent.inputs.primary_metric}};number_of_epochs=${{parent.inputs.number_of_epochs}}"'
 
   framework_selector:
     type: command
-    component: azureml:image_framework_selector:0.0.17
+    component: azureml:image_framework_selector:0.0.18
     compute: ${{parent.inputs.compute_model_import}}
     inputs:
       task_type: ${{parent.inputs.task_type}}
@@ -396,7 +396,7 @@ jobs:
 
   mm_detection_model_import:
     type: command
-    component: azureml:mmdetection_image_objectdetection_instancesegmentation_model_import:0.0.18
+    component: azureml:mmdetection_image_objectdetection_instancesegmentation_model_import:0.0.19
     compute: ${{parent.inputs.compute_model_import}}
     inputs:
       model_family: 'MmDetectionImage'
@@ -406,7 +406,7 @@ jobs:
 
   mm_detection_finetune:
     type: command
-    component: azureml:mmdetection_image_objectdetection_instancesegmentation_finetune:0.0.18
+    component: azureml:mmdetection_image_objectdetection_instancesegmentation_finetune:0.0.19
     compute: ${{parent.inputs.compute_finetune}}
     distribution:
       type: pytorch
@@ -450,7 +450,7 @@ jobs:
 
   output_selector:
     type: command
-    component: azureml:image_model_output_selector:0.0.16
+    component: azureml:image_model_output_selector:0.0.17
     compute: ${{parent.inputs.compute_model_import}}
     inputs:
       mlflow_model_t: ${{parent.jobs.image_object_detection_runtime_component.outputs.mlflow_model_folder}}

diff --git a/assets/training/finetune_acft_image/environments/acft_image_mmdetection/context/Dockerfile b/assets/training/finetune_acft_image/environments/acft_image_mmdetection/context/Dockerfile
@@ -9,9 +9,14 @@ COPY requirements.txt .
 RUN pip install -r requirements.txt --no-cache-dir
 
 # # # Install mmdet
+# Temporary workaround for https://github.com/open-mmlab/mim/issues/244
+RUN pip install pip==24.0
 # # Note that MMDet installs pycocotools
 # Note: mmdet should be installed via mim to access the model zoo config folder.
 RUN mim install mmdet==3.3.0
+# Temporary workaround for https://github.com/open-mmlab/mmdetection/issues/11668: install mmcv 2.2.0, then raise mmdet's mmcv_maximum_version bound to 2.3.0 (remove both RUN lines below once mmdet is updated)
+RUN mim install mmcv==2.2.0 -f https://download.openmmlab.com/mmcv/dist/cu118/torch2.2/index.html --no-cache-dir
+RUN sed -i '/mmcv_maximum_version *=/ s/2\.2\.0/2.3.0/' /opt/conda/envs/ptca/lib/python3.10/site-packages/mmdet/__init__.py
 
 # Vulnerability Fix
 RUN pip install gunicorn==22.0.0

@@ -272,7 +272,7 @@ def get_object_detection_dataset(
             frame_rows.append({
                 input_column_names[0]: base64.encodebytes(read_image(image_path)).decode("utf-8"),
                 input_column_names[1]: image_meta_info,
-                input_column_names[2]: ". ".join(test_dataset.classes),
+                input_column_names[2]: ". ".join([str(c) for c in test_dataset.classes]),
                 label_column_name: label,
             })
 

@@ -11,6 +11,8 @@
 
 from unittest.mock import patch
 
+from PIL import Image
+
 from azureml.acft.common_components.image.runtime_common.common.dataset_helper import AmlDatasetHelper
 
 MODEL_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(__file__)), "./src"))
@@ -148,8 +150,8 @@ def get_mock_run_context():
 
 
 @pytest.mark.parametrize("task_type,input_column_names,label_column_name", [
-    (TASK.IMAGE_CLASSIFICATION, ["image_url"], "label"),
-    (TASK.IMAGE_OBJECT_DETECTION, ["image_url"], "label"),
+    (TASK.IMAGE_CLASSIFICATION, ["image"], "label"),
+    (TASK.IMAGE_OBJECT_DETECTION, ["image", "image_meta_info", "text_prompt"], "label"),
     (TASK.IMAGE_GENERATION, ["prompt"], "label"),
 ])
 def test_image_dataset(task_type, input_column_names, label_column_name):
@@ -178,13 +180,44 @@ def test_image_dataset(task_type, input_column_names, label_column_name):
                 patch(
                     "azureml.acft.common_components.image.runtime_common.common.utils.download_or_mount_image_files"
                 ), \
-                patch.object(AmlDatasetHelper, "get_data_dir", return_value=directory_name):
+                patch(
+                    "azureml.acft.common_components.image.runtime_common.common.utils._read_image",
+                    return_value=Image.new("RGB", (640, 480))
+                ), \
+                patch.object(AmlDatasetHelper, "get_data_dir", return_value=directory_name), \
+                patch("image_dataset.is_valid_image", return_value=True), \
+                patch("image_dataset.read_image", return_value=b"123"):
             df = get_image_dataset(task_type, directory_name, input_column_names, label_column_name)
 
         # Compare the loaded dataset with the original.
-        if task_type == TASK.IMAGE_GENERATION:
+        if task_type == TASK.IMAGE_CLASSIFICATION:
+            loaded_dataset = [
+                {k: row[k] for k in ["image", "label"]} for _, row in df.iterrows()
+            ]
+
+            for r1, r2 in zip(dataset, loaded_dataset):
+                assert r2["label"] == r1["label"]
+
+        elif task_type == TASK.IMAGE_OBJECT_DETECTION:
+            loaded_dataset = [
+                {k: row[k] for k in ["image", "image_meta_info", "text_prompt", "label"]} for _, row in df.iterrows()
+            ]
+
+            classes = set([o["label"] for r in dataset for o in r["label"]])
+            class_list_str = ". ".join([str(c) for c in sorted(classes)])
+
+            for r1, r2 in zip(dataset, loaded_dataset):
+                assert set(["filename", "width", "height"]).issubset(set(r2["image_meta_info"].keys()))
+
+                assert r2["text_prompt"] == class_list_str
+
+                # Simple sanity check for image label. Component tests check correctness of label fields in detail.
+                assert len(r2["label"]["boxes"]) == len(r1["label"])
+                assert len(r2["label"]["labels"]) == len(r1["label"])
+                assert len(r2["label"]["classes"]) == len(r1["label"])
+
+        elif task_type == TASK.IMAGE_GENERATION:
             loaded_dataset = [{k: row[k] for k in ["prompt", "label"]} for _, row in df.iterrows()]
-            for r1, r2 in zip(
-                sorted(dataset, key=lambda x: x["label"]), sorted(loaded_dataset, key=lambda x: x["prompt"])
-            ):
+
+            for r1, r2 in zip(dataset, loaded_dataset):
                 assert r2 == {"prompt": r1["label"], "label": r1["image_url"]}