Segregate tests + Script to upgrade components (#2288)
* script to upgrade components

* add underscore directory name

* fix code style issue

* fix doc style issue

* add env_version arg and upgrade components

* add missing changes

* update package name for yaml

* update README

* try fixing token expiration issue

* Revert "try fixing token expiration issue"

This reverts commit 798f2ab.

* fix token expiration issue via singleton pattern

* fix thread issue

* Revert "fix thread issue"

This reverts commit 8959601.

* Revert "fix token expiration issue via singleton pattern"

This reverts commit 2a21f93.

* separate claude, batch_bench, prompt_crafter tests

* fix failing tests

* fix downloader test

* fix for when component is not published
iamrk04 authored Feb 13, 2024 · 1 parent: e31af43 · commit: 2665d01
Showing 50 changed files with 675 additions and 381 deletions.
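
The asset.yaml changes below all follow one pattern: each component's `tests_dir` is re-pointed from the shared `../../tests` folder to a component-specific path, so a component's CI run collects only its own tests. Judging only by the paths that appear in this diff, the segregated layout looks roughly like this (directories for the multi-component suites, single files elsewhere; the exact tree is an assumption):

```
assets/aml-benchmark/tests/
├── test_batch_benchmark_inference/   # shared by the batch-inference components
├── test_claude/
├── test_prompt_crafter/
├── test_benchmark_result_aggregator.py
├── test_compute_perf_metrics.py
├── test_dataset_downloader.py
├── test_dataset_preprocessor.py
├── test_dataset_sampler.py
└── test_inference_postprocessor.py
```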
19 changes: 18 additions & 1 deletion assets/aml-benchmark/README.md
@@ -72,4 +72,21 @@ python scripts/validation/copyright_validation.py -i assets/aml-benchmark/
In the root of the repo, run the following in **powershell**:
```
python scripts/validation/doc_style.py -i assets/aml-benchmark/
-```
+```
+
+# Release checklist
+
+## 1. Component release
+- From the root of this repo, run either of the following to install the dependencies:
+  - `pip install -r assets/aml-benchmark/requirements.txt`
+  - `conda env create -f assets/aml-benchmark/dev_conda_env.yaml`
+- Make sure the spec files of all components are up to date before kicking off the release process. From the root of this repo, run the following command to upgrade the components:
+  ```
+  python assets/aml-benchmark/scripts/_internal/upgrade_components.py [--env_version <version>]
+  ```
+  The `env_version` parameter accepts the following values:
+  | **Value** | **Description** |
+  | --- | --- |
+  | `"latest"` | The default. Upgrades the components' environment to the latest version. |
+  | `""` | Keeps the components' environment version as is. |
+  | `"<specific_version>"` | Upgrades the components' environment to the specified version. |
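
For orientation, here is a minimal sketch of what an upgrade helper like `upgrade_components.py` might look like. It is a hypothetical reconstruction (the script's actual source is not included in this diff); the directory layout, regex anchors, and patch-bump policy are assumptions inferred from the spec.yaml changes in this commit.

```python
"""Minimal sketch of a component-upgrade helper; hypothetical, not the real
scripts/_internal/upgrade_components.py, whose source is not in this commit."""
import argparse
import re
from pathlib import Path

COMPONENTS_ROOT = Path("assets/aml-benchmark/components")  # assumed layout

# Matches top-level `version: x.y.z` lines in a component spec.yaml.
VERSION_RE = re.compile(r"^(version: )(\d+)\.(\d+)\.(\d+)[ \t]*$", re.MULTILINE)
# Matches the environment reference, whether pinned by label or by version.
ENV_RE = re.compile(
    r"^(environment: azureml://registries/azureml/environments/[\w.-]+)"
    r"/(?:labels|versions)/[\w.-]+[ \t]*$",
    re.MULTILINE,
)


def upgrade_spec(text: str, env_version: str) -> str:
    """Bump the component's patch version and retarget its environment."""
    # x.y.z -> x.y.(z+1), matching the bumps visible in this commit's diffs.
    text = VERSION_RE.sub(
        lambda m: f"{m.group(1)}{m.group(2)}.{m.group(3)}.{int(m.group(4)) + 1}",
        text,
        count=1,
    )
    if env_version == "latest":
        # Stand-in only: the real script presumably resolves the newest
        # registry version (19 at the time of this commit) and pins it.
        text = ENV_RE.sub(r"\1/labels/latest", text, count=1)
    elif env_version:  # "" leaves the environment reference untouched
        text = ENV_RE.sub(rf"\1/versions/{env_version}", text, count=1)
    return text


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--env_version", default="latest")
    args = parser.parse_args()
    for spec_path in COMPONENTS_ROOT.glob("**/spec.yaml"):
        spec_path.write_text(upgrade_spec(spec_path.read_text(), args.env_version))
```

Editing the raw text with anchored regexes, rather than round-tripping through a YAML parser, preserves each spec file's formatting and comments; resolving what `"latest"` actually means would take a registry query, which the sketch deliberately stubs out.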
@@ -5,4 +5,4 @@ test:
pytest:
enabled: true
conda_environment: ../../dev_conda_env.yaml
-tests_dir: ../../tests
+tests_dir: ../../tests/test_batch_benchmark_inference
@@ -4,7 +4,7 @@ type: pipeline
name: batch_benchmark_inference
display_name: Batch Benchmark Inference
description: Components for batch endpoint inference
-version: 0.0.5
+version: 0.0.6

inputs:
input_dataset:
@@ -149,7 +149,7 @@ jobs:
# Preparer
batch_inference_preparer:
type: command
-component: azureml:batch_inference_preparer:0.0.6
+component: azureml:batch_inference_preparer:0.0.7
inputs:
input_dataset: ${{parent.inputs.input_dataset}}
model_type: ${{parent.inputs.model_type}}
@@ -167,7 +167,7 @@
# Inference
endpoint_batch_score:
type: parallel
-component: azureml:batch_benchmark_score:0.0.5
+component: azureml:batch_benchmark_score:0.0.6
inputs:
model_type: ${{parent.inputs.model_type}}
online_endpoint_url: ${{parent.inputs.endpoint_url}}
@@ -199,7 +199,7 @@ jobs:
# Reformat
batch_output_formatter:
type: command
-component: azureml:batch_output_formatter:0.0.6
+component: azureml:batch_output_formatter:0.0.7
inputs:
model_type: ${{parent.inputs.model_type}}
batch_inference_output: ${{parent.jobs.endpoint_batch_score.outputs.mini_batch_results_out_directory}}
@@ -5,4 +5,4 @@ test:
pytest:
enabled: true
conda_environment: ../../dev_conda_env.yaml
-tests_dir: ../../tests
+tests_dir: ../../tests/test_batch_benchmark_inference
@@ -1,6 +1,6 @@
$schema: http://azureml/sdk-2-0/ParallelComponent.json
name: batch_benchmark_score
-version: 0.0.5
+version: 0.0.6
display_name: Batch Benchmark Score
is_deterministic: False
type: parallel
@@ -77,7 +77,7 @@ outputs:
type: uri_folder
task:
code: ../src
-environment: azureml://registries/azureml/environments/model-evaluation/versions/15
+environment: azureml://registries/azureml/environments/model-evaluation/versions/19
program_arguments: --append_row_safe_output True --debug_mode ${{inputs.debug_mode}} $[[--model_type ${{inputs.model_type}}]] --online_endpoint_url ${{inputs.online_endpoint_url}} $[[--additional_properties ${{inputs.additional_properties}}]] $[[--additional_headers ${{inputs.additional_headers}}]] $[[--user_agent_segment ${{inputs.user_agent_segment}}]] --metrics_out_directory ${{outputs.metrics_out_directory}} --tally_failed_requests False --tally_exclusions none --run_type parallel --segment_large_requests disabled --segment_max_token_size 600 --ensure_ascii ${{inputs.ensure_ascii}} --output_behavior append_row --initial_worker_count ${{inputs.initial_worker_count}} --max_worker_count ${{inputs.max_worker_count}} $[[--max_retry_time_interval ${{inputs.max_retry_time_interval}}]] --save_mini_batch_results enabled --mini_batch_results_out_directory ${{outputs.mini_batch_results_out_directory}} --connections_name ${{inputs.connections_name}} $[[--deployment_name ${{inputs.deployment_name}}]] $[[--input_metadata ${{inputs.deployment_metadata}}]] $[[--mini_batch_size ${{inputs.mini_batch_size}}]]
entry_script: aml_benchmark.batch_benchmark_score.batch_score.main
type: run_function
@@ -5,4 +5,4 @@ test:
pytest:
enabled: true
conda_environment: ../../dev_conda_env.yaml
-tests_dir: ../../tests
+tests_dir: ../../tests/test_batch_benchmark_inference
@@ -4,7 +4,7 @@ type: command
name: batch_inference_preparer
display_name: Batch Inference Preparer
description: Prepare the jsonl file and endpoint for batch inference component.
-version: 0.0.6
+version: 0.0.7

inputs:
input_dataset:
@@ -64,7 +64,7 @@ outputs:
description: Path to the folder where the ground truth metadata will be stored.

code: ../src
-environment: azureml://registries/azureml/environments/model-evaluation/labels/latest
+environment: azureml://registries/azureml/environments/model-evaluation/versions/19
command: >-
python -m aml_benchmark.batch_inference_preparer.main
--batch_input_pattern '${{inputs.batch_input_pattern}}'
@@ -5,4 +5,4 @@ test:
pytest:
enabled: true
conda_environment: ../../dev_conda_env.yaml
-tests_dir: ../../tests
+tests_dir: ../../tests/test_batch_benchmark_inference
@@ -1,5 +1,5 @@
name: batch_output_formatter
-version: 0.0.6
+version: 0.0.7
display_name: Batch Output Formatter
is_deterministic: True
type: command
@@ -53,7 +53,7 @@ outputs:
ground_truth:
type: uri_file
code: ../src
-environment: azureml://registries/azureml/environments/model-evaluation/labels/latest
+environment: azureml://registries/azureml/environments/model-evaluation/versions/19

resources:
instance_count: 1
@@ -5,4 +5,4 @@ test:
pytest:
enabled: true
conda_environment: ../../dev_conda_env.yaml
-tests_dir: ../../tests
+tests_dir: ../../tests/test_batch_benchmark_inference
@@ -5,4 +5,4 @@ test:
pytest:
enabled: true
conda_environment: ../../dev_conda_env.yaml
-tests_dir: ../../tests
+tests_dir: ../../tests/test_claude
@@ -4,7 +4,7 @@ type: pipeline
name: batch_benchmark_inference_claude
display_name: Batch Benchmark Inference with claude support
description: Components for batch endpoint inference
-version: 0.0.2
+version: 0.0.3

inputs:
input_dataset:
@@ -151,7 +151,7 @@ jobs:
# Preparer
batch_inference_preparer:
type: command
-component: azureml:batch_inference_preparer:0.0.6
+component: azureml:batch_inference_preparer:0.0.7
inputs:
input_dataset: ${{parent.inputs.input_dataset}}
model_type: ${{parent.inputs.model_type}}
@@ -168,7 +168,7 @@
# Inference
endpoint_batch_score:
type: parallel
-component: azureml:batch_benchmark_score:0.0.5
+component: azureml:batch_benchmark_score:0.0.6
inputs:
model_type: ${{parent.inputs.model_type}}
online_endpoint_url: ${{parent.inputs.endpoint_url}}
@@ -199,7 +199,7 @@ jobs:
# Reformat
batch_output_formatter:
type: command
-component: azureml:batch_output_formatter:0.0.6
+component: azureml:batch_output_formatter:0.0.7
inputs:
model_type: ${{parent.inputs.model_type}}
batch_inference_output: ${{parent.jobs.endpoint_batch_score.outputs.mini_batch_results_out_directory}}
@@ -5,4 +5,4 @@ test:
pytest:
enabled: true
conda_environment: ../../dev_conda_env.yaml
-tests_dir: ../../tests
+tests_dir: ../../tests/test_benchmark_result_aggregator.py
@@ -4,7 +4,7 @@ type: command
name: benchmark_result_aggregator
display_name: Benchmark result aggregator
description: Aggregate quality metrics, performance metrics and all of the metadata from the pipeline. Also add them to the root run.
-version: 0.0.4
+version: 0.0.5
is_deterministic: false

inputs:
@@ -23,7 +23,7 @@ outputs:
description: The json file with all of the aggregated results.

code: ../src
-environment: azureml://registries/azureml/environments/model-evaluation/labels/latest
+environment: azureml://registries/azureml/environments/model-evaluation/versions/19
command: >-
python -m aml_benchmark.result_aggregator.main
$[[--quality_metrics_path ${{inputs.quality_metrics}}]]
@@ -5,4 +5,4 @@ test:
pytest:
enabled: true
conda_environment: ../../dev_conda_env.yaml
-tests_dir: ../../tests
+tests_dir: ../../tests/test_compute_perf_metrics.py
@@ -4,7 +4,7 @@ type: command
name: compute_performance_metrics
display_name: Compute Performance Metrics
description: Performs performance metric post processing using data from a model inference run.
-version: 0.0.2
+version: 0.0.3
is_deterministic: true

inputs:
@@ -57,7 +57,7 @@ outputs:
description: Path to the file where the calculated performance metric results will be stored.

code: ../src
-environment: azureml://registries/azureml/environments/model-evaluation/labels/latest
+environment: azureml://registries/azureml/environments/model-evaluation/versions/19
command: >-
python -m aml_benchmark.perf_metrics.main
--performance_data ${{inputs.performance_data}}
@@ -5,4 +5,4 @@ test:
pytest:
enabled: true
conda_environment: ../../dev_conda_env.yaml
-tests_dir: ../../tests
+tests_dir: ../../tests/test_dataset_downloader.py
4 changes: 2 additions & 2 deletions assets/aml-benchmark/components/dataset-downloader/spec.yaml
@@ -4,7 +4,7 @@ type: command
name: dataset_downloader
display_name: Dataset Downloader
description: Downloads the dataset onto blob store.
-version: 0.0.2
+version: 0.0.3

inputs:
dataset_name:
@@ -34,7 +34,7 @@ outputs:
description: Path to the directory where the dataset will be downloaded.

code: ../src
-environment: azureml://registries/azureml/environments/model-evaluation/labels/latest
+environment: azureml://registries/azureml/environments/model-evaluation/versions/19
command: >-
python -m aml_benchmark.dataset_downloader.main
$[[--dataset_name ${{inputs.dataset_name}}]]
@@ -5,4 +5,4 @@ test:
pytest:
enabled: true
conda_environment: ../../dev_conda_env.yaml
-tests_dir: ../../tests
+tests_dir: ../../tests/test_dataset_preprocessor.py
@@ -4,7 +4,7 @@ type: command
name: dataset_preprocessor
display_name: Dataset Preprocessor
description: Dataset Preprocessor
-version: 0.0.2
+version: 0.0.3
is_deterministic: true

inputs:
@@ -50,7 +50,7 @@ outputs:
code: ../src

-environment: azureml://registries/azureml/environments/model-evaluation/labels/latest
+environment: azureml://registries/azureml/environments/model-evaluation/versions/19

command: >-
python -m aml_benchmark.dataset_preprocessor.main
2 changes: 1 addition & 1 deletion assets/aml-benchmark/components/dataset-sampler/asset.yaml
@@ -5,4 +5,4 @@ test:
pytest:
enabled: true
conda_environment: ../../dev_conda_env.yaml
-tests_dir: ../../tests
+tests_dir: ../../tests/test_dataset_sampler.py
4 changes: 2 additions & 2 deletions assets/aml-benchmark/components/dataset-sampler/spec.yaml
@@ -4,7 +4,7 @@ type: command
name: dataset_sampler
display_name: Dataset Sampler
description: Samples a dataset containing JSONL file(s).
-version: 0.0.2
+version: 0.0.3

inputs:
dataset:
@@ -47,7 +47,7 @@ outputs:
description: Path to the jsonl file where the sampled dataset will be saved.

code: ../src
-environment: azureml://registries/azureml/environments/model-evaluation/labels/latest
+environment: azureml://registries/azureml/environments/model-evaluation/versions/19
command: >-
python -m aml_benchmark.dataset_sampler.main
--dataset ${{inputs.dataset}}
@@ -5,4 +5,4 @@ test:
pytest:
enabled: true
conda_environment: ../../dev_conda_env.yaml
-tests_dir: ../../tests
+tests_dir: ../../tests/test_inference_postprocessor.py
@@ -4,7 +4,7 @@ type: command
name: inference_postprocessor
display_name: Inference Postprocessor
description: Inference Postprocessor
-version: 0.0.3
+version: 0.0.4
is_deterministic: true

inputs:
@@ -130,7 +130,7 @@ outputs:
code: ../src

-environment: azureml://registries/azureml/environments/model-evaluation/labels/latest
+environment: azureml://registries/azureml/environments/model-evaluation/versions/19

command: >-
python -m aml_benchmark.inference_postprocessor.main
2 changes: 1 addition & 1 deletion assets/aml-benchmark/components/prompt_crafter/asset.yaml
@@ -5,4 +5,4 @@ test:
pytest:
enabled: true
conda_environment: ../../dev_conda_env.yaml
-tests_dir: ../../tests
+tests_dir: ../../tests/test_prompt_crafter
4 changes: 2 additions & 2 deletions assets/aml-benchmark/components/prompt_crafter/spec.yaml
@@ -6,7 +6,7 @@ display_name: Prompt Crafter
description: This component is used to create prompts from a given dataset. From a
given jinja prompt template, it will generate prompts. It can also create
few-shot prompts given a few-shot dataset and the number of shots.
-version: 0.0.5
+version: 0.0.6
is_deterministic: true

inputs:
@@ -134,7 +134,7 @@ outputs:
description: Output file path where few_shot_prompt data will be written.

code: ../src
-environment: azureml://registries/azureml/environments/model-evaluation/labels/latest
+environment: azureml://registries/azureml/environments/model-evaluation/versions/19
command: >-
python -m aml_benchmark.prompt_crafter.main
--test_data ${{inputs.test_data}}
3 changes: 3 additions & 0 deletions assets/aml-benchmark/dev_conda_env.yaml
@@ -15,6 +15,9 @@ dependencies:
- mltable>=1.5.0
- datasets
- ddt
+- tqdm
+- pyyaml
+- azure-core
## Test requirements
- pytest
- pytest-xdist
3 changes: 3 additions & 0 deletions assets/aml-benchmark/requirements.txt
@@ -10,3 +10,6 @@ datasets
pytest
pytest-xdist
ddt
+tqdm
+pyyaml
+azure-core