-
Notifications
You must be signed in to change notification settings - Fork 136
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' of https://github.com/Azure/azureml-assets into u…
…tkshukla/mlflow_version_fix
- Loading branch information
Showing
83 changed files
with
1,485 additions
and
171 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
6 changes: 6 additions & 0 deletions
6
assets/evaluation_on_cloud/environments/evaluations-built-in/asset.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
name: evaluations-built-in | ||
version: auto | ||
type: environment | ||
spec: spec.yaml | ||
extra_config: environment.yaml | ||
categories: ["Evaluation"] |
8 changes: 8 additions & 0 deletions
8
assets/evaluation_on_cloud/environments/evaluations-built-in/context/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04:latest | ||
|
||
COPY requirements.txt /app/requirements.txt | ||
RUN pip install -r /app/requirements.txt | ||
|
||
# Copy your Python file into the image | ||
COPY evaluate_on_data.py /app/evaluate_on_data.py | ||
COPY save_evaluation.py /app/save_evaluation.py |
124 changes: 124 additions & 0 deletions
124
assets/evaluation_on_cloud/environments/evaluations-built-in/context/evaluate_on_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT License. | ||
|
||
"""Evaluate for a built-in or custom evaluator.""" | ||
import argparse | ||
import json | ||
import logging | ||
import mlflow | ||
import os | ||
import pandas as pd | ||
import requests | ||
import shutil | ||
from azure.ai.ml.identity import AzureMLOnBehalfOfCredential | ||
from azure.ai.evaluation import evaluate | ||
from save_eval import load_evaluator | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def update_value_in_dict(d, key_substring, new_func):
    """Rewrite, in place, every string value that contains ``key_substring``.

    Walks ``d`` and every dictionary nested inside it. Each string value that
    contains ``key_substring`` is replaced by ``new_func(value)``. Values of
    any other type (including lists) are left untouched.

    Args:
        d: Dictionary to modify in place.
        key_substring: Substring looked for inside string *values*.
        new_func: Callable taking the matching string and returning its
            replacement.

    Returns:
        None. The dictionary is mutated in place.
    """
    for name in d:
        current = d[name]
        if isinstance(current, dict):
            # Nested mapping: descend instead of inspecting it as a value.
            update_value_in_dict(current, key_substring, new_func)
            continue
        if isinstance(current, str) and key_substring in current:
            # Re-assigning an existing key does not resize the dict, so this
            # is safe while iterating over it.
            d[name] = new_func(current)
|
||
|
||
def find_file_and_get_parent_dir(root_dir, file_name="flow.flex.yaml"):
    """Return the first directory under ``root_dir`` that contains ``file_name``.

    The tree is traversed with ``os.walk``; the first directory whose file
    list includes ``file_name`` wins and is logged.

    Args:
        root_dir: Directory at which the search starts.
        file_name: Name of the file to look for.

    Returns:
        The path of the directory containing the file, or ``None`` when no
        directory in the tree contains it.
    """
    candidates = (
        dirpath
        for dirpath, _, filenames in os.walk(root_dir)
        if file_name in filenames
    )
    found = next(candidates, None)
    if found is not None:
        logger.info(f"Found {file_name} in {found}")
    return found
|
||
|
||
def copy_evaluator_files(command_line_args):
    """Copy the mounted evaluator files to relative paths to enable read/write.

    ``command_line_args.evaluator_name_id_map`` is parsed as a JSON object
    mapping an evaluator name to a path. Each path is searched for a
    directory containing ``flow.flex.yaml``; when one is found it is copied
    to ``./<evaluator_name>`` and the copied contents are logged. Evaluators
    whose directory cannot be found are logged and skipped.

    Args:
        command_line_args: Parsed arguments exposing ``evaluator_name_id_map``
            as a JSON string.

    Raises:
        FileExistsError: Propagated from ``shutil.copytree`` when the target
            directory already exists.
    """
    evaluator_name_id_map = json.loads(command_line_args.evaluator_name_id_map)
    for evaluator_name, evaluator_id in evaluator_name_id_map.items():
        dir_path = find_file_and_get_parent_dir(evaluator_id)
        if not dir_path:
            logger.info(f"Directory for evaluator {evaluator_name} not found.")
            continue

        target_dir = f"./{evaluator_name}"
        # Announce the copy before it runs so a failure is attributable.
        logger.info(f"Copying {dir_path} to {target_dir}")
        shutil.copytree(dir_path, target_dir)
        # Logging takes a format string plus args; without placeholders the
        # extra argument triggers a logging error and the listing is lost.
        logger.info("%s: %s", evaluator_name, os.listdir(target_dir))
|
||
|
||
def initialize_evaluators(command_line_args):
    """Instantiate every requested evaluator with its init parameters.

    ``command_line_args.evaluators`` is parsed as a JSON object keyed by
    evaluator name. For each entry, string values in ``InitParams`` that
    contain ``AZURE_OPENAI_API_KEY`` are replaced by the environment variable
    named by the upper-cased value. The evaluator class is then loaded from
    ``./<evaluator_name>``. Entries whose ``Id`` contains one of the
    ``rai_evaluators`` names additionally receive an
    ``AzureMLOnBehalfOfCredential`` as ``credential``.

    Args:
        command_line_args: Parsed arguments exposing ``evaluators`` as a JSON
            string.

    Returns:
        dict mapping evaluator name to the instantiated evaluator.
    """
    # NOTE(review): load_evaluator is imported from `save_eval`, while the
    # helper module copied into the image is named save_evaluation.py --
    # confirm the import resolves at runtime.
    requested = json.loads(command_line_args.evaluators)
    initialized = {}
    for evaluator_name, evaluator in requested.items():
        init_params = evaluator["InitParams"]
        # Swap key placeholders for the actual secrets held in the environment.
        update_value_in_dict(init_params, "AZURE_OPENAI_API_KEY", lambda x: os.environ[x.upper()])
        flow = load_evaluator('./' + evaluator_name)
        needs_credential = any(marker in evaluator["Id"] for marker in rai_evaluators)
        if needs_credential:
            init_params["credential"] = AzureMLOnBehalfOfCredential()
        initialized[evaluator_name] = flow(**init_params)
    return initialized
|
||
|
||
def run_evaluation(command_line_args, evaluators):
    """Run evaluation using evaluators and record the outcome in MLflow.

    Calls ``evaluate`` on ``command_line_args.eval_data`` with the given
    evaluators and logs every entry of ``results['metrics']`` as an MLflow
    metric. When the result contains rows, they are written to
    ``instance_results.jsonl`` and ``eval_results.jsonl`` (same content) and
    both files are logged as MLflow artifacts.

    Args:
        command_line_args: Parsed arguments exposing ``eval_data``.
        evaluators: Mapping of evaluator name to evaluator, passed through to
            ``evaluate``.
    """
    results = evaluate(
        data=command_line_args.eval_data,
        evaluators=evaluators,
    )

    metrics = dict(results['metrics'])
    for metric_name, metric_value in metrics.items():
        # Placeholders are required: extra positional args without them make
        # logging emit a formatting error instead of the message.
        logger.info("Logging metric: %s %s", metric_name, metric_value)
    mlflow.log_metrics(metrics)

    if results and results.get("rows"):
        # Convert the results to a DataFrame
        df = pd.DataFrame(results["rows"])

        # Save the DataFrame as JSONL files; both names carry the same rows.
        df.to_json("instance_results.jsonl", orient="records", lines=True)
        df.to_json("eval_results.jsonl", orient="records", lines=True)
        mlflow.log_artifact("instance_results.jsonl")
        mlflow.log_artifact("eval_results.jsonl")
|
||
|
||
def get_promptflow_run_logs():
    """Log the contents of every promptflow run log found on disk.

    Looks under ``/root/.promptflow/.runs/``. For each run directory that
    holds a ``logs.txt``, a banner line and the file contents are logged. A
    single message is logged when the runs directory itself does not exist.
    """
    # NOTE(review): the source's indentation was lost; the "does not exist"
    # branch is assumed to pair with the runs-directory check -- confirm.
    if not os.path.exists("/root/.promptflow/.runs/"):
        logger.info("RUN DOES NOT EXIST")
        return

    for run in os.listdir("/root/.promptflow/.runs/"):
        if not os.path.exists(f"/root/.promptflow/.runs/{run}/logs.txt"):
            continue
        with open(f"/root/.promptflow/.runs/{run}/logs.txt", "r") as f:
            logger.info(f"RUN {run} =========================")
            logger.info(f.read())
|
||
|
||
# Create a session for making HTTP requests
session = requests.Session()

# Parse command line arguments. Each option is a plain string; --evaluators
# and --evaluator_name_id_map are decoded as JSON by the functions that
# consume them.
parser = argparse.ArgumentParser("eval")
parser.add_argument("--eval_data", type=str)
parser.add_argument("--eval_output", type=str)
parser.add_argument("--evaluators", type=str)
parser.add_argument("--evaluator_name_id_map", type=str)

args = parser.parse_args()

# Substrings of an evaluator "Id" that mark it as needing a credential.
rai_evaluators = ['HateUnfairnessEvaluator', 'Sexual-Content-Evaluator', 'Hate-and-Unfairness-Evaluator',
                  'Violent-Content-Evaluator', 'Self-Harm-Related-Content-Evaluator']

if __name__ == '__main__':
    copy_evaluator_files(args)
    evaluators = initialize_evaluators(args)
    logger.info("*************** Collecting Result of Evaluators ******************")
    # Run the evaluation
    with mlflow.start_run() as run:
        try:
            run_evaluation(args, evaluators)
        except Exception as e:
            # logger.exception needs a placeholder for the extra argument and
            # also records the traceback, so the failure cause is not lost.
            logger.exception("EXCEPT %s", e)
            # NOTE(review): the source's indentation was lost; collecting the
            # promptflow logs is assumed to belong to the failure path --
            # confirm against the original file.
            get_promptflow_run_logs()
5 changes: 5 additions & 0 deletions
5
assets/evaluation_on_cloud/environments/evaluations-built-in/context/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
azure-ai-evaluation | ||
openai | ||
azureml-mlflow | ||
azure-identity | ||
azure-ai-ml |
30 changes: 30 additions & 0 deletions
30
assets/evaluation_on_cloud/environments/evaluations-built-in/context/save_evaluation.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT License. | ||
|
||
"""Load a built-in or custom evaluator as flow.""" | ||
import importlib | ||
import logging | ||
import os | ||
import sys | ||
from promptflow.client import load_flow | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def load_evaluator(evaluator):
    """Load an evaluator flow and return the class named by its entry.

    The flow is loaded with ``load_flow``. Its ``entry`` string is split on
    ``":"``: the first part is used as the module name and the second as the
    attribute to fetch from that module. The module file is looked up at
    ``<cwd>/<flow parent directory name>/<module name>.py``, executed, and
    registered in ``sys.modules`` under the module name.

    Args:
        evaluator: Flow location passed straight to ``load_flow``.

    Returns:
        The attribute of the loaded module named by the second part of the
        flow's entry (the caller instantiates it).

    Raises:
        IndexError: If the entry string contains no ``":"``.
        AttributeError: If the module does not define the named attribute.
    """
    # ``import importlib`` alone does not guarantee that the ``util``
    # submodule is loaded; import it explicitly before using it.
    import importlib.util

    logger.info(f"Loading evaluator {evaluator}")
    loaded_evaluator = load_flow(evaluator)
    logger.info(loaded_evaluator)
    module_parent = loaded_evaluator.path.parent.name
    entry_parts = loaded_evaluator.entry.split(":")
    module_name = entry_parts[0]
    logger.info(f"Loading module {os.getcwd()} {module_name} from {module_parent}")
    module_path = os.path.join(os.getcwd(), module_parent, module_name + ".py")
    logger.info(f"Loading module {module_name} from {module_path}")
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    mod = importlib.util.module_from_spec(spec)
    logger.info(f"Loaded module {mod}")
    # Register before executing so code that imports the module by name while
    # it is loading resolves to this instance.
    sys.modules[module_name] = mod
    spec.loader.exec_module(mod)
    eval_class = getattr(mod, entry_parts[1])
    return eval_class
11 changes: 11 additions & 0 deletions
11
assets/evaluation_on_cloud/environments/evaluations-built-in/environment.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
image: | ||
name: azureml/curated/evaluations-built-in | ||
os: linux | ||
context: | ||
dir: context | ||
dockerfile: Dockerfile | ||
template_files: | ||
- Dockerfile | ||
publish: | ||
location: mcr | ||
visibility: public |
11 changes: 11 additions & 0 deletions
11
assets/evaluation_on_cloud/environments/evaluations-built-in/spec.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json | ||
description: Python environment for running promptflow-evals based evaluators. | ||
|
||
name: "{{asset.name}}" | ||
version: "{{asset.version}}" | ||
|
||
os_type: linux | ||
|
||
build: | ||
path: "{{image.context.path}}" | ||
dockerfile_path: "{{image.dockerfile.path}}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 4 additions & 0 deletions
4
assets/models/system/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224/asset.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
extra_config: model.yaml | ||
spec: spec.yaml | ||
type: model | ||
categories: ["Foundation Models"] |
71 changes: 71 additions & 0 deletions
71
assets/models/system/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224/description.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
BiomedCLIP is a biomedical vision-language foundation model that is pretrained on PMC-15M, a dataset of 15 million figure-caption pairs extracted from biomedical research articles in PubMed Central, using contrastive learning. It uses PubMedBERT as the text encoder and Vision Transformer as the image encoder, with domain-specific adaptations. It can perform various vision-language processing (VLP) tasks such as cross-modal retrieval, image classification, and visual question answering. BiomedCLIP establishes new state of the art in a wide range of standard datasets, and substantially outperforms prior VLP approaches: | ||
|
||
 | ||
|
||
**Citation** | ||
|
||
``` | ||
@misc{https://doi.org/10.48550/arXiv.2303.00915, | ||
doi = {10.48550/ARXIV.2303.00915}, | ||
url = {https://arxiv.org/abs/2303.00915}, | ||
author = {Zhang, Sheng and Xu, Yanbo and Usuyama, Naoto and Bagga, Jaspreet and Tinn, Robert and Preston, Sam and Rao, Rajesh and Wei, Mu and Valluri, Naveen and Wong, Cliff and Lungren, Matthew and Naumann, Tristan and Poon, Hoifung}, | ||
title = {Large-Scale Domain-Specific Pretraining for Biomedical Vision-Language Processing}, | ||
publisher = {arXiv}, | ||
year = {2023}, | ||
} | ||
``` | ||
|
||
## Model Use | ||
|
||
**Intended Use** | ||
This model is intended to be used solely for (I) future research on visual-language processing and (II) reproducibility of the experimental results reported in the reference paper. | ||
|
||
**Primary Intended Use** | ||
The primary intended use is to support AI researchers building on top of this work. BiomedCLIP and its associated models should be helpful for exploring various biomedical VLP research questions, especially in the radiology domain. | ||
|
||
**Out-of-Scope Use** | ||
Any deployed use case of the model --- commercial or otherwise --- is currently out of scope. Although we evaluated the models using a broad set of publicly-available research benchmarks, the models and evaluations are not intended for deployed use cases. Please refer to the associated paper for more details. | ||
|
||
**Data** | ||
This model builds upon the PMC-15M dataset, which is a large-scale parallel image-text dataset for biomedical vision-language processing. It contains 15 million figure-caption pairs extracted from biomedical research articles in PubMed Central. It covers a diverse range of biomedical image types, such as microscopy, radiography, histology, and more. | ||
|
||
**Limitations** | ||
This model was developed using English corpora, and thus can be considered English-only. | ||
|
||
**Further information** | ||
Please refer to the corresponding paper, "Large-Scale Domain-Specific Pretraining for Biomedical Vision-Language Processing" for additional details on the model training and evaluation. | ||
|
||
## Sample Input and Output (for real-time inference) | ||
|
||
### Sample Input | ||
|
||
```json | ||
{ | ||
"input_data": { | ||
"columns": [ | ||
"image", | ||
"text" | ||
], | ||
"index":[0, 1, 2], | ||
"data": [ | ||
["image1", "label1, label2, label3"], | ||
["image2", "label1, label2, label3"], | ||
["image3", "label1, label2, label3"] | ||
] | ||
} | ||
} | ||
``` | ||
### Sample Output | ||
```json | ||
[ | ||
{ | ||
"probs": [0.95, 0.03, 0.02], | ||
"labels": ["label1", "label2", "label3"] | ||
}, | ||
{ | ||
"probs": [0.04, 0.93, 0.03], | ||
"labels": ["label1", "label2", "label3"] | ||
} | ||
] | ||
``` |
8 changes: 8 additions & 0 deletions
8
assets/models/system/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224/model.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
path: | ||
container_name: models | ||
container_path: huggingface/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224/mlflow_model_folder | ||
storage_name: automlcesdkdataresources | ||
type: azureblob | ||
publish: | ||
description: description.md | ||
type: mlflow_model |
34 changes: 34 additions & 0 deletions
34
assets/models/system/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224/spec.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json | ||
|
||
name: BiomedCLIP-PubMedBERT_256-vit_base_patch16_224 | ||
path: ./ | ||
|
||
properties: | ||
inference-min-sku-spec: 6|1|112|64 | ||
inference-recommended-sku: Standard_NC6s_v3, Standard_NC12s_v3, Standard_NC24s_v3, Standard_NC24ads_A100_v4, Standard_NC48ads_A100_v4, Standard_NC96ads_A100_v4, Standard_ND96asr_v4, Standard_ND96amsr_A100_v4, Standard_ND40rs_v2 | ||
languages: en | ||
SharedComputeCapacityEnabled: true | ||
|
||
tags: | ||
task: zero-shot-image-classification | ||
industry: health-and-life-sciences | ||
Preview: "" | ||
inference_supported_envs: | ||
- hf | ||
license: mit | ||
author: Microsoft | ||
hiddenlayerscanned: "" | ||
SharedComputeCapacityEnabled: "" | ||
inference_compute_allow_list: | ||
[ | ||
Standard_NC6s_v3, | ||
Standard_NC12s_v3, | ||
Standard_NC24s_v3, | ||
Standard_NC24ads_A100_v4, | ||
Standard_NC48ads_A100_v4, | ||
Standard_NC96ads_A100_v4, | ||
Standard_ND96asr_v4, | ||
Standard_ND96amsr_A100_v4, | ||
Standard_ND40rs_v2, | ||
] | ||
version: 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
extra_config: model.yaml | ||
spec: spec.yaml | ||
type: model | ||
categories: ["Foundation Models"] |
Oops, something went wrong.