guillaume-chervet · guillaume-chervet · Dec 17, 2023 · Dec 17, 2023 · Dec 17, 2023 · Dec 17, 2023
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -198,19 +198,38 @@ jobs:
             --resource_group_name ${{ env.AZURE_RESOURCE_GROUP_NAME }} \
             --workspace_name ${{ env.AZURE_ML_WORKSPACE_NAME }} \
             --location ${{ env.AZURE_LOCATION }} \
-            --tags "{\"triggering_actor\":\"${{github.triggering_actor}}\"}" \
+            --tags "{\"git\":\"${{ github.head_ref }}.${{ github.sha }}\",\"version\":\"${{ needs.tags.outputs.new_version }}\",\"triggering_actor\":\"${{github.triggering_actor}}\"}" \
             > train_output.txt
           cat train_output.txt
       working-directory: train
+    - name: azure login  # NOTE(review): presumably refreshes Azure credentials after the long training step — confirm
+      uses: azure/login@v1
+      with:
+        client-id: ${{ secrets.AZURE_CLIENT_ID }}
+        tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+        subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+    - name: Run Train Post Pipeline  # registers the model and integration dataset for the finished experiment
+      run: |
+          EXPERIMENT_ID=$(python bin/retrieve_output.py ./train/train_output.txt experiment_id)  # id emitted by the training step
+          cd ./train  # post_train_output.txt is therefore written to ./train/, where the next step reads it
+          poetry run python azureml_post_pipeline.py \
+            --subscription_id ${{ secrets.AZURE_SUBSCRIPTION_ID }}  \
+            --resource_group_name ${{ env.AZURE_RESOURCE_GROUP_NAME }} \
+            --workspace_name ${{ env.AZURE_ML_WORKSPACE_NAME }} \
+            --location ${{ env.AZURE_LOCATION }} \
+            --tags "{\"git\":\"${{ github.head_ref }}.${{ github.sha }}\",\"version\":\"${{ needs.tags.outputs.new_version }}\",\"triggering_actor\":\"${{github.triggering_actor}}\"}" \
+            --experiment_id $EXPERIMENT_ID \
+            > post_train_output.txt
+          cat post_train_output.txt
     - name: download model
       id: train
       run: |
           az extension add -n ml
-          MODEL_VERSION=$(python bin/retrieve_output.py ./train/train_output.txt model_version)
+          MODEL_VERSION=$(python bin/retrieve_output.py ./train/post_train_output.txt model_version)
           echo "MODEL_VERSION=$MODEL_VERSION" >> $GITHUB_OUTPUT
-          INTEGRATION_DATASET_VERSION=$(python bin/retrieve_output.py ./train/train_output.txt integration_dataset_version)
+          INTEGRATION_DATASET_VERSION=$(python bin/retrieve_output.py ./train/post_train_output.txt integration_dataset_version)
           echo "INTEGRATION_DATASET_VERSION=$INTEGRATION_DATASET_VERSION" >> $GITHUB_OUTPUT
-          EXPERIMENT_ID=$(python bin/retrieve_output.py ./train/train_output.txt experiment_id)
+          EXPERIMENT_ID=$(python bin/retrieve_output.py ./train/post_train_output.txt experiment_id)
           echo "EXPERIMENT_ID=$EXPERIMENT_ID" >> $GITHUB_OUTPUT
 
           mkdir model

diff --git a/train/azureml_post_pipeline.py b/train/azureml_post_pipeline.py
@@ -0,0 +1,112 @@
+import argparse
+
+from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
+
+from azure.ai.ml import MLClient, Input, Output, load_component
+from azure.ai.ml.dsl import pipeline
+from azure.ai.ml.entities import Model
+from azure.ai.ml.constants import AssetTypes
+from azure.ai.ml.entities import Data
+from azure.ai.ml.entities import AmlCompute
+
+from extraction_dataset import register_extracted_dataset
+
+import uuid
+
+import json
+
+parser = argparse.ArgumentParser("train")
+parser.add_argument("--subscription_id", type=str)
+parser.add_argument("--resource_group_name", type=str)
+parser.add_argument("--workspace_name", type=str)
+parser.add_argument("--location", type=str)
+parser.add_argument("--tags", type=str, default="{}")
+# Required: every storage path below is built from the experiment id.
+parser.add_argument("--experiment_id", type=str, required=True)
+
+args = parser.parse_args()
+subscription_id = args.subscription_id
+resource_group_name = args.resource_group_name
+workspace_name = args.workspace_name
+location = args.location
+tags = json.loads(args.tags)
+experiment_id = args.experiment_id
+
+try:
+    credential = DefaultAzureCredential()
+    # Check if given credential can get token successfully.
+    credential.get_token("https://management.azure.com/.default")
+except Exception as ex:
+    print(ex)
+    # Fall back to InteractiveBrowserCredential if DefaultAzureCredential does not work.
+    credential = InteractiveBrowserCredential()
+
+# Get a handle to workspace
+ml_client = MLClient(
+    credential=credential,
+    subscription_id=subscription_id,
+    resource_group_name=resource_group_name,
+    workspace_name=workspace_name,
+)
+
+# All artefacts of one experiment share a single folder in the blob datastore.
+azure_blob = "azureml://datastores/workspaceblobstore/paths/"
+path_experiment = azure_blob + "cats-dogs-others/" + experiment_id
+custom_extraction_path = path_experiment + "/extraction/"
+
+# custom_extraction_hash_path = (
+#    azure_blob + "extraction_hash/cats-dogs-others/" + experiment_id + "/"
+# )
+# pipeline_job.outputs.extraction_hash_output = Output(
+#    type=AssetTypes.URI_FOLDER, mode="rw_mount", path=custom_extraction_hash_path
+# )
+
+custom_model_path = path_experiment + "/models/"
+custom_integration_path = path_experiment + "/integration/"
+
+# register_extracted_dataset(
+#    ml_client, custom_extraction_hash_path, custom_extraction_path, {}
+# )
+
+model_name = "cats-dogs-others"
+try:
+    # Next version = number of versions already listed under this name, plus one.
+    model_version = str(len(list(ml_client.models.list(model_name))) + 1)
+except Exception:
+    # Listing failed; start at the first version.
+    model_version = "1"
+
+file_model = Model(
+    version=model_version,
+    path=custom_model_path,
+    type=AssetTypes.CUSTOM_MODEL,
+    name=model_name,
+    tags={**tags},
+    description="Model created from azureML.",
+)
+saved_model = ml_client.models.create_or_update(file_model)
+
+print(
+    f"Model with name {saved_model.name} was registered to workspace, the model version is {saved_model.version}."
+)
+
+integration_dataset_name = "cats-dogs-others-integration"
+integration_dataset = Data(
+    name=integration_dataset_name,
+    path=custom_integration_path,
+    type=AssetTypes.URI_FOLDER,
+    description="Integration dataset for cats and dogs and others",
+    tags={**tags},
+)
+integration_dataset = ml_client.data.create_or_update(integration_dataset)
+
+# Summary of what was registered, emitted as one JSON line on stdout.
+output_data = {
+    "model_version": saved_model.version,
+    "model_name": saved_model.name,
+    "integration_dataset_name": integration_dataset.name,
+    "integration_dataset_version": integration_dataset.version,
+    "experiment_id": experiment_id,
+}
+
+print(json.dumps(output_data))
diff --git a/train/azureml_run_pipeline.py b/train/azureml_run_pipeline.py
@@ -99,7 +99,7 @@ def azureml_pipeline(
 
 pipeline_job = azureml_pipeline(
     pdfs_input_data=Input(
-        path="azureml:cats_dogs_others:1", type=AssetTypes.URI_FOLDER
+        path="azureml:cats_dogs_others:1", type=AssetTypes.URI_FOLDER,
     ),
     labels_input_data=Input(
         path="azureml:cats_dogs_others_labels:1", type=AssetTypes.URI_FOLDER
@@ -109,8 +109,9 @@ def azureml_pipeline(
 
 azure_blob = "azureml://datastores/workspaceblobstore/paths/"
 experiment_id = str(uuid.uuid4())
+path_experiment = azure_blob + "cats-dogs-others/" + experiment_id
 custom_extraction_path = (
-    azure_blob + "extraction/cats-dogs-others/" + experiment_id + "/"
+        path_experiment + "/extraction/"
 )
 pipeline_job.outputs.model_output = Output(
     type=AssetTypes.URI_FOLDER, mode="rw_mount", path=custom_extraction_path
@@ -122,12 +123,12 @@ def azureml_pipeline(
 #    type=AssetTypes.URI_FOLDER, mode="rw_mount", path=custom_extraction_hash_path
 # )
 
-custom_model_path = azure_blob + "models/cats-dogs-others/" + experiment_id + "/"
+custom_model_path = path_experiment + "/models/"
 pipeline_job.outputs.model_output = Output(
     type=AssetTypes.URI_FOLDER, mode="rw_mount", path=custom_model_path
 )
 custom_integration_path = (
-    azure_blob + "/integration/cats-dogs-others/" + experiment_id + "/"
+        path_experiment + "/integration/"
 )
 pipeline_job.outputs.integration_output = Output(
     type=AssetTypes.URI_FOLDER, mode="rw_mount", path=custom_integration_path
@@ -137,79 +138,12 @@ def azureml_pipeline(
     pipeline_job, experiment_name="cats_dos_others_pipeline"
 )
 
-import threading
-import time
-
-run_get_token = True
-def get_token():
-    while run_get_token:
-        try:
-            token = credential.get_token("https://management.azure.com/.default")
-            print("Token obtenu:", token.token)
-        except Exception as ex:
-            print(ex)
-        time.sleep(60)  # Attendre 60 secondes
-
-
-token_thread = threading.Thread(target=get_token)
-token_thread.start()
-
-def run_pipeline():
-    ml_client.jobs.stream(pipeline_job.name)
-
-pipeline_thread = threading.Thread(target=run_pipeline)
-pipeline_thread.start()
-pipeline_thread.join()
-
 try:
-    credential.get_token("https://management.azure.com/.default")
+    ml_client.jobs.stream(pipeline_job.name)  # NOTE(review): any error raised here is only printed below; the script then continues — confirm this best-effort behaviour is intended
 except Exception as ex:
     print(ex)
-# register_extracted_dataset(
-#    ml_client, custom_extraction_hash_path, custom_extraction_path, {}
-# )
-
-model_name = "cats-dogs-others"
-try:
-    model_version = str(len(list(ml_client.models.list(model_name))) + 1)
-except:
-    model_version = "1"
-
-file_model = Model(
-    version=model_version,
-    path=custom_model_path,
-    type=AssetTypes.CUSTOM_MODEL,
-    name=model_name,
-    tags={**tags},
-    description="Model created from azureML.",
-)
-saved_model = ml_client.models.create_or_update(file_model)
-
-print(
-    f"Model with name {saved_model.name} was registered to workspace, the model version is {saved_model.version}."
-)
-
-integration_dataset_name = "cats-dogs-others-integration"
-integration_dataset = Data(
-    name="cats-dogs-others-integration",
-    path=custom_integration_path,
-    type=AssetTypes.URI_FOLDER,
-    description="Integration dataset for cats and dogs and others",
-    tags={**tags},
-)
-integration_dataset = ml_client.data.create_or_update(integration_dataset)
-print(
-    f"Dataset with name {integration_dataset.name} was registered to workspace, the dataset version is {integration_dataset.version}"
-)
-
-run_get_token = False
-token_thread.join()
 
 output_data = {
-    "model_version": saved_model.version,
-    "model_name": saved_model.name,
-    "integration_dataset_name": integration_dataset.name,
-    "integration_dataset_version": integration_dataset.version,
     "experiment_id": experiment_id,
 }
 

diff --git a/train/extraction/command.py b/train/extraction/command.py
@@ -24,7 +24,10 @@
 console_output = f""" 
     number_files_input: {result.number_files_input}
     number_images_output: {result.number_images_output}
-    computed_hash: {computed_hash}"""
+    computed_hash: {computed_hash}
+
+
+"""
 
 mlflow.log_metric("number_files_input", result.number_files_input)
 mlflow.log_metric("number_images_output", result.number_images_output)