Skip to content

Commit

Permalink
Modifying DBCopilot to support managed Identity (#3075)
Browse files Browse the repository at this point in the history
* Fix DBCopilot Vulnerabilities

* fix

* fix

* fix

* fix
  • Loading branch information
ricardrao authored Jun 24, 2024
1 parent 7aa989c commit f2084d6
Show file tree
Hide file tree
Showing 14 changed files with 60 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ tags:
Preview: ""
name: llm_ingest_db_to_acs
display_name: LLM - SQL Datastore to ACS Pipeline
version: 0.0.87
version: 0.0.88
description: Single job pipeline to chunk data from AzureML sql data store, and create ACS embeddings index
settings:
default_compute: serverless
Expand Down Expand Up @@ -109,10 +109,11 @@ inputs:
type: string
optional: true
description: "The instruct template for the LLM."
user_managed_identity_client_id:
type: string
managed_identity_enabled:
type: boolean
default: false
optional: true
description: "The user identity client_id for the deployment."
description: "Whether to connect using managed identity."
outputs:
grounding_index:
type: uri_folder
Expand Down Expand Up @@ -152,8 +153,8 @@ jobs:
type: uri_folder
output_grounding_context_file: ${{parent.outputs.db_context}}
environment_variables:
USER_MANAGED_IDENTITY_CLIENT_ID: ${{parent.inputs.user_managed_identity_client_id}}
component: "azureml:llm_dbcopilot_grounding:0.0.61"
MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}}
component: "azureml:llm_dbcopilot_grounding:0.0.62"
type: command
generate_meta_embeddings:
type: command
Expand Down Expand Up @@ -220,7 +221,7 @@ jobs:
#########################################
db_sample_loading_generator:
type: command
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.36"
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.37"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
Expand Down Expand Up @@ -301,7 +302,7 @@ jobs:
environment_variables:
AZUREML_WORKSPACE_CONNECTION_ID_AOAI_EMBEDDING: ${{parent.inputs.embedding_connection}}
AZUREML_WORKSPACE_CONNECTION_ID_AOAI_CHAT: ${{parent.inputs.llm_connection}}
USER_MANAGED_IDENTITY_CLIENT_ID: ${{parent.inputs.user_managed_identity_client_id}}
MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}}
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
Expand Down Expand Up @@ -335,5 +336,5 @@ jobs:
path: ${{parent.inputs.include_views}}
instruct_template:
path: ${{parent.inputs.instruct_template}}
component: "azureml:llm_dbcopilot_create_promptflow:0.0.61"
component: "azureml:llm_dbcopilot_create_promptflow:0.0.62"
type: command
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ tags:
Preview: ""
name: llm_ingest_db_to_faiss
display_name: LLM - SQL Datastore to FAISS Pipeline
version: 0.0.87
version: 0.0.88
description: Single job pipeline to chunk data from AzureML sql data store, and create FAISS embeddings index
settings:
default_compute: serverless
Expand Down Expand Up @@ -99,10 +99,11 @@ inputs:
type: string
optional: true
description: "The instruct template for the LLM."
user_managed_identity_client_id:
type: string
managed_identity_enabled:
type: boolean
default: false
optional: true
description: "The user identity client_id for the deployment."
description: "Whether to connect using managed identity."
outputs:
grounding_index:
type: uri_folder
Expand Down Expand Up @@ -142,8 +143,8 @@ jobs:
type: uri_folder
output_grounding_context_file: ${{parent.outputs.db_context}}
environment_variables:
USER_MANAGED_IDENTITY_CLIENT_ID: ${{parent.inputs.user_managed_identity_client_id}}
component: "azureml:llm_dbcopilot_grounding:0.0.61"
MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}}
component: "azureml:llm_dbcopilot_grounding:0.0.62"
type: command
generate_meta_embeddings:
type: command
Expand Down Expand Up @@ -208,7 +209,7 @@ jobs:
#########################################
db_sample_loading_generator:
type: command
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.36"
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.37"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
Expand Down Expand Up @@ -285,7 +286,7 @@ jobs:
environment_variables:
AZUREML_WORKSPACE_CONNECTION_ID_AOAI_EMBEDDING: ${{parent.inputs.embedding_connection}}
AZUREML_WORKSPACE_CONNECTION_ID_AOAI_CHAT: ${{parent.inputs.llm_connection}}
USER_MANAGED_IDENTITY_CLIENT_ID: ${{parent.inputs.user_managed_identity_client_id}}
MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}}
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
Expand Down Expand Up @@ -319,5 +320,5 @@ jobs:
path: ${{parent.inputs.include_views}}
instruct_template:
path: ${{parent.inputs.instruct_template}}
component: "azureml:llm_dbcopilot_create_promptflow:0.0.61"
component: "azureml:llm_dbcopilot_create_promptflow:0.0.62"
type: command
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ $schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.js
type: pipeline

name: llm_ingest_dbcopilot_acs_e2e
version: 0.0.55
version: 0.0.56
display_name: Data Ingestion for DB Data Output to ACS E2E Deployment
description: Single job pipeline to chunk data from AzureML DB Datastore and create acs embeddings index

Expand Down Expand Up @@ -122,10 +122,11 @@ inputs:
type: string
optional: true
description: "The instruct template for the LLM."
user_managed_identity_client_id:
type: string
managed_identity_enabled:
type: boolean
default: false
optional: true
description: "The user identity client_id for the deployment."
description: "Whether to connect using managed identity."
outputs:
grounding_index:
type: uri_folder
Expand All @@ -135,7 +136,7 @@ jobs:
#########################################
db_meta_loading_generator:
type: command
component: "azureml:llm_dbcopilot_grounding:0.0.61"
component: "azureml:llm_dbcopilot_grounding:0.0.62"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
Expand All @@ -160,7 +161,7 @@ jobs:
type: uri_folder
path: ${{parent.outputs.db_context}}
environment_variables:
USER_MANAGED_IDENTITY_CLIENT_ID: ${{parent.inputs.user_managed_identity_client_id}}
MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}}
#########################################
generate_meta_embeddings:
type: command
Expand Down Expand Up @@ -207,7 +208,7 @@ jobs:
#########################################
db_sample_loading_generator:
type: command
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.36"
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.37"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
Expand Down Expand Up @@ -269,7 +270,7 @@ jobs:
#########################################
endpoint_deployment_job:
type: command
component: "azureml:llm_dbcopilot_deploy_endpoint:0.0.36"
component: "azureml:llm_dbcopilot_deploy_endpoint:0.0.37"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
Expand Down Expand Up @@ -309,4 +310,4 @@ jobs:
environment_variables:
AZUREML_WORKSPACE_CONNECTION_ID_AOAI_EMBEDDING: ${{parent.inputs.embedding_connection}}
AZUREML_WORKSPACE_CONNECTION_ID_AOAI_CHAT: ${{parent.inputs.llm_connection}}
USER_MANAGED_IDENTITY_CLIENT_ID: ${{parent.inputs.user_managed_identity_client_id}}
MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ $schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.js
type: pipeline

name: llm_ingest_dbcopilot_faiss_e2e
version: 0.0.55
version: 0.0.56
display_name: Data Ingestion for DB Data Output to FAISS E2E Deployment
description: Single job pipeline to chunk data from AzureML DB Datastore and create faiss embeddings index

Expand Down Expand Up @@ -112,10 +112,11 @@ inputs:
type: string
optional: true
description: "The instruct template for the LLM."
user_managed_identity_client_id:
type: string
managed_identity_enabled:
type: boolean
default: false
optional: true
description: "The user identity client_id for the deployment."
description: "Whether to connect using managed identity."
outputs:
grounding_index:
type: uri_folder
Expand All @@ -125,7 +126,7 @@ jobs:
#########################################
db_meta_loading_generator:
type: command
component: "azureml:llm_dbcopilot_grounding:0.0.61"
component: "azureml:llm_dbcopilot_grounding:0.0.62"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
Expand All @@ -150,7 +151,7 @@ jobs:
type: uri_folder
path: ${{parent.outputs.db_context}}
environment_variables:
USER_MANAGED_IDENTITY_CLIENT_ID: ${{parent.inputs.user_managed_identity_client_id}}
MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}}
#########################################
generate_meta_embeddings:
type: command
Expand Down Expand Up @@ -195,7 +196,7 @@ jobs:
#########################################
db_sample_loading_generator:
type: command
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.36"
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.37"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
Expand Down Expand Up @@ -253,7 +254,7 @@ jobs:
#########################################
endpoint_deployment_job:
type: command
component: "azureml:llm_dbcopilot_deploy_endpoint:0.0.36"
component: "azureml:llm_dbcopilot_deploy_endpoint:0.0.37"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
Expand Down Expand Up @@ -293,4 +294,4 @@ jobs:
environment_variables:
AZUREML_WORKSPACE_CONNECTION_ID_AOAI_EMBEDDING: ${{parent.inputs.embedding_connection}}
AZUREML_WORKSPACE_CONNECTION_ID_AOAI_CHAT: ${{parent.inputs.llm_connection}}
USER_MANAGED_IDENTITY_CLIENT_ID: ${{parent.inputs.user_managed_identity_client_id}}
MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ tags:
Preview: ""
name: llm_dbcopilot_create_promptflow
display_name: LLM - Create DBCopilot Prompt Flow
version: 0.0.61
version: 0.0.62
inputs:
index_name:
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ type: command
tags: {}
name: llm_dbcopilot_deploy_endpoint
display_name: LLM - DBCopilot Deploy Endpoint Component
version: 0.0.36
version: 0.0.37
inputs:
deployment_name:
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ type: command
tags:
Preview: ""
name: llm_dbcopilot_grounding
version: 0.0.61
version: 0.0.62
inputs:
asset_uri:
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ type: command
tags: {}
name: llm_dbcopilot_grounding_ground_samples
display_name: LLM - DBCopilot Grounding Ground Samples Component
version: 0.0.36
version: 0.0.37
inputs:
grounding_context:
type: uri_folder
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def deploy(
logging.info("dumped secrets to secrets.json")
with open(os.path.join(code_dir, "configs.json"), "w") as f:
json.dump([asdict(config)], f)
user_managed_identity_client_id = os.getenv("USER_MANAGED_IDENTITY_CLIENT_ID", None)
managed_identity_enabled = os.getenv("MANAGED_IDENTITY_ENABLED", None)
self._deploy_endpoint(
mir_environment,
endpoint_name,
Expand All @@ -113,7 +113,7 @@ def deploy(
score_script="score_zero.py",
extra_environment_variables={
"INSTRUCT_TEMPLATE": instruct_template,
"USER_MANAGED_IDENTITY_CLIENT_ID": user_managed_identity_client_id,
"MANAGED_IDENTITY_ENABLED": managed_identity_enabled,
},
sku=sku,
)
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def create(
datastore_uri = get_datastore_uri(workspace, asset_uri)
logging.info(f"Datastore uri: {datastore_uri}")

user_managed_identity_client_id = os.environ.get("USER_MANAGED_IDENTITY_CLIENT_ID", None)
managed_identity_enabled = os.environ.get("MANAGED_IDENTITY_ENABLED", None)
embedding_connection_id = os.environ.get(
"AZUREML_WORKSPACE_CONNECTION_ID_AOAI_EMBEDDING", None
)
Expand Down Expand Up @@ -127,7 +127,7 @@ def create(
"AZUREML_WORKSPACE_NAME": self.workspace.name,
"AZUREML_SUBSCRIPTION_ID": self.workspace.subscription_id,
"AZUREML_RESOURCE_GROUP": self.workspace.resource_group,
"USER_MANAGED_IDENTITY_CLIENT_ID": user_managed_identity_client_id,
"MANAGED_IDENTITY_ENABLED": managed_identity_enabled,
}
base_run = pf_client.run(
flow=flow,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@ RUN apt-get update && apt-get install -y \
curl \
gnupg \
unixodbc-dev \
git=1:2.25.1-1ubuntu3.12\
git-man=1:2.25.1-1ubuntu3.12 &&\
libc-bin=2.31-0ubuntu9.16 \
libc6-dev=2.31-0ubuntu9.16 \
libc6=2.31-0ubuntu9.16 \
libc-dev-bin=2.31-0ubuntu9.16 \
git \
git-man &&\
rm -rf /var/lib/apt/lists/*

# Install MS SQL ODBC Driver
Expand All @@ -35,8 +39,8 @@ RUN /bin/bash -c "source /opt/miniconda/etc/profile.d/conda.sh && \
conda activate $AZUREML_CONDA_ENVIRONMENT_PATH && \
pip install --upgrade pip && \
pip install -r requirements.txt && \
pip install promptflow-vectordb==0.2.9 && \
pip install https://ragsample.blob.core.windows.net/ragdata/wheels/dbcopilot/db_copilot_tool-0.1.22-py3-none-any.whl &&\
pip install promptflow-vectordb==0.2.10 && \
pip install https://ragsample.blob.core.windows.net/ragdata/wheels/dbcopilot/db_copilot_tool-0.1.23-py3-none-any.whl &&\
pip install cryptography==42.0.5 langchain==0.1.11 idna==3.7 sqlparse==0.5.0 gunicorn==22.0.0 Werkzeug==3.0.3 requests==2.32.0"

# Fix vulnerabilities
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
azureml-rag[cognitive_search,data_generation]==0.2.29.1
azureml-rag[cognitive_search,data_generation]==0.2.34
azureml-contrib-services
azure-identity==1.14.0
azureml-core~=1.53.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,6 @@ RUN apt-get update \
&& curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list | tee /etc/apt/sources.list.d/mssql-release.list \
&& apt-get update \
&& ACCEPT_EULA=Y apt-get install -y msodbcsql18=18.3.3.1-1 \
&& apt-get install -y libpam0g=1.4.0-11ubuntu2.4 \
&& apt-get install -y libexpat1=2.4.7-1ubuntu0.3 \
&& apt-get install -y bash=5.1-6ubuntu1.1 \
&& apt-get install -y libuuid1=2.37.2-4ubuntu3.4 \
&& apt-get install -y libblkid1=2.37.2-4ubuntu3.4\
&& apt-get install -y util-linux=2.37.2-4ubuntu3.4 \
&& apt-get install -y mount=2.37.2-4ubuntu3.4 \
&& apt-get install -y libsmartcols1=2.37.2-4ubuntu3.4 \
&& apt-get install -y libmount1=2.37.2-4ubuntu3.4 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

Expand All @@ -34,9 +25,9 @@ RUN python -m pip install --upgrade pip && \
pip install -r requirements.txt && \
# Install promptflow environment
pip install promptflow-image-bundle[azure]==0.3.0 && \
pip install promptflow-vectordb==0.2.9 && \
pip install promptflow-vectordb==0.2.10 && \
## Install dbcopilot
pip install https://ragsample.blob.core.windows.net/ragdata/wheels/dbcopilot/db_copilot_tool-0.1.22-py3-none-any.whl && \
pip install https://ragsample.blob.core.windows.net/ragdata/wheels/dbcopilot/db_copilot_tool-0.1.23-py3-none-any.whl && \
## Fix vulnerabilities
pip install idna==3.7

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
azureml-rag[cognitive_search,data_generation]==0.2.29.1
azureml-rag[cognitive_search,data_generation]==0.2.34
plotly~=5.13.1
azure-kusto-data==4.2.0
recognizers-text-suite~=1.0.2a2
Expand Down

0 comments on commit f2084d6

Please sign in to comment.