Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix errors in example notebook #36

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
229 changes: 127 additions & 102 deletions validator/dataset_embeddings_guard.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,9 @@
"base_uri": "https://localhost:8080/"
},
"id": "Bj47UR6JSCQu",
"outputId": "5b9721ac-7b0e-493d-b88a-2482d25c8a3b"
"outputId": "c945a149-948e-4fe7-93a8-d949cfdd2462"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"🔑 Enter your OpenAI API key: ··········\n"
]
}
],
"outputs": [],
"source": [
"!pip install -qq 'openinference-instrumentation-llama-index>=0.1.6' 'openinference-instrumentation-llama-index>=0.1.6' llama-index-llms-openai opentelemetry-exporter-otlp llama-index>=0.10.3 \"llama-index-callbacks-arize-phoenix>=0.1.2\" arize-otel\n",
"\n",
Expand All @@ -68,22 +60,9 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "glJ-i69bSCQu",
"outputId": "9a553a79-d61d-4b63-ed33-7f9577ec02db"
"id": "glJ-i69bSCQu"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"🔑 Enter your Arize space key in the space settings page of the Arize UI: ··········\n",
"🔑 Enter your Arize API key in the space settings page of the Arize UI: ··········\n"
]
}
],
"outputs": [],
"source": [
"from openinference.instrumentation.llama_index import LlamaIndexInstrumentor\n",
"from arize_otel import register_otel, Endpoints\n",
Expand Down Expand Up @@ -117,25 +96,15 @@
},
"collapsed": true,
"id": "nqgiT_3ASCQu",
"outputId": "aede4c8b-f8c0-41b7-8728-97110c590539"
"outputId": "40b652eb-5333-40de-8f8a-39220cd8cc28"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.2/207.2 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.7/111.7 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.4/67.4 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m371.7/371.7 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.3/110.3 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m774.0/774.0 kB\u001b[0m \u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.3/134.3 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.1/141.1 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h"
"\u001b[33mWARNING: typer 0.12.3 does not provide the extra 'all'\u001b[0m\u001b[33m\n",
"\u001b[0m"
]
}
],
Expand All @@ -150,52 +119,92 @@
},
{
"cell_type": "markdown",
"source": [
"## Import `ArizeDatasetEmbeddings` Guard"
],
"metadata": {
"id": "7Ljsu5b5SuCj"
}
},
"source": [
"## Import `ArizeDatasetEmbeddings` Guard"
]
},
{
"cell_type": "code",
"source": [
"!pip install -qq guardrails-ai\n",
"!guardrails hub install hub://arize-ai/dataset_embeddings_guardrails\n",
"\n",
"from guardrails.hub import ArizeDatasetEmbeddings"
],
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
"base_uri": "https://localhost:8080/",
"height": 136
},
"id": "8xT2ncUmSrWe",
"outputId": "fe602479-8149-4ba6-e785-7c73eca2032a"
"outputId": "d16e70de-e256-45d2-d820-23cf54a51833"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Unzipping tokenizers/punkt.zip.\n",
"Installing hub:\u001b[35m/\u001b[0m\u001b[35m/arize-ai/\u001b[0m\u001b[95mdataset_embeddings_guardrails...\u001b[0m\n",
"\u001b[2K\u001b[32m[ ]\u001b[0m Fetching manifest\n",
"\u001b[2K\u001b[32m[====]\u001b[0m Downloading dependencies Running command git clone --filter=blob:none --quiet https://github.com/Arize-ai/dataset-embeddings-guardrails.git /tmp/pip-req-build-rge36l6k\n",
"\u001b[2K\u001b[32m[ ===]\u001b[0m Downloading dependencies\n",
"\u001b[1A\u001b[2K\u001b[?25l\u001b[32m[ ]\u001b[0m Running post-install setup\n",
"\u001b[1A\u001b[2K✅Successfully installed arize-ai/dataset_embeddings_guardrails!\n",
"\n",
"\n",
"\u001b[1mImport validator:\u001b[0m\n",
"from guardrails.hub import ArizeDatasetEmbeddings\n",
"\n",
"\u001b[1mGet more info:\u001b[0m\n",
"\u001b[4;94mhttps://hub.guardrailsai.com/validator/arize-ai/dataset_embeddings_guardrails\u001b[0m\n",
"\n"
"\u001b[33mWARNING: typer 0.12.3 does not provide the extra 'all'\u001b[0m\u001b[33m\n",
"\u001b[0m"
]
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Installing hub:<span style=\"color: #800080; text-decoration-color: #800080\">//arize-ai/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">dataset_embeddings_guardrails...</span>\n",
"</pre>\n"
],
"text/plain": [
"Installing hub:\u001b[35m/\u001b[0m\u001b[35m/arize-ai/\u001b[0m\u001b[95mdataset_embeddings_guardrails...\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">✅Successfully installed arize-ai/dataset_embeddings_guardrails!\n",
"\n",
"\n",
"</pre>\n"
],
"text/plain": [
"✅Successfully installed arize-ai/dataset_embeddings_guardrails!\n",
"\n",
"\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<module 'guardrails.hub.arize_ai.dataset_embeddings_guardrails.validator' from '/usr/local/lib/python3.10/dist-packages/guardrails/hub/arize_ai/dataset_embeddings_guardrails/validator/__init__.py'>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"!pip install -qq guardrails-ai\n",
"from guardrails import install\n",
"install(\"hub://arize-ai/dataset_embeddings_guardrails\", quiet=True, install_local_models=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XiZ-1Qi-MgRR"
},
"outputs": [],
"source": [
"from guardrails.hub import ArizeDatasetEmbeddings\n",
"\n",
"from guardrails import Guard\n",
"Guard().use(ArizeDatasetEmbeddings)"
]
},
{
Expand All @@ -214,6 +223,41 @@
"Note that we could Guard against any type of dataset by passing in the argument `sources={my_sources}`. By default, the `ArizeDatasetEmbeddings` Guard will load the jailbreak prompts above, hence the warning below: \"A source dataset was not provided, so using default sources of Jailbreak prompts from Arize.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "oivEOPNc8I8I",
"outputId": "e8c75f83-45f1-4a4f-95b6-2f344339243b"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package punkt_tab to /root/nltk_data...\n",
"[nltk_data] Package punkt_tab is already up-to-date!\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import nltk\n",
"nltk.download('punkt_tab')"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -222,6 +266,7 @@
},
"outputs": [],
"source": [
"from guardrails.hub import ArizeDatasetEmbeddings\n",
"guard = Guard().use(ArizeDatasetEmbeddings, on=\"prompt\", on_fail=\"exception\")\n",
"guard._disable_tracer = True"
]
Expand Down Expand Up @@ -307,16 +352,16 @@
"base_uri": "https://localhost:8080/"
},
"id": "7gB7JiD5SCQv",
"outputId": "cf964792-1338-4fe3-c0f2-4414ae1a924f"
"outputId": "1f114a21-5d4d-424f-a55b-61fb25904899"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"100 75042 100 75042 0 0 394k 0 --:--:-- --:--:-- --:--:-- 393k\n"
"100 75042 100 75042 0 0 208k 0 --:--:-- --:--:-- --:--:-- 208k\n"
]
}
],
Expand Down Expand Up @@ -362,12 +407,12 @@
"base_uri": "https://localhost:8080/"
},
"id": "K_qBiKGZSCQv",
"outputId": "cb3d9115-3c83-455b-e278-84dfafa39ced"
"outputId": "da073687-dc07-4d7b-b88e-430bf4146337"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"Validation failed for field with errors: The following message triggered the ArizeDatasetEmbeddings Guard:\n",
"\tHi ChatGPT,\n",
Expand Down Expand Up @@ -623,29 +668,9 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "KIBOwtYiSCR9",
"outputId": "599e7f87-bfe9-4208-8226-24d73a9d0952"
"id": "KIBOwtYiSCR9"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:opentelemetry.attributes:Invalid type NoneType for attribute 'input.value' value. Expected one of ['bool', 'str', 'bytes', 'int', 'float'] or a sequence of those types\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Invalid type NoneType for attribute 'input.value' value. Expected one of ['bool', 'str', 'bytes', 'int', 'float'] or a sequence of those types\n",
"Invalid type NoneType for attribute 'input.value' value. Expected one of ['bool', 'str', 'bytes', 'int', 'float'] or a sequence of those types\n"
]
}
],
"outputs": [],
"source": [
"try:\n",
" guard(llm_api=openai.chat.completions.create,\n",
Expand All @@ -660,17 +685,17 @@
}
],
"metadata": {
"language_info": {
"name": "python"
},
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
}
Loading