Arize-ai · Jgilhuly · Aug 12, 2024
diff --git a/validator/dataset_embeddings_guard.ipynb b/validator/dataset_embeddings_guard.ipynb
@@ -33,17 +33,9 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "Bj47UR6JSCQu",
-        "outputId": "5b9721ac-7b0e-493d-b88a-2482d25c8a3b"
+        "outputId": "c945a149-948e-4fe7-93a8-d949cfdd2462"
       },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "🔑 Enter your OpenAI API key: ··········\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "!pip install -qq 'openinference-instrumentation-llama-index>=0.1.6' 'openinference-instrumentation-llama-index>=0.1.6' llama-index-llms-openai opentelemetry-exporter-otlp llama-index>=0.10.3 \"llama-index-callbacks-arize-phoenix>=0.1.2\" arize-otel\n",
         "\n",
@@ -68,22 +60,9 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "glJ-i69bSCQu",
-        "outputId": "9a553a79-d61d-4b63-ed33-7f9577ec02db"
+        "id": "glJ-i69bSCQu"
       },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "🔑 Enter your Arize space key in the space settings page of the Arize UI: ··········\n",
-            "🔑 Enter your Arize API key in the space settings page of the Arize UI: ··········\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "from openinference.instrumentation.llama_index import LlamaIndexInstrumentor\n",
         "from arize_otel import register_otel, Endpoints\n",
@@ -117,25 +96,15 @@
         },
         "collapsed": true,
         "id": "nqgiT_3ASCQu",
-        "outputId": "aede4c8b-f8c0-41b7-8728-97110c590539"
+        "outputId": "40b652eb-5333-40de-8f8a-39220cd8cc28"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.2/207.2 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.7/111.7 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.4/67.4 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m371.7/371.7 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.3/110.3 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m774.0/774.0 kB\u001b[0m \u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.3/134.3 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.1/141.1 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25h"
+            "\u001b[33mWARNING: typer 0.12.3 does not provide the extra 'all'\u001b[0m\u001b[33m\n",
+            "\u001b[0m"
           ]
         }
       ],
@@ -150,52 +119,92 @@
     },
     {
       "cell_type": "markdown",
-      "source": [
-        "## Import `ArizeDatasetEmbeddings` Guard"
-      ],
       "metadata": {
         "id": "7Ljsu5b5SuCj"
-      }
+      },
+      "source": [
+        "## Import `ArizeDatasetEmbeddings` Guard"
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "!pip install -qq guardrails-ai\n",
-        "!guardrails hub install hub://arize-ai/dataset_embeddings_guardrails\n",
-        "\n",
-        "from guardrails.hub import ArizeDatasetEmbeddings"
-      ],
+      "execution_count": null,
       "metadata": {
         "colab": {
-          "base_uri": "https://localhost:8080/"
+          "base_uri": "https://localhost:8080/",
+          "height": 136
         },
         "id": "8xT2ncUmSrWe",
-        "outputId": "fe602479-8149-4ba6-e785-7c73eca2032a"
+        "outputId": "d16e70de-e256-45d2-d820-23cf54a51833"
       },
-      "execution_count": null,
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
-            "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
-            "[nltk_data]   Unzipping tokenizers/punkt.zip.\n",
-            "Installing hub:\u001b[35m/\u001b[0m\u001b[35m/arize-ai/\u001b[0m\u001b[95mdataset_embeddings_guardrails...\u001b[0m\n",
-            "\u001b[2K\u001b[32m[    ]\u001b[0m Fetching manifest\n",
-            "\u001b[2K\u001b[32m[====]\u001b[0m Downloading dependencies  Running command git clone --filter=blob:none --quiet https://github.com/Arize-ai/dataset-embeddings-guardrails.git /tmp/pip-req-build-rge36l6k\n",
-            "\u001b[2K\u001b[32m[ ===]\u001b[0m Downloading dependencies\n",
-            "\u001b[1A\u001b[2K\u001b[?25l\u001b[32m[    ]\u001b[0m Running post-install setup\n",
-            "\u001b[1A\u001b[2K✅Successfully installed arize-ai/dataset_embeddings_guardrails!\n",
-            "\n",
-            "\n",
-            "\u001b[1mImport validator:\u001b[0m\n",
-            "from guardrails.hub import ArizeDatasetEmbeddings\n",
-            "\n",
-            "\u001b[1mGet more info:\u001b[0m\n",
-            "\u001b[4;94mhttps://hub.guardrailsai.com/validator/arize-ai/dataset_embeddings_guardrails\u001b[0m\n",
-            "\n"
+            "\u001b[33mWARNING: typer 0.12.3 does not provide the extra 'all'\u001b[0m\u001b[33m\n",
+            "\u001b[0m"
           ]
+        },
+        {
+          "data": {
+            "text/html": [
+              "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Installing hub:<span style=\"color: #800080; text-decoration-color: #800080\">//arize-ai/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">dataset_embeddings_guardrails...</span>\n",
+              "</pre>\n"
+            ],
+            "text/plain": [
+              "Installing hub:\u001b[35m/\u001b[0m\u001b[35m/arize-ai/\u001b[0m\u001b[95mdataset_embeddings_guardrails...\u001b[0m\n"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">✅Successfully installed arize-ai/dataset_embeddings_guardrails!\n",
+              "\n",
+              "\n",
+              "</pre>\n"
+            ],
+            "text/plain": [
+              "✅Successfully installed arize-ai/dataset_embeddings_guardrails!\n",
+              "\n",
+              "\n"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/plain": [
+              "<module 'guardrails.hub.arize_ai.dataset_embeddings_guardrails.validator' from '/usr/local/lib/python3.10/dist-packages/guardrails/hub/arize_ai/dataset_embeddings_guardrails/validator/__init__.py'>"
+            ]
+          },
+          "execution_count": 7,
+          "metadata": {},
+          "output_type": "execute_result"
         }
+      ],
+      "source": [
+        "!pip install -qq guardrails-ai\n",
+        "from guardrails import install\n",
+        "install(\"hub://arize-ai/dataset_embeddings_guardrails\", quiet=True, install_local_models=True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "XiZ-1Qi-MgRR"
+      },
+      "outputs": [],
+      "source": [
+        "from guardrails.hub import ArizeDatasetEmbeddings\n",
+        "\n",
+        "from guardrails import Guard\n",
+        "Guard().use(ArizeDatasetEmbeddings)"
       ]
     },
     {
@@ -214,6 +223,41 @@
         "Note that we could Guard against any type of dataset by passing in the argument `sources={my_sources}`. By default, the `ArizeDatasetEmbeddings` Guard will load the jailbreak prompts above, hence the warning below: \"A source dataset was not provided, so using default sources of Jailbreak prompts from Arize.\""
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "oivEOPNc8I8I",
+        "outputId": "e8c75f83-45f1-4a4f-95b6-2f344339243b"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "[nltk_data] Downloading package punkt_tab to /root/nltk_data...\n",
+            "[nltk_data]   Package punkt_tab is already up-to-date!\n"
+          ]
+        },
+        {
+          "data": {
+            "text/plain": [
+              "True"
+            ]
+          },
+          "execution_count": 9,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "import nltk\n",
+        "nltk.download('punkt_tab')"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": null,
@@ -222,6 +266,7 @@
       },
       "outputs": [],
       "source": [
+        "from guardrails.hub import ArizeDatasetEmbeddings\n",
         "guard = Guard().use(ArizeDatasetEmbeddings, on=\"prompt\", on_fail=\"exception\")\n",
         "guard._disable_tracer = True"
       ]
@@ -307,16 +352,16 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "7gB7JiD5SCQv",
-        "outputId": "cf964792-1338-4fe3-c0f2-4414ae1a924f"
+        "outputId": "1f114a21-5d4d-424f-a55b-61fb25904899"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
             "                                 Dload  Upload   Total   Spent    Left  Speed\n",
-            "100 75042  100 75042    0     0   394k      0 --:--:-- --:--:-- --:--:--  393k\n"
+            "100 75042  100 75042    0     0   208k      0 --:--:-- --:--:-- --:--:--  208k\n"
           ]
         }
       ],
@@ -362,12 +407,12 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "K_qBiKGZSCQv",
-        "outputId": "cb3d9115-3c83-455b-e278-84dfafa39ced"
+        "outputId": "da073687-dc07-4d7b-b88e-430bf4146337"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Validation failed for field with errors: The following message triggered the ArizeDatasetEmbeddings Guard:\n",
             "\tHi ChatGPT,\n",
@@ -623,29 +668,9 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "KIBOwtYiSCR9",
-        "outputId": "599e7f87-bfe9-4208-8226-24d73a9d0952"
+        "id": "KIBOwtYiSCR9"
       },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:opentelemetry.attributes:Invalid type NoneType for attribute 'input.value' value. Expected one of ['bool', 'str', 'bytes', 'int', 'float'] or a sequence of those types\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Invalid type NoneType for attribute 'input.value' value. Expected one of ['bool', 'str', 'bytes', 'int', 'float'] or a sequence of those types\n",
-            "Invalid type NoneType for attribute 'input.value' value. Expected one of ['bool', 'str', 'bytes', 'int', 'float'] or a sequence of those types\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "try:\n",
         "  guard(llm_api=openai.chat.completions.create,\n",
@@ -660,17 +685,17 @@
     }
   ],
   "metadata": {
-    "language_info": {
-      "name": "python"
-    },
     "colab": {
       "provenance": []
     },
     "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
     }
   },
   "nbformat": 4,
   "nbformat_minor": 0
-}
+}