Merge branch 'main' into validation-outcome-cherrypick

guardrails-ai · Nov 21, 2023 · d342b0b · d342b0b
2 parents b768e51 + 58d2316
commit d342b0b
Show file tree

Hide file tree

Showing 38 changed files with 2,066 additions and 270 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -51,6 +51,7 @@ jobs:
             matrix:
                 python-version: ['3.8', '3.9', '3.10', '3.11']
                 pydantic-version: ['1.10.9', '2.4.2']
+                openai-version: ['0.28.1', '1.2.4']
         steps:
             -   uses: actions/checkout@v2
             -   name: Set up Python ${{ matrix.python-version }}
@@ -72,16 +73,27 @@ jobs:
                 run: |
                     make full
                     poetry run pip install pydantic==${{ matrix.pydantic-version }}
+                    poetry run pip install openai==${{ matrix.openai-version }}
 
-            -   if: matrix.pydantic-version == '2.4.2'
-                name: Static analysis with pyright (ignoring pydantic v1)
+            -   if: matrix.pydantic-version == '2.4.2' && matrix.openai-version == '0.28.1'
+                name: Static analysis with pyright (ignoring pydantic v1 and openai v1)
                 run: |
-                    make type-pydantic-v2
+                    make type-pydantic-v2-openai-v0
 
-            -   if: matrix.pydantic-version == '1.10.9'
-                name: Static analysis with mypy (ignoring pydantic v2)
+            -   if: matrix.pydantic-version == '1.10.9' && matrix.openai-version == '0.28.1'
+                name: Static analysis with mypy (ignoring pydantic v2 and openai v1)
                 run: |
-                    make type-pydantic-v1
+                    make type-pydantic-v1-openai-v0
+
+            -   if: matrix.pydantic-version == '2.4.2' && matrix.openai-version == '1.2.4'
+                name: Static analysis with pyright (ignoring pydantic v1 and openai v0)
+                run: |
+                    make type-pydantic-v2-openai-v1
+
+            -   if: matrix.pydantic-version == '1.10.9' && matrix.openai-version == '1.2.4'
+                name: Static analysis with mypy (ignoring pydantic v2 and openai v0)
+                run: |
+                    make type-pydantic-v1-openai-v1
 
     Pytests:
         runs-on: ubuntu-latest
@@ -92,6 +104,7 @@ jobs:
                 # dependencies: ['dev', 'full']
                 dependencies: ['full']
                 pydantic-version: ['1.10.9', '2.4.2']
+                openai-version: ['0.28.1', '1.2.4']
         steps:
             -   uses: actions/checkout@v2
             -   name: Set up Python ${{ matrix.python-version }}
@@ -103,15 +116,16 @@ jobs:
                 uses: actions/cache@v3
                 with:
                     path: ~/.cache/pypoetry
-                    key: poetry-cache-${{ runner.os }}-${{ steps.setup_python.outputs.python-version }}-${{ env.POETRY_VERSION }}-${{ matrix.pydantic-version }}
+                    key: poetry-cache-${{ runner.os }}-${{ steps.setup_python.outputs.python-version }}-${{ env.POETRY_VERSION }}-${{ matrix.pydantic-version }}-${{ matrix.openai-version }}
 
             -   name: Install Poetry
                 uses: snok/install-poetry@v1
 
             -   name: Install Dependencies
                 run: |
                     make ${{ matrix.dependencies }}
-                    python -m pip install pydantic==${{ matrix.pydantic-version }}
+                    poetry run pip install pydantic==${{ matrix.pydantic-version }}
+                    poetry run pip install openai==${{ matrix.openai-version }}
 
             -   name: Run Pytests
                 run: |

diff --git a/Makefile b/Makefile
@@ -8,13 +8,23 @@ autoformat:
 type:
 	poetry run pyright guardrails/
 
-type-pydantic-v1:
-	echo '{"exclude": ["guardrails/utils/pydantic_utils/v2.py"]}' > pyrightconfig.json
+type-pydantic-v1-openai-v0:
+	echo '{"exclude": ["guardrails/utils/pydantic_utils/v2.py", "guardrails/utils/openai_utils/v1.py"]}' > pyrightconfig.json
 	poetry run pyright guardrails/
 	rm pyrightconfig.json
 
-type-pydantic-v2:
-	echo '{"exclude": ["guardrails/utils/pydantic_utils/v1.py"]}' > pyrightconfig.json
+type-pydantic-v1-openai-v1:
+	echo '{"exclude": ["guardrails/utils/pydantic_utils/v2.py", "guardrails/utils/openai_utils/v0.py"]}' > pyrightconfig.json
+	poetry run pyright guardrails/
+	rm pyrightconfig.json
+
+type-pydantic-v2-openai-v0:
+	echo '{"exclude": ["guardrails/utils/pydantic_utils/v1.py", "guardrails/utils/openai_utils/v1.py"]}' > pyrightconfig.json
+	poetry run pyright guardrails/
+	rm pyrightconfig.json
+
+type-pydantic-v2-openai-v1:
+	echo '{"exclude": ["guardrails/utils/pydantic_utils/v1.py", "guardrails/utils/openai_utils/v0.py"]}' > pyrightconfig.json
 	poetry run pyright guardrails/
 	rm pyrightconfig.json
 

diff --git a/docs/examples/check_for_pii.ipynb b/docs/examples/check_for_pii.ipynb
@@ -0,0 +1,267 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Check whether an LLM response contains PII (Personally Identifiable Information)\n",
+    "\n",
+    "**Using the `PIIFilter` validator**\n",
+    "\n",
+    "This is a simple check that looks for the presence of a few common PII patterns\n",
+    "It is not intended to be a comprehensive check for PII and to be a quick check that can be used to filter out responses that are likely to contain PII. It uses the Microsoft Presidio library to check for PII.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+      "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
+      "You can now load the package via spacy.load('en_core_web_lg')\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Install the necessary packages\n",
+    "! pip install presidio-analyzer presidio-anonymizer -q\n",
+    "! python -m spacy download en_core_web_lg -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import the guardrails package\n",
+    "import guardrails as gd\n",
+    "from guardrails.validators import PIIFilter\n",
+    "from rich import print"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/karanacharya/guardrails-ai/guardrails/guardrails/rail.py:115: UserWarning: Prompt must be provided during __call__.\n",
+      "  warnings.warn(\"Prompt must be provided during __call__.\")\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create Guard object with this validator\n",
+    "# One can specify either pre-defined set of PII or SPI (Sensitive Personal Information) entities by passing in the `pii` or `spi` argument respectively.\n",
+    "# It can be passed either durring intialization or later through the metadata argument in parse method.\n",
+    "\n",
+    "# One can also pass in a list of entities supported by Presidio to the `pii_entities` argument.\n",
+    "guard = gd.Guard.from_string(\n",
+    "    validators=[PIIFilter(pii_entities=\"pii\", on_fail=\"fix\")],\n",
+    "    description=\"testmeout\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">My email address is <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">EMAIL_ADDRESS</span><span style=\"color: #000000; text-decoration-color: #000000\">&gt;, and my phone number is &lt;PHONE_NUMBER</span><span style=\"font-weight: bold\">&gt;</span>\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "My email address is \u001b[1m<\u001b[0m\u001b[1;95mEMAIL_ADDRESS\u001b[0m\u001b[39m>, and my phone number is <PHONE_NUMBER\u001b[0m\u001b[1m>\u001b[0m\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Parse the text\n",
+    "text = \"My email address is [email protected], and my phone number is 1234567890\"\n",
+    "output = guard.parse(\n",
+    "    llm_output=text,\n",
+    ")\n",
+    "\n",
+    "# Print the output\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here, both EMAIL_ADDRESS and PHONE_NUMBER are detected as PII.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">My email address is <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">EMAIL_ADDRESS</span><span style=\"font-weight: bold\">&gt;</span>, and my phone number is <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1234567890</span>\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "My email address is \u001b[1m<\u001b[0m\u001b[1;95mEMAIL_ADDRESS\u001b[0m\u001b[1m>\u001b[0m, and my phone number is \u001b[1;36m1234567890\u001b[0m\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Let's test with passing through metadata for the same guard object\n",
+    "# This will take precendence over the entities passed in during initialization\n",
+    "output = guard.parse(\n",
+    "    llm_output=text,\n",
+    "    metadata={\"pii_entities\": [\"EMAIL_ADDRESS\"]},\n",
+    ")\n",
+    "\n",
+    "# Print the output\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As you can see here, only EMAIL_ADDRESS is detected as PII, and the PHONE_NUMBER is not detected as PII.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's try with SPI entities\n",
+    "# Create a new guard object\n",
+    "guard = gd.Guard.from_string(\n",
+    "    validators=[PIIFilter(pii_entities=\"spi\", on_fail=\"fix\")],\n",
+    "    description=\"testmeout\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">My email address is [email protected], and my account number is <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">US_BANK_NUMBER</span><span style=\"font-weight: bold\">&gt;</span>.\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "My email address is [email protected], and my account number is \u001b[1m<\u001b[0m\u001b[1;95mUS_BANK_NUMBER\u001b[0m\u001b[1m>\u001b[0m.\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Parse text\n",
+    "text = \"My email address is [email protected], and my account number is 1234789012367654.\"\n",
+    "\n",
+    "output = guard.parse(\n",
+    "    llm_output=text,\n",
+    ")\n",
+    "\n",
+    "# Print the output\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here, only the US_BANK_NUMBER is detected as PII, as specified in the \"spi\" entities. Refer to the documentation for more information on the \"pii\" and \"spi\" entities. Obviosuly, you can pass in any [Presidio-supported entities](https://microsoft.github.io/presidio/supported_entities/) through the metadata.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">My ITIN is <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">US_ITIN</span><span style=\"color: #000000; text-decoration-color: #000000\">&gt; and my driver's license number is &lt;US_DRIVER_LICENSE</span><span style=\"font-weight: bold\">&gt;</span>\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "My ITIN is \u001b[1m<\u001b[0m\u001b[1;95mUS_ITIN\u001b[0m\u001b[39m> and my driver's license number is <US_DRIVER_LICENSE\u001b[0m\u001b[1m>\u001b[0m\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Another example\n",
+    "text = \"My ITIN is 923756789 and my driver's license number is 87651239\"\n",
+    "\n",
+    "output = guard.parse(\n",
+    "    llm_output=text,\n",
+    "    metadata={\"pii_entities\": [\"US_ITIN\", \"US_DRIVER_LICENSE\"]},\n",
+    ")\n",
+    "\n",
+    "# Print the output\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### In this way, any PII entity that you want to check for can be passed in through the metadata and masked by Guardrails for your LLM outputs. Of-course, like all other examples, you can integrate this into your own code and workflows through the complete Guard execution.\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "guard-venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}