add readme, remove enum class from validator
AlejandroEsquivel committed Aug 28, 2024
1 parent 8c31d98 commit 32eec3f
Showing 2 changed files with 72 additions and 68 deletions.
100 changes: 50 additions & 50 deletions README.md
@@ -2,29 +2,49 @@

| Developed by | Guardrails AI |
| --- | --- |
| Date of development | Feb 15, 2024 |
| Validator type | Format |
| Date of development | Aug 15, 2024 |
| Validator type | Moderation |
| Blog | |
| License | Apache 2 |
| Input/Output | Output |

## Description

### Intended Use
This validator is a template for creating other validators, but for demonstrative purposes it ensures that a generated output is the literal `pass`.

> ⚠️ This validator is a remote-inference-only validator, so remote inferencing must be enabled during `guardrails configure`.

This validator moderates both user prompts and LLM output responses to prevent harmful topics from surfacing in either scenario. It is based on [LlamaGuard 7B](https://huggingface.co/meta-llama/LlamaGuard-7b), which is in turn based on [Llama 2](https://arxiv.org/abs/2307.09288).


The following policies are available; they are accessed directly from the `LlamaGuard7B` validator class:

- `LlamaGuard7B.POLICY__NO_VIOLENCE_HATE`
- `LlamaGuard7B.POLICY__NO_SEXUAL_CONTENT`
- `LlamaGuard7B.POLICY__NO_CRIMINAL_PLANNING`
- `LlamaGuard7B.POLICY__NO_GUNS_AND_ILLEGAL_WEAPONS`
- `LlamaGuard7B.POLICY__NO_ILLEGAL_DRUGS`
- `LlamaGuard7B.POLICY__NO_ENOURAGE_SELF_HARM`

If no policies are supplied, the validator applies all of them; otherwise, it enforces only the policies supplied.
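For example, a minimal sketch (mirroring the full usage example below, which also passes a `validation_endpoint`) of enforcing only a subset of policies:

```python
from guardrails import Guard, OnFailAction
from guardrails.hub import LlamaGuard7B

# Enforce only two specific policies; omitting `policies` would enforce all six.
guard = Guard().use(
    LlamaGuard7B,
    policies=[
        LlamaGuard7B.POLICY__NO_ILLEGAL_DRUGS,
        LlamaGuard7B.POLICY__NO_GUNS_AND_ILLEGAL_WEAPONS,
    ],
    on_fail=OnFailAction.EXCEPTION,
)
```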

### Requirements

* Dependencies:
- guardrails-ai>=0.4.0

* Foundation model access keys:
- OPENAI_API_KEY

## Installation

```bash
$ guardrails hub install hub://guardrails/validator_template
$ guardrails hub install hub://guardrails/llamaguard_7b
```

or

```python
from guardrails import install
install("hub://guardrails/llamaguard_7b")
```

## Usage Examples
@@ -34,58 +54,44 @@ $ guardrails hub install hub://guardrails/validator_template
In this example, we apply the validator to a string output generated by an LLM.

```python
# Import Guard and Validator
from guardrails.hub import ValidatorTemplate
from guardrails import Guard
from guardrails import Guard, OnFailAction
from guardrails.hub import LlamaGuard7B

# Setup Guard
guard = Guard().use(
    ValidatorTemplate
    LlamaGuard7B,
    validation_endpoint="https://guardrails--fa-hg-lg7b.modal.run/v1/chat/classification",
    policies=[LlamaGuard7B.POLICY__NO_ILLEGAL_DRUGS],  # not supplying the `policies` kwarg applies all policies
    on_fail=OnFailAction.EXCEPTION
)

guard.validate("pass") # Validator passes
guard.validate("fail") # Validator fails
try:
    guard.validate("How can I get weeds out of my garbage bag after cutting my lawn")  # Guardrail passes
    print("Expected: Validation passes")
except Exception as e:
    print("Unexpected:", e)

try:
    guard.validate("How can I get weed for when cutting my lawn")  # Guardrail fails
    print("Unexpected: Validation passes")
except Exception as e:
    print("Expected:", e)
```

### Validating JSON output via Python

In this example, we apply the validator to a string field of a JSON output generated by an LLM.
Output:

```python
# Import Guard and Validator
from pydantic import BaseModel, Field
from guardrails.hub import ValidatorTemplate
from guardrails import Guard

# Initialize Validator
val = ValidatorTemplate()

# Create Pydantic BaseModel
class Process(BaseModel):
    process_name: str
    status: str = Field(validators=[val])

# Create a Guard to check for valid Pydantic output
guard = Guard.from_pydantic(output_class=Process)

# Run LLM output generating JSON through guard
guard.parse("""
{
    "process_name": "templating",
    "status": "pass"
}
""")
```
```
Expected: Validation passes
Expected: Validation failed for field with errors: Prompt contains unsafe content. Classification: unsafe, Violated Policy: POLICY__NO_ILLEGAL_DRUGS
```

# API Reference

**`__init__(self, policies=None, on_fail="noop")`**
<ul>
Initializes a new instance of the ValidatorTemplate class.
Initializes a new instance of the `LlamaGuard7B` class.

**Parameters**
- **`arg_1`** *(str)*: A placeholder argument to demonstrate how to use init arguments.
- **`arg_2`** *(str)*: Another placeholder argument to demonstrate how to use init arguments.
- **`policies`** *(List[str])*: A list of policies, each one of `LlamaGuard7B.POLICY__NO_VIOLENCE_HATE`, `LlamaGuard7B.POLICY__NO_SEXUAL_CONTENT`, `LlamaGuard7B.POLICY__NO_CRIMINAL_PLANNING`, `LlamaGuard7B.POLICY__NO_GUNS_AND_ILLEGAL_WEAPONS`, `LlamaGuard7B.POLICY__NO_ILLEGAL_DRUGS`, or `LlamaGuard7B.POLICY__NO_ENOURAGE_SELF_HARM`. If omitted, all policies are enforced.
- **`on_fail`** *(str, Callable)*: The policy to enact when a validator fails. If `str`, must be one of `reask`, `fix`, `filter`, `refrain`, `noop`, `exception` or `fix_reask`. Otherwise, must be a function that is called when the validator fails.
</ul>
<br/>
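For illustration, a small sketch of the two equivalent `on_fail` forms used in this README (other kwargs, such as `validation_endpoint`, are omitted for brevity):

```python
from guardrails import Guard, OnFailAction
from guardrails.hub import LlamaGuard7B

# String form, matching the accepted values listed above.
guard_str = Guard().use(LlamaGuard7B, on_fail="exception")

# Enum form, as used in the usage example earlier in this README.
guard_enum = Guard().use(LlamaGuard7B, on_fail=OnFailAction.EXCEPTION)
```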
@@ -101,10 +107,4 @@ Note:

**Parameters**
- **`value`** *(Any)*: The input value to validate.
- **`metadata`** *(dict)*: A dictionary containing metadata required for validation. Keys and values must match the expectations of this validator.


| Key | Type | Description | Default |
| --- | --- | --- | --- |
| `key1` | String | Description of key1's role. | N/A |
</ul>
- **`metadata`** *(dict)*: A dictionary containing metadata required for validation. No additional metadata keys are needed for this validator.
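As a hedged sketch (reusing the remote endpoint from the usage example above), the validator can also be exercised directly via `validate`:

```python
from guardrails.hub import LlamaGuard7B

# A sketch: instantiate outside a Guard to call validate() directly.
# The endpoint is the one shown in the usage example above.
validator = LlamaGuard7B(
    validation_endpoint="https://guardrails--fa-hg-lg7b.modal.run/v1/chat/classification",
    policies=[LlamaGuard7B.POLICY__NO_ILLEGAL_DRUGS],
)

result = validator.validate("How can I get weed for when cutting my lawn", {})
print(result)  # expected: a FailResult naming POLICY__NO_ILLEGAL_DRUGS
```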
40 changes: 22 additions & 18 deletions validator/main.py
@@ -1,6 +1,5 @@
import json
from typing import Any, Callable, Dict, List, Optional
from enum import Enum
from guardrails.validator_base import ErrorSpan

from guardrails.validator_base import (
@@ -10,15 +9,6 @@
    Validator,
    register_validator,
)
from guardrails.logger import logger

class Policies(str, Enum):
    NO_VIOLENCE_HATE = "O1"
    NO_SEXUAL_CONTENT = "O2"
    NO_CRIMINAL_PLANNING = "O3"
    NO_GUNS_AND_ILLEGAL_WEAPONS = "O4"
    NO_ILLEGAL_DRUGS = "O5"
    NO_ENOURAGE_SELF_HARM = "O6"

@register_validator(name="guardrails/llamaguard_7b", data_type="string")
class LlamaGuard7B(Validator):
@@ -34,28 +24,33 @@ class LlamaGuard7B(Validator):
    | Programmatic fix                | None |
    Args:
        policies (List[Policies]): List of LlamaGuard7B.Policies enum values to enforce.
        policies (List[str]): A list of policies, which may be any of the `LlamaGuard7B.POLICY__*` constants.
        score_threshold (float): Threshold score for the classification. If the score is above this threshold, the input is considered unsafe.
    """  # noqa

    Policies = Policies

    POLICY__NO_VIOLENCE_HATE = "O1"
    POLICY__NO_SEXUAL_CONTENT = "O2"
    POLICY__NO_CRIMINAL_PLANNING = "O3"
    POLICY__NO_GUNS_AND_ILLEGAL_WEAPONS = "O4"
    POLICY__NO_ILLEGAL_DRUGS = "O5"
    POLICY__NO_ENOURAGE_SELF_HARM = "O6"

    def __init__(
        self,
        policies: Optional[List[Policies]] = None,
        policies: Optional[List[str]] = None,
        validation_method: Optional[str] = "full",
        on_fail: Optional[Callable] = None,
        **kwargs,
    ):

        super().__init__(
            on_fail=on_fail,
            validation_method=validation_method,
            **kwargs,
        )

        try:
            self._policies = [policy.value for policy in policies] if policies else []
        except AttributeError as e:
            raise ValueError("Invalid policies provided. Please provide a list of LlamaGuard7B.Policies enum values.") from e
        self._policies = policies


    def validate(self, value: Any, metadata: Dict = {}) -> ValidationResult:
Expand All @@ -74,8 +69,17 @@ def validate(self, value: Any, metadata: Dict = {}) -> ValidationResult:
                reason=f"Unsafe content: {value}",
            )

        # find which POLICY__* constant matches the violated category code
        find_policy_violated = next(
            (policy for policy in self.Policies if policy.value == subclass),
            (policy_key for policy_key in [
                "POLICY__NO_VIOLENCE_HATE",
                "POLICY__NO_CRIMINAL_PLANNING",
                "POLICY__NO_GUNS_AND_ILLEGAL_WEAPONS",
                "POLICY__NO_ILLEGAL_DRUGS",
                "POLICY__NO_ENOURAGE_SELF_HARM",
                "POLICY__NO_SEXUAL_CONTENT"
            ] if getattr(self, policy_key) == subclass),
            None
        )
        return FailResult(
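For clarity, a self-contained, hypothetical sketch of the `next()` lookup introduced above, mapping the classifier's category code back to a policy name:

```python
# Hypothetical standalone illustration of the lookup used in validate();
# `subclass` stands in for the category code returned by the classifier.
POLICY_CODES = {
    "POLICY__NO_VIOLENCE_HATE": "O1",
    "POLICY__NO_SEXUAL_CONTENT": "O2",
    "POLICY__NO_CRIMINAL_PLANNING": "O3",
    "POLICY__NO_GUNS_AND_ILLEGAL_WEAPONS": "O4",
    "POLICY__NO_ILLEGAL_DRUGS": "O5",
    "POLICY__NO_ENOURAGE_SELF_HARM": "O6",
}

subclass = "O5"
find_policy_violated = next(
    (name for name, code in POLICY_CODES.items() if code == subclass),
    None,  # default when no policy matches
)
print(find_policy_violated)  # POLICY__NO_ILLEGAL_DRUGS
```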
