From 8b1d1bfe377c9f6b84a8d801c57e79f7102cbb64 Mon Sep 17 00:00:00 2001 From: Razvan Dinu Date: Mon, 1 Apr 2024 13:29:16 +0300 Subject: [PATCH] Update documentation to allow rendering with Sphinx. --- docs/_static/css/custom.css | 9 ++ docs/architecture/index.rst | 7 ++ docs/evaluation/index.rst | 10 +++ docs/evaluation/llm-vulnerability-scanning.md | 2 +- docs/getting_started/1_hello_world/index.rst | 9 ++ .../2_core_colang_concepts/README.md | 4 +- .../2_core_colang_concepts/index.rst | 9 ++ .../getting_started/3_demo_use_case/README.md | 10 +-- .../getting_started/3_demo_use_case/index.rst | 9 ++ docs/getting_started/4_input_rails/README.md | 6 +- docs/getting_started/4_input_rails/index.rst | 9 ++ docs/getting_started/5_output_rails/README.md | 4 +- docs/getting_started/5_output_rails/index.rst | 9 ++ .../getting_started/6_topical_rails/README.md | 2 +- .../getting_started/6_topical_rails/index.rst | 9 ++ docs/getting_started/7_rag/README.md | 4 +- docs/getting_started/7_rag/index.rst | 9 ++ docs/getting_started/README.md | 23 +++-- docs/getting_started/index.rst | 22 +++++ docs/getting_started/installation-guide.md | 6 +- docs/index.rst | 90 +++++++++++++++++++ docs/security/guidelines.md | 2 +- docs/security/index.rst | 10 +++ docs/security/red-teaming.md | 4 +- .../advanced/align-score-deployment.md | 2 +- .../advanced/embedding-search-providers.md | 2 +- docs/user_guides/advanced/index.rst | 21 +++++ ...ilbreak-detection-heuristics-deployment.md | 4 +- .../advanced/llama-guard-deployment.md | 6 +- .../advanced/prompt-customization.md | 6 +- docs/user_guides/advanced/streaming.md | 4 +- docs/user_guides/advanced/using-docker.md | 4 +- docs/user_guides/cli.md | 2 +- .../colang-language-syntax-guide.md | 2 +- docs/user_guides/configuration-guide.md | 8 +- docs/user_guides/detailed_logging/index.rst | 7 ++ docs/user_guides/guardrails-library.md | 40 ++++----- docs/user_guides/guardrails-process.md | 2 +- docs/user_guides/index.rst | 23 +++++ 
.../input_output_rails_only/README.md | 4 +- .../input_output_rails_only/index.rst | 7 ++ .../jailbreak_detection_heuristics/README.md | 8 +- .../jailbreak_detection_heuristics/index.rst | 7 ++ .../langchain/chain-with-guardrails/README.md | 2 +- .../langchain/chain-with-guardrails/index.rst | 7 ++ docs/user_guides/langchain/index.rst | 10 +++ .../langchain/langchain-integration.md | 4 +- .../langchain/runnable-as-action/README.md | 2 +- .../langchain/runnable-as-action/index.rst | 7 ++ docs/user_guides/llm-support.md | 6 +- docs/user_guides/llm/index.rst | 8 ++ .../llm/nvidia_ai_endpoints/README.md | 2 +- .../llm/nvidia_ai_endpoints/index.rst | 7 ++ docs/user_guides/llm/vertexai/README.md | 4 +- docs/user_guides/llm/vertexai/index.rst | 7 ++ docs/user_guides/multi_config_api/README.md | 2 +- docs/user_guides/multi_config_api/index.rst | 7 ++ docs/user_guides/python-api.md | 2 +- docs/user_guides/server-guide.md | 6 +- 59 files changed, 432 insertions(+), 88 deletions(-) create mode 100644 docs/_static/css/custom.css create mode 100644 docs/architecture/index.rst create mode 100644 docs/evaluation/index.rst create mode 100644 docs/getting_started/1_hello_world/index.rst create mode 100644 docs/getting_started/2_core_colang_concepts/index.rst create mode 100644 docs/getting_started/3_demo_use_case/index.rst create mode 100644 docs/getting_started/4_input_rails/index.rst create mode 100644 docs/getting_started/5_output_rails/index.rst create mode 100644 docs/getting_started/6_topical_rails/index.rst create mode 100644 docs/getting_started/7_rag/index.rst create mode 100644 docs/getting_started/index.rst create mode 100644 docs/index.rst create mode 100644 docs/security/index.rst create mode 100644 docs/user_guides/advanced/index.rst create mode 100644 docs/user_guides/detailed_logging/index.rst create mode 100644 docs/user_guides/index.rst create mode 100644 docs/user_guides/input_output_rails_only/index.rst create mode 100644 
docs/user_guides/jailbreak_detection_heuristics/index.rst create mode 100644 docs/user_guides/langchain/chain-with-guardrails/index.rst create mode 100644 docs/user_guides/langchain/index.rst create mode 100644 docs/user_guides/langchain/runnable-as-action/index.rst create mode 100644 docs/user_guides/llm/index.rst create mode 100644 docs/user_guides/llm/nvidia_ai_endpoints/index.rst create mode 100644 docs/user_guides/llm/vertexai/index.rst create mode 100644 docs/user_guides/multi_config_api/index.rst diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css new file mode 100644 index 000000000..d66265b2d --- /dev/null +++ b/docs/_static/css/custom.css @@ -0,0 +1,9 @@ +.swagger-ui code { + white-space: pre-wrap; +} + +.microlight code { + color: white; + background: none; + border: none; +} diff --git a/docs/architecture/index.rst b/docs/architecture/index.rst new file mode 100644 index 000000000..dbf68f794 --- /dev/null +++ b/docs/architecture/index.rst @@ -0,0 +1,7 @@ +Architecture +============ + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/evaluation/index.rst b/docs/evaluation/index.rst new file mode 100644 index 000000000..5083b68c1 --- /dev/null +++ b/docs/evaluation/index.rst @@ -0,0 +1,10 @@ +:orphan: + +Evaluation +========== + +.. toctree:: + :maxdepth: 2 + + README + llm-vulnerability-scanning diff --git a/docs/evaluation/llm-vulnerability-scanning.md b/docs/evaluation/llm-vulnerability-scanning.md index aed8c5779..19ee3f95d 100644 --- a/docs/evaluation/llm-vulnerability-scanning.md +++ b/docs/evaluation/llm-vulnerability-scanning.md @@ -53,4 +53,4 @@ At the same time, this experiment does not investigate if the guardrails also bl ## LLM Vulnerability Categories -If you are interested in additional information about each vulnerability category in Garak, please consult the full results [here](./../_static/html/) and the [Garak GitHub](https://github.com/leondz/garak/) page. 
+If you are interested in additional information about each vulnerability category in Garak, please consult the full results [here](./../_static/html/README.md) and the [Garak GitHub](https://github.com/leondz/garak/) page. diff --git a/docs/getting_started/1_hello_world/index.rst b/docs/getting_started/1_hello_world/index.rst new file mode 100644 index 000000000..5641fced0 --- /dev/null +++ b/docs/getting_started/1_hello_world/index.rst @@ -0,0 +1,9 @@ +:orphan: + +1 Hello World +============= + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/getting_started/2_core_colang_concepts/README.md b/docs/getting_started/2_core_colang_concepts/README.md index 1f9689ab5..c829446d1 100644 --- a/docs/getting_started/2_core_colang_concepts/README.md +++ b/docs/getting_started/2_core_colang_concepts/README.md @@ -224,7 +224,7 @@ The prompt has four logical sections: 2. A sample conversation, which can also be [configured](../../user_guides/configuration-guide.md#sample-conversation) using the `sample_conversation` key in *config.yml*. -3. A set of examples for converting user utterances to canonical forms. The top five most relevant examples are chosen by performing a vector search against all the user message examples. For more details see [ABC Bot](../../../examples/bots/abc). +3. A set of examples for converting user utterances to canonical forms. The top five most relevant examples are chosen by performing a vector search against all the user message examples. For more details see [ABC Bot](../../../examples/bots/abc/README.md). 4. The current conversation preceded by the first two turns from the sample conversation. @@ -327,4 +327,4 @@ This guide provides a detailed overview of two core Colang concepts: *messages* ## Next -The next guide, [Demo Use Case](../3_demo_use_case), guides you through selecting a demo use case to implement different types of rails, such as for input, output, or dialog. 
+The next guide, [Demo Use Case](../3_demo_use_case/README.md), guides you through selecting a demo use case to implement different types of rails, such as for input, output, or dialog. diff --git a/docs/getting_started/2_core_colang_concepts/index.rst b/docs/getting_started/2_core_colang_concepts/index.rst new file mode 100644 index 000000000..c38254256 --- /dev/null +++ b/docs/getting_started/2_core_colang_concepts/index.rst @@ -0,0 +1,9 @@ +:orphan: + +2 Core Colang Concepts +====================== + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/getting_started/3_demo_use_case/README.md b/docs/getting_started/3_demo_use_case/README.md index 16b5d39f9..171c64a67 100644 --- a/docs/getting_started/3_demo_use_case/README.md +++ b/docs/getting_started/3_demo_use_case/README.md @@ -4,11 +4,11 @@ This topic describes a use case used in the remaining guide topics. The use case The following guide topics lead you through a step-by-step configuration process, addressing various challenges that might arise. -1. [Input moderation](../4_input_rails): Verify that any user input is safe before proceeding. -2. [Output moderation](../5_output_rails): Ensure that the bot's output is not offensive and does not include specific words. -3. [Preventing off-topic questions](../6_topical_rails): Guarantee that the bot only responds to specific topics. -4. [Retrieval augmented generation](../7_rag): Integrate external knowledge bases. +1. [Input moderation](../4_input_rails/README.md): Verify that any user input is safe before proceeding. +2. [Output moderation](../5_output_rails/README.md): Ensure that the bot's output is not offensive and does not include specific words. +3. [Preventing off-topic questions](../6_topical_rails/README.md): Guarantee that the bot only responds to specific topics. +4. [Retrieval augmented generation](../7_rag/README.md): Integrate external knowledge bases. ## Next -Start with adding [Input Moderation](../4_input_rails) to the ABC Bot. 
+Start with adding [Input Moderation](../4_input_rails/README.md) to the ABC Bot. diff --git a/docs/getting_started/3_demo_use_case/index.rst b/docs/getting_started/3_demo_use_case/index.rst new file mode 100644 index 000000000..8dd39ff97 --- /dev/null +++ b/docs/getting_started/3_demo_use_case/index.rst @@ -0,0 +1,9 @@ +:orphan: + +3 Demo Use Case +=============== + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/getting_started/4_input_rails/README.md b/docs/getting_started/4_input_rails/README.md index 552d05418..1c230747e 100644 --- a/docs/getting_started/4_input_rails/README.md +++ b/docs/getting_started/4_input_rails/README.md @@ -1,6 +1,6 @@ # Input Rails -This topic demonstrates how to add input rails to a guardrails configuration. As discussed in the previous guide, [Demo Use Case](../3_demo_use_case), this topic guides you through building the ABC Bot. +This topic demonstrates how to add input rails to a guardrails configuration. As discussed in the previous guide, [Demo Use Case](../3_demo_use_case/README.md), this topic guides you through building the ABC Bot. ## Prerequisites @@ -105,7 +105,7 @@ Summary: 1 LLM call(s) took 0.92 seconds and used 106 tokens. 1. Task `general` took 0.92 seconds and used 106 tokens. ``` -The summary shows that a single call was made to the LLM using the prompt for the task `general`. In contrast to the [Core Colang Concepts guide](../2_core_colang_concepts), where the `generate_user_intent` task is used as a first phase for each user message, if no user canonical forms are defined for the Guardrails configuration, the `general` task is used instead. Take a closer look at the prompt and the completion: +The summary shows that a single call was made to the LLM using the prompt for the task `general`. 
In contrast to the [Core Colang Concepts guide](../2_core_colang_concepts/README.md), where the `generate_user_intent` task is used as a first phase for each user message, if no user canonical forms are defined for the Guardrails configuration, the `general` task is used instead. Take a closer look at the prompt and the completion: ```python print(info.llm_calls[0].prompt) @@ -364,4 +364,4 @@ Input rails also have the ability to alter the message from the user. By changin ## Next -The next guide, [Output Rails](../5_output_rails), adds output moderation to the bot. +The next guide, [Output Rails](../5_output_rails/README.md), adds output moderation to the bot. diff --git a/docs/getting_started/4_input_rails/index.rst b/docs/getting_started/4_input_rails/index.rst new file mode 100644 index 000000000..5785f85a9 --- /dev/null +++ b/docs/getting_started/4_input_rails/index.rst @@ -0,0 +1,9 @@ +:orphan: + +4 Input Rails +=============== + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/getting_started/5_output_rails/README.md b/docs/getting_started/5_output_rails/README.md index 53d840980..6ba6d2eec 100644 --- a/docs/getting_started/5_output_rails/README.md +++ b/docs/getting_started/5_output_rails/README.md @@ -1,6 +1,6 @@ # Output Rails -This guide describes how to add output rails to a guardrails configuration. This guide builds on the previous guide, [Input Rails](../4_input_rails), developing further the demo ABC Bot. +This guide describes how to add output rails to a guardrails configuration. This guide builds on the previous guide, [Input Rails](../4_input_rails/README.md), developing further the demo ABC Bot. ## Prerequisites @@ -292,4 +292,4 @@ I cannot talk about proprietary technology. ## Next -The next guide, [Topical Rails](../6_topical_rails), adds a topical rails to the ABC bot, to make sure it only responds to questions related to the employment situation. 
+The next guide, [Topical Rails](../6_topical_rails/README.md), adds a topical rails to the ABC bot, to make sure it only responds to questions related to the employment situation. diff --git a/docs/getting_started/5_output_rails/index.rst b/docs/getting_started/5_output_rails/index.rst new file mode 100644 index 000000000..5e60fafd9 --- /dev/null +++ b/docs/getting_started/5_output_rails/index.rst @@ -0,0 +1,9 @@ +:orphan: + +5 Output Rails +============== + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/getting_started/6_topical_rails/README.md b/docs/getting_started/6_topical_rails/README.md index 900b0c894..770837145 100644 --- a/docs/getting_started/6_topical_rails/README.md +++ b/docs/getting_started/6_topical_rails/README.md @@ -1,6 +1,6 @@ # Topical Rails -This guide will teach you what *topical rails* are and how to integrate them into your guardrails configuration. This guide builds on the [previous guide](../5_output_rails), developing further the demo ABC Bot. +This guide will teach you what *topical rails* are and how to integrate them into your guardrails configuration. This guide builds on the [previous guide](../5_output_rails/README.md), developing further the demo ABC Bot. ## Prerequisites diff --git a/docs/getting_started/6_topical_rails/index.rst b/docs/getting_started/6_topical_rails/index.rst new file mode 100644 index 000000000..1eba2542e --- /dev/null +++ b/docs/getting_started/6_topical_rails/index.rst @@ -0,0 +1,9 @@ +:orphan: + +6 Topical Rails +=============== + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/getting_started/7_rag/README.md b/docs/getting_started/7_rag/README.md index 6040e1bf8..0ce6b9b0c 100644 --- a/docs/getting_started/7_rag/README.md +++ b/docs/getting_started/7_rag/README.md @@ -1,6 +1,6 @@ # Retrieval-Augmented Generation -This guide shows how to apply a guardrails configuration in a RAG scenario. 
This guide builds on the [previous guide](../6_topical_rails), developing further the demo ABC Bot. +This guide shows how to apply a guardrails configuration in a RAG scenario. This guide builds on the [previous guide](../6_topical_rails/README.md), developing further the demo ABC Bot. ## Prerequisites @@ -99,7 +99,7 @@ There are three ways you can configure a knowledge base directly into a guardrai 2. Using a custom `retrieve_relevant_chunks` action. 3. Using a custom `EmbeddingSearchProvider`. -For option 1, you can add a knowledge base directly into your guardrails configuration by creating a *kb* folder inside the *config* folder and adding documents there. Currently, only the Markdown format is supported. For a quick example, check out the complete implementation of the [ABC Bot](../../../examples/bots/abc). +For option 1, you can add a knowledge base directly into your guardrails configuration by creating a *kb* folder inside the *config* folder and adding documents there. Currently, only the Markdown format is supported. For a quick example, check out the complete implementation of the [ABC Bot](../../../examples/bots/abc/README.md). Options 2 and 3 represent advanced use cases beyond the scope of this topic. diff --git a/docs/getting_started/7_rag/index.rst b/docs/getting_started/7_rag/index.rst new file mode 100644 index 000000000..45009b1b6 --- /dev/null +++ b/docs/getting_started/7_rag/index.rst @@ -0,0 +1,9 @@ +:orphan: + +7 Rag +===== + +.. 
toctree:: + :maxdepth: 2 + + README diff --git a/docs/getting_started/README.md b/docs/getting_started/README.md index 13bc536f9..19167eb1e 100644 --- a/docs/getting_started/README.md +++ b/docs/getting_started/README.md @@ -1,11 +1,24 @@ # Getting Started +```{toctree} +:hidden: +:maxdepth: 2 +:caption: Contents + +1_hello_world/README +2_core_colang_concepts/README +3_demo_use_case/README +4_input_rails/README +5_output_rails/README +6_topical_rails/README +7_rag/README +``` This *Getting Started* section of the documentation is meant to help you get started with NeMo Guardrails. It is structured as a sequence of guides focused on specific topics. Each guide builds on the previous one by introducing new concepts and features. For each guide, in addition to the README, you will find a corresponding Jupyter notebook and the final configuration (*config.yml*) in the *config* folder. 1. [Hello World](./1_hello_world/README.md): get started with the basics of NeMo Guardrails by building a simple rail that controls the greeting behavior. 2. [Core Colang Concepts](./2_core_colang_concepts/README.md): learn about the core concepts of Colang: messages and flows. -3. [Demo Use Case](./3_demo_use_case): the choice of a representative use case. -4. [Input moderation](./4_input_rails): make sure the input from the user is safe, before engaging with it. -5. [Output moderation](./5_output_rails): make sure the output of the bot is not offensive and making sure it does not contain certain words. -6. [Preventing off-topic questions](./6_topical_rails): make sure that the bot responds only to a specific set of topics. -7. [Retrieval Augmented Generation](./7_rag): integrate an external knowledge base. +3. [Demo Use Case](./3_demo_use_case/README.md): the choice of a representative use case. +4. [Input moderation](./4_input_rails/README.md): make sure the input from the user is safe, before engaging with it. +5. 
[Output moderation](./5_output_rails/README.md): make sure the output of the bot is not offensive and making sure it does not contain certain words. +6. [Preventing off-topic questions](./6_topical_rails/README.md): make sure that the bot responds only to a specific set of topics. +7. [Retrieval Augmented Generation](./7_rag/README.md): integrate an external knowledge base. diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst new file mode 100644 index 000000000..4513534c5 --- /dev/null +++ b/docs/getting_started/index.rst @@ -0,0 +1,22 @@ +:orphan: + +Getting Started +=============== + +.. toctree:: + :maxdepth: 2 + + installation-guide + README + +.. toctree:: + :maxdepth: 2 + :hidden: + + 1_hello_world/index + 2_core_colang_concepts/index + 3_demo_use_case/index + 4_input_rails/index + 5_output_rails/index + 6_topical_rails/index + 7_rag/index diff --git a/docs/getting_started/installation-guide.md b/docs/getting_started/installation-guide.md index 4eaa65137..3a166e342 100644 --- a/docs/getting_started/installation-guide.md +++ b/docs/getting_started/installation-guide.md @@ -122,6 +122,6 @@ NeMo Guardrails can also be used through Docker. For details on how to build and ## What's next? -* Check out the [Getting Started Guide](../getting_started) and start with the ["Hello World" example](../getting_started/1_hello_world). -* Explore more examples in the [examples](../../examples) folder. -* Review the [User Guides](../user_guides). +* Check out the [Getting Started Guide](../getting_started/README.md) and start with the ["Hello World" example](../getting_started/1_hello_world/README.md). +* Explore more examples in the [examples](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/examples) folder. +* Review the [User Guides](../README.md). 
diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 000000000..6a3f77968 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,90 @@ +NVIDIA NeMo Guardrails +==================================================== + +.. toctree:: + :caption: NVIDIA NeMo Guardrails + :name: NVIDIA NeMo Guardrails + :maxdepth: 1 + + introduction.md + documentation.md + getting_started/installation-guide + +.. toctree:: + :caption: Getting Started + :name: Getting Started + :maxdepth: 2 + + getting_started/1_hello_world/README + getting_started/2_core_colang_concepts/README + getting_started/3_demo_use_case/README + getting_started/4_input_rails/README + getting_started/5_output_rails/README + getting_started/6_topical_rails/README + getting_started/7_rag/README + + +.. toctree:: + :caption: User Guides + :name: User Guides + :maxdepth: 2 + + user_guides/cli + user_guides/colang-language-syntax-guide + user_guides/configuration-guide + user_guides/guardrails-library + user_guides/guardrails-process + user_guides/llm-support + user_guides/python-api + user_guides/server-guide + user_guides/detailed_logging/index + user_guides/input_output_rails_only/index + user_guides/jailbreak_detection_heuristics/index + user_guides/langchain/index + user_guides/llm/index + user_guides/multi_config_api/index + +.. toctree:: + :caption: Evaluation + :name: Evaluation + :maxdepth: 2 + + evaluation/README + evaluation/llm-vulnerability-scanning + +.. toctree:: + :caption: Security + :name: Security + :maxdepth: 2 + + security/guidelines + security/red-teaming + +.. 
toctree:: + :caption: Advanced User Guides + :name: Advanced User Guides + :maxdepth: 2 + + user_guides/advanced/align-score-deployment + user_guides/advanced/bot-message-instructions + user_guides/advanced/embedding-search-providers + user_guides/advanced/event-based-api + user_guides/advanced/extract-user-provided-values + user_guides/advanced/generation-options + user_guides/advanced/jailbreak-detection-heuristics-deployment + user_guides/advanced/llama-guard-deployment + user_guides/advanced/nested-async-loop + user_guides/advanced/prompt-customization + user_guides/advanced/streaming + user_guides/advanced/using-docker + user_guides/advanced/vertexai-setup + +.. toctree:: + :caption: Other + :name: Other + :maxdepth: 2 + + architecture/index + glossary + faqs + changes diff --git a/docs/security/guidelines.md b/docs/security/guidelines.md index de09c95dc..d78e8c909 100644 --- a/docs/security/guidelines.md +++ b/docs/security/guidelines.md @@ -92,7 +92,7 @@ Like with a web server, red-teaming and testing at the scale of the web is a req AI safety and security is a community effort, and this is one of the main reasons we have released NeMo Guardrails to the community. We hope to bring many developers and enthusiasts together to build better solutions for Trustworthy AI. Our initial release is a starting point. We have built a collection of guardrails and educational examples that provide helpful controls and resist a variety of common attacks, however, they are not perfect. We have conducted adversarial testing on these example bots and will soon release a whitepaper on a larger-scale study. Here are some items to watch out for when creating your own bots: -1. Over-aggressive moderation: Some of the AI Safety rails, such as [moderation](../../examples/moderation_rail/README.md) and [fact-checking](../../examples/grounding_rail/README.md), can occasionally block otherwise safe requests. This is more likely to happen when multiple guardrails are used together. 
One possible strategy to resolve this is to use logic in the flow to reduce unnecessary calls; for example to call fact-checking only for factual questions. +1. Over-aggressive moderation: Some of the AI Safety rails, can occasionally block otherwise safe requests. This is more likely to happen when multiple guardrails are used together. One possible strategy to resolve this is to use logic in the flow to reduce unnecessary calls; for example to call fact-checking only for factual questions. 2. Overgeneralization of canonical forms: NeMo Guardrails uses canonical forms like `ask about jobs report` to guide its behavior and to generalize to situations not explicitly defined in the Colang configuration. It may occasionally get the generalization wrong, so that guardrails miss certain examples or trigger unexpectedly. If this happens, it can often be improved by adding or adjusting the `define user` forms in the [Colang files](../user_guides/colang-language-syntax-guide.md), or modifying the sample conversations in the [configuration](../user_guides/configuration-guide.md). 3. Nondeterminism: LLMs use a concept known as *temperature*, as well as other techniques, to introduce variation in their responses. This creates a much more natural experience, however, it can on occasion create unexpected behavior in LLM applications that can be difficult to reproduce. As with all AI applications, it is a good practice to use thorough evaluation and regression-testing suites. diff --git a/docs/security/index.rst b/docs/security/index.rst new file mode 100644 index 000000000..e3d9d838a --- /dev/null +++ b/docs/security/index.rst @@ -0,0 +1,10 @@ +:orphan: + +Security +======== + +.. 
toctree:: + :maxdepth: 2 + + guidelines + red-teaming diff --git a/docs/security/red-teaming.md b/docs/security/red-teaming.md index ebb66bc42..4efaa2375 100644 --- a/docs/security/red-teaming.md +++ b/docs/security/red-teaming.md @@ -12,7 +12,7 @@ To run a red teaming process, there are three steps involved: 2. Create a set of challenges (`challenges.json`) and add them to the `config` folder. 3. Start the server `nemoguardrails server` and use the Chat UI to interact with various configurations. -For a server configuration template with two guardrails configuration and a set of challenges, check out [this example](../../examples/configs/red-teaming). +For a server configuration template with two guardrails configuration and a set of challenges, check out [this example](../../examples/configs/red-teaming/README.md). ## Challenges @@ -54,4 +54,4 @@ The UI enables the user to rate the attack's success (No Success, Some Success, ## Recording the results -The sample configuration [here](../../examples/configs/red-teaming) includes an example of how to use a "custom logger" to save the ratings, including the complete history of the conversation, in a CSV file. +The sample configuration [here](../../examples/configs/red-teaming/README.md) includes an example of how to use a "custom logger" to save the ratings, including the complete history of the conversation, in a CSV file. diff --git a/docs/user_guides/advanced/align-score-deployment.md b/docs/user_guides/advanced/align-score-deployment.md index 65cb3d5bb..ef57d8788 100644 --- a/docs/user_guides/advanced/align-score-deployment.md +++ b/docs/user_guides/advanced/align-score-deployment.md @@ -1,6 +1,6 @@ # AlignScore Deployment -**NOTE**: The recommended way to use AlignScore with NeMo Guardrails is using the provided [Dockerfile](../../../nemoguardrails/library/factchecking/align_score/Dockerfile). For more details, check out how to [build and use the image](./using-docker.md). 
+**NOTE**: The recommended way to use AlignScore with NeMo Guardrails is using the provided [Dockerfile](https://github.com/NVIDIA/NeMo-Guardrails/blob/develop/nemoguardrails/library/factchecking/align_score/Dockerfile). For more details, check out how to [build and use the image](using-docker.md). In order to deploy an AlignScore server, follow these steps: diff --git a/docs/user_guides/advanced/embedding-search-providers.md b/docs/user_guides/advanced/embedding-search-providers.md index cfa446197..c8a551d7b 100644 --- a/docs/user_guides/advanced/embedding-search-providers.md +++ b/docs/user_guides/advanced/embedding-search-providers.md @@ -71,7 +71,7 @@ The default implementation is also designed to support asynchronous execution of The `cache` configuration is optional. If enabled, it uses the specified `key_generator` and `store` to cache the embeddings. The `store_config` can be used to provide additional configuration options required for the store. The default `cache` configuration uses the `md5` key generator and the `filesystem` store. The cache is disabled by default. -### Batch Implementation +## Batch Implementation The default embedding provider includes a batch processing feature designed to optimize the embedding generation process. This feature is designed to initiate the embedding generation process after a predefined latency of 10 milliseconds. diff --git a/docs/user_guides/advanced/index.rst b/docs/user_guides/advanced/index.rst new file mode 100644 index 000000000..a37a4d80f --- /dev/null +++ b/docs/user_guides/advanced/index.rst @@ -0,0 +1,21 @@ +:orphan: + +Advanced +======== + +.. 
toctree:: + :maxdepth: 2 + + align-score-deployment + bot-message-instructions + embedding-search-providers + event-based-api + extract-user-provided-values + generation-options + jailbreak-detection-heuristics-deployment + llama-guard-deployment + nested-async-loop + prompt-customization + streaming + using-docker + vertexai-setup diff --git a/docs/user_guides/advanced/jailbreak-detection-heuristics-deployment.md b/docs/user_guides/advanced/jailbreak-detection-heuristics-deployment.md index 40b1eb835..982613aac 100644 --- a/docs/user_guides/advanced/jailbreak-detection-heuristics-deployment.md +++ b/docs/user_guides/advanced/jailbreak-detection-heuristics-deployment.md @@ -1,6 +1,6 @@ # Jailbreak Detection Heuristics Deployment -**NOTE**: The recommended way to use Jailbreak Detection Heuristics with NeMo Guardrails is using the provided [Dockerfile](../../../nemoguardrails/library/jailbreak_detection/Dockerfile). For more details, check out how to [build and use the image](./using-docker.md). +**NOTE**: The recommended way to use Jailbreak Detection Heuristics with NeMo Guardrails is using the provided [Dockerfile](https://github.com/NVIDIA/NeMo-Guardrails/blob/develop/nemoguardrails/library/jailbreak_detection/Dockerfile). For more details, check out how to [build and use the image](using-docker.md). In order to deploy jailbreak detection heuristics server, follow these steps: @@ -19,7 +19,7 @@ By default, the jailbreak detection server listens on port `1337`. You can chang ## Running on GPU To run on GPU, ensure you have the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) installed. -If you are building a container from the provided dockerfiles, make sure that you specify the correct [Dockerfile](../../../nemoguardrails/library/jailbreak_detection/Dockerfile-GPU) and include the `-f` parameter with `docker build`. 
+If you are building a container from the provided dockerfiles, make sure that you specify the correct [Dockerfile](https://github.com/NVIDIA/NeMo-Guardrails/blob/develop/nemoguardrails/library/jailbreak_detection/Dockerfile-GPU) and include the `-f` parameter with `docker build`. When running docker, ensure you pass the `-e NVIDIA_DRIVER_CAPABILITIES=compute,utility`, `-e NVIDIA_VISIBLE_DEVICES=all` and the `--runtime=nvidia` argument to `docker run`. ```shell diff --git a/docs/user_guides/advanced/llama-guard-deployment.md b/docs/user_guides/advanced/llama-guard-deployment.md index 0140d729b..6f537b2f8 100644 --- a/docs/user_guides/advanced/llama-guard-deployment.md +++ b/docs/user_guides/advanced/llama-guard-deployment.md @@ -1,4 +1,4 @@ -## Self-hosting Llama Guard using vLLM +# Self-hosting Llama Guard using vLLM Detailed below are steps to self-host Llama Guard using vLLM and HuggingFace. Alternatively, you can do this using your own custom inference code with the downloaded model weights, too. @@ -10,10 +10,12 @@ huggingface-cli login ``` 3. Here, we use vLLM to host a Llama Guard inference endpoint in the OpenAI-compatible mode. + ``` pip install vllm python -m vllm.entrypoints.openai.api_server --port 5123 --model meta-llama/LlamaGuard-7b ``` + This will serve up the vLLM inference server on `http://localhost:5123/`. -4. Set the host and port in your bot's YAML configuration files ([example config](../../../examples/configs/llama_guard)). If you're running the `nemoguardrails` app on another server, remember to replace `localhost` with your vLLM server's public IP address. +4. Set the host and port in your bot's YAML configuration files ([example config](https://github.com/NVIDIA/NeMo-Guardrails/blob/develop/examples/configs/llama_guard/README.md)). If you're running the `nemoguardrails` app on another server, remember to replace `localhost` with your vLLM server's public IP address. 
diff --git a/docs/user_guides/advanced/prompt-customization.md b/docs/user_guides/advanced/prompt-customization.md index ba73137fd..44fc84c46 100644 --- a/docs/user_guides/advanced/prompt-customization.md +++ b/docs/user_guides/advanced/prompt-customization.md @@ -11,11 +11,11 @@ The interaction with the LLM is designed in a task-oriented way, i.e., each time 3. `generate_bot_message`: decide the exact bot message that should be returned. 4. `general`: generate the next bot message based on the history of user and bot messages; this task is used when there are no dialog rails defined (i.e., no user message canonical forms). -Check out the [Task type](../../../nemoguardrails/llm/types.py) for the complete list of tasks. +Check out the [Task type](https://github.com/NVIDIA/NeMo-Guardrails/blob/develop/nemoguardrails/llm/types.py) for the complete list of tasks. ## Prompt Configuration -The toolkit provides predefined prompts for each task and for certain LLM models. They are located in the [nemoguardrails/llm/prompts](../../../nemoguardrails/llm/prompts) folder. You can customize the prompts further by including a `prompts.yml` file in a guardrails configuration (technically, the file name is not essential, and you can also include the `prompts` key in the general `config.yml` file). +The toolkit provides predefined prompts for each task and for certain LLM models. They are located in the [nemoguardrails/llm/prompts](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/llm/prompts) folder. You can customize the prompts further by including a `prompts.yml` file in a guardrails configuration (technically, the file name is not essential, and you can also include the `prompts` key in the general `config.yml` file). Additionally, if the environment variable `PROMPTS_DIR` is set, the toolkit will also load any prompts defined in the specified directory. The loading is performed once, when the python module is loaded. 
The folder must contain one or more `.yml` files which contain prompt definitions (inside the `prompts` key). @@ -210,7 +210,7 @@ Currently, the NeMo Guardrails toolkit includes prompts for `openai/gpt-3.5-turb ## Custom Tasks and Prompts In the scenario where you would like to create a custom task beyond those included in -[the default tasks](../../../nemoguardrails/llm/types.py), you can include the task and associated prompt as provided in the example below: +[the default tasks](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/llm/types.py), you can include the task and associated prompt as provided in the example below: ```yaml prompts: diff --git a/docs/user_guides/advanced/streaming.md b/docs/user_guides/advanced/streaming.md index b01f56deb..a95f970a2 100644 --- a/docs/user_guides/advanced/streaming.md +++ b/docs/user_guides/advanced/streaming.md @@ -68,7 +68,7 @@ result = await app.generate_async( print(result) ``` -For the complete working example, check out this [demo script](../../../examples/scripts/demo_streaming.py). +For the complete working example, check out this [demo script](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/examples/scripts/demo_streaming.py). ### Server API @@ -90,7 +90,7 @@ POST /v1/chat/completions ### Streaming for LLMs deployed using HuggingFacePipeline We also support streaming for LLMs deployed using `HuggingFacePipeline`. -One example is provided in the [HF Pipeline Dolly](./../../../examples/configs/llm/hf_pipeline_dolly/README.md) configuration. +One example is provided in the [HF Pipeline Dolly](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/examples/configs/llm/hf_pipeline_dolly/README.md) configuration. To use streaming for HF Pipeline LLMs, you first need to set the streaming flag in your `config.yml`. 
diff --git a/docs/user_guides/advanced/using-docker.md b/docs/user_guides/advanced/using-docker.md index abbbc5328..9e6657b7c 100644 --- a/docs/user_guides/advanced/using-docker.md +++ b/docs/user_guides/advanced/using-docker.md @@ -34,7 +34,7 @@ docker build -t nemoguardrails . ### 3. \[Optional] Build the AlignScore Server Image -If you want to use AlignScore-based fact-checking, you can also build a Docker image using the provided [Dockerfile](../../../nemoguardrails/library/factchecking/align_score/Dockerfile). +If you want to use AlignScore-based fact-checking, you can also build a Docker image using the provided [Dockerfile](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/library/factchecking/align_score/Dockerfile). ```bash cd nemoguardrails/library/factchecking/align_score @@ -45,7 +45,7 @@ NOTE: the provided Dockerfile downloads only the `base` AlignScore image. If you ### 4. \[Optional] Build the Jailbreak Detection Heuristics Server Image -If you want to use the jailbreak detection heuristics server, you can also build a Docker image using the provided [Dockerfile](../../../nemoguardrails/library/jailbreak_detection/Dockerfile). +If you want to use the jailbreak detection heuristics server, you can also build a Docker image using the provided [Dockerfile](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/library/jailbreak_detection/Dockerfile). ```bash cd nemoguardrails/jailbreak_detection diff --git a/docs/user_guides/cli.md b/docs/user_guides/cli.md index 5de30f264..8e49a47d2 100644 --- a/docs/user_guides/cli.md +++ b/docs/user_guides/cli.md @@ -7,7 +7,7 @@ For testing purposes, the Guardrails toolkit provides a command line chat that c ``` > nemoguardrails chat --config examples/ [--verbose] [--verbose-llm-calls] ``` -#### Options +## Options - `--config`: The configuration that should be used. Can be a folder or a .co/.yml file. - `--verbose`: In verbose mode, detailed debugging information is also shown. 
- `--verbose-llm-calls`: In verbose LLM calls mode, the debugging information includes the entire prompt that is sent to the LLM and the completion. diff --git a/docs/user_guides/colang-language-syntax-guide.md b/docs/user_guides/colang-language-syntax-guide.md index d2aacd2c9..ddff198aa 100644 --- a/docs/user_guides/colang-language-syntax-guide.md +++ b/docs/user_guides/colang-language-syntax-guide.md @@ -189,4 +189,4 @@ Actions **are not defined** in Colang. They are made available to the guardrails ## Conclusion -This was a brief introduction to Colang 1.0. For more details, check out the [Examples folder](../../examples) document. +This was a brief introduction to Colang 1.0. For more details, check out the [Examples folder](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/examples) document. diff --git a/docs/user_guides/configuration-guide.md b/docs/user_guides/configuration-guide.md index d4268996e..bf5b6e330 100644 --- a/docs/user_guides/configuration-guide.md +++ b/docs/user_guides/configuration-guide.md @@ -125,7 +125,7 @@ You can specify additional parameters when using NeMo LLM models using the `para The `api_host`, `api_key`, and `organization_id` are fetched automatically from the environment variables `NGC_API_HOST`, `NGC_API_KEY`, and `NGC_ORGANIZATION_ID`, respectively. -For more details, please refer to the NeMo LLM Service documentation and check out the [NeMo LLM example configuration](../../examples/configs/llm/nemollm). +For more details, please refer to the NeMo LLM Service documentation and check out the [NeMo LLM example configuration](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/examples/configs/llm/nemollm/README.md). #### TRT-LLM @@ -206,7 +206,7 @@ models: NeMo Guardrails uses embedding search (a.k.a. vector databases) for implementing the [guardrails process](../architecture/README.md#the-guardrails-process) and for the [knowledge base](#knowledge-base-documents) functionality. 
The default embedding search uses FastEmbed for computing the embeddings (the `all-MiniLM-L6-v2` model) and [Annoy](https://github.com/spotify/annoy) for performing the search. As shown in the previous section, the embeddings model supports both FastEmbed and OpenAI. SentenceTransformers is also supported. -For advanced use cases or integrations with existing knowledge bases, you can [provide a custom embedding search provider](./advanced/embedding-search-providers.md). +For advanced use cases or integrations with existing knowledge bases, you can [provide a custom embedding search provider](advanced/embedding-search-providers.md). ### General Instructions @@ -287,7 +287,7 @@ The full list of tasks used by the NeMo Guardrails toolkit is the following: - `self_check_output`: check if bot response should be allowed; - `self_check_hallucination`: check if the bot response is a hallucination. -You can check the default prompts in the [prompts](../../nemoguardrails/llm/prompts) folder. +You can check the default prompts in the [prompts](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/llm/prompts) folder. ### Multi-step Generation @@ -408,7 +408,7 @@ Retrieval rails process the retrieved chunks, i.e., the `$relevant_chunks` varia ### Dialog Rails -Dialog rails enforce specific predefined conversational paths. To use dialog rails, you must define canonical form forms for various user messages and use them to trigger the dialog flows. Check out the [Hello World](../../examples/bots/hello_world) bot for a quick example. For a slightly more advanced example, check out the [ABC bot](../../examples/bots/abc), where dialog rails are used to ensure the bot does not talk about specific topics. +Dialog rails enforce specific predefined conversational paths. To use dialog rails, you must define canonical form forms for various user messages and use them to trigger the dialog flows. 
Check out the [Hello World](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/examples/bots/hello_world/README.md) bot for a quick example. For a slightly more advanced example, check out the [ABC bot](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/examples/bots/abc/README.md), where dialog rails are used to ensure the bot does not talk about specific topics. The use of dialog rails requires a three-step process: diff --git a/docs/user_guides/detailed_logging/index.rst b/docs/user_guides/detailed_logging/index.rst new file mode 100644 index 000000000..ec81a35a5 --- /dev/null +++ b/docs/user_guides/detailed_logging/index.rst @@ -0,0 +1,7 @@ +Detailed Logging +================ + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/user_guides/guardrails-library.md b/docs/user_guides/guardrails-library.md index 04236b192..d1a56fc78 100644 --- a/docs/user_guides/guardrails-library.md +++ b/docs/user_guides/guardrails-library.md @@ -50,7 +50,7 @@ rails: 2. Define the `self_check_input` prompt in the `prompts.yml` file: -```yml +```yaml prompts: - task: self_check_input content: |- @@ -63,7 +63,7 @@ prompts: The above is an example prompt you can use with the *self check input rail*. See the [Example Prompts](#example-prompts) section below for more details. The `self_check_input` prompt has an input variable `{{ user_input }}` which includes the input from the user. The completion must be "yes" if the input should be blocked and "no" otherwise.
-The self-check input rail executes the [`self_check_input` action](../../nemoguardrails/library/self_check/input_check/actions.py), which returns `True` if the input should be allowed, and `False` otherwise: +The self-check input rail executes the [`self_check_input` action](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/library/self_check/input_check/actions.py), which returns `True` if the input should be allowed, and `False` otherwise: ```colang define flow self check input @@ -89,7 +89,7 @@ This section provides two example prompts you can use with the self-check input This prompt relies on the capability of the model to understand what "breaking moderation policies" and "good aligned responses" mean. -```yml +```yaml prompts: - task: self_check_input content: > @@ -152,7 +152,7 @@ rails: 2. Define the `self_check_output` prompt in the `prompts.yml` file: -```yml +```yaml prompts: - task: self_check_output content: |- @@ -167,7 +167,7 @@ prompts: The above is an example prompt you can use with the *self check output rail*. See the [Example Prompts](#example-prompts-1) section below for more details. The `self_check_output` prompt has an input variable `{{ bot_response }}` which includes the output from the bot. The completion must be "yes" if the output should be blocked and "no" otherwise. -The self-check output rail executes the [`self_check_output` action](../../nemoguardrails/library/self_check/output_check/actions.py), which returns `True` if the output should be allowed, and `False` otherwise: +The self-check output rail executes the [`self_check_output` action](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/library/self_check/output_check/actions.py), which returns `True` if the output should be allowed, and `False` otherwise: ```colang define flow self check output @@ -193,7 +193,7 @@ This section provides two example prompts for the self-check output rail. 
The si This prompt relies on the capability of the model to understand what "legal", "ethical" and "not harmful to any person" mean. -```yml +```yaml prompts: - task: self_check_output content: > @@ -236,7 +236,7 @@ prompts: The goal of the self-check fact-checking output rail is to ensure that the answer to a RAG (Retrieval Augmented Generation) query is grounded in the provided evidence extracted from the knowledge base (KB). -NeMo Guardrails uses the concept of **relevant chunks** (which are stored in the `$relevant_chunks` context variable) as the evidence against which fact-checking should be performed. The relevant chunks can be extracted automatically, if the built-in knowledge base support is used, or provided directly alongside the query (see the [Getting Started Guide example](../getting_started/7_rag)). +NeMo Guardrails uses the concept of **relevant chunks** (which are stored in the `$relevant_chunks` context variable) as the evidence against which fact-checking should be performed. The relevant chunks can be extracted automatically, if the built-in knowledge base support is used, or provided directly alongside the query (see the [Getting Started Guide example](../getting_started/7_rag/README.md)). **IMPORTANT**: The performance of this rail is strongly dependent on the capability of the LLM to follow the instructions in the `self_check_facts` prompt. @@ -255,7 +255,7 @@ rails: 2. Define the `self_check_facts` prompt in the `prompts.yml` file: -```yml +```yaml prompts: - task: self_check_facts content: |- @@ -268,7 +268,7 @@ prompts: The above is an example prompt that you can use with the *self check facts rail*. The `self_check_facts` prompt has two input variables: `{{ evidence }}`, which includes the relevant chunks, and `{{ response }}`, which includes the bot response that should be fact-checked. The completion must be "yes" if the response is factually correct and "no" otherwise. 
-The self-check fact-checking rail executes the [`self_check_facts` action](../../nemoguardrails/library/self_check/output_check/actions.py), which returns a score between `0.0` (response is not accurate) and `1.0` (response is accurate). The reason a number is returned, instead of a boolean, is to keep a consistent API with other methods that return a score, e.g., the AlignScore method below. +The self-check fact-checking rail executes the [`self_check_facts` action](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/library/self_check/output_check/actions.py), which returns a score between `0.0` (response is not accurate) and `1.0` (response is accurate). The reason a number is returned, instead of a boolean, is to keep a consistent API with other methods that return a score, e.g., the AlignScore method below. ```colang define subflow self check facts @@ -304,7 +304,7 @@ define flow answer report question bot $answer ``` -Please refer to the [Custom RAG Output Rails example](../../examples/configs/rag/custom_rag_output_rails/README.md). +Please refer to the [Custom RAG Output Rails example](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/examples/configs/rag/custom_rag_output_rails/README.md). ### Hallucination Detection @@ -325,7 +325,7 @@ rails: 2. Define a `self_check_hallucinations` prompt in the `prompts.yml` file: -```yml +```yaml prompts: - task: self_check_hallucinations content: |- @@ -391,7 +391,7 @@ define flow answer report question bot $answer ``` -Please refer to the [Custom RAG Output Rails example](../../examples/configs/rag/custom_rag_output_rails/README.md). +Please refer to the [Custom RAG Output Rails example](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/examples/configs/rag/custom_rag_output_rails/README.md). #### Implementation Details @@ -411,7 +411,7 @@ This category of rails relies on open-source models and libraries. 
NeMo Guardrails provides out-of-the-box support for the [AlignScore metric (Zha et al.)](https://aclanthology.org/2023.acl-long.634.pdf), which uses a RoBERTa-based model for scoring factual consistency in model responses with respect to the knowledge base. -In our testing, we observed an average latency of ~220ms on hosting AlignScore as an HTTP service, and ~45ms on direct inference with the model loaded in-memory. This makes it much faster than the self-check method. However, this method requires an on-prem deployment of the publicly available AlignScore model. Please see the [AlignScore Deployment](./advanced/align-score-deployment.md) guide for more details. +In our testing, we observed an average latency of ~220ms on hosting AlignScore as an HTTP service, and ~45ms on direct inference with the model loaded in-memory. This makes it much faster than the self-check method. However, this method requires an on-prem deployment of the publicly available AlignScore model. Please see the [AlignScore Deployment](advanced/align-score-deployment.md) guide for more details. #### Usage @@ -443,7 +443,7 @@ define flow NeMo Guardrails provides out-of-the-box support for content moderation using Meta's [Llama Guard](https://ai.meta.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/) model. -In our testing, we observe significantly improved input and output content moderation performance compared to the [self-check method](#llm-self-checking). Please see additional documentation for more details on the [recommended deployment method](./advanced/llama-guard-deployment.md) and the [performance evaluation](./../evaluation/README.md#llamaguard-based-moderation-rails-performance) numbers. +In our testing, we observe significantly improved input and output content moderation performance compared to the [self-check method](#llm-self-checking). 
Please see additional documentation for more details on the [recommended deployment method](advanced/llama-guard-deployment.md) and the [performance evaluation](../evaluation/README.md#llamaguard-based-moderation-rails-performance) numbers. #### Usage @@ -475,7 +475,7 @@ rails: 3. Define the `llama_guard_check_input` and the `llama_guard_check_output` prompts in the `prompts.yml` file: -```yml +```yaml prompts: - task: llama_guard_check_input content: | @@ -491,7 +491,7 @@ prompts: O2: ... ``` -The rails execute the [`llama_guard_check_*` actions](../../nemoguardrails/library/llama_guard/actions.py), which return `True` if the user input or the bot message should be allowed, and `False` otherwise, along with a list of the unsafe content categories as defined in the Llama Guard prompt. +The rails execute the [`llama_guard_check_*` actions](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/library/llama_guard/actions.py), which return `True` if the user input or the bot message should be allowed, and `False` otherwise, along with a list of the unsafe content categories as defined in the Llama Guard prompt. ```colang define flow llama guard check input @@ -506,7 +506,7 @@ define flow llama guard check input # (similar flow for checking output) ``` -A complete example configuration that uses Llama Guard for input and output moderation is provided in this [example folder](./../../examples/configs/llama_guard/README.md). +A complete example configuration that uses Llama Guard for input and output moderation is provided in this [example folder](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/examples/configs/llama_guard/README.md).
### Presidio-based Sensitive Data Detection @@ -635,7 +635,7 @@ rails: #### Custom Detection -If you want to implement a completely different sensitive data detection mechanism, you can override the default actions [`detect_sensitive_data`](../../nemoguardrails/library/sensitive_data_detection/actions.py) and [`mask_sensitive_data`](../../nemoguardrails/library/sensitive_data_detection/actions.py). +If you want to implement a completely different sensitive data detection mechanism, you can override the default actions [`detect_sensitive_data`](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/library/sensitive_data_detection/actions.py) and [`mask_sensitive_data`](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/library/sensitive_data_detection/actions.py). ## Third-Party APIs @@ -659,7 +659,7 @@ rails: The `activefence moderation` flow uses the maximum risk score with an 0.85 threshold to decide if the input should be allowed or not (i.e., if the risk score is above the threshold, it is considered a violation). The `activefence moderation detailed` has individual scores per category of violation. -To customize the scores, you have to overwrite the [default flows](../../nemoguardrails/library/activefence/flows.co) in your config. For example, to change the threshold for `activefence moderation` you can add the following flow to your config: +To customize the scores, you have to overwrite the [default flows](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/library/activefence/flows.co) in your config. 
For example, to change the threshold for `activefence moderation` you can add the following flow to your config: ```colang define subflow activefence moderation @@ -764,7 +764,7 @@ To compute the perplexity of a string, the current implementation uses the `gpt2 #### Setup -The recommended way for using the jailbreak detection heuristics is to [deploy the jailbreak detection heuristics server](../user_guides/advanced/jailbreak-detection-heuristics-deployment.md) separately. +The recommended way for using the jailbreak detection heuristics is to [deploy the jailbreak detection heuristics server](advanced/jailbreak-detection-heuristics-deployment.md) separately. For quick testing, you can use the jailbreak detection heuristics rail locally by first installing `transformers` and `tourch`. diff --git a/docs/user_guides/guardrails-process.md b/docs/user_guides/guardrails-process.md index 7e78d64ea..a9a279d60 100644 --- a/docs/user_guides/guardrails-process.md +++ b/docs/user_guides/guardrails-process.md @@ -16,7 +16,7 @@ There are five types of rails supported in NeMo Guardrails: 1. **Input rails**: applied to the input from the user; an input rail can reject the input ( stopping any additional processing) or alter the input (e.g., to mask potentially sensitive data, to rephrase). -2. **Dialog rails**: influence how the dialog evolves and how the LLM is prompted; dialog rails operate on canonical form messages (more details [here](./colang-language-syntax-guide.md)) and determine if an action should be executed, if the LLM should be invoked to generate the next step or a response, if a predefined response should be used instead, etc. +2. 
**Dialog rails**: influence how the dialog evolves and how the LLM is prompted; dialog rails operate on canonical form messages (more details [here](colang-language-syntax-guide.md)) and determine if an action should be executed, if the LLM should be invoked to generate the next step or a response, if a predefined response should be used instead, etc. 3. **Retrieval rails**: applied to the retrieved chunks in the case of a RAG (Retrieval Augmented Generation) scenario; a retrieval rail can reject a chunk, preventing it from being used to prompt the LLM, or alter the relevant chunks (e.g., to mask potentially sensitive data). diff --git a/docs/user_guides/index.rst b/docs/user_guides/index.rst new file mode 100644 index 000000000..4e4918cfc --- /dev/null +++ b/docs/user_guides/index.rst @@ -0,0 +1,23 @@ +:orphan: + +User Guides +=========== + +.. toctree:: + :maxdepth: 2 + + cli + colang-language-syntax-guide + configuration-guide + guardrails-library + guardrails-process + llm-support + python-api + server-guide + advanced/index + detailed_logging/index + input_output_rails_only/index + jailbreak_detection_heuristics/index + langchain/index + llm/index + multi_config_api/index diff --git a/docs/user_guides/input_output_rails_only/README.md b/docs/user_guides/input_output_rails_only/README.md index c1c6d093d..cf4041858 100644 --- a/docs/user_guides/input_output_rails_only/README.md +++ b/docs/user_guides/input_output_rails_only/README.md @@ -2,7 +2,7 @@ This guide demonstrates how [generation options](../advanced/generation-options.md) can be used to activate only a specific set of rails - input and output rails in this case, and to disable the other rails defined in a guardrails configuration. -We will use the guardrails configuration for the ABC Bot defined for the [topical rails example](../../getting_started/6_topical_rails) part of the [Getting Started Guide](../../getting_started). 
+We will use the guardrails configuration for the ABC Bot defined for the [topical rails example](../../getting_started/6_topical_rails/README.md) part of the [Getting Started Guide](../../getting_started/README.md). ## Prerequisites @@ -102,7 +102,7 @@ To test the bot with the default behaviour having all the rails active, we just ```python from nemoguardrails import RailsConfig, LLMRails -config = RailsConfig.from_path("./config") +config = RailsConfig.from_path("config") rails = LLMRails(config) messages = [{ "role": "user", diff --git a/docs/user_guides/input_output_rails_only/index.rst b/docs/user_guides/input_output_rails_only/index.rst new file mode 100644 index 000000000..431a67d6f --- /dev/null +++ b/docs/user_guides/input_output_rails_only/index.rst @@ -0,0 +1,7 @@ +Input Output Rails Only +======================= + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/user_guides/jailbreak_detection_heuristics/README.md b/docs/user_guides/jailbreak_detection_heuristics/README.md index 2cc8f94ce..2b72c4d28 100644 --- a/docs/user_guides/jailbreak_detection_heuristics/README.md +++ b/docs/user_guides/jailbreak_detection_heuristics/README.md @@ -2,7 +2,7 @@ This guide demonstrates how to use jailbreak detection heuristics in a guardrails configuration to detect malicious prompts. -We will use the guardrails configuration for the ABC Bot defined for the [topical rails example](../../getting_started/6_topical_rails) part of the [Getting Started Guide](../../getting_started). +We will use the guardrails configuration for the ABC Bot defined for the [topical rails example](../../getting_started/6_topical_rails/README.md) part of the [Getting Started Guide](../../getting_started/README.md). 
```bash # Init: remove any existing configuration and copy the ABC bot from topical rails example @@ -70,7 +70,7 @@ More information about these heuristics can be found in the [Guardrails Library] ### Activating Jailbreak Detection Heuristics -To activate the jailbreak detection heuristics, we first need to include the `jailbreak detection heuristics` flow as an input rail in our guardrails configuration. We can do this by adding the following to the [config.yml](./config/config.yml) of the ABC bot: +To activate the jailbreak detection heuristics, we first need to include the `jailbreak detection heuristics` flow as an input rail in our guardrails configuration. We can do this by adding the following to the [config.yml](config/config.yml) of the ABC bot: ```colang rails: @@ -79,7 +79,7 @@ rails: - jailbreak detection heuristics ``` -To the same file we need to configure the jailbreak detection heuristics. We can do this by adding the following to the [config.yml](./config/config.yml) +To the same file we need to configure the jailbreak detection heuristics. We can do this by adding the following to the [config.yml](config/config.yml) ```colang rails: @@ -98,7 +98,7 @@ To test the bot with the jailbreak detection heuristics as the input rail, we ne ```python from nemoguardrails import RailsConfig, LLMRails -config = RailsConfig.from_path("../../../docs/getting_started/6_topical_rails/config/") +config = RailsConfig.from_path("../../getting_started/6_topical_rails/config/") rails = LLMRails(config) messages = [{ "role": "user", diff --git a/docs/user_guides/jailbreak_detection_heuristics/index.rst b/docs/user_guides/jailbreak_detection_heuristics/index.rst new file mode 100644 index 000000000..c3118b58a --- /dev/null +++ b/docs/user_guides/jailbreak_detection_heuristics/index.rst @@ -0,0 +1,7 @@ +Jailbreak Detection Heuristics +============================== + +.. 
toctree:: + :maxdepth: 2 + + README diff --git a/docs/user_guides/langchain/chain-with-guardrails/README.md b/docs/user_guides/langchain/chain-with-guardrails/README.md index f751c76f1..38db373c2 100644 --- a/docs/user_guides/langchain/chain-with-guardrails/README.md +++ b/docs/user_guides/langchain/chain-with-guardrails/README.md @@ -109,7 +109,7 @@ prompts: from nemoguardrails import RailsConfig from nemoguardrails.integrations.langchain.runnable_rails import RunnableRails -config = RailsConfig.from_path("./config") +config = RailsConfig.from_path("config") guardrails = RunnableRails(config) ``` diff --git a/docs/user_guides/langchain/chain-with-guardrails/index.rst b/docs/user_guides/langchain/chain-with-guardrails/index.rst new file mode 100644 index 000000000..aff5bb8c0 --- /dev/null +++ b/docs/user_guides/langchain/chain-with-guardrails/index.rst @@ -0,0 +1,7 @@ +Chain-With-Guardrails +===================== + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/user_guides/langchain/index.rst b/docs/user_guides/langchain/index.rst new file mode 100644 index 000000000..ee52156eb --- /dev/null +++ b/docs/user_guides/langchain/index.rst @@ -0,0 +1,10 @@ +Langchain +========= + +.. toctree:: + :maxdepth: 2 + + langchain-integration + runnable-rails + chain-with-guardrails/index + runnable-as-action/index diff --git a/docs/user_guides/langchain/langchain-integration.md b/docs/user_guides/langchain/langchain-integration.md index 0e5368ab1..d3340fc11 100644 --- a/docs/user_guides/langchain/langchain-integration.md +++ b/docs/user_guides/langchain/langchain-integration.md @@ -26,7 +26,7 @@ chain_with_guardrails = guardrails | some_chain chain_with_guardrails = RunnableRails(config, runnable=some_chain) ``` -For more details, check out the [RunnableRails Guide](runnable-rails.md) and the [Chain with Guardrails Guide](./chain-with-guardrails). 
+For more details, check out the [RunnableRails Guide](runnable-rails.md) and the [Chain with Guardrails Guide](chain-with-guardrails/README.md). ## Using a Chain inside Guardrails @@ -50,7 +50,7 @@ define flow ... ``` -For a complete example, check out the [Runnable as Action Guide](./runnable-as-action). +For a complete example, check out the [Runnable as Action Guide](runnable-as-action/README.md). ## LangSmith Integration diff --git a/docs/user_guides/langchain/runnable-as-action/README.md b/docs/user_guides/langchain/runnable-as-action/README.md index 32d5618d6..84fe8f791 100644 --- a/docs/user_guides/langchain/runnable-as-action/README.md +++ b/docs/user_guides/langchain/runnable-as-action/README.md @@ -78,7 +78,7 @@ rails: ```python from nemoguardrails import RailsConfig, LLMRails -config = RailsConfig.from_path("./config") +config = RailsConfig.from_path("config") rails = LLMRails(config) rails.register_action(CheckKeywordsRunnable(), "check_keywords") diff --git a/docs/user_guides/langchain/runnable-as-action/index.rst b/docs/user_guides/langchain/runnable-as-action/index.rst new file mode 100644 index 000000000..d7330ea5e --- /dev/null +++ b/docs/user_guides/langchain/runnable-as-action/index.rst @@ -0,0 +1,7 @@ +Runnable-As-Action +================== + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/user_guides/llm-support.md b/docs/user_guides/llm-support.md index 1d773fa73..1bb64a597 100644 --- a/docs/user_guides/llm-support.md +++ b/docs/user_guides/llm-support.md @@ -11,14 +11,14 @@ This document aims to provide a summary of the evaluation experiments we have em the performance of various LLMs for the different type of rails. For more details about the evaluation of guardrails, including datasets and quantitative results, -please read [this document](../../nemoguardrails/eval/README.md). +please read [this document](../evaluation/README.md). 
The tools used for evaluation are described in the same file, for a summary of topics [read this section](../README.md#evaluation-tools) from the user guide. Any new LLM available in Guardrails should be evaluated using at least this set of tools. ## LLM Support and Guidance The following tables summarize the LLM support for the main features of NeMo Guardrails, focusing on the different rails available out of the box. -If you want to use an LLM and you cannot see a prompt in the [prompts folder](../../nemoguardrails/llm/prompts), please also check the configuration defined in the [LLM examples' configurations](../../examples/configs/llm). +If you want to use an LLM and you cannot see a prompt in the [prompts folder](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/nemoguardrails/llm/prompts), please also check the configuration defined in the [LLM examples' configurations](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/examples/configs/llm/README.md). | Feature | gpt-3.5-turbo-instruct | text-davinci-003 | nemollm-43b | llama-2-13b-chat | falcon-7b-instruct | gpt-3.5-turbo | gpt-4 | gpt4all-13b-snoozy | vicuna-7b-v1.3 | mpt-7b-instruct | dolly-v2-3b | HF Pipeline model | |----------------------------------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|--------------------|----------------------|----------------------|----------------------|----------------------|------------------------------------| @@ -44,4 +44,4 @@ Table legend: The performance numbers reported in the table above for each LLM-feature pair are as follows: - the banking dataset evaluation for dialog (topical) rails - fact-checking using MSMARCO dataset and moderation rails experiments -More details in the [evaluation docs](../../nemoguardrails/eval/README.md). 
+More details in the [evaluation docs](https://github.com/NVIDIA/NeMo-Guardrails/blob/develop/nemoguardrails/eval/README.md). diff --git a/docs/user_guides/llm/index.rst b/docs/user_guides/llm/index.rst new file mode 100644 index 000000000..d505cbd7f --- /dev/null +++ b/docs/user_guides/llm/index.rst @@ -0,0 +1,8 @@ +LLM +=== + +.. toctree:: + :maxdepth: 2 + + nvidia_ai_endpoints/index + vertexai/index diff --git a/docs/user_guides/llm/nvidia_ai_endpoints/README.md b/docs/user_guides/llm/nvidia_ai_endpoints/README.md index 69921b6f2..cd8817d5c 100644 --- a/docs/user_guides/llm/nvidia_ai_endpoints/README.md +++ b/docs/user_guides/llm/nvidia_ai_endpoints/README.md @@ -1,6 +1,6 @@ # Using LLMs hosted on NVIDIA AI Foundation -This guide teaches you how to use NeMo Guardrails with LLMs hosted on NVIDIA AI Foundation. It uses the [ABC Bot configuration](../../../../examples/bots/abc) and changes the model to `playground_mixtral_8x7b`. +This guide teaches you how to use NeMo Guardrails with LLMs hosted on NVIDIA AI Foundation. It uses the [ABC Bot configuration](https://github.com/NVIDIA/NeMo-Guardrails/blob/develop/examples/bots/abc/README.md) and changes the model to `playground_mixtral_8x7b`. ## Prerequisites diff --git a/docs/user_guides/llm/nvidia_ai_endpoints/index.rst b/docs/user_guides/llm/nvidia_ai_endpoints/index.rst new file mode 100644 index 000000000..e61f76224 --- /dev/null +++ b/docs/user_guides/llm/nvidia_ai_endpoints/index.rst @@ -0,0 +1,7 @@ +NVIDIA AI Endpoints +=================== + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/user_guides/llm/vertexai/README.md b/docs/user_guides/llm/vertexai/README.md index f1816a8cb..c3e002e9a 100644 --- a/docs/user_guides/llm/vertexai/README.md +++ b/docs/user_guides/llm/vertexai/README.md @@ -1,6 +1,6 @@ # Using LLMs hosted on Vertex AI -This guide teaches you how to use NeMo Guardrails with LLMs hosted on Vertex AI.
It uses the [ABC Bot configuration](../../../../examples/bots/abc) and changes the model to `gemini-1.0-pro`. +This guide teaches you how to use NeMo Guardrails with LLMs hosted on Vertex AI. It uses the [ABC Bot configuration](https://github.com/NVIDIA/NeMo-Guardrails/blob/develop/examples/bots/abc/README.md) and changes the model to `gemini-1.0-pro`. This guide assumes you have configured and tested working with Vertex AI models. If not, refer to [this guide](../../advanced/vertexai-setup.md). @@ -91,7 +91,7 @@ Summary: 5 LLM call(s) took 3.99 seconds . ## Evaluation -The `gemini-1.0-pro` and `text-bison` models have been evaluated for topical rails, and `gemini-1.0-pro` has also been evaluated as a self-checking model for hallucination and content moderation. Evaluation results can be found [here](../../../../docs/evaluation/README.md). +The `gemini-1.0-pro` and `text-bison` models have been evaluated for topical rails, and `gemini-1.0-pro` has also been evaluated as a self-checking model for hallucination and content moderation. Evaluation results can be found [here](../../../evaluation/README.md). ## Conclusion diff --git a/docs/user_guides/llm/vertexai/index.rst b/docs/user_guides/llm/vertexai/index.rst new file mode 100644 index 000000000..9d55f8436 --- /dev/null +++ b/docs/user_guides/llm/vertexai/index.rst @@ -0,0 +1,7 @@ +Vertex AI +========= + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/user_guides/multi_config_api/README.md b/docs/user_guides/multi_config_api/README.md index c1e14bb3e..276da459d 100644 --- a/docs/user_guides/multi_config_api/README.md +++ b/docs/user_guides/multi_config_api/README.md @@ -4,7 +4,7 @@ This guide describes how to use multiple configurations as part of the same serv ## Motivation -When running a guardrails server, it is convenient to create *atomic configurations* which can be reused across multiple "complete" configurations.
In this guide, we use [these example configurations](../../../examples/server_configs/atomic): +When running a guardrails server, it is convenient to create *atomic configurations* which can be reused across multiple "complete" configurations. In this guide, we use [these example configurations](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/examples/server_configs/atomic): 1. `input_checking`: which uses the self-check input rail. 2. `output_checking`: which uses the self-check output rail. 3. `main`: which uses the `gpt-3.5-turbo-instruct` model with no guardrails. diff --git a/docs/user_guides/multi_config_api/index.rst b/docs/user_guides/multi_config_api/index.rst new file mode 100644 index 000000000..49cc035ce --- /dev/null +++ b/docs/user_guides/multi_config_api/index.rst @@ -0,0 +1,7 @@ +Multi Config API +================ + +.. toctree:: + :maxdepth: 2 + + README diff --git a/docs/user_guides/python-api.md b/docs/user_guides/python-api.md index 191923748..3c11acfe1 100644 --- a/docs/user_guides/python-api.md +++ b/docs/user_guides/python-api.md @@ -138,7 +138,7 @@ You can register a Langchain chain as an action using the [LLMRails.register_act app.register_action(some_chain, name="some_chain") ``` -When a chain is invoked as an action, the parameters of the action correspond to the input keys of the chain. For the return value, if the output of the chain has a single key, the value will be returned. If the chain has multiple output keys, the dictionary of output keys and their values is returned. See the [LangChain Integration Guide](./langchain/langchain-integration.md) for more details. +When a chain is invoked as an action, the parameters of the action correspond to the input keys of the chain. For the return value, if the output of the chain has a single key, the value will be returned. If the chain has multiple output keys, the dictionary of output keys and their values is returned.
See the [LangChain Integration Guide](langchain/langchain-integration.md) for more details. ### Custom Actions diff --git a/docs/user_guides/server-guide.md b/docs/user_guides/server-guide.md index f04ac3086..2fd079edd 100644 --- a/docs/user_guides/server-guide.md +++ b/docs/user_guides/server-guide.md @@ -111,7 +111,7 @@ The Guardrails Server has basic support for storing the conversation threads. Th To use server-side threads, you have to register a datastore. To do this, you must create a `config.py` file in the root of the configurations folder (i.e., the folder containing all the guardrails configurations the server must load). Inside `config.py` use the `register_datastore` function to register the datastore you want to use. -Out-of-the-box, NeMo Guardrails has support for `MemoryStore` (useful for quick testing) and `RedisStore`. If you want to use a different backend, you can implement the [`DataStore`](../../nemoguardrails/server/datastore/datastore.py) interface and register a different instance in `config.py`. +Out-of-the-box, NeMo Guardrails has support for `MemoryStore` (useful for quick testing) and `RedisStore`. If you want to use a different backend, you can implement the [`DataStore`](https://github.com/NVIDIA/NeMo-Guardrails/blob/develop/nemoguardrails/server/datastore/datastore.py) interface and register a different instance in `config.py`. > NOTE: to use `RedisStore` you must install `aioredis >= 2.0.1`. @@ -133,7 +133,7 @@ POST /v1/chat/completions > NOTE: for security reasons, the `thread_id` must have a minimum length of 16 characters. -As an example, check out this [configuration](../../examples/configs/threads). +As an example, check out this [configuration](https://github.com/NVIDIA/NeMo-Guardrails/blob/develop/examples/configs/threads/README.md).
#### Limitations @@ -166,7 +166,7 @@ The OpenAPI specification for the actions server is available at `http://localho #### `/v1/actions/list` -To list the [available actions](./python-api.md#actions) for the server, use the `/v1/actions/list` endpoint. +To list the [available actions](python-api.md#actions) for the server, use the `/v1/actions/list` endpoint. ``` GET /v1/actions/list